This patch computes a conservative approximation of the memory locations touched by `llvm.matrix.column.major.load` and `llvm.matrix.column.major.store`, enabling GVN to remove redundant loads and dead store elimination (DSE) to remove dead stores involving these intrinsics. PR: https://github.com/llvm/llvm-project/pull/163368
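As a rough illustration (a sketch, not taken from the patch; the exact name mangling of the intrinsic may differ), a column-major load whose constant stride equals the row count touches a precisely known range:

    ; 2x2 matrix of doubles, stride 2 == rows 2:
    ; exactly 8 * (2 * (2 - 1) + 2) = 32 bytes at %p are read.
    %m = call <4 x double> @llvm.matrix.column.major.load.v4f64.i64(
             ptr %p, i64 2, i1 false, i32 2, i32 2)

With a constant stride larger than the row count, the same formula yields only an upper bound, since the trailing gap of each column is never accessed; with a non-constant stride, no size can be inferred at all.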
//===- MemoryLocation.cpp - Memory location descriptions -------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include <optional>

using namespace llvm;

void LocationSize::print(raw_ostream &OS) const {
  OS << "LocationSize::";
  if (*this == beforeOrAfterPointer())
    OS << "beforeOrAfterPointer";
  else if (*this == afterPointer())
    OS << "afterPointer";
  else if (*this == mapEmpty())
    OS << "mapEmpty";
  else if (*this == mapTombstone())
    OS << "mapTombstone";
  else if (isPrecise())
    OS << "precise(" << getValue() << ')';
  else
    OS << "upperBound(" << getValue() << ')';
}

MemoryLocation MemoryLocation::get(const LoadInst *LI) {
  const auto &DL = LI->getDataLayout();

  return MemoryLocation(
      LI->getPointerOperand(),
      LocationSize::precise(DL.getTypeStoreSize(LI->getType())),
      LI->getAAMetadata());
}

MemoryLocation MemoryLocation::get(const StoreInst *SI) {
  const auto &DL = SI->getDataLayout();

  return MemoryLocation(SI->getPointerOperand(),
                        LocationSize::precise(DL.getTypeStoreSize(
                            SI->getValueOperand()->getType())),
                        SI->getAAMetadata());
}

MemoryLocation MemoryLocation::get(const VAArgInst *VI) {
  return MemoryLocation(VI->getPointerOperand(),
                        LocationSize::afterPointer(), VI->getAAMetadata());
}

MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
  const auto &DL = CXI->getDataLayout();

  return MemoryLocation(CXI->getPointerOperand(),
                        LocationSize::precise(DL.getTypeStoreSize(
                            CXI->getCompareOperand()->getType())),
                        CXI->getAAMetadata());
}

MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
  const auto &DL = RMWI->getDataLayout();

  return MemoryLocation(RMWI->getPointerOperand(),
                        LocationSize::precise(DL.getTypeStoreSize(
                            RMWI->getValOperand()->getType())),
                        RMWI->getAAMetadata());
}

std::optional<MemoryLocation>
MemoryLocation::getOrNone(const Instruction *Inst) {
  switch (Inst->getOpcode()) {
  case Instruction::Load:
    return get(cast<LoadInst>(Inst));
  case Instruction::Store:
    return get(cast<StoreInst>(Inst));
  case Instruction::VAArg:
    return get(cast<VAArgInst>(Inst));
  case Instruction::AtomicCmpXchg:
    return get(cast<AtomicCmpXchgInst>(Inst));
  case Instruction::AtomicRMW:
    return get(cast<AtomicRMWInst>(Inst));
  default:
    return std::nullopt;
  }
}

MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) {
  return getForSource(cast<AnyMemTransferInst>(MTI));
}

MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
  assert(MTI->getRawSource() == MTI->getArgOperand(1));
  return getForArgument(MTI, 1, nullptr);
}

MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MI) {
  return getForDest(cast<AnyMemIntrinsic>(MI));
}

MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
  assert(MI->getRawDest() == MI->getArgOperand(0));
  return getForArgument(MI, 0, nullptr);
}

std::optional<MemoryLocation>
MemoryLocation::getForDest(const CallBase *CB, const TargetLibraryInfo &TLI) {
  // Check that the only possible writes are to arguments.
  MemoryEffects WriteME = CB->getMemoryEffects() & MemoryEffects::writeOnly();
  if (!WriteME.onlyAccessesArgPointees())
    return std::nullopt;

  if (CB->hasOperandBundles())
    // TODO: remove implementation restriction
    return std::nullopt;
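
  // Look for the unique pointer argument that may be written; e.g. for
  // strcpy(dst, src), only dst qualifies, since src is only read.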
  Value *UsedV = nullptr;
  std::optional<unsigned> UsedIdx;
  for (unsigned i = 0; i < CB->arg_size(); i++) {
    if (!CB->getArgOperand(i)->getType()->isPointerTy())
      continue;
    if (CB->onlyReadsMemory(i))
      continue;
    if (!UsedV) {
      // First potentially writing parameter
      UsedV = CB->getArgOperand(i);
      UsedIdx = i;
      continue;
    }
    UsedIdx = std::nullopt;
    if (UsedV != CB->getArgOperand(i))
      // Can't describe writing to two distinct locations.
      // TODO: This results in an imprecision when two values derived from the
      // same object are passed as arguments to the same function.
      return std::nullopt;
  }
  if (!UsedV)
    // We don't currently have a way to represent a "does not write" result
    // and thus have to be conservative and return unknown.
    return std::nullopt;

  if (UsedIdx)
    return getForArgument(CB, *UsedIdx, &TLI);
  return MemoryLocation::getBeforeOrAfter(UsedV, CB->getAAMetadata());
}

// If the mask of a masked memory op is a get.active.lane.mask intrinsic with
// constant operands, we can infer the number of lanes accessed and thus the
// size of the memory read or written.
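// For example, get.active.lane.mask(i64 2, i64 5) sets lane i exactly when
// 2 + i < 5, i.e. the first 3 lanes, so at most 3 elements are accessed.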
static std::optional<FixedVectorType *>
getKnownTypeFromMaskedOp(Value *Mask, VectorType *Ty) {
  using namespace llvm::PatternMatch;
  ConstantInt *Op0, *Op1;
  if (!match(Mask, m_Intrinsic<Intrinsic::get_active_lane_mask>(
                       m_ConstantInt(Op0), m_ConstantInt(Op1))))
    return std::nullopt;

  APInt LaneMaskLo = Op0->getValue();
  APInt LaneMaskHi = Op1->getValue();
  if (LaneMaskHi.ule(LaneMaskLo))
    return std::nullopt;

  APInt NumElts = LaneMaskHi - LaneMaskLo;
  if (NumElts.ugt(Ty->getElementCount().getKnownMinValue())) {
    if (isa<ScalableVectorType>(Ty))
      return std::nullopt;
    // Unlike scalable vectors, fixed vectors have exactly KnownMinValue
    // elements, so the lane count can be clamped to it.
    NumElts = Ty->getElementCount().getKnownMinValue();
  }

  return FixedVectorType::get(Ty->getElementType(), NumElts.getZExtValue());
}

MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
                                              unsigned ArgIdx,
                                              const TargetLibraryInfo *TLI) {
  AAMDNodes AATags = Call->getAAMetadata();
  const Value *Arg = Call->getArgOperand(ArgIdx);

  // We may be able to produce an exact size for known intrinsics.
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Call)) {
    const DataLayout &DL = II->getDataLayout();

    switch (II->getIntrinsicID()) {
    default:
      break;
    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memcpy_inline:
    case Intrinsic::memmove:
    case Intrinsic::memcpy_element_unordered_atomic:
    case Intrinsic::memmove_element_unordered_atomic:
    case Intrinsic::memset_element_unordered_atomic:
      assert((ArgIdx == 0 || ArgIdx == 1) &&
             "Invalid argument index for memory intrinsic");
      if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
        return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
                              AATags);
      return MemoryLocation::getAfter(Arg, AATags);

    case Intrinsic::experimental_memset_pattern:
      assert((ArgIdx == 0 || ArgIdx == 1) &&
             "Invalid argument index for memory intrinsic");
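      // The total size is the count (operand 2) times the alloc size of the
      // pattern (operand 1), as the pattern is stored count times.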
      if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
        return MemoryLocation(
            Arg,
            LocationSize::precise(
                LenCI->getZExtValue() *
                DL.getTypeAllocSize(II->getArgOperand(1)->getType())),
            AATags);
      return MemoryLocation::getAfter(Arg, AATags);

    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end: {
      assert(ArgIdx == 0 && "Invalid argument index");
      auto *AI = dyn_cast<AllocaInst>(Arg);
      if (!AI)
        // lifetime of poison value.
        return MemoryLocation::getBeforeOrAfter(Arg);

      std::optional<TypeSize> AllocSize =
          AI->getAllocationSize(II->getDataLayout());
      return MemoryLocation(Arg,
                            AllocSize ? LocationSize::precise(*AllocSize)
                                      : LocationSize::afterPointer(),
                            AATags);
    }

    case Intrinsic::invariant_start:
      assert(ArgIdx == 1 && "Invalid argument index");
      return MemoryLocation(
          Arg,
          LocationSize::precise(
              cast<ConstantInt>(II->getArgOperand(0))->getZExtValue()),
          AATags);

    case Intrinsic::masked_load: {
      assert(ArgIdx == 0 && "Invalid argument index");

      auto *Ty = cast<VectorType>(II->getType());
      if (auto KnownType = getKnownTypeFromMaskedOp(II->getOperand(1), Ty))
        return MemoryLocation(Arg, DL.getTypeStoreSize(*KnownType), AATags);
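
      // Without a constant lane bound from the mask, fall back to the full
      // vector size; masked-off lanes are never accessed, so this is an
      // upper bound rather than a precise size.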
      return MemoryLocation(
          Arg, LocationSize::upperBound(DL.getTypeStoreSize(Ty)), AATags);
    }
    case Intrinsic::masked_store: {
      assert(ArgIdx == 1 && "Invalid argument index");

      auto *Ty = cast<VectorType>(II->getArgOperand(0)->getType());
      if (auto KnownType = getKnownTypeFromMaskedOp(II->getOperand(2), Ty))
        return MemoryLocation(Arg, DL.getTypeStoreSize(*KnownType), AATags);

      return MemoryLocation(
          Arg, LocationSize::upperBound(DL.getTypeStoreSize(Ty)), AATags);
    }

    case Intrinsic::invariant_end:
      // The first argument to an invariant.end is a "descriptor" type (e.g. a
      // pointer to an empty struct) which is never actually dereferenced.
      if (ArgIdx == 0)
        return MemoryLocation(Arg, LocationSize::precise(0), AATags);
      assert(ArgIdx == 2 && "Invalid argument index");
      return MemoryLocation(
          Arg,
          LocationSize::precise(
              cast<ConstantInt>(II->getArgOperand(1))->getZExtValue()),
          AATags);

    case Intrinsic::arm_neon_vld1:
      assert(ArgIdx == 0 && "Invalid argument index");
      // LLVM's vld1 and vst1 intrinsics currently only support a single
      // vector register.
      return MemoryLocation(
          Arg, LocationSize::precise(DL.getTypeStoreSize(II->getType())),
          AATags);

    case Intrinsic::arm_neon_vst1:
      assert(ArgIdx == 0 && "Invalid argument index");
      return MemoryLocation(Arg,
                            LocationSize::precise(DL.getTypeStoreSize(
                                II->getArgOperand(1)->getType())),
                            AATags);
    case Intrinsic::matrix_column_major_load:
    case Intrinsic::matrix_column_major_store: {
      bool IsLoad =
          II->getIntrinsicID() == Intrinsic::matrix_column_major_load;
      assert(ArgIdx == (IsLoad ? 0 : 1) && "Invalid argument index");

      auto *Stride = dyn_cast<ConstantInt>(II->getArgOperand(IsLoad ? 1 : 2));
      uint64_t Rows =
          cast<ConstantInt>(II->getArgOperand(IsLoad ? 3 : 4))->getZExtValue();
      uint64_t Cols =
          cast<ConstantInt>(II->getArgOperand(IsLoad ? 4 : 5))->getZExtValue();

      // The stride is dynamic, so there's nothing we can say.
      if (!Stride)
        return MemoryLocation(Arg, LocationSize::afterPointer(), AATags);

      uint64_t ConstStride = Stride->getZExtValue();
      auto *VT = cast<VectorType>(IsLoad ? II->getType()
                                         : II->getArgOperand(0)->getType());
      assert(Cols != 0 && "Matrix cannot have 0 columns");
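      // Each column starts Stride elements after the previous one, and the
      // last column needs only Rows elements. E.g. a 2x3 matrix of doubles
      // with stride 4 touches at most 8 * (4 * (3 - 1) + 2) = 80 bytes.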
      TypeSize Size = DL.getTypeAllocSize(VT->getScalarType()) *
                      (ConstStride * (Cols - 1) + Rows);

      // In the unstrided case, we have a precise size, ...
      if (ConstStride == Rows)
        return MemoryLocation(Arg, LocationSize::precise(Size), AATags);
      // otherwise we merely obtain an upper bound.
      return MemoryLocation(Arg, LocationSize::upperBound(Size), AATags);
    }
    }

    assert(
        !isa<AnyMemTransferInst>(II) &&
        "all memory transfer intrinsics should be handled by the switch above");
  }

  // We can bound the aliasing properties of memset_pattern16 just as we can
  // for memcpy/memset. This is particularly important because the
  // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
  // whenever possible.
  LibFunc F;
  if (TLI && TLI->getLibFunc(*Call, F) && TLI->has(F)) {
    switch (F) {
    case LibFunc_strcpy:
    case LibFunc_strcat:
    case LibFunc_strncat:
      assert((ArgIdx == 0 || ArgIdx == 1) &&
             "Invalid argument index for str function");
      return MemoryLocation::getAfter(Arg, AATags);

    case LibFunc_memset_chk:
      assert(ArgIdx == 0 && "Invalid argument index for memset_chk");
      [[fallthrough]];
    case LibFunc_memcpy_chk: {
      assert((ArgIdx == 0 || ArgIdx == 1) &&
             "Invalid argument index for memcpy_chk");
      LocationSize Size = LocationSize::afterPointer();
      if (const auto *Len = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
        // memset_chk writes at most Len bytes, memcpy_chk reads/writes at most
        // Len bytes. They may read/write less, if Len exceeds the specified
        // max size and the call aborts.
        Size = LocationSize::upperBound(Len->getZExtValue());
      }
      return MemoryLocation(Arg, Size, AATags);
    }
    case LibFunc_strncpy: {
      assert((ArgIdx == 0 || ArgIdx == 1) &&
             "Invalid argument index for strncpy");
      LocationSize Size = LocationSize::afterPointer();
      if (const auto *Len = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
        // strncpy is guaranteed to write Len bytes, but only reads up to Len
        // bytes.
        Size = ArgIdx == 0 ? LocationSize::precise(Len->getZExtValue())
                           : LocationSize::upperBound(Len->getZExtValue());
      }
      return MemoryLocation(Arg, Size, AATags);
    }
    case LibFunc_memset_pattern16:
    case LibFunc_memset_pattern4:
    case LibFunc_memset_pattern8:
      assert((ArgIdx == 0 || ArgIdx == 1) &&
             "Invalid argument index for memset_pattern");
      if (ArgIdx == 1) {
        unsigned Size = 16;
        if (F == LibFunc_memset_pattern4)
          Size = 4;
        else if (F == LibFunc_memset_pattern8)
          Size = 8;
        return MemoryLocation(Arg, LocationSize::precise(Size), AATags);
      }
      if (const ConstantInt *LenCI =
              dyn_cast<ConstantInt>(Call->getArgOperand(2)))
        return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
                              AATags);
      return MemoryLocation::getAfter(Arg, AATags);
    case LibFunc_bcmp:
    case LibFunc_memcmp:
      assert((ArgIdx == 0 || ArgIdx == 1) &&
             "Invalid argument index for memcmp/bcmp");
      if (const ConstantInt *LenCI =
              dyn_cast<ConstantInt>(Call->getArgOperand(2)))
        return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
                              AATags);
      return MemoryLocation::getAfter(Arg, AATags);
    case LibFunc_memchr:
      assert((ArgIdx == 0) && "Invalid argument index for memchr");
      if (const ConstantInt *LenCI =
              dyn_cast<ConstantInt>(Call->getArgOperand(2)))
        return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
                              AATags);
      return MemoryLocation::getAfter(Arg, AATags);
    case LibFunc_memccpy:
      assert((ArgIdx == 0 || ArgIdx == 1) &&
             "Invalid argument index for memccpy");
      // We only know an upper bound on the number of bytes read/written.
      if (const ConstantInt *LenCI =
              dyn_cast<ConstantInt>(Call->getArgOperand(3)))
        return MemoryLocation(
            Arg, LocationSize::upperBound(LenCI->getZExtValue()), AATags);
      return MemoryLocation::getAfter(Arg, AATags);
    default:
      break;
    }
  }

  return MemoryLocation::getBeforeOrAfter(Call->getArgOperand(ArgIdx), AATags);
}