Move the logic to validate FOR users and introduce the splice directly into header phi creation. It makes sense to introduce the header phi and the splice together. It also means sinking only needs to be done once, instead of once per VPlan. Depends on https://github.com/llvm/llvm-project/pull/190681. PR: https://github.com/llvm/llvm-project/pull/191298
//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanUtils.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;
using namespace llvm::SCEVPatternMatch;

bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); });
}

bool vputils::onlyFirstPartUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); });
}

bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesScalars(Def); });
}

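/// Get or create a VPValue for \p Expr in \p Plan: constants and
/// non-instruction values become live-ins, all other expressions are expanded
/// via a VPExpandSCEVRecipe in the plan's entry block.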
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
  if (auto *E = dyn_cast<SCEVConstant>(Expr))
    return Plan.getOrAddLiveIn(E->getValue());
  // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
  // value. Otherwise the value may be defined in a loop and using it directly
  // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
  // form.
  auto *U = dyn_cast<SCEVUnknown>(Expr);
  if (U && !isa<Instruction>(U->getValue()))
    return Plan.getOrAddLiveIn(U->getValue());
  auto *Expanded = new VPExpandSCEVRecipe(Expr);
  Plan.getEntry()->appendRecipe(Expanded);
  return Expanded;
}

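/// Returns true if \p V is a header mask of \p Plan, i.e. an active-lane-mask
/// phi, an active-lane-mask of a canonical IV and the trip count, or a compare
/// of a wide or scalar-steps canonical IV with the backedge-taken count.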
bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
  if (isa<VPActiveLaneMaskPHIRecipe>(V))
    return true;

  auto IsWideCanonicalIV = [](VPValue *A) {
    return isa<VPWidenCanonicalIVRecipe>(A) ||
           (isa<VPWidenIntOrFpInductionRecipe>(A) &&
            cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical());
  };

  VPValue *A, *B;

  auto m_CanonicalScalarIVSteps = m_ScalarIVSteps(
      m_CombineOr(m_CanonicalIV(),
                  m_DerivedIV(m_ZeroInt(), m_CanonicalIV(), m_One())),
      m_One(), m_Specific(&Plan.getVF()));

  if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
    return B == Plan.getTripCount() &&
           (match(A, m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A));

  // For scalar plans, the header mask uses the scalar steps.
  if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
                      m_Specific(Plan.getBackedgeTakenCount())))) {
    assert(Plan.hasScalarVFOnly() &&
           "Non-scalar VF using scalar IV steps for header mask?");
    return true;
  }

  return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
         B == Plan.getBackedgeTakenCount();
}

/// Returns true if \p R propagates poison from any operand to its result.
static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R) {
  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case<VPWidenGEPRecipe, VPWidenCastRecipe>(
          [](const VPRecipeBase *) { return true; })
      .Case([](const VPReplicateRecipe *Rep) {
        // GEP and casts propagate poison from all operands.
        unsigned Opcode = Rep->getOpcode();
        return Opcode == Instruction::GetElementPtr ||
               Instruction::isCast(Opcode);
      })
      .Default([](const VPRecipeBase *) { return false; });
}

/// Returns true if \p V being poison is guaranteed to trigger UB because it
/// propagates to the address of a memory recipe.
static bool poisonGuaranteesUB(const VPValue *V) {
  SmallPtrSet<const VPValue *, 8> Visited;
  SmallVector<const VPValue *, 16> Worklist;

  Worklist.push_back(V);

  while (!Worklist.empty()) {
    const VPValue *Current = Worklist.pop_back_val();
    if (!Visited.insert(Current).second)
      continue;

    for (VPUser *U : Current->users()) {
      // Check if Current is used as an address operand for load/store.
      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
        if (MemR->getAddr() == Current)
          return true;
        continue;
      }
      if (auto *Rep = dyn_cast<VPReplicateRecipe>(U)) {
        unsigned Opcode = Rep->getOpcode();
        if ((Opcode == Instruction::Load && Rep->getOperand(0) == Current) ||
            (Opcode == Instruction::Store && Rep->getOperand(1) == Current))
          return true;
      }

      // Check if poison propagates through this recipe to any of its users.
      auto *R = cast<VPRecipeBase>(U);
      for (const VPValue *Op : R->operands()) {
        if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
          Worklist.push_back(R->getVPSingleValue());
          break;
        }
      }
    }
  }

  return false;
}

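/// Return a SCEV expression for \p V, constructed recursively from its
/// defining recipe and operands, or SCEVCouldNotCompute if no expression can
/// be built.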
const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
                                           PredicatedScalarEvolution &PSE,
                                           const Loop *L) {
  ScalarEvolution &SE = *PSE.getSE();
  if (isa<VPIRValue, VPSymbolicValue>(V)) {
    Value *LiveIn = V->getUnderlyingValue();
    if (LiveIn && SE.isSCEVable(LiveIn->getType()))
      return SE.getSCEV(LiveIn);
    return SE.getCouldNotCompute();
  }

  if (auto *RV = dyn_cast<VPRegionValue>(V)) {
    assert(RV == RV->getDefiningRegion()->getCanonicalIV() &&
           "RegionValue must be canonical IV");
    if (!L)
      return SE.getCouldNotCompute();
    return SE.getAddRecExpr(SE.getZero(RV->getType()), SE.getOne(RV->getType()),
                            L, SCEV::FlagAnyWrap);
  }

  // Helper to create SCEVs for binary and unary operations.
  auto CreateSCEV = [&](ArrayRef<VPValue *> Ops,
                        function_ref<const SCEV *(ArrayRef<SCEVUse>)> CreateFn)
      -> const SCEV * {
    SmallVector<SCEVUse, 2> SCEVOps;
    for (VPValue *Op : Ops) {
      const SCEV *S = getSCEVExprForVPValue(Op, PSE, L);
      if (isa<SCEVCouldNotCompute>(S))
        return SE.getCouldNotCompute();
      SCEVOps.push_back(S);
    }
    return CreateFn(SCEVOps);
  };

  VPValue *LHSVal, *RHSVal;
  if (match(V, m_Add(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getAddExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Sub(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getMinusSCEV(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Not(m_VPValue(LHSVal)))) {
    // not X = xor X, -1 = -1 - X
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getMinusSCEV(SE.getMinusOne(Ops[0]->getType()), Ops[0]);
    });
  }
  if (match(V, m_Mul(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getMulExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V,
            m_Binary<Instruction::UDiv>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getUDivExpr(Ops[0], Ops[1]);
    });
  // Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n.
  const APInt *Mask;
  if (match(V, m_c_BinaryAnd(m_VPValue(LHSVal), m_APInt(Mask))) &&
      (*Mask + 1).isPowerOf2())
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getURemExpr(Ops[0], SE.getConstant(*Mask + 1));
    });
  if (match(V, m_Trunc(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getTruncateExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_ZExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getZeroExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_SExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);

    // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension
    // onto the operands before computing the subtraction.
    VPValue *SubLHS, *SubRHS;
    auto *SubR = dyn_cast<VPRecipeWithIRFlags>(LHSVal);
    if (match(LHSVal, m_Sub(m_VPValue(SubLHS), m_VPValue(SubRHS))) && SubR &&
        SubR->hasNoSignedWrap() && poisonGuaranteesUB(LHSVal)) {
      const SCEV *V1 = getSCEVExprForVPValue(SubLHS, PSE, L);
      const SCEV *V2 = getSCEVExprForVPValue(SubRHS, PSE, L);
      if (!isa<SCEVCouldNotCompute>(V1) && !isa<SCEVCouldNotCompute>(V2))
        return SE.getMinusSCEV(SE.getSignExtendExpr(V1, DestTy),
                               SE.getSignExtendExpr(V2, DestTy), SCEV::FlagNSW);
    }

    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getSignExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V,
            m_Intrinsic<Intrinsic::umax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getUMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::smax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getSMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::umin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getUMinExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::smin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getSMinExpr(Ops[0], Ops[1]);
    });

  ArrayRef<VPValue *> Ops;
  Type *SourceElementType;
  if (match(V, m_GetElementPtr(SourceElementType, Ops))) {
    const SCEV *GEPExpr = CreateSCEV(Ops, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getGEPExpr(Ops.front(), Ops.drop_front(), SourceElementType);
    });
    return PSE.getPredicatedSCEV(GEPExpr);
  }

  // TODO: Support constructing SCEVs for more recipes as needed.
  const VPRecipeBase *DefR = V->getDefiningRecipe();
  const SCEV *Expr =
      TypeSwitch<const VPRecipeBase *, const SCEV *>(DefR)
          .Case([](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
          .Case([&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            const SCEV *AddRec =
                SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
            if (R->getTruncInst())
              return SE.getTruncateExpr(AddRec, R->getScalarType());
            return AddRec;
          })
          .Case([&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Start))
              return SE.getCouldNotCompute();
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
          })
          .Case([&SE, &PSE, L](const VPDerivedIVRecipe *R) {
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), PSE, L);
            if (any_of(ArrayRef({Start, IV, Scale}),
                       IsaPred<SCEVCouldNotCompute>))
              return SE.getCouldNotCompute();

            return SE.getAddExpr(
                SE.getTruncateOrSignExtend(Start, IV->getType()),
                SE.getMulExpr(
                    IV, SE.getTruncateOrSignExtend(Scale, IV->getType())));
          })
          .Case([&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            if (isa<SCEVCouldNotCompute>(IV) || !isa<SCEVConstant>(Step))
              return SE.getCouldNotCompute();
            return SE.getTruncateOrSignExtend(IV, Step->getType());
          })
          .Default(
              [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });

  return PSE.getPredicatedSCEV(Expr);
}

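/// Returns true if the address SCEV \p Addr is simple enough to be used for
/// cost computation: loop-invariant with respect to \p L, a (possibly
/// sign-extended) affine AddRec, or an add of such operands.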
bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE,
                                   const Loop *L) {
  // If the address is an SCEVAddExpr, we require that all operands must either
  // be invariant or a (possibly sign-extended) affine AddRec.
  if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Addr)) {
    return all_of(PtrAdd->operands(), [&SE, L](const SCEV *Op) {
      return SE.isLoopInvariant(Op, L) ||
             match(Op, m_scev_SExt(m_scev_AffineAddRec(m_SCEV(), m_SCEV()))) ||
             match(Op, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
    });
  }

  // Otherwise, check if the address is loop invariant or an affine add
  // recurrence.
  return SE.isLoopInvariant(Addr, L) ||
         match(Addr, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
}

/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
/// uniform, the result will also be uniform.
static bool preservesUniformity(unsigned Opcode) {
  if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
    return true;
  switch (Opcode) {
  case Instruction::Freeze:
  case Instruction::GetElementPtr:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Select:
  case VPInstruction::Not:
  case VPInstruction::Broadcast:
  case VPInstruction::MaskedCond:
  case VPInstruction::PtrAdd:
    return true;
  default:
    return false;
  }
}

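/// Returns true if \p VPV is known to define a single scalar value, based on
/// its defining recipe and operands.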
bool vputils::isSingleScalar(const VPValue *VPV) {
  // Live-in, symbolic and region-values represent single-scalar values.
  if (isa<VPIRValue, VPSymbolicValue, VPRegionValue>(VPV))
    return true;

  if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
    const VPRegionBlock *RegionOfR = Rep->getRegion();
    // Don't consider recipes in replicate regions as uniform yet; their first
    // lane cannot be accessed when executing the replicate region for other
    // lanes.
    if (RegionOfR && RegionOfR->isReplicator())
      return false;
    return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) &&
                                     all_of(Rep->operands(), isSingleScalar));
  }
  if (isa<VPWidenGEPRecipe, VPBlendRecipe>(VPV))
    return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
  if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
    return preservesUniformity(WidenR->getOpcode()) &&
           all_of(WidenR->operands(), isSingleScalar);
  }
  if (auto *VPI = dyn_cast<VPInstruction>(VPV))
    return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
           (preservesUniformity(VPI->getOpcode()) &&
            all_of(VPI->operands(), isSingleScalar));
  if (auto *RR = dyn_cast<VPReductionRecipe>(VPV))
    return !RR->isPartialReduction();
  if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe, VPDerivedIVRecipe>(
          VPV))
    return true;
  if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
    return Expr->isSingleScalar();

  // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
  return isa<VPExpandSCEVRecipe>(VPV);
}

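/// Returns true if \p V is known to produce the same value regardless of the
/// chosen VF and UF.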
bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
  // Live-ins and region values are uniform.
  if (isa<VPIRValue, VPSymbolicValue, VPRegionValue>(V))
    return true;

  VPRecipeBase *R = V->getDefiningRecipe();
  VPBasicBlock *VPBB = R ? R->getParent() : nullptr;
  VPlan *Plan = VPBB ? VPBB->getPlan() : nullptr;
  if (VPBB) {
    if ((VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry())) {
      if (match(V->getDefiningRecipe(),
                m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
        return false;
      return all_of(R->operands(), isUniformAcrossVFsAndUFs);
    }
  }

  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case([](const VPDerivedIVRecipe *R) { return true; })
      .Case([](const VPReplicateRecipe *R) {
        // Be conservative about side-effects, except for the
        // known-side-effecting assumes and stores, which we know will be
        // uniform.
        return R->isSingleScalar() &&
               (!R->mayHaveSideEffects() ||
                isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPWidenRecipe *R) {
        return preservesUniformity(R->getOpcode()) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPInstruction *VPI) {
        return preservesUniformity(VPI->getOpcode()) &&
               all_of(VPI->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPWidenCastRecipe *R) {
        // A cast is uniform according to its operand.
        return isUniformAcrossVFsAndUFs(R->getOperand(0));
      })
      .Default([](const VPRecipeBase *) {
        // A value is considered non-uniform unless proven otherwise.
        return false;
      });
}

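/// Return the first loop header block found in a shallow depth-first traversal
/// of \p Plan starting at its entry, or nullptr if there is no loop header.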
VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
  auto DepthFirst = vp_depth_first_shallow(Plan.getEntry());
  auto I = find_if(DepthFirst, [&VPDT](VPBlockBase *VPB) {
    return VPBlockUtils::isHeader(VPB, VPDT);
  });
  return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
}

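/// Return the VF scale factor of \p R if it is a reduction phi, reduction, or
/// expression recipe; otherwise return 1.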
unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
  if (!R)
    return 1;
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPReductionRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *ER = dyn_cast<VPExpressionRecipe>(R))
    return ER->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}

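/// Returns true if \p R cannot be hoisted out of its loop (or cannot be sunk,
/// if \p Sinking is true), e.g. because it accesses memory, is a phi, or is an
/// alloca.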
bool vputils::cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking) {
  // Assumes don't alias anything or throw; as long as they're guaranteed to
  // execute, they're safe to hoist. They should however not be sunk, as it
  // would destroy information.
  if (match(&R, m_Intrinsic<Intrinsic::assume>()))
    return Sinking;
  // TODO: Relax checks in the future, e.g. we could also hoist reads, if their
  // memory location is not modified in the vector loop.
  if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
    return true;
  // Allocas cannot be hoisted.
  auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
  return RepR && RepR->getOpcode() == Instruction::Alloca;
}

std::optional<VPValue *>
vputils::getRecipesForUncountableExit(SmallVectorImpl<VPInstruction *> &Recipes,
                                      SmallVectorImpl<VPInstruction *> &GEPs,
                                      VPBasicBlock *LatchVPBB) {
  // Given a plain CFG VPlan loop with countable latch exiting block
  // \p LatchVPBB, we're looking to match the recipes contributing to the
  // uncountable exit condition comparison (here, vp<%4>) back to either
  // live-ins or the address nodes for the load used as part of the uncountable
  // exit comparison so that we can either move them within the loop, or copy
  // them to the preheader depending on the chosen method for dealing with
  // stores in uncountable exit loops.
  //
  // Currently, the address of the load is restricted to a GEP with 2 operands
  // and a live-in base address. This constraint may be relaxed later.
  //
  // VPlan ' for UF>=1' {
  // Live-in vp<%0> = VF * UF
  // Live-in vp<%1> = vector-trip-count
  // Live-in ir<20> = original trip-count
  //
  // ir-bb<entry>:
  // Successor(s): scalar.ph, vector.ph
  //
  // vector.ph:
  // Successor(s): for.body
  //
  // for.body:
  //   EMIT vp<%2> = phi ir<0>, vp<%index.next>
  //   EMIT-SCALAR ir<%iv> = phi [ ir<0>, vector.ph ], [ ir<%iv.next>, for.inc ]
  //   EMIT ir<%uncountable.addr> = getelementptr inbounds nuw ir<%pred>,ir<%iv>
  //   EMIT ir<%uncountable.val> = load ir<%uncountable.addr>
  //   EMIT ir<%uncountable.cond> = icmp sgt ir<%uncountable.val>, ir<500>
  //   EMIT vp<%3> = masked-cond ir<%uncountable.cond>
  // Successor(s): for.inc
  //
  // for.inc:
  //   EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>
  //   EMIT ir<%countable.cond> = icmp eq ir<%iv.next>, ir<20>
  //   EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
  //   EMIT vp<%4> = any-of ir<%3>
  //   EMIT vp<%5> = icmp eq vp<%index.next>, vp<%1>
  //   EMIT vp<%6> = or vp<%4>, vp<%5>
  //   EMIT branch-on-cond vp<%6>
  // Successor(s): middle.block, for.body
  //
  // middle.block:
  // Successor(s): ir-bb<exit>, scalar.ph
  //
  // ir-bb<exit>:
  // No successors
  //
  // scalar.ph:
  // }

  // Find the uncountable loop exit condition.
  VPValue *UncountableCondition = nullptr;
  if (!match(LatchVPBB->getTerminator(),
             m_BranchOnCond(m_c_BinaryOr(
                 m_AnyOf(m_VPValue(UncountableCondition)), m_VPValue()))))
    return std::nullopt;

  SmallVector<VPValue *, 4> Worklist;
  Worklist.push_back(UncountableCondition);
  while (!Worklist.empty()) {
    VPValue *V = Worklist.pop_back_val();

    // Any value defined outside the loop does not need to be copied.
    if (V->isDefinedOutsideLoopRegions())
      continue;

    // FIXME: Remove the single user restriction; it's here because we're
    // starting with the simplest set of loops we can, and multiple
    // users means needing to add PHI nodes in the transform.
    if (V->getNumUsers() > 1)
      return std::nullopt;

    VPValue *Op1, *Op2;
    // Walk back through recipes until we find at least one load from memory.
    if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
      Worklist.push_back(Op1);
      Worklist.push_back(Op2);
      Recipes.push_back(cast<VPInstruction>(V->getDefiningRecipe()));
    } else if (match(V, m_VPInstruction<Instruction::Load>(m_VPValue(Op1)))) {
      VPRecipeBase *GepR = Op1->getDefiningRecipe();
      // Only matching base + single offset term for now.
      if (GepR->getNumOperands() != 2)
        return std::nullopt;
      // Matching a GEP with a loop-invariant base ptr.
      if (!match(GepR, m_VPInstruction<Instruction::GetElementPtr>(
                           m_LiveIn(), m_VPValue())))
        return std::nullopt;
      Recipes.push_back(cast<VPInstruction>(V->getDefiningRecipe()));
      Recipes.push_back(cast<VPInstruction>(GepR));
      GEPs.push_back(cast<VPInstruction>(GepR));
    } else if (match(V, m_VPInstruction<VPInstruction::MaskedCond>(
                            m_VPValue(Op1)))) {
      Worklist.push_back(Op1);
      Recipes.push_back(cast<VPInstruction>(V->getDefiningRecipe()));
    } else
      return std::nullopt;
  }

  // If we couldn't match anything, don't return the condition. It may be
  // defined outside the loop.
  if (Recipes.empty() || GEPs.empty())
    return std::nullopt;

  return UncountableCondition;
}

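/// Find and return the header mask of \p Plan by looking at users of its wide
/// canonical IVs, or nullptr if there is none.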
VPSingleDefRecipe *vputils::findHeaderMask(VPlan &Plan) {
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  SmallVector<VPValue *> WideCanonicalIVs;
  auto *WideCanonicalIV = vputils::findUserOf<VPWidenCanonicalIVRecipe>(
      LoopRegion->getCanonicalIV());
  assert(count_if(LoopRegion->getCanonicalIV()->users(),
                  IsaPred<VPWidenCanonicalIVRecipe>) <= 1 &&
         "Must have at most one VPWideCanonicalIVRecipe");
  if (WideCanonicalIV)
    WideCanonicalIVs.push_back(WideCanonicalIV);

  // Also include VPWidenIntOrFpInductionRecipes that represent a widened
  // version of the canonical induction.
  VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
    auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (WidenOriginalIV && WidenOriginalIV->isCanonical())
      WideCanonicalIVs.push_back(WidenOriginalIV);
  }

  // Walk users of wide canonical IVs and find the single compare of the form
  // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
  VPSingleDefRecipe *HeaderMask = nullptr;
  for (auto *Wide : WideCanonicalIVs) {
    for (VPUser *U : Wide->users()) {
      auto *VPI = dyn_cast<VPInstruction>(U);
      if (!VPI || !vputils::isHeaderMask(VPI, Plan))
        continue;

      assert(VPI->getOperand(0) == Wide &&
             "WidenCanonicalIV must be the first operand of the compare");
      assert(!HeaderMask && "Multiple header masks found?");
      HeaderMask = VPI;
    }
  }
  return HeaderMask;
}

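/// Collect and return the basic blocks from \p FirstBB up to and including
/// \p LastBB, which must be reachable from \p FirstBB along a chain of single
/// successors within the same region.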
SmallVector<VPBasicBlock *>
VPBlockUtils::blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB,
                                                  VPBasicBlock *LastBB) {
  assert(FirstBB->getParent() == LastBB->getParent() &&
         "FirstBB and LastBB from different regions");
#ifndef NDEBUG
  bool InSingleSuccChain = false;
  for (VPBlockBase *Succ = FirstBB; Succ; Succ = Succ->getSingleSuccessor())
    InSingleSuccChain |= (Succ == LastBB);
  assert(InSingleSuccChain &&
         "LastBB unreachable from FirstBB in single-successor chain");
#endif
  auto Blocks = to_vector(
      VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_deep(FirstBB)));
  auto *LastIt = find(Blocks, LastBB);
  assert(LastIt != Blocks.end() &&
         "LastBB unreachable from FirstBB in depth-first traversal");
  Blocks.erase(std::next(LastIt), Blocks.end());
  return Blocks;
}

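/// Returns true if \p VPB is a loop header, either as the entry block of a
/// loop region or, in a flat CFG, as a block dominating its latch predecessor.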
bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
                            const VPDominatorTree &VPDT) {
  auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
  if (!VPBB)
    return false;

  // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
  // VPBB as its entry, i.e., free of predecessors.
  if (auto *R = VPBB->getParent())
    return !R->isReplicator() && !VPBB->hasPredecessors();

  // A header dominates its second predecessor (the latch), with the other
  // predecessor being the preheader.
  return VPB->getPredecessors().size() == 2 &&
         VPDT.dominates(VPB, VPB->getPredecessors()[1]);
}

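/// Returns true if \p VPB is a loop latch, i.e. a block with multiple
/// successors whose last successor is a loop header.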
bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
                           const VPDominatorTree &VPDT) {
  // A latch has a header as its last successor, with its other successors
  // leaving the loop. A preheader OTOH has a header as its first (and only)
  // successor.
  return VPB->getNumSuccessors() >= 2 &&
         VPBlockUtils::isHeader(VPB->getSuccessors().back(), VPDT);
}

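/// Return a MemoryLocation whose alias-analysis tags are populated from \p R's
/// noalias and alias.scope metadata, or std::nullopt if \p R carries no
/// VPIRMetadata.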
std::optional<MemoryLocation>
vputils::getMemoryLocation(const VPRecipeBase &R) {
  auto *M = dyn_cast<VPIRMetadata>(&R);
  if (!M)
    return std::nullopt;
  MemoryLocation Loc;
  // Populate noalias metadata from VPIRMetadata.
  if (MDNode *NoAliasMD = M->getMetadata(LLVMContext::MD_noalias))
    Loc.AATags.NoAlias = NoAliasMD;
  if (MDNode *AliasScopeMD = M->getMetadata(LLVMContext::MD_alias_scope))
    Loc.AATags.Scope = AliasScopeMD;
  return Loc;
}

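/// Find and return the VPInstruction that increments the canonical IV of
/// \p Plan's vector loop region by VF * UF (in symbolic or materialized form),
/// or nullptr if no such increment exists yet.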
VPInstruction *vputils::findCanonicalIVIncrement(VPlan &Plan) {
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  VPRegionValue *CanIV = LoopRegion->getCanonicalIV();
  assert(CanIV && "Expected loop region to have a canonical IV");

  VPSymbolicValue &VFxUF = Plan.getVFxUF();

  // Check if \p Step matches the expected increment step, accounting for
  // materialization of VFxUF and UF.
  auto IsIncrementStep = [&](VPValue *Step) -> bool {
    if (!VFxUF.isMaterialized())
      return Step == &VFxUF;

    VPSymbolicValue &UF = Plan.getUF();
    if (!UF.isMaterialized())
      return Step == &UF;

    unsigned ConcreteUF = Plan.getConcreteUF();
    // Fixed VF: step is just the concrete UF.
    if (match(Step, m_SpecificInt(ConcreteUF)))
      return true;

    // Scalable VF: step involves VScale.
    if (ConcreteUF == 1)
      return match(Step, m_VPInstruction<VPInstruction::VScale>());
    if (match(Step, m_c_Mul(m_SpecificInt(ConcreteUF),
                            m_VPInstruction<VPInstruction::VScale>())))
      return true;
    // mul(VScale, ConcreteUF) may have been simplified to
    // shl(VScale, log2(ConcreteUF)) when ConcreteUF is a power of 2.
    return isPowerOf2_32(ConcreteUF) &&
           match(Step, m_Binary<Instruction::Shl>(
                           m_VPInstruction<VPInstruction::VScale>(),
                           m_SpecificInt(Log2_32(ConcreteUF))));
  };

  VPInstruction *Increment = nullptr;
  for (VPUser *U : CanIV->users()) {
    VPValue *Step;
    if (match(U, m_c_Add(m_Specific(CanIV), m_VPValue(Step))) &&
        IsIncrementStep(Step)) {
      assert(!Increment && "There must be a unique increment");
      Increment = cast<VPInstruction>(U);
    }
  }

  assert((!VFxUF.isMaterialized() || Increment) &&
         "After materializing VFxUF, an increment must exist");
  assert((!Increment ||
          LoopRegion->hasCanonicalIVNUW() == Increment->hasNoUnsignedWrap()) &&
         "NUW flag in region and increment must match");
  return Increment;
}

/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
/// inserted for predicated reductions or tail folding.
VPInstruction *vputils::findComputeReductionResult(VPReductionPHIRecipe *PhiR) {
  VPValue *BackedgeVal = PhiR->getBackedgeValue();
  if (auto *Res = vputils::findUserOf<VPInstruction::ComputeReductionResult>(
          BackedgeVal))
    return Res;

  // Look through selects inserted for tail folding or predicated reductions.
  VPRecipeBase *SelR = vputils::findUserOf(
      BackedgeVal, m_Select(m_VPValue(), m_VPValue(), m_VPValue()));
  if (!SelR)
    return nullptr;
  return vputils::findUserOf<VPInstruction::ComputeReductionResult>(
      cast<VPSingleDefRecipe>(SelR));
}