//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanUtils.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;
using namespace llvm::SCEVPatternMatch;

bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); });
}

bool vputils::onlyFirstPartUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); });
}

bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
  return all_of(Def->users(),
                [Def](const VPUser *U) { return U->usesScalars(Def); });
}

VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
  if (auto *E = dyn_cast<SCEVConstant>(Expr))
    return Plan.getOrAddLiveIn(E->getValue());
  // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
  // value. Otherwise the value may be defined in a loop and using it directly
  // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
  // form.
  auto *U = dyn_cast<SCEVUnknown>(Expr);
  if (U && !isa<Instruction>(U->getValue()))
    return Plan.getOrAddLiveIn(U->getValue());
  auto *Expanded = new VPExpandSCEVRecipe(Expr);
  Plan.getEntry()->appendRecipe(Expanded);
  return Expanded;
}

bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
  if (isa<VPActiveLaneMaskPHIRecipe>(V))
    return true;

  auto IsWideCanonicalIV = [](VPValue *A) {
    return isa<VPWidenCanonicalIVRecipe>(A) ||
           (isa<VPWidenIntOrFpInductionRecipe>(A) &&
            cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical());
  };

  VPValue *A, *B;
  auto m_CanonicalScalarIVSteps = m_ScalarIVSteps(
      m_CombineOr(m_CanonicalIV(),
                  m_DerivedIV(m_ZeroInt(), m_CanonicalIV(), m_One())),
      m_One(), m_Specific(&Plan.getVF()));

  if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
    return B == Plan.getTripCount() &&
           (match(A, m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A));

  // For scalar plans, the header mask uses the scalar steps.
  if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
                      m_Specific(Plan.getBackedgeTakenCount())))) {
    assert(Plan.hasScalarVFOnly() &&
           "Non-scalar VF using scalar IV steps for header mask?");
    return true;
  }

  return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
         B == Plan.getBackedgeTakenCount();
}

/// Returns true if \p R propagates poison from any operand to its result.
static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R) {
  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case<VPWidenRecipe, VPWidenGEPRecipe, VPWidenCastRecipe>(
          [](const VPRecipeBase *) { return true; })
      .Case<VPReplicateRecipe>([](const VPReplicateRecipe *Rep) {
        // GEPs and casts propagate poison from all operands.
        unsigned Opcode = Rep->getOpcode();
        return Opcode == Instruction::GetElementPtr ||
               Instruction::isCast(Opcode);
      })
      .Default([](const VPRecipeBase *) { return false; });
}

/// Returns true if \p V being poison is guaranteed to trigger UB because it
/// propagates to the address of a memory recipe.
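/// For example, if \p V (possibly via poison-propagating GEPs or casts) feeds
/// the address operand of a widened or replicated load or store, executing
/// that access with a poison address is immediate UB.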
static bool poisonGuaranteesUB(const VPValue *V) {
  SmallPtrSet<const VPValue *, 8> Visited;
  SmallVector<const VPValue *> Worklist;
  Worklist.push_back(V);

  while (!Worklist.empty()) {
    const VPValue *Current = Worklist.pop_back_val();
    if (!Visited.insert(Current).second)
      continue;

    for (VPUser *U : Current->users()) {
      // Check if Current is used as an address operand for load/store.
      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
        if (MemR->getAddr() == Current)
          return true;
        continue;
      }
      if (auto *Rep = dyn_cast<VPReplicateRecipe>(U)) {
        unsigned Opcode = Rep->getOpcode();
        if ((Opcode == Instruction::Load && Rep->getOperand(0) == Current) ||
            (Opcode == Instruction::Store && Rep->getOperand(1) == Current))
          return true;
      }
      // Check if poison propagates through this recipe to any of its users.
      auto *R = cast<VPRecipeBase>(U);
      for (const VPValue *Op : R->operands()) {
        if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
          Worklist.push_back(R->getVPSingleValue());
          break;
        }
      }
    }
  }
  return false;
}

const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
                                           PredicatedScalarEvolution &PSE,
                                           const Loop *L) {
  ScalarEvolution &SE = *PSE.getSE();
  if (isa<VPIRValue, VPSymbolicValue>(V)) {
    Value *LiveIn = V->getUnderlyingValue();
    if (LiveIn && SE.isSCEVable(LiveIn->getType()))
      return SE.getSCEV(LiveIn);
    return SE.getCouldNotCompute();
  }

  if (auto *RV = dyn_cast<VPRegionValue>(V)) {
    assert(RV == RV->getDefiningRegion()->getCanonicalIV() &&
           "RegionValue must be canonical IV");
    if (!L)
      return SE.getCouldNotCompute();
    return SE.getAddRecExpr(SE.getZero(RV->getType()),
                            SE.getOne(RV->getType()), L, SCEV::FlagAnyWrap);
  }

  // Helper to create SCEVs for binary and unary operations.
  auto CreateSCEV =
      [&](ArrayRef<VPValue *> Ops,
          function_ref<const SCEV *(ArrayRef<const SCEV *>)> CreateFn)
      -> const SCEV * {
    SmallVector<const SCEV *> SCEVOps;
    for (VPValue *Op : Ops) {
      const SCEV *S = getSCEVExprForVPValue(Op, PSE, L);
      if (isa<SCEVCouldNotCompute>(S))
        return SE.getCouldNotCompute();
      SCEVOps.push_back(S);
    }
    return CreateFn(SCEVOps);
  };

  VPValue *LHSVal, *RHSVal;
  if (match(V, m_Add(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getAddExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Sub(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMinusSCEV(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Not(m_VPValue(LHSVal)))) {
    // not X = xor X, -1 = -1 - X
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMinusSCEV(SE.getMinusOne(Ops[0]->getType()), Ops[0]);
    });
  }
  if (match(V, m_Mul(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getMulExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Binary<Instruction::UDiv>(m_VPValue(LHSVal),
                                           m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUDivExpr(Ops[0], Ops[1]);
    });

  // Handle AND with a constant mask: x & (2^n - 1) can be represented as
  // x % 2^n.
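  // E.g. with a mask of 7, the mask plus one (8) is a power of two, so x & 7
  // becomes x urem 8.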
  const APInt *Mask;
  if (match(V, m_c_BinaryAnd(m_VPValue(LHSVal), m_APInt(Mask))) &&
      (*Mask + 1).isPowerOf2())
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getURemExpr(Ops[0], SE.getConstant(*Mask + 1));
    });

  if (match(V, m_Trunc(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getTruncateExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_ZExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getZeroExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_SExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    // Mirror SCEV's createSCEV handling for sext(sub nsw): push the sign
    // extension onto the operands before computing the subtraction.
    VPValue *SubLHS, *SubRHS;
    auto *SubR = dyn_cast<VPRecipeWithIRFlags>(LHSVal);
    if (match(LHSVal, m_Sub(m_VPValue(SubLHS), m_VPValue(SubRHS))) && SubR &&
        SubR->hasNoSignedWrap() && poisonGuaranteesUB(LHSVal)) {
      const SCEV *V1 = getSCEVExprForVPValue(SubLHS, PSE, L);
      const SCEV *V2 = getSCEVExprForVPValue(SubRHS, PSE, L);
      if (!isa<SCEVCouldNotCompute>(V1) && !isa<SCEVCouldNotCompute>(V2))
        return SE.getMinusSCEV(SE.getSignExtendExpr(V1, DestTy),
                               SE.getSignExtendExpr(V2, DestTy),
                               SCEV::FlagNSW);
    }
    return CreateSCEV({LHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSignExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_Intrinsic<Intrinsic::umax>(m_VPValue(LHSVal),
                                            m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V, m_Intrinsic<Intrinsic::smax>(m_VPValue(LHSVal),
                                            m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V, m_Intrinsic<Intrinsic::umin>(m_VPValue(LHSVal),
                                            m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getUMinExpr(Ops[0], Ops[1]);
    });
  if (match(V, m_Intrinsic<Intrinsic::smin>(m_VPValue(LHSVal),
                                            m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getSMinExpr(Ops[0], Ops[1]);
    });

  ArrayRef<VPValue *> Ops;
  Type *SourceElementType;
  if (match(V, m_GetElementPtr(SourceElementType, Ops))) {
    const SCEV *GEPExpr = CreateSCEV(Ops, [&](ArrayRef<const SCEV *> Ops) {
      return SE.getGEPExpr(Ops.front(), Ops.drop_front(), SourceElementType);
    });
    return PSE.getPredicatedSCEV(GEPExpr);
  }

  // TODO: Support constructing SCEVs for more recipes as needed.
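  // Recipes that directly model inductions are handled via the per-recipe
  // dispatch below; anything unhandled conservatively yields
  // SCEVCouldNotCompute.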
  const VPRecipeBase *DefR = V->getDefiningRecipe();
  const SCEV *Expr =
      TypeSwitch<const VPRecipeBase *, const SCEV *>(DefR)
          .Case<VPExpandSCEVRecipe>(
              [](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
          .Case<VPWidenIntOrFpInductionRecipe>(
              [&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
                const SCEV *Step =
                    getSCEVExprForVPValue(R->getStepValue(), PSE, L);
                if (!L || isa<SCEVCouldNotCompute>(Step))
                  return SE.getCouldNotCompute();
                const SCEV *Start =
                    getSCEVExprForVPValue(R->getStartValue(), PSE, L);
                const SCEV *AddRec =
                    SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
                if (R->getTruncInst())
                  return SE.getTruncateExpr(AddRec, R->getScalarType());
                return AddRec;
              })
          .Case<VPWidenPointerInductionRecipe>(
              [&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
                const SCEV *Start =
                    getSCEVExprForVPValue(R->getStartValue(), PSE, L);
                if (!L || isa<SCEVCouldNotCompute>(Start))
                  return SE.getCouldNotCompute();
                const SCEV *Step =
                    getSCEVExprForVPValue(R->getStepValue(), PSE, L);
                if (isa<SCEVCouldNotCompute>(Step))
                  return SE.getCouldNotCompute();
                return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
              })
          .Case<VPDerivedIVRecipe>([&SE, &PSE, L](const VPDerivedIVRecipe *R) {
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), PSE, L);
            if (any_of(ArrayRef<const SCEV *>({Start, IV, Scale}),
                       IsaPred<SCEVCouldNotCompute>))
              return SE.getCouldNotCompute();

            return SE.getAddExpr(
                SE.getTruncateOrSignExtend(Start, IV->getType()),
                SE.getMulExpr(
                    IV, SE.getTruncateOrSignExtend(Scale, IV->getType())));
          })
          .Case<VPScalarIVStepsRecipe>(
              [&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
                const SCEV *IV =
                    getSCEVExprForVPValue(R->getOperand(0), PSE, L);
                const SCEV *Step =
                    getSCEVExprForVPValue(R->getOperand(1), PSE, L);
                if (isa<SCEVCouldNotCompute>(IV) || !isa<SCEVConstant>(Step))
                  return SE.getCouldNotCompute();
                return SE.getTruncateOrSignExtend(IV, Step->getType());
              })
          .Default(
              [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
  return PSE.getPredicatedSCEV(Expr);
}

bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE,
                                   const Loop *L) {
  // If the address is a SCEVAddExpr, require that all its operands are either
  // loop-invariant or a (possibly sign-extended) affine AddRec.
  if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Addr)) {
    return all_of(PtrAdd->operands(), [&SE, L](const SCEV *Op) {
      return SE.isLoopInvariant(Op, L) ||
             match(Op, m_scev_SExt(m_scev_AffineAddRec(m_SCEV(), m_SCEV()))) ||
             match(Op, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
    });
  }

  // Otherwise, check if the address is loop-invariant or an affine add
  // recurrence.
  return SE.isLoopInvariant(Addr, L) ||
         match(Addr, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
}

/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
/// uniform, the result will also be uniform.
static bool preservesUniformity(unsigned Opcode) {
  if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
    return true;
  switch (Opcode) {
  case Instruction::Freeze:
  case Instruction::GetElementPtr:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Select:
  case VPInstruction::Not:
  case VPInstruction::Broadcast:
  case VPInstruction::MaskedCond:
  case VPInstruction::PtrAdd:
    return true;
  default:
    return false;
  }
}

bool vputils::isSingleScalar(const VPValue *VPV) {
  // Live-in, symbolic and region values represent single-scalar values.
  if (isa<VPIRValue, VPSymbolicValue, VPRegionValue>(VPV))
    return true;

  if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
    const VPRegionBlock *RegionOfR = Rep->getRegion();
    // Don't consider recipes in replicate regions as uniform yet; their first
    // lane cannot be accessed when executing the replicate region for other
    // lanes.
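    // For example, a predicated load replicated inside such a region produces
    // its value lane by lane, so there is no single scalar value that all
    // lanes could reuse.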
    if (RegionOfR && RegionOfR->isReplicator())
      return false;
    return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) &&
                                     all_of(Rep->operands(), isSingleScalar));
  }
  if (isa<VPWidenGEPRecipe, VPBlendRecipe>(VPV))
    return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
  if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
    return preservesUniformity(WidenR->getOpcode()) &&
           all_of(WidenR->operands(), isSingleScalar);
  }
  if (auto *VPI = dyn_cast<VPInstruction>(VPV))
    return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
           (preservesUniformity(VPI->getOpcode()) &&
            all_of(VPI->operands(), isSingleScalar));
  if (auto *RR = dyn_cast<VPReductionRecipe>(VPV))
    return !RR->isPartialReduction();
  if (isa<VPDerivedIVRecipe, VPEVLBasedIVPHIRecipe>(VPV))
    return true;
  if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
    return Expr->isSingleScalar();

  // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
  return isa<VPExpandSCEVRecipe>(VPV);
}

bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
  // Live-ins and region values are uniform.
  if (isa<VPIRValue, VPRegionValue>(V))
    return true;

  VPRecipeBase *R = V->getDefiningRecipe();
  VPBasicBlock *VPBB = R ? R->getParent() : nullptr;
  VPlan *Plan = VPBB ? VPBB->getPlan() : nullptr;
  if (VPBB) {
    if (VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry()) {
      if (match(V->getDefiningRecipe(),
                m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
        return false;
      return all_of(R->operands(), isUniformAcrossVFsAndUFs);
    }
  }

  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case<VPDerivedIVRecipe>([](const VPDerivedIVRecipe *R) { return true; })
      .Case<VPReplicateRecipe>([](const VPReplicateRecipe *R) {
        // Be conservative about side-effects, except for the
        // known-side-effecting assumes and stores, which we know will be
        // uniform.
        return R->isSingleScalar() &&
               (!R->mayHaveSideEffects() ||
                isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case<VPWidenRecipe>([](const VPWidenRecipe *R) {
        return preservesUniformity(R->getOpcode()) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case<VPInstruction>([](const VPInstruction *VPI) {
        return preservesUniformity(VPI->getOpcode()) &&
               all_of(VPI->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case<VPWidenCastRecipe>([](const VPWidenCastRecipe *R) {
        // A cast is uniform according to its operand.
        return isUniformAcrossVFsAndUFs(R->getOperand(0));
      })
      .Default([](const VPRecipeBase *) {
        // A value is considered non-uniform unless proven otherwise.
        return false;
      });
}

VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
  auto DepthFirst = vp_depth_first_shallow(Plan.getEntry());
  auto I = find_if(DepthFirst, [&VPDT](VPBlockBase *VPB) {
    return VPBlockUtils::isHeader(VPB, VPDT);
  });
  return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
}

unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
  if (!R)
    return 1;
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPReductionRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *ER = dyn_cast<VPExpressionRecipe>(R))
    return ER->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}

bool vputils::cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking) {
  // Assumes don't alias anything or throw; as long as they're guaranteed to
  // execute, they're safe to hoist. They should however not be sunk, as that
  // would destroy information.
  if (match(&R, m_Intrinsic<Intrinsic::assume>()))
    return Sinking;
  // TODO: Relax checks in the future, e.g. we could also hoist reads, if their
  // memory location is not modified in the vector loop.
  if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
    return true;

  // Allocas cannot be hoisted.
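  // Hoisting an alloca out of the loop would give every iteration the same
  // stack slot, which is not generally equivalent to allocating per iteration.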
  auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
  return RepR && RepR->getOpcode() == Instruction::Alloca;
}

std::optional<VPValue *> vputils::getRecipesForUncountableExit(
    SmallVectorImpl<VPSingleDefRecipe *> &Recipes,
    SmallVectorImpl<VPInstruction *> &GEPs, VPBasicBlock *LatchVPBB) {
  // Given a plain CFG VPlan loop with countable latch exiting block
  // \p LatchVPBB, we're looking to match the recipes contributing to the
  // uncountable exit condition comparison (here, vp<%4>) back to either
  // live-ins or the address nodes for the load used as part of the uncountable
  // exit comparison, so that we can either move them within the loop or copy
  // them to the preheader, depending on the chosen method for dealing with
  // stores in uncountable exit loops.
  //
  // Currently, the address of the load is restricted to a GEP with 2 operands
  // and a live-in base address. This constraint may be relaxed later.
  //
  // VPlan ' for UF>=1' {
  // Live-in vp<%0> = VF * UF
  // Live-in vp<%1> = vector-trip-count
  // Live-in ir<20> = original trip-count
  //
  // ir-bb<entry>:
  // Successor(s): scalar.ph, vector.ph
  //
  // vector.ph:
  // Successor(s): for.body
  //
  // for.body:
  //   EMIT vp<%2> = phi ir<0>, vp<%index.next>
  //   EMIT-SCALAR ir<%iv> = phi [ ir<0>, vector.ph ], [ ir<%iv.next>, for.inc ]
  //   EMIT ir<%uncountable.addr> = getelementptr inbounds nuw ir<%pred>, ir<%iv>
  //   EMIT ir<%uncountable.val> = load ir<%uncountable.addr>
  //   EMIT ir<%uncountable.cond> = icmp sgt ir<%uncountable.val>, ir<500>
  //   EMIT vp<%3> = masked-cond ir<%uncountable.cond>
  // Successor(s): for.inc
  //
  // for.inc:
  //   EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>
  //   EMIT ir<%countable.cond> = icmp eq ir<%iv.next>, ir<20>
  //   EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
  //   EMIT vp<%4> = any-of vp<%3>
  //   EMIT vp<%5> = icmp eq vp<%index.next>, vp<%1>
  //   EMIT vp<%6> = or vp<%4>, vp<%5>
  //   EMIT branch-on-cond vp<%6>
  // Successor(s): middle.block, for.body
  //
  // middle.block:
  // Successor(s): ir-bb<exit>, scalar.ph
  //
  // ir-bb<exit>:
  // No successors
  //
  // scalar.ph:
  // }

  // Find the uncountable loop exit condition.
  VPValue *UncountableCondition = nullptr;
  if (!match(LatchVPBB->getTerminator(),
             m_BranchOnCond(m_c_BinaryOr(
                 m_AnyOf(m_VPValue(UncountableCondition)), m_VPValue()))))
    return std::nullopt;

  SmallVector<VPValue *> Worklist;
  Worklist.push_back(UncountableCondition);
  while (!Worklist.empty()) {
    VPValue *V = Worklist.pop_back_val();

    // Any value defined outside the loop does not need to be copied.
    if (V->isDefinedOutsideLoopRegions())
      continue;

    // FIXME: Remove the single-user restriction; it's here because we're
    //        starting with the simplest set of loops we can, and multiple
    //        users means needing to add PHI nodes in the transform.
    if (V->getNumUsers() > 1)
      return std::nullopt;

    VPValue *Op1, *Op2;
    // Walk back through recipes until we find at least one load from memory.
    if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
      Worklist.push_back(Op1);
      Worklist.push_back(Op2);
      Recipes.push_back(cast<VPSingleDefRecipe>(V->getDefiningRecipe()));
    } else if (match(V, m_VPInstruction<Instruction::Load>(m_VPValue(Op1)))) {
      VPRecipeBase *GepR = Op1->getDefiningRecipe();
      // Only matching base + single offset term for now.
      if (GepR->getNumOperands() != 2)
        return std::nullopt;
      // Matching a GEP with a loop-invariant base ptr.
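      // In the example above this is ir<%uncountable.addr>: a GEP of the
      // live-in base ir<%pred> indexed by the scalar IV.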
      if (!match(GepR, m_VPInstruction<Instruction::GetElementPtr>(
                           m_LiveIn(), m_VPValue())))
        return std::nullopt;

      Recipes.push_back(cast<VPSingleDefRecipe>(V->getDefiningRecipe()));
      Recipes.push_back(cast<VPSingleDefRecipe>(GepR));
      GEPs.push_back(cast<VPInstruction>(GepR));
    } else if (match(V, m_VPInstruction<VPInstruction::MaskedCond>(
                            m_VPValue(Op1)))) {
      Worklist.push_back(Op1);
      Recipes.push_back(cast<VPSingleDefRecipe>(V->getDefiningRecipe()));
    } else
      return std::nullopt;
  }

  // If we couldn't match anything, don't return the condition. It may be
  // defined outside the loop.
  if (Recipes.empty() || GEPs.empty())
    return std::nullopt;

  return UncountableCondition;
}

VPSingleDefRecipe *vputils::findHeaderMask(VPlan &Plan) {
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  SmallVector<VPValue *> WideCanonicalIVs;
  auto *WideCanonicalIV = vputils::findUserOf<VPWidenCanonicalIVRecipe>(
      LoopRegion->getCanonicalIV());
  assert(count_if(LoopRegion->getCanonicalIV()->users(),
                  IsaPred<VPWidenCanonicalIVRecipe>) <= 1 &&
         "Must have at most one VPWidenCanonicalIVRecipe");
  if (WideCanonicalIV)
    WideCanonicalIVs.push_back(WideCanonicalIV);

  // Also include VPWidenIntOrFpInductionRecipes that represent a widened
  // version of the canonical induction.
  VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
    auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (WidenOriginalIV && WidenOriginalIV->isCanonical())
      WideCanonicalIVs.push_back(WidenOriginalIV);
  }

  // Walk users of wide canonical IVs and find the single compare of the form
  // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
  VPSingleDefRecipe *HeaderMask = nullptr;
  for (auto *Wide : WideCanonicalIVs) {
    for (VPUser *U : Wide->users()) {
      auto *VPI = dyn_cast<VPInstruction>(U);
      if (!VPI || !vputils::isHeaderMask(VPI, Plan))
        continue;

      assert(VPI->getOperand(0) == Wide &&
             "WidenCanonicalIV must be the first operand of the compare");
      assert(!HeaderMask && "Multiple header masks found?");
      HeaderMask = VPI;
    }
  }
  return HeaderMask;
}

SmallVector<VPBasicBlock *>
VPBlockUtils::blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB,
                                                  VPBasicBlock *LastBB) {
  assert(FirstBB->getParent() == LastBB->getParent() &&
         "FirstBB and LastBB from different regions");
#ifndef NDEBUG
  bool InSingleSuccChain = false;
  for (VPBlockBase *Succ = FirstBB; Succ; Succ = Succ->getSingleSuccessor())
    InSingleSuccChain |= (Succ == LastBB);
  assert(InSingleSuccChain &&
         "LastBB unreachable from FirstBB in single-successor chain");
#endif

  auto Blocks = to_vector(
      VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_deep(FirstBB)));
  auto *LastIt = find(Blocks, LastBB);
  assert(LastIt != Blocks.end() &&
         "LastBB unreachable from FirstBB in depth-first traversal");
  Blocks.erase(std::next(LastIt), Blocks.end());
  return Blocks;
}

bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
                            const VPDominatorTree &VPDT) {
  auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
  if (!VPBB)
    return false;

  // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
  // VPBB as its entry, i.e., free of predecessors.
  if (auto *R = VPBB->getParent())
    return !R->isReplicator() && !VPBB->hasPredecessors();

  // A header dominates its second predecessor (the latch), with the other
  // predecessor being the preheader.
  return VPB->getPredecessors().size() == 2 &&
         VPDT.dominates(VPB, VPB->getPredecessors()[1]);
}

bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
                           const VPDominatorTree &VPDT) {
  // A latch has a header as its last successor, with its other successors
  // leaving the loop. A preheader, on the other hand, has a header as its
  // first (and only) successor.
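  // Requiring at least two successors thus distinguishes a latch (header plus
  // at least one exit) from a preheader, whose single successor is the header.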
  return VPB->getNumSuccessors() >= 2 &&
         VPBlockUtils::isHeader(VPB->getSuccessors().back(), VPDT);
}

std::optional<MemoryLocation>
vputils::getMemoryLocation(const VPRecipeBase &R) {
  auto *M = dyn_cast<VPWidenMemoryRecipe>(&R);
  if (!M)
    return std::nullopt;

  MemoryLocation Loc;
  // Populate noalias metadata from VPIRMetadata.
  if (MDNode *NoAliasMD = M->getMetadata(LLVMContext::MD_noalias))
    Loc.AATags.NoAlias = NoAliasMD;
  if (MDNode *AliasScopeMD = M->getMetadata(LLVMContext::MD_alias_scope))
    Loc.AATags.Scope = AliasScopeMD;
  return Loc;
}

VPInstruction *vputils::findCanonicalIVIncrement(VPlan &Plan) {
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  VPRegionValue *CanIV = LoopRegion->getCanonicalIV();
  assert(CanIV && "Expected loop region to have a canonical IV");
  VPSymbolicValue &VFxUF = Plan.getVFxUF();

  // Check if \p Step matches the expected increment step, accounting for
  // materialization of VFxUF and UF.
  auto IsIncrementStep = [&](VPValue *Step) -> bool {
    if (!VFxUF.isMaterialized())
      return Step == &VFxUF;

    VPSymbolicValue &UF = Plan.getUF();
    if (!UF.isMaterialized())
      return Step == &UF;

    unsigned ConcreteUF = Plan.getConcreteUF();
    // Fixed VF: the step is just the concrete UF.
    if (match(Step, m_SpecificInt(ConcreteUF)))
      return true;

    // Scalable VF: the step involves VScale.
    if (ConcreteUF == 1)
      return match(Step, m_VPInstruction<VPInstruction::VScale>());
    if (match(Step, m_c_Mul(m_SpecificInt(ConcreteUF),
                            m_VPInstruction<VPInstruction::VScale>())))
      return true;
    // mul(VScale, ConcreteUF) may have been simplified to
    // shl(VScale, log2(ConcreteUF)) when ConcreteUF is a power of 2.
    return isPowerOf2_32(ConcreteUF) &&
           match(Step, m_Binary<Instruction::Shl>(
                           m_VPInstruction<VPInstruction::VScale>(),
                           m_SpecificInt(Log2_32(ConcreteUF))));
  };

  VPInstruction *Increment = nullptr;
  for (VPUser *U : CanIV->users()) {
    VPValue *Step;
    if (match(U, m_c_Add(m_Specific(CanIV), m_VPValue(Step))) &&
        IsIncrementStep(Step)) {
      assert(!Increment && "There must be a unique increment");
      Increment = cast<VPInstruction>(U);
    }
  }
  assert((!VFxUF.isMaterialized() || Increment) &&
         "After materializing VFxUF, an increment must exist");
  assert((!Increment || LoopRegion->hasCanonicalIVNUW() ==
                            Increment->hasNoUnsignedWrap()) &&
         "NUW flag in region and increment must match");
  return Increment;
}

/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
/// inserted for predicated reductions or tail folding.
VPInstruction *vputils::findComputeReductionResult(VPReductionPHIRecipe *PhiR) {
  VPValue *BackedgeVal = PhiR->getBackedgeValue();
  if (auto *Res = vputils::findUserOf<VPInstruction::ComputeReductionResult>(
          BackedgeVal))
    return Res;

  // Look through selects inserted for tail folding or predicated reductions.
  VPRecipeBase *SelR = vputils::findUserOf(
      BackedgeVal, m_Select(m_VPValue(), m_VPValue(), m_VPValue()));
  if (!SelR)
    return nullptr;
  return vputils::findUserOf<VPInstruction::ComputeReductionResult>(
      cast<VPSingleDefRecipe>(SelR));
}