Files
llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Florian Hahn ec14a1f40c [VPlan] Add transform to replace VPWidenCanonicalIV with wide IV. (#194267)
Add a new cost-based transform that replaces VPWidenCanonicalIVRecipe
with a canonical VPWidenIntOrFpInductionPHIRecipe, if it does not
increase spills.

The main benefit of VPWidenCanonicalIVRecipe is that it has shorter
live-ranges than wide IV phis. The new transform introduces a wide IV
unless VPWidenCanonicalIVRecipe is cheaper or the wide IV introduces
additional spills.

This introduces wide IVs in a number of cases where we previously had
VPWidenCanonicalIVRecipe, because there was no existing wide canonical
IV we could re-use. It should also help avoid somewhat unrelated changes
in https://github.com/llvm/llvm-project/pull/190191.

PR: https://github.com/llvm/llvm-project/pull/194267
2026-04-29 19:54:35 +00:00

549 lines
26 KiB
C++

//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file provides utility VPlan to VPlan transformations.
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
#include "VPlan.h"
#include "VPlanVerifier.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Regex.h"
namespace llvm {
class InductionDescriptor;
class Instruction;
class Loop;
class LoopVersioning;
class OptimizationRemarkEmitter;
class PHINode;
class ScalarEvolution;
class PredicatedScalarEvolution;
class TargetLibraryInfo;
class TargetTransformInfo;
class VPBuilder;
class VPRecipeBuilder;
struct VFRange;
LLVM_ABI_FOR_TEST extern cl::opt<bool> VerifyEachVPlan;
LLVM_ABI_FOR_TEST extern cl::opt<bool> EnableWideActiveLaneMask;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ABI_FOR_TEST extern cl::opt<bool> VPlanPrintAfterAll;
LLVM_ABI_FOR_TEST extern cl::list<std::string> VPlanPrintAfterPasses;
LLVM_ABI_FOR_TEST extern cl::opt<bool> VPlanPrintVectorRegionScope;
#endif
struct VPlanTransforms {
  /// Helper to run a VPlan pass \p Pass on \p Plan, forwarding extra arguments
  /// to the pass. Performs verification/printing after each VPlan pass if
  /// requested via command line options. If \p EnableVerify is false,
  /// verification is skipped for this pass even when -verify-each-vplan is set.
  template <bool EnableVerify = true, typename PassTy, typename... ArgsTy>
  static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan,
                                ArgsTy &&...Args) {
    // Deferred print/verify actions run after the pass invocation below
    // returns, so they observe the transformed plan.
    scope_exit PostTransformActions{[&]() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      // Make sure to print before verification, so that output is more useful
      // in case of failures:
      if (VPlanPrintAfterAll ||
          (VPlanPrintAfterPasses.getNumOccurrences() > 0 &&
           any_of(VPlanPrintAfterPasses, [PassName](StringRef Entry) {
             return Regex(Entry).match(PassName);
           }))) {
        dbgs()
            << "VPlan for loop in '"
            << Plan.getScalarHeader()->getIRBasicBlock()->getParent()->getName()
            << "' after " << PassName << '\n';
        // Optionally restrict dumping to the vector loop region to reduce
        // output volume.
        if (VPlanPrintVectorRegionScope && Plan.getVectorLoopRegion())
          Plan.getVectorLoopRegion()->print(dbgs());
        else
          dbgs() << Plan << '\n';
      }
#endif
      if (VerifyEachVPlan && EnableVerify) {
        if (!verifyVPlanIsValid(Plan))
          report_fatal_error("Broken VPlan found, compilation aborted!");
      }
    }};
    return std::forward<PassTy>(Pass)(Plan, std::forward<ArgsTy>(Args)...);
  }

/// Run \p PASS via runPass, stringizing the pass name for diagnostics.
#define RUN_VPLAN_PASS(PASS, ...)                                              \
  llvm::VPlanTransforms::runPass(#PASS, PASS, __VA_ARGS__)
/// Same as RUN_VPLAN_PASS, but skips post-pass VPlan verification.
#define RUN_VPLAN_PASS_NO_VERIFY(PASS, ...)                                    \
  llvm::VPlanTransforms::runPass<false>(#PASS, PASS, __VA_ARGS__)

  /// Create a base VPlan0, serving as the common starting point for all later
  /// candidates. It consists of an initial plain CFG loop with loop blocks from
  /// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction
  /// corresponding to the input IR.
  ///
  /// The created loop is wrapped in an initial skeleton to facilitate
  /// vectorization, consisting of a vector pre-header, an exit block for the
  /// main vector loop (middle.block) and a new block as preheader of the scalar
  /// loop (scalar.ph). See below for an illustration. It also adds a canonical
  /// IV and its increment, using \p InductionTy and \p IVDL, and creates a
  /// VPValue expression for the original trip count.
  ///
  ///  [ ] <-- Plan's entry VPIRBasicBlock, wrapping the original loop's
  ///  / \      old preheader. Will contain iteration number check and SCEV
  /// |   |     expansions.
  /// |   |
  /// /   v
  /// |  [ ] <-- vector loop bypass (may consist of multiple blocks) will be
  /// |  /  |    added later.
  /// | /   v
  /// ||   [ ]     <-- vector pre header.
  /// |/    |
  /// |     v
  /// |    [  ] \  <-- plain CFG loop wrapping original loop to be vectorized.
  /// |    [  ]_|
  /// |     |
  /// |     v
  /// |   [ ]   <--- middle-block with the branch to successors
  /// |   / |
  /// |  /  |
  /// | |   v
  ///  \--->[ ] <--- scalar preheader (initially a VPBasicBlock, which will be
  /// |     |        replaced later by a VPIRBasicBlock wrapping the scalar
  /// |     |        preheader basic block).
  /// |     |
  /// v     <-- edge from middle to exit iff epilogue is not required.
  /// |    [ ] \
  /// |    [ ]_|   <-- old scalar loop to handle remainder (scalar epilogue,
  ///  \    |          header wrapped in VPIRBasicBlock).
  ///   \   v
  ///    >[ ]     <-- original loop exit block(s), wrapped in VPIRBasicBlocks.
  LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan>
  buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
              PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);

  /// Replace VPPhi recipes in \p Plan's header with corresponding
  /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
  /// fixed-order recurrences. This processes all header phis and creates
  /// the appropriate widened recipe for each one. For fixed-order
  /// recurrences, also creates FirstOrderRecurrenceSplice instructions and
  /// sinks/hoists users as needed. Returns false if any fixed-order
  /// recurrence cannot be handled.
  static bool createHeaderPhiRecipes(
      VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
      const MapVector<PHINode *, InductionDescriptor> &Inductions,
      const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
      const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
      const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);

  /// Create VPReductionRecipes for in-loop reductions. This processes chains
  /// of operations contributing to in-loop reductions and creates appropriate
  /// VPReductionRecipe instances.
  static void createInLoopReductionRecipes(
      VPlan &Plan, const DenseSet<BasicBlock *> &BlocksNeedingPredication,
      ElementCount MinVF);

  /// Update \p Plan to account for all early exits. If \p Style is not
  /// NoUncountableExit, handles uncountable early exits and checks that all
  /// loads are dereferenceable. Returns false if a non-dereferenceable load is
  /// found.
  LLVM_ABI_FOR_TEST static bool
  handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop,
                   PredicatedScalarEvolution &PSE, DominatorTree &DT,
                   AssumptionCache *AC);

  /// If a check is needed to guard executing the scalar epilogue loop, it will
  /// be added to the middle block.
  LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, bool TailFolded);

  /// Create a check to \p Plan to see if the vector loop should be executed.
  /// If \p CheckBlock is non-null, the compare and branch are placed there;
  /// ExpandSCEV recipes are always placed in Entry.
  static void addMinimumIterationCheck(
      VPlan &Plan, ElementCount VF, unsigned UF,
      ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
      bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
      DebugLoc DL, PredicatedScalarEvolution &PSE,
      VPBasicBlock *CheckBlock = nullptr);

  /// Add a new check block before the vector preheader to \p Plan to check if
  /// the main vector loop should be executed (TC >= VF * UF).
  static void
  addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF,
                              bool RequiresScalarEpilogue, Loop *OrigLoop,
                              const uint32_t *MinItersBypassWeights,
                              DebugLoc DL, PredicatedScalarEvolution &PSE);

  /// Add a check to \p Plan to see if the epilogue vector loop should be
  /// executed.
  static void addMinimumVectorEpilogueIterationCheck(
      VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue,
      ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep,
      unsigned EpilogueLoopStep, ScalarEvolution &SE);

  /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
  /// flat CFG into a hierarchical CFG.
  LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);

  /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
  /// VPValue and connect the block to \p Plan, using the VPValue as branch
  /// condition.
  static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock,
                               bool AddBranchWeights);

  /// Replaces the VPInstructions in \p Plan with corresponding
  /// widen recipes. Returns false if any VPInstructions could not be converted
  /// to a wide recipe when needed.
  LLVM_ABI_FOR_TEST static bool
  tryToConvertVPInstructionsToVPRecipes(VPlan &Plan,
                                        const TargetLibraryInfo &TLI);

  /// Try to legalize reductions with multiple in-loop uses. Currently only
  /// strict and non-strict min/max reductions used by FindLastIV reductions are
  /// supported, corresponding to computing the first and last argmin/argmax,
  /// respectively. Otherwise return false.
  static bool handleMultiUseReductions(VPlan &Plan,
                                       OptimizationRemarkEmitter *ORE,
                                       Loop *TheLoop);

  /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If they do,
  /// try to update the vector loop to exit early if any input is NaN and resume
  /// executing in the scalar loop to handle the NaNs there. Return false if
  /// this attempt was unsuccessful.
  static bool handleMaxMinNumReductions(VPlan &Plan);

  /// Check if \p Plan contains any FindLast reductions. If it does, try to
  /// update the vector loop to save the appropriate state using selects
  /// for entire vectors for both the latest mask containing at least one active
  /// element and the corresponding data vector. Return false if this attempt
  /// was unsuccessful.
  static bool handleFindLastReductions(VPlan &Plan);

  /// Clear NSW/NUW flags from reduction instructions if necessary.
  static void clearReductionWrapFlags(VPlan &Plan);

  /// Explicitly unroll \p Plan by \p UF.
  static void unrollByUF(VPlan &Plan, unsigned UF);

  /// Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and
  /// VPInstruction in \p Plan with \p VF single-scalar recipes. Replicate
  /// regions are dissolved by replicating their blocks and their recipes \p VF
  /// times.
  /// TODO: Also dissolve replicate regions with live outs.
  static void replicateByVF(VPlan &Plan, ElementCount VF);

  /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
  /// resulting plan to \p BestVF and \p BestUF.
  static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                 unsigned BestUF,
                                 PredicatedScalarEvolution &PSE);

  /// Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL
  /// is known to be <= VF, replacing them with the AVL directly.
  static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
                               PredicatedScalarEvolution &PSE);

  /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
  /// optimizations, dead recipe removal, replicate region optimizations and
  /// block merging.
  LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan);

  /// Remove redundant VPBasicBlocks by merging them into their single
  /// predecessor if the latter has a single successor.
  static bool mergeBlocksIntoPredecessors(VPlan &Plan);

  /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
  /// region block and remove the mask operand. Optimize the created regions by
  /// iteratively sinking scalar operands into the region, followed by merging
  /// regions until no improvements are remaining.
  static void createAndOptimizeReplicateRegions(VPlan &Plan);

  /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
  /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
  /// UseActiveLaneMaskForControlFlow is true, introduce an
  /// VPActiveLaneMaskPHIRecipe.
  static void addActiveLaneMask(VPlan &Plan,
                                bool UseActiveLaneMaskForControlFlow);

  /// Insert truncates and extends for any truncated recipe. Redundant casts
  /// will be folded later.
  static void
  truncateToMinimalBitwidths(VPlan &Plan,
                             const MapVector<Instruction *, uint64_t> &MinBWs);

  /// Replace symbolic strides from \p StridesMap in \p Plan with constants when
  /// possible.
  static void
  replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE,
                         const DenseMap<Value *, const SCEV *> &StridesMap);

  /// Drop poison flags from recipes that may generate a poison value that is
  /// used after vectorization, even when their operands are not poison. Those
  /// recipes meet the following conditions:
  /// * Contribute to the address computation of a recipe generating a widen
  ///   memory load/store (VPWidenMemoryInstructionRecipe or
  ///   VPInterleaveRecipe).
  /// * Such a widen memory load/store has at least one underlying Instruction
  ///   that is in a basic block that needs predication and after vectorization
  ///   the generated instruction won't be predicated.
  /// Uses \p BlockNeedsPredication to check if a block needs predicating.
  /// TODO: Replace BlockNeedsPredication callback with retrieving info from
  ///       VPlan directly.
  static void dropPoisonGeneratingRecipes(
      VPlan &Plan,
      const std::function<bool(BasicBlock *)> &BlockNeedsPredication);

  /// Add a VPCurrentIterationPHIRecipe and related recipes to \p Plan and
  /// replaces all uses of the canonical IV except for the canonical IV
  /// increment with a VPCurrentIterationPHIRecipe. The canonical IV is only
  /// used to control the loop after this transformation.
  static void
  addExplicitVectorLength(VPlan &Plan,
                          const std::optional<unsigned> &MaxEVLSafeElements);

  /// Optimize recipes which use an EVL-based header mask to VP intrinsics, for
  /// example:
  ///
  ///   %mask = icmp ult step-vector, EVL
  ///   %load = load %ptr, %mask
  ///   -->
  ///   %load = vp.load %ptr, EVL
  static void optimizeEVLMasks(VPlan &Plan);

  /// For each Interleave Group in \p InterleaveGroups replace the Recipes
  /// widening its memory instructions with a single VPInterleaveRecipe at its
  /// insertion point.
  static void createInterleaveGroups(
      VPlan &Plan,
      const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
          &InterleaveGroups,
      VPRecipeBuilder &RecipeBuilder, const bool &EpilogueAllowed);

  /// Remove dead recipes from \p Plan.
  static void removeDeadRecipes(VPlan &Plan);

  /// Update \p Plan to account for uncountable early exits by introducing
  /// appropriate branching logic in the latch that handles early exits and the
  /// latch exit condition. Multiple exits are handled with a dispatch block
  /// that determines which exit to take based on lane-by-lane semantics.
  static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB,
                                          VPBasicBlock *LatchVPBB,
                                          VPBasicBlock *MiddleVPBB,
                                          UncountableExitStyle Style);

  /// Replaces the exit condition from
  ///   (branch-on-cond eq CanonicalIVInc, VectorTripCount)
  /// to
  ///   (branch-on-cond eq AVLNext, 0)
  static void convertEVLExitCond(VPlan &Plan);

  /// Replace loop regions with explicit CFG.
  static void dissolveLoopRegions(VPlan &Plan);

  /// Expand BranchOnTwoConds instructions into explicit CFG with
  /// BranchOnCond instructions. Should be called after dissolveLoopRegions.
  static void expandBranchOnTwoConds(VPlan &Plan);

  /// Transform loops with variable-length stepping after region
  /// dissolution.
  ///
  /// Once loop regions are replaced with explicit CFG, loops can step with
  /// variable vector lengths instead of fixed lengths. This transformation:
  ///  * Makes CurrentIteration-Phi concrete.
  ///  * Removes CanonicalIV and increment.
  static void convertToVariableLengthStep(VPlan &Plan);

  /// Lower abstract recipes to concrete ones, that can be codegen'd.
  static void convertToConcreteRecipes(VPlan &Plan);

  /// This function converts initial recipes to the abstract recipes and clamps
  /// \p Range based on cost model for following optimizations and cost
  /// estimations. The converted abstract recipes will lower to concrete
  /// recipes before codegen.
  static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
                                       VFRange &Range);

  /// Perform instcombine-like simplifications on recipes in \p Plan.
  static void simplifyRecipes(VPlan &Plan);

  /// Remove BranchOnCond recipes with true or false conditions together with
  /// removing dead edges to their successors. If \p OnlyLatches is true, only
  /// process loop latches.
  static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches = false);

  /// Perform common-subexpression-elimination on \p Plan.
  static void cse(VPlan &Plan);

  /// If there's a single exit block, optimize its phi recipes that use exiting
  /// IV values by feeding them precomputed end values instead, possibly taken
  /// one step backwards.
  static void optimizeInductionLiveOutUsers(VPlan &Plan,
                                            PredicatedScalarEvolution &PSE,
                                            bool FoldTail);

  /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's
  /// entry block if they are used as vectors.
  static void materializeBroadcasts(VPlan &Plan);

  /// Hoist single-scalar loads with invariant addresses out of the vector loop
  /// to the preheader, if they are proven not to alias with any stores in the
  /// plan using noalias metadata.
  static void hoistInvariantLoads(VPlan &Plan);

  /// Hoist predicated loads from the same address to the loop entry block, if
  /// they are guaranteed to execute on both paths (i.e., in replicate regions
  /// with complementary masks P and NOT P).
  static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE,
                                   const Loop *L);

  /// Sink predicated stores to the same address with complementary predicates
  /// (P and NOT P) to an unconditional store with select recipes for the
  /// stored values. This eliminates branching overhead when all paths
  /// unconditionally store to the same location.
  static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE,
                                   const Loop *L);

  /// Materialize vector trip counts for constants early if it can simply be
  /// computed as (Original TC / VF * UF) * VF * UF.
  static void
  materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF,
                                     unsigned BestUF,
                                     PredicatedScalarEvolution &PSE);

  /// Materialize vector trip count computations to a set of VPInstructions.
  /// \p Step is used as the step value for the trip count computation.
  /// \p MaxRuntimeStep is the maximum possible runtime value of Step, used to
  /// prove the trip count is divisible by the step for scalable VFs.
  static void materializeVectorTripCount(
      VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking,
      bool RequiresScalarEpilogue, VPValue *Step,
      std::optional<uint64_t> MaxRuntimeStep = std::nullopt);

  /// Materialize the backedge-taken count to be computed explicitly using
  /// VPInstructions.
  static void materializeBackedgeTakenCount(VPlan &Plan,
                                            VPBasicBlock *VectorPH);

  /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
  /// into vectors and Unpack recipes to extract scalars from vectors as
  /// needed.
  static void materializePacksAndUnpacks(VPlan &Plan);

  /// Materialize UF, VF and VFxUF to be computed explicitly using
  /// VPInstructions.
  static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH,
                                 ElementCount VF);

  /// Expand VPExpandSCEVRecipes in \p Plan's entry block. Each
  /// VPExpandSCEVRecipe is replaced with a live-in wrapping the expanded IR
  /// value. A mapping from SCEV expressions to their expanded IR value is
  /// returned.
  static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
                                                     ScalarEvolution &SE);

  /// Try to find a single VF among \p Plan's VFs for which all interleave
  /// groups (with known minimum VF elements) can be replaced by wide loads and
  /// stores processing VF elements, if all transformed interleave groups access
  /// the full vector width (checked via the maximum vector register width). If
  /// the transformation can be applied, the original \p Plan will be split in
  /// 2:
  ///  1. The original Plan with the single VF containing the optimized recipes
  ///     using wide loads instead of interleave groups.
  ///  2. A new clone which contains all VFs of Plan except the optimized VF.
  ///
  /// This effectively is a very simple form of loop-aware SLP, where we use
  /// interleave groups to identify candidates.
  static std::unique_ptr<VPlan>
  narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI);

  /// Adapts the vector loop region for tail folding by introducing a header
  /// mask and conditionally executing the content of the region:
  ///
  /// Vector loop region before:
  ///  +-------------------------------------------+
  ///  |%iv = ...                                  |
  ///  |...                                        |
  ///  |%iv.next = add %iv, vfxuf                  |
  ///  |branch-on-count %iv.next, vector-trip-count|
  ///  +-------------------------------------------+
  ///
  /// Vector loop region after:
  ///  +-------------------------------------------+
  ///  |%iv = ...                                  |
  ///  |%wide.iv = widen-canonical-iv ...          |
  ///  |%header-mask = icmp ule %wide.iv, BTC      |
  ///  |branch-on-cond %header-mask                |---+
  ///  +-------------------------------------------+   |
  ///                       |                          |
  ///                       v                          |
  ///  +-------------------------------------------+   |
  ///  | ...                                       |   |
  ///  +-------------------------------------------+   |
  ///                       |                          |
  ///                       v                          |
  ///  +-------------------------------------------+   |
  ///  |<phis> = phi [..., ...], [poison, header]  |   |
  ///  |%iv.next = add %iv, vfxuf                  |<--+
  ///  |branch-on-count %iv.next, vector-trip-count|
  ///  +-------------------------------------------+
  ///
  /// Any VPInstruction::ExtractLastLanes are also updated to extract from the
  /// last active lane of the header mask.
  static void foldTailByMasking(VPlan &Plan);

  /// Predicate and linearize the control-flow in the only loop region of
  /// \p Plan.
  static void introduceMasksAndLinearize(VPlan &Plan);

  /// Replace a VPWidenCanonicalIVRecipe if it is present in \p Plan, with a
  /// VPWidenIntOrFpInductionRecipe, provided it would not cause additional
  /// spills for \p VF at unroll factor \p UF.
  static void replaceWideCanonicalIVWithWideIV(
      VPlan &Plan, ScalarEvolution &SE, const TargetTransformInfo &TTI,
      TargetTransformInfo::TargetCostKind CostKind, ElementCount VF,
      unsigned UF, const SmallPtrSetImpl<const Value *> &ValuesToIgnore);

  /// Add branch weight metadata, if the \p Plan's middle block is terminated by
  /// a BranchOnCond recipe.
  static void
  addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
                                    std::optional<unsigned> VScaleForTuning);

  /// Adjust first-order recurrence users in the middle block: create
  /// penultimate element extracts for LCSSA phi users, and handle penultimate
  /// extracts of the last active lane edge.
  static void adjustFirstOrderRecurrenceMiddleUsers(VPlan &Plan,
                                                    VFRange &Range);

  /// Optimize FindLast reductions selecting IVs (or expressions of IVs) by
  /// converting them to FindIV reductions, if their IV range excludes a
  /// suitable sentinel value. For expressions of IVs, the expression is sunk
  /// to the middle block.
  static void optimizeFindIVReductions(VPlan &Plan,
                                       PredicatedScalarEvolution &PSE, Loop &L);

  /// Detect and create partial reduction recipes for scaled reductions in
  /// \p Plan. Must be called after recipe construction. If partial reductions
  /// are only valid for a subset of VFs in Range, Range.End is updated.
  static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx,
                                      VFRange &Range);

  /// Convert load/store VPInstructions in \p Plan into widened or replicate
  /// recipes. Non load/store input instructions are left unchanged.
  static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
                                         VPRecipeBuilder &RecipeBuilder);
};
} // namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H