[VPlanSLP] Strip stub (#192635)
VPlanSLP hasn't seen much progress since it was checked in 7 years ago, and it is unclear if there ever will be any progress. Strip it from the tree to avoid confusion.
This commit is contained in:
committed by
GitHub
parent
4975ad9ae2
commit
680a990819
@@ -29,7 +29,6 @@ add_llvm_component_library(LLVMVectorize
|
||||
VPlanConstruction.cpp
|
||||
VPlanPredicator.cpp
|
||||
VPlanRecipes.cpp
|
||||
VPlanSLP.cpp
|
||||
VPlanTransforms.cpp
|
||||
VPlanUnroll.cpp
|
||||
VPlanVerifier.cpp
|
||||
|
||||
@@ -65,7 +65,6 @@ class VPRegionBlock;
|
||||
class VPlan;
|
||||
class VPLane;
|
||||
class VPReplicateRecipe;
|
||||
class VPlanSlp;
|
||||
class Value;
|
||||
class LoopVectorizationCostModel;
|
||||
|
||||
@@ -1221,8 +1220,6 @@ public:
|
||||
/// predication.
|
||||
class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
|
||||
public VPIRMetadata {
|
||||
friend class VPlanSlp;
|
||||
|
||||
public:
|
||||
/// VPlan opcodes, extending LLVM IR with idiomatics instructions.
|
||||
enum {
|
||||
@@ -1230,8 +1227,6 @@ public:
|
||||
Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
|
||||
// values of a first-order recurrence.
|
||||
Not,
|
||||
SLPLoad,
|
||||
SLPStore,
|
||||
// Creates a mask where each lane is active (true) whilst the current
|
||||
// counter (first operand + index) is less than the second operand. i.e.
|
||||
// mask[i] = icmpt ult (op0 + i), op1
|
||||
|
||||
@@ -517,8 +517,6 @@ unsigned VPInstruction::getNumOperandsForOpcode() const {
|
||||
case VPInstruction::ComputeReductionResult:
|
||||
case VPInstruction::FirstActiveLane:
|
||||
case VPInstruction::LastActiveLane:
|
||||
case VPInstruction::SLPLoad:
|
||||
case VPInstruction::SLPStore:
|
||||
case VPInstruction::ExtractLane:
|
||||
case VPInstruction::ExtractLastActive:
|
||||
// Cannot determine the number of operands from the opcode.
|
||||
@@ -1469,12 +1467,6 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
|
||||
case VPInstruction::Not:
|
||||
O << "not";
|
||||
break;
|
||||
case VPInstruction::SLPLoad:
|
||||
O << "combined load";
|
||||
break;
|
||||
case VPInstruction::SLPStore:
|
||||
O << "combined store";
|
||||
break;
|
||||
case VPInstruction::ActiveLaneMask:
|
||||
O << "active lane mask";
|
||||
break;
|
||||
|
||||
@@ -1,528 +0,0 @@
|
||||
//===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// This file implements SLP analysis based on VPlan. The analysis is based on
|
||||
/// the ideas described in
|
||||
///
|
||||
/// Look-ahead SLP: auto-vectorization in the presence of commutative
|
||||
/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
|
||||
/// Luís F. W. Góes
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "VPlanSLP.h"
|
||||
#include "VPlan.h"
|
||||
#include "VPlanCFG.h"
|
||||
#include "VPlanValue.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/VectorUtils.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "vplan-slp"
|
||||
|
||||
// Number of levels to look ahead when re-ordering multi node operands.
|
||||
static unsigned LookaheadMaxDepth = 5;
|
||||
|
||||
void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
|
||||
Old2NewTy &Old2New,
|
||||
InterleavedAccessInfo &IAI) {
|
||||
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
|
||||
Region->getEntry());
|
||||
for (VPBlockBase *Base : RPOT) {
|
||||
visitBlock(Base, Old2New, IAI);
|
||||
}
|
||||
}
|
||||
|
||||
void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
|
||||
InterleavedAccessInfo &IAI) {
|
||||
if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
|
||||
for (VPRecipeBase &VPI : *VPBB) {
|
||||
if (isa<VPWidenPHIRecipe>(&VPI))
|
||||
continue;
|
||||
auto *VPInst = dyn_cast<VPInstruction>(&VPI);
|
||||
if (!VPInst)
|
||||
continue;
|
||||
auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
|
||||
if (!Inst)
|
||||
continue;
|
||||
auto *IG = IAI.getInterleaveGroup(Inst);
|
||||
if (!IG)
|
||||
continue;
|
||||
|
||||
auto NewIGIter = Old2New.find(IG);
|
||||
if (NewIGIter == Old2New.end())
|
||||
Old2New[IG] = new InterleaveGroup<VPInstruction>(
|
||||
IG->getFactor(), IG->isReverse(), IG->getAlign());
|
||||
|
||||
if (Inst == IG->getInsertPos())
|
||||
Old2New[IG]->setInsertPos(VPInst);
|
||||
|
||||
InterleaveGroupMap[VPInst] = Old2New[IG];
|
||||
InterleaveGroupMap[VPInst]->insertMember(
|
||||
VPInst, IG->getIndex(Inst),
|
||||
Align(IG->isReverse() ? (-1) * int(IG->getFactor())
|
||||
: IG->getFactor()));
|
||||
}
|
||||
} else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) {
|
||||
visitRegion(Region, Old2New, IAI);
|
||||
} else {
|
||||
llvm_unreachable("Unsupported kind of VPBlock.");
|
||||
}
|
||||
}
|
||||
|
||||
VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
|
||||
InterleavedAccessInfo &IAI) {
|
||||
Old2NewTy Old2New;
|
||||
visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
|
||||
}
|
||||
|
||||
VPInstruction *VPlanSlp::markFailed() {
|
||||
// FIXME: Currently this is used to signal we hit instructions we cannot
|
||||
// trivially SLP'ize.
|
||||
CompletelySLP = false;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
|
||||
if (all_of(Operands, [](VPValue *V) {
|
||||
return cast<VPInstruction>(V)->getUnderlyingInstr();
|
||||
})) {
|
||||
unsigned BundleSize = 0;
|
||||
for (VPValue *V : Operands) {
|
||||
Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
|
||||
assert(!T->isVectorTy() && "Only scalar types supported for now");
|
||||
BundleSize += T->getScalarSizeInBits();
|
||||
}
|
||||
WidestBundleBits = std::max(WidestBundleBits, BundleSize);
|
||||
}
|
||||
|
||||
auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
|
||||
assert(Res.second &&
|
||||
"Already created a combined instruction for the operand bundle");
|
||||
(void)Res;
|
||||
}
|
||||
|
||||
bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
|
||||
// Currently we only support VPInstructions.
|
||||
if (!all_of(Operands, [](VPValue *Op) {
|
||||
return Op && isa<VPInstruction>(Op) &&
|
||||
cast<VPInstruction>(Op)->getUnderlyingInstr();
|
||||
})) {
|
||||
LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if opcodes and type width agree for all instructions in the bundle.
|
||||
// FIXME: Differing widths/opcodes can be handled by inserting additional
|
||||
// instructions.
|
||||
// FIXME: Deal with non-primitive types.
|
||||
const Instruction *OriginalInstr =
|
||||
cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
|
||||
unsigned Opcode = OriginalInstr->getOpcode();
|
||||
unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
|
||||
if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
|
||||
const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
|
||||
return I->getOpcode() == Opcode &&
|
||||
I->getType()->getPrimitiveSizeInBits() == Width;
|
||||
})) {
|
||||
LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// For now, all operands must be defined in the same BB.
|
||||
if (any_of(Operands, [this](VPValue *Op) {
|
||||
return cast<VPInstruction>(Op)->getParent() != &this->BB;
|
||||
})) {
|
||||
LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (any_of(Operands,
|
||||
[](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
|
||||
LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// For loads, check that there are no instructions writing to memory in
|
||||
// between them.
|
||||
// TODO: we only have to forbid instructions writing to memory that could
|
||||
// interfere with any of the loads in the bundle
|
||||
if (Opcode == Instruction::Load) {
|
||||
unsigned LoadsSeen = 0;
|
||||
VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
|
||||
for (auto &I : make_range(Parent->getFirstNonPhi(), Parent->end())) {
|
||||
auto *VPI = dyn_cast<VPInstruction>(&I);
|
||||
if (!VPI)
|
||||
return false;
|
||||
if (VPI->getOpcode() == Instruction::Load &&
|
||||
llvm::is_contained(Operands, VPI))
|
||||
LoadsSeen++;
|
||||
|
||||
if (LoadsSeen == Operands.size())
|
||||
break;
|
||||
if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "VPSLP: instruction modifying memory between loads\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!all_of(Operands, [](VPValue *Op) {
|
||||
return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
|
||||
->isSimple();
|
||||
})) {
|
||||
LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (Opcode == Instruction::Store)
|
||||
if (!all_of(Operands, [](VPValue *Op) {
|
||||
return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
|
||||
->isSimple();
|
||||
})) {
|
||||
LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
|
||||
unsigned OperandIndex) {
|
||||
SmallVector<VPValue *, 4> Operands;
|
||||
for (VPValue *V : Values) {
|
||||
// Currently we only support VPInstructions.
|
||||
auto *U = cast<VPInstruction>(V);
|
||||
Operands.push_back(U->getOperand(OperandIndex));
|
||||
}
|
||||
return Operands;
|
||||
}
|
||||
|
||||
static bool areCommutative(ArrayRef<VPValue *> Values) {
|
||||
return Instruction::isCommutative(
|
||||
cast<VPInstruction>(Values[0])->getOpcode());
|
||||
}
|
||||
|
||||
static SmallVector<SmallVector<VPValue *, 4>, 4>
|
||||
getOperands(ArrayRef<VPValue *> Values) {
|
||||
SmallVector<SmallVector<VPValue *, 4>, 4> Result;
|
||||
auto *VPI = cast<VPInstruction>(Values[0]);
|
||||
|
||||
switch (VPI->getOpcode()) {
|
||||
case Instruction::Load:
|
||||
llvm_unreachable("Loads terminate a tree, no need to get operands");
|
||||
case Instruction::Store:
|
||||
Result.push_back(getOperands(Values, 0));
|
||||
break;
|
||||
default:
|
||||
for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
|
||||
Result.push_back(getOperands(Values, I));
|
||||
break;
|
||||
}
|
||||
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// Returns the opcode of Values or ~0 if they do not all agree.
|
||||
static std::optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
|
||||
unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
|
||||
if (any_of(Values, [Opcode](VPValue *V) {
|
||||
return cast<VPInstruction>(V)->getOpcode() != Opcode;
|
||||
}))
|
||||
return std::nullopt;
|
||||
return {Opcode};
|
||||
}
|
||||
|
||||
/// Returns true if A and B access sequential memory if they are loads or
|
||||
/// stores or if they have identical opcodes otherwise.
|
||||
static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
|
||||
VPInterleavedAccessInfo &IAI) {
|
||||
if (A->getOpcode() != B->getOpcode())
|
||||
return false;
|
||||
|
||||
if (A->getOpcode() != Instruction::Load &&
|
||||
A->getOpcode() != Instruction::Store)
|
||||
return true;
|
||||
auto *GA = IAI.getInterleaveGroup(A);
|
||||
auto *GB = IAI.getInterleaveGroup(B);
|
||||
|
||||
return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
|
||||
}
|
||||
|
||||
/// Implements getLAScore from Listing 7 in the paper.
|
||||
/// Traverses and compares operands of V1 and V2 to MaxLevel.
|
||||
static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
|
||||
VPInterleavedAccessInfo &IAI) {
|
||||
auto *I1 = dyn_cast<VPInstruction>(V1);
|
||||
auto *I2 = dyn_cast<VPInstruction>(V2);
|
||||
// Currently we only support VPInstructions.
|
||||
if (!I1 || !I2)
|
||||
return 0;
|
||||
|
||||
if (MaxLevel == 0)
|
||||
return (unsigned)areConsecutiveOrMatch(I1, I2, IAI);
|
||||
|
||||
unsigned Score = 0;
|
||||
for (unsigned I = 0, EV1 = I1->getNumOperands(); I < EV1; ++I)
|
||||
for (unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
|
||||
Score +=
|
||||
getLAScore(I1->getOperand(I), I2->getOperand(J), MaxLevel - 1, IAI);
|
||||
return Score;
|
||||
}
|
||||
|
||||
std::pair<VPlanSlp::OpMode, VPValue *>
|
||||
VPlanSlp::getBest(OpMode Mode, VPValue *Last,
|
||||
SmallPtrSetImpl<VPValue *> &Candidates,
|
||||
VPInterleavedAccessInfo &IAI) {
|
||||
assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
|
||||
"Currently we only handle load and commutative opcodes");
|
||||
LLVM_DEBUG(dbgs() << " getBest\n");
|
||||
|
||||
SmallVector<VPValue *, 4> BestCandidates;
|
||||
LLVM_DEBUG(dbgs() << " Candidates for "
|
||||
<< *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
|
||||
for (auto *Candidate : Candidates) {
|
||||
auto *LastI = cast<VPInstruction>(Last);
|
||||
auto *CandidateI = cast<VPInstruction>(Candidate);
|
||||
if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
|
||||
LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
|
||||
<< " ");
|
||||
BestCandidates.push_back(Candidate);
|
||||
}
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "\n");
|
||||
|
||||
if (BestCandidates.empty())
|
||||
return {OpMode::Failed, nullptr};
|
||||
|
||||
if (BestCandidates.size() == 1)
|
||||
return {Mode, BestCandidates[0]};
|
||||
|
||||
VPValue *Best = nullptr;
|
||||
unsigned BestScore = 0;
|
||||
for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
|
||||
unsigned PrevScore = ~0u;
|
||||
bool AllSame = true;
|
||||
|
||||
// FIXME: Avoid visiting the same operands multiple times.
|
||||
for (auto *Candidate : BestCandidates) {
|
||||
unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
|
||||
if (PrevScore == ~0u)
|
||||
PrevScore = Score;
|
||||
if (PrevScore != Score)
|
||||
AllSame = false;
|
||||
PrevScore = Score;
|
||||
|
||||
if (Score > BestScore) {
|
||||
BestScore = Score;
|
||||
Best = Candidate;
|
||||
}
|
||||
}
|
||||
if (!AllSame)
|
||||
break;
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "Found best "
|
||||
<< *cast<VPInstruction>(Best)->getUnderlyingInstr()
|
||||
<< "\n");
|
||||
Candidates.erase(Best);
|
||||
|
||||
return {Mode, Best};
|
||||
}
|
||||
|
||||
SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
|
||||
SmallVector<MultiNodeOpTy, 4> FinalOrder;
|
||||
SmallVector<OpMode, 4> Mode;
|
||||
FinalOrder.reserve(MultiNodeOps.size());
|
||||
Mode.reserve(MultiNodeOps.size());
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Reordering multinode\n");
|
||||
|
||||
for (auto &Operands : MultiNodeOps) {
|
||||
FinalOrder.push_back({Operands.first, {Operands.second[0]}});
|
||||
if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
|
||||
Instruction::Load)
|
||||
Mode.push_back(OpMode::Load);
|
||||
else
|
||||
Mode.push_back(OpMode::Opcode);
|
||||
}
|
||||
|
||||
for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
|
||||
LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
|
||||
SmallPtrSet<VPValue *, 4> Candidates;
|
||||
LLVM_DEBUG(dbgs() << " Candidates ");
|
||||
for (auto Ops : MultiNodeOps) {
|
||||
LLVM_DEBUG(
|
||||
dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
|
||||
<< " ");
|
||||
Candidates.insert(Ops.second[Lane]);
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "\n");
|
||||
|
||||
for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
|
||||
LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");
|
||||
if (Mode[Op] == OpMode::Failed)
|
||||
continue;
|
||||
|
||||
VPValue *Last = FinalOrder[Op].second[Lane - 1];
|
||||
std::pair<OpMode, VPValue *> Res =
|
||||
getBest(Mode[Op], Last, Candidates, IAI);
|
||||
if (Res.second)
|
||||
FinalOrder[Op].second.push_back(Res.second);
|
||||
else
|
||||
// TODO: handle this case
|
||||
FinalOrder[Op].second.push_back(markFailed());
|
||||
}
|
||||
}
|
||||
|
||||
return FinalOrder;
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
|
||||
dbgs() << " Ops: ";
|
||||
for (auto *Op : Values) {
|
||||
if (auto *VPInstr = cast_or_null<VPInstruction>(Op))
|
||||
if (auto *Instr = VPInstr->getUnderlyingInstr()) {
|
||||
dbgs() << *Instr << " | ";
|
||||
continue;
|
||||
}
|
||||
dbgs() << " nullptr | ";
|
||||
}
|
||||
dbgs() << "\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
|
||||
assert(!Values.empty() && "Need some operands!");
|
||||
|
||||
// If we already visited this instruction bundle, re-use the existing node
|
||||
auto I = BundleToCombined.find(to_vector<4>(Values));
|
||||
if (I != BundleToCombined.end()) {
|
||||
#ifndef NDEBUG
|
||||
// Check that the resulting graph is a tree. If we re-use a node, this means
|
||||
// its values have multiple users. We only allow this, if all users of each
|
||||
// value are the same instruction.
|
||||
for (auto *V : Values) {
|
||||
auto UI = V->user_begin();
|
||||
auto *FirstUser = *UI++;
|
||||
while (UI != V->user_end()) {
|
||||
assert(*UI == FirstUser && "Currently we only support SLP trees.");
|
||||
UI++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return I->second;
|
||||
}
|
||||
|
||||
// Dump inputs
|
||||
LLVM_DEBUG({
|
||||
dbgs() << "buildGraph: ";
|
||||
dumpBundle(Values);
|
||||
});
|
||||
|
||||
if (!areVectorizable(Values))
|
||||
return markFailed();
|
||||
|
||||
assert(getOpcode(Values) && "Opcodes for all values must match");
|
||||
unsigned ValuesOpcode = *getOpcode(Values);
|
||||
|
||||
SmallVector<VPValue *, 4> CombinedOperands;
|
||||
if (areCommutative(Values)) {
|
||||
bool MultiNodeRoot = !MultiNodeActive;
|
||||
MultiNodeActive = true;
|
||||
for (auto &Operands : getOperands(Values)) {
|
||||
LLVM_DEBUG({
|
||||
dbgs() << " Visiting Commutative";
|
||||
dumpBundle(Operands);
|
||||
});
|
||||
|
||||
auto OperandsOpcode = getOpcode(Operands);
|
||||
if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
|
||||
LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
|
||||
CombinedOperands.push_back(buildGraph(Operands));
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
|
||||
// Create dummy VPInstruction, which will we replace later by the
|
||||
// re-ordered operand.
|
||||
VPInstruction *Op =
|
||||
new VPInstruction(VPInstruction::Broadcast, {Values[0]});
|
||||
CombinedOperands.push_back(Op);
|
||||
MultiNodeOps.emplace_back(Op, Operands);
|
||||
}
|
||||
}
|
||||
|
||||
if (MultiNodeRoot) {
|
||||
LLVM_DEBUG(dbgs() << "Reorder \n");
|
||||
MultiNodeActive = false;
|
||||
|
||||
auto FinalOrder = reorderMultiNodeOps();
|
||||
|
||||
MultiNodeOps.clear();
|
||||
for (auto &Ops : FinalOrder) {
|
||||
VPInstruction *NewOp = buildGraph(Ops.second);
|
||||
Ops.first->replaceAllUsesWith(NewOp);
|
||||
for (unsigned i = 0; i < CombinedOperands.size(); i++)
|
||||
if (CombinedOperands[i] == Ops.first)
|
||||
CombinedOperands[i] = NewOp;
|
||||
delete Ops.first;
|
||||
Ops.first = NewOp;
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "Found final order\n");
|
||||
}
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << " NonCommuntative\n");
|
||||
if (ValuesOpcode == Instruction::Load)
|
||||
for (VPValue *V : Values)
|
||||
CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
|
||||
else
|
||||
for (auto &Operands : getOperands(Values))
|
||||
CombinedOperands.push_back(buildGraph(Operands));
|
||||
}
|
||||
|
||||
unsigned Opcode;
|
||||
switch (ValuesOpcode) {
|
||||
case Instruction::Load:
|
||||
Opcode = VPInstruction::SLPLoad;
|
||||
break;
|
||||
case Instruction::Store:
|
||||
Opcode = VPInstruction::SLPStore;
|
||||
break;
|
||||
default:
|
||||
Opcode = ValuesOpcode;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!CompletelySLP)
|
||||
return markFailed();
|
||||
|
||||
assert(CombinedOperands.size() > 0 && "Need more some operands");
|
||||
auto *Inst = cast<VPInstruction>(Values[0])->getUnderlyingInstr();
|
||||
auto *VPI = new VPInstruction(Opcode, CombinedOperands,
|
||||
VPIRFlags::getDefaultFlags(Opcode), {},
|
||||
Inst->getDebugLoc());
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " " << Values[0]
|
||||
<< "\n");
|
||||
addCombined(Values, VPI);
|
||||
return VPI;
|
||||
}
|
||||
@@ -1,145 +0,0 @@
|
||||
//===- VPlan.h - VPlan-based SLP ------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// This file contains the declarations for VPlan-based SLP.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANSLP_H
|
||||
#define LLVM_TRANSFORMS_VECTORIZE_VPLANSLP_H
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/VectorUtils.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class VPBasicBlock;
|
||||
class VPBlockBase;
|
||||
class VPRegionBlock;
|
||||
class VPlan;
|
||||
class VPValue;
|
||||
class VPInstruction;
|
||||
|
||||
class VPInterleavedAccessInfo {
|
||||
DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
|
||||
InterleaveGroupMap;
|
||||
|
||||
/// Type for mapping of instruction based interleave groups to VPInstruction
|
||||
/// interleave groups
|
||||
using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
|
||||
InterleaveGroup<VPInstruction> *>;
|
||||
|
||||
/// Recursively \p Region and populate VPlan based interleave groups based on
|
||||
/// \p IAI.
|
||||
void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
|
||||
InterleavedAccessInfo &IAI);
|
||||
/// Recursively traverse \p Block and populate VPlan based interleave groups
|
||||
/// based on \p IAI.
|
||||
void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
|
||||
InterleavedAccessInfo &IAI);
|
||||
|
||||
public:
|
||||
LLVM_ABI_FOR_TEST VPInterleavedAccessInfo(VPlan &Plan,
|
||||
InterleavedAccessInfo &IAI);
|
||||
VPInterleavedAccessInfo(const VPInterleavedAccessInfo &) = delete;
|
||||
VPInterleavedAccessInfo &operator=(const VPInterleavedAccessInfo &) = delete;
|
||||
|
||||
~VPInterleavedAccessInfo() {
|
||||
// Avoid releasing a pointer twice.
|
||||
SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet(
|
||||
llvm::from_range, llvm::make_second_range(InterleaveGroupMap));
|
||||
for (auto *Ptr : DelSet)
|
||||
delete Ptr;
|
||||
}
|
||||
|
||||
/// Get the interleave group that \p Instr belongs to.
|
||||
///
|
||||
/// \returns nullptr if doesn't have such group.
|
||||
InterleaveGroup<VPInstruction> *
|
||||
getInterleaveGroup(VPInstruction *Instr) const {
|
||||
return InterleaveGroupMap.lookup(Instr);
|
||||
}
|
||||
};
|
||||
|
||||
/// Class that maps (parts of) an existing VPlan to trees of combined
|
||||
/// VPInstructions.
|
||||
class VPlanSlp {
|
||||
enum class OpMode { Failed, Load, Opcode };
|
||||
|
||||
/// Mapping of values in the original VPlan to a combined VPInstruction.
|
||||
DenseMap<SmallVector<VPValue *, 4>, VPInstruction *> BundleToCombined;
|
||||
|
||||
VPInterleavedAccessInfo &IAI;
|
||||
|
||||
/// Basic block to operate on. For now, only instructions in a single BB are
|
||||
/// considered.
|
||||
const VPBasicBlock &BB;
|
||||
|
||||
/// Indicates whether we managed to combine all visited instructions or not.
|
||||
bool CompletelySLP = true;
|
||||
|
||||
/// Width of the widest combined bundle in bits.
|
||||
unsigned WidestBundleBits = 0;
|
||||
|
||||
using MultiNodeOpTy = std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
|
||||
|
||||
// Input operand bundles for the current multi node. Each multi node operand
|
||||
// bundle contains values not matching the multi node's opcode. They will
|
||||
// be reordered in reorderMultiNodeOps, once we completed building a
|
||||
// multi node.
|
||||
SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
|
||||
|
||||
/// Indicates whether we are building a multi node currently.
|
||||
bool MultiNodeActive = false;
|
||||
|
||||
/// Check if we can vectorize Operands together.
|
||||
bool areVectorizable(ArrayRef<VPValue *> Operands) const;
|
||||
|
||||
/// Add combined instruction \p New for the bundle \p Operands.
|
||||
void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
|
||||
|
||||
/// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
|
||||
VPInstruction *markFailed();
|
||||
|
||||
/// Reorder operands in the multi node to maximize sequential memory access
|
||||
/// and commutative operations.
|
||||
SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
|
||||
|
||||
/// Choose the best candidate to use for the lane after \p Last. The set of
|
||||
/// candidates to choose from are values with an opcode matching \p Last's
|
||||
/// or loads consecutive to \p Last.
|
||||
std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
|
||||
SmallPtrSetImpl<VPValue *> &Candidates,
|
||||
VPInterleavedAccessInfo &IAI);
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
/// Print bundle \p Values to dbgs().
|
||||
void dumpBundle(ArrayRef<VPValue *> Values);
|
||||
#endif
|
||||
|
||||
public:
|
||||
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
|
||||
|
||||
~VPlanSlp() = default;
|
||||
|
||||
/// Tries to build an SLP tree rooted at \p Operands and returns a
|
||||
/// VPInstruction combining \p Operands, if they can be combined.
|
||||
LLVM_ABI_FOR_TEST VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
|
||||
|
||||
/// Return the width of the widest combined bundle in bits.
|
||||
unsigned getWidestBundleBits() const { return WidestBundleBits; }
|
||||
|
||||
/// Return true if all visited instruction can be combined.
|
||||
bool isCompletelySLP() const { return CompletelySLP; }
|
||||
};
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
|
||||
@@ -14,7 +14,6 @@ add_llvm_unittest(VectorizeTests
|
||||
VPPostDomFrontierTest.cpp
|
||||
VPlanHCFGTest.cpp
|
||||
VPlanPatternMatchTest.cpp
|
||||
VPlanSlpTest.cpp
|
||||
VPlanUncountableExitTest.cpp
|
||||
VPlanVerifierTest.cpp
|
||||
)
|
||||
|
||||
@@ -1,896 +0,0 @@
|
||||
//===- llvm/unittest/Transforms/Vectorize/VPlanSlpTest.cpp ---------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "../lib/Transforms/Vectorize/VPlanSLP.h"
|
||||
#include "../lib/Transforms/Vectorize/VPlan.h"
|
||||
#include "VPlanTestBase.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/VectorUtils.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace {
|
||||
|
||||
class VPlanSlpTest : public VPlanTestIRBase {
|
||||
protected:
|
||||
DataLayout DL;
|
||||
|
||||
std::unique_ptr<AssumptionCache> AC;
|
||||
std::unique_ptr<ScalarEvolution> SE;
|
||||
std::unique_ptr<AAResults> AARes;
|
||||
std::unique_ptr<BasicAAResult> BasicAA;
|
||||
std::unique_ptr<LoopAccessInfo> LAI;
|
||||
std::unique_ptr<PredicatedScalarEvolution> PSE;
|
||||
std::unique_ptr<InterleavedAccessInfo> IAI;
|
||||
|
||||
VPlanSlpTest()
|
||||
: DL("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-"
|
||||
"f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:"
|
||||
"16:32:64-S128") {}
|
||||
|
||||
VPInterleavedAccessInfo getInterleavedAccessInfo(Function &F, Loop *L,
|
||||
VPlan &Plan) {
|
||||
AC.reset(new AssumptionCache(F));
|
||||
SE.reset(new ScalarEvolution(F, *TLI, *AC, *DT, *LI));
|
||||
BasicAA.reset(new BasicAAResult(DL, F, *TLI, *AC, &*DT));
|
||||
AARes.reset(new AAResults(*TLI));
|
||||
AARes->addAAResult(*BasicAA);
|
||||
PSE.reset(new PredicatedScalarEvolution(*SE, *L));
|
||||
LAI.reset(
|
||||
new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI, &*AC));
|
||||
IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI));
|
||||
IAI->analyzeInterleaving(false);
|
||||
return {Plan, *IAI};
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(VPlanSlpTest, testSlpSimple_2) {
|
||||
const char *ModuleString =
|
||||
"%struct.Test = type { i32, i32 }\n"
|
||||
"%struct.Test3 = type { i32, i32, i32 }\n"
|
||||
"%struct.Test4xi8 = type { i8, i8, i8 }\n"
|
||||
"define void @add_x2(ptr nocapture readonly %A, ptr "
|
||||
"nocapture readonly %B, ptr nocapture %C) {\n"
|
||||
"entry:\n"
|
||||
" br label %for.body\n"
|
||||
"for.body: ; preds = %for.body, "
|
||||
"%entry\n"
|
||||
" %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
|
||||
" %A0 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vA0 = load i32, ptr %A0, align 4\n"
|
||||
" %B0 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vB0 = load i32, ptr %B0, align 4\n"
|
||||
" %add0 = add nsw i32 %vA0, %vB0\n"
|
||||
" %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vA1 = load i32, ptr %A1, align 4\n"
|
||||
" %B1 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vB1 = load i32, ptr %B1, align 4\n"
|
||||
" %add1 = add nsw i32 %vA1, %vB1\n"
|
||||
" %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" store i32 %add0, ptr %C0, align 4\n"
|
||||
" %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" store i32 %add1, ptr %C1, align 4\n"
|
||||
" %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
|
||||
" %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
|
||||
" br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
|
||||
"for.cond.cleanup: ; preds = %for.body\n"
|
||||
" ret void\n"
|
||||
"}\n";
|
||||
|
||||
Module &M = parseModule(ModuleString);
|
||||
|
||||
Function *F = M.getFunction("add_x2");
|
||||
BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
|
||||
auto Plan = buildVPlan(LoopHeader);
|
||||
auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
|
||||
|
||||
VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
|
||||
|
||||
VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
|
||||
VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
|
||||
|
||||
VPlanSlp Slp(VPIAI, *Body);
|
||||
SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
|
||||
VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
|
||||
EXPECT_EQ(64u, Slp.getWidestBundleBits());
|
||||
EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());
|
||||
|
||||
auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));
|
||||
EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());
|
||||
|
||||
auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));
|
||||
auto *CombinedLoadB = cast<VPInstruction>(CombinedAdd->getOperand(1));
|
||||
EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
|
||||
EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());
|
||||
|
||||
delete CombinedStore;
|
||||
delete CombinedAdd;
|
||||
delete CombinedLoadA;
|
||||
delete CombinedLoadB;
|
||||
}
|
||||
|
||||
TEST_F(VPlanSlpTest, testSlpSimple_3) {
|
||||
const char *ModuleString =
|
||||
"%struct.Test = type { i32, i32 }\n"
|
||||
"%struct.Test3 = type { i32, i32, i32 }\n"
|
||||
"%struct.Test4xi8 = type { i8, i8, i8 }\n"
|
||||
"define void @add_x2(ptr nocapture readonly %A, ptr "
|
||||
"nocapture readonly %B, ptr nocapture %C) {\n"
|
||||
"entry:\n"
|
||||
" br label %for.body\n"
|
||||
"for.body: ; preds = %for.body, "
|
||||
"%entry\n"
|
||||
" %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
|
||||
" %A0 = getelementptr %struct.Test, ptr %A, i64 "
|
||||
" %indvars.iv, i32 0\n"
|
||||
" %vA0 = load i32, ptr %A0, align 4\n"
|
||||
" %B0 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
" %indvars.iv, i32 0\n"
|
||||
" %vB0 = load i32, ptr %B0, align 4\n"
|
||||
" %add0 = add nsw i32 %vA0, %vB0\n"
|
||||
" %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
" %indvars.iv, i32 1\n"
|
||||
" %vA1 = load i32, ptr %A1, align 4\n"
|
||||
" %B1 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
" %indvars.iv, i32 1\n"
|
||||
" %vB1 = load i32, ptr %B1, align 4\n"
|
||||
" %add1 = add nsw i32 %vA1, %vB1\n"
|
||||
" %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
" %indvars.iv, i32 0\n"
|
||||
" store i32 %add0, ptr %C0, align 4\n"
|
||||
" %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
" %indvars.iv, i32 1\n"
|
||||
" store i32 %add1, ptr %C1, align 4\n"
|
||||
" %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
|
||||
" %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
|
||||
" br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
|
||||
"for.cond.cleanup: ; preds = %for.body\n"
|
||||
" ret void\n"
|
||||
"}\n";
|
||||
|
||||
Module &M = parseModule(ModuleString);
|
||||
|
||||
Function *F = M.getFunction("add_x2");
|
||||
BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
|
||||
auto Plan = buildVPlan(LoopHeader);
|
||||
|
||||
VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
|
||||
|
||||
VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
|
||||
VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
|
||||
|
||||
auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
|
||||
|
||||
VPlanSlp Slp(VPIAI, *Body);
|
||||
SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
|
||||
VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
|
||||
EXPECT_EQ(64u, Slp.getWidestBundleBits());
|
||||
EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());
|
||||
|
||||
auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));
|
||||
EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());
|
||||
|
||||
auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));
|
||||
auto *CombinedLoadB = cast<VPInstruction>(CombinedAdd->getOperand(1));
|
||||
EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
|
||||
EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());
|
||||
|
||||
VPInstruction *GetA = cast<VPInstruction>(&*std::next(Body->begin(), 1));
|
||||
VPInstruction *GetB = cast<VPInstruction>(&*std::next(Body->begin(), 3));
|
||||
EXPECT_EQ(GetA, CombinedLoadA->getOperand(0));
|
||||
EXPECT_EQ(GetB, CombinedLoadB->getOperand(0));
|
||||
|
||||
delete CombinedStore;
|
||||
delete CombinedAdd;
|
||||
delete CombinedLoadA;
|
||||
delete CombinedLoadB;
|
||||
}
|
||||
|
||||
TEST_F(VPlanSlpTest, testSlpReuse_1) {
|
||||
const char *ModuleString =
|
||||
"%struct.Test = type { i32, i32 }\n"
|
||||
"define void @add_x2(ptr nocapture readonly %A, ptr "
|
||||
"nocapture readonly %B, ptr nocapture %C) {\n"
|
||||
"entry:\n"
|
||||
" br label %for.body\n"
|
||||
"for.body: ; preds = %for.body, "
|
||||
"%entry\n"
|
||||
" %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
|
||||
" %A0 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vA0 = load i32, ptr %A0, align 4\n"
|
||||
" %add0 = add nsw i32 %vA0, %vA0\n"
|
||||
" %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vA1 = load i32, ptr %A1, align 4\n"
|
||||
" %add1 = add nsw i32 %vA1, %vA1\n"
|
||||
" %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" store i32 %add0, ptr %C0, align 4\n"
|
||||
" %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" store i32 %add1, ptr %C1, align 4\n"
|
||||
" %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
|
||||
" %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
|
||||
" br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
|
||||
"for.cond.cleanup: ; preds = %for.body\n"
|
||||
" ret void\n"
|
||||
"}\n";
|
||||
|
||||
Module &M = parseModule(ModuleString);
|
||||
|
||||
Function *F = M.getFunction("add_x2");
|
||||
BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
|
||||
auto Plan = buildVPlan(LoopHeader);
|
||||
auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
|
||||
|
||||
VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
|
||||
|
||||
VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 8));
|
||||
VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 10));
|
||||
|
||||
VPlanSlp Slp(VPIAI, *Body);
|
||||
SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
|
||||
VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
|
||||
EXPECT_EQ(64u, Slp.getWidestBundleBits());
|
||||
EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());
|
||||
|
||||
auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));
|
||||
EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());
|
||||
|
||||
auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));
|
||||
EXPECT_EQ(CombinedLoadA, CombinedAdd->getOperand(1));
|
||||
EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
|
||||
|
||||
delete CombinedStore;
|
||||
delete CombinedAdd;
|
||||
delete CombinedLoadA;
|
||||
}
|
||||
|
||||
TEST_F(VPlanSlpTest, testSlpReuse_2) {
|
||||
const char *ModuleString =
|
||||
"%struct.Test = type { i32, i32 }\n"
|
||||
"define void @add_x2(ptr nocapture readonly %A, ptr "
|
||||
"nocapture readonly %B, ptr nocapture %C) {\n"
|
||||
"entry:\n"
|
||||
" br label %for.body\n"
|
||||
"for.body: ; preds = %for.body, "
|
||||
"%entry\n"
|
||||
" %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
|
||||
" %A0 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vA0 = load i32, ptr %A0, align 4\n"
|
||||
" %add0 = add nsw i32 %vA0, %vA0\n"
|
||||
" %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" store i32 %add0, ptr %C0, align 4\n"
|
||||
" %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vA1 = load i32, ptr %A1, align 4\n"
|
||||
" %add1 = add nsw i32 %vA1, %vA1\n"
|
||||
" %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" store i32 %add1, ptr %C1, align 4\n"
|
||||
" %use = add i32 %vA1, 1\n"
|
||||
" %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
|
||||
" %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
|
||||
" br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
|
||||
"for.cond.cleanup: ; preds = %for.body\n"
|
||||
" ret void\n"
|
||||
"}\n";
|
||||
|
||||
Module &M = parseModule(ModuleString);
|
||||
|
||||
Function *F = M.getFunction("add_x2");
|
||||
BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
|
||||
auto Plan = buildVPlan(LoopHeader);
|
||||
auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
|
||||
|
||||
VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
|
||||
|
||||
VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 5));
|
||||
VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 10));
|
||||
|
||||
VPlanSlp Slp(VPIAI, *Body);
|
||||
SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
|
||||
Slp.buildGraph(StoreRoot);
|
||||
EXPECT_FALSE(Slp.isCompletelySLP());
|
||||
}
|
||||
|
||||
static void checkReorderExample(VPInstruction *Store1, VPInstruction *Store2,
|
||||
VPBasicBlock *Body,
|
||||
VPInterleavedAccessInfo &&IAI) {
|
||||
VPlanSlp Slp(IAI, *Body);
|
||||
SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
|
||||
VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
|
||||
|
||||
EXPECT_TRUE(Slp.isCompletelySLP());
|
||||
EXPECT_EQ(CombinedStore->getOpcode(), VPInstruction::SLPStore);
|
||||
|
||||
VPInstruction *CombinedAdd =
|
||||
cast<VPInstruction>(CombinedStore->getOperand(0));
|
||||
EXPECT_EQ(CombinedAdd->getOpcode(), Instruction::Add);
|
||||
|
||||
VPInstruction *CombinedMulAB =
|
||||
cast<VPInstruction>(CombinedAdd->getOperand(0));
|
||||
VPInstruction *CombinedMulCD =
|
||||
cast<VPInstruction>(CombinedAdd->getOperand(1));
|
||||
EXPECT_EQ(CombinedMulAB->getOpcode(), Instruction::Mul);
|
||||
|
||||
VPInstruction *CombinedLoadA =
|
||||
cast<VPInstruction>(CombinedMulAB->getOperand(0));
|
||||
EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
|
||||
VPInstruction *LoadvA0 = cast<VPInstruction>(&*std::next(Body->begin(), 2));
|
||||
VPInstruction *LoadvA1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
|
||||
EXPECT_EQ(LoadvA0->getOperand(0), CombinedLoadA->getOperand(0));
|
||||
EXPECT_EQ(LoadvA1->getOperand(0), CombinedLoadA->getOperand(1));
|
||||
|
||||
VPInstruction *CombinedLoadB =
|
||||
cast<VPInstruction>(CombinedMulAB->getOperand(1));
|
||||
EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());
|
||||
VPInstruction *LoadvB0 = cast<VPInstruction>(&*std::next(Body->begin(), 4));
|
||||
VPInstruction *LoadvB1 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
|
||||
EXPECT_EQ(LoadvB0->getOperand(0), CombinedLoadB->getOperand(0));
|
||||
EXPECT_EQ(LoadvB1->getOperand(0), CombinedLoadB->getOperand(1));
|
||||
|
||||
EXPECT_EQ(CombinedMulCD->getOpcode(), Instruction::Mul);
|
||||
|
||||
VPInstruction *CombinedLoadC =
|
||||
cast<VPInstruction>(CombinedMulCD->getOperand(0));
|
||||
EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadC->getOpcode());
|
||||
VPInstruction *LoadvC0 = cast<VPInstruction>(&*std::next(Body->begin(), 7));
|
||||
VPInstruction *LoadvC1 = cast<VPInstruction>(&*std::next(Body->begin(), 17));
|
||||
EXPECT_EQ(LoadvC0->getOperand(0), CombinedLoadC->getOperand(0));
|
||||
EXPECT_EQ(LoadvC1->getOperand(0), CombinedLoadC->getOperand(1));
|
||||
|
||||
VPInstruction *CombinedLoadD =
|
||||
cast<VPInstruction>(CombinedMulCD->getOperand(1));
|
||||
EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadD->getOpcode());
|
||||
VPInstruction *LoadvD0 = cast<VPInstruction>(&*std::next(Body->begin(), 9));
|
||||
VPInstruction *LoadvD1 = cast<VPInstruction>(&*std::next(Body->begin(), 19));
|
||||
EXPECT_EQ(LoadvD0->getOperand(0), CombinedLoadD->getOperand(0));
|
||||
EXPECT_EQ(LoadvD1->getOperand(0), CombinedLoadD->getOperand(1));
|
||||
|
||||
delete CombinedStore;
|
||||
delete CombinedAdd;
|
||||
delete CombinedMulAB;
|
||||
delete CombinedMulCD;
|
||||
delete CombinedLoadA;
|
||||
delete CombinedLoadB;
|
||||
delete CombinedLoadC;
|
||||
delete CombinedLoadD;
|
||||
}
|
||||
|
||||
TEST_F(VPlanSlpTest, testSlpReorder_1) {
|
||||
LLVMContext Ctx;
|
||||
const char *ModuleString =
|
||||
"%struct.Test = type { i32, i32 }\n"
|
||||
"define void @add_x3(ptr %A, ptr %B, ptr "
|
||||
"%C, ptr %D, ptr %E) {\n"
|
||||
"entry:\n"
|
||||
" br label %for.body\n"
|
||||
"for.body: ; preds = %for.body, "
|
||||
"%entry\n"
|
||||
" %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
|
||||
" %A0 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vA0 = load i32, ptr %A0, align 4\n"
|
||||
" %B0 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vB0 = load i32, ptr %B0, align 4\n"
|
||||
" %mul11 = mul nsw i32 %vA0, %vB0\n"
|
||||
" %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vC0 = load i32, ptr %C0, align 4\n"
|
||||
" %D0 = getelementptr inbounds %struct.Test, ptr %D, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vD0 = load i32, ptr %D0, align 4\n"
|
||||
" %mul12 = mul nsw i32 %vC0, %vD0\n"
|
||||
" %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vA1 = load i32, ptr %A1, align 4\n"
|
||||
" %B1 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vB1 = load i32, ptr %B1, align 4\n"
|
||||
" %mul21 = mul nsw i32 %vA1, %vB1\n"
|
||||
" %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vC1 = load i32, ptr %C1, align 4\n"
|
||||
" %D1 = getelementptr inbounds %struct.Test, ptr %D, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vD1 = load i32, ptr %D1, align 4\n"
|
||||
" %mul22 = mul nsw i32 %vC1, %vD1\n"
|
||||
" %add1 = add nsw i32 %mul11, %mul12\n"
|
||||
" %add2 = add nsw i32 %mul22, %mul21\n"
|
||||
" %E0 = getelementptr inbounds %struct.Test, ptr %E, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" store i32 %add1, ptr %E0, align 4\n"
|
||||
" %E1 = getelementptr inbounds %struct.Test, ptr %E, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" store i32 %add2, ptr %E1, align 4\n"
|
||||
" %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
|
||||
" %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
|
||||
" br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
|
||||
"for.cond.cleanup: ; preds = %for.body\n"
|
||||
" ret void\n"
|
||||
"}\n";
|
||||
|
||||
Module &M = parseModule(ModuleString);
|
||||
|
||||
Function *F = M.getFunction("add_x3");
|
||||
BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
|
||||
auto Plan = buildVPlan(LoopHeader);
|
||||
|
||||
VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
|
||||
|
||||
VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
|
||||
VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
|
||||
|
||||
checkReorderExample(
|
||||
Store1, Store2, Body,
|
||||
getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));
|
||||
}
|
||||
|
||||
TEST_F(VPlanSlpTest, testSlpReorder_2) {
|
||||
LLVMContext Ctx;
|
||||
const char *ModuleString =
|
||||
"%struct.Test = type { i32, i32 }\n"
|
||||
"define void @add_x3(ptr %A, ptr %B, ptr "
|
||||
"%C, ptr %D, ptr %E) {\n"
|
||||
"entry:\n"
|
||||
" br label %for.body\n"
|
||||
"for.body: ; preds = %for.body, "
|
||||
"%entry\n"
|
||||
" %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
|
||||
" %A0 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vA0 = load i32, ptr %A0, align 4\n"
|
||||
" %B0 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vB0 = load i32, ptr %B0, align 4\n"
|
||||
" %mul11 = mul nsw i32 %vA0, %vB0\n"
|
||||
" %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vC0 = load i32, ptr %C0, align 4\n"
|
||||
" %D0 = getelementptr inbounds %struct.Test, ptr %D, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vD0 = load i32, ptr %D0, align 4\n"
|
||||
" %mul12 = mul nsw i32 %vC0, %vD0\n"
|
||||
" %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vA1 = load i32, ptr %A1, align 4\n"
|
||||
" %B1 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vB1 = load i32, ptr %B1, align 4\n"
|
||||
" %mul21 = mul nsw i32 %vB1, %vA1\n"
|
||||
" %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vC1 = load i32, ptr %C1, align 4\n"
|
||||
" %D1 = getelementptr inbounds %struct.Test, ptr %D, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vD1 = load i32, ptr %D1, align 4\n"
|
||||
" %mul22 = mul nsw i32 %vD1, %vC1\n"
|
||||
" %add1 = add nsw i32 %mul11, %mul12\n"
|
||||
" %add2 = add nsw i32 %mul22, %mul21\n"
|
||||
" %E0 = getelementptr inbounds %struct.Test, ptr %E, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" store i32 %add1, ptr %E0, align 4\n"
|
||||
" %E1 = getelementptr inbounds %struct.Test, ptr %E, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" store i32 %add2, ptr %E1, align 4\n"
|
||||
" %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
|
||||
" %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
|
||||
" br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
|
||||
"for.cond.cleanup: ; preds = %for.body\n"
|
||||
" ret void\n"
|
||||
"}\n";
|
||||
|
||||
Module &M = parseModule(ModuleString);
|
||||
|
||||
Function *F = M.getFunction("add_x3");
|
||||
BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
|
||||
auto Plan = buildVPlan(LoopHeader);
|
||||
|
||||
VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
|
||||
|
||||
VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
|
||||
VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
|
||||
|
||||
checkReorderExample(
|
||||
Store1, Store2, Body,
|
||||
getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));
|
||||
}
|
||||
|
||||
TEST_F(VPlanSlpTest, testSlpReorder_3) {
|
||||
LLVMContext Ctx;
|
||||
const char *ModuleString =
|
||||
"%struct.Test = type { i32, i32 }\n"
|
||||
"define void @add_x3(ptr %A, ptr %B, ptr "
|
||||
"%C, ptr %D, ptr %E) {\n"
|
||||
"entry:\n"
|
||||
" br label %for.body\n"
|
||||
"for.body: ; preds = %for.body, "
|
||||
"%entry\n"
|
||||
" %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
|
||||
" %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vA1 = load i32, ptr %A1, align 4\n"
|
||||
" %B0 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vB0 = load i32, ptr %B0, align 4\n"
|
||||
" %mul11 = mul nsw i32 %vA1, %vB0\n"
|
||||
" %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vC0 = load i32, ptr %C0, align 4\n"
|
||||
" %D0 = getelementptr inbounds %struct.Test, ptr %D, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vD0 = load i32, ptr %D0, align 4\n"
|
||||
" %mul12 = mul nsw i32 %vC0, %vD0\n"
|
||||
" %A0 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vA0 = load i32, ptr %A0, align 4\n"
|
||||
" %B1 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vB1 = load i32, ptr %B1, align 4\n"
|
||||
" %mul21 = mul nsw i32 %vB1, %vA0\n"
|
||||
" %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vC1 = load i32, ptr %C1, align 4\n"
|
||||
" %D1 = getelementptr inbounds %struct.Test, ptr %D, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vD1 = load i32, ptr %D1, align 4\n"
|
||||
" %mul22 = mul nsw i32 %vD1, %vC1\n"
|
||||
" %add1 = add nsw i32 %mul11, %mul12\n"
|
||||
" %add2 = add nsw i32 %mul22, %mul21\n"
|
||||
" %E0 = getelementptr inbounds %struct.Test, ptr %E, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" store i32 %add1, ptr %E0, align 4\n"
|
||||
" %E1 = getelementptr inbounds %struct.Test, ptr %E, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" store i32 %add2, ptr %E1, align 4\n"
|
||||
" %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
|
||||
" %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
|
||||
" br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
|
||||
"for.cond.cleanup: ; preds = %for.body\n"
|
||||
" ret void\n"
|
||||
"}\n";
|
||||
|
||||
Module &M = parseModule(ModuleString);
|
||||
|
||||
Function *F = M.getFunction("add_x3");
|
||||
BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
|
||||
auto Plan = buildVPlan(LoopHeader);
|
||||
|
||||
VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
|
||||
|
||||
VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 25));
|
||||
VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 27));
|
||||
|
||||
auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
|
||||
VPlanSlp Slp(VPIAI, *Body);
|
||||
SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
|
||||
EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));
|
||||
|
||||
// FIXME Need to select better first value for lane0.
|
||||
EXPECT_FALSE(Slp.isCompletelySLP());
|
||||
}
|
||||
|
||||
TEST_F(VPlanSlpTest, testSlpReorder_4) {
|
||||
LLVMContext Ctx;
|
||||
const char *ModuleString =
|
||||
"%struct.Test = type { i32, i32 }\n"
|
||||
"define void @add_x3(ptr %A, ptr %B, ptr "
|
||||
"%C, ptr %D, ptr %E) {\n"
|
||||
"entry:\n"
|
||||
" br label %for.body\n"
|
||||
"for.body: ; preds = %for.body, "
|
||||
"%entry\n"
|
||||
" %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
|
||||
" %A0 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vA0 = load i32, ptr %A0, align 4\n"
|
||||
" %B0 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vB0 = load i32, ptr %B0, align 4\n"
|
||||
" %mul11 = mul nsw i32 %vA0, %vB0\n"
|
||||
" %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vC0 = load i32, ptr %C0, align 4\n"
|
||||
" %D0 = getelementptr inbounds %struct.Test, ptr %D, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vD0 = load i32, ptr %D0, align 4\n"
|
||||
" %mul12 = mul nsw i32 %vC0, %vD0\n"
|
||||
" %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vA1 = load i32, ptr %A1, align 4\n"
|
||||
" %B1 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vB1 = load i32, ptr %B1, align 4\n"
|
||||
" %mul21 = mul nsw i32 %vA1, %vB1\n"
|
||||
" %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vC1 = load i32, ptr %C1, align 4\n"
|
||||
" %D1 = getelementptr inbounds %struct.Test, ptr %D, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vD1 = load i32, ptr %D1, align 4\n"
|
||||
" %mul22 = mul nsw i32 %vC1, %vD1\n"
|
||||
" %add1 = add nsw i32 %mul11, %mul12\n"
|
||||
" %add2 = add nsw i32 %mul22, %mul21\n"
|
||||
" %E0 = getelementptr inbounds %struct.Test, ptr %E, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" store i32 %add1, ptr %E0, align 4\n"
|
||||
" %E1 = getelementptr inbounds %struct.Test, ptr %E, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" store i32 %add2, ptr %E1, align 4\n"
|
||||
" %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
|
||||
" %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
|
||||
" br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
|
||||
"for.cond.cleanup: ; preds = %for.body\n"
|
||||
" ret void\n"
|
||||
"}\n";
|
||||
|
||||
Module &M = parseModule(ModuleString);
|
||||
|
||||
Function *F = M.getFunction("add_x3");
|
||||
BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
|
||||
auto Plan = buildVPlan(LoopHeader);
|
||||
|
||||
VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
|
||||
|
||||
VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
|
||||
VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
|
||||
|
||||
checkReorderExample(
|
||||
Store1, Store2, Body,
|
||||
getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));
|
||||
}
|
||||
|
||||
// Make sure we do not combine instructions with operands in different BBs.
|
||||
TEST_F(VPlanSlpTest, testInstrsInDifferentBBs) {
|
||||
const char *ModuleString =
|
||||
"%struct.Test = type { i32, i32 }\n"
|
||||
"%struct.Test3 = type { i32, i32, i32 }\n"
|
||||
"%struct.Test4xi8 = type { i8, i8, i8 }\n"
|
||||
"define void @add_x2(ptr nocapture readonly %A, ptr "
|
||||
"nocapture readonly %B, ptr nocapture %C) {\n"
|
||||
"entry:\n"
|
||||
" br label %for.body\n"
|
||||
"for.body: ; preds = %for.body, "
|
||||
"%entry\n"
|
||||
" %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %bb2 ]\n"
|
||||
" %A0 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vA0 = load i32, ptr %A0, align 4\n"
|
||||
" %B0 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vB0 = load i32, ptr %B0, align 4\n"
|
||||
" %add0 = add nsw i32 %vA0, %vB0\n"
|
||||
" %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vA1 = load i32, ptr %A1, align 4\n"
|
||||
" %B1 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" br label %bb2\n"
|
||||
"bb2:\n"
|
||||
" %vB1 = load i32, ptr %B1, align 4\n"
|
||||
" %add1 = add nsw i32 %vA1, %vB1\n"
|
||||
" %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" store i32 %add0, ptr %C0, align 4\n"
|
||||
" %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" store i32 %add1, ptr %C1, align 4\n"
|
||||
" %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
|
||||
" %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
|
||||
" br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
|
||||
"for.cond.cleanup: ; preds = %for.body\n"
|
||||
" ret void\n"
|
||||
"}\n";
|
||||
|
||||
Module &M = parseModule(ModuleString);
|
||||
|
||||
Function *F = M.getFunction("add_x2");
|
||||
BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
|
||||
auto Plan = buildVPlan(LoopHeader);
|
||||
auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
|
||||
|
||||
VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
|
||||
VPBasicBlock *BB2 = Body->getSingleSuccessor()->getEntryBasicBlock();
|
||||
|
||||
VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 4));
|
||||
VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 6));
|
||||
|
||||
VPlanSlp Slp(VPIAI, *BB2);
|
||||
SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
|
||||
EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));
|
||||
EXPECT_EQ(0u, Slp.getWidestBundleBits());
|
||||
}
|
||||
|
||||
// Make sure we do not combine instructions with operands in different BBs.
|
||||
TEST_F(VPlanSlpTest, testInstrsInDifferentBBs2) {
|
||||
const char *ModuleString =
|
||||
"%struct.Test = type { i32, i32 }\n"
|
||||
"%struct.Test3 = type { i32, i32, i32 }\n"
|
||||
"%struct.Test4xi8 = type { i8, i8, i8 }\n"
|
||||
"define void @add_x2(ptr nocapture readonly %A, ptr "
|
||||
"nocapture readonly %B, ptr nocapture %C) {\n"
|
||||
"entry:\n"
|
||||
" br label %for.body\n"
|
||||
"for.body: ; preds = %for.body, "
|
||||
"%entry\n"
|
||||
" %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %bb2 ]\n"
|
||||
" %A0 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vA0 = load i32, ptr %A0, align 4\n"
|
||||
" %B0 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" %vB0 = load i32, ptr %B0, align 4\n"
|
||||
" %add0 = add nsw i32 %vA0, %vB0\n"
|
||||
" %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vA1 = load i32, ptr %A1, align 4\n"
|
||||
" %B1 = getelementptr inbounds %struct.Test, ptr %B, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" %vB1 = load i32, ptr %B1, align 4\n"
|
||||
" %add1 = add nsw i32 %vA1, %vB1\n"
|
||||
" br label %bb2\n"
|
||||
"bb2:\n"
|
||||
" %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 0\n"
|
||||
" store i32 %add0, ptr %C0, align 4\n"
|
||||
" %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
|
||||
"%indvars.iv, i32 1\n"
|
||||
" store i32 %add1, ptr %C1, align 4\n"
|
||||
" %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
|
||||
" %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
|
||||
" br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
|
||||
"for.cond.cleanup: ; preds = %for.body\n"
|
||||
" ret void\n"
|
||||
"}\n";
|
||||
|
||||
Module &M = parseModule(ModuleString);
|
||||
|
||||
Function *F = M.getFunction("add_x2");
|
||||
BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
|
||||
auto Plan = buildVPlan(LoopHeader);
|
||||
auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
|
||||
|
||||
VPBasicBlock *Body = Plan->getVectorLoopRegion()->getEntryBasicBlock();
|
||||
VPBasicBlock *BB2 = Body->getSingleSuccessor()->getEntryBasicBlock();
|
||||
|
||||
VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 2));
|
||||
VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 4));
|
||||
|
||||
VPlanSlp Slp(VPIAI, *BB2);
|
||||
SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
|
||||
EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));
|
||||
EXPECT_EQ(0u, Slp.getWidestBundleBits());
|
||||
}
|
||||
|
||||
TEST_F(VPlanSlpTest, testSlpAtomicLoad) {
  // Same x2-add kernel as the plain SLP tests, except %vA0 is loaded
  // atomically. An atomic access must not be bundled, so graph building is
  // expected to fail and the plan must not be reported as completely SLP.
  const char *IRSource =
      "%struct.Test = type { i32, i32 }\n"
      "%struct.Test3 = type { i32, i32, i32 }\n"
      "%struct.Test4xi8 = type { i8, i8, i8 }\n"
      "define void @add_x2(ptr nocapture readonly %A, ptr "
      "nocapture readonly %B, ptr nocapture %C) {\n"
      "entry:\n"
      " br label %for.body\n"
      "for.body: ; preds = %for.body, "
      "%entry\n"
      " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
      " %A0 = getelementptr inbounds %struct.Test, ptr %A, i64 "
      "%indvars.iv, i32 0\n"
      " %vA0 = load atomic i32, ptr %A0 monotonic, align 4\n"
      " %B0 = getelementptr inbounds %struct.Test, ptr %B, i64 "
      "%indvars.iv, i32 0\n"
      " %vB0 = load i32, ptr %B0, align 4\n"
      " %add0 = add nsw i32 %vA0, %vB0\n"
      " %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
      "%indvars.iv, i32 1\n"
      " %vA1 = load i32, ptr %A1, align 4\n"
      " %B1 = getelementptr inbounds %struct.Test, ptr %B, i64 "
      "%indvars.iv, i32 1\n"
      " %vB1 = load i32, ptr %B1, align 4\n"
      " %add1 = add nsw i32 %vA1, %vB1\n"
      " %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
      "%indvars.iv, i32 0\n"
      " store i32 %add0, ptr %C0, align 4\n"
      " %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
      "%indvars.iv, i32 1\n"
      " store i32 %add1, ptr %C1, align 4\n"
      " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
      " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
      " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
      "for.cond.cleanup: ; preds = %for.body\n"
      " ret void\n"
      "}\n";

  Module &Mod = parseModule(IRSource);

  // Build a VPlan for the single-loop body and gather interleaved-access info
  // for it, exactly as the other SLP tests do.
  Function *Fn = Mod.getFunction("add_x2");
  BasicBlock *Header = Fn->getEntryBlock().getSingleSuccessor();
  auto Plan = buildVPlan(Header);
  auto IAI = getInterleavedAccessInfo(*Fn, LI->getLoopFor(Header), *Plan);

  VPBasicBlock *LoopBody = Plan->getVectorLoopRegion()->getEntryBasicBlock();

  // The two stores sit at fixed positions (13 and 15) in the recipe list;
  // they seed the SLP graph as the root bundle.
  auto *StoreA = cast<VPInstruction>(&*std::next(LoopBody->begin(), 13));
  auto *StoreB = cast<VPInstruction>(&*std::next(LoopBody->begin(), 15));

  VPlanSlp Slp(IAI, *LoopBody);
  SmallVector<VPValue *, 4> Roots = {StoreA, StoreB};
  // The atomic load poisons the bundle: no graph is produced.
  EXPECT_EQ(nullptr, Slp.buildGraph(Roots));
  EXPECT_FALSE(Slp.isCompletelySLP());
}
|
||||
|
||||
TEST_F(VPlanSlpTest, testSlpAtomicStore) {
  // Mirror of testSlpAtomicLoad: here both loads are plain, but the store to
  // %C0 is atomic. Atomic accesses must not be bundled, so the resulting
  // plan must not be reported as completely SLP.
  const char *IRSource =
      "%struct.Test = type { i32, i32 }\n"
      "%struct.Test3 = type { i32, i32, i32 }\n"
      "%struct.Test4xi8 = type { i8, i8, i8 }\n"
      "define void @add_x2(ptr nocapture readonly %A, ptr "
      "nocapture readonly %B, ptr nocapture %C) {\n"
      "entry:\n"
      " br label %for.body\n"
      "for.body: ; preds = %for.body, "
      "%entry\n"
      " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
      " %A0 = getelementptr inbounds %struct.Test, ptr %A, i64 "
      "%indvars.iv, i32 0\n"
      " %vA0 = load i32, ptr %A0, align 4\n"
      " %B0 = getelementptr inbounds %struct.Test, ptr %B, i64 "
      "%indvars.iv, i32 0\n"
      " %vB0 = load i32, ptr %B0, align 4\n"
      " %add0 = add nsw i32 %vA0, %vB0\n"
      " %A1 = getelementptr inbounds %struct.Test, ptr %A, i64 "
      "%indvars.iv, i32 1\n"
      " %vA1 = load i32, ptr %A1, align 4\n"
      " %B1 = getelementptr inbounds %struct.Test, ptr %B, i64 "
      "%indvars.iv, i32 1\n"
      " %vB1 = load i32, ptr %B1, align 4\n"
      " %add1 = add nsw i32 %vA1, %vB1\n"
      " %C0 = getelementptr inbounds %struct.Test, ptr %C, i64 "
      "%indvars.iv, i32 0\n"
      " store atomic i32 %add0, ptr %C0 monotonic, align 4\n"
      " %C1 = getelementptr inbounds %struct.Test, ptr %C, i64 "
      "%indvars.iv, i32 1\n"
      " store i32 %add1, ptr %C1, align 4\n"
      " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
      " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
      " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
      "for.cond.cleanup: ; preds = %for.body\n"
      " ret void\n"
      "}\n";

  Module &Mod = parseModule(IRSource);

  // Lower the loop to a VPlan and collect interleaved-access info, matching
  // the setup used by the sibling SLP tests.
  Function *Fn = Mod.getFunction("add_x2");
  BasicBlock *Header = Fn->getEntryBlock().getSingleSuccessor();
  auto Plan = buildVPlan(Header);
  auto IAI = getInterleavedAccessInfo(*Fn, LI->getLoopFor(Header), *Plan);

  VPBasicBlock *LoopBody = Plan->getVectorLoopRegion()->getEntryBasicBlock();

  // Root the SLP graph at the two stores (recipe positions 13 and 15).
  auto *StoreA = cast<VPInstruction>(&*std::next(LoopBody->begin(), 13));
  auto *StoreB = cast<VPInstruction>(&*std::next(LoopBody->begin(), 15));

  VPlanSlp Slp(IAI, *LoopBody);
  SmallVector<VPValue *, 4> Roots = {StoreA, StoreB};
  // Build the graph; the atomic store prevents full SLP-ability.
  Slp.buildGraph(Roots);
  EXPECT_FALSE(Slp.isCompletelySLP());
}
|
||||
|
||||
} // namespace
} // namespace llvm