Reapply "[VPlan] Handle calls in VPInstruction::opcodeMayReadOrWriteFromMemory." (#191886)
This reverts commit 3bf9639ec0.
The reapply adds trivial support for ExtractValue and InsertValue to fix
the crash causing the revert.
Original message:
Retrieve the called function and check its memory attributes, to
determine if a VPInstruction calling a function reads or writes memory.
Use it to strengthen assert in areAllLoadsDereferenceable.
PR: https://github.com/llvm/llvm-project/pull/190681
This commit is contained in:
@@ -951,12 +951,14 @@ static bool areAllLoadsDereferenceable(VPBasicBlock *HeaderVPBB,
|
||||
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
|
||||
vp_depth_first_shallow(HeaderVPBB))) {
|
||||
// Skip blocks outside the loop (exit blocks and their successors).
|
||||
if (VPBB == MiddleVPBB)
|
||||
if (VPBB == MiddleVPBB || isa<VPIRBasicBlock>(VPBB))
|
||||
continue;
|
||||
for (VPRecipeBase &R : *VPBB) {
|
||||
auto *VPI = dyn_cast<VPInstructionWithType>(&R);
|
||||
if (!VPI || VPI->getOpcode() != Instruction::Load)
|
||||
if (!VPI || VPI->getOpcode() != Instruction::Load) {
|
||||
assert(!R.mayReadFromMemory() && "unexpected recipe reading memory");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the pointer SCEV for dereferenceability checking.
|
||||
VPValue *Ptr = VPI->getOperand(0);
|
||||
|
||||
@@ -440,6 +440,27 @@ VPInstruction::VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
|
||||
"number of operands does not match opcode");
|
||||
}
|
||||
|
||||
/// For call VPInstructions, return the operand index of the called function.
|
||||
/// The function is either the last operand (for unmasked calls) or the
|
||||
/// second-to-last operand (for masked calls).
|
||||
static unsigned getCalledFnOperandIndex(const VPInstruction &VPI) {
|
||||
assert(VPI.getOpcode() == Instruction::Call && "must be a call");
|
||||
unsigned NumOps = VPI.getNumOperands();
|
||||
auto *LastOp = dyn_cast<VPIRValue>(VPI.getOperand(NumOps - 1));
|
||||
if (LastOp && isa<Function>(LastOp->getValue()))
|
||||
return NumOps - 1;
|
||||
assert(
|
||||
isa<Function>(cast<VPIRValue>(VPI.getOperand(NumOps - 2))->getValue()) &&
|
||||
"expected function operand");
|
||||
return NumOps - 2;
|
||||
}
|
||||
|
||||
/// For call VPInstructions, return the called function.
|
||||
static Function *getCalledFunction(const VPInstruction &VPI) {
|
||||
unsigned Idx = getCalledFnOperandIndex(VPI);
|
||||
return cast<Function>(cast<VPIRValue>(VPI.getOperand(Idx))->getValue());
|
||||
}
|
||||
|
||||
unsigned VPInstruction::getNumOperandsForOpcode() const {
|
||||
if (Instruction::isUnaryOp(Opcode) || Instruction::isCast(Opcode))
|
||||
return 1;
|
||||
@@ -486,14 +507,8 @@ unsigned VPInstruction::getNumOperandsForOpcode() const {
|
||||
case VPInstruction::ActiveLaneMask:
|
||||
case VPInstruction::ReductionStartVector:
|
||||
return 3;
|
||||
case Instruction::Call: {
|
||||
// For unmasked calls, the last argument will be the called function. Use that
|
||||
// to compute the number of operands without the mask.
|
||||
VPValue *LastOp = getOperand(getNumOperands() - 1);
|
||||
if (isa<VPIRValue>(LastOp) && isa<Function>(LastOp->getLiveInIRValue()))
|
||||
return getNumOperands();
|
||||
return getNumOperands() - 1;
|
||||
}
|
||||
case Instruction::Call:
|
||||
return getCalledFnOperandIndex(*this) + 1;
|
||||
case Instruction::GetElementPtr:
|
||||
case Instruction::PHI:
|
||||
case Instruction::Switch:
|
||||
@@ -1318,6 +1333,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
|
||||
Instruction::isUnaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
|
||||
return false;
|
||||
switch (getOpcode()) {
|
||||
case Instruction::ExtractValue:
|
||||
case Instruction::InsertValue:
|
||||
case Instruction::GetElementPtr:
|
||||
case Instruction::ExtractElement:
|
||||
case Instruction::Freeze:
|
||||
@@ -1358,6 +1375,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
|
||||
case VPInstruction::VScale:
|
||||
case VPInstruction::Unpack:
|
||||
return false;
|
||||
case Instruction::Call:
|
||||
return !getCalledFunction(*this)->doesNotAccessMemory();
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
||||
74
llvm/test/Transforms/LoopVectorize/early-exit-calls.ll
Normal file
74
llvm/test/Transforms/LoopVectorize/early-exit-calls.ll
Normal file
@@ -0,0 +1,74 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
|
||||
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
|
||||
|
||||
; Early-exit loop whose uncountable exit is taken when the overflow bit of
; @llvm.smul.with.overflow.i32 (read via extractvalue) is set. The latch also
; loads from %src, which is marked dereferenceable(1024). The CHECK lines
; expect the call and the extractvalue to be widened to 4 lanes in the vector
; loop.
define i32 @early_exit_with_extractvalue(ptr dereferenceable(1024) align 8 %src, i32 noundef %x) {
|
||||
; CHECK-LABEL: define i32 @early_exit_with_extractvalue(
|
||||
; CHECK-SAME: ptr align 8 dereferenceable(1024) [[SRC:%.*]], i32 noundef [[X:%.*]]) {
|
||||
; CHECK-NEXT: [[LOOP_PREHEADER:.*:]]
|
||||
; CHECK-NEXT: br label %[[LOOP:.*]]
|
||||
; CHECK: [[LOOP]]:
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label %[[LATCH:.*]]
|
||||
; CHECK: [[LATCH]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[LOOP]] ], [ [[IV_NEXT:%.*]], %[[EARLY_EXIT:.*]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[LOOP]] ], [ [[VEC_IND_NEXT:%.*]], %[[EARLY_EXIT]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> [[VEC_IND]], <4 x i32> [[BROADCAST_SPLAT]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i1> } [[TMP1]], 1
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[IV_NEXT]], 60
|
||||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4)
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[EARLY_EXIT]]
|
||||
; CHECK: [[EARLY_EXIT]]:
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[LATCH]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: [[MIDDLE_BLOCK]]:
|
||||
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]]
|
||||
; CHECK: [[VECTOR_EARLY_EXIT]]:
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
|
||||
; CHECK-NEXT: [[IV_LCSSA1:%.*]] = add i32 [[INDEX]], [[TMP8]]
|
||||
; CHECK-NEXT: br label %[[EARLY_EXIT1:.*]]
|
||||
; CHECK: [[EXIT_LOOPEXIT]]:
|
||||
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
|
||||
; CHECK: [[LOOP_HEADER]]:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 60, %[[EXIT_LOOPEXIT]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ]
|
||||
; CHECK-NEXT: [[MUL_OV:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[IV]], i32 [[X]])
|
||||
; CHECK-NEXT: [[MUL_OV_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL_OV]], 1
|
||||
; CHECK-NEXT: br i1 [[MUL_OV_OVERFLOW]], label %[[EARLY_EXIT1]], label %[[LOOP_LATCH]]
|
||||
; CHECK: [[LOOP_LATCH]]:
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
|
||||
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP]], align 4
|
||||
; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV]], 1
|
||||
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT1]], 63
|
||||
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
|
||||
; CHECK: [[EARLY_EXIT1]]:
|
||||
; CHECK-NEXT: [[IV_LCSSA2:%.*]] = phi i32 [ [[IV]], %[[LOOP_HEADER]] ], [ [[IV_LCSSA1]], %[[VECTOR_EARLY_EXIT]] ]
|
||||
; CHECK-NEXT: ret i32 [[IV_LCSSA2]]
|
||||
; CHECK: [[EXIT]]:
|
||||
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[VAL]], %[[LOOP_LATCH]] ]
|
||||
; CHECK-NEXT: ret i32 [[IV_LCSSA]]
|
||||
;
|
||||
entry:
|
||||
br label %loop.header
|
||||
|
||||
loop.header:
|
||||
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
|
||||
%mul.ov = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %iv, i32 %x)
|
||||
%mul.ov.overflow = extractvalue { i32, i1 } %mul.ov, 1
|
||||
br i1 %mul.ov.overflow, label %early.exit, label %loop.latch
|
||||
|
||||
; The latch is only reached when the multiplication did not overflow; it
; performs the load and the counted exit check (63 iterations).
loop.latch:
|
||||
%gep = getelementptr inbounds i32, ptr %src, i32 %iv
|
||||
%val = load i32, ptr %gep, align 4
|
||||
%iv.next = add nuw nsw i32 %iv, 1
|
||||
%ec = icmp eq i32 %iv.next, 63
|
||||
br i1 %ec, label %exit, label %loop.header
|
||||
|
||||
; Uncountable exit: returns the induction value at which the overflow occurred.
early.exit:
|
||||
ret i32 %iv
|
||||
|
||||
exit:
|
||||
ret i32 %val
|
||||
}
|
||||
70
llvm/test/Transforms/LoopVectorize/early-exit-unary-ops.ll
Normal file
70
llvm/test/Transforms/LoopVectorize/early-exit-unary-ops.ll
Normal file
@@ -0,0 +1,70 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
|
||||
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
|
||||
|
||||
; Early-exit loop whose uncountable exit is taken when the negation (fneg) of
; the double loaded from %src compares equal to 10.0. The CHECK lines expect a
; wide <4 x double> load followed by a vector fneg and fcmp in the vector loop.
; NOTE: the %cond argument is not used by the function body.
define i64 @early_exit_with_fneg(ptr dereferenceable(1024) align 8 %src, i1 %cond) {
|
||||
; CHECK-LABEL: define i64 @early_exit_with_fneg(
|
||||
; CHECK-SAME: ptr align 8 dereferenceable(1024) [[SRC:%.*]], i1 [[COND:%.*]]) {
|
||||
; CHECK-NEXT: [[LOOP:.*:]]
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY1:.*]]
|
||||
; CHECK: [[VECTOR_BODY1]]:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[GEP]], align 8
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x double> [[WIDE_LOAD]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <4 x double> [[TMP1]], splat (double 1.000000e+01)
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 124
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
|
||||
; CHECK: [[VECTOR_BODY_INTERIM]]:
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
|
||||
; CHECK: [[MIDDLE_BLOCK]]:
|
||||
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
|
||||
; CHECK: [[VECTOR_EARLY_EXIT]]:
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[IV]], [[TMP6]]
|
||||
; CHECK-NEXT: br label %[[EARLY_EXIT:.*]]
|
||||
; CHECK: [[SCALAR_PH]]:
|
||||
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
|
||||
; CHECK: [[LOOP_HEADER]]:
|
||||
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ], [ 124, %[[SCALAR_PH]] ]
|
||||
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[IV1]]
|
||||
; CHECK-NEXT: [[VAL:%.*]] = load double, ptr [[GEP1]], align 8
|
||||
; CHECK-NEXT: [[NEG:%.*]] = fneg double [[VAL]]
|
||||
; CHECK-NEXT: [[C_1:%.*]] = fcmp une double [[NEG]], 1.000000e+01
|
||||
; CHECK-NEXT: br i1 [[C_1]], label %[[LATCH]], label %[[EARLY_EXIT]]
|
||||
; CHECK: [[LATCH]]:
|
||||
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV1]], 1
|
||||
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 127
|
||||
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
|
||||
; CHECK: [[EARLY_EXIT]]:
|
||||
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ [[TMP7]], %[[VECTOR_EARLY_EXIT]] ]
|
||||
; CHECK-NEXT: ret i64 [[IV_LCSSA]]
|
||||
; CHECK: [[EXIT]]:
|
||||
; CHECK-NEXT: ret i64 10
|
||||
;
|
||||
entry:
|
||||
br label %loop.header
|
||||
|
||||
loop.header:
|
||||
%iv = phi i64 [ %iv.next, %latch ], [ 0, %entry ]
|
||||
%gep = getelementptr inbounds double, ptr %src, i64 %iv
|
||||
%val = load double, ptr %gep, align 8
|
||||
%neg = fneg double %val
|
||||
%c.1 = fcmp une double %neg, 10.0
|
||||
br i1 %c.1, label %latch, label %early.exit
|
||||
|
||||
; Counted exit: the loop runs for at most 127 iterations.
latch:
|
||||
%iv.next = add nuw i64 %iv, 1
|
||||
%exit.cond = icmp eq i64 %iv.next, 127
|
||||
br i1 %exit.cond, label %exit, label %loop.header
|
||||
|
||||
; Uncountable exit: returns the index of the first element whose negation
; compared equal to 10.0.
early.exit:
|
||||
ret i64 %iv
|
||||
|
||||
exit:
|
||||
ret i64 10
|
||||
}
|
||||
Reference in New Issue
Block a user