Reapply "[VPlan] Handle calls in VPInstruction::opcodeMayReadOrWriteFromMemory." (#191886)

This reverts commit
3bf9639ec0.

The reapply adds trivial support for ExtractValue and InsertValue to fix
the crash causing the revert.

Original message:

Retrieve the called function and check its memory attributes, to
determine if a VPInstruction calling a function reads or writes memory.

Use it to strengthen the assert in areAllLoadsDereferenceable.

PR: https://github.com/llvm/llvm-project/pull/190681
This commit is contained in:
Florian Hahn
2026-04-13 22:43:33 +01:00
committed by GitHub
parent c9f175bed4
commit ca318abfe6
4 changed files with 175 additions and 10 deletions

View File

@@ -951,12 +951,14 @@ static bool areAllLoadsDereferenceable(VPBasicBlock *HeaderVPBB,
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(HeaderVPBB))) {
// Skip blocks outside the loop (exit blocks and their successors).
if (VPBB == MiddleVPBB)
if (VPBB == MiddleVPBB || isa<VPIRBasicBlock>(VPBB))
continue;
for (VPRecipeBase &R : *VPBB) {
auto *VPI = dyn_cast<VPInstructionWithType>(&R);
if (!VPI || VPI->getOpcode() != Instruction::Load)
if (!VPI || VPI->getOpcode() != Instruction::Load) {
assert(!R.mayReadFromMemory() && "unexpected recipe reading memory");
continue;
}
// Get the pointer SCEV for dereferenceability checking.
VPValue *Ptr = VPI->getOperand(0);

View File

@@ -440,6 +440,27 @@ VPInstruction::VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
"number of operands does not match opcode");
}
/// Locate the operand of a call VPInstruction that holds the called function.
/// Unmasked calls keep the function as their final operand; masked calls keep
/// it as the operand immediately before the final one.
/// \param VPI a VPInstruction whose opcode is Instruction::Call.
/// \returns the index of the function operand within \p VPI.
static unsigned getCalledFnOperandIndex(const VPInstruction &VPI) {
  assert(VPI.getOpcode() == Instruction::Call && "must be a call");
  const unsigned LastIdx = VPI.getNumOperands() - 1;
  // Unmasked form: the trailing operand wraps an IR Function.
  if (auto *Trailing = dyn_cast<VPIRValue>(VPI.getOperand(LastIdx)))
    if (isa<Function>(Trailing->getValue()))
      return LastIdx;
  // Otherwise the function must sit directly before the trailing operand.
  assert(
      isa<Function>(cast<VPIRValue>(VPI.getOperand(LastIdx - 1))->getValue()) &&
      "expected function operand");
  return LastIdx - 1;
}
/// Fetch the function invoked by a call VPInstruction.
/// \param VPI a VPInstruction whose opcode is Instruction::Call.
/// \returns the Function wrapped by the call's function operand.
static Function *getCalledFunction(const VPInstruction &VPI) {
  auto *CalleeOp = cast<VPIRValue>(VPI.getOperand(getCalledFnOperandIndex(VPI)));
  return cast<Function>(CalleeOp->getValue());
}
unsigned VPInstruction::getNumOperandsForOpcode() const {
if (Instruction::isUnaryOp(Opcode) || Instruction::isCast(Opcode))
return 1;
@@ -486,14 +507,8 @@ unsigned VPInstruction::getNumOperandsForOpcode() const {
case VPInstruction::ActiveLaneMask:
case VPInstruction::ReductionStartVector:
return 3;
case Instruction::Call: {
// For unmasked calls, the last argument will the called function. Use that
// to compute the number of operands without the mask.
VPValue *LastOp = getOperand(getNumOperands() - 1);
if (isa<VPIRValue>(LastOp) && isa<Function>(LastOp->getLiveInIRValue()))
return getNumOperands();
return getNumOperands() - 1;
}
case Instruction::Call:
return getCalledFnOperandIndex(*this) + 1;
case Instruction::GetElementPtr:
case Instruction::PHI:
case Instruction::Switch:
@@ -1318,6 +1333,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
Instruction::isUnaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
return false;
switch (getOpcode()) {
case Instruction::ExtractValue:
case Instruction::InsertValue:
case Instruction::GetElementPtr:
case Instruction::ExtractElement:
case Instruction::Freeze:
@@ -1358,6 +1375,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::VScale:
case VPInstruction::Unpack:
return false;
case Instruction::Call:
return !getCalledFunction(*this)->doesNotAccessMemory();
default:
return true;
}

View File

@@ -0,0 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
; Loop with an early exit whose condition is the overflow bit extracted (via
; extractvalue) from llvm.smul.with.overflow. The latch block loads from %src,
; which is marked dereferenceable(1024), so the loop contains a load in
; addition to the non-memory call and extractvalue in the header.
define i32 @early_exit_with_extractvalue(ptr dereferenceable(1024) align 8 %src, i32 noundef %x) {
; CHECK-LABEL: define i32 @early_exit_with_extractvalue(
; CHECK-SAME: ptr align 8 dereferenceable(1024) [[SRC:%.*]], i32 noundef [[X:%.*]]) {
; CHECK-NEXT: [[LOOP_PREHEADER:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[LATCH:.*]]
; CHECK: [[LATCH]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[LOOP]] ], [ [[IV_NEXT:%.*]], %[[EARLY_EXIT:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[LOOP]] ], [ [[VEC_IND_NEXT:%.*]], %[[EARLY_EXIT]] ]
; CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> [[VEC_IND]], <4 x i32> [[BROADCAST_SPLAT]])
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i1> } [[TMP1]], 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[IV_NEXT]], 60
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[EARLY_EXIT]]
; CHECK: [[EARLY_EXIT]]:
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[LATCH]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; CHECK-NEXT: [[IV_LCSSA1:%.*]] = add i32 [[INDEX]], [[TMP8]]
; CHECK-NEXT: br label %[[EARLY_EXIT1:.*]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 60, %[[EXIT_LOOPEXIT]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[MUL_OV:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[IV]], i32 [[X]])
; CHECK-NEXT: [[MUL_OV_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL_OV]], 1
; CHECK-NEXT: br i1 [[MUL_OV_OVERFLOW]], label %[[EARLY_EXIT1]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT1]], 63
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EARLY_EXIT1]]:
; CHECK-NEXT: [[IV_LCSSA2:%.*]] = phi i32 [ [[IV]], %[[LOOP_HEADER]] ], [ [[IV_LCSSA1]], %[[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i32 [[IV_LCSSA2]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[VAL]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: ret i32 [[IV_LCSSA]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
; Multiply the induction variable by %x; element 1 of the result is the
; overflow flag.
%mul.ov = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %iv, i32 %x)
%mul.ov.overflow = extractvalue { i32, i1 } %mul.ov, 1
; Leave the loop early as soon as the multiplication overflows.
br i1 %mul.ov.overflow, label %early.exit, label %loop.latch
loop.latch:
%gep = getelementptr inbounds i32, ptr %src, i32 %iv
%val = load i32, ptr %gep, align 4
%iv.next = add nuw nsw i32 %iv, 1
; Regular exit once the incremented induction variable reaches 63.
%ec = icmp eq i32 %iv.next, 63
br i1 %ec, label %exit, label %loop.header
early.exit:
; The early exit returns the induction variable value at which it was taken.
ret i32 %iv
exit:
; The regular exit returns the last value loaded in the latch.
ret i32 %val
}

View File

@@ -0,0 +1,70 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
; Loop with an early exit whose condition depends on a unary fneg applied to a
; value loaded from %src, which is marked dereferenceable(1024). The load is in
; the loop header, ahead of the early-exit branch.
define i64 @early_exit_with_fneg(ptr dereferenceable(1024) align 8 %src, i1 %cond) {
; CHECK-LABEL: define i64 @early_exit_with_fneg(
; CHECK-SAME: ptr align 8 dereferenceable(1024) [[SRC:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: [[LOOP:.*:]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: br label %[[VECTOR_BODY1:.*]]
; CHECK: [[VECTOR_BODY1]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[GEP]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x double> [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq <4 x double> [[TMP1]], splat (double 1.000000e+01)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 124
; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[IV]], [[TMP6]]
; CHECK-NEXT: br label %[[EARLY_EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ], [ 124, %[[SCALAR_PH]] ]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[IV1]]
; CHECK-NEXT: [[VAL:%.*]] = load double, ptr [[GEP1]], align 8
; CHECK-NEXT: [[NEG:%.*]] = fneg double [[VAL]]
; CHECK-NEXT: [[C_1:%.*]] = fcmp une double [[NEG]], 1.000000e+01
; CHECK-NEXT: br i1 [[C_1]], label %[[LATCH]], label %[[EARLY_EXIT]]
; CHECK: [[LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV1]], 1
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 127
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EARLY_EXIT]]:
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ [[TMP7]], %[[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[IV_LCSSA]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i64 10
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %latch ], [ 0, %entry ]
%gep = getelementptr inbounds double, ptr %src, i64 %iv
%val = load double, ptr %gep, align 8
; Negate the loaded value; fneg is the unary operation this test exercises.
%neg = fneg double %val
; Continue to the latch while the negated value is unordered with or not equal
; to 10.0; otherwise take the early exit.
%c.1 = fcmp une double %neg, 10.0
br i1 %c.1, label %latch, label %early.exit
latch:
%iv.next = add nuw i64 %iv, 1
; Regular exit once the incremented induction variable reaches 127.
%exit.cond = icmp eq i64 %iv.next, 127
br i1 %exit.cond, label %exit, label %loop.header
early.exit:
; The early exit returns the induction variable value at which it was taken.
ret i64 %iv
exit:
; The regular exit returns the constant 10.
ret i64 10
}