[llubi] Vector manipulation intrinsics cleanup (#195004)

This PR fixes llvm.vector.insert and llvm.vector.extract by adding a
missing UB case and handling scalable vectors correctly.

See also #194345.
This commit is contained in:
Zhige Chen
2026-04-30 23:43:19 +08:00
committed by GitHub
parent 3d47936143
commit 9dcb6f709b
4 changed files with 52 additions and 13 deletions

View File

@@ -5,12 +5,12 @@ define void @main() {
%insert_mid = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, <2 x i32> <i32 10, i32 11>, i64 2)
%insert_poison_lane = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, <2 x i32> <i32 poison, i32 11>, i64 2)
%insert_tail = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> zeroinitializer, <2 x i32> <i32 9, i32 10>, i64 4)
%insert_poison = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> zeroinitializer, <2 x i32> <i32 9, i32 10>, i64 5)
%insert_poison = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> zeroinitializer, <2 x i32> <i32 9, i32 10>, i64 6)
%extract_mid = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, i64 2)
%extract_poison_lane = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5>, i64 0)
%extract_tail = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, i64 4)
%extract_poison = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, i64 5)
%extract_poison = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, i64 6)
%reverse = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>)
%reverse_poison = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> <i32 0, i32 poison, i32 2, i32 3>)
@@ -28,6 +28,12 @@ define void @main() {
%splice_left_poison_idx = call <4 x i32> @llvm.vector.splice.left.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 poison)
%splice_right_poison_idx = call <4 x i32> @llvm.vector.splice.right.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 poison)
%insert_bad_idx = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> zeroinitializer, <2 x i32> zeroinitializer, i64 1)
%extract_bad_idx = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> zeroinitializer, i64 1)
%insert_idx_overflow = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv2i32(<vscale x 4 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i64 9223372036854775808)
%extract_idx_overflow = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> zeroinitializer, i64 9223372036854775808)
ret void
}
@@ -35,11 +41,11 @@ define void @main() {
; CHECK-NEXT: %insert_mid = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, <2 x i32> <i32 10, i32 11>, i64 2) => { i32 0, i32 1, i32 10, i32 11, i32 4, i32 5 }
; CHECK-NEXT: %insert_poison_lane = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, <2 x i32> <i32 poison, i32 11>, i64 2) => { i32 0, i32 1, poison, i32 11, i32 4, i32 5 }
; CHECK-NEXT: %insert_tail = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> zeroinitializer, <2 x i32> <i32 9, i32 10>, i64 4) => { i32 0, i32 0, i32 0, i32 0, i32 9, i32 10 }
; CHECK-NEXT: %insert_poison = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> zeroinitializer, <2 x i32> <i32 9, i32 10>, i64 5) => poison
; CHECK-NEXT: %insert_poison = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> zeroinitializer, <2 x i32> <i32 9, i32 10>, i64 6) => poison
; CHECK-NEXT: %extract_mid = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, i64 2) => { i32 2, i32 3 }
; CHECK-NEXT: %extract_poison_lane = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5>, i64 0) => { i32 0, poison }
; CHECK-NEXT: %extract_tail = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, i64 4) => { i32 4, i32 5 }
; CHECK-NEXT: %extract_poison = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, i64 5) => poison
; CHECK-NEXT: %extract_poison = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>, i64 6) => poison
; CHECK-NEXT: %reverse = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>) => { i32 3, i32 2, i32 1, i32 0 }
; CHECK-NEXT: %reverse_poison = call <4 x i32> @llvm.vector.reverse.v4i32(<4 x i32> <i32 0, i32 poison, i32 2, i32 3>) => { i32 3, i32 2, poison, i32 0 }
; CHECK-NEXT: %splice_left = call <4 x i32> @llvm.vector.splice.left.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 10, i32 11, i32 12, i32 13>, i32 2) => { i32 2, i32 3, i32 10, i32 11 }
@@ -52,5 +58,9 @@ define void @main() {
; CHECK-NEXT: %insert_poison_idx = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> zeroinitializer, <2 x i32> <i32 1, i32 2>, i64 poison) => poison
; CHECK-NEXT: %splice_left_poison_idx = call <4 x i32> @llvm.vector.splice.left.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 poison) => poison
; CHECK-NEXT: %splice_right_poison_idx = call <4 x i32> @llvm.vector.splice.right.v4i32(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 poison) => poison
; CHECK-NEXT: %insert_bad_idx = call <6 x i32> @llvm.vector.insert.v6i32.v2i32(<6 x i32> zeroinitializer, <2 x i32> zeroinitializer, i64 1) => poison
; CHECK-NEXT: %extract_bad_idx = call <2 x i32> @llvm.vector.extract.v2i32.v6i32(<6 x i32> zeroinitializer, i64 1) => poison
; CHECK-NEXT: %insert_idx_overflow = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv2i32(<vscale x 4 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer, i64 -9223372036854775808) => poison
; CHECK-NEXT: %extract_idx_overflow = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> zeroinitializer, i64 -9223372036854775808) => poison
; CHECK-NEXT: ret void
; CHECK-NEXT: Exiting function: main

View File

@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6
; RUN: llubi --vscale=257 --verbose < %s 2>&1 | FileCheck %s
; RUN: llubi --vscale=512 --verbose < %s 2>&1 | FileCheck %s
define void @main() {
call i8 @llvm.vscale.i8()
@@ -8,6 +8,6 @@ define void @main() {
}
; CHECK: Entering function: main
; CHECK-NEXT: %1 = call i8 @llvm.vscale.i8() => poison
; CHECK-NEXT: %2 = call i16 @llvm.vscale.i16() => i16 257
; CHECK-NEXT: %2 = call i16 @llvm.vscale.i16() => i16 512
; CHECK-NEXT: ret void
; CHECK-NEXT: Exiting function: main

View File

@@ -22,6 +22,8 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include <limits>
namespace llvm::ubi {
using namespace PatternMatch;
@@ -744,8 +746,18 @@ public:
const auto &Vec = Args[0].asAggregate();
const auto &SubVec = Args[1].asAggregate();
const auto &Idx = Args[2].asInteger();
const uint64_t Offset = Idx.getZExtValue();
if (Offset + SubVec.size() > Vec.size())
auto EC =
cast<VectorType>(CB.getArgOperand(1)->getType())->getElementCount();
const uint64_t RawOffset = Idx.getZExtValue();
const uint32_t MinSize = EC.getKnownMinValue();
if (RawOffset % MinSize != 0)
return AnyValue::poison();
const uint64_t Chunk = RawOffset / MinSize;
const uint64_t EVL = Ctx.getEVL(EC);
if (Chunk > std::numeric_limits<uint64_t>::max() / EVL)
return AnyValue::poison();
const uint64_t Offset = Chunk * EVL;
if (Offset > Vec.size() || SubVec.size() > Vec.size() - Offset)
return AnyValue::poison();
std::vector<AnyValue> Res;
Res.reserve(Vec.size());
@@ -762,12 +774,19 @@ public:
return AnyValue::poison();
const auto &Vec = Args[0].asAggregate();
const auto &Idx = Args[1].asInteger();
const uint64_t Offset = Idx.getZExtValue();
const uint64_t DstSize =
Ctx.getEVL(cast<VectorType>(RetTy)->getElementCount());
if (Offset + DstSize > Vec.size())
auto EC = cast<VectorType>(RetTy)->getElementCount();
const uint64_t RawOffset = Idx.getZExtValue();
const uint32_t MinSize = EC.getKnownMinValue();
if (RawOffset % MinSize != 0)
return AnyValue::poison();
return std::vector(Vec.begin() + Offset, Vec.begin() + Offset + DstSize);
const uint64_t Chunk = RawOffset / MinSize;
const uint64_t EVL = Ctx.getEVL(EC);
if (Chunk > std::numeric_limits<uint64_t>::max() / EVL)
return AnyValue::poison();
const uint64_t Offset = Chunk * EVL;
if (Offset > Vec.size() || EVL > Vec.size() - Offset)
return AnyValue::poison();
return std::vector(Vec.begin() + Offset, Vec.begin() + Offset + EVL);
}
case Intrinsic::vector_reverse: {
auto Vec = Args[0].asAggregate();

View File

@@ -166,6 +166,16 @@ int main(int argc, char **argv) {
return 1;
}
if (VScale == 0) {
WithColor::error() << "--vscale value must be positive\n";
return 1;
}
if (!isPowerOf2_32(VScale)) {
WithColor::error() << "--vscale value must be a power of 2\n";
return 1;
}
LLVMContext Context;
// Load the bitcode...