; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=s390x-unknown-linux -mcpu=z13 -passes=loop-vectorize,instcombine -force-vector-width=2 < %s | FileCheck %s
;
; Test that loop vectorizer does not generate vector addresses that must then
; always be extracted.

; Check that the addresses for a scalarized memory access is not extracted
; from a vector register.
; The CHECK block below is autogenerated: the vectorized (interleaved) loop
; computes each store address with scalar GEPs off %A -- there is no
; extractelement of a vector of pointers.
define void @foo(ptr nocapture %A) {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[DOTIDX:%.*]] = shl nsw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]]
; CHECK-NEXT:    [[DOTIDX1:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX1]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16
; CHECK-NEXT:    [[DOTIDX2:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX2]]
; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[TMP31]], i64 32
; CHECK-NEXT:    [[DOTIDX3:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX3]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i64 48
; CHECK-NEXT:    [[DOTIDX4:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX4]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 64
; CHECK-NEXT:    [[DOTIDX5:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX5]]
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i64 80
; CHECK-NEXT:    [[DOTIDX6:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX6]]
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP11]], i64 96
; CHECK-NEXT:    [[DOTIDX7:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX7]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i64 112
; CHECK-NEXT:    [[DOTIDX8:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX8]]
; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i64 128
; CHECK-NEXT:    [[DOTIDX9:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX9]]
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i64 144
; CHECK-NEXT:    [[DOTIDX10:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX10]]
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 160
; CHECK-NEXT:    [[DOTIDX11:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX11]]
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[TMP21]], i64 176
; CHECK-NEXT:    [[DOTIDX12:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX12]]
; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[TMP23]], i64 192
; CHECK-NEXT:    [[DOTIDX13:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX13]]
; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[TMP25]], i64 208
; CHECK-NEXT:    [[DOTIDX14:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX14]]
; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[TMP27]], i64 224
; CHECK-NEXT:    [[DOTIDX15:%.*]] = shl i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX15]]
; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr i8, ptr [[TMP29]], i64 240
; CHECK-NEXT:    store i32 4, ptr [[TMP2]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP3]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP32]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP6]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP8]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP10]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP12]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP14]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP16]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP18]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP20]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP22]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP24]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP26]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP28]], align 4
; CHECK-NEXT:    store i32 4, ptr [[TMP30]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  ; Induction variable i runs 0..9999; the store address is A[i << 2],
  ; i.e. a stride-4 i32 access, so the accesses cannot form one wide store
  ; and must be scalarized by the vectorizer.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %0 = shl nsw i64 %indvars.iv, 2
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %0
  store i32 4, ptr %arrayidx, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Check that a load of address is scalarized.
; The CHECK block below is autogenerated: the pointers loaded from %PtrPtr
; are loaded and dereferenced one lane at a time (scalar loads feeding
; insertelement), while the stores to the noalias destination %A remain
; vectorized <2 x i32> stores.
define void @foo1(ptr nocapture noalias %A, ptr nocapture %PtrPtr) {
; CHECK-LABEL: @foo1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [8 x i8], ptr [[PTRPTR:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr [8 x i8], ptr [[PTRPTR]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP11]], i64 8
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr [8 x i8], ptr [[PTRPTR]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i64 16
; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr [8 x i8], ptr [[PTRPTR]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[TMP23]], i64 24
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr [8 x i8], ptr [[PTRPTR]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[TMP25]], i64 32
; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr [8 x i8], ptr [[PTRPTR]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP43:%.*]] = getelementptr i8, ptr [[TMP39]], i64 40
; CHECK-NEXT:    [[TMP44:%.*]] = getelementptr [8 x i8], ptr [[PTRPTR]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP44]], i64 48
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr [8 x i8], ptr [[PTRPTR]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i64 56
; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
; CHECK-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[TMP24]], align 8
; CHECK-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP26]], align 8
; CHECK-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP43]], align 8
; CHECK-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP14]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP17]], align 4
; CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr [[TMP18]], align 4
; CHECK-NEXT:    [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP27]], i64 0
; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <2 x i32> [[TMP29]], i32 [[TMP28]], i64 1
; CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[TMP19]], align 4
; CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <2 x i32> poison, i32 [[TMP31]], i64 0
; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <2 x i32> [[TMP33]], i32 [[TMP32]], i64 1
; CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr [[TMP21]], align 4
; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <2 x i32> poison, i32 [[TMP35]], i64 0
; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <2 x i32> [[TMP37]], i32 [[TMP36]], i64 1
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP9]], i64 8
; CHECK-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP9]], i64 16
; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP9]], i64 24
; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr [[TMP9]], align 4
; CHECK-NEXT:    store <2 x i32> [[TMP30]], ptr [[TMP40]], align 4
; CHECK-NEXT:    store <2 x i32> [[TMP34]], ptr [[TMP41]], align 4
; CHECK-NEXT:    store <2 x i32> [[TMP38]], ptr [[TMP42]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  ; A[i] = *PtrPtr[i] for i = 0..9999: the inner load's address (%el) is
  ; itself loaded from memory each iteration, so it must be scalarized.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %ptr = getelementptr inbounds ptr, ptr %PtrPtr, i64 %indvars.iv
  %el = load ptr, ptr %ptr
  %v = load i32, ptr %el
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  store i32 %v, ptr %arrayidx, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}