The idea is similar to ba40a7bc2e. Due to
tail folding, the recurrence vector in the final iteration may contain
only a single active element, making it impossible to extract the
penultimate active element. This patch instead directly extracts the
last active element from the vector produced by splicing the recurrence
phi and the previous value, without needing to select which value to
extract based on the number of active lanes.
1103 lines
56 KiB
LLVM
1103 lines
56 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; This test verifies that the loop vectorizer will NOT produce a tail
; loop with the optimize for size or the minimize size attributes.
;
; Three configurations are exercised: the default pipeline (CHECK prefix),
; the pipeline with -pgso (PGSO prefix) and the pipeline with -pgso=false
; (NPGSO prefix).
; RUN: opt < %s -passes='require<profile-summary>,loop-vectorize' -S | FileCheck %s
; RUN: opt < %s -passes='require<profile-summary>,loop-vectorize' -pgso -S | FileCheck %s -check-prefix=PGSO
; RUN: opt < %s -passes='require<profile-summary>,loop-vectorize' -pgso=false -S | FileCheck %s -check-prefix=NPGSO

target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"

; Byte table read and rewritten by the @foo_* loops below.
@tab = common global [32 x i8] zeroinitializer, align 1

; @foo_optsize carries the optsize attribute (#0). Its loop exits once %i.08
; equals 202, and every run configuration expects the loop to be left in its
; original scalar form (no vector body is emitted).
define i32 @foo_optsize() #0 {
; CHECK-LABEL: define i32 @foo_optsize(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    ret i32 0
;
; PGSO-LABEL: define i32 @foo_optsize(
; PGSO-SAME: ) #[[ATTR0:[0-9]+]] {
; PGSO-NEXT:  [[ENTRY:.*]]:
; PGSO-NEXT:    br label %[[FOR_BODY:.*]]
; PGSO:       [[FOR_BODY]]:
; PGSO-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; PGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; PGSO-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; PGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; PGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; PGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; PGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
; PGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; PGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; PGSO:       [[FOR_END]]:
; PGSO-NEXT:    ret i32 0
;
; NPGSO-LABEL: define i32 @foo_optsize(
; NPGSO-SAME: ) #[[ATTR0:[0-9]+]] {
; NPGSO-NEXT:  [[ENTRY:.*]]:
; NPGSO-NEXT:    br label %[[FOR_BODY:.*]]
; NPGSO:       [[FOR_BODY]]:
; NPGSO-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; NPGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; NPGSO-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NPGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; NPGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; NPGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; NPGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
; NPGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; NPGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; NPGSO:       [[FOR_END]]:
; NPGSO-NEXT:    ret i32 0
;

entry:
  br label %for.body

for.body:
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., ptr %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %i.08, 202
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 0
}

attributes #0 = { optsize }

; @foo_minsize is the same loop as @foo_optsize but carries the minsize
; attribute (#1). Every run configuration expects the loop to be left in its
; original scalar form.
define i32 @foo_minsize() #1 {
; CHECK-LABEL: define i32 @foo_minsize(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    ret i32 0
;
; PGSO-LABEL: define i32 @foo_minsize(
; PGSO-SAME: ) #[[ATTR1:[0-9]+]] {
; PGSO-NEXT:  [[ENTRY:.*]]:
; PGSO-NEXT:    br label %[[FOR_BODY:.*]]
; PGSO:       [[FOR_BODY]]:
; PGSO-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; PGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; PGSO-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; PGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; PGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; PGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; PGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
; PGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; PGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; PGSO:       [[FOR_END]]:
; PGSO-NEXT:    ret i32 0
;
; NPGSO-LABEL: define i32 @foo_minsize(
; NPGSO-SAME: ) #[[ATTR1:[0-9]+]] {
; NPGSO-NEXT:  [[ENTRY:.*]]:
; NPGSO-NEXT:    br label %[[FOR_BODY:.*]]
; NPGSO:       [[FOR_BODY]]:
; NPGSO-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; NPGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; NPGSO-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NPGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; NPGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; NPGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; NPGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
; NPGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; NPGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; NPGSO:       [[FOR_END]]:
; NPGSO-NEXT:    ret i32 0
;

entry:
  br label %for.body

for.body:
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., ptr %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %i.08, 202
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 0
}

attributes #1 = { minsize }

; @foo_pgso has no size attribute but carries profile metadata (!prof !14).
; The default and -pgso configurations expect the loop to stay scalar. With
; -pgso=false the loop is expected to be vectorized with <4 x i8> operations,
; followed by a scalar remainder loop that resumes at index 200.
define i32 @foo_pgso() !prof !14 {
; CHECK-LABEL: define i32 @foo_pgso(
; CHECK-SAME: ) !prof [[PROF14:![0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; CHECK-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    ret i32 0
;
; PGSO-LABEL: define i32 @foo_pgso(
; PGSO-SAME: ) !prof [[PROF14:![0-9]+]] {
; PGSO-NEXT:  [[ENTRY:.*]]:
; PGSO-NEXT:    br label %[[FOR_BODY:.*]]
; PGSO:       [[FOR_BODY]]:
; PGSO-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; PGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; PGSO-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; PGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; PGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; PGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; PGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
; PGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; PGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; PGSO:       [[FOR_END]]:
; PGSO-NEXT:    ret i32 0
;
; NPGSO-LABEL: define i32 @foo_pgso(
; NPGSO-SAME: ) !prof [[PROF14:![0-9]+]] {
; NPGSO-NEXT:  [[ENTRY:.*:]]
; NPGSO-NEXT:    br label %[[VECTOR_PH:.*]]
; NPGSO:       [[VECTOR_PH]]:
; NPGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
; NPGSO:       [[VECTOR_BODY]]:
; NPGSO-NEXT:    [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NPGSO-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]]
; NPGSO-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; NPGSO-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; NPGSO-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1)
; NPGSO-NEXT:    store <4 x i8> [[TMP4]], ptr [[TMP1]], align 1
; NPGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 4
; NPGSO-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 200
; NPGSO-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; NPGSO:       [[MIDDLE_BLOCK]]:
; NPGSO-NEXT:    br label %[[SCALAR_PH:.*]]
; NPGSO:       [[SCALAR_PH]]:
; NPGSO-NEXT:    br label %[[FOR_BODY:.*]]
; NPGSO:       [[FOR_BODY]]:
; NPGSO-NEXT:    [[I_08:%.*]] = phi i32 [ 200, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; NPGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
; NPGSO-NEXT:    [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NPGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP6]], 0
; NPGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
; NPGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; NPGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
; NPGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
; NPGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; NPGSO:       [[FOR_END]]:
; NPGSO-NEXT:    ret i32 0
;

entry:
  br label %for.body

for.body:
  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
  %0 = load i8, ptr %arrayidx, align 1
  %cmp1 = icmp eq i8 %0, 0
  %. = select i1 %cmp1, i8 2, i8 1
  store i8 %., ptr %arrayidx, align 1
  %inc = add nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %i.08, 202
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 0
}

; PR43371: don't run into an assert due to emitting SCEV runtime checks
; with OptForSize.
;
; All three run configurations expect the same output for @pr43371: a
; <2 x i16> vector loop whose stores are scalarized, with no SCEV
; runtime-check block.
;
@cm_array = external global [2592 x i16], align 1

define void @pr43371(i16 %val) optsize {
;
; CHECK-LABEL: define void @pr43371(
; CHECK-SAME: i16 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[VAL]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], [[VEC_IND]]
; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP3]]
; CHECK-NEXT:    store i16 0, ptr [[TMP5]], align 1
; CHECK-NEXT:    store i16 0, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP28:.*]]
; CHECK:       [[FOR_COND_CLEANUP28]]:
; CHECK-NEXT:    unreachable
;
; PGSO-LABEL: define void @pr43371(
; PGSO-SAME: i16 [[VAL:%.*]]) #[[ATTR0]] {
; PGSO-NEXT:  [[ENTRY:.*:]]
; PGSO-NEXT:    br label %[[VECTOR_PH:.*]]
; PGSO:       [[VECTOR_PH]]:
; PGSO-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[VAL]], i64 0
; PGSO-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
; PGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
; PGSO:       [[VECTOR_BODY]]:
; PGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PGSO-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PGSO-NEXT:    [[TMP0:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], [[VEC_IND]]
; PGSO-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
; PGSO-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
; PGSO-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
; PGSO-NEXT:    [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
; PGSO-NEXT:    [[TMP7:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP3]]
; PGSO-NEXT:    store i16 0, ptr [[TMP5]], align 1
; PGSO-NEXT:    store i16 0, ptr [[TMP7]], align 1
; PGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; PGSO-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
; PGSO-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
; PGSO-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; PGSO:       [[MIDDLE_BLOCK]]:
; PGSO-NEXT:    br label %[[FOR_COND_CLEANUP28:.*]]
; PGSO:       [[FOR_COND_CLEANUP28]]:
; PGSO-NEXT:    unreachable
;
; NPGSO-LABEL: define void @pr43371(
; NPGSO-SAME: i16 [[VAL:%.*]]) #[[ATTR0]] {
; NPGSO-NEXT:  [[ENTRY:.*:]]
; NPGSO-NEXT:    br label %[[VECTOR_PH:.*]]
; NPGSO:       [[VECTOR_PH]]:
; NPGSO-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[VAL]], i64 0
; NPGSO-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
; NPGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
; NPGSO:       [[VECTOR_BODY]]:
; NPGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NPGSO-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NPGSO-NEXT:    [[TMP0:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], [[VEC_IND]]
; NPGSO-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
; NPGSO-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
; NPGSO-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
; NPGSO-NEXT:    [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
; NPGSO-NEXT:    [[TMP7:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP3]]
; NPGSO-NEXT:    store i16 0, ptr [[TMP5]], align 1
; NPGSO-NEXT:    store i16 0, ptr [[TMP7]], align 1
; NPGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; NPGSO-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
; NPGSO-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
; NPGSO-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; NPGSO:       [[MIDDLE_BLOCK]]:
; NPGSO-NEXT:    br label %[[FOR_COND_CLEANUP28:.*]]
; NPGSO:       [[FOR_COND_CLEANUP28]]:
; NPGSO-NEXT:    unreachable
;
; We do not want to generate SCEV predicates when optimising for size, because
; that will lead to extra code generation such as the SCEV overflow runtime
; checks. Not generating SCEV predicates can still result in vectorisation as
; the non-consecutive loads/stores can be scalarized:
entry:
  br label %for.body29

for.cond.cleanup28:
  unreachable

for.body29:
  %i24.0170 = phi i16 [ 0, %entry], [ %inc37, %for.body29]
  %add33 = add i16 %val, %i24.0170
  %idxprom34 = zext i16 %add33 to i32
  %arrayidx35 = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 %idxprom34
  store i16 0, ptr %arrayidx35, align 1
  %inc37 = add i16 %i24.0170, 1
  %cmp26 = icmp ult i16 %inc37, 756
  br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
}

; Same loop as @pr43371, but the function carries profile metadata
; (!prof !14) instead of the optsize attribute. The default and -pgso
; configurations expect the scalarized-store vector loop without a SCEV check.
; With -pgso=false a vector.scevcheck block is expected in front of a vector
; loop that uses a single wide <2 x i16> store, with the scalar loop kept as
; the fallback path.
define void @pr43371_pgso(i16 %val) !prof !14 {
;
; CHECK-LABEL: define void @pr43371_pgso(
; CHECK-SAME: i16 [[VAL:%.*]]) !prof [[PROF14]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[VAL]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], [[VEC_IND]]
; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP3]]
; CHECK-NEXT:    store i16 0, ptr [[TMP5]], align 1
; CHECK-NEXT:    store i16 0, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP28:.*]]
; CHECK:       [[FOR_COND_CLEANUP28]]:
; CHECK-NEXT:    unreachable
;
; PGSO-LABEL: define void @pr43371_pgso(
; PGSO-SAME: i16 [[VAL:%.*]]) !prof [[PROF14]] {
; PGSO-NEXT:  [[ENTRY:.*:]]
; PGSO-NEXT:    br label %[[VECTOR_PH:.*]]
; PGSO:       [[VECTOR_PH]]:
; PGSO-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[VAL]], i64 0
; PGSO-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
; PGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
; PGSO:       [[VECTOR_BODY]]:
; PGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PGSO-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PGSO-NEXT:    [[TMP0:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], [[VEC_IND]]
; PGSO-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
; PGSO-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
; PGSO-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
; PGSO-NEXT:    [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
; PGSO-NEXT:    [[TMP7:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP3]]
; PGSO-NEXT:    store i16 0, ptr [[TMP5]], align 1
; PGSO-NEXT:    store i16 0, ptr [[TMP7]], align 1
; PGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; PGSO-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
; PGSO-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
; PGSO-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; PGSO:       [[MIDDLE_BLOCK]]:
; PGSO-NEXT:    br label %[[FOR_COND_CLEANUP28:.*]]
; PGSO:       [[FOR_COND_CLEANUP28]]:
; PGSO-NEXT:    unreachable
;
; NPGSO-LABEL: define void @pr43371_pgso(
; NPGSO-SAME: i16 [[VAL:%.*]]) !prof [[PROF14]] {
; NPGSO-NEXT:  [[ENTRY:.*:]]
; NPGSO-NEXT:    br label %[[VECTOR_SCEVCHECK:.*]]
; NPGSO:       [[VECTOR_SCEVCHECK]]:
; NPGSO-NEXT:    [[TMP0:%.*]] = add i16 [[VAL]], 755
; NPGSO-NEXT:    [[TMP4:%.*]] = icmp ult i16 [[TMP0]], [[VAL]]
; NPGSO-NEXT:    br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NPGSO:       [[VECTOR_PH]]:
; NPGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
; NPGSO:       [[VECTOR_BODY]]:
; NPGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NPGSO-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; NPGSO-NEXT:    [[TMP1:%.*]] = add i16 [[VAL]], [[OFFSET_IDX]]
; NPGSO-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; NPGSO-NEXT:    [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]]
; NPGSO-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP3]], align 1
; NPGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; NPGSO-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
; NPGSO-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; NPGSO:       [[MIDDLE_BLOCK]]:
; NPGSO-NEXT:    br label %[[FOR_COND_CLEANUP28:.*]]
; NPGSO:       [[SCALAR_PH]]:
; NPGSO-NEXT:    br label %[[FOR_BODY29:.*]]
; NPGSO:       [[FOR_COND_CLEANUP28]]:
; NPGSO-NEXT:    unreachable
; NPGSO:       [[FOR_BODY29]]:
; NPGSO-NEXT:    [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ]
; NPGSO-NEXT:    [[ADD33:%.*]] = add i16 [[VAL]], [[I24_0170]]
; NPGSO-NEXT:    [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32
; NPGSO-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]]
; NPGSO-NEXT:    store i16 0, ptr [[ARRAYIDX35]], align 1
; NPGSO-NEXT:    [[INC37]] = add i16 [[I24_0170]], 1
; NPGSO-NEXT:    [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756
; NPGSO-NEXT:    br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP21:![0-9]+]]
;
; We do not want to generate SCEV predicates when optimising for size, because
; that will lead to extra code generation such as the SCEV overflow runtime
; checks. Not generating SCEV predicates can still result in vectorisation as
; the non-consecutive loads/stores can be scalarized:
entry:
  br label %for.body29

for.cond.cleanup28:
  unreachable

for.body29:
  %i24.0170 = phi i16 [ 0, %entry], [ %inc37, %for.body29]
  %add33 = add i16 %val, %i24.0170
  %idxprom34 = zext i16 %add33 to i32
  %arrayidx35 = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 %idxprom34
  store i16 0, ptr %arrayidx35, align 1
  %inc37 = add i16 %i24.0170, 1
  %cmp26 = icmp ult i16 %inc37, 756
  br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
}

; PR45526: a first-order recurrence that is live-out is vectorized with
; fold-tail. The exit value is taken from the last active lane of the vector
; obtained by splicing the recurrence phi with the previous value, so no
; penultimate-element extraction is required.
;
define i32 @pr45526() optsize {
;
; CHECK-LABEL: define i32 @pr45526(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 5>, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1)
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 512
; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt <4 x i32> [[VEC_IND]], splat (i32 510)
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT:    [[FIRST_INACTIVE_LANE:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> [[TMP0]], i1 false)
; CHECK-NEXT:    [[LAST_ACTIVE_LANE:%.*]] = sub i32 [[FIRST_INACTIVE_LANE]], 1
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[TMP4]], i32 [[LAST_ACTIVE_LANE]]
; CHECK-NEXT:    br label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret i32 [[TMP10]]
;
; PGSO-LABEL: define i32 @pr45526(
; PGSO-SAME: ) #[[ATTR0]] {
; PGSO-NEXT:  [[ENTRY:.*:]]
; PGSO-NEXT:    br label %[[VECTOR_PH:.*]]
; PGSO:       [[VECTOR_PH]]:
; PGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
; PGSO:       [[VECTOR_BODY]]:
; PGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PGSO-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PGSO-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 5>, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
; PGSO-NEXT:    [[TMP1]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1)
; PGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; PGSO-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; PGSO-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 512
; PGSO-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; PGSO:       [[MIDDLE_BLOCK]]:
; PGSO-NEXT:    [[TMP0:%.*]] = icmp ugt <4 x i32> [[VEC_IND]], splat (i32 510)
; PGSO-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; PGSO-NEXT:    [[FIRST_INACTIVE_LANE:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> [[TMP0]], i1 false)
; PGSO-NEXT:    [[LAST_ACTIVE_LANE:%.*]] = sub i32 [[FIRST_INACTIVE_LANE]], 1
; PGSO-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[TMP4]], i32 [[LAST_ACTIVE_LANE]]
; PGSO-NEXT:    br label %[[EXIT:.*]]
; PGSO:       [[EXIT]]:
; PGSO-NEXT:    ret i32 [[TMP10]]
;
; NPGSO-LABEL: define i32 @pr45526(
; NPGSO-SAME: ) #[[ATTR0]] {
; NPGSO-NEXT:  [[ENTRY:.*:]]
; NPGSO-NEXT:    br label %[[VECTOR_PH:.*]]
; NPGSO:       [[VECTOR_PH]]:
; NPGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
; NPGSO:       [[VECTOR_BODY]]:
; NPGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NPGSO-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NPGSO-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 5>, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
; NPGSO-NEXT:    [[TMP1]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1)
; NPGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; NPGSO-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; NPGSO-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 512
; NPGSO-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; NPGSO:       [[MIDDLE_BLOCK]]:
; NPGSO-NEXT:    [[TMP0:%.*]] = icmp ugt <4 x i32> [[VEC_IND]], splat (i32 510)
; NPGSO-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; NPGSO-NEXT:    [[FIRST_INACTIVE_LANE:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> [[TMP0]], i1 false)
; NPGSO-NEXT:    [[LAST_ACTIVE_LANE:%.*]] = sub i32 [[FIRST_INACTIVE_LANE]], 1
; NPGSO-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[TMP4]], i32 [[LAST_ACTIVE_LANE]]
; NPGSO-NEXT:    br label %[[EXIT:.*]]
; NPGSO:       [[EXIT]]:
; NPGSO-NEXT:    ret i32 [[TMP10]]
;
entry:
  br label %loop

loop:
  %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ]
  %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ]
  %pivPlus1 = add nuw nsw i32 %piv, 1
  %cond = icmp ult i32 %piv, 510
  br i1 %cond, label %loop, label %exit

exit:
  ret i32 %for
}

; First-order recurrence whose value is live-out of the loop, in a function
; with a zero profile entry count (!prof !14). With PGSO in effect (the default
; run and the -pgso run) the loop is vectorized with the tail folded, and the
; live-out is extracted at the last active lane of the vector spliced from the
; recurrence phi and the previous value. With -pgso=false a scalar remainder
; loop is kept instead.
define i32 @pr45526_pgso() !prof !14 {
;
; CHECK-LABEL: define i32 @pr45526_pgso(
; CHECK-SAME: ) !prof [[PROF14]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 5>, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 512
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[VEC_IND]], splat (i32 510)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[FIRST_INACTIVE_LANE:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> [[TMP0]], i1 false)
; CHECK-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i32 [[FIRST_INACTIVE_LANE]], 1
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP4]], i32 [[LAST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[TMP10]]
;
; PGSO-LABEL: define i32 @pr45526_pgso(
; PGSO-SAME: ) !prof [[PROF14]] {
; PGSO-NEXT: [[ENTRY:.*:]]
; PGSO-NEXT: br label %[[VECTOR_PH:.*]]
; PGSO: [[VECTOR_PH]]:
; PGSO-NEXT: br label %[[VECTOR_BODY:.*]]
; PGSO: [[VECTOR_BODY]]:
; PGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PGSO-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PGSO-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 5>, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
; PGSO-NEXT: [[TMP1]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1)
; PGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; PGSO-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; PGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 512
; PGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; PGSO: [[MIDDLE_BLOCK]]:
; PGSO-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[VEC_IND]], splat (i32 510)
; PGSO-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; PGSO-NEXT: [[FIRST_INACTIVE_LANE:%.*]] = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> [[TMP0]], i1 false)
; PGSO-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i32 [[FIRST_INACTIVE_LANE]], 1
; PGSO-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP4]], i32 [[LAST_ACTIVE_LANE]]
; PGSO-NEXT: br label %[[EXIT:.*]]
; PGSO: [[EXIT]]:
; PGSO-NEXT: ret i32 [[TMP10]]
;
; NPGSO-LABEL: define i32 @pr45526_pgso(
; NPGSO-SAME: ) !prof [[PROF14]] {
; NPGSO-NEXT: [[ENTRY:.*:]]
; NPGSO-NEXT: br label %[[VECTOR_PH:.*]]
; NPGSO: [[VECTOR_PH]]:
; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]]
; NPGSO: [[VECTOR_BODY]]:
; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508
; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; NPGSO: [[MIDDLE_BLOCK]]:
; NPGSO-NEXT: br label %[[SCALAR_PH:.*]]
; NPGSO: [[SCALAR_PH]]:
; NPGSO-NEXT: br label %[[LOOP:.*]]
; NPGSO: [[LOOP]]:
; NPGSO-NEXT: [[PIV:%.*]] = phi i32 [ 508, %[[SCALAR_PH]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ]
; NPGSO-NEXT: [[FOR:%.*]] = phi i32 [ 508, %[[SCALAR_PH]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
; NPGSO-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1
; NPGSO-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510
; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP24:![0-9]+]]
; NPGSO: [[EXIT]]:
; NPGSO-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
; NPGSO-NEXT: ret i32 [[FOR_LCSSA]]
;
entry:
  br label %loop

loop:
  %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ]
  ; %for carries the previous iteration's %pivPlus1 (5 on loop entry) and is
  ; the only value used after the loop.
  %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ]
  %pivPlus1 = add nuw nsw i32 %piv, 1
  ; The body runs for %piv = 0..510, i.e. 511 iterations, which is not a
  ; multiple of the vectorization factor of 4 seen in the checks above.
  %cond = icmp ult i32 %piv, 510
  br i1 %cond, label %loop, label %exit

exit:
  ret i32 %for
}
|
|
|
|
; PR46228: Under optsize with vectorization explicitly enabled (!llvm.loop !15),
; vectorize without versioning for unit stride: no runtime check on %BStride is
; emitted, the tail is folded, and the strided stores are predicated per lane.

; NOTE: Some assertions have been autogenerated by utils/update_test_checks.py
define void @stride1(ptr noalias %B, i32 %BStride) optsize {
; CHECK-LABEL: define void @stride1(
; CHECK-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 1024)
; CHECK-NEXT: [[TMP0:%.*]] = mul nsw <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP3]]
; CHECK-NEXT: store i16 42, ptr [[TMP4]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP6]]
; CHECK-NEXT: store i16 42, ptr [[TMP7]], align 4
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[FOR_END:.*]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
; PGSO-LABEL: define void @stride1(
; PGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) #[[ATTR0]] {
; PGSO-NEXT: [[ENTRY:.*:]]
; PGSO-NEXT: br label %[[VECTOR_PH:.*]]
; PGSO: [[VECTOR_PH]]:
; PGSO-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE]], i64 0
; PGSO-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; PGSO-NEXT: br label %[[VECTOR_BODY:.*]]
; PGSO: [[VECTOR_BODY]]:
; PGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; PGSO-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; PGSO-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 1024)
; PGSO-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; PGSO-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i64 0
; PGSO-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; PGSO: [[PRED_STORE_IF]]:
; PGSO-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
; PGSO-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP3]]
; PGSO-NEXT: store i16 42, ptr [[TMP4]], align 4
; PGSO-NEXT: br label %[[PRED_STORE_CONTINUE]]
; PGSO: [[PRED_STORE_CONTINUE]]:
; PGSO-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i64 1
; PGSO-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; PGSO: [[PRED_STORE_IF1]]:
; PGSO-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
; PGSO-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP6]]
; PGSO-NEXT: store i16 42, ptr [[TMP7]], align 4
; PGSO-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; PGSO: [[PRED_STORE_CONTINUE2]]:
; PGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; PGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; PGSO-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
; PGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; PGSO: [[MIDDLE_BLOCK]]:
; PGSO-NEXT: br label %[[FOR_END:.*]]
; PGSO: [[FOR_END]]:
; PGSO-NEXT: ret void
;
; NPGSO-LABEL: define void @stride1(
; NPGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) #[[ATTR0]] {
; NPGSO-NEXT: [[ENTRY:.*:]]
; NPGSO-NEXT: br label %[[VECTOR_PH:.*]]
; NPGSO: [[VECTOR_PH]]:
; NPGSO-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE]], i64 0
; NPGSO-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]]
; NPGSO: [[VECTOR_BODY]]:
; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; NPGSO-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
; NPGSO-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 1024)
; NPGSO-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; NPGSO-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i64 0
; NPGSO-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; NPGSO: [[PRED_STORE_IF]]:
; NPGSO-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
; NPGSO-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP3]]
; NPGSO-NEXT: store i16 42, ptr [[TMP4]], align 4
; NPGSO-NEXT: br label %[[PRED_STORE_CONTINUE]]
; NPGSO: [[PRED_STORE_CONTINUE]]:
; NPGSO-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i64 1
; NPGSO-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; NPGSO: [[PRED_STORE_IF1]]:
; NPGSO-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
; NPGSO-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP6]]
; NPGSO-NEXT: store i16 42, ptr [[TMP7]], align 4
; NPGSO-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; NPGSO: [[PRED_STORE_CONTINUE2]]:
; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; NPGSO-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; NPGSO: [[MIDDLE_BLOCK]]:
; NPGSO-NEXT: br label %[[FOR_END:.*]]
; NPGSO: [[FOR_END]]:
; NPGSO-NEXT: ret void
;

entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  ; The store index is %iv scaled by the runtime value %BStride, so the access
  ; is only consecutive when %BStride == 1.
  %mulB = mul nsw i32 %iv, %BStride
  %gepOfB = getelementptr inbounds i16, ptr %B, i32 %mulB
  store i16 42, ptr %gepOfB, align 4
  %iv.next = add nuw nsw i32 %iv, 1
  ; 1025 iterations: odd, so a vectorization factor of 2 leaves a tail.
  %exitcond = icmp eq i32 %iv.next, 1025
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15

for.end:
  ret void
}
|
|
|
|
; Vectorize with versioning for unit stride for PGSO and enabled vectorization:
; the function has a zero profile entry count (!prof !14) rather than optsize,
; and under all three RUN configurations the loop is guarded by a runtime
; %BStride == 1 check (vector.scevcheck) with the scalar loop as fallback.
;
define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 {
; CHECK-LABEL: define void @stride1_pgso(
; CHECK-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) !prof [[PROF14]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[BSTRIDE]], 1
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]]
; CHECK-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]]
; CHECK-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]]
; CHECK-NEXT: store i16 42, ptr [[GEPOFB]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
; PGSO-LABEL: define void @stride1_pgso(
; PGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) !prof [[PROF14]] {
; PGSO-NEXT: [[ENTRY:.*:]]
; PGSO-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
; PGSO: [[VECTOR_SCEVCHECK]]:
; PGSO-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[BSTRIDE]], 1
; PGSO-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; PGSO: [[VECTOR_PH]]:
; PGSO-NEXT: br label %[[VECTOR_BODY:.*]]
; PGSO: [[VECTOR_BODY]]:
; PGSO-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PGSO-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]]
; PGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4
; PGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
; PGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; PGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; PGSO: [[MIDDLE_BLOCK]]:
; PGSO-NEXT: br label %[[SCALAR_PH]]
; PGSO: [[SCALAR_PH]]:
; PGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; PGSO-NEXT: br label %[[FOR_BODY:.*]]
; PGSO: [[FOR_BODY]]:
; PGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; PGSO-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]]
; PGSO-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]]
; PGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4
; PGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; PGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
; PGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; PGSO: [[FOR_END]]:
; PGSO-NEXT: ret void
;
; NPGSO-LABEL: define void @stride1_pgso(
; NPGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) !prof [[PROF14]] {
; NPGSO-NEXT: [[ENTRY:.*:]]
; NPGSO-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
; NPGSO: [[VECTOR_SCEVCHECK]]:
; NPGSO-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[BSTRIDE]], 1
; NPGSO-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; NPGSO: [[VECTOR_PH]]:
; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]]
; NPGSO: [[VECTOR_BODY]]:
; NPGSO-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NPGSO-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]]
; NPGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4
; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
; NPGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; NPGSO: [[MIDDLE_BLOCK]]:
; NPGSO-NEXT: br label %[[SCALAR_PH]]
; NPGSO: [[SCALAR_PH]]:
; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; NPGSO-NEXT: br label %[[FOR_BODY:.*]]
; NPGSO: [[FOR_BODY]]:
; NPGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; NPGSO-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]]
; NPGSO-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]]
; NPGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4
; NPGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; NPGSO: [[FOR_END]]:
; NPGSO-NEXT: ret void
;

entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  ; The store index is %iv scaled by the runtime value %BStride; the vector
  ; path in the checks above is only taken when %BStride == 1.
  %mulB = mul nsw i32 %iv, %BStride
  %gepOfB = getelementptr inbounds i16, ptr %B, i32 %mulB
  store i16 42, ptr %gepOfB, align 4
  %iv.next = add nuw nsw i32 %iv, 1
  ; 1025 iterations: the vector loop covers 1024 and the scalar loop the rest.
  %exitcond = icmp eq i32 %iv.next, 1025
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15

for.end:
  ret void
}
|
|
|
|
; PR46652: Check that a loop with a tiny trip count is not vectorized when
; vectorizing it would need a runtime stride == 1 check and the function is
; compiled without -Os/-Oz. The loop must stay scalar in every configuration.

@g = external global [1 x i16], align 1

define void @pr46652(i16 %stride) {
; CHECK-LABEL: define void @pr46652(
; CHECK-SAME: i16 [[STRIDE:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[L1_02:%.*]] = phi i16 [ 1, %[[ENTRY]] ], [ [[INC9:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i16 [[L1_02]], [[STRIDE]]
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 [[MUL]]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX6]], align 1
; CHECK-NEXT: [[INC9]] = add nuw nsw i16 [[L1_02]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[INC9]], 16
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
; PGSO-LABEL: define void @pr46652(
; PGSO-SAME: i16 [[STRIDE:%.*]]) {
; PGSO-NEXT: [[ENTRY:.*]]:
; PGSO-NEXT: br label %[[FOR_BODY:.*]]
; PGSO: [[FOR_BODY]]:
; PGSO-NEXT: [[L1_02:%.*]] = phi i16 [ 1, %[[ENTRY]] ], [ [[INC9:%.*]], %[[FOR_BODY]] ]
; PGSO-NEXT: [[MUL:%.*]] = mul nsw i16 [[L1_02]], [[STRIDE]]
; PGSO-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 [[MUL]]
; PGSO-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX6]], align 1
; PGSO-NEXT: [[INC9]] = add nuw nsw i16 [[L1_02]], 1
; PGSO-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[INC9]], 16
; PGSO-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; PGSO: [[FOR_END]]:
; PGSO-NEXT: ret void
;
; NPGSO-LABEL: define void @pr46652(
; NPGSO-SAME: i16 [[STRIDE:%.*]]) {
; NPGSO-NEXT: [[ENTRY:.*]]:
; NPGSO-NEXT: br label %[[FOR_BODY:.*]]
; NPGSO: [[FOR_BODY]]:
; NPGSO-NEXT: [[L1_02:%.*]] = phi i16 [ 1, %[[ENTRY]] ], [ [[INC9:%.*]], %[[FOR_BODY]] ]
; NPGSO-NEXT: [[MUL:%.*]] = mul nsw i16 [[L1_02]], [[STRIDE]]
; NPGSO-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 [[MUL]]
; NPGSO-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX6]], align 1
; NPGSO-NEXT: [[INC9]] = add nuw nsw i16 [[L1_02]], 1
; NPGSO-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[INC9]], 16
; NPGSO-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; NPGSO: [[FOR_END]]:
; NPGSO-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  ; Induction variable runs 1..15, i.e. 15 iterations.
  %l1.02 = phi i16 [ 1, %entry ], [ %inc9, %for.body ]
  ; The load index is scaled by the runtime value %stride.
  %mul = mul nsw i16 %l1.02, %stride
  %arrayidx6 = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 %mul
  %0 = load i16, ptr %arrayidx6, align 1
  %inc9 = add nuw nsw i16 %l1.02, 1
  %exitcond.not = icmp eq i16 %inc9, 16
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}
|
|
|
|
; Make sure we do not crash while building the VPlan for the loop with the
; select below. The checks expect the loop to remain scalar in every
; configuration.
define i32 @PR48142(ptr %ptr.start, ptr %ptr.end) optsize {
; CHECK-LABEL: define i32 @PR48142(
; CHECK-SAME: ptr [[PTR_START:%.*]], ptr [[PTR_END:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I_014:%.*]] = phi i32 [ 20, %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_START]], %[[ENTRY]] ], [ [[PTR_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I_014]], 99
; CHECK-NEXT: [[COND]] = select i1 [[CMP4]], i32 99, i32 [[I_014]]
; CHECK-NEXT: store i32 0, ptr [[PTR_IV]], align 4
; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[PTR_END]]
; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
; CHECK-NEXT: ret i32 [[RES]]
;
; PGSO-LABEL: define i32 @PR48142(
; PGSO-SAME: ptr [[PTR_START:%.*]], ptr [[PTR_END:%.*]]) #[[ATTR0]] {
; PGSO-NEXT: [[ENTRY:.*]]:
; PGSO-NEXT: br label %[[FOR_BODY:.*]]
; PGSO: [[FOR_BODY]]:
; PGSO-NEXT: [[I_014:%.*]] = phi i32 [ 20, %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
; PGSO-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_START]], %[[ENTRY]] ], [ [[PTR_NEXT:%.*]], %[[FOR_BODY]] ]
; PGSO-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I_014]], 99
; PGSO-NEXT: [[COND]] = select i1 [[CMP4]], i32 99, i32 [[I_014]]
; PGSO-NEXT: store i32 0, ptr [[PTR_IV]], align 4
; PGSO-NEXT: [[PTR_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1
; PGSO-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[PTR_END]]
; PGSO-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; PGSO: [[EXIT]]:
; PGSO-NEXT: [[RES:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
; PGSO-NEXT: ret i32 [[RES]]
;
; NPGSO-LABEL: define i32 @PR48142(
; NPGSO-SAME: ptr [[PTR_START:%.*]], ptr [[PTR_END:%.*]]) #[[ATTR0]] {
; NPGSO-NEXT: [[ENTRY:.*]]:
; NPGSO-NEXT: br label %[[FOR_BODY:.*]]
; NPGSO: [[FOR_BODY]]:
; NPGSO-NEXT: [[I_014:%.*]] = phi i32 [ 20, %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
; NPGSO-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_START]], %[[ENTRY]] ], [ [[PTR_NEXT:%.*]], %[[FOR_BODY]] ]
; NPGSO-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I_014]], 99
; NPGSO-NEXT: [[COND]] = select i1 [[CMP4]], i32 99, i32 [[I_014]]
; NPGSO-NEXT: store i32 0, ptr [[PTR_IV]], align 4
; NPGSO-NEXT: [[PTR_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1
; NPGSO-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[PTR_END]]
; NPGSO-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; NPGSO: [[EXIT]]:
; NPGSO-NEXT: [[RES:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
; NPGSO-NEXT: ret i32 [[RES]]
;
entry:
  br label %for.body

for.body:
  ; %i.014 feeds back through the select: values below 99 are replaced by 99,
  ; anything else is carried over unchanged. Its final value is returned.
  %i.014 = phi i32 [ 20, %entry ], [ %cond, %for.body ]
  ; The loop is driven by a pointer induction from %ptr.start to %ptr.end.
  %ptr.iv = phi ptr [ %ptr.start, %entry ], [ %ptr.next, %for.body ]
  %cmp4 = icmp slt i32 %i.014, 99
  %cond = select i1 %cmp4, i32 99, i32 %i.014
  ; Explicit alignment equals the i32 ABI alignment the checks already expect.
  store i32 0, ptr %ptr.iv, align 4
  %ptr.next = getelementptr inbounds i32, ptr %ptr.iv, i64 1
  %cmp.not = icmp eq ptr %ptr.next, %ptr.end
  br i1 %cmp.not, label %exit, label %for.body

exit:
  %res = phi i32 [ %cond, %for.body ]
  ret i32 %res
}
|
|
|
|
; Synthetic profile summary, attached as a module flag. The RUN lines request
; it through 'require<profile-summary>'.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; Zero entry count, attached via !prof to the *_pgso functions.
!14 = !{!"function_entry_count", i64 0}
; Loop metadata that explicitly enables vectorization (used by @stride1 and
; @stride1_pgso).
!15 = distinct !{!15, !16}
!16 = !{!"llvm.loop.vectorize.enable", i1 true}
|
|
;.
; CHECK: [[PROF14]] = !{!"function_entry_count", i64 0}
; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META17:![0-9]+]]}
; CHECK: [[META16]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META17]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META16]], [[META17]]}
; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]}
; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]], [[META17]]}
; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]]}
; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]], [[META17]]}
; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]]}
;.
; PGSO: [[PROF14]] = !{!"function_entry_count", i64 0}
; PGSO: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META17:![0-9]+]]}
; PGSO: [[META16]] = !{!"llvm.loop.isvectorized", i32 1}
; PGSO: [[META17]] = !{!"llvm.loop.unroll.runtime.disable"}
; PGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META16]], [[META17]]}
; PGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]}
; PGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]], [[META17]]}
; PGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]]}
; PGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]], [[META17]]}
; PGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]]}
;.
; NPGSO: [[PROF14]] = !{!"function_entry_count", i64 0}
; NPGSO: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META17:![0-9]+]]}
; NPGSO: [[META16]] = !{!"llvm.loop.isvectorized", i32 1}
; NPGSO: [[META17]] = !{!"llvm.loop.unroll.runtime.disable"}
; NPGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]}
; NPGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]}
; NPGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]], [[META17]]}
; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]]}
; NPGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]], [[META17]]}
; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]], [[META17]]}
; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META17]], [[META16]]}
; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]], [[META17]]}
; NPGSO: [[LOOP26]] = distinct !{[[LOOP26]], [[META16]], [[META17]]}
; NPGSO: [[LOOP27]] = distinct !{[[LOOP27]], [[META16]]}
;.
|