; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-50 < %s | FileCheck %s --check-prefixes=CHECK,X86 %}
; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-50 < %s | FileCheck %s --check-prefixes=CHECK,AARCH64 %}
|
|
|
|
; Profitability is irrelevant for these tests (hence -slp-threshold=-50); they only
; demonstrate the vectorizer's ability to combine different opcodes.
|
|
|
|
; Lanes 0, 1 and 3 are `shl` by a constant; lane 2 is `add %l2, 0`.
; Expected output: one <4 x i16> `shl`, with the add-by-zero lane expressed as
; a shift by 0.
define void @shl_add(ptr %p, ptr %s) {
; CHECK-LABEL: @shl_add(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i16> [[TMP0]], <i16 3, i16 5, i16 0, i16 3>
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S:%.*]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; Mixed-opcode lanes: lane 2 is the odd one out.
  %shl0 = shl i16 %l0, 3
  %shl1 = shl i16 %l1, 5
  %shl2 = add i16 %l2, 0
  %shl3 = shl i16 %l3, 3

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %shl0, ptr %s
  store i16 %shl1, ptr %s1
  store i16 %shl2, ptr %s2
  store i16 %shl3, ptr %s3
  ret void
}
|
|
|
|
; Lane 0 is `shl %l0, 0`; lanes 1..3 are `add` with a constant.
; Expected output: one <4 x i16> `add`, with the shift-by-zero lane expressed
; as an addition of 0.
define void @add_shl(ptr %p, ptr %s) {
; CHECK-LABEL: @add_shl(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[TMP0]], <i16 0, i16 5, i16 2, i16 3>
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S:%.*]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; Mixed-opcode lanes: lane 0 is the odd one out.
  %add0 = shl i16 %l0, 0
  %add1 = add i16 %l1, 5
  %add2 = add i16 %l2, 2
  %add3 = add i16 %l3, 3

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %add0, ptr %s
  store i16 %add1, ptr %s1
  store i16 %add2, ptr %s2
  store i16 %add3, ptr %s3
  ret void
}
|
|
|
|
; Lane 0 is `sub %l0, 0`; lanes 1..3 are `mul` by a constant.
; Expected output: one <4 x i16> `mul`, with the subtract-zero lane expressed
; as a multiplication by 1.
define void @sub_mul(ptr %p, ptr %s) {
; CHECK-LABEL: @sub_mul(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i16> [[TMP0]], <i16 1, i16 5, i16 2, i16 3>
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S:%.*]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; Mixed-opcode lanes: lane 0 is the odd one out.
  %mul0 = sub i16 %l0, 0
  %mul1 = mul i16 %l1, 5
  %mul2 = mul i16 %l2, 2
  %mul3 = mul i16 %l3, 3

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %mul0, ptr %s
  store i16 %mul1, ptr %s1
  store i16 %mul2, ptr %s2
  store i16 %mul3, ptr %s3
  ret void
}
|
|
|
|
; Lane 0 is `add %l0, 0`; lanes 1..3 are `mul` by a constant.
; Expected output: one <4 x i16> `mul`, with the add-zero lane expressed as a
; multiplication by 1.
define void @add_mul(ptr %p, ptr %s) {
; CHECK-LABEL: @add_mul(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i16> [[TMP0]], <i16 1, i16 5, i16 2, i16 3>
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S:%.*]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; Mixed-opcode lanes: lane 0 is the odd one out.
  %mul0 = add i16 %l0, 0
  %mul1 = mul i16 %l1, 5
  %mul2 = mul i16 %l2, 2
  %mul3 = mul i16 %l3, 3

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %mul0, ptr %s
  store i16 %mul1, ptr %s1
  store i16 %mul2, ptr %s2
  store i16 %mul3, ptr %s3
  ret void
}
|
|
|
|
; Lane 0 is `add %l0, 0`; lanes 1..3 are `and` with a constant mask.
; Expected output: one <4 x i16> `and`, with the add-zero lane expressed as a
; mask of -1 (all bits set).
; NOTE(review): the function name says "sub" but lane 0 uses `add`; confirm
; whether `sub i16 %l0, 0` was intended. The opcode is left as-is because the
; autogenerated assertions below were produced from this input.
define void @sub_and(ptr %p, ptr %s) {
; CHECK-LABEL: @sub_and(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i16> [[TMP0]], <i16 -1, i16 5, i16 2, i16 3>
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S:%.*]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; Mixed-opcode lanes: lane 0 is the odd one out. Values are named after the
  ; main opcode (`and`), matching the naming used by the other tests here.
  %and0 = add i16 %l0, 0
  %and1 = and i16 %l1, 5
  %and2 = and i16 %l2, 2
  %and3 = and i16 %l3, 3

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %and0, ptr %s
  store i16 %and1, ptr %s1
  store i16 %and2, ptr %s2
  store i16 %and3, ptr %s3
  ret void
}
|
|
|
|
; Lane 0 is `shl %l0, 3`; lanes 1..3 are `mul` by a constant.
; Expected output: one <4 x i16> `mul`, with the shift-by-3 lane expressed as
; a multiplication by 8.
define void @shl_mul(ptr %p, ptr %s) {
; CHECK-LABEL: @shl_mul(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i16> [[TMP0]], <i16 8, i16 5, i16 2, i16 3>
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S:%.*]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; Mixed-opcode lanes: lane 0 is the odd one out.
  %mul0 = shl i16 %l0, 3
  %mul1 = mul i16 %l1, 5
  %mul2 = mul i16 %l2, 2
  %mul3 = mul i16 %l3, 3

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %mul0, ptr %s
  store i16 %mul1, ptr %s1
  store i16 %mul2, ptr %s2
  store i16 %mul3, ptr %s3
  ret void
}
|
|
|
|
; Lane 0 is `lshr %l0, 3`; lanes 1..3 are `udiv` by 5, 2 and 3.
; Expected output: the lanes are not merged into one opcode. A vector `lshr`
; (shift 3 in lane 0, shift 0 elsewhere) feeds a vector `udiv` (divisor 1 in
; lane 0, the original divisors elsewhere).
define void @lshr_div(ptr %p, ptr %s) {
; CHECK-LABEL: @lshr_div(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i16> [[TMP0]], <i16 3, i16 0, i16 0, i16 0>
; CHECK-NEXT: [[TMP1:%.*]] = udiv <4 x i16> [[TMP2]], <i16 1, i16 5, i16 2, i16 3>
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S:%.*]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; Mixed-opcode lanes: lane 0 is the odd one out.
  %div0 = lshr i16 %l0, 3
  %div1 = udiv i16 %l1, 5
  %div2 = udiv i16 %l2, 2
  %div3 = udiv i16 %l3, 3

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %div0, ptr %s
  store i16 %div1, ptr %s1
  store i16 %div2, ptr %s2
  store i16 %div3, ptr %s3
  ret void
}
|
|
|
|
; Lanes 0 and 2 are `udiv` (by 5 and 2); lanes 1 and 3 are `lshr` (by 3 and 9).
; Expected output: a vector `udiv` (divisor 1 in the shift lanes) feeds a
; vector `lshr` (shift 0 in the division lanes).
define void @div_lshr(ptr %p, ptr %s) {
; CHECK-LABEL: @div_lshr(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP0]], <i16 5, i16 1, i16 2, i16 1>
; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i16> [[TMP2]], <i16 0, i16 3, i16 0, i16 9>
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S:%.*]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; Alternating opcodes: udiv in even lanes, lshr in odd lanes.
  %div0 = udiv i16 %l0, 5
  %div1 = lshr i16 %l1, 3
  %div2 = udiv i16 %l2, 2
  %div3 = lshr i16 %l3, 9

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %div0, ptr %s
  store i16 %div1, ptr %s1
  store i16 %div2, ptr %s2
  store i16 %div3, ptr %s3
  ret void
}
|
|
|
|
; Same lane layout as @div_lshr, except that lane 1 shifts by 17, which
; exceeds the 16-bit element width.
; Expected output: a vector `udiv` followed by a vector `lshr` that still
; carries the shift amount 17 in lane 1.
define void @div_lshr_too_large(ptr %p, ptr %s) {
; CHECK-LABEL: @div_lshr_too_large(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = udiv <4 x i16> [[TMP0]], <i16 5, i16 1, i16 2, i16 1>
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i16> [[TMP1]], <i16 0, i16 17, i16 0, i16 9>
; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[S:%.*]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; Alternating opcodes; the shift amount in lane 1 is deliberately out of
  ; range for i16.
  %div0 = udiv i16 %l0, 5
  %div1 = lshr i16 %l1, 17
  %div2 = udiv i16 %l2, 2
  %div3 = lshr i16 %l3, 9

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %div0, ptr %s
  store i16 %div1, ptr %s1
  store i16 %div2, ptr %s2
  store i16 %div3, ptr %s3
  ret void
}
|
|
|
|
; Lanes 0 and 3 are `lshr` (by 3 and 5); lanes 1 and 2 are `udiv` by the
; powers of two 8 and 2.
; The expectations differ per target triple:
;  - x86_64 run: only the two middle `udiv` lanes are vectorized (<2 x i16>);
;    lanes 0 and 3 stay scalar.
;  - aarch64 run: all four lanes are vectorized as a vector `lshr` feeding a
;    vector `udiv`.
define void @lshr_div2(ptr %p, ptr %s) {
; X86-LABEL: @lshr_div2(
; X86-NEXT: entry:
; X86-NEXT: [[P1:%.*]] = getelementptr i16, ptr [[P:%.*]], i64 1
; X86-NEXT: [[P3:%.*]] = getelementptr i16, ptr [[P]], i64 3
; X86-NEXT: [[L0:%.*]] = load i16, ptr [[P]], align 2
; X86-NEXT: [[L3:%.*]] = load i16, ptr [[P3]], align 2
; X86-NEXT: [[DIV0:%.*]] = lshr i16 [[L0]], 3
; X86-NEXT: [[DIV3:%.*]] = lshr i16 [[L3]], 5
; X86-NEXT: [[S1:%.*]] = getelementptr i16, ptr [[S:%.*]], i64 1
; X86-NEXT: [[S3:%.*]] = getelementptr i16, ptr [[S]], i64 3
; X86-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P1]], align 2
; X86-NEXT: [[TMP1:%.*]] = udiv <2 x i16> [[TMP0]], <i16 8, i16 2>
; X86-NEXT: store i16 [[DIV0]], ptr [[S]], align 2
; X86-NEXT: store <2 x i16> [[TMP1]], ptr [[S1]], align 2
; X86-NEXT: store i16 [[DIV3]], ptr [[S3]], align 2
; X86-NEXT: ret void
;
; AARCH64-LABEL: @lshr_div2(
; AARCH64-NEXT: entry:
; AARCH64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; AARCH64-NEXT: [[TMP1:%.*]] = lshr <4 x i16> [[TMP0]], <i16 3, i16 0, i16 0, i16 5>
; AARCH64-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], <i16 1, i16 8, i16 2, i16 1>
; AARCH64-NEXT: store <4 x i16> [[TMP2]], ptr [[S:%.*]], align 2
; AARCH64-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; lshr in the outer lanes, power-of-two udiv in the inner lanes.
  %div0 = lshr i16 %l0, 3
  %div1 = udiv i16 %l1, 8
  %div2 = udiv i16 %l2, 2
  %div3 = lshr i16 %l3, 5

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %div0, ptr %s
  store i16 %div1, ptr %s1
  store i16 %div2, ptr %s2
  store i16 %div3, ptr %s3
  ret void
}
|
|
|
|
; Lanes 0 and 2 are `udiv` by the powers of two 8 and 4; lanes 1 and 3 are
; `lshr` (by 3 and 4).
; Expected output: a vector `udiv` (divisor 1 in the shift lanes) feeds a
; vector `lshr` (shift 0 in the division lanes).
define void @div2_lshr(ptr %p, ptr %s) {
; CHECK-LABEL: @div2_lshr(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP0]], <i16 8, i16 1, i16 4, i16 1>
; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i16> [[TMP2]], <i16 0, i16 3, i16 0, i16 4>
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S:%.*]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; Alternating opcodes: power-of-two udiv in even lanes, lshr in odd lanes.
  %div0 = udiv i16 %l0, 8
  %div1 = lshr i16 %l1, 3
  %div2 = udiv i16 %l2, 4
  %div3 = lshr i16 %l3, 4

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %div0, ptr %s
  store i16 %div1, ptr %s1
  store i16 %div2, ptr %s2
  store i16 %div3, ptr %s3
  ret void
}
|
|
|
|
; Lane 0 is `sub %l0, -3`; lanes 1..3 are `add` with a constant.
; Expected output: one <4 x i16> `add`, with the subtract-of-(-3) lane
; expressed as an addition of 3.
define void @add_sub(ptr %p, ptr %s) {
; CHECK-LABEL: @add_sub(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[TMP0]], <i16 3, i16 5, i16 2, i16 3>
; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S:%.*]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Addresses of input elements 1..3 (element 0 is %p itself).
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  ; Four consecutive scalar loads that form the vectorizable bundle.
  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  ; Mixed-opcode lanes: lane 0 is the odd one out.
  %add0 = sub i16 %l0, -3
  %add1 = add i16 %l1, 5
  %add2 = add i16 %l2, 2
  %add3 = add i16 %l3, 3

  ; Addresses of output elements 1..3 (element 0 is %s itself).
  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  ; Four consecutive scalar stores that seed the vectorization.
  store i16 %add0, ptr %s
  store i16 %add1, ptr %s1
  store i16 %add2, ptr %s2
  store i16 %add3, ptr %s3
  ret void
}
|