[DAGCombiner] Teach reduceLoadWidth to look past ISD::FREEZE (#189317)
Teach `DAGCombiner::reduceLoadWidth` to look through freeze SDNodes when narrowing loads. The narrowed result is then wrapped in a freeze to preserve the original semantics. Currently, several folds are blocked by the freeze:
```
and(freeze(load), 0xff) -> AssertZext(freeze(zextload, i8))
trunc(freeze(load i32), i8) -> freeze(load i8)
sext_inreg(freeze(load), i8) -> AssertSext(freeze(sextload, i8))
```
as are many other patterns, because the legalizer or upstream IR passes insert freeze. This change generally has the positive effect of narrowing the load type.
This commit is contained in:
@@ -7977,9 +7977,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
|
||||
|
||||
// fold (and (load x), 255) -> (zextload x, i8)
|
||||
// fold (and (extload x, i16), 255) -> (zextload x, i8)
|
||||
if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
|
||||
if (SDValue Res = reduceLoadWidth(N))
|
||||
return Res;
|
||||
// fold (and (freeze (load x)), 255) -> (freeze (zextload x, i8))
|
||||
// fold (and (freeze (extload x, i16)), 255) -> (freeze (zextload x, i8))
|
||||
if (N1C && !VT.isVector()) {
|
||||
SDValue Inner = N0.getOpcode() == ISD::FREEZE ? N0.getOperand(0) : N0;
|
||||
if (Inner.getOpcode() == ISD::LOAD)
|
||||
if (SDValue Res = reduceLoadWidth(N))
|
||||
return Res;
|
||||
}
|
||||
|
||||
if (LegalTypes) {
|
||||
// Attempt to propagate the AND back up to the leaves which, if they're
|
||||
@@ -16513,6 +16518,14 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
|
||||
}
|
||||
}
|
||||
|
||||
// Look through a freeze if present between the operation and the load.
|
||||
// The freeze will be preserved on the narrowed result.
|
||||
SDValue FreezeNode;
|
||||
if (N0.getOpcode() == ISD::FREEZE) {
|
||||
FreezeNode = N0;
|
||||
N0 = N0.getOperand(0);
|
||||
}
|
||||
|
||||
// If we haven't found a load, we can't narrow it.
|
||||
if (!isa<LoadSDNode>(N0))
|
||||
return SDValue();
|
||||
@@ -16524,6 +16537,13 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
|
||||
!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
|
||||
return SDValue();
|
||||
|
||||
// Bail early when looking through a multi-use freeze, since other users of
|
||||
// the freeze can depend on the full load value. But it's still safe to change
|
||||
// the extension type from anyext to zext.
|
||||
if (FreezeNode && !FreezeNode.hasOneUse() &&
|
||||
(LN0->getMemoryVT().bitsGT(ExtVT) || ExtType != ISD::ZEXTLOAD))
|
||||
return SDValue();
|
||||
|
||||
auto AdjustBigEndianShift = [&](unsigned ShAmt) {
|
||||
unsigned LVTStoreBits =
|
||||
LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
|
||||
@@ -16585,8 +16605,24 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
|
||||
WorklistRemover DeadNodes(*this);
|
||||
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
|
||||
|
||||
// Shift the result left, if we've swallowed a left shift.
|
||||
// Replace old load value for multi-use freeze so all users benefit.
|
||||
if (FreezeNode && !FreezeNode.hasOneUse())
|
||||
DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), Load.getValue(0));
|
||||
|
||||
// If we looked through a freeze, rewrap the narrowed result and add an
|
||||
// Assert node so downstream analyses can see the range.
|
||||
SDValue Result = Load;
|
||||
if (FreezeNode) {
|
||||
Result = DAG.getNode(ISD::FREEZE, DL, VT, Result);
|
||||
if (ExtType == ISD::ZEXTLOAD)
|
||||
Result =
|
||||
DAG.getNode(ISD::AssertZext, DL, VT, Result, DAG.getValueType(ExtVT));
|
||||
else if (ExtType == ISD::SEXTLOAD)
|
||||
Result =
|
||||
DAG.getNode(ISD::AssertSext, DL, VT, Result, DAG.getValueType(ExtVT));
|
||||
}
|
||||
|
||||
// Shift the result left, if we've swallowed a left shift.
|
||||
if (ShLeftAmt != 0) {
|
||||
// If the shift amount is as large as the result size (but, presumably,
|
||||
// no larger than the source) then the useful bits of the result are
|
||||
|
||||
@@ -108,19 +108,26 @@ entry:
|
||||
}
|
||||
|
||||
define i32 @load_i16_store_i8_freeze(ptr %p, ptr %q) {
|
||||
; CHECK-LE-LABEL: load_i16_store_i8_freeze:
|
||||
; CHECK-LE: // %bb.0: // %entry
|
||||
; CHECK-LE-NEXT: ldrh w8, [x0]
|
||||
; CHECK-LE-NEXT: mov w0, wzr
|
||||
; CHECK-LE-NEXT: strb w8, [x1]
|
||||
; CHECK-LE-NEXT: ret
|
||||
; CHECK-SD-LABEL: load_i16_store_i8_freeze:
|
||||
; CHECK-SD: // %bb.0: // %entry
|
||||
; CHECK-SD-NEXT: ldrb w8, [x0]
|
||||
; CHECK-SD-NEXT: mov w0, wzr
|
||||
; CHECK-SD-NEXT: strb w8, [x1]
|
||||
; CHECK-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-BE-LABEL: load_i16_store_i8_freeze:
|
||||
; CHECK-BE: // %bb.0: // %entry
|
||||
; CHECK-BE-NEXT: ldrh w8, [x0]
|
||||
; CHECK-BE-NEXT: ldrb w8, [x0, #1]
|
||||
; CHECK-BE-NEXT: mov w0, wzr
|
||||
; CHECK-BE-NEXT: strb w8, [x1]
|
||||
; CHECK-BE-NEXT: ret
|
||||
;
|
||||
; CHECK-GI-LABEL: load_i16_store_i8_freeze:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: ldrh w8, [x0]
|
||||
; CHECK-GI-NEXT: mov w0, wzr
|
||||
; CHECK-GI-NEXT: strb w8, [x1]
|
||||
; CHECK-GI-NEXT: ret
|
||||
entry:
|
||||
%l = load i16, ptr %p, align 4
|
||||
%fr = freeze i16 %l
|
||||
|
||||
343
llvm/test/CodeGen/AArch64/reduce-load-width-freeze.ll
Normal file
343
llvm/test/CodeGen/AArch64/reduce-load-width-freeze.ll
Normal file
@@ -0,0 +1,343 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s
|
||||
|
||||
; and(freeze(load i32), 255): expected codegen is a single byte load (ldrb).
define i32 @and_freeze_load_i32_to_i8(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_i32_to_i8:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrb w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 255
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
; and(freeze(load i32), 65535): expected codegen is a single halfword load (ldrh).
define i32 @and_freeze_load_i32_to_i16(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_i32_to_i16:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrh w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 65535
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
; and(freeze(load i64), 255): expected codegen is a single byte load (ldrb).
define i64 @and_freeze_load_i64_to_i8(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_i64_to_i8:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrb w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%masked = and i64 %freeze, 255
|
||||
ret i64 %masked
|
||||
}
|
||||
|
||||
; and(freeze(load i64), 65535): expected codegen is a single halfword load (ldrh).
define i64 @and_freeze_load_i64_to_i16(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_i64_to_i16:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrh w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%masked = and i64 %freeze, 65535
|
||||
ret i64 %masked
|
||||
}
|
||||
|
||||
; and(freeze(load i64), 0xffffffff): expected codegen is a single 32-bit load (ldr w0).
define i64 @and_freeze_load_i64_to_i32(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_i64_to_i32:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%masked = and i64 %freeze, 4294967295
|
||||
ret i64 %masked
|
||||
}
|
||||
|
||||
; and(freeze(load i32), 0xff00): expected codegen loads the byte at offset 1
; (ldrb) and shifts it left by 8.
define i32 @and_freeze_load_shifted_mask(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_shifted_mask:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrb w8, [x0, #1]
|
||||
; CHECK-NEXT: lsl w0, w8, #8
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 65280
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
; trunc(freeze(load i16)) to i8: expected codegen is a single byte load (ldrb).
define i8 @trunc_freeze_load_i16_to_i8(ptr %p) {
|
||||
; CHECK-LABEL: trunc_freeze_load_i16_to_i8:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrb w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i16, ptr %p, align 2
|
||||
%freeze = freeze i16 %load
|
||||
%trunc = trunc i16 %freeze to i8
|
||||
ret i8 %trunc
|
||||
}
|
||||
|
||||
; trunc(freeze(load i32)) to i8: the expected codegen keeps a 32-bit ldr into
; w0; no narrower load appears in the assertions for this case.
define i8 @trunc_freeze_load_i32_to_i8(ptr %p) {
|
||||
; CHECK-LABEL: trunc_freeze_load_i32_to_i8:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%trunc = trunc i32 %freeze to i8
|
||||
ret i8 %trunc
|
||||
}
|
||||
|
||||
; trunc(freeze(load i64)) to i8: expected codegen is a single byte load (ldrb).
define i8 @trunc_freeze_load_i64_to_i8(ptr %p) {
|
||||
; CHECK-LABEL: trunc_freeze_load_i64_to_i8:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrb w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%trunc = trunc i64 %freeze to i8
|
||||
ret i8 %trunc
|
||||
}
|
||||
|
||||
; trunc(freeze(load i64)) to i16: expected codegen is a single halfword load (ldrh).
define i16 @trunc_freeze_load_i64_to_i16(ptr %p) {
|
||||
; CHECK-LABEL: trunc_freeze_load_i64_to_i16:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrh w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%trunc = trunc i64 %freeze to i16
|
||||
ret i16 %trunc
|
||||
}
|
||||
|
||||
; trunc(freeze(load i32)) to i16: the expected codegen keeps a 32-bit ldr into
; w0; no narrower load appears in the assertions for this case.
define i16 @trunc_freeze_load_i32_to_i16(ptr %p) {
|
||||
; CHECK-LABEL: trunc_freeze_load_i32_to_i16:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%trunc = trunc i32 %freeze to i16
|
||||
ret i16 %trunc
|
||||
}
|
||||
|
||||
; shl 24 / ashr 24 of a frozen i32 load (sign-extend of the low byte):
; expected codegen is a single sign-extending byte load (ldrsb).
define i32 @sext_inreg_freeze_load_i8(ptr %p) {
|
||||
; CHECK-LABEL: sext_inreg_freeze_load_i8:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrsb w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%shl = shl i32 %freeze, 24
|
||||
%ashr = ashr i32 %shl, 24
|
||||
ret i32 %ashr
|
||||
}
|
||||
|
||||
; shl 16 / ashr 16 of a frozen i32 load (sign-extend of the low halfword):
; expected codegen is a single sign-extending halfword load (ldrsh).
define i32 @sext_inreg_freeze_load_i16(ptr %p) {
|
||||
; CHECK-LABEL: sext_inreg_freeze_load_i16:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrsh w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%shl = shl i32 %freeze, 16
|
||||
%ashr = ashr i32 %shl, 16
|
||||
ret i32 %ashr
|
||||
}
|
||||
|
||||
; zext(trunc(freeze(load i64)) to i8) to i32: expected codegen is a single
; byte load (ldrb).
define i32 @zext_trunc_freeze_load(ptr %p) {
|
||||
; CHECK-LABEL: zext_trunc_freeze_load:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrb w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%trunc = trunc i64 %freeze to i8
|
||||
%zext = zext i8 %trunc to i32
|
||||
ret i32 %zext
|
||||
}
|
||||
|
||||
; sext(trunc(freeze(load i64)) to i8) to i32: expected codegen is a single
; sign-extending byte load (ldrsb).
define i32 @sext_trunc_freeze_load(ptr %p) {
|
||||
; CHECK-LABEL: sext_trunc_freeze_load:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrsb w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%trunc = trunc i64 %freeze to i8
|
||||
%sext = sext i8 %trunc to i32
|
||||
ret i32 %sext
|
||||
}
|
||||
|
||||
; trunc(freeze(load i16)) to i8 feeding a byte store: expected codegen loads a
; single byte (ldrb) and stores it with strb.
define i32 @trunc_freeze_load_store(ptr %p, ptr %q) {
|
||||
; CHECK-LABEL: trunc_freeze_load_store:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrb w8, [x0]
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: strb w8, [x1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i16, ptr %p, align 4
|
||||
%freeze = freeze i16 %load
|
||||
%trunc = trunc i16 %freeze to i8
|
||||
store i8 %trunc, ptr %q, align 1
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Same mask-by-255 pattern on an align-1 i32 load: expected codegen is still a
; single byte load (ldrb).
define i32 @and_freeze_load_unaligned(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_unaligned:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrb w0, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 1
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 255
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
; Negative test: a volatile load is expected to stay full width (ldr) with an
; explicit and of 0xff.
define i32 @and_freeze_volatile_load(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_volatile_load:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr w8, [x0]
|
||||
; CHECK-NEXT: and w0, w8, #0xff
|
||||
; CHECK-NEXT: ret
|
||||
%load = load volatile i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 255
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
; Negative test: a seq_cst atomic load is expected to stay full width (ldar)
; with an explicit and of 0xff.
define i32 @and_freeze_atomic_load(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_atomic_load:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldar w8, [x0]
|
||||
; CHECK-NEXT: and w0, w8, #0xff
|
||||
; CHECK-NEXT: ret
|
||||
%load = load atomic i32, ptr %p seq_cst, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 255
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
; Negative test: with the non-contiguous mask 170 (0xaa) the expected codegen
; keeps the full 32-bit load and an explicit and.
define i32 @and_freeze_non_contiguous_mask(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_non_contiguous_mask:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr w8, [x0]
|
||||
; CHECK-NEXT: mov w9, #170 ; =0xaa
|
||||
; CHECK-NEXT: and w0, w8, w9
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 170
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
; Negative test: with a non-constant mask the expected codegen keeps the full
; 32-bit load and an explicit and with the mask register.
define i32 @and_freeze_variable_mask(ptr %p, i32 %mask) {
|
||||
; CHECK-LABEL: and_freeze_variable_mask:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr w8, [x0]
|
||||
; CHECK-NEXT: and w0, w8, w1
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, %mask
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
; The frozen i8 load has two users (zext and icmp). Expected codegen is one
; ldrb whose result feeds both the compare and the select.
define i32 @and_multiuse_freeze_extload_no_narrow(ptr %p) {
|
||||
; CHECK-LABEL: and_multiuse_freeze_extload_no_narrow:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldrb w8, [x0]
|
||||
; CHECK-NEXT: cmp w8, #42
|
||||
; CHECK-NEXT: csel w0, w8, wzr, eq
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i8, ptr %p
|
||||
%freeze = freeze i8 %load
|
||||
%zext = zext i8 %freeze to i32
|
||||
%cmp = icmp eq i8 %freeze, 42
|
||||
%sel = select i1 %cmp, i32 %zext, i32 0
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; The frozen i32 load is both masked and stored at full width. Expected codegen
; keeps the 32-bit load (ldr) and uses an explicit and of 0xff.
define i32 @and_multiuse_freeze_narrowing(ptr %p, ptr %q) {
|
||||
; CHECK-LABEL: and_multiuse_freeze_narrowing:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr w8, [x0]
|
||||
; CHECK-NEXT: and w0, w8, #0xff
|
||||
; CHECK-NEXT: str w8, [x1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 255
|
||||
store i32 %freeze, ptr %q
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
; The frozen i64 load is both masked and stored at full width. Expected codegen
; keeps the 64-bit load (ldr x8) and uses an explicit and of 0xff.
define i64 @and_multiuse_freeze_i64_narrowing(ptr %p, ptr %q) {
|
||||
; CHECK-LABEL: and_multiuse_freeze_i64_narrowing:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr x8, [x0]
|
||||
; CHECK-NEXT: and x0, x8, #0xff
|
||||
; CHECK-NEXT: str x8, [x1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i64, ptr %p
|
||||
%freeze = freeze i64 %load
|
||||
%masked = and i64 %freeze, 255
|
||||
store i64 %freeze, ptr %q
|
||||
ret i64 %masked
|
||||
}
|
||||
|
||||
; SRL/SRA through freeze are not yet folded because the freeze peek-through
|
||||
; happens after the SRL early-return in reduceLoadWidth.
|
||||
|
||||
; lshr(freeze(load i32), 8): expected codegen keeps the full 32-bit load
; followed by lsr #8 (load not narrowed).
define i32 @srl_freeze_load_i32(ptr %p) {
|
||||
; CHECK-LABEL: srl_freeze_load_i32:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr w8, [x0]
|
||||
; CHECK-NEXT: lsr w0, w8, #8
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%srl = lshr i32 %freeze, 8
|
||||
ret i32 %srl
|
||||
}
|
||||
|
||||
; ashr(freeze(load i32), 8): expected codegen keeps the full 32-bit load
; followed by asr #8 (load not narrowed).
define i32 @sra_freeze_load_i32(ptr %p) {
|
||||
; CHECK-LABEL: sra_freeze_load_i32:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr w8, [x0]
|
||||
; CHECK-NEXT: asr w0, w8, #8
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%sra = ashr i32 %freeze, 8
|
||||
ret i32 %sra
|
||||
}
|
||||
|
||||
; trunc(lshr(freeze(load i32), 8)) to i8: expected codegen keeps the full
; 32-bit load followed by lsr #8 (load not narrowed).
define i8 @trunc_srl_freeze_load(ptr %p) {
|
||||
; CHECK-LABEL: trunc_srl_freeze_load:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr w8, [x0]
|
||||
; CHECK-NEXT: lsr w0, w8, #8
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%srl = lshr i32 %freeze, 8
|
||||
%trunc = trunc i32 %srl to i8
|
||||
ret i8 %trunc
|
||||
}
|
||||
|
||||
; trunc(lshr(freeze(load i64), 16)) to i16: expected codegen keeps the full
; 64-bit load followed by lsr #16 (load not narrowed).
define i16 @srl_freeze_load_i64_to_i16(ptr %p) {
|
||||
; CHECK-LABEL: srl_freeze_load_i64_to_i16:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: ldr x8, [x0]
|
||||
; CHECK-NEXT: lsr x0, x8, #16
|
||||
; CHECK-NEXT: ; kill: def $w0 killed $w0 killed $x0
|
||||
; CHECK-NEXT: ret
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%srl = lshr i64 %freeze, 16
|
||||
%trunc = trunc i64 %srl to i16
|
||||
ret i16 %trunc
|
||||
}
|
||||
@@ -8,7 +8,7 @@
|
||||
define void @test() local_unnamed_addr #0 align 2 {
|
||||
; CHECK-BE-LABEL: test:
|
||||
; CHECK-BE: # %bb.0: # %bb
|
||||
; CHECK-BE-NEXT: lwz r3, 0(r3)
|
||||
; CHECK-BE-NEXT: lhz r3, 0(r3)
|
||||
; CHECK-BE-NEXT: vspltisw v2, -16
|
||||
; CHECK-BE-NEXT: addi r3, r3, 1
|
||||
; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0
|
||||
@@ -31,7 +31,7 @@ define void @test() local_unnamed_addr #0 align 2 {
|
||||
;
|
||||
; CHECK-P9-BE-LABEL: test:
|
||||
; CHECK-P9-BE: # %bb.0: # %bb
|
||||
; CHECK-P9-BE-NEXT: lwz r3, 0(r3)
|
||||
; CHECK-P9-BE-NEXT: lhz r3, 0(r3)
|
||||
; CHECK-P9-BE-NEXT: vspltisw v3, -16
|
||||
; CHECK-P9-BE-NEXT: xxlxor vs2, vs2, vs2
|
||||
; CHECK-P9-BE-NEXT: addi r3, r3, 1
|
||||
|
||||
@@ -113,13 +113,12 @@ define void @test_bitint_200_add(ptr %a, ptr %b, ptr %out) nounwind {
|
||||
define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
|
||||
; RV64-LABEL: test_bitint_200_to_float:
|
||||
; RV64: # %bb.0: # %itofp-entry
|
||||
; RV64-NEXT: ld a5, 16(a0)
|
||||
; RV64-NEXT: lbu a4, 24(a0)
|
||||
; RV64-NEXT: ld a3, 0(a0)
|
||||
; RV64-NEXT: ld a2, 8(a0)
|
||||
; RV64-NEXT: zext.b a0, a4
|
||||
; RV64-NEXT: lbu a4, 24(a0)
|
||||
; RV64-NEXT: ld a5, 16(a0)
|
||||
; RV64-NEXT: or a0, a2, a4
|
||||
; RV64-NEXT: or a6, a3, a5
|
||||
; RV64-NEXT: or a0, a2, a0
|
||||
; RV64-NEXT: or a0, a6, a0
|
||||
; RV64-NEXT: beqz a0, .LBB1_20
|
||||
; RV64-NEXT: # %bb.1: # %itofp-if-end
|
||||
@@ -446,14 +445,13 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
|
||||
; RV32-NEXT: lw a5, 16(a0)
|
||||
; RV32-NEXT: lw a3, 20(a0)
|
||||
; RV32-NEXT: lbu a2, 24(a0)
|
||||
; RV32-NEXT: lw t3, 8(a0)
|
||||
; RV32-NEXT: lw a7, 0(a0)
|
||||
; RV32-NEXT: lw t5, 4(a0)
|
||||
; RV32-NEXT: lw t3, 8(a0)
|
||||
; RV32-NEXT: lw t4, 12(a0)
|
||||
; RV32-NEXT: zext.b a2, a2
|
||||
; RV32-NEXT: or a0, a7, a5
|
||||
; RV32-NEXT: or a4, t3, a2
|
||||
; RV32-NEXT: or a0, a0, a4
|
||||
; RV32-NEXT: or a0, t3, a2
|
||||
; RV32-NEXT: or a4, a7, a5
|
||||
; RV32-NEXT: or a0, a4, a0
|
||||
; RV32-NEXT: or a4, t5, a3
|
||||
; RV32-NEXT: or a4, a4, t4
|
||||
; RV32-NEXT: or a0, a0, a4
|
||||
|
||||
@@ -99,7 +99,7 @@ define i128 @mask_pair_128(i128 %x, i128 %y) nounwind {
|
||||
; X86-NOBMI-NEXT: pushl %edi
|
||||
; X86-NOBMI-NEXT: pushl %esi
|
||||
; X86-NOBMI-NEXT: subl $32, %esp
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NOBMI-NEXT: movl $-1, {{[0-9]+}}(%esp)
|
||||
; X86-NOBMI-NEXT: movl $-1, {{[0-9]+}}(%esp)
|
||||
@@ -121,7 +121,6 @@ define i128 @mask_pair_128(i128 %x, i128 %y) nounwind {
|
||||
; X86-NOBMI-NEXT: movl 20(%esp,%ebx), %ebx
|
||||
; X86-NOBMI-NEXT: shldl %cl, %ebx, %edx
|
||||
; X86-NOBMI-NEXT: shldl %cl, %edi, %ebx
|
||||
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NOBMI-NEXT: shll %cl, %edi
|
||||
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi
|
||||
@@ -143,7 +142,7 @@ define i128 @mask_pair_128(i128 %x, i128 %y) nounwind {
|
||||
; X86-BMI2-NEXT: pushl %edi
|
||||
; X86-BMI2-NEXT: pushl %esi
|
||||
; X86-BMI2-NEXT: subl $32, %esp
|
||||
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-BMI2-NEXT: movl $-1, {{[0-9]+}}(%esp)
|
||||
; X86-BMI2-NEXT: movl $-1, {{[0-9]+}}(%esp)
|
||||
|
||||
@@ -15,7 +15,7 @@ define i256 @bext_i256(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: pushq %rax
|
||||
; SSE-NEXT: movq %rcx, %rax
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
|
||||
; SSE-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
@@ -34,7 +34,6 @@ define i256 @bext_i256(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; SSE-NEXT: shldq %cl, %r14, %r11
|
||||
; SSE-NEXT: movq -32(%rsp,%rbx), %rbx
|
||||
; SSE-NEXT: shldq %cl, %rbx, %r14
|
||||
; SSE-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; SSE-NEXT: shlq %cl, %rbx
|
||||
; SSE-NEXT: addq $-1, %rbx
|
||||
; SSE-NEXT: adcq $-1, %r14
|
||||
@@ -81,7 +80,7 @@ define i256 @bext_i256(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; AVX2-NEXT: pushq %r14
|
||||
; AVX2-NEXT: pushq %rbx
|
||||
; AVX2-NEXT: movq %rcx, %rax
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
|
||||
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [1,0,0,0]
|
||||
@@ -143,7 +142,7 @@ define i256 @bext_i256(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; AVX512F-NEXT: pushq %r14
|
||||
; AVX512F-NEXT: pushq %rbx
|
||||
; AVX512F-NEXT: movq %rcx, %rax
|
||||
; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX512F-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
|
||||
; AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [0,0,0,0,1,0,0,0]
|
||||
; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: movl %ecx, %r10d
|
||||
@@ -203,7 +202,7 @@ define i256 @bext_i256(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; AVX512VL-NEXT: pushq %rbx
|
||||
; AVX512VL-NEXT: movq %rcx, %r10
|
||||
; AVX512VL-NEXT: movq %rdi, %rax
|
||||
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX512VL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
|
||||
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [1,0,0,0]
|
||||
@@ -264,7 +263,7 @@ define i256 @bext_i256(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; AVX512VBMI-NEXT: pushq %rbx
|
||||
; AVX512VBMI-NEXT: movq %rcx, %r10
|
||||
; AVX512VBMI-NEXT: movq %rdi, %rax
|
||||
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX512VBMI-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
|
||||
; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: vmovaps {{.*#+}} xmm1 = [1,0,0,0]
|
||||
@@ -632,7 +631,7 @@ define i256 @bext_i256_load(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: pushq %rax
|
||||
; SSE-NEXT: movq %rcx, %rax
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
|
||||
; SSE-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
@@ -651,7 +650,6 @@ define i256 @bext_i256_load(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; SSE-NEXT: shldq %cl, %r14, %r11
|
||||
; SSE-NEXT: movq -32(%rsp,%rbx), %rbx
|
||||
; SSE-NEXT: shldq %cl, %rbx, %r14
|
||||
; SSE-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; SSE-NEXT: shlq %cl, %rbx
|
||||
; SSE-NEXT: addq $-1, %rbx
|
||||
; SSE-NEXT: adcq $-1, %r14
|
||||
@@ -698,7 +696,7 @@ define i256 @bext_i256_load(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; AVX2-NEXT: pushq %r14
|
||||
; AVX2-NEXT: pushq %rbx
|
||||
; AVX2-NEXT: movq %rcx, %rax
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
|
||||
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [1,0,0,0]
|
||||
@@ -760,7 +758,7 @@ define i256 @bext_i256_load(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; AVX512F-NEXT: pushq %r14
|
||||
; AVX512F-NEXT: pushq %rbx
|
||||
; AVX512F-NEXT: movq %rcx, %rax
|
||||
; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX512F-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
|
||||
; AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [0,0,0,0,1,0,0,0]
|
||||
; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: movl %ecx, %r10d
|
||||
@@ -820,7 +818,7 @@ define i256 @bext_i256_load(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; AVX512VL-NEXT: pushq %rbx
|
||||
; AVX512VL-NEXT: movq %rcx, %r10
|
||||
; AVX512VL-NEXT: movq %rdi, %rax
|
||||
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX512VL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
|
||||
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [1,0,0,0]
|
||||
@@ -881,7 +879,7 @@ define i256 @bext_i256_load(i256 %a0, i256 %idx, i256 %len) nounwind {
|
||||
; AVX512VBMI-NEXT: pushq %rbx
|
||||
; AVX512VBMI-NEXT: movq %rcx, %r10
|
||||
; AVX512VBMI-NEXT: movq %rdi, %rax
|
||||
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX512VBMI-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
|
||||
; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: vmovaps {{.*#+}} xmm1 = [1,0,0,0]
|
||||
|
||||
@@ -17,7 +17,7 @@ define i512 @bext_i512(i512 %a0, i512 %idx, i512 %len) nounwind {
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: subq $168, %rsp
|
||||
; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; SSE-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
|
||||
@@ -56,7 +56,7 @@ define i512 @bext_i512(i512 %a0, i512 %idx, i512 %len) nounwind {
|
||||
; SSE-NEXT: movaps %xmm0, (%rsp)
|
||||
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; SSE-NEXT: addq $-1, %r15
|
||||
; SSE-NEXT: adcq $-1, %rbp
|
||||
; SSE-NEXT: adcq $-1, %r13
|
||||
@@ -141,7 +141,7 @@ define i512 @bext_i512(i512 %a0, i512 %idx, i512 %len) nounwind {
|
||||
; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [1,0,0,0]
|
||||
; AVX2-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: movl %eax, %ecx
|
||||
; AVX2-NEXT: andl $63, %ecx
|
||||
; AVX2-NEXT: shrl $3, %eax
|
||||
@@ -168,7 +168,7 @@ define i512 @bext_i512(i512 %a0, i512 %idx, i512 %len) nounwind {
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX2-NEXT: vmovups %ymm0, (%rsp)
|
||||
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r15
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %r15d
|
||||
; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm1
|
||||
; AVX2-NEXT: addq $-1, %rdx
|
||||
; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
@@ -269,7 +269,7 @@ define i512 @bext_i512(i512 %a0, i512 %idx, i512 %len) nounwind {
|
||||
; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r14
|
||||
; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %r14d
|
||||
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm0
|
||||
; AVX512F-NEXT: vpextrq $1, %xmm0, %r10
|
||||
; AVX512F-NEXT: vmovq %xmm2, %rbx
|
||||
@@ -335,7 +335,7 @@ define i512 @bext_i512(i512 %a0, i512 %idx, i512 %len) nounwind {
|
||||
; AVX512VL-NEXT: pushq %rbx
|
||||
; AVX512VL-NEXT: pushq %rax
|
||||
; AVX512VL-NEXT: movq %rdi, %rax
|
||||
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rdi
|
||||
; AVX512VL-NEXT: movl {{[0-9]+}}(%rsp), %edi
|
||||
; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512VL-NEXT: movl $1, %r11d
|
||||
@@ -428,7 +428,7 @@ define i512 @bext_i512(i512 %a0, i512 %idx, i512 %len) nounwind {
|
||||
; AVX512VBMI-NEXT: pushq %rbx
|
||||
; AVX512VBMI-NEXT: pushq %rax
|
||||
; AVX512VBMI-NEXT: movq %rdi, %rax
|
||||
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi
|
||||
; AVX512VBMI-NEXT: movl {{[0-9]+}}(%rsp), %edi
|
||||
; AVX512VBMI-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512VBMI-NEXT: movl $1, %r11d
|
||||
@@ -465,36 +465,36 @@ define i512 @bext_i512(i512 %a0, i512 %idx, i512 %len) nounwind {
|
||||
; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: vpbroadcastq %rdi, %zmm0
|
||||
; AVX512VBMI-NEXT: # kill: def $edi killed $edi killed $rdi def $rdi
|
||||
; AVX512VBMI-NEXT: movl %edi, %ecx
|
||||
; AVX512VBMI-NEXT: shrl $3, %edi
|
||||
; AVX512VBMI-NEXT: andl $56, %edi
|
||||
; AVX512VBMI-NEXT: vmovdqu64 -128(%rsp,%rdi), %zmm1
|
||||
; AVX512VBMI-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; AVX512VBMI-NEXT: valignq {{.*#+}} zmm3 = zmm1[1,2,3,4,5,6,7],zmm3[0]
|
||||
; AVX512VBMI-NEXT: vpshrdvq %zmm0, %zmm3, %zmm1
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm2, %xmm0
|
||||
; AVX512VBMI-NEXT: vpextrq $1, %xmm0, %rdx
|
||||
; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm1, %xmm3
|
||||
; AVX512VBMI-NEXT: vmovdqu64 -128(%rsp,%rdi), %zmm0
|
||||
; AVX512VBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm0[1,2,3,4,5,6,7],zmm1[0]
|
||||
; AVX512VBMI-NEXT: vpbroadcastq %rcx, %zmm3
|
||||
; AVX512VBMI-NEXT: vpshrdvq %zmm3, %zmm1, %zmm0
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm2, %xmm1
|
||||
; AVX512VBMI-NEXT: vpextrq $1, %xmm1, %rdx
|
||||
; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm3
|
||||
; AVX512VBMI-NEXT: vpextrq $1, %xmm3, %rcx
|
||||
; AVX512VBMI-NEXT: vmovq %xmm2, %rsi
|
||||
; AVX512VBMI-NEXT: andq %r11, %rcx
|
||||
; AVX512VBMI-NEXT: vmovq %xmm3, %rdi
|
||||
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm1, %xmm2
|
||||
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
|
||||
; AVX512VBMI-NEXT: vpextrq $1, %xmm2, %r11
|
||||
; AVX512VBMI-NEXT: andq %r10, %rdi
|
||||
; AVX512VBMI-NEXT: andq %rbx, %r11
|
||||
; AVX512VBMI-NEXT: vmovq %xmm2, %r10
|
||||
; AVX512VBMI-NEXT: andq %r8, %r10
|
||||
; AVX512VBMI-NEXT: vpextrq $1, %xmm1, %r8
|
||||
; AVX512VBMI-NEXT: vpextrq $1, %xmm0, %r8
|
||||
; AVX512VBMI-NEXT: andq %r9, %r8
|
||||
; AVX512VBMI-NEXT: vmovq %xmm1, %r9
|
||||
; AVX512VBMI-NEXT: vmovq %xmm0, %r9
|
||||
; AVX512VBMI-NEXT: andq %rsi, %r9
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; AVX512VBMI-NEXT: vpextrq $1, %xmm1, %rsi
|
||||
; AVX512VBMI-NEXT: vmovq %xmm0, %rbx
|
||||
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; AVX512VBMI-NEXT: vpextrq $1, %xmm0, %rsi
|
||||
; AVX512VBMI-NEXT: vmovq %xmm1, %rbx
|
||||
; AVX512VBMI-NEXT: andq %rdx, %rsi
|
||||
; AVX512VBMI-NEXT: vmovq %xmm1, %rdx
|
||||
; AVX512VBMI-NEXT: vmovq %xmm0, %rdx
|
||||
; AVX512VBMI-NEXT: andq %rbx, %rdx
|
||||
; AVX512VBMI-NEXT: movq %rcx, 56(%rax)
|
||||
; AVX512VBMI-NEXT: movq %rdi, 48(%rax)
|
||||
@@ -526,7 +526,7 @@ define i512 @bext_i512_vector(<8 x i64> %v0, i512 %idx, i512 %len) nounwind {
|
||||
; SSE-NEXT: pushq %r12
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: subq $168, %rsp
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; SSE-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; SSE-NEXT: xorps %xmm4, %xmm4
|
||||
; SSE-NEXT: movups %xmm4, {{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movups %xmm4, -{{[0-9]+}}(%rsp)
|
||||
@@ -642,7 +642,7 @@ define i512 @bext_i512_vector(<8 x i64> %v0, i512 %idx, i512 %len) nounwind {
|
||||
; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: vmovss {{.*#+}} xmm3 = [1,0,0,0]
|
||||
; AVX2-NEXT: vmovups %ymm3, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: movl %eax, %ecx
|
||||
; AVX2-NEXT: andl $63, %ecx
|
||||
; AVX2-NEXT: shrl $3, %eax
|
||||
@@ -861,7 +861,7 @@ define i512 @bext_i512_load(ptr %p0, i512 %idx, i512 %len) nounwind {
|
||||
; SSE-NEXT: pushq %r12
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: subq $168, %rsp
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; SSE-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; SSE-NEXT: movaps (%rsi), %xmm0
|
||||
; SSE-NEXT: movaps 16(%rsi), %xmm1
|
||||
; SSE-NEXT: movaps 32(%rsi), %xmm2
|
||||
@@ -983,7 +983,7 @@ define i512 @bext_i512_load(ptr %p0, i512 %idx, i512 %len) nounwind {
|
||||
; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: vmovss {{.*#+}} xmm3 = [1,0,0,0]
|
||||
; AVX2-NEXT: vmovups %ymm3, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: movl %eax, %ecx
|
||||
; AVX2-NEXT: andl $63, %ecx
|
||||
; AVX2-NEXT: shrl $3, %eax
|
||||
@@ -3256,7 +3256,7 @@ define i512 @bzhi_i512(i512 %a0, i512 %idx) nounwind {
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: pushq %rax
|
||||
; SSE-NEXT: movq %rcx, %rax
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; SSE-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
|
||||
@@ -3340,7 +3340,7 @@ define i512 @bzhi_i512(i512 %a0, i512 %idx) nounwind {
|
||||
; AVX2-NEXT: movq %rcx, %rax
|
||||
; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0]
|
||||
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; AVX2-NEXT: movl %r10d, %ecx
|
||||
; AVX2-NEXT: andl $63, %ecx
|
||||
; AVX2-NEXT: shrl $3, %r10d
|
||||
|
||||
@@ -470,7 +470,7 @@ define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, p
|
||||
;
|
||||
; X64-LABEL: freeze_two_buildvectors_one_undef_elt:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq (%rdi), %rax
|
||||
; X64-NEXT: movl (%rdi), %eax
|
||||
; X64-NEXT: andl $15, %eax
|
||||
; X64-NEXT: vmovd %eax, %xmm0
|
||||
; X64-NEXT: vpmovsxbq {{.*#+}} xmm1 = [7,7]
|
||||
|
||||
356
llvm/test/CodeGen/X86/reduce-load-width-freeze.ll
Normal file
356
llvm/test/CodeGen/X86/reduce-load-width-freeze.ll
Normal file
@@ -0,0 +1,356 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
define i32 @and_freeze_load_i32_to_i8(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_i32_to_i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzbl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 255
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
define i64 @and_freeze_load_i64_to_i8(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_i64_to_i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzbl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%masked = and i64 %freeze, 255
|
||||
ret i64 %masked
|
||||
}
|
||||
|
||||
define i64 @and_freeze_load_i64_to_i16(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_i64_to_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzwl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%masked = and i64 %freeze, 65535
|
||||
ret i64 %masked
|
||||
}
|
||||
|
||||
define i64 @and_freeze_load_i64_to_i32(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_i64_to_i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%masked = and i64 %freeze, 4294967295
|
||||
ret i64 %masked
|
||||
}
|
||||
|
||||
define i8 @trunc_freeze_load_i32_to_i8(ptr %p) {
|
||||
; CHECK-LABEL: trunc_freeze_load_i32_to_i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzbl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%trunc = trunc i32 %freeze to i8
|
||||
ret i8 %trunc
|
||||
}
|
||||
|
||||
define i8 @trunc_freeze_load_i64_to_i8(ptr %p) {
|
||||
; CHECK-LABEL: trunc_freeze_load_i64_to_i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzbl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%trunc = trunc i64 %freeze to i8
|
||||
ret i8 %trunc
|
||||
}
|
||||
|
||||
define i16 @trunc_freeze_load_i64_to_i16(ptr %p) {
|
||||
; CHECK-LABEL: trunc_freeze_load_i64_to_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzwl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%trunc = trunc i64 %freeze to i16
|
||||
ret i16 %trunc
|
||||
}
|
||||
|
||||
define i32 @sext_inreg_freeze_load_i8(ptr %p) {
|
||||
; CHECK-LABEL: sext_inreg_freeze_load_i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movsbl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%shl = shl i32 %freeze, 24
|
||||
%ashr = ashr i32 %shl, 24
|
||||
ret i32 %ashr
|
||||
}
|
||||
|
||||
define i32 @zext_trunc_freeze_load(ptr %p) {
|
||||
; CHECK-LABEL: zext_trunc_freeze_load:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzbl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%trunc = trunc i64 %freeze to i8
|
||||
%zext = zext i8 %trunc to i32
|
||||
ret i32 %zext
|
||||
}
|
||||
|
||||
define i32 @sext_trunc_freeze_load(ptr %p) {
|
||||
; CHECK-LABEL: sext_trunc_freeze_load:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movsbl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%trunc = trunc i64 %freeze to i8
|
||||
%sext = sext i8 %trunc to i32
|
||||
ret i32 %sext
|
||||
}
|
||||
|
||||
define i32 @and_freeze_volatile_load(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_volatile_load:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: movzbl %al, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load volatile i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 255
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
define i32 @and_freeze_invalid_mask(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_invalid_mask:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: andl $170, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 170
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
define i32 @and_multiuse_freeze_extload_no_narrow(ptr %p) {
|
||||
; CHECK-LABEL: and_multiuse_freeze_extload_no_narrow:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzbl (%rdi), %ecx
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpl $42, %ecx
|
||||
; CHECK-NEXT: cmovel %ecx, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i8, ptr %p
|
||||
%freeze = freeze i8 %load
|
||||
%zext = zext i8 %freeze to i32
|
||||
%cmp = icmp eq i8 %freeze, 42
|
||||
%sel = select i1 %cmp, i32 %zext, i32 0
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
define i32 @and_multiuse_freeze_narrowing(ptr %p, ptr %q) {
|
||||
; CHECK-LABEL: and_multiuse_freeze_narrowing:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %ecx
|
||||
; CHECK-NEXT: movzbl %cl, %eax
|
||||
; CHECK-NEXT: movl %ecx, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 255
|
||||
store i32 %freeze, ptr %q
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
define i64 @and_multiuse_freeze_i64_narrowing(ptr %p, ptr %q) {
|
||||
; CHECK-LABEL: and_multiuse_freeze_i64_narrowing:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq (%rdi), %rcx
|
||||
; CHECK-NEXT: movzbl %cl, %eax
|
||||
; CHECK-NEXT: movq %rcx, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i64, ptr %p
|
||||
%freeze = freeze i64 %load
|
||||
%masked = and i64 %freeze, 255
|
||||
store i64 %freeze, ptr %q
|
||||
ret i64 %masked
|
||||
}
|
||||
|
||||
define i32 @and_freeze_load_i32_to_i16(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_i32_to_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzwl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 65535
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
define i8 @trunc_freeze_load_i16_to_i8(ptr %p) {
|
||||
; CHECK-LABEL: trunc_freeze_load_i16_to_i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzbl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i16, ptr %p, align 2
|
||||
%freeze = freeze i16 %load
|
||||
%trunc = trunc i16 %freeze to i8
|
||||
ret i8 %trunc
|
||||
}
|
||||
|
||||
define i16 @trunc_freeze_load_i32_to_i16(ptr %p) {
|
||||
; CHECK-LABEL: trunc_freeze_load_i32_to_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzwl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%trunc = trunc i32 %freeze to i16
|
||||
ret i16 %trunc
|
||||
}
|
||||
|
||||
define i32 @sext_inreg_freeze_load_i16(ptr %p) {
|
||||
; CHECK-LABEL: sext_inreg_freeze_load_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movswl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%shl = shl i32 %freeze, 16
|
||||
%ashr = ashr i32 %shl, 16
|
||||
ret i32 %ashr
|
||||
}
|
||||
|
||||
define i32 @and_freeze_load_shifted_mask(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_shifted_mask:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzbl 1(%rdi), %eax
|
||||
; CHECK-NEXT: shll $8, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 65280
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
define i32 @and_freeze_load_unaligned(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_load_unaligned:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzbl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 1
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 255
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
define i32 @trunc_freeze_load_store(ptr %p, ptr %q) {
|
||||
; CHECK-LABEL: trunc_freeze_load_store:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzbl (%rdi), %eax
|
||||
; CHECK-NEXT: movb %al, (%rsi)
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i16, ptr %p, align 4
|
||||
%freeze = freeze i16 %load
|
||||
%trunc = trunc i16 %freeze to i8
|
||||
store i8 %trunc, ptr %q, align 1
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @and_freeze_atomic_load(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_atomic_load:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: movzbl %al, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load atomic i32, ptr %p seq_cst, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 255
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
define i32 @and_freeze_non_contiguous_mask(ptr %p) {
|
||||
; CHECK-LABEL: and_freeze_non_contiguous_mask:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: andl $170, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, 170
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
define i32 @and_freeze_variable_mask(ptr %p, i32 %mask) {
|
||||
; CHECK-LABEL: and_freeze_variable_mask:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: andl (%rdi), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%masked = and i32 %freeze, %mask
|
||||
ret i32 %masked
|
||||
}
|
||||
|
||||
; SRL/SRA through freeze are not yet folded because the freeze peek-through
|
||||
; happens after the SRL early-return in reduceLoadWidth.
|
||||
|
||||
define i32 @srl_freeze_load_i32(ptr %p) {
|
||||
; CHECK-LABEL: srl_freeze_load_i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: shrl $8, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%srl = lshr i32 %freeze, 8
|
||||
ret i32 %srl
|
||||
}
|
||||
|
||||
define i32 @sra_freeze_load_i32(ptr %p) {
|
||||
; CHECK-LABEL: sra_freeze_load_i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: sarl $8, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%sra = ashr i32 %freeze, 8
|
||||
ret i32 %sra
|
||||
}
|
||||
|
||||
define i8 @trunc_srl_freeze_load(ptr %p) {
|
||||
; CHECK-LABEL: trunc_srl_freeze_load:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: shrl $8, %eax
|
||||
; CHECK-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i32, ptr %p, align 4
|
||||
%freeze = freeze i32 %load
|
||||
%srl = lshr i32 %freeze, 8
|
||||
%trunc = trunc i32 %srl to i8
|
||||
ret i8 %trunc
|
||||
}
|
||||
|
||||
define i16 @srl_freeze_load_i64_to_i16(ptr %p) {
|
||||
; CHECK-LABEL: srl_freeze_load_i64_to_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: shrl $16, %eax
|
||||
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; CHECK-NEXT: retq
|
||||
%load = load i64, ptr %p, align 8
|
||||
%freeze = freeze i64 %load
|
||||
%srl = lshr i64 %freeze, 16
|
||||
%trunc = trunc i64 %srl to i16
|
||||
ret i16 %trunc
|
||||
}
|
||||
@@ -2219,8 +2219,7 @@ define i56 @select_undef_rhs(i64 %x, i1 %cmp) {
|
||||
; ATHLON-LABEL: select_undef_rhs:
|
||||
; ATHLON: ## %bb.0:
|
||||
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; ATHLON-NEXT: movzwl %cx, %edx
|
||||
; ATHLON-NEXT: movzwl {{[0-9]+}}(%esp), %edx
|
||||
; ATHLON-NEXT: retl
|
||||
;
|
||||
; MCU-LABEL: select_undef_rhs:
|
||||
@@ -2253,8 +2252,7 @@ define i56 @select_undef_lhs(i64 %x, i1 %cmp) {
|
||||
; ATHLON-LABEL: select_undef_lhs:
|
||||
; ATHLON: ## %bb.0:
|
||||
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; ATHLON-NEXT: movzwl %cx, %edx
|
||||
; ATHLON-NEXT: movzwl {{[0-9]+}}(%esp), %edx
|
||||
; ATHLON-NEXT: retl
|
||||
;
|
||||
; MCU-LABEL: select_undef_lhs:
|
||||
|
||||
@@ -16,7 +16,7 @@ define void @test_lshr_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind {
|
||||
; i686-NEXT: pushl %esi
|
||||
; i686-NEXT: andl $-16, %esp
|
||||
; i686-NEXT: subl $48, %esp
|
||||
; i686-NEXT: movl 24(%ebp), %ecx
|
||||
; i686-NEXT: movzbl 24(%ebp), %ecx
|
||||
; i686-NEXT: movl 8(%ebp), %eax
|
||||
; i686-NEXT: movl 12(%ebp), %edx
|
||||
; i686-NEXT: movl 16(%ebp), %esi
|
||||
@@ -42,7 +42,6 @@ define void @test_lshr_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind {
|
||||
; i686-NEXT: shrdl %cl, %edi, %eax
|
||||
; i686-NEXT: shrdl %cl, %ebx, %esi
|
||||
; i686-NEXT: movl 40(%ebp), %ebx
|
||||
; i686-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; i686-NEXT: shrl %cl, %edi
|
||||
; i686-NEXT: movl %edi, 12(%ebx)
|
||||
; i686-NEXT: movl %eax, 8(%ebx)
|
||||
@@ -83,7 +82,7 @@ define void @test_ashr_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind {
|
||||
; i686-NEXT: pushl %esi
|
||||
; i686-NEXT: andl $-16, %esp
|
||||
; i686-NEXT: subl $48, %esp
|
||||
; i686-NEXT: movl 24(%ebp), %ecx
|
||||
; i686-NEXT: movzbl 24(%ebp), %ecx
|
||||
; i686-NEXT: movl 8(%ebp), %eax
|
||||
; i686-NEXT: movl 12(%ebp), %edx
|
||||
; i686-NEXT: movl 16(%ebp), %esi
|
||||
@@ -110,7 +109,6 @@ define void @test_ashr_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind {
|
||||
; i686-NEXT: shrdl %cl, %edi, %eax
|
||||
; i686-NEXT: shrdl %cl, %ebx, %esi
|
||||
; i686-NEXT: movl 40(%ebp), %ebx
|
||||
; i686-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; i686-NEXT: sarl %cl, %edi
|
||||
; i686-NEXT: movl %edi, 12(%ebx)
|
||||
; i686-NEXT: movl %eax, 8(%ebx)
|
||||
@@ -152,7 +150,7 @@ define void @test_shl_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind {
|
||||
; i686-NEXT: pushl %esi
|
||||
; i686-NEXT: andl $-16, %esp
|
||||
; i686-NEXT: subl $48, %esp
|
||||
; i686-NEXT: movl 24(%ebp), %ecx
|
||||
; i686-NEXT: movzbl 24(%ebp), %ecx
|
||||
; i686-NEXT: movl 8(%ebp), %eax
|
||||
; i686-NEXT: movl 12(%ebp), %edx
|
||||
; i686-NEXT: movl 16(%ebp), %esi
|
||||
@@ -182,7 +180,6 @@ define void @test_shl_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind {
|
||||
; i686-NEXT: movl %esi, 8(%ebx)
|
||||
; i686-NEXT: movl %edx, %esi
|
||||
; i686-NEXT: shll %cl, %esi
|
||||
; i686-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; i686-NEXT: shldl %cl, %edx, %eax
|
||||
; i686-NEXT: movl %eax, 4(%ebx)
|
||||
; i686-NEXT: movl %esi, (%ebx)
|
||||
|
||||
@@ -172,7 +172,7 @@ define i256 @shl_i256(i256 %a0, i256 %a1) nounwind {
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $112, %esp
|
||||
; X86-NEXT: movl 44(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 44(%ebp), %ecx
|
||||
; X86-NEXT: movl 12(%ebp), %eax
|
||||
; X86-NEXT: movl 16(%ebp), %edx
|
||||
; X86-NEXT: movl 20(%ebp), %esi
|
||||
@@ -239,7 +239,6 @@ define i256 @shl_i256(i256 %a0, i256 %a1) nounwind {
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
|
||||
; X86-NEXT: movl %edi, %edx
|
||||
; X86-NEXT: shll %cl, %edx
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
|
||||
; X86-NEXT: shldl %cl, %edi, %esi
|
||||
; X86-NEXT: movl %esi, 4(%eax)
|
||||
@@ -408,7 +407,7 @@ define i256 @lshr_i256(i256 %a0, i256 %a1) nounwind {
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $112, %esp
|
||||
; X86-NEXT: movl 44(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 44(%ebp), %ecx
|
||||
; X86-NEXT: movl 12(%ebp), %eax
|
||||
; X86-NEXT: movl 16(%ebp), %edx
|
||||
; X86-NEXT: movl 20(%ebp), %esi
|
||||
@@ -457,7 +456,6 @@ define i256 @lshr_i256(i256 %a0, i256 %a1) nounwind {
|
||||
; X86-NEXT: shrdl %cl, %edx, %esi
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrl %cl, %edx
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: movl %edx, 28(%eax)
|
||||
@@ -647,7 +645,7 @@ define i256 @ashr_i256(i256 %a0, i256 %a1) nounwind {
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $112, %esp
|
||||
; X86-NEXT: movl 44(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 44(%ebp), %ecx
|
||||
; X86-NEXT: movl 12(%ebp), %eax
|
||||
; X86-NEXT: movl 16(%ebp), %edx
|
||||
; X86-NEXT: movl 20(%ebp), %esi
|
||||
@@ -697,7 +695,6 @@ define i256 @ashr_i256(i256 %a0, i256 %a1) nounwind {
|
||||
; X86-NEXT: shrdl %cl, %edx, %esi
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: sarl %cl, %edx
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: movl %edx, 28(%eax)
|
||||
@@ -897,7 +894,7 @@ define i256 @shl_i256_load(ptr %p0, i256 %a1) nounwind {
|
||||
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl 16(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 16(%ebp), %ecx
|
||||
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
@@ -954,7 +951,6 @@ define i256 @shl_i256_load(ptr %p0, i256 %a1) nounwind {
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
|
||||
; X86-NEXT: movl %edi, %edx
|
||||
; X86-NEXT: shll %cl, %edx
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
|
||||
; X86-NEXT: shldl %cl, %edi, %esi
|
||||
; X86-NEXT: movl %esi, 4(%eax)
|
||||
@@ -1132,7 +1128,7 @@ define i256 @lshr_i256_load(ptr %p0, i256 %a1) nounwind {
|
||||
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl 16(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 16(%ebp), %ecx
|
||||
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
@@ -1171,7 +1167,6 @@ define i256 @lshr_i256_load(ptr %p0, i256 %a1) nounwind {
|
||||
; X86-NEXT: shrdl %cl, %edx, %esi
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrl %cl, %edx
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: movl %edx, 28(%eax)
|
||||
@@ -1388,7 +1383,7 @@ define i256 @ashr_i256_load(ptr %p0, i256 %a1) nounwind {
|
||||
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl 16(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 16(%ebp), %ecx
|
||||
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
|
||||
@@ -1428,7 +1423,6 @@ define i256 @ashr_i256_load(ptr %p0, i256 %a1) nounwind {
|
||||
; X86-NEXT: shrdl %cl, %edx, %esi
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: sarl %cl, %edx
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: movl %edx, 28(%eax)
|
||||
@@ -2048,7 +2042,7 @@ define i256 @shl_1_i256(i256 %a0) nounwind {
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $112, %esp
|
||||
; X86-NEXT: movl 12(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 12(%ebp), %ecx
|
||||
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
@@ -2107,7 +2101,6 @@ define i256 @shl_1_i256(i256 %a0) nounwind {
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
|
||||
; X86-NEXT: movl %edi, %edx
|
||||
; X86-NEXT: shll %cl, %edx
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
|
||||
; X86-NEXT: shldl %cl, %edi, %esi
|
||||
; X86-NEXT: movl %esi, 4(%eax)
|
||||
@@ -2267,7 +2260,7 @@ define i256 @lshr_signbit_i256(i256 %a0) nounwind {
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $112, %esp
|
||||
; X86-NEXT: movl 12(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 12(%ebp), %ecx
|
||||
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
@@ -2308,7 +2301,6 @@ define i256 @lshr_signbit_i256(i256 %a0) nounwind {
|
||||
; X86-NEXT: shrdl %cl, %edx, %esi
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrl %cl, %edx
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: movl %edx, 28(%eax)
|
||||
@@ -2481,7 +2473,7 @@ define i256 @ashr_signbit_i256(i256 %a0) nounwind {
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $112, %esp
|
||||
; X86-NEXT: movl 12(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 12(%ebp), %ecx
|
||||
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
|
||||
@@ -2522,7 +2514,6 @@ define i256 @ashr_signbit_i256(i256 %a0) nounwind {
|
||||
; X86-NEXT: shrdl %cl, %edx, %esi
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: sarl %cl, %edx
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: movl %edx, 28(%eax)
|
||||
@@ -2706,7 +2697,7 @@ define i256 @shl_allbits_i256(i256 %a0) nounwind {
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $112, %esp
|
||||
; X86-NEXT: movl 12(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 12(%ebp), %ecx
|
||||
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
|
||||
@@ -2765,7 +2756,6 @@ define i256 @shl_allbits_i256(i256 %a0) nounwind {
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
|
||||
; X86-NEXT: movl %edi, %edx
|
||||
; X86-NEXT: shll %cl, %edx
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
|
||||
; X86-NEXT: shldl %cl, %edi, %esi
|
||||
; X86-NEXT: movl %esi, 4(%eax)
|
||||
@@ -2926,7 +2916,7 @@ define i256 @lshr_allbits_i256(i256 %a0) nounwind {
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $112, %esp
|
||||
; X86-NEXT: movl 12(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 12(%ebp), %ecx
|
||||
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
@@ -2967,7 +2957,6 @@ define i256 @lshr_allbits_i256(i256 %a0) nounwind {
|
||||
; X86-NEXT: shrdl %cl, %edx, %esi
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrl %cl, %edx
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: movl %edx, 28(%eax)
|
||||
@@ -3090,7 +3079,7 @@ define i64 @lshr_extract_i256_i64(i256 %a0, i256 %a1) nounwind {
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $64, %esp
|
||||
; X86-NEXT: movl 40(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 40(%ebp), %ecx
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: movl 12(%ebp), %edx
|
||||
; X86-NEXT: movl 16(%ebp), %esi
|
||||
@@ -3123,7 +3112,6 @@ define i64 @lshr_extract_i256_i64(i256 %a0, i256 %a1) nounwind {
|
||||
; X86-NEXT: movl 4(%esp,%edx,4), %edi
|
||||
; X86-NEXT: movl %edi, %edx
|
||||
; X86-NEXT: shrdl %cl, %esi, %edx
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrdl %cl, %edi, %eax
|
||||
; X86-NEXT: leal -8(%ebp), %esp
|
||||
; X86-NEXT: popl %esi
|
||||
@@ -3164,7 +3152,7 @@ define i64 @ashr_extract_i256_i64(i256 %a0, i256 %a1) nounwind {
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: andl $-16, %esp
|
||||
; X86-NEXT: subl $64, %esp
|
||||
; X86-NEXT: movl 40(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 40(%ebp), %ecx
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: movl 12(%ebp), %edx
|
||||
; X86-NEXT: movl 16(%ebp), %esi
|
||||
@@ -3198,7 +3186,6 @@ define i64 @ashr_extract_i256_i64(i256 %a0, i256 %a1) nounwind {
|
||||
; X86-NEXT: movl 4(%esp,%edx,4), %edi
|
||||
; X86-NEXT: movl %edi, %edx
|
||||
; X86-NEXT: shrdl %cl, %esi, %edx
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrdl %cl, %edi, %eax
|
||||
; X86-NEXT: leal -8(%ebp), %esp
|
||||
; X86-NEXT: popl %esi
|
||||
@@ -3321,7 +3308,7 @@ define i64 @lshr_extract_load_i256_i64(ptr %p0, i256 %a1) nounwind {
|
||||
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl 12(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 12(%ebp), %ecx
|
||||
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
@@ -3344,7 +3331,6 @@ define i64 @lshr_extract_load_i256_i64(ptr %p0, i256 %a1) nounwind {
|
||||
; X86-NEXT: movl 20(%esp,%edx,4), %edi
|
||||
; X86-NEXT: movl %edi, %edx
|
||||
; X86-NEXT: shrdl %cl, %esi, %edx
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrdl %cl, %edi, %eax
|
||||
; X86-NEXT: leal -12(%ebp), %esp
|
||||
; X86-NEXT: popl %esi
|
||||
@@ -3452,7 +3438,7 @@ define i64 @ashr_extract_load_i256_i64(ptr %p0, i256 %a1) nounwind {
|
||||
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl 12(%ebp), %ecx
|
||||
; X86-NEXT: movzbl 12(%ebp), %ecx
|
||||
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
|
||||
@@ -3476,7 +3462,6 @@ define i64 @ashr_extract_load_i256_i64(ptr %p0, i256 %a1) nounwind {
|
||||
; X86-NEXT: movl 20(%esp,%edx,4), %edi
|
||||
; X86-NEXT: movl %edi, %edx
|
||||
; X86-NEXT: shrdl %cl, %esi, %edx
|
||||
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
|
||||
; X86-NEXT: shrdl %cl, %edi, %eax
|
||||
; X86-NEXT: leal -12(%ebp), %esp
|
||||
; X86-NEXT: popl %esi
|
||||
|
||||
@@ -12,7 +12,7 @@ define i512 @shl_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; SSE-NEXT: pushq %r14
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: pushq %rax
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; SSE-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
@@ -75,7 +75,7 @@ define i512 @shl_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX2-NEXT: pushq %r14
|
||||
; AVX2-NEXT: pushq %rbx
|
||||
; AVX2-NEXT: pushq %rax
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
@@ -134,7 +134,7 @@ define i512 @shl_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX512F-LABEL: shl_i512:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: pushq %rax
|
||||
; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX512F-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
@@ -169,7 +169,7 @@ define i512 @shl_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX512VL-LABEL: shl_i512:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: pushq %rax
|
||||
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX512VL-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
@@ -208,7 +208,7 @@ define i512 @shl_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX512VBMI: # %bb.0:
|
||||
; AVX512VBMI-NEXT: pushq %rax
|
||||
; AVX512VBMI-NEXT: movq %rdi, %rax
|
||||
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi
|
||||
; AVX512VBMI-NEXT: movl {{[0-9]+}}(%rsp), %edi
|
||||
; AVX512VBMI-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
@@ -221,18 +221,18 @@ define i512 @shl_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: vpbroadcastq %rdi, %zmm0
|
||||
; AVX512VBMI-NEXT: movl %edi, %ecx
|
||||
; AVX512VBMI-NEXT: shrl $3, %ecx
|
||||
; AVX512VBMI-NEXT: andl $56, %ecx
|
||||
; AVX512VBMI-NEXT: negl %ecx
|
||||
; AVX512VBMI-NEXT: movslq %ecx, %rcx
|
||||
; AVX512VBMI-NEXT: vmovdqu64 -64(%rsp,%rcx), %zmm1
|
||||
; AVX512VBMI-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VBMI-NEXT: valignq {{.*#+}} zmm2 = zmm2[7],zmm1[0,1,2,3,4,5,6]
|
||||
; AVX512VBMI-NEXT: vpshldvq %zmm0, %zmm2, %zmm1
|
||||
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm1, 32(%rax)
|
||||
; AVX512VBMI-NEXT: vmovdqu %ymm1, (%rax)
|
||||
; AVX512VBMI-NEXT: shrl $3, %edi
|
||||
; AVX512VBMI-NEXT: andl $56, %edi
|
||||
; AVX512VBMI-NEXT: negl %edi
|
||||
; AVX512VBMI-NEXT: movslq %edi, %rdx
|
||||
; AVX512VBMI-NEXT: vmovdqu64 -64(%rsp,%rdx), %zmm0
|
||||
; AVX512VBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm1[7],zmm0[0,1,2,3,4,5,6]
|
||||
; AVX512VBMI-NEXT: vpbroadcastq %rcx, %zmm2
|
||||
; AVX512VBMI-NEXT: vpshldvq %zmm2, %zmm1, %zmm0
|
||||
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, 32(%rax)
|
||||
; AVX512VBMI-NEXT: vmovdqu %ymm0, (%rax)
|
||||
; AVX512VBMI-NEXT: popq %rcx
|
||||
; AVX512VBMI-NEXT: vzeroupper
|
||||
; AVX512VBMI-NEXT: retq
|
||||
@@ -246,7 +246,7 @@ define i512 @lshr_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; SSE-NEXT: pushq %r15
|
||||
; SSE-NEXT: pushq %r14
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; SSE-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSE-NEXT: xorps %xmm1, %xmm1
|
||||
@@ -302,7 +302,7 @@ define i512 @lshr_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX2-NEXT: pushq %r15
|
||||
; AVX2-NEXT: pushq %r14
|
||||
; AVX2-NEXT: pushq %rbx
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
|
||||
@@ -360,7 +360,7 @@ define i512 @lshr_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX512F-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX512F-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
|
||||
@@ -388,7 +388,7 @@ define i512 @lshr_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: pushq %rax
|
||||
; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX512VL-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VL-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
|
||||
@@ -425,28 +425,28 @@ define i512 @lshr_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX512VBMI-NEXT: pushq %rax
|
||||
; AVX512VBMI-NEXT: movq %rdi, %rax
|
||||
; AVX512VBMI-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX512VBMI-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX512VBMI-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512VBMI-NEXT: vmovdqu %ymm1, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi
|
||||
; AVX512VBMI-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: vmovdqu %ymm1, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi
|
||||
; AVX512VBMI-NEXT: movl {{[0-9]+}}(%rsp), %edi
|
||||
; AVX512VBMI-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: vpbroadcastq %rdi, %zmm0
|
||||
; AVX512VBMI-NEXT: # kill: def $edi killed $edi killed $rdi def $rdi
|
||||
; AVX512VBMI-NEXT: movl %edi, %ecx
|
||||
; AVX512VBMI-NEXT: shrl $3, %edi
|
||||
; AVX512VBMI-NEXT: andl $56, %edi
|
||||
; AVX512VBMI-NEXT: vmovdqu64 -128(%rsp,%rdi), %zmm1
|
||||
; AVX512VBMI-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VBMI-NEXT: valignq {{.*#+}} zmm2 = zmm1[1,2,3,4,5,6,7],zmm2[0]
|
||||
; AVX512VBMI-NEXT: vpshrdvq %zmm0, %zmm2, %zmm1
|
||||
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm1, 32(%rax)
|
||||
; AVX512VBMI-NEXT: vmovdqu %ymm1, (%rax)
|
||||
; AVX512VBMI-NEXT: vmovdqu64 -128(%rsp,%rdi), %zmm0
|
||||
; AVX512VBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm0[1,2,3,4,5,6,7],zmm1[0]
|
||||
; AVX512VBMI-NEXT: vpbroadcastq %rcx, %zmm2
|
||||
; AVX512VBMI-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0
|
||||
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, 32(%rax)
|
||||
; AVX512VBMI-NEXT: vmovdqu %ymm0, (%rax)
|
||||
; AVX512VBMI-NEXT: popq %rcx
|
||||
; AVX512VBMI-NEXT: vzeroupper
|
||||
; AVX512VBMI-NEXT: retq
|
||||
@@ -460,7 +460,7 @@ define i512 @ashr_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; SSE-NEXT: pushq %r15
|
||||
; SSE-NEXT: pushq %r14
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; SSE-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
@@ -524,7 +524,7 @@ define i512 @ashr_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
|
||||
@@ -577,7 +577,7 @@ define i512 @ashr_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX512F-LABEL: ashr_i512:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: pushq %rax
|
||||
; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
@@ -618,7 +618,7 @@ define i512 @ashr_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX512VL-LABEL: ashr_i512:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: pushq %rax
|
||||
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX512VL-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
@@ -661,7 +661,7 @@ define i512 @ashr_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX512VBMI: # %bb.0:
|
||||
; AVX512VBMI-NEXT: pushq %rax
|
||||
; AVX512VBMI-NEXT: movq %rdi, %rax
|
||||
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi
|
||||
; AVX512VBMI-NEXT: movl {{[0-9]+}}(%rsp), %edi
|
||||
; AVX512VBMI-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
@@ -680,16 +680,16 @@ define i512 @ashr_i512(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VBMI-NEXT: vpbroadcastq %rdi, %zmm0
|
||||
; AVX512VBMI-NEXT: # kill: def $edi killed $edi killed $rdi def $rdi
|
||||
; AVX512VBMI-NEXT: movl %edi, %ecx
|
||||
; AVX512VBMI-NEXT: shrl $3, %edi
|
||||
; AVX512VBMI-NEXT: andl $56, %edi
|
||||
; AVX512VBMI-NEXT: vpsraq $63, -72(%rsp,%rdi){1to2}, %xmm1
|
||||
; AVX512VBMI-NEXT: vmovdqu64 -128(%rsp,%rdi), %zmm2
|
||||
; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm2[1,2,3,4,5,6,7],zmm1[0]
|
||||
; AVX512VBMI-NEXT: vpshrdvq %zmm0, %zmm1, %zmm2
|
||||
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm2, 32(%rax)
|
||||
; AVX512VBMI-NEXT: vmovdqu %ymm2, (%rax)
|
||||
; AVX512VBMI-NEXT: vpsraq $63, -72(%rsp,%rdi){1to2}, %xmm0
|
||||
; AVX512VBMI-NEXT: vmovdqu64 -128(%rsp,%rdi), %zmm1
|
||||
; AVX512VBMI-NEXT: valignq {{.*#+}} zmm0 = zmm1[1,2,3,4,5,6,7],zmm0[0]
|
||||
; AVX512VBMI-NEXT: vpbroadcastq %rcx, %zmm2
|
||||
; AVX512VBMI-NEXT: vpshrdvq %zmm2, %zmm0, %zmm1
|
||||
; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm1, 32(%rax)
|
||||
; AVX512VBMI-NEXT: vmovdqu %ymm1, (%rax)
|
||||
; AVX512VBMI-NEXT: popq %rcx
|
||||
; AVX512VBMI-NEXT: vzeroupper
|
||||
; AVX512VBMI-NEXT: retq
|
||||
@@ -2447,7 +2447,7 @@ define i64 @lshr_extract_i512_i64(i512 %a0, i512 %a1) nounwind {
|
||||
; SSE-LABEL: lshr_extract_i512_i64:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pushq %rax
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; SSE-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
|
||||
@@ -2474,7 +2474,7 @@ define i64 @lshr_extract_i512_i64(i512 %a0, i512 %a1) nounwind {
|
||||
; AVX2-LABEL: lshr_extract_i512_i64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: pushq %rax
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0
|
||||
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
|
||||
@@ -2578,39 +2578,105 @@ define i64 @lshr_extract_i512_i64(i512 %a0, i512 %a1) nounwind {
|
||||
}
|
||||
|
||||
define i64 @ashr_extract_i512_i64(i512 %a0, i512 %a1) nounwind {
|
||||
; CHECK-LABEL: ashr_extract_i512_i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: movq %rcx, %rax
|
||||
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: sarq $63, %r11
|
||||
; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movl %ecx, %edx
|
||||
; CHECK-NEXT: shrl $3, %edx
|
||||
; CHECK-NEXT: andl $56, %edx
|
||||
; CHECK-NEXT: movq -128(%rsp,%rdx), %rax
|
||||
; CHECK-NEXT: movq -120(%rsp,%rdx), %rdx
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; CHECK-NEXT: shrdq %cl, %rdx, %rax
|
||||
; CHECK-NEXT: popq %rcx
|
||||
; CHECK-NEXT: retq
|
||||
; SSE-LABEL: ashr_extract_i512_i64:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pushq %rax
|
||||
; SSE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: sarq $63, %r11
|
||||
; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; SSE-NEXT: movl %r10d, %ecx
|
||||
; SSE-NEXT: shrl $3, %ecx
|
||||
; SSE-NEXT: andl $56, %ecx
|
||||
; SSE-NEXT: movq -128(%rsp,%rcx), %rax
|
||||
; SSE-NEXT: movq -120(%rsp,%rcx), %rdx
|
||||
; SSE-NEXT: movl %r10d, %ecx
|
||||
; SSE-NEXT: shrdq %cl, %rdx, %rax
|
||||
; SSE-NEXT: popq %rcx
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: ashr_extract_i512_i64:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: pushq %rax
|
||||
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %r10d
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: sarq $63, %r11
|
||||
; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX2-NEXT: movl %r10d, %ecx
|
||||
; AVX2-NEXT: shrl $3, %ecx
|
||||
; AVX2-NEXT: andl $56, %ecx
|
||||
; AVX2-NEXT: movq -128(%rsp,%rcx), %rax
|
||||
; AVX2-NEXT: movq -120(%rsp,%rcx), %rdx
|
||||
; AVX2-NEXT: movl %r10d, %ecx
|
||||
; AVX2-NEXT: shrdq %cl, %rdx, %rax
|
||||
; AVX2-NEXT: popq %rcx
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: ashr_extract_i512_i64:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: pushq %rax
|
||||
; AVX512-NEXT: movq %rcx, %rax
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX512-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: sarq $63, %r11
|
||||
; AVX512-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movl %ecx, %edx
|
||||
; AVX512-NEXT: shrl $3, %edx
|
||||
; AVX512-NEXT: andl $56, %edx
|
||||
; AVX512-NEXT: movq -128(%rsp,%rdx), %rax
|
||||
; AVX512-NEXT: movq -120(%rsp,%rdx), %rdx
|
||||
; AVX512-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; AVX512-NEXT: shrdq %cl, %rdx, %rax
|
||||
; AVX512-NEXT: popq %rcx
|
||||
; AVX512-NEXT: retq
|
||||
%b = ashr i512 %a0, %a1
|
||||
%r = trunc i512 %b to i64
|
||||
ret i64 %r
|
||||
|
||||
Reference in New Issue
Block a user