This reverts commit b7ce37c670.
The [issue](https://github.com/llvm/llvm-project/pull/172837#issuecomment-3961532435) that this patch revealed was fixed by [this patch](https://github.com/llvm/llvm-project/pull/183549).
262 lines
7.1 KiB
LLVM
262 lines
7.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s --check-prefix=NEON
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+sve | FileCheck %s --check-prefix=SVE
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+cssc | FileCheck %s --check-prefix=CSSC

; i256 popcount (register args, result truncated to i64).
;
; The expected code reads the four 64-bit limbs of %a from x0-x3 and returns
; the count in x0. Expected lowering for each llc invocation:
;  * plain AArch64 - the limbs are packed into two 128-bit vectors
;    (x2/x3 and x0/x1), counted byte-wise with CNT, reduced with ADDV, and
;    the two partial sums are added in a general-purpose register.
;  * with +sve - the same two vectors are counted per 64-bit lane with a
;    predicated CNT, summed with a vector ADD and folded with ADDP.
;  * with +cssc - one scalar CNT per limb followed by a three-ADD tree.
define i64 @ctpop_i256(i256 %a) nounwind {
;
;
; NEON-LABEL: ctpop_i256:
; NEON:       // %bb.0:
; NEON-NEXT:    fmov d0, x2
; NEON-NEXT:    fmov d1, x0
; NEON-NEXT:    mov v0.d[1], x3
; NEON-NEXT:    mov v1.d[1], x1
; NEON-NEXT:    cnt v0.16b, v0.16b
; NEON-NEXT:    cnt v1.16b, v1.16b
; NEON-NEXT:    addv b0, v0.16b
; NEON-NEXT:    addv b1, v1.16b
; NEON-NEXT:    fmov x8, d0
; NEON-NEXT:    fmov x9, d1
; NEON-NEXT:    add x0, x9, x8
; NEON-NEXT:    ret
;
; SVE-LABEL: ctpop_i256:
; SVE:       // %bb.0:
; SVE-NEXT:    fmov d0, x2
; SVE-NEXT:    fmov d1, x0
; SVE-NEXT:    ptrue p0.d
; SVE-NEXT:    mov v0.d[1], x3
; SVE-NEXT:    mov v1.d[1], x1
; SVE-NEXT:    cnt z0.d, p0/m, z0.d
; SVE-NEXT:    cnt z1.d, p0/m, z1.d
; SVE-NEXT:    add v0.2d, v1.2d, v0.2d
; SVE-NEXT:    addp d0, v0.2d
; SVE-NEXT:    fmov x0, d0
; SVE-NEXT:    ret
;
; CSSC-LABEL: ctpop_i256:
; CSSC:       // %bb.0:
; CSSC-NEXT:    cnt x8, x3
; CSSC-NEXT:    cnt x9, x2
; CSSC-NEXT:    cnt x10, x1
; CSSC-NEXT:    cnt x11, x0
; CSSC-NEXT:    add x8, x9, x8
; CSSC-NEXT:    add x9, x11, x10
; CSSC-NEXT:    add x0, x9, x8
; CSSC-NEXT:    ret
  %pop = call i256 @llvm.ctpop.i256(i256 %a)
  %r = trunc i256 %pop to i64
  ret i64 %r
}

; i256 Hamming distance: popcount(x ^ y), result truncated to i64.
;
; The expected code XORs the limbs pairwise (x0^x4, x1^x5, x2^x6, x3^x7),
; i.e. %a is read from x0-x3 and %b from x4-x7, and then counts the set bits
; of the XOR result. Expected lowering for each llc invocation:
;  * plain AArch64 - scalar EORs, the results packed into two 128-bit
;    vectors, byte-wise CNT, ADDV reduction and a final scalar ADD.
;  * with +sve - scalar EORs, predicated CNT per 64-bit lane, vector ADD
;    and ADDP.
;  * with +cssc - scalar EORs, one scalar CNT per limb and a three-ADD tree.
define i64 @hamming_i256(i256 %a, i256 %b) nounwind {
;
;
; NEON-LABEL: hamming_i256:
; NEON:       // %bb.0:
; NEON-NEXT:    eor x8, x0, x4
; NEON-NEXT:    eor x10, x2, x6
; NEON-NEXT:    eor x9, x1, x5
; NEON-NEXT:    fmov d0, x8
; NEON-NEXT:    fmov d1, x10
; NEON-NEXT:    eor x8, x3, x7
; NEON-NEXT:    mov v1.d[1], x8
; NEON-NEXT:    mov v0.d[1], x9
; NEON-NEXT:    cnt v1.16b, v1.16b
; NEON-NEXT:    cnt v0.16b, v0.16b
; NEON-NEXT:    addv b1, v1.16b
; NEON-NEXT:    addv b0, v0.16b
; NEON-NEXT:    fmov x8, d1
; NEON-NEXT:    fmov x9, d0
; NEON-NEXT:    add x0, x9, x8
; NEON-NEXT:    ret
;
; SVE-LABEL: hamming_i256:
; SVE:       // %bb.0:
; SVE-NEXT:    eor x8, x0, x4
; SVE-NEXT:    eor x10, x2, x6
; SVE-NEXT:    eor x9, x1, x5
; SVE-NEXT:    fmov d0, x8
; SVE-NEXT:    fmov d1, x10
; SVE-NEXT:    eor x8, x3, x7
; SVE-NEXT:    ptrue p0.d
; SVE-NEXT:    mov v1.d[1], x8
; SVE-NEXT:    mov v0.d[1], x9
; SVE-NEXT:    cnt z1.d, p0/m, z1.d
; SVE-NEXT:    cnt z0.d, p0/m, z0.d
; SVE-NEXT:    add v0.2d, v0.2d, v1.2d
; SVE-NEXT:    addp d0, v0.2d
; SVE-NEXT:    fmov x0, d0
; SVE-NEXT:    ret
;
; CSSC-LABEL: hamming_i256:
; CSSC:       // %bb.0:
; CSSC-NEXT:    eor x8, x3, x7
; CSSC-NEXT:    eor x9, x0, x4
; CSSC-NEXT:    eor x10, x1, x5
; CSSC-NEXT:    eor x11, x2, x6
; CSSC-NEXT:    cnt x8, x8
; CSSC-NEXT:    cnt x10, x10
; CSSC-NEXT:    cnt x11, x11
; CSSC-NEXT:    cnt x9, x9
; CSSC-NEXT:    add x8, x11, x8
; CSSC-NEXT:    add x9, x9, x10
; CSSC-NEXT:    add x0, x9, x8
; CSSC-NEXT:    ret
  %xor = xor i256 %a, %b
  %pop = call i256 @llvm.ctpop.i256(i256 %xor)
  %r = trunc i256 %pop to i64
  ret i64 %r
}

; i256 count leading zeros, full i256 result.
;
; The intrinsic is called with `i1 false`, which per the LLVM LangRef makes
; the result for a zero input defined rather than poison.
;
; The expected code is the same for all three llc invocations:
;  * upper half: clz(x3) if x3 != 0, otherwise clz(x2) + 64;
;  * lower half: clz(x1) if x1 != 0, otherwise clz(x0) + 64, then + 128;
;  * the upper-half count is selected when (x2 | x3) != 0, otherwise the
;    lower-half count.
; So x3 is treated as the most-significant limb. The count is returned in x0
; and x1-x3 are set to zero.
define i256 @ctlz_i256(i256 %a) nounwind {
;
;
; NEON-LABEL: ctlz_i256:
; NEON:       // %bb.0:
; NEON-NEXT:    clz x8, x2
; NEON-NEXT:    clz x9, x3
; NEON-NEXT:    cmp x3, #0
; NEON-NEXT:    add x8, x8, #64
; NEON-NEXT:    clz x10, x0
; NEON-NEXT:    orr x11, x2, x3
; NEON-NEXT:    csel x8, x9, x8, ne
; NEON-NEXT:    clz x9, x1
; NEON-NEXT:    add x10, x10, #64
; NEON-NEXT:    cmp x1, #0
; NEON-NEXT:    mov x1, xzr
; NEON-NEXT:    mov x2, xzr
; NEON-NEXT:    csel x9, x9, x10, ne
; NEON-NEXT:    cmp x11, #0
; NEON-NEXT:    mov x3, xzr
; NEON-NEXT:    add x9, x9, #128
; NEON-NEXT:    csel x0, x8, x9, ne
; NEON-NEXT:    ret
;
; SVE-LABEL: ctlz_i256:
; SVE:       // %bb.0:
; SVE-NEXT:    clz x8, x2
; SVE-NEXT:    clz x9, x3
; SVE-NEXT:    cmp x3, #0
; SVE-NEXT:    add x8, x8, #64
; SVE-NEXT:    clz x10, x0
; SVE-NEXT:    orr x11, x2, x3
; SVE-NEXT:    csel x8, x9, x8, ne
; SVE-NEXT:    clz x9, x1
; SVE-NEXT:    add x10, x10, #64
; SVE-NEXT:    cmp x1, #0
; SVE-NEXT:    mov x1, xzr
; SVE-NEXT:    mov x2, xzr
; SVE-NEXT:    csel x9, x9, x10, ne
; SVE-NEXT:    cmp x11, #0
; SVE-NEXT:    mov x3, xzr
; SVE-NEXT:    add x9, x9, #128
; SVE-NEXT:    csel x0, x8, x9, ne
; SVE-NEXT:    ret
;
; CSSC-LABEL: ctlz_i256:
; CSSC:       // %bb.0:
; CSSC-NEXT:    clz x8, x2
; CSSC-NEXT:    clz x9, x3
; CSSC-NEXT:    cmp x3, #0
; CSSC-NEXT:    add x8, x8, #64
; CSSC-NEXT:    clz x10, x0
; CSSC-NEXT:    orr x11, x2, x3
; CSSC-NEXT:    csel x8, x9, x8, ne
; CSSC-NEXT:    clz x9, x1
; CSSC-NEXT:    add x10, x10, #64
; CSSC-NEXT:    cmp x1, #0
; CSSC-NEXT:    mov x1, xzr
; CSSC-NEXT:    mov x2, xzr
; CSSC-NEXT:    csel x9, x9, x10, ne
; CSSC-NEXT:    cmp x11, #0
; CSSC-NEXT:    mov x3, xzr
; CSSC-NEXT:    add x9, x9, #128
; CSSC-NEXT:    csel x0, x8, x9, ne
; CSSC-NEXT:    ret
  %r = call i256 @llvm.ctlz.i256(i256 %a, i1 false)
  ret i256 %r
}

; i256 count trailing zeros, full i256 result.
;
; The intrinsic is called with `i1 false`, which per the LLVM LangRef makes
; the result for a zero input defined rather than poison.
;
; Expected selection logic (same shape for every llc invocation):
;  * lower half: tz(x0) if x0 != 0, otherwise tz(x1) + 64;
;  * upper half: tz(x2) if x2 != 0, otherwise tz(x3) + 64, then + 128;
;  * the lower-half count is selected when (x0 | x1) != 0, otherwise the
;    upper-half count.
; The count is returned in x0 and x1-x3 are set to zero.
;
; The per-limb trailing-zero count is expected as RBIT followed by CLZ for
; the plain and +sve invocations, and as a single CTZ with +cssc.
define i256 @cttz_i256(i256 %a) nounwind {
;
;
; NEON-LABEL: cttz_i256:
; NEON:       // %bb.0:
; NEON-NEXT:    rbit x8, x1
; NEON-NEXT:    rbit x9, x0
; NEON-NEXT:    rbit x10, x3
; NEON-NEXT:    cmp x0, #0
; NEON-NEXT:    rbit x11, x2
; NEON-NEXT:    mov x3, xzr
; NEON-NEXT:    clz x8, x8
; NEON-NEXT:    clz x9, x9
; NEON-NEXT:    add x8, x8, #64
; NEON-NEXT:    csel x8, x9, x8, ne
; NEON-NEXT:    clz x9, x10
; NEON-NEXT:    clz x10, x11
; NEON-NEXT:    add x9, x9, #64
; NEON-NEXT:    cmp x2, #0
; NEON-NEXT:    orr x11, x0, x1
; NEON-NEXT:    csel x9, x10, x9, ne
; NEON-NEXT:    cmp x11, #0
; NEON-NEXT:    mov x1, xzr
; NEON-NEXT:    add x9, x9, #128
; NEON-NEXT:    mov x2, xzr
; NEON-NEXT:    csel x0, x8, x9, ne
; NEON-NEXT:    ret
;
; SVE-LABEL: cttz_i256:
; SVE:       // %bb.0:
; SVE-NEXT:    rbit x8, x1
; SVE-NEXT:    rbit x9, x0
; SVE-NEXT:    rbit x10, x3
; SVE-NEXT:    cmp x0, #0
; SVE-NEXT:    rbit x11, x2
; SVE-NEXT:    mov x3, xzr
; SVE-NEXT:    clz x8, x8
; SVE-NEXT:    clz x9, x9
; SVE-NEXT:    add x8, x8, #64
; SVE-NEXT:    csel x8, x9, x8, ne
; SVE-NEXT:    clz x9, x10
; SVE-NEXT:    clz x10, x11
; SVE-NEXT:    add x9, x9, #64
; SVE-NEXT:    cmp x2, #0
; SVE-NEXT:    orr x11, x0, x1
; SVE-NEXT:    csel x9, x10, x9, ne
; SVE-NEXT:    cmp x11, #0
; SVE-NEXT:    mov x1, xzr
; SVE-NEXT:    add x9, x9, #128
; SVE-NEXT:    mov x2, xzr
; SVE-NEXT:    csel x0, x8, x9, ne
; SVE-NEXT:    ret
;
; CSSC-LABEL: cttz_i256:
; CSSC:       // %bb.0:
; CSSC-NEXT:    ctz x8, x1
; CSSC-NEXT:    ctz x9, x0
; CSSC-NEXT:    cmp x0, #0
; CSSC-NEXT:    add x8, x8, #64
; CSSC-NEXT:    ctz x10, x3
; CSSC-NEXT:    orr x11, x0, x1
; CSSC-NEXT:    csel x8, x9, x8, ne
; CSSC-NEXT:    ctz x9, x2
; CSSC-NEXT:    add x10, x10, #64
; CSSC-NEXT:    cmp x2, #0
; CSSC-NEXT:    mov x1, xzr
; CSSC-NEXT:    mov x2, xzr
; CSSC-NEXT:    csel x9, x9, x10, ne
; CSSC-NEXT:    cmp x11, #0
; CSSC-NEXT:    mov x3, xzr
; CSSC-NEXT:    add x9, x9, #128
; CSSC-NEXT:    csel x0, x8, x9, ne
; CSSC-NEXT:    ret
  %r = call i256 @llvm.cttz.i256(i256 %a, i1 false)
  ret i256 %r
}