On GPUs, `TTI::getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)` returns the element bit width, not the whole vector size as it does on CPUs. This patch therefore replaces that call with a call to `getLoadStoreVecRegBitWidth()`. Because that value depends on the address space, the calculation is also moved so that it is done per seed. This patch also adds an AMDGPU lit test directory with a simple test.
23 lines
838 B
LLVM
23 lines
838 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
|
|
; RUN: opt -passes=sandbox-vectorizer -sbvec-passes="seed-collection<load-store-vec>" -mtriple=amdgcn--amdhsa -mcpu=gfx900 %s -S | FileCheck %s
|
|
|
|
; Test function: loads two adjacent floats (at %ptr0 and at %ptr0 + 4 bytes)
; and stores each one back to the address it was loaded from.
; The autogenerated FileCheck assertions inside the body expect the two scalar
; loads to become a single <2 x float> load and the two scalar stores to become
; a single <2 x float> store, both tagged with the same !sandboxvec metadata.
define void @basic(ptr %ptr0) {
|
|
; CHECK-LABEL: define void @basic(
|
|
; CHECK-SAME: ptr [[PTR0:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: [[VECIINITL:%.*]] = load <2 x float>, ptr [[PTR0]], align 1, !sandboxvec [[META0:![0-9]+]]
|
|
; CHECK-NEXT: store <2 x float> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META0]]
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; %ptr1 is %ptr0 advanced by 4 bytes (i8-typed GEP with index 4), i.e. the
; address immediately following the 4-byte float at %ptr0.
%ptr1 = getelementptr inbounds i8, ptr %ptr0, i64 4
|
|
|
|
; Two scalar float loads from consecutive addresses.
%ld0 = load float, ptr %ptr0
|
|
%ld1 = load float, ptr %ptr1
|
|
|
|
; Two scalar float stores of the loaded values back to the same addresses.
store float %ld0, ptr %ptr0
|
|
store float %ld1, ptr %ptr1
|
|
ret void
|
|
}
|
|
;.
|
|
; CHECK: [[META0]] = distinct !{!"sandboxregion"}
|
|
;.
|