[AA] Teach getModRefInfo(FenceInst) to consult the AA chain (#192043)
Extend the AA chain (Concept / Model / AAResultBase) with a getModRefInfo(FenceInst) overload, mirroring the existing CallBase path. Implement it in ScopedNoAliasAAResult so that !noalias metadata on fences is respected. Previously, AAResults::getModRefInfo(FenceInst) only checked getModRefInfoMask and ignored individual AA passes. Assisted-by: Claude Opus --------- Co-authored-by: mselehov <mselehov@amd.com>
This commit is contained in:
@@ -7449,9 +7449,13 @@ does not carry useful data and need not be preserved.
|
||||
noalias memory-access sets. This means that some collection of memory access
|
||||
instructions (loads, stores, memory-accessing calls, etc.) that carry
|
||||
``noalias`` metadata can be explicitly declared not to alias with some other
|
||||
collection of memory access instructions that carry ``alias.scope`` metadata. If
|
||||
accesses from different collections alias, the behavior is undefined. Each type
|
||||
of metadata specifies a list of scopes where each scope has an id and a domain.
|
||||
collection of memory access instructions that carry ``alias.scope`` metadata.
|
||||
These metadata kinds may also be attached to ``fence`` instructions to indicate
|
||||
which scoped memory regions the fence does (or does not) concern; this allows
|
||||
alias analysis to prove that a fence cannot affect a particular memory location.
|
||||
If accesses from different collections alias, the behavior is undefined. Each
|
||||
type of metadata specifies a list of scopes where each scope has an id and a
|
||||
domain.
|
||||
|
||||
When evaluating an aliasing query, if for some domain, the set
|
||||
of scopes with that domain in one instruction's ``alias.scope`` list is a
|
||||
|
||||
@@ -810,6 +810,12 @@ public:
|
||||
virtual ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
|
||||
AAQueryInfo &AAQI) = 0;
|
||||
|
||||
/// getModRefInfo (for fences) - Return information about whether
|
||||
/// a particular fence modifies or reads the specified memory location.
///
/// \param F    the fence instruction being queried.
/// \param Loc  the memory location the fence is tested against.
/// \param AAQI query state handed to the implementation.
///
/// Pure virtual: every concrete model has to supply its own implementation.
|
||||
virtual ModRefInfo getModRefInfo(const FenceInst *F,
|
||||
const MemoryLocation &Loc,
|
||||
AAQueryInfo &AAQI) = 0;
|
||||
|
||||
/// @}
|
||||
};
|
||||
|
||||
@@ -862,6 +868,11 @@ public:
|
||||
AAQueryInfo &AAQI) override {
|
||||
return Result.getModRefInfo(Call1, Call2, AAQI);
|
||||
}
|
||||
|
||||
/// Fence overload of the concept interface: forwards the query, with all
/// three arguments unchanged, to the wrapped Result object and returns its
/// answer as-is.
ModRefInfo getModRefInfo(const FenceInst *F, const MemoryLocation &Loc,
|
||||
AAQueryInfo &AAQI) override {
|
||||
return Result.getModRefInfo(F, Loc, AAQI);
|
||||
}
|
||||
};
|
||||
|
||||
/// A base class to help implement the function alias analysis results concept.
|
||||
@@ -920,6 +931,11 @@ public:
|
||||
AAQueryInfo &AAQI) {
|
||||
return ModRefInfo::ModRef;
|
||||
}
|
||||
|
||||
/// Default fence overload: ignores \p F, \p Loc and \p AAQI and always
/// reports ModRefInfo::ModRef, i.e. it never narrows the answer.
ModRefInfo getModRefInfo(const FenceInst *F, const MemoryLocation &Loc,
|
||||
AAQueryInfo &AAQI) {
|
||||
return ModRefInfo::ModRef;
|
||||
}
|
||||
};
|
||||
|
||||
/// Return true if this pointer is returned by a noalias function.
|
||||
|
||||
@@ -75,6 +75,7 @@ public:
|
||||
|
||||
LLVM_ABI AliasResult aliasErrno(const MemoryLocation &Loc, const Module *M);
|
||||
|
||||
// Keep the AAResultBase overloads of getModRefInfo visible next to the
// overload this class declares itself; without the using-declaration the
// class's own declaration would hide them.
using AAResultBase::getModRefInfo;
|
||||
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call,
|
||||
const MemoryLocation &Loc,
|
||||
AAQueryInfo &AAQI);
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class FenceInst;
|
||||
class Function;
|
||||
class MDNode;
|
||||
class MemoryLocation;
|
||||
@@ -47,6 +48,9 @@ public:
|
||||
AAQueryInfo &AAQI);
|
||||
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call1,
|
||||
const CallBase *Call2, AAQueryInfo &AAQI);
|
||||
/// Fence overload of getModRefInfo: takes the fence \p F, the queried memory
/// location \p Loc and the query state \p AAQI.
/// NOTE(review): presumably this declares
/// ScopedNoAliasAAResult::getModRefInfo(const FenceInst *, ...) -- confirm
/// against the enclosing class, which is outside this hunk.
LLVM_ABI ModRefInfo getModRefInfo(const FenceInst *F,
|
||||
const MemoryLocation &Loc,
|
||||
AAQueryInfo &AAQI);
|
||||
|
||||
LLVM_ABI static void
|
||||
collectScopedDomains(const MDNode *NoAlias,
|
||||
|
||||
@@ -57,6 +57,7 @@ public:
|
||||
LLVM_ABI MemoryEffects getMemoryEffects(const CallBase *Call,
|
||||
AAQueryInfo &AAQI);
|
||||
LLVM_ABI MemoryEffects getMemoryEffects(const Function *F);
|
||||
// Keep the AAResultBase overloads of getModRefInfo visible next to the
// overload this class declares itself; without the using-declaration the
// class's own declaration would hide them.
using AAResultBase::getModRefInfo;
|
||||
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call,
|
||||
const MemoryLocation &Loc,
|
||||
AAQueryInfo &AAQI);
|
||||
|
||||
@@ -502,14 +502,27 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
|
||||
return ModRefInfo::Mod;
|
||||
}
|
||||
|
||||
// Mod/ref query for a fence against a memory location.
//
// NOTE(review): this hunk shows a diff with its +/- markers lost, so two
// versions are interleaved below: the signature taking `S` followed by the
// bare `if (Loc.Ptr) return getModRefInfoMask(Loc);`, and the signature taking
// `F` followed by the braced `if (Loc.Ptr) { ... }` body. Presumably the former
// is the removed text and the latter the added text -- confirm against the
// real file.
//
// The braced version starts from ModRef, intersects the answer of every
// entry in AAs, returns NoModRef as soon as the intersection reaches it, and
// finally intersects with getModRefInfoMask(Loc). When Loc.Ptr is null the
// function returns ModRef.
ModRefInfo AAResults::getModRefInfo(const FenceInst *S,
|
||||
ModRefInfo AAResults::getModRefInfo(const FenceInst *F,
|
||||
const MemoryLocation &Loc,
|
||||
AAQueryInfo &AAQI) {
|
||||
// All we know about a fence instruction is what we get from the ModRef
|
||||
// mask: if Loc is a constant memory location, the fence definitely could
|
||||
// not modify it.
|
||||
if (Loc.Ptr)
|
||||
return getModRefInfoMask(Loc);
|
||||
if (Loc.Ptr) {
|
||||
// Start from the widest answer; each AA can only remove bits via `&=`.
ModRefInfo Result = ModRefInfo::ModRef;
|
||||
|
||||
for (const auto &AA : AAs) {
|
||||
Result &= AA->getModRefInfo(F, Loc, AAQI);
|
||||
|
||||
// Nothing left to narrow once the intersection is NoModRef.
if (isNoModRef(Result))
|
||||
return ModRefInfo::NoModRef;
|
||||
}
|
||||
|
||||
// Apply the ModRef mask. This ensures that if Loc is a constant memory
|
||||
// location, we take into account the fact that the fence definitely could
|
||||
// not modify the memory location.
|
||||
// Result cannot be NoModRef at this point (the loop above returns early in
// that case), so the mask is always applied.
if (!isNoModRef(Result))
|
||||
Result &= getModRefInfoMask(Loc);
|
||||
|
||||
return Result;
|
||||
}
|
||||
// No pointer in Loc: nothing to query, report ModRef.
return ModRefInfo::ModRef;
|
||||
}
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/Analysis/MemoryLocation.h"
|
||||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Metadata.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
@@ -93,6 +94,23 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
|
||||
return ModRefInfo::ModRef;
|
||||
}
|
||||
|
||||
/// Fence overload: decide from scoped-noalias metadata whether fence \p F can
/// read or write the memory described by \p Loc.
///
/// Returns NoModRef when either direction of the scope test rules out an
/// interaction:
///   - Loc's alias scopes checked against the fence's !noalias list, or
///   - the fence's !alias.scope list checked against Loc's noalias tags.
/// Returns ModRef otherwise, and always when EnableScopedNoAlias is off.
/// \p AAQI is not consulted.
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const FenceInst *F,
                                                const MemoryLocation &Loc,
                                                AAQueryInfo &AAQI) {
  if (EnableScopedNoAlias) {
    // Short-circuit keeps the original order: the second direction is only
    // evaluated when the first one could not prove independence.
    const bool ProvenIndependent =
        !mayAliasInScopes(Loc.AATags.Scope,
                          F->getMetadata(LLVMContext::MD_noalias)) ||
        !mayAliasInScopes(F->getMetadata(LLVMContext::MD_alias_scope),
                          Loc.AATags.NoAlias);
    if (ProvenIndependent)
      return ModRefInfo::NoModRef;
  }

  return ModRefInfo::ModRef;
}
|
||||
|
||||
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
|
||||
const CallBase *Call2,
|
||||
AAQueryInfo &AAQI) {
|
||||
|
||||
72
llvm/test/Analysis/ScopedNoAliasAA/fence-modref.ll
Normal file
72
llvm/test/Analysis/ScopedNoAliasAA/fence-modref.ll
Normal file
@@ -0,0 +1,72 @@
|
||||
; RUN: opt < %s -aa-pipeline=basic-aa,scoped-noalias-aa -passes='print<memoryssa>' -disable-output 2>&1 | FileCheck %s
|
||||
|
||||
; Test that ScopedNoAliasAA::getModRefInfo(FenceInst) uses scoped noalias
|
||||
; metadata to prove a fence cannot affect a given memory location.
|
||||
; MemorySSA exposes this: when the fence is NoModRef w.r.t. a load, the
|
||||
; load's clobbering access is liveOnEntry (not the fence).
|
||||
|
||||
define i32 @fence_noalias(ptr %p) {
|
||||
; CHECK-LABEL: MemorySSA for function: fence_noalias
|
||||
; Fence has !noalias covering the load's scope -> not a clobber.
; The fence's !noalias list is !5 (arg_scope and other_arg_scope); the loads'
; !alias.scope list is !0 (arg_scope only), so every scope of the loads is
; named by the fence. Both loads are therefore expected to be uses of
; liveOnEntry.
|
||||
; CHECK: MemoryUse(liveOnEntry)
|
||||
; CHECK-NEXT: %v1 = load
|
||||
; CHECK: MemoryUse(liveOnEntry)
|
||||
; CHECK-NEXT: %v2 = load
|
||||
%v1 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
|
||||
fence syncscope("workgroup") release, !noalias !5
|
||||
%v2 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
|
||||
%sum = add i32 %v1, %v2
|
||||
ret i32 %sum
|
||||
}
|
||||
|
||||
define i32 @fence_alias_scope(ptr %p) {
|
||||
; CHECK-LABEL: MemorySSA for function: fence_alias_scope
|
||||
; Symmetric: fence has !alias.scope, load has !noalias covering it.
; The fence's !alias.scope list and the loads' !noalias list are the same
; node, !7 (fence_sync_scope only). Both loads are expected to be uses of
; liveOnEntry.
|
||||
; CHECK: MemoryUse(liveOnEntry)
|
||||
; CHECK-NEXT: %v1 = load
|
||||
; CHECK: MemoryUse(liveOnEntry)
|
||||
; CHECK-NEXT: %v2 = load
|
||||
%v1 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !7
|
||||
fence syncscope("workgroup") release, !alias.scope !7
|
||||
%v2 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !7
|
||||
%sum = add i32 %v1, %v2
|
||||
ret i32 %sum
|
||||
}
|
||||
|
||||
define i32 @fence_no_metadata(ptr %p) {
; CHECK-LABEL: MemorySSA for function: fence_no_metadata
; No metadata on fence -> fence is a clobber for the second load.
; The fence's MemoryDef number is captured and the second load must use exactly
; that access. A bare wildcard inside MemoryUse(...) would also accept
; liveOnEntry and so could never detect a wrongly skipped fence.
; CHECK: MemoryUse(liveOnEntry)
; CHECK-NEXT: %v1 = load
; CHECK: [[FENCE:[0-9]+]] = MemoryDef(liveOnEntry)
; CHECK-NEXT: fence
; CHECK: MemoryUse([[FENCE]])
; CHECK-NEXT: %v2 = load
  %v1 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
  fence syncscope("workgroup") release
  %v2 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
  %sum = add i32 %v1, %v2
  ret i32 %sum
}
|
||||
|
||||
define i32 @fence_partial_noalias(ptr %p) {
; CHECK-LABEL: MemorySSA for function: fence_partial_noalias
; Fence has !noalias for other_arg_scope only, load is in arg_scope.
; Scopes don't match -> fence is still a clobber.
; The fence's MemoryDef number is captured and the second load must use exactly
; that access. A bare wildcard inside MemoryUse(...) would also accept
; liveOnEntry and so could never detect a wrongly skipped fence.
; CHECK: MemoryUse(liveOnEntry)
; CHECK-NEXT: %v1 = load
; CHECK: [[FENCE2:[0-9]+]] = MemoryDef(liveOnEntry)
; CHECK-NEXT: fence
; CHECK: MemoryUse([[FENCE2]])
; CHECK-NEXT: %v2 = load
  %v1 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
  fence syncscope("workgroup") release, !noalias !3
  %v2 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
  %sum = add i32 %v1, %v2
  ret i32 %sum
}
|
||||
|
||||
!0 = !{!1}
|
||||
!1 = distinct !{!1, !2, !"arg_scope"}
|
||||
!2 = distinct !{!2, !"kernel_domain"}
|
||||
!3 = !{!4}
|
||||
!4 = distinct !{!4, !2, !"other_arg_scope"}
|
||||
!5 = !{!1, !4}
|
||||
!6 = distinct !{!6, !2, !"fence_sync_scope"}
|
||||
!7 = !{!6}
|
||||
@@ -0,0 +1,128 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa,scoped-noalias-aa -passes=load-store-vectorizer -S -o - %s | FileCheck %s
|
||||
|
||||
; Test that getModRefInfo(FenceInst, Loc) uses scoped noalias metadata on the
|
||||
; fence to prove it cannot affect a given memory location. Without this, the
|
||||
; load-store vectorizer conservatively treats fences as potential clobbers,
|
||||
; preventing vectorization of adjacent loads across fences.
|
||||
;
|
||||
; This models what happens after AMDGPULowerKernelArguments: noalias kernel
|
||||
; pointer arguments are replaced with loads from the kernarg segment, losing
|
||||
; the readonly attribute. The !alias.scope/!noalias metadata partially
|
||||
; compensates, but getModRefInfo(FenceInst) did not check it.
|
||||
|
||||
; Positive: fence declares !noalias for the load's scope -> vectorization.
|
||||
; Both fences carry !noalias !5, which lists arg_scope and other_arg_scope; the
; two loads' !alias.scope list is !0 (arg_scope only). The expected output
; merges the two i32 loads into one <2 x i32> load placed before the fences.
define void @vectorize_loads_across_fence_with_noalias(ptr addrspace(1) %ptr) {
|
||||
; CHECK-LABEL: define void @vectorize_loads_across_fence_with_noalias(
|
||||
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[PTR]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
|
||||
; CHECK-NEXT: [[LOAD01:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[LOAD12:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
|
||||
; CHECK-NEXT: fence syncscope("workgroup") release, !noalias [[META5:![0-9]+]]
|
||||
; CHECK-NEXT: fence syncscope("workgroup") acquire, !noalias [[META5]]
|
||||
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LOAD01]], [[LOAD12]]
|
||||
; CHECK-NEXT: call void @use(i32 [[SUM]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 1
|
||||
%load0 = load i32, ptr addrspace(1) %ptr, align 8, !alias.scope !0, !noalias !3
|
||||
fence syncscope("workgroup") release, !noalias !5
|
||||
fence syncscope("workgroup") acquire, !noalias !5
|
||||
%load1 = load i32, ptr addrspace(1) %gep1, align 4, !alias.scope !0, !noalias !3
|
||||
%sum = add i32 %load0, %load1
|
||||
call void @use(i32 %sum)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Positive: fence declares !alias.scope, load declares !noalias covering that
|
||||
; scope. Exercises the symmetric branch in ScopedNoAliasAA::getModRefInfo.
|
||||
; Both fences carry !alias.scope !7 (fence_sync_scope only) and both loads
; carry !noalias !7, i.e. the very same list. The expected output merges the
; two i32 loads into one <2 x i32> load placed before the fences.
define void @vectorize_loads_across_fence_with_alias_scope(ptr addrspace(1) %ptr) {
|
||||
; CHECK-LABEL: define void @vectorize_loads_across_fence_with_alias_scope(
|
||||
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[PTR]], align 8, !alias.scope [[META0]], !noalias [[META6:![0-9]+]]
|
||||
; CHECK-NEXT: [[LOAD01:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[LOAD12:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
|
||||
; CHECK-NEXT: fence syncscope("workgroup") release, !alias.scope [[META6]]
|
||||
; CHECK-NEXT: fence syncscope("workgroup") acquire, !alias.scope [[META6]]
|
||||
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LOAD01]], [[LOAD12]]
|
||||
; CHECK-NEXT: call void @use(i32 [[SUM]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 1
|
||||
%load0 = load i32, ptr addrspace(1) %ptr, align 8, !alias.scope !0, !noalias !7
|
||||
fence syncscope("workgroup") release, !alias.scope !7
|
||||
fence syncscope("workgroup") acquire, !alias.scope !7
|
||||
%load1 = load i32, ptr addrspace(1) %gep1, align 4, !alias.scope !0, !noalias !7
|
||||
%sum = add i32 %load0, %load1
|
||||
call void @use(i32 %sum)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Negative: no metadata on fence at all -> no vectorization.
|
||||
; Neither fence carries !alias.scope or !noalias. The expected output is the
; input unchanged: two scalar i32 loads, one on each side of the fence pair.
define void @no_vectorize_loads_across_fence_without_noalias(ptr addrspace(1) %ptr) {
|
||||
; CHECK-LABEL: define void @no_vectorize_loads_across_fence_without_noalias(
|
||||
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
|
||||
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[LOAD0:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 8, !alias.scope [[META0]], !noalias [[META3]]
|
||||
; CHECK-NEXT: fence syncscope("workgroup") release
|
||||
; CHECK-NEXT: fence syncscope("workgroup") acquire
|
||||
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(1) [[GEP1]], align 4, !alias.scope [[META0]], !noalias [[META3]]
|
||||
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LOAD0]], [[LOAD1]]
|
||||
; CHECK-NEXT: call void @use(i32 [[SUM]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 1
|
||||
%load0 = load i32, ptr addrspace(1) %ptr, align 8, !alias.scope !0, !noalias !3
|
||||
fence syncscope("workgroup") release
|
||||
fence syncscope("workgroup") acquire
|
||||
%load1 = load i32, ptr addrspace(1) %gep1, align 4, !alias.scope !0, !noalias !3
|
||||
%sum = add i32 %load0, %load1
|
||||
call void @use(i32 %sum)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Negative: fence has !noalias but only for a different scope (other_arg_scope),
|
||||
; not the load's scope (arg_scope). Partial coverage does not help.
|
||||
; Both fences carry !noalias !3 (other_arg_scope only), while the loads'
; !alias.scope list is !0 (arg_scope only), so the fences' list does not name
; the loads' scope. The expected output is the input unchanged: two scalar i32
; loads, one on each side of the fence pair.
define void @no_vectorize_loads_across_fence_with_partial_noalias(ptr addrspace(1) %ptr) {
|
||||
; CHECK-LABEL: define void @no_vectorize_loads_across_fence_with_partial_noalias(
|
||||
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
|
||||
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[LOAD0:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 8, !alias.scope [[META0]], !noalias [[META3]]
|
||||
; CHECK-NEXT: fence syncscope("workgroup") release, !noalias [[META3]]
|
||||
; CHECK-NEXT: fence syncscope("workgroup") acquire, !noalias [[META3]]
|
||||
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(1) [[GEP1]], align 4, !alias.scope [[META0]], !noalias [[META3]]
|
||||
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LOAD0]], [[LOAD1]]
|
||||
; CHECK-NEXT: call void @use(i32 [[SUM]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 1
|
||||
%load0 = load i32, ptr addrspace(1) %ptr, align 8, !alias.scope !0, !noalias !3
|
||||
fence syncscope("workgroup") release, !noalias !3
|
||||
fence syncscope("workgroup") acquire, !noalias !3
|
||||
%load1 = load i32, ptr addrspace(1) %gep1, align 4, !alias.scope !0, !noalias !3
|
||||
%sum = add i32 %load0, %load1
|
||||
call void @use(i32 %sum)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @use(i32)
|
||||
|
||||
; Metadata: three noalias scopes in the same domain.
|
||||
!0 = !{!1}
|
||||
!1 = distinct !{!1, !2, !"arg_scope"}
|
||||
!2 = distinct !{!2, !"kernel_domain"}
|
||||
!3 = !{!4}
|
||||
!4 = distinct !{!4, !2, !"other_arg_scope"}
|
||||
!5 = !{!1, !4}
|
||||
!6 = distinct !{!6, !2, !"fence_sync_scope"}
|
||||
!7 = !{!6}
|
||||
;.
|
||||
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
|
||||
; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"arg_scope"}
|
||||
; CHECK: [[META2]] = distinct !{[[META2]], !"kernel_domain"}
|
||||
; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
|
||||
; CHECK: [[META4]] = distinct !{[[META4]], [[META2]], !"other_arg_scope"}
|
||||
; CHECK: [[META5]] = !{[[META1]], [[META4]]}
|
||||
; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
|
||||
; CHECK: [[META7]] = distinct !{[[META7]], [[META2]], !"fence_sync_scope"}
|
||||
;.
|
||||
Reference in New Issue
Block a user