[AA] Teach getModRefInfo(FenceInst) to consult the AA chain (#192043)

Extend the AA chain (Concept / Model / AAResultBase) with a
getModRefInfo(FenceInst) overload, mirroring the existing CallBase path.
Implement it in ScopedNoAliasAAResult so that !noalias metadata on
fences is respected. Previously, AAResults::getModRefInfo(FenceInst)
only checked getModRefInfoMask and ignored individual AA passes.

Assisted-by: Claude Opus

---------

Co-authored-by: mselehov <mselehov@amd.com>
This commit is contained in:
michaelselehov
2026-04-30 17:54:02 +02:00
committed by GitHub
parent afdbe7bc49
commit e2f92a324b
9 changed files with 266 additions and 9 deletions

View File

@@ -7449,9 +7449,13 @@ does not carry useful data and need not be preserved.
noalias memory-access sets. This means that some collection of memory access
instructions (loads, stores, memory-accessing calls, etc.) that carry
``noalias`` metadata can be explicitly declared not to alias with some other
collection of memory access instructions that carry ``alias.scope`` metadata. If
accesses from different collections alias, the behavior is undefined. Each type
of metadata specifies a list of scopes where each scope has an id and a domain.
collection of memory access instructions that carry ``alias.scope`` metadata.
These metadata kinds may also be attached to ``fence`` instructions to indicate
which scoped memory regions the fence does (or does not) concern; this allows
alias analysis to prove that a fence cannot affect a particular memory location.
If accesses from different collections alias, the behavior is undefined. Each
type of metadata specifies a list of scopes where each scope has an id and a
domain.
When evaluating an aliasing query, if for some domain, the set
of scopes with that domain in one instruction's ``alias.scope`` list is a

View File

@@ -810,6 +810,12 @@ public:
virtual ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
AAQueryInfo &AAQI) = 0;
/// getModRefInfo (for fences) - Return information about whether
/// a particular fence modifies or reads the specified memory location.
/// Pure virtual: every concrete model in the AA chain must supply an answer.
virtual ModRefInfo getModRefInfo(const FenceInst *F,
const MemoryLocation &Loc,
AAQueryInfo &AAQI) = 0;
/// @}
};
@@ -862,6 +868,11 @@ public:
AAQueryInfo &AAQI) override {
return Result.getModRefInfo(Call1, Call2, AAQI);
}
/// Fence overload: forwards the query unchanged to the wrapped analysis
/// result and returns whatever it reports.
ModRefInfo getModRefInfo(const FenceInst *F, const MemoryLocation &Loc,
AAQueryInfo &AAQI) override {
return Result.getModRefInfo(F, Loc, AAQI);
}
};
/// A base class to help implement the function alias analysis results concept.
@@ -920,6 +931,11 @@ public:
AAQueryInfo &AAQI) {
return ModRefInfo::ModRef;
}
/// Default fence query: always answers ModRef, i.e. the fence may both read
/// and write \p Loc. None of the arguments are inspected.
ModRefInfo getModRefInfo(const FenceInst *F, const MemoryLocation &Loc,
AAQueryInfo &AAQI) {
return ModRefInfo::ModRef;
}
};
/// Return true if this pointer is returned by a noalias function.

View File

@@ -75,6 +75,7 @@ public:
LLVM_ABI AliasResult aliasErrno(const MemoryLocation &Loc, const Module *M);
// Keep the AAResultBase overloads (including the FenceInst one) visible; the
// getModRefInfo declared in this class would otherwise hide them.
using AAResultBase::getModRefInfo;
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc,
AAQueryInfo &AAQI);

View File

@@ -22,6 +22,7 @@
namespace llvm {
class FenceInst;
class Function;
class MDNode;
class MemoryLocation;
@@ -47,6 +48,9 @@ public:
AAQueryInfo &AAQI);
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call1,
const CallBase *Call2, AAQueryInfo &AAQI);
/// Fence overload: answers NoModRef when the fence's !noalias / !alias.scope
/// metadata and the scoped-AA tags of \p Loc rule out aliasing, and ModRef
/// otherwise.
LLVM_ABI ModRefInfo getModRefInfo(const FenceInst *F,
const MemoryLocation &Loc,
AAQueryInfo &AAQI);
LLVM_ABI static void
collectScopedDomains(const MDNode *NoAlias,

View File

@@ -57,6 +57,7 @@ public:
LLVM_ABI MemoryEffects getMemoryEffects(const CallBase *Call,
AAQueryInfo &AAQI);
LLVM_ABI MemoryEffects getMemoryEffects(const Function *F);
// Keep the AAResultBase overloads (including the FenceInst one) visible; the
// getModRefInfo declared in this class would otherwise hide them.
using AAResultBase::getModRefInfo;
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc,
AAQueryInfo &AAQI);

View File

@@ -502,14 +502,27 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
return ModRefInfo::Mod;
}
ModRefInfo AAResults::getModRefInfo(const FenceInst *S,
ModRefInfo AAResults::getModRefInfo(const FenceInst *F,
const MemoryLocation &Loc,
AAQueryInfo &AAQI) {
// All we know about a fence instruction is what we get from the ModRef
// mask: if Loc is a constant memory location, the fence definitely could
// not modify it.
if (Loc.Ptr)
return getModRefInfoMask(Loc);
if (Loc.Ptr) {
// Start from ModRef and intersect it with the answer of every AA in the
// chain for this fence/location pair.
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
Result &= AA->getModRefInfo(F, Loc, AAQI);
// NoModRef cannot be refined any further, so stop querying.
if (isNoModRef(Result))
return ModRefInfo::NoModRef;
}
// Apply the ModRef mask. This ensures that if Loc is a constant memory
// location, we take into account the fact that the fence definitely could
// not modify the memory location.
if (!isNoModRef(Result))
Result &= getModRefInfoMask(Loc);
return Result;
}
// No location pointer was supplied: answer ModRef without consulting the
// chain.
return ModRefInfo::ModRef;
}

View File

@@ -36,6 +36,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
@@ -93,6 +94,23 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
return ModRefInfo::ModRef;
}
/// Fence query driven by scoped-noalias metadata.
///
/// Returns NoModRef when either direction of the scope check proves that the
/// fence and \p Loc cannot alias: the location's alias scopes against the
/// fence's !noalias list, or the fence's !alias.scope list against the
/// location's noalias list. Returns ModRef otherwise, and always when the
/// analysis is disabled via EnableScopedNoAlias.
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const FenceInst *F,
                                                const MemoryLocation &Loc,
                                                AAQueryInfo &AAQI) {
  if (!EnableScopedNoAlias)
    return ModRefInfo::ModRef;

  const auto *FenceNoAlias = F->getMetadata(LLVMContext::MD_noalias);
  const auto *FenceScopes = F->getMetadata(LLVMContext::MD_alias_scope);

  // Both directions must admit aliasing for the fence to matter. The
  // short-circuit keeps the original order: the second check only runs when
  // the first one could not rule aliasing out.
  const bool MayAlias = mayAliasInScopes(Loc.AATags.Scope, FenceNoAlias) &&
                        mayAliasInScopes(FenceScopes, Loc.AATags.NoAlias);
  return MayAlias ? ModRefInfo::ModRef : ModRefInfo::NoModRef;
}
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
const CallBase *Call2,
AAQueryInfo &AAQI) {

View File

@@ -0,0 +1,72 @@
; RUN: opt < %s -aa-pipeline=basic-aa,scoped-noalias-aa -passes='print<memoryssa>' -disable-output 2>&1 | FileCheck %s
; Test that ScopedNoAliasAA::getModRefInfo(FenceInst) uses scoped noalias
; metadata to prove a fence cannot affect a given memory location.
; MemorySSA exposes this: when the fence is NoModRef w.r.t. a load, the
; load's clobbering access is liveOnEntry (not the fence).
define i32 @fence_noalias(ptr %p) {
; CHECK-LABEL: MemorySSA for function: fence_noalias
; Fence has !noalias covering the load's scope -> not a clobber.
; The fence's !noalias list (!5) names both arg_scope and other_arg_scope,
; while the loads' !alias.scope list (!0) names only arg_scope.
; CHECK: MemoryUse(liveOnEntry)
; CHECK-NEXT: %v1 = load
; CHECK: MemoryUse(liveOnEntry)
; CHECK-NEXT: %v2 = load
%v1 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
fence syncscope("workgroup") release, !noalias !5
%v2 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
%sum = add i32 %v1, %v2
ret i32 %sum
}
define i32 @fence_alias_scope(ptr %p) {
; CHECK-LABEL: MemorySSA for function: fence_alias_scope
; Symmetric: fence has !alias.scope, load has !noalias covering it.
; Both the fence's !alias.scope and the loads' !noalias use list !7, whose
; only member is fence_sync_scope.
; CHECK: MemoryUse(liveOnEntry)
; CHECK-NEXT: %v1 = load
; CHECK: MemoryUse(liveOnEntry)
; CHECK-NEXT: %v2 = load
%v1 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !7
fence syncscope("workgroup") release, !alias.scope !7
%v2 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !7
%sum = add i32 %v1, %v2
ret i32 %sum
}
define i32 @fence_no_metadata(ptr %p) {
; CHECK-LABEL: MemorySSA for function: fence_no_metadata
; No metadata on fence -> fence is a clobber for the second load.
; The fence's MemoryDef number is captured and the second load must use
; exactly that access; a wildcard pattern would also accept liveOnEntry and
; make this negative test vacuous.
; CHECK: MemoryUse(liveOnEntry)
; CHECK-NEXT: %v1 = load
; CHECK: [[FENCE:[0-9]+]] = MemoryDef(liveOnEntry)
; CHECK-NEXT: fence
; CHECK: MemoryUse([[FENCE]])
; CHECK-NEXT: %v2 = load
%v1 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
fence syncscope("workgroup") release
%v2 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
%sum = add i32 %v1, %v2
ret i32 %sum
}
define i32 @fence_partial_noalias(ptr %p) {
; CHECK-LABEL: MemorySSA for function: fence_partial_noalias
; Fence has !noalias for other_arg_scope only, load is in arg_scope.
; Scopes don't match -> fence is still a clobber.
; The fence's MemoryDef number is captured and the second load must use
; exactly that access; a wildcard pattern would also accept liveOnEntry and
; make this negative test vacuous.
; CHECK: MemoryUse(liveOnEntry)
; CHECK-NEXT: %v1 = load
; CHECK: [[FENCE2:[0-9]+]] = MemoryDef(liveOnEntry)
; CHECK-NEXT: fence
; CHECK: MemoryUse([[FENCE2]])
; CHECK-NEXT: %v2 = load
%v1 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
fence syncscope("workgroup") release, !noalias !3
%v2 = load i32, ptr %p, align 4, !alias.scope !0, !noalias !3
%sum = add i32 %v1, %v2
ret i32 %sum
}
!0 = !{!1}
!1 = distinct !{!1, !2, !"arg_scope"}
!2 = distinct !{!2, !"kernel_domain"}
!3 = !{!4}
!4 = distinct !{!4, !2, !"other_arg_scope"}
!5 = !{!1, !4}
!6 = distinct !{!6, !2, !"fence_sync_scope"}
!7 = !{!6}

View File

@@ -0,0 +1,128 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa,scoped-noalias-aa -passes=load-store-vectorizer -S -o - %s | FileCheck %s
; Test that getModRefInfo(FenceInst, Loc) uses scoped noalias metadata on the
; fence to prove it cannot affect a given memory location. Without this, the
; load-store vectorizer conservatively treats fences as potential clobbers,
; preventing vectorization of adjacent loads across fences.
;
; This models what happens after AMDGPULowerKernelArguments: noalias kernel
; pointer arguments are replaced with loads from the kernarg segment, losing
; the readonly attribute. The !alias.scope/!noalias metadata partially
; compensates, but getModRefInfo(FenceInst) did not check it.
; Positive: fence declares !noalias for the load's scope -> vectorization.
define void @vectorize_loads_across_fence_with_noalias(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @vectorize_loads_across_fence_with_noalias(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[PTR]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; CHECK-NEXT: [[LOAD01:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[LOAD12:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; CHECK-NEXT: fence syncscope("workgroup") release, !noalias [[META5:![0-9]+]]
; CHECK-NEXT: fence syncscope("workgroup") acquire, !noalias [[META5]]
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LOAD01]], [[LOAD12]]
; CHECK-NEXT: call void @use(i32 [[SUM]])
; CHECK-NEXT: ret void
;
%gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 1
%load0 = load i32, ptr addrspace(1) %ptr, align 8, !alias.scope !0, !noalias !3
fence syncscope("workgroup") release, !noalias !5
fence syncscope("workgroup") acquire, !noalias !5
%load1 = load i32, ptr addrspace(1) %gep1, align 4, !alias.scope !0, !noalias !3
%sum = add i32 %load0, %load1
call void @use(i32 %sum)
ret void
}
; Positive: fence declares !alias.scope, load declares !noalias covering that
; scope. Exercises the symmetric branch in ScopedNoAliasAA::getModRefInfo.
define void @vectorize_loads_across_fence_with_alias_scope(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @vectorize_loads_across_fence_with_alias_scope(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[PTR]], align 8, !alias.scope [[META0]], !noalias [[META6:![0-9]+]]
; CHECK-NEXT: [[LOAD01:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[LOAD12:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; CHECK-NEXT: fence syncscope("workgroup") release, !alias.scope [[META6]]
; CHECK-NEXT: fence syncscope("workgroup") acquire, !alias.scope [[META6]]
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LOAD01]], [[LOAD12]]
; CHECK-NEXT: call void @use(i32 [[SUM]])
; CHECK-NEXT: ret void
;
%gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 1
%load0 = load i32, ptr addrspace(1) %ptr, align 8, !alias.scope !0, !noalias !7
fence syncscope("workgroup") release, !alias.scope !7
fence syncscope("workgroup") acquire, !alias.scope !7
%load1 = load i32, ptr addrspace(1) %gep1, align 4, !alias.scope !0, !noalias !7
%sum = add i32 %load0, %load1
call void @use(i32 %sum)
ret void
}
; Negative: no metadata on fence at all -> no vectorization.
define void @no_vectorize_loads_across_fence_without_noalias(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @no_vectorize_loads_across_fence_without_noalias(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i64 1
; CHECK-NEXT: [[LOAD0:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 8, !alias.scope [[META0]], !noalias [[META3]]
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: fence syncscope("workgroup") acquire
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(1) [[GEP1]], align 4, !alias.scope [[META0]], !noalias [[META3]]
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LOAD0]], [[LOAD1]]
; CHECK-NEXT: call void @use(i32 [[SUM]])
; CHECK-NEXT: ret void
;
%gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 1
%load0 = load i32, ptr addrspace(1) %ptr, align 8, !alias.scope !0, !noalias !3
fence syncscope("workgroup") release
fence syncscope("workgroup") acquire
%load1 = load i32, ptr addrspace(1) %gep1, align 4, !alias.scope !0, !noalias !3
%sum = add i32 %load0, %load1
call void @use(i32 %sum)
ret void
}
; Negative: fence has !noalias but only for a different scope (other_arg_scope),
; not the load's scope (arg_scope). Partial coverage does not help.
define void @no_vectorize_loads_across_fence_with_partial_noalias(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define void @no_vectorize_loads_across_fence_with_partial_noalias(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i64 1
; CHECK-NEXT: [[LOAD0:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 8, !alias.scope [[META0]], !noalias [[META3]]
; CHECK-NEXT: fence syncscope("workgroup") release, !noalias [[META3]]
; CHECK-NEXT: fence syncscope("workgroup") acquire, !noalias [[META3]]
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(1) [[GEP1]], align 4, !alias.scope [[META0]], !noalias [[META3]]
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[LOAD0]], [[LOAD1]]
; CHECK-NEXT: call void @use(i32 [[SUM]])
; CHECK-NEXT: ret void
;
%gep1 = getelementptr i32, ptr addrspace(1) %ptr, i64 1
%load0 = load i32, ptr addrspace(1) %ptr, align 8, !alias.scope !0, !noalias !3
fence syncscope("workgroup") release, !noalias !3
fence syncscope("workgroup") acquire, !noalias !3
%load1 = load i32, ptr addrspace(1) %gep1, align 4, !alias.scope !0, !noalias !3
%sum = add i32 %load0, %load1
call void @use(i32 %sum)
ret void
}
declare void @use(i32)
; Metadata: three noalias scopes in the same domain.
!0 = !{!1}
!1 = distinct !{!1, !2, !"arg_scope"}
!2 = distinct !{!2, !"kernel_domain"}
!3 = !{!4}
!4 = distinct !{!4, !2, !"other_arg_scope"}
!5 = !{!1, !4}
!6 = distinct !{!6, !2, !"fence_sync_scope"}
!7 = !{!6}
;.
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"arg_scope"}
; CHECK: [[META2]] = distinct !{[[META2]], !"kernel_domain"}
; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
; CHECK: [[META4]] = distinct !{[[META4]], [[META2]], !"other_arg_scope"}
; CHECK: [[META5]] = !{[[META1]], [[META4]]}
; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
; CHECK: [[META7]] = distinct !{[[META7]], [[META2]], !"fence_sync_scope"}
;.