From dfa56b58d663cf79cd4a825ef585aa87d9133a86 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 30 Apr 2026 12:32:44 -0400 Subject: [PATCH] [RFC][DataLayout] Add null pointer value infrastructure (#183207) Add support for specifying the null pointer bit representation per address space in DataLayout via new pointer spec flags: - 'z': null pointer is all-zeros - 'o': null pointer is all-ones When neither flag is present, the null pointer value is zero. No target DataLayout strings are updated in this change. This is pure infrastructure for a future ConstantPointerNull semantic change to support targets with non-zero null pointers (e.g. AMDGPU). --- llvm/docs/LangRef.rst | 7 ++- llvm/docs/ReleaseNotes.md | 7 +++ llvm/include/llvm/IR/DataLayout.h | 9 +++- llvm/lib/IR/DataLayout.cpp | 30 ++++++++--- llvm/unittests/IR/DataLayoutTest.cpp | 76 +++++++++++++++++++++++++++- 5 files changed, 119 insertions(+), 10 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index b351624a7588..83206c76a095 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3395,12 +3395,15 @@ as follows: The optional ``<flags>`` are used to specify properties of pointers in this address space: the character ``u`` marks pointers as having an unstable representation, and ``e`` marks pointers having external state. See - :ref:`Non-Integral Pointer Types <nointptrtype>`. The ``<name>`` is an + :ref:`Non-Integral Pointer Types <nointptrtype>`. Additionally, the + null pointer bit representation can be specified: ``z`` indicates it is + all-zeros, and ``o`` indicates it is all-ones. At most one of ``z`` or + ``o`` may be specified. If neither ``z`` nor ``o`` is specified, the null + pointer bit representation defaults to all-zeros. The ``<name>`` is an optional name of that address space, surrounded by ``(`` and ``)``. 
If the name is specified, it must be unique to that address space and cannot be ``A``, ``G``, or ``P`` which are pre-defined names used to denote alloca, global, and program address space respectively. - ``i<size>:<abi>[:<pref>]`` This specifies the alignment for an integer type of a given bit ``<size>``. The value of ``<size>`` must be in the range [1,2^24). diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index b68495c6f858..4fba67c7d833 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -81,6 +81,13 @@ Makes programs 10x faster by doing Special New Thing. will be reintroduced in the future with bitwise-all-zeros semantics to support non-zero null pointers. +* Added support for specifying the null pointer bit representation per + address space in `DataLayout`. Pointer specifications (`p`) accept new + flags: `z` (null is all-zeros) and `o` (null is all-ones). Address + spaces without an explicit flag default to all-zeros. See the + `DataLayout` section of the + [LangRef](https://llvm.org/docs/LangRef.html#data-layout) for details. + * Removed TypePromoteFloat legalization from SelectionDAG * Removed `bugpoint`. Usage has been replaced by `llvm-reduce` and diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 9c0ee38635b8..934c83878241 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -95,6 +95,8 @@ public: bool HasExternalState; // Symbolic name of the address space. std::string AddrSpaceName; + /// The null pointer bit representation for this address space. + APInt NullPtrValue; LLVM_ABI bool operator==(const PointerSpec &Other) const; }; @@ -164,7 +166,7 @@ private: void setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign, Align PrefAlign, uint32_t IndexBitWidth, bool HasUnstableRepr, bool HasExternalState, - StringRef AddrSpaceName); + StringRef AddrSpaceName, APInt NullPtrValue); /// Internal helper to get alignment for integer of given bitwidth. 
LLVM_ABI Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const; @@ -442,6 +444,11 @@ public: return PTy && hasExternalState(PTy->getPointerAddressSpace()); } + /// Returns the null pointer bit pattern for the given address space. + APInt getNullPtrValue(unsigned AS) const { + return getPointerSpec(AS).NullPtrValue; + } + /// Returns whether passes must avoid introducing `inttoptr` instructions /// for this address space (unless they have target-specific knowledge). /// diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index 758f49f60c3f..82b33887b81f 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -153,6 +153,7 @@ bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const { IndexBitWidth == Other.IndexBitWidth && HasUnstableRepresentation == Other.HasUnstableRepresentation && HasExternalState == Other.HasExternalState && + NullPtrValue == Other.NullPtrValue && AddrSpaceName == Other.AddrSpaceName; } @@ -194,7 +195,7 @@ DataLayout::DataLayout() FloatSpecs(ArrayRef(DefaultFloatSpecs)) { // Default pointer type specifications. setPointerSpec(0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false, - false, ""); + false, "", APInt::getZero(64)); } DataLayout::DataLayout(StringRef LayoutString) : DataLayout() { @@ -446,6 +447,9 @@ Error DataLayout::parsePointerSpec( unsigned AddrSpace = 0; bool ExternalState = false; bool UnstableRepr = false; + // Null pointer value flags: default, z = all-zeros, o = all-ones. 
+ enum class NullPtrKind { Default, Zero, AllOnes }; + NullPtrKind NullPtrFlag = NullPtrKind::Default; StringRef AddrSpaceName; StringRef AddrSpaceStr = Components[0]; while (!AddrSpaceStr.empty()) { @@ -454,6 +458,14 @@ Error DataLayout::parsePointerSpec( ExternalState = true; } else if (C == 'u') { UnstableRepr = true; + } else if (C == 'z') { + if (NullPtrFlag != NullPtrKind::Default) + return createStringError("only one of 'z' or 'o' may be specified"); + NullPtrFlag = NullPtrKind::Zero; + } else if (C == 'o') { + if (NullPtrFlag != NullPtrKind::Default) + return createStringError("only one of 'z' or 'o' may be specified"); + NullPtrFlag = NullPtrKind::AllOnes; } else if (isAlpha(C)) { return createStringError("'%c' is not a valid pointer specification flag", C); @@ -506,8 +518,12 @@ Error DataLayout::parsePointerSpec( return createStringError( "index size cannot be larger than the pointer size"); + APInt NullPtrValue = NullPtrFlag == NullPtrKind::AllOnes + ? APInt::getAllOnes(BitWidth) + : APInt::getZero(BitWidth); + setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth, - UnstableRepr, ExternalState, AddrSpaceName); + UnstableRepr, ExternalState, AddrSpaceName, NullPtrValue); return Error::success(); } @@ -692,7 +708,7 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) { const PointerSpec &PS = getPointerSpec(AS); setPointerSpec(AS, PS.BitWidth, PS.ABIAlign, PS.PrefAlign, PS.IndexBitWidth, /*HasUnstableRepr=*/true, /*HasExternalState=*/false, - getAddressSpaceName(AS)); + getAddressSpaceName(AS), PS.NullPtrValue); } return Error::success(); @@ -741,13 +757,14 @@ DataLayout::getPointerSpec(uint32_t AddrSpace) const { void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign, Align PrefAlign, uint32_t IndexBitWidth, bool HasUnstableRepr, - bool HasExternalState, - StringRef AddrSpaceName) { + bool HasExternalState, StringRef AddrSpaceName, + APInt NullPtrValue) { auto I = lower_bound(PointerSpecs, 
AddrSpace, LessPointerAddrSpace()); if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) { PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth, HasUnstableRepr, - HasExternalState, AddrSpaceName.str()}); + HasExternalState, AddrSpaceName.str(), + std::move(NullPtrValue)}); } else { I->BitWidth = BitWidth; I->ABIAlign = ABIAlign; @@ -756,6 +773,7 @@ void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, I->HasUnstableRepresentation = HasUnstableRepr; I->HasExternalState = HasExternalState; I->AddrSpaceName = AddrSpaceName.str(); + I->NullPtrValue = std::move(NullPtrValue); } } diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp index 40da68c24923..1cbdc32a7723 100644 --- a/llvm/unittests/IR/DataLayoutTest.cpp +++ b/llvm/unittests/IR/DataLayoutTest.cpp @@ -404,7 +404,7 @@ TEST(DataLayout, ParsePointerSpec) { DataLayout::parse(Str), FailedWithMessage("index size cannot be larger than the pointer size")); - // Only 'e', 'u', and 'n' flags are valid. + // Only 'e', 'u', 'z', and 'o' flags are valid. EXPECT_THAT_EXPECTED( DataLayout::parse("pa:32:32"), FailedWithMessage("'a' is not a valid pointer specification flag")); @@ -880,4 +880,78 @@ TEST(DataLayoutTest, Equality) { EXPECT_EQ(DL0, DL1); } +TEST(DataLayoutTest, NullPointerValue) { + // Default: null pointer is all-zeros for every address space. + { + const DataLayout DL = cantFail(DataLayout::parse("")); + APInt Val = DL.getNullPtrValue(0); + EXPECT_TRUE(Val.isZero()); + EXPECT_EQ(Val.getBitWidth(), 64U); + APInt UnlistedVal = DL.getNullPtrValue(42); + EXPECT_TRUE(UnlistedVal.isZero()); + EXPECT_EQ(UnlistedVal.getBitWidth(), 64U); + } + + // Explicit 'z' flag. + { + const DataLayout DL = cantFail(DataLayout::parse("pz1:32:32")); + APInt Val = DL.getNullPtrValue(1); + EXPECT_TRUE(Val.isZero()); + EXPECT_EQ(Val.getBitWidth(), 32U); + } + + // 'o' flag: null pointer is all-ones. 
+ { + const DataLayout DL = cantFail(DataLayout::parse("po1:32:32")); + APInt Val = DL.getNullPtrValue(1); + EXPECT_TRUE(Val.isAllOnes()); + EXPECT_EQ(Val.getBitWidth(), 32U); + } + + // 'o' flag with 64-bit pointer. + { + const DataLayout DL = cantFail(DataLayout::parse("po1:64:64")); + APInt Val = DL.getNullPtrValue(1); + EXPECT_TRUE(Val.isAllOnes()); + EXPECT_EQ(Val.getBitWidth(), 64U); + } + + // Combination with other flags: 'o' + 'u'. + { + const DataLayout DL = cantFail(DataLayout::parse("pou1:32:32")); + APInt Val = DL.getNullPtrValue(1); + EXPECT_TRUE(Val.isAllOnes()); + EXPECT_TRUE(DL.hasUnstableRepresentation(1)); + } + + // Multiple address spaces with different null values. + { + const DataLayout DL = cantFail(DataLayout::parse("po1:32:32-pz3:32:32")); + APInt Val1 = DL.getNullPtrValue(1); + EXPECT_TRUE(Val1.isAllOnes()); + APInt Val3 = DL.getNullPtrValue(3); + EXPECT_TRUE(Val3.isZero()); + } + + // Error: multiple null-ptr flags. + for (StringRef Str : {"pzo1:32:32", "poz1:32:32", "pzoz1:32:32"}) + EXPECT_THAT_EXPECTED( + DataLayout::parse(Str), + FailedWithMessage("only one of 'z' or 'o' may be specified")); + + // 'c' flag is no longer supported. + EXPECT_THAT_EXPECTED(DataLayout::parse("pc1:32:32"), + FailedWithMessage("'c' is not a valid pointer " + "specification flag")); + + // Equality: different null pointer values make layouts not equal. + { + DataLayout DL1 = cantFail(DataLayout::parse("po1:32:32")); + DataLayout DL2 = cantFail(DataLayout::parse("pz1:32:32")); + DataLayout DL3 = cantFail(DataLayout::parse("po1:32:32")); + EXPECT_NE(DL1, DL2); + EXPECT_EQ(DL1, DL3); + } +} + } // anonymous namespace