From 2bddab25dba8d4b0932dc2b6cacef13fcf8a0694 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sat, 25 Dec 2021 15:59:29 -0800 Subject: [PATCH] DebugInfo: Don't hash DIE offsets before they're computed Instead of hashing DIE offsets, hash DIE references the same as they would be when used outside of a loclist - that is, deep hash the type on first use, and hash the numbering on subsequent uses. This does produce different hashes for different type references, where it did not before (because we were hashing zero all the time - so it didn't matter what type was referenced, the hash would be identical). This also allows us to enforce that the DIE offset (& size) is not queried before it is used (which came up while investigating another bug recently). --- llvm/include/llvm/CodeGen/DIE.h | 12 ++++++++++-- llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h | 21 +++++++++++++++++---- llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 12 ++++++++++++ llvm/lib/CodeGen/AsmPrinter/DIEHash.h | 2 ++ llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 9 +-------- llvm/test/DebugInfo/X86/convert-loclist.ll | 2 +- 6 files changed, 43 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/CodeGen/DIE.h b/llvm/include/llvm/CodeGen/DIE.h index 9e94c401bfae..51320aea0327 100644 --- a/llvm/include/llvm/CodeGen/DIE.h +++ b/llvm/include/llvm/CodeGen/DIE.h @@ -774,8 +774,16 @@ public: unsigned getAbbrevNumber() const { return AbbrevNumber; } dwarf::Tag getTag() const { return Tag; } /// Get the compile/type unit relative offset of this DIE. - unsigned getOffset() const { return Offset; } - unsigned getSize() const { return Size; } + unsigned getOffset() const { + // A real Offset can't be zero because the unit headers are at offset zero. + assert(Offset && "Offset being queried before it's been computed."); + return Offset; + } + unsigned getSize() const { + // A real Size can't be zero because it includes the non-empty abbrev code. + assert(Size && "Size being queried before it's been ocmputed."); + return Size; + } bool hasChildren() const { return ForceChildren || !Children.empty(); } void setForceChildren(bool B) { ForceChildren = B; } diff --git a/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h index 5e7db1f2f76c..7525e5865282 100644 --- a/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -33,6 +33,7 @@ class ByteStreamer { virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0; virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "", unsigned PadTo = 0) = 0; + virtual void emitDIERef(const DIE &D) = 0; }; class APByteStreamer final : public ByteStreamer { @@ -54,15 +55,21 @@ public: AP.OutStreamer->AddComment(Comment); AP.emitULEB128(DWord, nullptr, PadTo); } + void emitDIERef(const DIE &D) override { + uint64_t Offset = D.getOffset(); + static constexpr unsigned ULEB128PadSize = 4; + assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); + emitULEB128(Offset, "", ULEB128PadSize); + } }; class HashingByteStreamer final : public ByteStreamer { private: DIEHash &Hash; public: - HashingByteStreamer(DIEHash &H) : Hash(H) {} - void emitInt8(uint8_t Byte, const Twine &Comment) override { - Hash.update(Byte); + HashingByteStreamer(DIEHash &H) : Hash(H) {} + void emitInt8(uint8_t Byte, const Twine &Comment) override { + Hash.update(Byte); } void emitSLEB128(uint64_t DWord, const Twine &Comment) override { Hash.addSLEB128(DWord); @@ -71,6 +78,7 @@ class HashingByteStreamer final : public ByteStreamer { unsigned PadTo) override { Hash.addULEB128(DWord); } + void emitDIERef(const DIE &D) override { Hash.hashRawTypeReference(D); } }; class BufferByteStreamer final : public ByteStreamer { @@ -115,9 +123,14 @@ public: // with each other. for (size_t i = 1; i < Length; ++i) Comments.push_back(""); - } } + void emitDIERef(const DIE &D) override { + uint64_t Offset = D.getOffset(); + static constexpr unsigned ULEB128PadSize = 4; + assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); + emitULEB128(Offset, "", ULEB128PadSize); + } }; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 5f4ee747fcca..b7b26199956a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -207,6 +207,18 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, computeHash(Entry); } +void DIEHash::hashRawTypeReference(const DIE &Entry) { + unsigned &DieNumber = Numbering[&Entry]; + if (DieNumber) { + addULEB128('R'); + addULEB128(DieNumber); + return; + } + DieNumber = Numbering.size(); + addULEB128('T'); + computeHash(Entry); +} + // Hash all of the values in a block like set of values. This assumes that // all of the data is going to be added as integers. void DIEHash::hashBlockData(const DIE::const_value_range &Values) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/llvm/lib/CodeGen/AsmPrinter/DIEHash.h index 29e1da4c5d60..24a973b39271 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -62,6 +62,8 @@ public: /// Encodes and adds \param Value to the hash as a SLEB128. void addSLEB128(int64_t Value); + void hashRawTypeReference(const DIE &Entry); + private: /// Adds \param Str to the hash and includes a NULL byte. void addString(StringRef Str); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 48134f1fd774..b129aa171669 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2539,14 +2539,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, if (Op.getDescription().Op[I] == Encoding::SizeNA) continue; if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) { - uint64_t Offset = - CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset(); - assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); - Streamer.emitULEB128(Offset, "", ULEB128PadSize); - // Make sure comments stay aligned. - for (unsigned J = 0; J < ULEB128PadSize; ++J) - if (Comment != End) - Comment++; + Streamer.emitDIERef(*CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die); } else { for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J) Streamer.emitInt8(Data.getData()[J], Comment != End ? *(Comment++) : ""); diff --git a/llvm/test/DebugInfo/X86/convert-loclist.ll b/llvm/test/DebugInfo/X86/convert-loclist.ll index d732840d4924..56dede02c51d 100644 --- a/llvm/test/DebugInfo/X86/convert-loclist.ll +++ b/llvm/test/DebugInfo/X86/convert-loclist.ll @@ -13,7 +13,7 @@ ; often - add another IR file with a different DW_OP_convert that's otherwise ; identical and demonstrate that they have different DWO IDs. -; SPLIT: 0x00000000: Compile Unit: {{.*}} DWO_id = 0xecf2563326b0bdd3 +; SPLIT: 0x00000000: Compile Unit: {{.*}} DWO_id = 0xa6edbf487b0a7acf ; Regression testing a fairly quirky bug where instead of hashing (see above), ; extra bytes would be emitted into the output assembly in no