[llvm-c] Add LLVMParseIRInContext2 (#174085)

This new function is the same as LLVMParseIRInContext except it doesn't
take ownership of the memory buffer. This fixes a wart that has been in
place since 5ebb7b3112 changed the
underlying internal API to avoid taking ownership.

Reduce nesting in the implementation of LLVMParseIRInContext (now
LLVMParseIRInContext2) as well.

Update examples, OCaml bindings, and tests including plugging some
pre-existing memory leaks. OCaml bindings have renamed `parse_ir` to
`parse_ir_bitcode_or_assembly` to provoke compilation failures in
downstream code; this is intentional as this function now requires the
memory buffer to be disposed by the caller.
This commit is contained in:
Tamir Duberstein
2026-01-08 11:50:05 -05:00
committed by GitHub
parent 3ae71d30be
commit e39ddab2bc
15 changed files with 140 additions and 62 deletions

View File

@@ -16,11 +16,22 @@ exception Error of string
(** [get_module context mb] reads the bitcode for a new module [m] from the
memory buffer [mb] in the context [context]. Returns [m] if successful, or
raises [Error msg] otherwise, where [msg] is a description of the error
encountered. See the function [llvm::getBitcodeModule]. *)
encountered.
If parsing succeeds, ownership of [mb] is transferred to the returned
module (for lazy deserialization); the caller must not dispose [mb]. If
parsing fails, ownership is retained by the caller, which must dispose it.
See the function [llvm::getBitcodeModule]. *)
val get_module : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule
(** [parse_bitcode context mb] parses the bitcode for a new module [m] from the
memory buffer [mb] in the context [context]. Returns [m] if successful, or
raises [Error msg] otherwise, where [msg] is a description of the error
encountered. See the function [llvm::ParseBitcodeFile]. *)
encountered.
This function does not take ownership of [mb]; the caller should dispose it
(see {!Llvm.MemoryBuffer.dispose}) when it is no longer needed.
See the function [llvm::ParseBitcodeFile]. *)
val parse_bitcode : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule

View File

@@ -24,12 +24,11 @@ void llvm_raise(value Prototype, char *Message);
/* Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule */
value llvm_parse_ir(value C, value MemBuf) {
CAMLparam0();
CAMLlocal2(Variant, MessageVal);
LLVMModuleRef M;
char *Message;
if (LLVMParseIRInContext(Context_val(C), MemoryBuffer_val(MemBuf), &M,
&Message))
if (LLVMParseIRInContext2(Context_val(C), MemoryBuffer_val(MemBuf), &M,
&Message))
llvm_raise(*caml_named_value("Llvm_irreader.Error"), Message);
CAMLreturn(to_val(M));

View File

@@ -11,5 +11,6 @@ exception Error of string
let _ = Callback.register_exception "Llvm_irreader.Error" (Error "")
external parse_ir : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule
= "llvm_parse_ir"
external parse_ir_bitcode_or_assembly
: Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule
= "llvm_parse_ir"

View File

@@ -13,8 +13,14 @@
exception Error of string
(** [parse_ir context mb] parses the IR for a new module [m] from the
memory buffer [mb] in the context [context]. Returns [m] if successful, or
raises [Error msg] otherwise, where [msg] is a description of the error
encountered. See the function [llvm::ParseIR]. *)
val parse_ir : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule
(** [parse_ir_bitcode_or_assembly context mb] parses the IR for a new module [m]
from the memory buffer [mb] in the context [context]. Returns [m] if
successful, or raises [Error msg] otherwise, where [msg] is a description
of the error encountered.
This function does not take ownership of [mb]; the caller should dispose it
(see {!Llvm.MemoryBuffer.dispose}) when it is no longer needed.
See the function [llvm::ParseIR]. *)
val parse_ir_bitcode_or_assembly
: Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule

View File

@@ -182,6 +182,12 @@ Changes to the X86 Backend
Changes to the OCaml bindings
-----------------------------
* The IR reader bindings renamed `parse_ir` to
`parse_ir_bitcode_or_assembly` to clarify that the parser accepts both
textual IR and bitcode. This rename is intentional to force existing code to
update because the ownership semantics changed: the function no longer takes
ownership of the input memory buffer, so callers must now dispose the buffer
themselves (see `Llvm.MemoryBuffer.dispose`).
Changes to the Python bindings
------------------------------
@@ -191,6 +197,10 @@ Changes to the C API
* Add `LLVMGetOrInsertFunction` to get or insert a function, replacing the combination of `LLVMGetNamedFunction` and `LLVMAddFunction`.
* Allow `LLVMGetVolatile` to work with any kind of Instruction.
* Add `LLVMConstFPFromBits` to get a constant floating-point value from an array of 64 bit values.
* Add `LLVMParseIRInContext2`, which is equivalent to `LLVMParseIRInContext`
but does not take ownership of the input `LLVMMemoryBufferRef`. This matches
the underlying C++ API and avoids ownership surprises in language bindings
and examples.
* Functions working on the global context have been deprecated. Use the
functions that work on a specific context instead.

View File

@@ -71,16 +71,19 @@ LLVMErrorRef parseExampleModule(const char *Source, size_t Len,
// Create an LLVMContext for the Module.
LLVMContextRef Ctx = LLVMContextCreate();
// Wrap Source in a MemoryBuffer
LLVMMemoryBufferRef MB =
LLVMCreateMemoryBufferWithMemoryRange(Source, Len, Name, 0);
// Parse the LLVM module.
LLVMModuleRef M;
char *ErrMsg;
if (LLVMParseIRInContext(Ctx, MB, &M, &ErrMsg)) {
return LLVMCreateStringError(ErrMsg);
// TODO: LLVMDisposeMessage(ErrMsg);
// Wrap Source in a MemoryBuffer.
LLVMMemoryBufferRef MB =
LLVMCreateMemoryBufferWithMemoryRange(Source, Len, Name, 0);
LLVMBool Ret = LLVMParseIRInContext2(Ctx, MB, &M, &ErrMsg);
LLVMDisposeMemoryBuffer(MB);
if (Ret) {
LLVMErrorRef Err = LLVMCreateStringError(ErrMsg);
LLVMDisposeMessage(ErrMsg);
return Err;
}
// Create a new ThreadSafeContext to hold the context.

View File

@@ -77,14 +77,16 @@ LLVMErrorRef parseExampleModule(const char *Source, size_t Len,
// Create an LLVMContext.
LLVMContextRef Ctx = LLVMContextCreate();
// Wrap Source in a MemoryBuffer
LLVMMemoryBufferRef MB =
LLVMCreateMemoryBufferWithMemoryRange(Source, Len, Name, 1);
// Parse the LLVM module.
LLVMModuleRef M;
char *ErrMsg;
if (LLVMParseIRInContext(Ctx, MB, &M, &ErrMsg)) {
// Wrap Source in a MemoryBuffer.
LLVMMemoryBufferRef MB =
LLVMCreateMemoryBufferWithMemoryRange(Source, Len, Name, 0);
LLVMBool Ret = LLVMParseIRInContext2(Ctx, MB, &M, &ErrMsg);
LLVMDisposeMemoryBuffer(MB);
if (Ret) {
LLVMErrorRef Err = LLVMCreateStringError(ErrMsg);
LLVMDisposeMessage(ErrMsg);
return Err;

View File

@@ -33,6 +33,8 @@ LLVM_C_EXTERN_C_BEGIN
* Optionally returns a human-readable description of any errors that
* occurred during parsing IR. OutMessage must be disposed with
* LLVMDisposeMessage.
* The memory buffer is consumed by this function.
* This function is deprecated; use LLVMParseIRInContext2 instead.
*
* @see llvm::ParseIR()
*/
@@ -40,6 +42,21 @@ LLVM_C_ABI LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutM,
char **OutMessage);
/**
* Read LLVM IR from a memory buffer and convert it into an in-memory Module
* object. Returns 0 on success and 1 on failure.
* On success, *OutM receives the parsed module; on failure, *OutM is set to
* null.
* Optionally returns a human-readable description of any errors that
* occurred during parsing IR. OutMessage may be NULL, in which case no
* description is produced; otherwise, on failure, *OutMessage must be disposed
* with LLVMDisposeMessage.
* The memory buffer is not consumed by this function. It is the responsibility
* of the caller to free it with \c LLVMDisposeMemoryBuffer.
*
* @see llvm::ParseIR()
*/
LLVM_C_ABI LLVMBool LLVMParseIRInContext2(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutM,
char **OutMessage);
/**
* @}

View File

@@ -66,7 +66,7 @@ typedef enum {
* appropriate implementation selected. The context may be NULL except if
* the resulting file is an LLVM IR file.
*
* The memory buffer is not consumed by this function. It is the responsibilty
* The memory buffer is not consumed by this function. It is the responsibility
* of the caller to free it with \c LLVMDisposeMemoryBuffer.
*
* If NULL is returned, the \p ErrorMessage parameter is populated with the
@@ -82,7 +82,7 @@ LLVM_C_ABI LLVMBinaryRef LLVMCreateBinary(LLVMMemoryBufferRef MemBuf,
/**
* Dispose of a binary file.
*
* The binary file does not own its backing buffer. It is the responsibilty
* The binary file does not own its backing buffer. It is the responsibility
* of the caller to free it with \c LLVMDisposeMemoryBuffer.
*/
LLVM_C_ABI void LLVMDisposeBinary(LLVMBinaryRef BR);

View File

@@ -90,7 +90,7 @@ public:
Result(Result &&Arg)
: InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI), MSSAUsed(Arg.MSSAUsed) {
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibilty to clear the
// because we are taking ownership of the responsibility to clear the
// analysis state.
Arg.InnerAM = nullptr;
}
@@ -99,7 +99,7 @@ public:
LI = RHS.LI;
MSSAUsed = RHS.MSSAUsed;
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibilty to clear the
// because we are taking ownership of the responsibility to clear the
// analysis state.
RHS.InnerAM = nullptr;
return *this;

View File

@@ -135,7 +135,7 @@ public:
Result(Result &&Arg) : FAM(std::move(Arg.FAM)) {
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibilty to clear the
// because we are taking ownership of the responsibility to clear the
// analysis state.
Arg.FAM = nullptr;
}
@@ -143,7 +143,7 @@ public:
Result &operator=(Result &&RHS) {
FAM = RHS.FAM;
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibilty to clear the
// because we are taking ownership of the responsibility to clear the
// analysis state.
RHS.FAM = nullptr;
return *this;

View File

@@ -589,7 +589,7 @@ public:
Result(Result &&Arg) : InnerAM(std::move(Arg.InnerAM)) {
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibilty to clear the
// because we are taking ownership of the responsibility to clear the
// analysis state.
Arg.InnerAM = nullptr;
}
@@ -607,7 +607,7 @@ public:
Result &operator=(Result &&RHS) {
InnerAM = RHS.InnerAM;
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibilty to clear the
// because we are taking ownership of the responsibility to clear the
// analysis state.
RHS.InnerAM = nullptr;
return *this;

View File

@@ -17,6 +17,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <optional>
#include <system_error>
@@ -117,23 +118,26 @@ std::unique_ptr<Module> llvm::parseIRFile(StringRef Filename, SMDiagnostic &Err,
LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
char **OutMessage) {
std::unique_ptr<MemoryBuffer> MB(unwrap(MemBuf));
return LLVMParseIRInContext2(ContextRef, wrap(MB.get()), OutM, OutMessage);
}
LLVMBool LLVMParseIRInContext2(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
char **OutMessage) {
SMDiagnostic Diag;
std::unique_ptr<MemoryBuffer> MB(unwrap(MemBuf));
*OutM =
wrap(parseIR(MB->getMemBufferRef(), Diag, *unwrap(ContextRef)).release());
*OutM = wrap(parseIR(*unwrap(MemBuf), Diag, *unwrap(ContextRef)).release());
if(!*OutM) {
if (OutMessage) {
std::string buf;
raw_string_ostream os(buf);
if (*OutM)
return 0;
Diag.print(nullptr, os, false);
*OutMessage = strdup(buf.c_str());
}
return 1;
if (OutMessage) {
std::string Buf;
raw_string_ostream OS(Buf);
Diag.print(nullptr, OS, /*ShowColors=*/false);
*OutMessage = strdup(Buf.c_str());
}
return 0;
return 1;
}

View File

@@ -16,6 +16,16 @@ let diagnostic_handler _ = ()
(* Terminates the process with exit status 1 unless [x] holds. *)
let test x = if x then () else exit 1
(* [protect ~finally f] returns the result of [f ()], running [finally ()]
   exactly once afterwards, whether [f] returns normally or raises. An
   exception raised by [f] is re-raised after the cleanup has run.
   The success-path call to [finally] is deliberately kept outside the [try]:
   if the cleanup itself raises, the handler must not run the cleanup a second
   time (which would, for example, dispose a memory buffer twice).
   TODO: Replace with Fun.protect when the minimum OCaml version supports it. *)
let protect ~finally f =
  let result =
    try f ()
    with x ->
      finally ();
      raise x
  in
  finally ();
  result
let _ =
Llvm.set_diagnostic_handler context (Some diagnostic_handler);
@@ -29,13 +39,9 @@ let _ =
(* parse_bitcode *)
begin
let mb = Llvm.MemoryBuffer.of_file fn in
begin try
let m = Llvm_bitreader.parse_bitcode context mb in
Llvm.dispose_module m
with x ->
Llvm.MemoryBuffer.dispose mb;
raise x
end
let m = protect ~finally:(fun () -> Llvm.MemoryBuffer.dispose mb)
(fun () -> Llvm_bitreader.parse_bitcode context mb) in
Llvm.dispose_module m
end;
(* MemoryBuffer.of_file *)

View File

@@ -29,27 +29,46 @@ let _ =
(* Raises [Failure "insist"] unless [cond] holds. *)
let insist cond =
  match cond with
  | true -> ()
  | false -> failwith "insist"
(* [protect ~finally f] returns the result of [f ()], running [finally ()]
   exactly once afterwards, whether [f] returns normally or raises. An
   exception raised by [f] is re-raised after the cleanup has run.
   The success-path call to [finally] is deliberately kept outside the [try]:
   if the cleanup itself raises, the handler must not run the cleanup a second
   time (which would, for example, dispose a module or memory buffer twice).
   TODO: Replace with Fun.protect when the minimum OCaml version supports it. *)
let protect ~finally f =
  let result =
    try f ()
    with x ->
      finally ();
      raise x
  in
  finally ();
  result
(*===-- IR Reader ---------------------------------------------------------===*)
let test_irreader () =
begin
let buf = MemoryBuffer.of_string "@foo = global i32 42" in
let m = parse_ir context buf in
match lookup_global "foo" m with
| Some foo ->
insist ((global_initializer foo) = (Some (const_int (i32_type context) 42)))
| None ->
failwith "global"
let m = protect ~finally:(fun () -> MemoryBuffer.dispose buf)
(fun () -> parse_ir_bitcode_or_assembly context buf) in
protect ~finally:(fun () -> dispose_module m) (fun () ->
match lookup_global "foo" m with
| Some foo ->
insist (global_initializer foo =
Some (const_int (i32_type context) 42))
| None ->
failwith "global")
end;
begin
let buf = MemoryBuffer.of_string "@foo = global garble" in
try
ignore (parse_ir context buf);
let parsed = protect ~finally:(fun () -> MemoryBuffer.dispose buf)
(fun () ->
try
let m = parse_ir_bitcode_or_assembly context buf in
dispose_module m;
true
with Llvm_irreader.Error _ ->
false)
in
if parsed then
failwith "parsed"
with Llvm_irreader.Error _ ->
()
end