# RUN: %PYTHON %s | FileCheck %s

from mlir.ir import *
import mlir.ir as ir
from mlir.dialects import gpu, func, arith, math
from mlir.extras import types as T
import mlir.dialects.gpu.passes
from mlir.passmanager import *


def run(f):
    """Run test callable `f` inside a fresh Context/unknown Location and
    print a "TEST:" header that the CHECK-LABEL directives anchor on."""
    print("\nTEST:", f.__name__)
    with Context(), Location.unknown():
        f()
    return f


# CHECK-LABEL: testGPUPass
# CHECK: SUCCESS
@run
def testGPUPass():
    """Check that GPU passes are registered and parseable by PassManager."""
    PassManager.parse("any(gpu-kernel-outlining)")
    print("SUCCESS")


# CHECK-LABEL: testMMAElementWiseAttr
@run
def testMMAElementWiseAttr():
    """Build a gpu.block_dim op and verify its printed form."""
    module = Module.create()
    with InsertionPoint(module.body):
        gpu.BlockDimOp(gpu.Dimension.y)
        # CHECK: %block_dim_y = gpu.block_dim y
    print(module)


# CHECK-LABEL: testObjectAttr
@run
def testObjectAttr():
    """Exercise gpu.ObjectAttr construction with and without properties and
    a kernel-metadata table, and check the `object`/`kernels` accessors."""
    target = Attribute.parse("#nvvm.target")
    # Renamed from `format`/`object` to avoid shadowing Python builtins.
    fmt = gpu.CompilationTarget.Fatbin
    blob = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef"
    properties = DictAttr.get({"O": IntegerAttr.get(IntegerType.get_signless(32), 2)})
    o = gpu.ObjectAttr.get(target, fmt, blob, properties)
    # CHECK: #gpu.object<#nvvm.target, properties = {O = 2 : i32}, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)
    assert o.object == blob

    o = gpu.ObjectAttr.get(target, fmt, blob)
    # CHECK: #gpu.object<#nvvm.target, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)

    blob = (
        b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.0\n.target sm_50"
    )
    o = gpu.ObjectAttr.get(target, fmt, blob)
    # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50">
    print(o)
    assert o.object == blob

    blob = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef"
    kernelTable = Attribute.parse(
        '#gpu.kernel_table<[#gpu.kernel_metadata<"kernel", () -> ()>]>'
    )
    o = gpu.ObjectAttr.get(target, fmt, blob, kernels=kernelTable)
    # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)
    assert o.kernels == kernelTable
# CHECK-LABEL: testGPUFuncOp
@run
def testGPUFuncOp():
    """Exercise gpu.func creation: explicit entry-block management, the
    kwargs-based builder with kernel/known-size attributes, and accessors."""
    assert gpu.GPUFuncOp.__doc__ is not None

    module = Module.create()
    with InsertionPoint(module.body):
        gpu_module_name = StringAttr.get("gpu_module")
        gpumodule = gpu.GPUModuleOp(gpu_module_name)
        block = gpumodule.bodyRegion.blocks.append()

        # Shared body builder: one gpu.global_id followed by gpu.return.
        def builder(fn: gpu.GPUFuncOp) -> None:
            gpu.GlobalIdOp(gpu.Dimension.x)
            gpu.ReturnOp([])

        with InsertionPoint(block):
            name = StringAttr.get("kernel0")
            func_type = ir.FunctionType.get(inputs=[], results=[])
            type_attr = TypeAttr.get(func_type)
            # Named `fn` (not `func`) so the imported func dialect module is
            # not shadowed inside this test.
            fn = gpu.GPUFuncOp(type_attr, name)
            fn.attributes["sym_name"] = name
            fn.attributes["gpu.kernel"] = UnitAttr.get()

            # Accessing the entry block before it exists must raise.
            try:
                fn.entry_block
                assert False, "Expected RuntimeError"
            except RuntimeError as e:
                assert (
                    str(e)
                    == "Entry block does not exist for kernel0. Do you need to call the add_entry_block() method on this GPUFuncOp?"
                )

            block = fn.add_entry_block()
            with InsertionPoint(block):
                builder(fn)

            # Adding a second entry block must raise.
            try:
                fn.add_entry_block()
                assert False, "Expected RuntimeError"
            except RuntimeError as e:
                assert str(e) == "Entry block already exists for kernel0"

            fn = gpu.GPUFuncOp(
                func_type,
                sym_name="kernel1",
                kernel=True,
                body_builder=builder,
                known_block_size=[1, 2, 3],
                known_grid_size=DenseI32ArrayAttr.get([4, 5, 6]),
            )

            assert fn.name.value == "kernel1"
            assert fn.function_type.value == func_type
            # NOTE(review): `== None` kept verbatim — cannot confirm from here
            # that arg_attrs/res_attrs return a plain Python None.
            assert fn.arg_attrs == None
            assert fn.res_attrs == None
            assert fn.arguments == []
            assert fn.entry_block == fn.body.blocks[0]
            assert fn.is_kernel
            assert fn.known_block_size == DenseI32ArrayAttr.get(
                [1, 2, 3]
            ), fn.known_block_size
            assert fn.known_grid_size == DenseI32ArrayAttr.get(
                [4, 5, 6]
            ), fn.known_grid_size

            fn = gpu.GPUFuncOp(
                ir.FunctionType.get(inputs=[T.index()], results=[]),
                sym_name="non_kernel_func",
                body_builder=builder,
                arg_attrs=[{"gpu.some_attribute": ir.StringAttr.get("foo")}],
            )
            assert not fn.is_kernel
            assert fn.known_block_size is None
            assert fn.known_grid_size is None

    print(module)

    # CHECK: gpu.module @gpu_module
    # CHECK: gpu.func @kernel0() kernel {
    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: gpu.func @kernel1() kernel attributes
    # CHECK-SAME: known_block_size = array
    # CHECK-SAME: known_grid_size = array
    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: gpu.func @non_kernel_func(
    # CHECK-SAME: %[[ARG0:.*]]: index {gpu.some_attribute = "foo"}) {
    # CHECK: %[[GLOBAL_ID_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }


# CHECK-LABEL: testGPULaunchFuncOp
@run
def testGPULaunchFuncOp():
    """Build a container module with a trivial kernel and a host function
    that launches it asynchronously with cluster/grid/block sizes."""
    module = Module.create()

    module.operation.attributes["gpu.container_module"] = UnitAttr.get()
    with InsertionPoint(module.body):
        gpu_module = gpu.GPUModuleOp("gpu_module")
        block = gpu_module.bodyRegion.blocks.append()

    with InsertionPoint(block):
        gpu_func = gpu.GPUFuncOp(
            FunctionType.get([], []),
            "kernel",
            # `_` avoids shadowing the imported func dialect module.
            body_builder=lambda _: gpu.return_([]),
            kernel=True,
        )

    with InsertionPoint(module.body):
        host = func.FuncOp(type=FunctionType.get([], []), name="host")

    with InsertionPoint(host.add_entry_block()):
        # Materializes CONSTANT_0 below; kept despite being otherwise unused.
        c1 = arith.constant(T.index(), 1)
        grid_sizes = (1, 1, 1)
        block_sizes = (1, 1, 1)
        cluster_sizes = (1, 1, 1)
        token = gpu.wait()
        token = gpu.launch_func(
            async_dependencies=[token],
            kernel=[gpu_module.sym_name.value, gpu_func.name.value],
            grid_size=grid_sizes,
            block_size=block_sizes,
            kernel_operands=[],
            cluster_size=cluster_sizes,
        )
        gpu.wait(async_dependencies=[token])
        func.ReturnOp([])

    print(module)

    # CHECK-LABEL: gpu.module @gpu_module {
    # CHECK: gpu.func @kernel() kernel {
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: }
    # CHECK-LABEL: func.func @host() {
    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
    # CHECK: %[[WAIT_0:.*]] = gpu.wait async
    # CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_5:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_7:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_8:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_9:.*]] = arith.constant 1 : index
    # CHECK: %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel clusters in (%[[CONSTANT_7]], %[[CONSTANT_8]], %[[CONSTANT_9]]) blocks in (%[[CONSTANT_1]], %[[CONSTANT_2]], %[[CONSTANT_3]]) threads in (%[[CONSTANT_4]], %[[CONSTANT_5]], %[[CONSTANT_6]])
    # CHECK: %[[WAIT_1:.*]] = gpu.wait async {{\[}}%[[LAUNCH_FUNC_0]]]
    # CHECK: return
    # CHECK: }


# CHECK-LABEL: testGPULaunchOp
@run
def testGPULaunchOp():
    """Build a gpu.launch region whose body prints the first region
    argument via gpu.printf."""
    module = Module.create()
    with InsertionPoint(module.body):
        host = func.FuncOp(type=FunctionType.get([T.f32()], []), name="gpu_printf")

        entry_block = host.add_entry_block()
        with InsertionPoint(entry_block):
            c1 = arith.constant(T.index(), 1)
            grid_sizes = (c1, c1, c1)
            block_sizes = (c1, c1, c1)

            launch = gpu.launch(grid_sizes, block_sizes)
            op = launch(lambda *args: gpu.printf("%f", args[0]))

        with InsertionPoint(entry_block):
            func.ReturnOp([])

    print(module)

    # CHECK-LABEL: func.func @gpu_printf(
    # CHECK-SAME: %[[ARG0:.*]]: f32) {
    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
    # CHECK: gpu.launch blocks(%[[VAL_0:.*]], %[[VAL_1:.*]], %[[VAL_2:.*]]) in (%[[VAL_3:.*]] = %[[CONSTANT_0]], %[[VAL_4:.*]] = %[[CONSTANT_0]], %[[VAL_5:.*]] = %[[CONSTANT_0]]) threads(%[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]]) in (%[[VAL_9:.*]] = %[[CONSTANT_0]], %[[VAL_10:.*]] = %[[CONSTANT_0]], %[[VAL_11:.*]] = %[[CONSTANT_0]]) {
    # CHECK: gpu.printf "%[[VAL_12:.*]]", %[[VAL_0]] : index
    # CHECK: gpu.terminator
    # CHECK: }
    # CHECK: return
    # CHECK: }