Follow-up to #184253. The ODS attr/type printer fix removed the leading space from generated print() methods. Update tests that checked for the old double-space output of GPU ops using GPU_DimensionAttr and GPU_MmaElementwiseOpAttr. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
261 lines
9.3 KiB
Python
261 lines
9.3 KiB
Python
# RUN: %PYTHON %s | FileCheck %s
|
|
|
|
from mlir.ir import *
|
|
import mlir.ir as ir
|
|
from mlir.dialects import gpu, func, arith, math
|
|
from mlir.extras import types as T
|
|
import mlir.dialects.gpu.passes
|
|
from mlir.passmanager import *
|
|
|
|
|
|
def run(f):
    """Run test function ``f`` immediately and hand it back.

    A banner containing the function's name is printed first, then ``f`` is
    called with a fresh ``Context`` and an unknown ``Location`` active.
    Returning ``f`` lets this be applied as a decorator.
    """
    test_name = f.__name__
    print("\nTEST:", test_name)
    with Context():
        with Location.unknown():
            f()
    return f
|
|
|
|
|
|
# CHECK-LABEL: testGPUPass
|
|
# CHECK: SUCCESS
|
|
@run
def testGPUPass():
    """Parse a pipeline naming the GPU kernel-outlining pass, then report success."""
    pipeline = "any(gpu-kernel-outlining)"
    PassManager.parse(pipeline)
    print("SUCCESS")
|
|
|
|
|
|
# CHECK-LABEL: testMMAElementWiseAttr
|
|
@run
def testMMAElementWiseAttr():
    """Create a ``gpu.BlockDimOp`` for the ``y`` dimension and print the module.

    The FileCheck directive inside the body pins the printed form of the op.
    """
    mod = Module.create()
    body_ip = InsertionPoint(mod.body)
    with body_ip:
        gpu.BlockDimOp(gpu.Dimension.y)
    # CHECK: %block_dim_y = gpu.block_dim y
    print(mod)
|
|
|
|
|
|
# CHECK-LABEL: testObjectAttr
|
|
@run
def testObjectAttr():
    """Exercise ``gpu.ObjectAttr.get`` with and without its optional arguments.

    Every attribute built here is printed so FileCheck can compare its textual
    form, and the ``object`` / ``kernels`` accessors are compared against the
    values that were passed in.
    """
    target = Attribute.parse("#nvvm.target")
    # Local names deliberately avoid shadowing the ``format`` and ``object``
    # builtins.
    target_format = gpu.CompilationTarget.Fatbin
    binary_blob = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef"
    properties = DictAttr.get({"O": IntegerAttr.get(IntegerType.get_signless(32), 2)})

    # With an explicit properties dictionary.
    o = gpu.ObjectAttr.get(target, target_format, binary_blob, properties)
    # CHECK: #gpu.object<#nvvm.target, properties = {O = 2 : i32}, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)
    assert o.object == binary_blob

    # Without properties.
    o = gpu.ObjectAttr.get(target, target_format, binary_blob)
    # CHECK: #gpu.object<#nvvm.target, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)

    # Payload containing newlines.
    text_blob = (
        b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.0\n.target sm_50"
    )
    o = gpu.ObjectAttr.get(target, target_format, text_blob)
    # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50">
    print(o)
    assert o.object == text_blob

    # With a kernel table passed by keyword.
    kernelTable = Attribute.parse(
        '#gpu.kernel_table<[#gpu.kernel_metadata<"kernel", () -> ()>]>'
    )
    o = gpu.ObjectAttr.get(target, target_format, binary_blob, kernels=kernelTable)
    # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
    print(o)
    assert o.kernels == kernelTable
|
|
|
|
|
|
# CHECK-LABEL: testGPUFuncOp
|
|
@run
def testGPUFuncOp():
    """Build three ``gpu.func`` ops inside a ``gpu.module`` and print the result.

    The first function is created without a body, which lets the test verify
    the error messages raised when the entry block is missing or is added
    twice. The other two are created through the keyword-argument builder
    and their accessors are asserted on. The printed module is matched by the
    FileCheck directives at the end of the body.
    """
    assert gpu.GPUFuncOp.__doc__ is not None
    module = Module.create()
    with InsertionPoint(module.body):
        gpu_module_name = StringAttr.get("gpu_module")
        gpumodule = gpu.GPUModuleOp(gpu_module_name)
        block = gpumodule.bodyRegion.blocks.append()

        def builder(func: gpu.GPUFuncOp) -> None:
            """Fill a function body with a global-id op and a return."""
            gpu.GlobalIdOp(gpu.Dimension.x)
            gpu.ReturnOp([])

        with InsertionPoint(block):
            name = StringAttr.get("kernel0")
            func_type = ir.FunctionType.get(inputs=[], results=[])
            type_attr = TypeAttr.get(func_type)
            # Distinct local names keep the imported ``func`` dialect module
            # from being shadowed inside this test.
            kernel0 = gpu.GPUFuncOp(type_attr, name)
            kernel0.attributes["sym_name"] = name
            kernel0.attributes["gpu.kernel"] = UnitAttr.get()

            # No entry block has been added yet, so the accessor must raise.
            try:
                kernel0.entry_block
            except RuntimeError as e:
                assert (
                    str(e)
                    == "Entry block does not exist for kernel0. Do you need to call the add_entry_block() method on this GPUFuncOp?"
                )
            else:
                assert False, "Expected RuntimeError"

            entry = kernel0.add_entry_block()
            with InsertionPoint(entry):
                builder(kernel0)

            # Adding a second entry block must raise as well.
            try:
                kernel0.add_entry_block()
            except RuntimeError as e:
                assert str(e) == "Entry block already exists for kernel0"
            else:
                assert False, "Expected RuntimeError"

            kernel1 = gpu.GPUFuncOp(
                func_type,
                sym_name="kernel1",
                kernel=True,
                body_builder=builder,
                known_block_size=[1, 2, 3],
                known_grid_size=DenseI32ArrayAttr.get([4, 5, 6]),
            )

            assert kernel1.name.value == "kernel1"
            assert kernel1.function_type.value == func_type
            assert kernel1.arg_attrs is None
            assert kernel1.res_attrs is None
            assert kernel1.arguments == []
            assert kernel1.entry_block == kernel1.body.blocks[0]
            assert kernel1.is_kernel
            assert kernel1.known_block_size == DenseI32ArrayAttr.get(
                [1, 2, 3]
            ), kernel1.known_block_size
            assert kernel1.known_grid_size == DenseI32ArrayAttr.get(
                [4, 5, 6]
            ), kernel1.known_grid_size

            non_kernel = gpu.GPUFuncOp(
                ir.FunctionType.get(inputs=[T.index()], results=[]),
                sym_name="non_kernel_func",
                body_builder=builder,
                arg_attrs=[{"gpu.some_attribute": ir.StringAttr.get("foo")}],
            )
            assert not non_kernel.is_kernel
            assert non_kernel.known_block_size is None
            assert non_kernel.known_grid_size is None

    print(module)

    # CHECK: gpu.module @gpu_module
    # CHECK: gpu.func @kernel0() kernel {
    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: gpu.func @kernel1() kernel attributes
    # CHECK-SAME: known_block_size = array<i32: 1, 2, 3>
    # CHECK-SAME: known_grid_size = array<i32: 4, 5, 6>
    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: gpu.func @non_kernel_func(
    # CHECK-SAME: %[[ARG0:.*]]: index {gpu.some_attribute = "foo"}) {
    # CHECK: %[[GLOBAL_ID_0:.*]] = gpu.global_id x
    # CHECK: gpu.return
    # CHECK: }
|
|
|
|
|
|
# CHECK-LABEL: testGPULaunchFuncOp
|
|
@run
def testGPULaunchFuncOp():
    """Launch a GPU kernel asynchronously from a host function and print the module.

    A ``gpu.module`` holding one kernel is created, followed by a host
    ``func.func`` that waits, launches the kernel with unit grid, block and
    cluster sizes, and waits on the launch token. The printed IR is matched by
    the FileCheck directives at the end of the body.
    """
    mod = Module.create()
    mod.operation.attributes["gpu.container_module"] = UnitAttr.get()

    with InsertionPoint(mod.body):
        device_module = gpu.GPUModuleOp("gpu_module")
        device_block = device_module.bodyRegion.blocks.append()

    with InsertionPoint(device_block):
        kernel_op = gpu.GPUFuncOp(
            FunctionType.get([], []),
            "kernel",
            body_builder=lambda func: gpu.return_([]),
            kernel=True,
        )

    with InsertionPoint(mod.body):
        host_fn = func.FuncOp(type=FunctionType.get([], []), name="host")

    with InsertionPoint(host_fn.add_entry_block()):
        # The result is never referenced, but the constant op itself is part
        # of the expected output, so it still has to be created here.
        arith.constant(T.index(), 1)
        unit_sizes = (1, 1, 1)
        kernel_ref = [device_module.sym_name.value, kernel_op.name.value]
        wait_token = gpu.wait()
        launch_token = gpu.launch_func(
            async_dependencies=[wait_token],
            kernel=kernel_ref,
            grid_size=unit_sizes,
            block_size=unit_sizes,
            kernel_operands=[],
            cluster_size=unit_sizes,
        )
        gpu.wait(async_dependencies=[launch_token])
        func.ReturnOp([])

    print(mod)

    # CHECK-LABEL: gpu.module @gpu_module {
    # CHECK: gpu.func @kernel() kernel {
    # CHECK: gpu.return
    # CHECK: }
    # CHECK: }

    # CHECK-LABEL: func.func @host() {
    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
    # CHECK: %[[WAIT_0:.*]] = gpu.wait async
    # CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_5:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_7:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_8:.*]] = arith.constant 1 : index
    # CHECK: %[[CONSTANT_9:.*]] = arith.constant 1 : index
    # CHECK: %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel clusters in (%[[CONSTANT_7]], %[[CONSTANT_8]], %[[CONSTANT_9]]) blocks in (%[[CONSTANT_1]], %[[CONSTANT_2]], %[[CONSTANT_3]]) threads in (%[[CONSTANT_4]], %[[CONSTANT_5]], %[[CONSTANT_6]])
    # CHECK: %[[WAIT_1:.*]] = gpu.wait async {{\[}}%[[LAUNCH_FUNC_0]]]
    # CHECK: return
    # CHECK: }
|
|
|
|
|
|
# CHECK-LABEL: testGPULaunchOp
|
|
@run
def testGPULaunchOp():
    """Build a ``gpu.launch`` region containing a ``gpu.printf`` and print the module.

    The host function takes one ``f32`` argument. Grid and block sizes all use
    the same index constant. The printed IR is matched by the FileCheck
    directives at the end of the body.
    """
    mod = Module.create()

    with InsertionPoint(mod.body):
        host_fn = func.FuncOp(type=FunctionType.get([T.f32()], []), name="gpu_printf")

    entry = host_fn.add_entry_block()
    with InsertionPoint(entry):
        one = arith.constant(T.index(), 1)
        grid = (one, one, one)
        threads = (one, one, one)

        launch_body = gpu.launch(grid, threads)

        # The returned value is not needed by this test.
        launch_body(lambda *args: gpu.printf("%f", args[0]))

    # Re-enter the entry block to append the terminator.
    with InsertionPoint(entry):
        func.ReturnOp([])

    print(mod)

    # CHECK-LABEL: func.func @gpu_printf(
    # CHECK-SAME: %[[ARG0:.*]]: f32) {
    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
    # CHECK: gpu.launch blocks(%[[VAL_0:.*]], %[[VAL_1:.*]], %[[VAL_2:.*]]) in (%[[VAL_3:.*]] = %[[CONSTANT_0]], %[[VAL_4:.*]] = %[[CONSTANT_0]], %[[VAL_5:.*]] = %[[CONSTANT_0]]) threads(%[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]]) in (%[[VAL_9:.*]] = %[[CONSTANT_0]], %[[VAL_10:.*]] = %[[CONSTANT_0]], %[[VAL_11:.*]] = %[[CONSTANT_0]]) {
    # CHECK: gpu.printf "%[[VAL_12:.*]]", %[[VAL_0]] : index
    # CHECK: gpu.terminator
    # CHECK: }
    # CHECK: return
    # CHECK: }
|