diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceFuncTransform.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceFuncTransform.cpp index 4532af98d839..88382cafcc3b 100644 --- a/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceFuncTransform.cpp +++ b/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceFuncTransform.cpp @@ -68,8 +68,7 @@ class CUFDeviceFuncTransform gpu::GPUFuncOp::create(builder, loc, funcOp.getName(), type, mlir::TypeRange{}, mlir::TypeRange{}); if (isGlobal) - deviceFuncOp->setAttr(gpu::GPUDialect::getKernelFuncAttrName(), - builder.getUnitAttr()); + deviceFuncOp.setKernel(true); mlir::Region &deviceFuncBody = deviceFuncOp.getBody(); mlir::Block &entryBlock = deviceFuncBody.front(); diff --git a/mlir/include/mlir/Conversion/LLVMCommon/LowerFunctionDiscardablesToLLVM.h b/mlir/include/mlir/Conversion/LLVMCommon/LowerFunctionDiscardablesToLLVM.h new file mode 100644 index 000000000000..c936cbded296 --- /dev/null +++ b/mlir/include/mlir/Conversion/LLVMCommon/LowerFunctionDiscardablesToLLVM.h @@ -0,0 +1,40 @@ +//===- LowerFunctionDiscardablesToLLVM.h - Func discardables to llvm - C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Shared helpers for lowering discardable attributes on any FunctionOpInterface +// (e.g. func.func, gpu.func) into llvm.func properties and discardables. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_LLVMCOMMON_LOWERFUNCTIONDISCARDABLESTOLLVM_H +#define MLIR_CONVERSION_LLVMCOMMON_LOWERFUNCTIONDISCARDABLESTOLLVM_H + +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/Interfaces/FunctionInterfaces.h" + +namespace mlir { + +/// Result of lowering discardable attributes from a `FunctionOpInterface` to +/// what `llvm.func` expects: typed inherent properties plus remaining +/// discardable attributes. +struct LoweredLLVMFuncAttrs { + LLVM::LLVMFuncOp::Properties properties; + NamedAttrList discardableAttrs; +}; + +/// Partition `funcOp`'s discardables for `llvm.func`: `sym_name`, +/// `function_type`, and typed `properties` from `llvm.*` ODS attrs; other +/// discardables unchanged. Fails if that property set is invalid; drops +/// ODS-named attrs without `llvm.`. +FailureOr +lowerDiscardableAttrsForLLVMFunc(FunctionOpInterface funcOp, Type llvmFuncType); + +} // namespace mlir + +#endif // MLIR_CONVERSION_LLVMCOMMON_LOWERFUNCTIONDISCARDABLESTOLLVM_H diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td index c055f73e9c99..16c3ffd2c858 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -426,7 +426,9 @@ def GPU_GPUFuncOp : GPU_Op<"func", [ OptionalAttr:$private_attrib_attrs, GPU_OptionalDimSizeHintAttr:$known_block_size, GPU_OptionalDimSizeHintAttr:$known_grid_size, - GPU_OptionalDimSizeHintAttr:$known_cluster_size); + GPU_OptionalDimSizeHintAttr:$known_cluster_size, + OptionalAttr>:$workgroup_attributions, + UnitAttr:$kernel); let regions = (region AnyRegion:$body); let skipDefaultBuilders = 1; @@ -440,22 +442,24 @@ def GPU_GPUFuncOp : GPU_Op<"func", [ let extraClassDeclaration = [{ /// Returns `true` if the GPU function defined by this Op is a kernel, i.e. - /// it is intended to be launched from host. + /// it is intended to be launched from host. Also accepts legacy discardable + /// `gpu.kernel` for compatibility with older generic IR. bool isKernel() { + if (getKernel()) + return true; return (*this)->getAttrOfType( GPUDialect::getKernelFuncAttrName()) != nullptr; } /// Returns the number of buffers located in the workgroup memory. unsigned getNumWorkgroupAttributions() { - auto attr = (*this)->getAttrOfType( - getNumWorkgroupAttributionsAttrName()); - if (!attr) + std::optional v = getWorkgroupAttributions(); + if (!v) return 0; - int64_t value = attr.getInt(); - assert(value >= 0 && value < std::numeric_limits::max() && + int64_t value = *v; + assert(value < std::numeric_limits::max() && "invalid workgroup attribution count"); - return value; + return static_cast(value); } /// Return the index of the first workgroup attribution in the block argument @@ -466,7 +470,7 @@ def GPU_GPUFuncOp : GPU_Op<"func", [ /// Returns a list of block arguments that correspond to buffers located in /// the workgroup memory - ArrayRef getWorkgroupAttributions() { + ArrayRef getWorkgroupAttributionBBArgs() { auto begin = std::next(getBody().args_begin(), getFirstWorkgroupAttributionIndex()); auto end = std::next(begin, getNumWorkgroupAttributions()); @@ -548,12 +552,6 @@ def GPU_GPUFuncOp : GPU_Op<"func", [ return setPrivateAttributionAttr(index, StringAttr::get((*this)->getContext(), name), value); } - /// Returns the name of the attribute containing the number of buffers - /// located in the workgroup memory. - static StringRef getNumWorkgroupAttributionsAttrName() { - return "workgroup_attributions"; - } - /// Returns the argument types of this function. ArrayRef getArgumentTypes() { return getFunctionType().getInputs(); } @@ -806,7 +804,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [ Optional:$clusterSizeZ, Optional:$dynamicSharedMemorySize, OptionalAttr:$module, - OptionalAttr:$function)>, + OptionalAttr:$function, + OptionalAttr>:$workgroup_attributions)>, Results<(outs Optional:$asyncToken)> { let summary = "GPU kernel launch operation"; @@ -1004,19 +1003,17 @@ def GPU_LaunchOp : GPU_Op<"launch", [ /// Returns the number of buffers located in the workgroup memory. unsigned getNumWorkgroupAttributions() { - auto attr = (*this)->getAttrOfType( - getNumWorkgroupAttributionsAttrName()); - if (!attr) + std::optional v = getWorkgroupAttributions(); + if (!v) return 0; - int64_t value = attr.getInt(); - assert(value >= 0 && value < std::numeric_limits::max() && + int64_t value = *v; + assert(value < std::numeric_limits::max() && "invalid workgroup attribution count"); - return value; + return static_cast(value); } - /// Returns a list of block arguments that correspond to buffers located in - /// the workgroup memory - ArrayRef getWorkgroupAttributions() { + /// Block arguments for workgroup memory buffers + ArrayRef getWorkgroupAttributionBBArgs() { auto begin = std::next(getBody().args_begin(), getNumConfigRegionAttributes()); auto end = std::next(begin, getNumWorkgroupAttributions()); @@ -1047,12 +1044,6 @@ def GPU_LaunchOp : GPU_Op<"launch", [ /// Adds a new block argument that corresponds to buffers located in /// private memory. BlockArgument addPrivateAttribution(Type type, Location loc); - - /// Returns the name of the attribute containing the number of buffers - /// located in the workgroup memory. - static StringRef getNumWorkgroupAttributionsAttrName() { - return "workgroup_attributions"; - } }]; let hasCanonicalizer = 1; diff --git a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp index 8ed8c73e2e33..2686158f67e7 100644 --- a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp +++ b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp @@ -19,6 +19,7 @@ #include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h" #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" +#include "mlir/Conversion/LLVMCommon/LowerFunctionDiscardablesToLLVM.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" @@ -77,65 +78,6 @@ static void filterFuncAttributes(FunctionOpInterface func, } } -/// Add custom lowered funcOp to llvm.func attributes here. -struct LoweredFuncAttrs { - LLVM::LLVMFuncOp::Properties properties; - NamedAttrList discardableAttrs; -}; - -/// Lower discardable function attributes on `func.func` to attributes expected -/// by `llvm.func`. -static FailureOr -lowerFuncAttributes(FunctionOpInterface func) { - MLIRContext *ctx = func->getContext(); - LoweredFuncAttrs lowered; - - llvm::SmallDenseSet odsAttrNames( - LLVM::LLVMFuncOp::getAttributeNames().begin(), - LLVM::LLVMFuncOp::getAttributeNames().end()); - - NamedAttrList inherentAttrs; - - for (const NamedAttribute &attr : func->getDiscardableAttrs()) { - StringRef attrName = attr.getName().strref(); - - if (odsAttrNames.contains(attrName)) { - LDBG() << "LLVM specific attributes: " << attrName - << "should use llvm.* prefix, discarding it"; - continue; - } - - StringRef inherent = attrName; - if (inherent.consume_front("llvm.") && odsAttrNames.contains(inherent)) - inherentAttrs.set(inherent, attr.getValue()); // collect inherent attrs - else - lowered.discardableAttrs.push_back(attr); - } - - // Convert collected inherent attrs into typed properties. - if (!inherentAttrs.empty()) { - DictionaryAttr dict = inherentAttrs.getDictionary(ctx); - auto emitError = [&] { - return func.emitOpError("invalid llvm.func property"); - }; - if (failed(LLVM::LLVMFuncOp::setPropertiesFromAttr(lowered.properties, dict, - emitError))) { - return failure(); - } - } - return lowered; -} - -static void buildLLVMFuncProperties(PatternRewriter &rewriter, - FunctionOpInterface srcFunc, - Type llvmFuncType, - LLVM::LLVMFuncOp::Properties &props) { - MLIRContext *ctx = rewriter.getContext(); - props.sym_name = rewriter.getStringAttr(srcFunc.getName()); - props.function_type = TypeAttr::get(llvmFuncType); - props.setCConv(LLVM::CConvAttr::get(ctx, LLVM::CConv::C)); -} - /// Propagate argument/results attributes. static void propagateArgResAttrs(OpBuilder &builder, bool resultStructType, FunctionOpInterface funcOp, @@ -369,14 +311,15 @@ static FailureOr convertFuncSignature( static LLVM::LLVMFuncOp createLLVMFuncOp(FunctionOpInterface funcOp, ConversionPatternRewriter &rewriter, LLVM::LLVMFunctionType llvmType, - LoweredFuncAttrs &loweredAttrs, + LoweredLLVMFuncAttrs &loweredAttrs, SymbolTableCollection *symbolTables) { Operation *symbolTableOp = funcOp->getParentWithTrait(); if (symbolTables && symbolTableOp) { SymbolTable &symbolTable = symbolTables->getSymbolTable(symbolTableOp); symbolTable.remove(funcOp); } - buildLLVMFuncProperties(rewriter, funcOp, llvmType, loweredAttrs.properties); + loweredAttrs.properties.setCConv( + LLVM::CConvAttr::get(rewriter.getContext(), LLVM::CConv::C)); auto newFuncOp = LLVM::LLVMFuncOp::create(rewriter, funcOp.getLoc(), loweredAttrs.properties, loweredAttrs.discardableAttrs); @@ -491,27 +434,25 @@ static void wrapWithCInterface(FunctionOpInterface funcOp, newFuncOp); } -// Conversion steps -// 1. Validate function type -// 2. Convert signature -// 3. Validate C wrapper varargs constraint -// 4. Lower function attrs -// 5. Create llvm.func -// 6. Propagate arg/result attrs -// 7. Inline body + signature conversion -// 8. Restore byval/byref pointee types -// 9. C-wrapper handling +/// Conversion steps +/// - Validate function type +/// - Convert signature +/// - Validate C wrapper varargs constraint +/// - Lower function attrs +/// - Create llvm.func +/// - Propagate arg/result attrs +/// - Inline body + signature conversion +/// - Restore byval/byref pointee types +/// - C-wrapper handling FailureOr mlir::convertFuncOpToLLVMFuncOp( FunctionOpInterface funcOp, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter, SymbolTableCollection *symbolTables) { - // 1. Validate function type // Check the funcOp has `FunctionType`. auto funcTy = dyn_cast(funcOp.getFunctionType()); if (!funcTy) return rewriter.notifyMatchFailure( funcOp, "Only support FunctionOpInterface with FunctionType"); - // 2. Convert signature bool useBarePtrCallConv = shouldUseBarePtrCallConv(funcOp, &converter); // Convert the original function arguments. They are converted using the // LLVMTypeConverter provided to this legalization pattern. @@ -524,28 +465,29 @@ FailureOr mlir::convertFuncOpToLLVMFuncOp( if (failed(llvmType)) return rewriter.notifyMatchFailure(funcOp, "signature conversion failed"); - // 3. Validate C wrapper varargs constraint + // Validate C wrapper varargs constraint bool emitCWrapper = funcOp->hasAttrOfType( LLVM::LLVMDialect::getEmitCWrapperAttrName()); if (!useBarePtrCallConv && emitCWrapper && llvmType->isVarArg()) return funcOp.emitError("C interface for variadic functions is not " "supported yet."); - // 4. Lower function attrs - FailureOr loweredAttrs = lowerFuncAttributes(funcOp); + // Lower function attrs + FailureOr loweredAttrs = + lowerDiscardableAttrsForLLVMFunc(funcOp, *llvmType); if (failed(loweredAttrs)) return rewriter.notifyMatchFailure(funcOp, "failed to lower func attributes"); - // 5. Create llvm.func + // Create llvm.func auto newFuncOp = createLLVMFuncOp(funcOp, rewriter, *llvmType, *loweredAttrs, symbolTables); - // 6. Propagate arg/result attrs + // Propagate arg/result attrs propagateFunctionArgResAttrs(funcOp, rewriter, converter, result, *llvmType, newFuncOp); - // 7. Inline body + signature conversion + // Inline body + signature conversion rewriter.inlineRegionBefore(funcOp.getFunctionBody(), newFuncOp.getBody(), newFuncOp.end()); // Convert just the entry block. The remaining unstructured control flow is @@ -554,14 +496,14 @@ FailureOr mlir::convertFuncOpToLLVMFuncOp( rewriter.applySignatureConversion(&newFuncOp.getBody().front(), result, &converter); - // 8. Restore byval/byref pointee types + // Restore byval/byref pointee types // Fix the type mismatch between the materialized `llvm.ptr` and the expected // pointee type in the function body when converting `llvm.byval`/`llvm.byref` // function arguments. restoreByValRefArgumentType(rewriter, converter, byValRefNonPtrAttrs, newFuncOp); - // 9. C-wrapper handling + // C-wrapper handling if (!useBarePtrCallConv && emitCWrapper) wrapWithCInterface(funcOp, rewriter, converter, newFuncOp); diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 6a705ebab7aa..eeb90a7ff815 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -14,10 +14,14 @@ #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/SymbolTable.h" #include "llvm/ADT/SmallVectorExtras.h" #include "llvm/ADT/StringSet.h" +#include "llvm/Support/DebugLog.h" #include "llvm/Support/FormatVariadic.h" +#define DEBUG_TYPE "gpu-lowering" + using namespace mlir; LLVM::LLVMFuncOp mlir::getOrDefineFunction(Operation *moduleOp, Location loc, @@ -74,6 +78,55 @@ LLVM::GlobalOp mlir::getOrCreateStringConstant(OpBuilder &b, Location loc, name, attr, alignment, addrSpace); } +FailureOr GPUFuncOpLowering::buildLoweredGPULLVMFuncAttrs( + gpu::GPUFuncOp gpuFuncOp, Type llvmFuncType, OpBuilder &rewriter) const { + FailureOr loweredAttrs = + lowerDiscardableAttrsForLLVMFunc(gpuFuncOp, llvmFuncType); + if (failed(loweredAttrs)) + return failure(); + + MLIRContext *ctx = rewriter.getContext(); + LLVM::LLVMFuncOp::Properties &props = loweredAttrs->properties; + props.sym_name = rewriter.getStringAttr(gpuFuncOp.getName()); + props.function_type = TypeAttr::get(llvmFuncType); + const bool isKernelFunc = gpuFuncOp.isKernel(); + props.setCConv(LLVM::CConvAttr::get(ctx, isKernelFunc + ? kernelCallingConvention + : nonKernelCallingConvention)); + + NamedAttrList &discardable = loweredAttrs->discardableAttrs; + auto *gpuDialect = cast(gpuFuncOp->getDialect()); + + auto appendIfNameAndValue = [&](StringAttr name, Attribute value) { + if (name && value) + discardable.append(name, value); + }; + + DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr(); + DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr(); + DenseI32ArrayAttr knownClusterSize = gpuFuncOp.getKnownClusterSizeAttr(); + + appendIfNameAndValue(gpuDialect->getKnownBlockSizeAttrHelper().getName(), + knownBlockSize); + appendIfNameAndValue(gpuDialect->getKnownGridSizeAttrHelper().getName(), + knownGridSize); + appendIfNameAndValue(gpuDialect->getKnownClusterSizeAttrHelper().getName(), + knownClusterSize); + + if (isKernelFunc) { + discardable.append(gpuDialect->getKernelFuncAttrName(), + rewriter.getUnitAttr()); + // Add a dialect specific kernel attribute in addition to GPU kernel + // attribute. The former is necessary for further translation while the + // latter is expected by gpu.launch_func. + appendIfNameAndValue(kernelAttributeName, rewriter.getUnitAttr()); + appendIfNameAndValue(kernelBlockSizeAttributeName, knownBlockSize); + appendIfNameAndValue(kernelClusterSizeAttributeName, knownClusterSize); + } + + return loweredAttrs; +} + LogicalResult GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { @@ -85,7 +138,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, // workgroup attributions. ArrayRef workgroupAttributions = - gpuFuncOp.getWorkgroupAttributions(); + gpuFuncOp.getWorkgroupAttributionBBArgs(); size_t numAttributions = workgroupAttributions.size(); // Insert all arguments at the end. @@ -136,7 +189,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, } else { workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); for (auto [idx, attribution] : - llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { + llvm::enumerate(gpuFuncOp.getWorkgroupAttributionBBArgs())) { auto type = dyn_cast(attribution.getType()); assert(type && type.hasStaticShape() && "unexpected type in attribution"); @@ -174,67 +227,17 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, }); } - // Create the new function operation. Only copy those attributes that are - // not specific to function modeling. - SmallVector attributes; - ArrayAttr argAttrs; - for (const auto &attr : gpuFuncOp->getAttrs()) { - if (attr.getName() == SymbolTable::getSymbolAttrName() || - attr.getName() == gpuFuncOp.getFunctionTypeAttrName() || - attr.getName() == - gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() || - attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() || - attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName() || - attr.getName() == gpuFuncOp.getKnownBlockSizeAttrName() || - attr.getName() == gpuFuncOp.getKnownGridSizeAttrName() || - attr.getName() == gpuFuncOp.getKnownClusterSizeAttrName()) - continue; - if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) { - argAttrs = gpuFuncOp.getArgAttrsAttr(); - continue; - } - attributes.push_back(attr); - } + ArrayAttr argAttrs = gpuFuncOp.getArgAttrsAttr(); - DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr(); - DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr(); - DenseI32ArrayAttr knownClusterSize = gpuFuncOp.getKnownClusterSizeAttr(); - // Ensure we don't lose information if the function is lowered before its - // surrounding context. - auto *gpuDialect = cast(gpuFuncOp->getDialect()); - if (knownBlockSize) - attributes.emplace_back(gpuDialect->getKnownBlockSizeAttrHelper().getName(), - knownBlockSize); - if (knownGridSize) - attributes.emplace_back(gpuDialect->getKnownGridSizeAttrHelper().getName(), - knownGridSize); - if (knownClusterSize) - attributes.emplace_back( - gpuDialect->getKnownClusterSizeAttrHelper().getName(), - knownClusterSize); + FailureOr loweredAttrs = + buildLoweredGPULLVMFuncAttrs(gpuFuncOp, funcType, rewriter); + if (failed(loweredAttrs)) + return rewriter.notifyMatchFailure(gpuFuncOp, + "failed to lower func attributes"); - // Add a dialect specific kernel attribute in addition to GPU kernel - // attribute. The former is necessary for further translation while the - // latter is expected by gpu.launch_func. - if (gpuFuncOp.isKernel()) { - if (kernelAttributeName) - attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr()); - // Set the dialect-specific block size attribute if there is one. - if (kernelBlockSizeAttributeName && knownBlockSize) { - attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize); - } - // Set the dialect-specific cluster size attribute if there is one. - if (kernelClusterSizeAttributeName && knownClusterSize) { - attributes.emplace_back(kernelClusterSizeAttributeName, knownClusterSize); - } - } - LLVM::CConv callingConvention = gpuFuncOp.isKernel() - ? kernelCallingConvention - : nonKernelCallingConvention; - auto llvmFuncOp = LLVM::LLVMFuncOp::create( - rewriter, gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType, - LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention, - /*comdat=*/nullptr, attributes); + auto llvmFuncOp = LLVM::LLVMFuncOp::create(rewriter, gpuFuncOp.getLoc(), + loweredAttrs->properties, + loweredAttrs->discardableAttrs); { // Insert operations that correspond to converted workgroup and private @@ -260,8 +263,9 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ArrayRef attributionArguments = gpuFuncOp.getArguments().slice(numProperArguments - numAttributions, numAttributions); - for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal( - gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) { + for (auto [idx, vals] : llvm::enumerate( + llvm::zip_equal(gpuFuncOp.getWorkgroupAttributionBBArgs(), + attributionArguments))) { auto [attribution, arg] = vals; auto type = cast(attribution.getType()); @@ -287,7 +291,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, // existing memref infrastructure. This may use more registers than // otherwise necessary given that memref sizes are fixed, but we can try // and canonicalize that away later. - Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx]; + Value attribution = gpuFuncOp.getWorkgroupAttributionBBArgs()[idx]; auto type = cast(attribution.getType()); Value descr = MemRefDescriptor::fromStaticShape( rewriter, loc, *getTypeConverter(), type, memory); diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h index a3b2e04c3531..dca4ee23ff3d 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -8,6 +8,7 @@ #ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ #define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ +#include "mlir/Conversion/LLVMCommon/LowerFunctionDiscardablesToLLVM.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" @@ -106,6 +107,12 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern { matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override; + /// Lower discardable attrs like `func` lowering, then set `llvm.func` + /// properties and append GPU / target-specific discardable metadata. + FailureOr + buildLoweredGPULLVMFuncAttrs(gpu::GPUFuncOp gpuFuncOp, Type llvmFuncType, + OpBuilder &rewriter) const; + private: /// The address space to use for `alloca`s in private memory. unsigned allocaAddrSpace; diff --git a/mlir/lib/Conversion/LLVMCommon/CMakeLists.txt b/mlir/lib/Conversion/LLVMCommon/CMakeLists.txt index 568d9339aaab..e7175445618c 100644 --- a/mlir/lib/Conversion/LLVMCommon/CMakeLists.txt +++ b/mlir/lib/Conversion/LLVMCommon/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_conversion_library(MLIRLLVMCommonConversion ConversionTarget.cpp + LowerFunctionDiscardablesToLLVM.cpp LoweringOptions.cpp MemRefBuilder.cpp Pattern.cpp diff --git a/mlir/lib/Conversion/LLVMCommon/LowerFunctionDiscardablesToLLVM.cpp b/mlir/lib/Conversion/LLVMCommon/LowerFunctionDiscardablesToLLVM.cpp new file mode 100644 index 000000000000..5427877fdfd8 --- /dev/null +++ b/mlir/lib/Conversion/LLVMCommon/LowerFunctionDiscardablesToLLVM.cpp @@ -0,0 +1,60 @@ +//===- LowerFunctionDiscardablesToLLVM.cpp - Func discardables to llvm ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/LLVMCommon/LowerFunctionDiscardablesToLLVM.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/DebugLog.h" + +using namespace mlir; + +#define DEBUG_TYPE "lower-function-discardables-to-llvm" + +FailureOr +mlir::lowerDiscardableAttrsForLLVMFunc(FunctionOpInterface funcOp, + Type llvmFuncType) { + MLIRContext *ctx = funcOp->getContext(); + LoweredLLVMFuncAttrs result; + + result.properties.sym_name = StringAttr::get(ctx, funcOp.getName()); + result.properties.function_type = TypeAttr::get(llvmFuncType); + + llvm::SmallDenseSet odsAttrNames( + LLVM::LLVMFuncOp::getAttributeNames().begin(), + LLVM::LLVMFuncOp::getAttributeNames().end()); + + NamedAttrList inherentAttrs; + + for (const NamedAttribute &attr : funcOp->getDiscardableAttrs()) { + StringRef attrName = attr.getName().strref(); + + if (odsAttrNames.contains(attrName)) { + LDBG() << "LLVM specific attributes: " << attrName + << "should use llvm.* prefix, discarding it"; + continue; + } + + StringRef inherent = attrName; + if (inherent.consume_front("llvm.") && odsAttrNames.contains(inherent)) + inherentAttrs.set(inherent, attr.getValue()); // collect inherent attrs + else + result.discardableAttrs.push_back(attr); + } + + // Convert collected inherent attrs into typed properties. + if (!inherentAttrs.empty()) { + DictionaryAttr dict = inherentAttrs.getDictionary(ctx); + auto emitError = [&] { + return funcOp.emitOpError("invalid llvm.func property"); + }; + if (failed(LLVM::LLVMFuncOp::setPropertiesFromAttr(result.properties, dict, + emitError))) { + return failure(); + } + } + return result; +} diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index f0fbd093d344..aa68f02f46b9 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/StringSaver.h" #include #include +#include using namespace mlir; using namespace mlir::gpu; @@ -262,8 +263,10 @@ bool GPUDialect::hasConstantMemoryAddressSpace(MemRefType type) { } bool GPUDialect::isKernel(Operation *op) { - UnitAttr isKernelAttr = op->getAttrOfType(getKernelFuncAttrName()); - return static_cast(isKernelAttr); + if (auto gpuFunc = dyn_cast(op)) + return gpuFunc.isKernel(); + return static_cast( + op->getAttrOfType(getKernelFuncAttrName())); } namespace { @@ -713,10 +716,10 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result, FlatSymbolRefAttr module, FlatSymbolRefAttr function) { OpBuilder::InsertionGuard g(builder); - // Add a WorkGroup attribution attribute. This attribute is required to - // identify private attributions in the list of block argguments. - result.addAttribute(getNumWorkgroupAttributionsAttrName(), - builder.getI64IntegerAttr(workgroupAttributions.size())); + if (!workgroupAttributions.empty()) + result.addAttribute( + getWorkgroupAttributionsAttrName(result.name), + builder.getI64IntegerAttr(workgroupAttributions.size())); // Add Op operands. result.addOperands(asyncDependencies); @@ -842,7 +845,7 @@ LogicalResult LaunchOp::verifyRegions() { } // Verify Attributions Address Spaces. - if (failed(verifyAttributions(getOperation(), getWorkgroupAttributions(), + if (failed(verifyAttributions(getOperation(), getWorkgroupAttributionBBArgs(), GPUDialect::getWorkgroupAddressSpace())) || failed(verifyAttributions(getOperation(), getPrivateAttributions(), GPUDialect::getPrivateAddressSpace()))) @@ -921,7 +924,7 @@ void LaunchOp::print(OpAsmPrinter &p) { p << ')'; } - printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions()); + printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributionBBArgs()); printAttributions(p, getPrivateKeyword(), getPrivateAttributions()); p << ' '; @@ -929,7 +932,7 @@ void LaunchOp::print(OpAsmPrinter &p) { p.printRegion(getBody(), /*printEntryBlockArgs=*/false); p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{ LaunchOp::getOperandSegmentSizeAttr(), - getNumWorkgroupAttributionsAttrName(), + getWorkgroupAttributionsAttrName(), moduleAttrName, functionAttrName}); } @@ -1072,12 +1075,9 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) { return failure(); } - // Create the region arguments, it has kNumConfigRegionAttributes arguments - // that correspond to block/thread identifiers and grid/block sizes, all - // having `index` type, a variadic number of WorkGroup Attributions and - // a variadic number of Private Attributions. The number of WorkGroup - // Attributions is stored in the attr with name: - // LaunchOp::getNumWorkgroupAttributionsAttrName(). + // Create the region arguments: fixed launch-config args (`index`), then + // workgroup / private attribution args. The workgroup count is stored in the + // inherent `workgroup_attributions` attribute when non-zero. Type index = parser.getBuilder().getIndexType(); SmallVector dataTypes( LaunchOp::kNumConfigRegionAttributes + 6, index); @@ -1101,8 +1101,9 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) { unsigned numWorkgroupAttrs = regionArguments.size() - LaunchOp::kNumConfigRegionAttributes - (hasCluster ? 6 : 0); - result.addAttribute(LaunchOp::getNumWorkgroupAttributionsAttrName(), - builder.getI64IntegerAttr(numWorkgroupAttrs)); + if (numWorkgroupAttrs != 0) + result.addAttribute(LaunchOp::getWorkgroupAttributionsAttrName(result.name), + builder.getI64IntegerAttr(numWorkgroupAttrs)); // Parse private memory attributions. if (failed(parseAttributions(parser, LaunchOp::getPrivateKeyword(), @@ -1176,12 +1177,10 @@ void LaunchOp::getCanonicalizationPatterns(RewritePatternSet &rewrites, /// Adds a new block argument that corresponds to buffers located in /// workgroup memory. BlockArgument LaunchOp::addWorkgroupAttribution(Type type, Location loc) { - auto attrName = getNumWorkgroupAttributionsAttrName(); - auto attr = (*this)->getAttrOfType(attrName); - (*this)->setAttr(attrName, - IntegerAttr::get(attr.getType(), attr.getValue() + 1)); + int64_t cur = getWorkgroupAttributions().value_or(0); + setWorkgroupAttributions(std::optional(cur + 1)); return getBody().insertArgument( - LaunchOp::getNumConfigRegionAttributes() + attr.getInt(), type, loc); + getNumConfigRegionAttributes() + static_cast(cur), type, loc); } /// Adds a new block argument that corresponds to buffers located in @@ -1391,8 +1390,7 @@ LaunchFuncOp::verifySymbolUses(SymbolTableCollection &symbolTable) { return diag; } - if (!kernelFunc->getAttrOfType( - GPUDialect::getKernelFuncAttrName())) + if (!GPUDialect::isKernel(kernelFunc)) return launchOp.emitOpError("kernel function is missing the '") << GPUDialect::getKernelFuncAttrName() << "' attribute"; @@ -1576,12 +1574,10 @@ void BarrierOp::build(OpBuilder &builder, OperationState &odsState, /// Adds a new block argument that corresponds to buffers located in /// workgroup memory. BlockArgument GPUFuncOp::addWorkgroupAttribution(Type type, Location loc) { - auto attrName = getNumWorkgroupAttributionsAttrName(); - auto attr = (*this)->getAttrOfType(attrName); - (*this)->setAttr(attrName, - IntegerAttr::get(attr.getType(), attr.getValue() + 1)); + int64_t cur = getWorkgroupAttributions().value_or(0); + setWorkgroupAttributions(std::optional(cur + 1)); return getBody().insertArgument( - getFunctionType().getNumInputs() + attr.getInt(), type, loc); + getFunctionType().getNumInputs() + static_cast(cur), type, loc); } /// Adds a new block argument that corresponds to buffers located in @@ -1603,7 +1599,7 @@ void GPUFuncOp::build(OpBuilder &builder, OperationState &result, builder.getStringAttr(name)); result.addAttribute(getFunctionTypeAttrName(result.name), TypeAttr::get(type)); - result.addAttribute(getNumWorkgroupAttributionsAttrName(), + result.addAttribute(getWorkgroupAttributionsAttrName(result.name), builder.getI64IntegerAttr(workgroupAttributions.size())); result.addAttributes(attrs); Region *body = result.addRegion(); @@ -1712,8 +1708,10 @@ ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) { // Store the number of operands we just parsed as the number of workgroup // memory attributions. unsigned numWorkgroupAttrs = entryArgs.size() - type.getNumInputs(); - result.addAttribute(GPUFuncOp::getNumWorkgroupAttributionsAttrName(), - builder.getI64IntegerAttr(numWorkgroupAttrs)); + if (numWorkgroupAttrs != 0) + result.addAttribute( + GPUFuncOp::getWorkgroupAttributionsAttrName(result.name), + builder.getI64IntegerAttr(numWorkgroupAttrs)); if (workgroupAttributionAttrs) result.addAttribute(GPUFuncOp::getWorkgroupAttribAttrsAttrName(result.name), workgroupAttributionAttrs); @@ -1729,7 +1727,7 @@ ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) { // Parse the kernel attribute if present. if (succeeded(parser.parseOptionalKeyword(GPUFuncOp::getKernelKeyword()))) - result.addAttribute(GPUDialect::getKernelFuncAttrName(), + result.addAttribute(GPUFuncOp::getKernelAttrName(result.name), builder.getUnitAttr()); // Parse attributes. @@ -1751,7 +1749,7 @@ void GPUFuncOp::print(OpAsmPrinter &p) { /*isVariadic=*/false, type.getResults()); - printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions(), + printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributionBBArgs(), getWorkgroupAttribAttrs().value_or(nullptr)); printAttributions(p, getPrivateKeyword(), getPrivateAttributions(), getPrivateAttribAttrs().value_or(nullptr)); @@ -1760,7 +1758,7 @@ void GPUFuncOp::print(OpAsmPrinter &p) { function_interface_impl::printFunctionAttributes( p, *this, - {getNumWorkgroupAttributionsAttrName(), + {getWorkgroupAttributionsAttrName(), getKernelAttrName(), GPUDialect::getKernelFuncAttrName(), getFunctionTypeAttrName(), getArgAttrsAttrName(), getResAttrsAttrName(), getWorkgroupAttribAttrsAttrName(), getPrivateAttribAttrsAttrName()}); @@ -1914,7 +1912,7 @@ LogicalResult GPUFuncOp::verifyBody() { << blockArgType; } - if (failed(verifyAttributions(getOperation(), getWorkgroupAttributions(), + if (failed(verifyAttributions(getOperation(), getWorkgroupAttributionBBArgs(), GPUDialect::getWorkgroupAddressSpace())) || failed(verifyAttributions(getOperation(), getPrivateAttributions(), GPUDialect::getPrivateAddressSpace()))) diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index b9529b0d067f..d2e74dce2307 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -197,10 +197,9 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp, FunctionType::get(launchOp.getContext(), kernelOperandTypes, {}); auto outlinedFunc = gpu::GPUFuncOp::create( builder, loc, kernelFnName, type, - TypeRange(ValueRange(launchOp.getWorkgroupAttributions())), + TypeRange(ValueRange(launchOp.getWorkgroupAttributionBBArgs())), TypeRange(ValueRange(launchOp.getPrivateAttributions()))); - outlinedFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(), - builder.getUnitAttr()); + outlinedFunc.setKernel(true); // If we can infer bounds on the grid and/or block sizes from the arguments // to the launch op, propagate them to the generated kernel. This is safe @@ -227,8 +226,8 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp, // Map memory attributions from the LaunOp op to the GPUFuncOp attributions. for (const auto &[launchArg, funcArg] : - llvm::zip(launchOp.getWorkgroupAttributions(), - outlinedFunc.getWorkgroupAttributions())) + llvm::zip(launchOp.getWorkgroupAttributionBBArgs(), + outlinedFunc.getWorkgroupAttributionBBArgs())) map.map(launchArg, funcArg); for (const auto &[launchArg, funcArg] : llvm::zip(launchOp.getPrivateAttributions(), diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp index f15a52fa010a..d7f46e167624 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp @@ -84,8 +84,7 @@ static gpu::GPUFuncOp genGPUFunc(OpBuilder &builder, gpu::GPUModuleOp gpuModule, FunctionType type = FunctionType::get(gpuModule->getContext(), argsTp, {}); auto gpuFunc = gpu::GPUFuncOp::create(builder, gpuModule->getLoc(), kernelName, type); - gpuFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(), - builder.getUnitAttr()); + gpuFunc.setKernel(true); return gpuFunc; } diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp index 71fff428cceb..001f8f561eb7 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp @@ -173,7 +173,7 @@ struct MoveFuncBodyToWarpOp : public OpRewritePattern { gpuFuncOp, "expected gpu.func terminator to be gpu.return"); // Create a new function with the same signature and same attributes. SmallVector workgroupAttributionsTypes = - llvm::map_to_vector(gpuFuncOp.getWorkgroupAttributions(), + llvm::map_to_vector(gpuFuncOp.getWorkgroupAttributionBBArgs(), [](BlockArgument arg) { return arg.getType(); }); SmallVector privateAttributionsTypes = llvm::map_to_vector(gpuFuncOp.getPrivateAttributions(),