Only fir.declare at top level were converted. Update the pass to loop through all fir.declare operations.
154 lines
6.7 KiB
C++
154 lines
6.7 KiB
C++
//===-- CUFPredefinedVarToGPU.cpp -----------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "flang/Optimizer/Dialect/FIROps.h"
|
|
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
|
|
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
|
|
#include "mlir/Pass/Pass.h"
|
|
|
|
namespace fir {
|
|
#define GEN_PASS_DEF_CUFPREDEFINEDVARTOGPU
|
|
#include "flang/Optimizer/Transforms/Passes.h.inc"
|
|
} // namespace fir
|
|
|
|
using namespace mlir;
|
|
|
|
namespace {
|
|
|
|
template <typename OpTyX, typename OpTyY, typename OpTyZ>
|
|
static void createForAllDimensions(mlir::OpBuilder &builder, mlir::Location loc,
|
|
mlir::Value c1,
|
|
SmallVectorImpl<mlir::Value> &values,
|
|
bool incrementByOne = false) {
|
|
if (incrementByOne) {
|
|
auto baseX = OpTyX::create(builder, loc, builder.getI32Type());
|
|
values.push_back(mlir::arith::AddIOp::create(builder, loc, baseX, c1));
|
|
auto baseY = OpTyY::create(builder, loc, builder.getI32Type());
|
|
values.push_back(mlir::arith::AddIOp::create(builder, loc, baseY, c1));
|
|
auto baseZ = OpTyZ::create(builder, loc, builder.getI32Type());
|
|
values.push_back(mlir::arith::AddIOp::create(builder, loc, baseZ, c1));
|
|
} else {
|
|
values.push_back(OpTyX::create(builder, loc, builder.getI32Type()));
|
|
values.push_back(OpTyY::create(builder, loc, builder.getI32Type()));
|
|
values.push_back(OpTyZ::create(builder, loc, builder.getI32Type()));
|
|
}
|
|
}
|
|
|
|
static constexpr llvm::StringRef builtinsModuleName = "__fortran_builtins";
|
|
static constexpr llvm::StringRef builtinVarPrefix = "__builtin_";
|
|
static constexpr llvm::StringRef threadidx = "threadidx";
|
|
static constexpr llvm::StringRef blockidx = "blockidx";
|
|
static constexpr llvm::StringRef blockdim = "blockdim";
|
|
static constexpr llvm::StringRef griddim = "griddim";
|
|
|
|
static constexpr unsigned field_x = 0;
|
|
static constexpr unsigned field_y = 1;
|
|
static constexpr unsigned field_z = 2;
|
|
|
|
std::string mangleBuiltin(llvm::StringRef varName) {
|
|
return "_QM" + builtinsModuleName.str() + "E" + builtinVarPrefix.str() +
|
|
varName.str();
|
|
}
|
|
|
|
static void processCoordinateOp(mlir::OpBuilder &builder, mlir::Location loc,
|
|
fir::CoordinateOp coordOp, unsigned fieldIdx,
|
|
mlir::Value &gpuValue) {
|
|
std::optional<llvm::ArrayRef<int32_t>> fieldIndices =
|
|
coordOp.getFieldIndices();
|
|
assert(fieldIndices && fieldIndices->size() == 1 &&
|
|
"expect only one coordinate");
|
|
if (static_cast<unsigned>((*fieldIndices)[0]) == fieldIdx) {
|
|
llvm::SmallVector<fir::LoadOp> opToErase;
|
|
for (mlir::OpOperand &coordUse : coordOp.getResult().getUses()) {
|
|
assert(mlir::isa<fir::LoadOp>(coordUse.getOwner()) &&
|
|
"only expect load op");
|
|
auto loadOp = mlir::dyn_cast<fir::LoadOp>(coordUse.getOwner());
|
|
loadOp.getResult().replaceAllUsesWith(gpuValue);
|
|
opToErase.push_back(loadOp);
|
|
}
|
|
for (auto op : opToErase)
|
|
op.erase();
|
|
}
|
|
}
|
|
|
|
static void
|
|
processDeclareOp(mlir::OpBuilder &builder, mlir::Location loc,
|
|
fir::DeclareOp declareOp, llvm::StringRef builtinVar,
|
|
llvm::SmallVectorImpl<mlir::Value> &gpuValues,
|
|
llvm::SmallVectorImpl<mlir::Operation *> &opsToDelete) {
|
|
if (declareOp.getUniqName().str().compare(builtinVar) == 0) {
|
|
for (mlir::OpOperand &use : declareOp.getResult().getUses()) {
|
|
fir::CoordinateOp coordOp =
|
|
mlir::dyn_cast<fir::CoordinateOp>(use.getOwner());
|
|
processCoordinateOp(builder, loc, coordOp, field_x, gpuValues[0]);
|
|
processCoordinateOp(builder, loc, coordOp, field_y, gpuValues[1]);
|
|
processCoordinateOp(builder, loc, coordOp, field_z, gpuValues[2]);
|
|
opsToDelete.push_back(coordOp);
|
|
}
|
|
opsToDelete.push_back(declareOp.getOperation());
|
|
if (declareOp.getMemref().getDefiningOp())
|
|
opsToDelete.push_back(declareOp.getMemref().getDefiningOp());
|
|
}
|
|
}
|
|
|
|
struct CUFPredefinedVarToGPU
|
|
: public fir::impl::CUFPredefinedVarToGPUBase<CUFPredefinedVarToGPU> {
|
|
|
|
void runOnOperation() override {
|
|
func::FuncOp funcOp = getOperation();
|
|
if (funcOp.getBody().empty())
|
|
return;
|
|
|
|
if (auto cudaProcAttr =
|
|
funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>(
|
|
cuf::getProcAttrName())) {
|
|
if (cudaProcAttr.getValue() == cuf::ProcAttribute::Device ||
|
|
cudaProcAttr.getValue() == cuf::ProcAttribute::Global ||
|
|
cudaProcAttr.getValue() == cuf::ProcAttribute::GridGlobal ||
|
|
cudaProcAttr.getValue() == cuf::ProcAttribute::HostDevice) {
|
|
mlir::Location loc = funcOp.getLoc();
|
|
mlir::OpBuilder builder(funcOp.getContext());
|
|
builder.setInsertionPointToStart(&funcOp.getBody().front());
|
|
auto c1 = mlir::arith::ConstantOp::create(
|
|
builder, loc, builder.getI32Type(), builder.getI32IntegerAttr(1));
|
|
llvm::SmallVector<mlir::Value, 3> threadids, blockids, blockdims,
|
|
griddims;
|
|
createForAllDimensions<mlir::NVVM::ThreadIdXOp, mlir::NVVM::ThreadIdYOp,
|
|
mlir::NVVM::ThreadIdZOp>(
|
|
builder, loc, c1, threadids, /*incrementByOne=*/true);
|
|
createForAllDimensions<mlir::NVVM::BlockIdXOp, mlir::NVVM::BlockIdYOp,
|
|
mlir::NVVM::BlockIdZOp>(
|
|
builder, loc, c1, blockids, /*incrementByOne=*/true);
|
|
createForAllDimensions<mlir::NVVM::GridDimXOp, mlir::NVVM::GridDimYOp,
|
|
mlir::NVVM::GridDimZOp>(builder, loc, c1,
|
|
griddims);
|
|
createForAllDimensions<mlir::NVVM::BlockDimXOp, mlir::NVVM::BlockDimYOp,
|
|
mlir::NVVM::BlockDimZOp>(builder, loc, c1,
|
|
blockdims);
|
|
|
|
llvm::SmallVector<mlir::Operation *> opsToDelete;
|
|
funcOp.walk([&](fir::DeclareOp declareOp) {
|
|
processDeclareOp(builder, loc, declareOp, mangleBuiltin(threadidx),
|
|
threadids, opsToDelete);
|
|
processDeclareOp(builder, loc, declareOp, mangleBuiltin(blockidx),
|
|
blockids, opsToDelete);
|
|
processDeclareOp(builder, loc, declareOp, mangleBuiltin(blockdim),
|
|
blockdims, opsToDelete);
|
|
processDeclareOp(builder, loc, declareOp, mangleBuiltin(griddim),
|
|
griddims, opsToDelete);
|
|
});
|
|
|
|
for (auto op : opsToDelete)
|
|
op->erase();
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
} // end anonymous namespace
|