Instead of matching particular operations like `fir.alloca` we can use `MemoryEffectOpInterface` to figure out if a location is a new allocation.
448 lines
18 KiB
C++
448 lines
18 KiB
C++
//===- LoopInvariantCodeMotion.cpp ----------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
/// \file
|
|
/// FIR-specific Loop Invariant Code Motion pass.
|
|
/// The pass relies on FIR types and interfaces to prove the safety
|
|
/// of hoisting invariant operations out of loop-like operations.
|
|
/// It may be run on both HLFIR and FIR representations.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "flang/Optimizer/Analysis/AliasAnalysis.h"
|
|
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
|
|
#include "flang/Optimizer/Dialect/FIROperationMoveOpInterface.h"
|
|
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
|
|
#include "flang/Optimizer/Dialect/FortranVariableInterface.h"
|
|
#include "flang/Optimizer/HLFIR/HLFIROps.h"
|
|
#include "flang/Optimizer/Support/Utils.h"
|
|
#include "flang/Optimizer/Transforms/Passes.h"
|
|
#include "mlir/Interfaces/LoopLikeInterface.h"
|
|
#include "mlir/Pass/Pass.h"
|
|
#include "mlir/Transforms/LoopInvariantCodeMotionUtils.h"
|
|
#include "llvm/ADT/TypeSwitch.h"
|
|
#include "llvm/Support/DebugLog.h"
|
|
|
|
namespace fir {
|
|
#define GEN_PASS_DEF_LOOPINVARIANTCODEMOTION
|
|
#include "flang/Optimizer/Transforms/Passes.h.inc"
|
|
} // namespace fir
|
|
|
|
#define DEBUG_TYPE "flang-licm"
|
|
|
|
// Temporary engineering option for triaging LICM.
|
|
static llvm::cl::opt<bool> disableFlangLICM(
|
|
"disable-flang-licm", llvm::cl::init(false), llvm::cl::Hidden,
|
|
llvm::cl::desc("Disable Flang's loop invariant code motion"));
|
|
|
|
namespace {
|
|
|
|
using namespace mlir;
|
|
|
|
/// The pass tries to hoist loop invariant operations with only
|
|
/// MemoryEffects::Read effects (MemoryEffects::Write support
|
|
/// may be added later).
|
|
/// The safety of hoisting is proven by:
|
|
/// * Proving that the loop runs at least one iteration.
|
|
/// * Proving that is is always safe to load from this location
|
|
/// (see isSafeToHoistLoad() comments below).
|
|
struct LoopInvariantCodeMotion
|
|
: fir::impl::LoopInvariantCodeMotionBase<LoopInvariantCodeMotion> {
|
|
using LoopInvariantCodeMotionBase::LoopInvariantCodeMotionBase;
|
|
void runOnOperation() override;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
/// 'location' is a memory reference used by a memory access.
|
|
/// The type of 'location' defines the data type of the access
|
|
/// (e.g. it is considered to be invalid to access 'i64'
|
|
/// data using '!fir.ref<i32>`).
|
|
/// For the given location, this function returns true iff
|
|
/// the Fortran object being accessed is a scalar that
|
|
/// may not be OPTIONAL.
|
|
///
|
|
/// Note that the '!fir.ref<!fir.box<>>' accesses are considered
|
|
/// to be scalar, even if the underlying data is an array.
|
|
///
|
|
/// Note that an access of '!fir.ref<scalar>' may access
|
|
/// an array object. For example:
|
|
/// real :: x(:)
|
|
/// do i=...
|
|
/// = x(10)
|
|
/// 'x(10)' accesses array 'x', and it may be unsafe to hoist
|
|
/// it without proving that '10' is a valid index for the array.
|
|
/// The fact that 'x' is not OPTIONAL does not allow hoisting
|
|
/// on its own.
|
|
static bool isNonOptionalScalar(Value location) {
|
|
while (true) {
|
|
LDBG() << "Checking location:\n" << location;
|
|
Type dataType = fir::unwrapRefType(location.getType());
|
|
if (!isa<fir::BaseBoxType>(location.getType()) &&
|
|
(!dataType ||
|
|
(!isa<fir::BaseBoxType>(dataType) && !fir::isa_trivial(dataType) &&
|
|
!fir::isa_derived(dataType)))) {
|
|
LDBG() << "Failure: data access is not scalar";
|
|
return false;
|
|
}
|
|
Operation *defOp = location.getDefiningOp();
|
|
if (!defOp) {
|
|
// If this is a function argument
|
|
auto blockArg = cast<BlockArgument>(location);
|
|
Block *block = blockArg.getOwner();
|
|
if (block && block->isEntryBlock())
|
|
if (auto funcOp =
|
|
dyn_cast_if_present<FunctionOpInterface>(block->getParentOp()))
|
|
if (!funcOp.getArgAttrOfType<UnitAttr>(blockArg.getArgNumber(),
|
|
fir::getOptionalAttrName())) {
|
|
LDBG() << "Success: is non optional scalar dummy";
|
|
return true;
|
|
}
|
|
|
|
LDBG() << "Failure: no defining operation";
|
|
return false;
|
|
}
|
|
|
|
// Scalars "defined" by fir.address_of or that are new
|
|
// allocations (e.g. fir.alloca, cuf.alloc, etc.) are present.
|
|
if (isa<fir::AddrOfOp>(defOp) ||
|
|
fir::isNewAllocationResult(cast<OpResult>(location)).value_or(false)) {
|
|
LDBG() << "Success: is non optional scalar";
|
|
return true;
|
|
}
|
|
|
|
if (auto varIface = dyn_cast<fir::FortranVariableOpInterface>(defOp)) {
|
|
if (varIface.isOptional()) {
|
|
// The variable is optional, so do not look further.
|
|
// Note that it is possible to deduce that the optional
|
|
// is actually present, but we are not doing it now.
|
|
LDBG() << "Failure: is optional";
|
|
return false;
|
|
}
|
|
|
|
// In case of MLIR inlining and ASSOCIATE an [hl]fir.declare
|
|
// may declare a scalar variable that is actually a "view"
|
|
// of an array element. Originally, such [hl]fir.declare
|
|
// would be located inside the loop preventing the hoisting.
|
|
// But if we decide to hoist such [hl]fir.declare in future,
|
|
// we cannot rely on their attributes/types.
|
|
// Use reliable checks based on the variable storage.
|
|
|
|
// If the variable has storage specifier (e.g. it is a member
|
|
// of COMMON, etc.), we can rely that the storage is present,
|
|
// and we can also rely on its FortranVariableOpInterface
|
|
// definition type (which is a scalar due to previous checks).
|
|
if (auto storageIface =
|
|
dyn_cast<fir::FortranVariableStorageOpInterface>(defOp))
|
|
if (Value storage = storageIface.getStorage()) {
|
|
LDBG() << "Success: is scalar with existing storage";
|
|
return true;
|
|
}
|
|
|
|
// TODO: we can probably use FIR AliasAnalysis' getSource()
|
|
// method to identify the storage in more cases.
|
|
location = llvm::TypeSwitch<Operation *, Value>(defOp)
|
|
.Case<fir::DeclareOp, hlfir::DeclareOp>(
|
|
[](auto op) { return op.getMemref(); })
|
|
.Default([](auto) { return nullptr; });
|
|
|
|
if (location)
|
|
continue;
|
|
|
|
LDBG() << "Failure: cannot reason about variable storage";
|
|
return false;
|
|
}
|
|
if (auto viewIface = dyn_cast<fir::FortranObjectViewOpInterface>(defOp)) {
|
|
location = viewIface.getViewSource(cast<OpResult>(location));
|
|
} else {
|
|
LDBG() << "Failure: unknown operation:\n" << *defOp;
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns true iff it is safe to hoist the given load-like operation 'op',
|
|
/// which access given memory 'locations', out of the operation 'loopLike'.
|
|
/// The current safety conditions are:
|
|
/// * The load is known to be unconditionally executed in the loop and the
|
|
/// loop runs at least one iteration, OR
|
|
/// * all the accessed locations are inside scalar non-OPTIONAL
|
|
/// Fortran objects (Fortran descriptors are considered to be scalars).
|
|
///
|
|
/// When \p maybeConditionallyExecuted is true, the load may be inside a
|
|
/// conditional region (e.g. scf.if) within the loop, so the trip count
|
|
/// shortcut cannot be used: even if the loop runs, the condition might never
|
|
/// be true and the load might access an invalid location.
|
|
/// TODO: analyze the parent operation to determine whether it truly
|
|
/// conditionally executes its body (e.g. scf.execute_region always does).
|
|
static bool isSafeToHoistLoad(Operation *op, ArrayRef<Value> locations,
|
|
LoopLikeOpInterface loopLike,
|
|
AliasAnalysis &aliasAnalysis,
|
|
bool maybeConditionallyExecuted) {
|
|
for (Value location : locations)
|
|
if (aliasAnalysis.getModRef(loopLike.getOperation(), location).isMod()) {
|
|
LDBG() << "Failure: reads location:\n"
|
|
<< location << "\nwhich is modified inside the loop";
|
|
return false;
|
|
}
|
|
|
|
// Check that it is safe to read from all the locations before the loop.
|
|
if (!maybeConditionallyExecuted) {
|
|
std::optional<llvm::APInt> tripCount = loopLike.getStaticTripCount();
|
|
if (tripCount && !tripCount->isZero()) {
|
|
// Loop executes at least one iteration and the load is unconditionally
|
|
// executed in the loop body, so it is safe to hoist.
|
|
LDBG() << "Success: loop has non-zero iterations";
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// Check whether the access must always be valid.
|
|
return llvm::all_of(
|
|
locations, [&](Value location) { return isNonOptionalScalar(location); });
|
|
// TODO: consider hoisting under condition of the loop's trip count
|
|
// being non-zero.
|
|
}
|
|
|
|
/// Returns true iff the given 'op' is a load-like operation,
|
|
/// and it can be hoisted out of 'loopLike' operation.
|
|
/// See isSafeToHoistLoad for the meaning of \p maybeConditionallyExecuted.
|
|
static bool canHoistLoad(Operation *op, LoopLikeOpInterface loopLike,
|
|
AliasAnalysis &aliasAnalysis,
|
|
bool maybeConditionallyExecuted) {
|
|
LDBG() << "Checking operation:\n" << *op;
|
|
if (auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op)) {
|
|
SmallVector<MemoryEffects::EffectInstance> effects;
|
|
effectInterface.getEffects(effects);
|
|
if (effects.empty()) {
|
|
LDBG() << "Failure: not a load";
|
|
return false;
|
|
}
|
|
llvm::SetVector<Value> locations;
|
|
for (const MemoryEffects::EffectInstance &effect : effects) {
|
|
Value location = effect.getValue();
|
|
if (!isa<MemoryEffects::Read>(effect.getEffect())) {
|
|
LDBG() << "Failure: has unsupported effects";
|
|
return false;
|
|
} else if (!location) {
|
|
LDBG() << "Failure: reads from unknown location";
|
|
return false;
|
|
}
|
|
locations.insert(location);
|
|
}
|
|
return isSafeToHoistLoad(op, locations.getArrayRef(), loopLike,
|
|
aliasAnalysis, maybeConditionallyExecuted);
|
|
}
|
|
LDBG() << "Failure: has unknown effects";
|
|
return false;
|
|
}
|
|
|
|
/// Recursively collect regions from operations inside \p region, skipping
|
|
/// IsolatedFromAbove operations (whose regions form a separate scope) and
|
|
/// LoopLikeOpInterface operations (which have their own LICM invocation).
|
|
static void collectNestedRegions(Region ®ion,
|
|
SmallVectorImpl<Region *> &result) {
|
|
for (Operation &op : region.getOps()) {
|
|
if (op.hasTrait<OpTrait::IsIsolatedFromAbove>())
|
|
continue;
|
|
if (isa<LoopLikeOpInterface>(&op))
|
|
continue;
|
|
for (Region &nested : op.getRegions()) {
|
|
result.push_back(&nested);
|
|
collectNestedRegions(nested, result);
|
|
}
|
|
}
|
|
}
|
|
|
|
void LoopInvariantCodeMotion::runOnOperation() {
|
|
if (disableFlangLICM) {
|
|
LDBG() << "Skipping [HL]FIR LoopInvariantCodeMotion()";
|
|
return;
|
|
}
|
|
|
|
LDBG() << "Enter [HL]FIR LoopInvariantCodeMotion()";
|
|
|
|
auto &aliasAnalysis = getAnalysis<AliasAnalysis>();
|
|
aliasAnalysis.addAnalysisImplementation(fir::AliasAnalysis{});
|
|
|
|
std::function<bool(Operation *, LoopLikeOpInterface, bool)>
|
|
shouldMoveOutOfLoop = [&](Operation *op, LoopLikeOpInterface loopLike,
|
|
bool maybeConditionallyExecuted) {
|
|
if (isPure(op)) {
|
|
LDBG() << "Pure operation: " << *op;
|
|
return true;
|
|
}
|
|
|
|
// Handle RecursivelySpeculatable operations that have
|
|
// RecursiveMemoryEffects by checking if all their
|
|
// nested operations can be hoisted.
|
|
auto iface = dyn_cast<ConditionallySpeculatable>(op);
|
|
if (iface && iface.getSpeculatability() ==
|
|
Speculation::RecursivelySpeculatable) {
|
|
if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
|
|
LDBG() << "Checking recursive operation:\n" << *op;
|
|
llvm::SmallVector<Operation *> nestedOps;
|
|
for (Region ®ion : op->getRegions())
|
|
for (Block &block : region)
|
|
for (Operation &nestedOp : block)
|
|
nestedOps.push_back(&nestedOp);
|
|
|
|
bool result = llvm::all_of(nestedOps, [&](Operation *nestedOp) {
|
|
return shouldMoveOutOfLoop(nestedOp, loopLike,
|
|
maybeConditionallyExecuted);
|
|
});
|
|
LDBG() << "Recursive operation can" << (result ? "" : "not")
|
|
<< " be hoisted";
|
|
|
|
// If nested operations cannot be hoisted, there is nothing
|
|
// else to check. Also if the operation itself does not have
|
|
// any memory effects, we can return the result now.
|
|
// Otherwise, we have to check the operation itself below.
|
|
if (!result || !isa<MemoryEffectOpInterface>(op))
|
|
return result;
|
|
}
|
|
}
|
|
return canHoistLoad(op, loopLike, aliasAnalysis,
|
|
maybeConditionallyExecuted);
|
|
};
|
|
|
|
getOperation()->walk([&](LoopLikeOpInterface loopLike) {
|
|
if (!fir::canMoveOutOf(loopLike, nullptr)) {
|
|
LDBG() << "Cannot hoist anything out of loop operation: ";
|
|
LDBG_OS([&](llvm::raw_ostream &os) {
|
|
loopLike->print(os, OpPrintingFlags().skipRegions());
|
|
});
|
|
return;
|
|
}
|
|
// We always hoist operations to the parent operation of the loopLike.
|
|
// Check that the parent operation allows the hoisting, e.g.
|
|
// omp::LoopWrapperInterface operations assume tight nesting
|
|
// of the inner maybe loop-like operations, so hoisting
|
|
// to such a parent would be invalid. We rely on
|
|
// fir::canMoveFromDescendant() to identify whether the hoisting
|
|
// is allowed.
|
|
Operation *parentOp = loopLike->getParentOp();
|
|
if (!parentOp) {
|
|
LDBG() << "Skipping top-level loop-like operation?";
|
|
return;
|
|
} else if (!fir::canMoveFromDescendant(parentOp, loopLike, nullptr)) {
|
|
LDBG() << "Cannot hoist anything into operation: ";
|
|
LDBG_OS([&](llvm::raw_ostream &os) {
|
|
parentOp->print(os, OpPrintingFlags().skipRegions());
|
|
});
|
|
return;
|
|
}
|
|
auto isDefinedOutsideRegion = [&](Value value, Region *) {
|
|
return loopLike.isDefinedOutsideOfLoop(value);
|
|
};
|
|
// Check canMoveOutOf for the candidate and all its nested operations.
|
|
// Moving an operation with regions also moves its contents, so
|
|
// restrictions like cuf.kernel blocking !fir.ref operands must be
|
|
// checked transitively.
|
|
auto canMoveOutOfOp = [&](Operation *regionOwner, Operation *candidate) {
|
|
if (!fir::canMoveOutOf(regionOwner, candidate))
|
|
return false;
|
|
bool blocked = false;
|
|
candidate->walk([&](Operation *nested) {
|
|
if (nested != candidate && !fir::canMoveOutOf(regionOwner, nested))
|
|
blocked = true;
|
|
return blocked ? WalkResult::interrupt() : WalkResult::advance();
|
|
});
|
|
return !blocked;
|
|
};
|
|
auto canMoveOutOfLoop = [&](Operation *op) {
|
|
if (!canMoveOutOfOp(loopLike, op)) {
|
|
LDBG() << "Cannot hoist " << *op << " out of the loop";
|
|
return false;
|
|
}
|
|
if (!fir::canMoveFromDescendant(parentOp, loopLike, op)) {
|
|
LDBG() << "Cannot hoist " << *op << " into the parent of the loop";
|
|
return false;
|
|
}
|
|
return true;
|
|
};
|
|
auto moveOutOfRegion = [&](Operation *op, Region *) {
|
|
loopLike.moveOutOfLoop(op);
|
|
};
|
|
|
|
moveLoopInvariantCode(
|
|
loopLike.getLoopRegions(), isDefinedOutsideRegion,
|
|
/*shouldMoveOutOfRegion=*/
|
|
[&](Operation *op, Region *) {
|
|
return canMoveOutOfLoop(op) &&
|
|
shouldMoveOutOfLoop(op, loopLike,
|
|
/*maybeConditionallyExecuted=*/false);
|
|
},
|
|
moveOutOfRegion);
|
|
|
|
if (hoistFromNestedRegions == fir::LICMNestedHoistingMode::None)
|
|
return;
|
|
|
|
// Hoist loop-invariant ops from nested regions (e.g., fir.convert
|
|
// inside scf.if) out of the loop. This enables CSE to deduplicate
|
|
// converted memrefs, which improves alias analysis for parallelization.
|
|
// The callbacks close over loopLike (ignoring the Region* parameter),
|
|
// so invariance and movement are evaluated against the loop, not the
|
|
// nested region.
|
|
// Loads hoisted from nested regions are treated as maybe-conditionally
|
|
// executed: we do not know whether the parent operation always executes
|
|
// its body (e.g. scf.execute_region does, scf.if might not), so the
|
|
// trip count shortcut cannot prove safety.
|
|
// TODO: analyze the parent operation to determine whether it truly
|
|
// conditionally executes its body.
|
|
SmallVector<Region *> nestedRegions;
|
|
for (Region *loopRegion : loopLike.getLoopRegions())
|
|
collectNestedRegions(*loopRegion, nestedRegions);
|
|
|
|
if (nestedRegions.empty())
|
|
return;
|
|
|
|
auto shouldMoveFromNestedRegion = [&](Operation *op, Region *) {
|
|
// Check that all intermediate operations between op and the loop
|
|
// allow the candidate to be moved out. For example, cuf.kernel
|
|
// restricts hoisting of operations with !fir.ref operands.
|
|
for (Operation *ancestor = op->getParentOp();
|
|
ancestor != loopLike.getOperation();
|
|
ancestor = ancestor->getParentOp()) {
|
|
if (!ancestor) {
|
|
// The operation is no longer nested inside the loop (a parent
|
|
// operation was already hoisted out). Nothing to do.
|
|
return false;
|
|
}
|
|
if (!canMoveOutOfOp(ancestor, op)) {
|
|
LDBG() << "Cannot hoist " << *op
|
|
<< " out of intermediate operation: ";
|
|
LDBG_OS([&](llvm::raw_ostream &os) {
|
|
ancestor->print(os, OpPrintingFlags().skipRegions());
|
|
});
|
|
return false;
|
|
}
|
|
}
|
|
return canMoveOutOfLoop(op) &&
|
|
shouldMoveOutOfLoop(op, loopLike,
|
|
/*maybeConditionallyExecuted=*/true);
|
|
};
|
|
if (hoistFromNestedRegions == fir::LICMNestedHoistingMode::Aggressive) {
|
|
moveLoopInvariantCode(nestedRegions, isDefinedOutsideRegion,
|
|
shouldMoveFromNestedRegion, moveOutOfRegion);
|
|
} else {
|
|
// "cheap" mode: only hoist fir.convert.
|
|
// TODO: refine the cost model for "cheap" hoisting to include
|
|
// other inexpensive operations.
|
|
moveLoopInvariantCode(
|
|
nestedRegions, isDefinedOutsideRegion,
|
|
/*shouldMoveOutOfRegion=*/
|
|
[&](Operation *op, Region *region) {
|
|
return isa<fir::ConvertOp>(op) &&
|
|
shouldMoveFromNestedRegion(op, region);
|
|
},
|
|
moveOutOfRegion);
|
|
}
|
|
});
|
|
|
|
LDBG() << "Exit [HL]FIR LoopInvariantCodeMotion()";
|
|
}
|