Files
llvm-project/llvm/lib/CodeGen/ExpandIRInsts.cpp
Yunqing Yu 22f0c6947c [CodeGen][ExpandIRInsts] NFC: cleanup style. (#194978)
- Quote in-tree include for Support/Casting.h
- Drop redundant llvm:: qualifiers (using namespace llvm is in scope).
- Replace anon-namespace free functions with `static`.
- Remove stray semicolons after empty function bodies.
2026-04-30 09:29:58 -07:00

1387 lines
53 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//===--- ExpandIRInsts.cpp - Expand IR instructions -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass expands certain instructions at the IR level.
//
// The following expansions are implemented:
// - Expansion of fptoui .. to, fptosi .. to, uitofp .. to, sitofp
// .. to instructions with a bitwidth above a threshold. This is
// useful for targets like x86_64 that cannot lower fp conversions
// with more than 128 bits.
//
// - Expansion of frem for types MVT::f16, MVT::f32, and MVT::f64 for
// targets which use "Expand" as the legalization action for the
// corresponding type.
//
// - Expansion of udiv, sdiv, urem, and srem instructions with
// a bitwidth above a threshold into a call to auto-generated
// functions. This is useful for targets like x86_64 that cannot
// lower divisions with more than 128 bits or targets like x86_32 that
// cannot lower divisions with more than 64 bits.
//
// Instructions with vector types are scalarized first if their scalar
// types can be expanded. Scalable vector types are not supported.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ExpandIRInsts.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/SimplifyQuery.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
#include <optional>
#define DEBUG_TYPE "expand-ir-insts"
using namespace llvm;
/// Hidden command-line option "expand-fp-convert-bits". Per its description,
/// fp convert instructions on integers with more than <N> bits are expanded.
/// The initial value is IntegerType::MAX_INT_BITS.
// NOTE(review): the code that reads this option is not visible in this part
// of the file; confirm how the threshold interacts with the target's own
// limit before relying on the default.
static cl::opt<unsigned>
ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
cl::init(IntegerType::MAX_INT_BITS),
cl::desc("fp convert instructions on integers with "
"more than <N> bits are expanded."));
/// Hidden command-line option "expand-div-rem-bits". Per its description,
/// div and rem instructions on integers with more than <N> bits are expanded.
/// The initial value is IntegerType::MAX_INT_BITS.
// NOTE(review): the code that reads this option is not visible in this part
// of the file; confirm how the threshold interacts with the target's own
// limit before relying on the default.
static cl::opt<unsigned>
ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,
cl::init(IntegerType::MAX_INT_BITS),
cl::desc("div and rem instructions on integers with "
"more than <N> bits are expanded."));
/// Return true if \p V is a ConstantInt whose value is a power of two.
/// When \p SignedOp is set, a negative constant is accepted if its negation
/// is a power of two.
static bool isConstantPowerOfTwo(Value *V, bool SignedOp) {
  const auto *CI = dyn_cast<ConstantInt>(V);
  if (!CI)
    return false;

  const APInt &Raw = CI->getValue();
  // For signed operations test the magnitude of a negative divisor.
  const bool UseNegation = SignedOp && Raw.isNegative();
  return (UseNegation ? -Raw : Raw).isPowerOf2();
}
/// Return true if \p Opcode is one of the signed division/remainder opcodes
/// (sdiv or srem).
static bool isSigned(unsigned Opcode) {
  switch (Opcode) {
  case Instruction::SDiv:
  case Instruction::SRem:
    return true;
  default:
    return false;
  }
}
/// Compute the bias-adjusted dividend used by signed div/rem by 2^ShiftAmt:
///   Sign     = ashr X, (BitWidth - 1)            -- 0 or -1
///   Bias     = lshr Sign, (BitWidth - ShiftAmt)  -- 0 or 2^ShiftAmt - 1
///   Adjusted = add X, Bias
/// For negative X the bias adds (2^ShiftAmt - 1), so that a following ashr
/// rounds towards zero rather than towards -inf. The lshr form is used
/// instead of an 'and' to avoid large immediate constants.
///
/// \p ShiftAmt must be in the open range (0, BitWidth); callers handle the
/// ShiftAmt == 0 case themselves.
static Value *addSignedBias(IRBuilder<> &Builder, Value *X, unsigned BitWidth,
                            unsigned ShiftAmt) {
  assert(ShiftAmt > 0 && ShiftAmt < BitWidth &&
         "ShiftAmt out of range; callers should handle ShiftAmt == 0");

  const unsigned SignShift = BitWidth - 1;
  const unsigned BiasShift = BitWidth - ShiftAmt;

  // All-ones when X is negative, zero otherwise.
  Value *SignMask = Builder.CreateAShr(X, SignShift, "sign");
  // Keep only the low ShiftAmt bits of the sign mask.
  Value *Rounding = Builder.CreateLShr(SignMask, BiasShift, "bias");
  return Builder.CreateAdd(X, Rounding, "adjusted");
}
/// Replace a udiv/sdiv/urem/srem whose divisor is a constant power of two
/// (or, for the signed opcodes, the negation of one) by shift/mask/add
/// arithmetic, and erase the original instruction.
///
/// With C = log2(|divisor|):
///   udiv X, 2^C        ->  lshr X, C
///   sdiv X, 2^C        ->  ashr (add X, Bias), C   (Bias corrects rounding)
///   sdiv exact X, 2^C  ->  ashr exact X, C         (no bias needed)
///   urem X, 2^C        ->  and X, (2^C - 1)
///   srem X, 2^C        ->  sub X, (shl (ashr (add X, Bias), C), C)
/// For a negative power-of-2 divisor the sdiv result is negated afterwards.
static void expandPow2DivRem(BinaryOperator *BO) {
  LLVM_DEBUG(dbgs() << "Expanding instruction: " << *BO << '\n');

  const unsigned Opcode = BO->getOpcode();
  const bool IsSigned = isSigned(Opcode);
  const bool IsDiv =
      Opcode == Instruction::UDiv || Opcode == Instruction::SDiv;
  // Only the division opcodes may be queried for the 'exact' flag.
  const bool IsExact = IsDiv && BO->isExact();

  Value *Divisor = BO->getOperand(1);
  assert(isConstantPowerOfTwo(Divisor, IsSigned) &&
         "Expected power-of-2 constant divisor");

  Value *X = BO->getOperand(0);
  Type *Ty = BO->getType();
  const unsigned BitWidth = Ty->getIntegerBitWidth();
  const APInt DivisorVal = cast<ConstantInt>(Divisor)->getValue();
  const bool IsNegativeDivisor = IsSigned && DivisorVal.isNegative();

  // The number of trailing zero bits is the shift amount for positive and
  // negative powers of two alike (INT_MIN included), so the divisor does not
  // have to be negated first.
  const unsigned ShiftAmt = DivisorVal.countr_zero();

  IRBuilder<> Builder(BO);
  Value *Result = nullptr;

  if (ShiftAmt == 0) {
    // |divisor| == 1: X / 1 = X, X / -1 = -X, and any remainder is 0.
    if (!IsDiv)
      Result = ConstantInt::get(Ty, 0);
    else if (IsNegativeDivisor)
      Result = Builder.CreateNeg(X);
    else
      Result = X;
  } else if (!IsSigned) {
    if (IsDiv) {
      Result = Builder.CreateLShr(X, ShiftAmt, "", IsExact);
    } else {
      Value *LowBits =
          ConstantInt::get(Ty, APInt::getLowBitsSet(BitWidth, ShiftAmt));
      Result = Builder.CreateAnd(X, LowBits);
    }
  } else {
    // An exact division cannot round, so it needs no bias and reads X only
    // once. Every other signed form reads X several times (bias, shift and,
    // for srem, the final sub); freeze it so that all reads agree when X may
    // be undef or poison.
    const bool NeedsBias = !IsExact;
    if (NeedsBias && !isGuaranteedNotToBeUndefOrPoison(X))
      X = Builder.CreateFreeze(X, X->getName() + ".fr");

    Value *Dividend = X;
    if (NeedsBias)
      Dividend = addSignedBias(Builder, X, BitWidth, ShiftAmt);

    const bool NegateQuotient = IsDiv && IsNegativeDivisor;
    Value *Quotient = Builder.CreateAShr(
        Dividend, ShiftAmt, NegateQuotient ? "pre.neg" : "shifted", IsExact);

    if (!IsDiv) {
      // Rem = X - (Quotient << ShiftAmt): the shift round-trip clears the
      // low ShiftAmt bits, the subtraction recovers them.
      Value *Truncated = Builder.CreateShl(Quotient, ShiftAmt, "truncated");
      Result = Builder.CreateSub(X, Truncated);
    } else if (NegateQuotient) {
      Result = Builder.CreateNeg(Quotient);
    } else {
      Result = Quotient;
    }
  }

  BO->replaceAllUsesWith(Result);
  // Hand the old name to the replacement, unless the replacement is simply
  // the dividend itself or is not an instruction.
  auto *NewInst = dyn_cast<Instruction>(Result);
  if (NewInst && Result != X)
    NewInst->takeName(BO);
  BO->dropAllReferences();
  BO->eraseFromParent();
}
/// This class implements a precise expansion of the frem instruction.
/// The generated code is based on the fmod implementation in the AMD device
/// libs.
///
/// Instances are obtained through create() and emit IR through the IRBuilder
/// they were created with. buildFRem() emits the precise expansion and
/// buildApproxFRem() emits an approximate one.
namespace {
class FRemExpander {
/// The IRBuilder to use for the expansion.
IRBuilder<> &B;
/// Floating point type of the return value and the arguments of the FRem
/// instructions that should be expanded.
Type *FremTy;
/// Floating point type to use for the computation. This may be
/// wider than the \p FremTy.
Type *ComputeFpTy;
/// Integer type used to hold the exponents returned by frexp.
Type *ExTy;
/// How many bits of the quotient to compute per iteration of the
/// algorithm, stored as a value of type \p ExTy.
Value *Bits;
/// Constant 1 of type \p ExTy.
Value *One;
/// The frem argument/return types that can be expanded by this class.
// TODO: The expansion could work for other floating point types
// as well, but this would require additional testing.
static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
MVT::f64};
public:
/// Return true if \p Ty is one of the ExpandableTypes. \p Ty must map to a
/// simple value type (asserted).
static bool canExpandType(Type *Ty) {
EVT VT = EVT::getEVT(Ty);
assert(VT.isSimple() && "Can expand only simple types");
return is_contained(ExpandableTypes, VT.getSimpleVT());
}
/// Return true if \p TLI reports "Expand" as the legalization action of
/// ISD::FREM for the scalar type \p VT. Vector types must be scalarized by
/// the caller (asserted).
static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");
return TLI.getOperationAction(ISD::FREM, VT) ==
TargetLowering::LegalizeAction::Expand;
}
/// Overload taking an IR type; the decision is made on the scalar type of
/// \p Ty.
static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
// Consider scalar type for simplicity. It seems unlikely that a
// vector type can be legalized without expansion if the scalar
// type cannot.
return shouldExpandFremType(TLI, EVT::getEVT(Ty->getScalarType()));
}
/// Return true if the pass should expand frem instructions of any type
/// for the target represented by \p TLI.
static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
return any_of(ExpandableTypes,
[&](MVT V) { return shouldExpandFremType(TLI, EVT(V)); });
}
/// Create an expander for frem instructions of type \p Ty that emits its
/// code through \p B. \p Ty must satisfy canExpandType() (asserted).
static FRemExpander create(IRBuilder<> &B, Type *Ty) {
assert(canExpandType(Ty) && "Expected supported floating point type");
// The type to use for the computation of the remainder. This may be
// wider than the input/result type, which affects the maximum number of
// iterations (see MaxIter below).
Type *ComputeTy = Ty;
// Maximum number of iterations of the remainder computation loop
// to use. This value is for the case in which the computation
// uses the same input/result type.
unsigned MaxIter = 2;
if (Ty->isHalfTy()) {
// Use the wider type and fewer iterations.
ComputeTy = B.getFloatTy();
MaxIter = 1;
}
// The number of quotient bits per iteration is the precision of the
// input/result type divided by the iteration bound.
unsigned Precision = APFloat::semanticsPrecision(Ty->getFltSemantics());
return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
}
/// Build the FRem expansion for the numerator \p X and the
/// denominator \p Y. The type of X and Y must match \p FremTy. The
/// code will be generated at the insertion point of \p B and the
/// insertion point will be reset at exit.
Value *buildFRem(Value *X, Value *Y, std::optional<SimplifyQuery> &SQ) const;
/// Build an approximate FRem expansion for the numerator \p X and
/// the denominator \p Y at the insertion point of builder \p B.
/// The type of X and Y must match \p FremTy.
Value *buildApproxFRem(Value *X, Value *Y) const;
private:
/// \p Bits is the number of quotient bits computed per iteration; it is
/// stored as a constant of type \p ExTy (i32).
FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)
: B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
Bits(ConstantInt::get(ExTy, Bits)), One(ConstantInt::get(ExTy, 1)) {}
/// Emit the reciprocal of \p V as the division 1.0 / V in \p ComputeFpTy.
Value *createRcp(Value *V, const Twine &Name) const {
// Leave it to later optimizations to turn this into an rcp
// instruction if available.
return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
}
// Helper function to build the UPDATE_AX code which is common to the
// loop body and the "final iteration". Returns the updated ax value.
Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const {
// Build:
// float q = rint(ax * ayinv);
// ax = fma(-q, ay, ax);
// int clt = ax < 0.0f;
// float axp = ax + ay;
// ax = clt ? axp : ax;
Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
{}, "q");
Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {}, "ax");
Value *Clt = B.CreateFCmp(CmpInst::FCMP_OLT, AxUpdate,
ConstantFP::getZero(ComputeFpTy), "clt");
Value *Axp = B.CreateFAdd(AxUpdate, Ay, "axp");
return B.CreateSelect(Clt, Axp, AxUpdate, "ax");
}
/// Build code to extract the exponent and mantissa of \p Src.
/// Returns a pair whose first element is the mantissa scaled by
/// 2^\p NewExp (named \p PowName) and whose second element is the exponent
/// minus one (named \p ExName), for use as a loop bound.
std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,
const Twine &ExName,
const Twine &PowName) const {
// Build:
// ExName = frexp_exp(Src) - 1;
// PowName = fldexp(frexp_mant(Src), NewExp);
Type *Ty = Src->getType();
// Note: this local shadows the member of the same name; both are i32.
Type *ExTy = B.getInt32Ty();
Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
Value *Mant = B.CreateExtractValue(Frexp, {0});
Value *Exp = B.CreateExtractValue(Frexp, {1});
Exp = B.CreateSub(Exp, One, ExName);
Value *Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
return {Pow, Exp};
}
/// Build the main computation of the remainder for the case in which
/// Ax > Ay, where Ax = |X|, Ay = |Y|, and X is the numerator and Y the
/// denominator. Add the incoming edge from the computation result
/// to \p RetPhi. \p FMF is installed on the builder while this function
/// emits code.
void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,
PHINode *RetPhi, FastMathFlags FMF) const {
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(FMF);
// Build:
// ex = frexp_exp(ax) - 1;
// ax = fldexp(frexp_mant(ax), bits);
// ey = frexp_exp(ay) - 1;
// ay = fldexp(frexp_mant(ay), 1);
auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits, "ex", "ax");
auto [Ay, Ey] = buildExpAndPower(AyInitial, One, "ey", "ay");
// Build:
// int nb = ex - ey;
// float ayinv = 1.0/ay;
Value *Nb = B.CreateSub(Ex, Ey, "nb");
Value *Ayinv = createRcp(Ay, "ayinv");
// Build: while (nb > bits)
BasicBlock *PreheaderBB = B.GetInsertBlock();
Function *Fun = PreheaderBB->getParent();
auto *LoopBB = BasicBlock::Create(B.getContext(), "frem.loop_body", Fun);
auto *ExitBB = BasicBlock::Create(B.getContext(), "frem.loop_exit", Fun);
B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, Nb, Bits), LoopBB, ExitBB);
// Build loop body:
// UPDATE_AX
// ax = fldexp(ax, bits);
// nb -= bits;
// One iteration of the loop is factored out. The code shared by
// the loop and this "iteration" is denoted by UPDATE_AX.
B.SetInsertPoint(LoopBB);
PHINode *NbIv = B.CreatePHI(Nb->getType(), 2, "nb_iv");
NbIv->addIncoming(Nb, PreheaderBB);
auto *AxPhi = B.CreatePHI(ComputeFpTy, 2, "ax_loop_phi");
AxPhi->addIncoming(Ax, PreheaderBB);
Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {}, "ax_update");
AxPhi->addIncoming(AxPhiUpdate, LoopBB);
NbIv->addIncoming(B.CreateSub(NbIv, Bits, "nb_update"), LoopBB);
// The back-edge test uses the value the induction phi had on entry to
// this iteration (NbIv), not the freshly computed "nb_update".
B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, NbIv, Bits), LoopBB, ExitBB);
// Build final iteration
// ax = fldexp(ax, nb - bits + 1);
// UPDATE_AX
B.SetInsertPoint(ExitBB);
// Matching the back-edge test above, the exit phis take the phi values
// (AxPhi / NbIv) from the loop block rather than the updated values.
auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2, "ax_exit_phi");
AxPhiExit->addIncoming(Ax, PreheaderBB);
AxPhiExit->addIncoming(AxPhi, LoopBB);
auto *NbExitPhi = B.CreatePHI(Nb->getType(), 2, "nb_exit_phi");
NbExitPhi->addIncoming(NbIv, LoopBB);
NbExitPhi->addIncoming(Nb, PreheaderBB);
Value *AxFinal = B.CreateLdexp(
AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {}, "ax");
AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
// Build:
// ax = fldexp(ax, ey);
// ret = copysign(ax,x);
AxFinal = B.CreateLdexp(AxFinal, Ey, {}, "ax");
// Narrow back to the frem type if the computation used a wider type.
if (ComputeFpTy != FremTy)
AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
Value *Ret = B.CreateCopySign(AxFinal, X);
RetPhi->addIncoming(Ret, ExitBB);
}
/// Build the else-branch of the conditional in the FRem
/// expansion, i.e. the case in which Ax <= Ay, where Ax = |X|, Ay
/// = |Y|, and X is the numerator and Y the denominator. Add the
/// incoming edge from the result to \p RetPhi.
void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {
// Build:
// ret = ax == ay ? copysign(0.0f, x) : x;
Value *ZeroWithXSign = B.CreateCopySign(ConstantFP::getZero(FremTy), X);
Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign, X);
RetPhi->addIncoming(Ret, B.GetInsertBlock());
}
/// Return a value that is NaN if one of the corner cases concerning
/// the inputs \p X and \p Y is detected, and \p Ret otherwise.
/// If \p NoInfs is set, or \p SQ is available and proves that \p X is never
/// an infinity, the finiteness check on \p X is replaced by constant true.
Value *handleInputCornerCases(Value *Ret, Value *X, Value *Y,
std::optional<SimplifyQuery> &SQ,
bool NoInfs) const {
// Build:
// ret = (y == 0.0f || isnan(y)) ? QNAN : ret;
// ret = isfinite(x) ? ret : QNAN;
Value *Nan = ConstantFP::getQNaN(FremTy);
// The unordered-or-equal comparison is also true when y is a NaN.
Ret = B.CreateSelect(B.CreateFCmpUEQ(Y, ConstantFP::getZero(FremTy)), Nan,
Ret);
Value *XFinite =
NoInfs || (SQ && isKnownNeverInfinity(X, *SQ))
? B.getTrue()
: B.CreateFCmpULT(B.CreateFAbs(X), ConstantFP::getInfinity(FremTy));
Ret = B.CreateSelect(XFinite, Ret, Nan);
return Ret;
}
};
} // namespace
/// Emit the approximate expansion fma(-trunc(X / Y), Y, X) at the current
/// insertion point of the builder and return its value.
///
/// All fast-math flags are cleared on the builder while the expansion is
/// emitted; the guard restores them on return.
Value *FRemExpander::buildApproxFRem(Value *X, Value *Y) const {
  IRBuilder<>::FastMathFlagGuard RestoreFlags(B);
  // Propagating the approximate functions flag to the division leads to an
  // unacceptable drop in precision on AMDGPU.
  // TODO Find out if any flags might be worth propagating.
  B.clearFastMathFlags();

  // Operands are emitted innermost first: fdiv, trunc, fneg, then the fma.
  Value *NegTruncQuot = B.CreateFNeg(
      B.CreateUnaryIntrinsic(Intrinsic::trunc, B.CreateFDiv(X, Y), {}));
  return B.CreateFMA(NegTruncQuot, Y, X);
}
/// Emit the precise frem expansion for numerator \p X and denominator \p Y
/// (both of type FremTy) at the builder's current insertion point and return
/// the resulting value.
///
/// The current block is split: the remainder computation and the trivial
/// |x| <= |y| case are placed in new "frem.compute" / "frem.else" blocks that
/// rejoin at a phi. Unless the builder's fast-math flags exclude NaNs, the
/// phi is post-processed so that the input corner cases yield NaN. \p SQ, if
/// present, is used to prove that \p X cannot be an infinity. The builder's
/// insertion point is restored before returning.
Value *FRemExpander::buildFRem(Value *X, Value *Y,
                               std::optional<SimplifyQuery> &SQ) const {
  assert(X->getType() == FremTy && Y->getType() == FremTy);

  FastMathFlags FMF = B.getFastMathFlags();

  // This function generates the following code structure:
  //   if (abs(x) > abs(y))
  //   { ret = compute remainder }
  //   else
  //   { ret = x or 0 with sign of x }
  //   Adjust ret to NaN for the NaN/inf/zero input corner cases
  //   return ret
  Value *Ax = B.CreateFAbs(X, {}, "ax");
  Value *Ay = B.CreateFAbs(Y, {}, "ay");
  if (ComputeFpTy != X->getType()) {
    Ax = B.CreateFPExt(Ax, ComputeFpTy, "ax");
    Ay = B.CreateFPExt(Ay, ComputeFpTy, "ay");
  }
  Value *AxAyCmp = B.CreateFCmpOGT(Ax, Ay);

  PHINode *RetPhi = B.CreatePHI(FremTy, 2, "ret");
  Value *Ret = RetPhi;

  // We would return NaN in all corner cases handled here.
  // Hence, if NaNs are excluded, keep the result as it is.
  if (!FMF.noNaNs())
    Ret = handleInputCornerCases(Ret, X, Y, SQ, FMF.noInfs());

  Function *Fun = B.GetInsertBlock()->getParent();
  auto *ThenBB = BasicBlock::Create(B.getContext(), "frem.compute", Fun);
  auto *ElseBB = BasicBlock::Create(B.getContext(), "frem.else", Fun);
  SplitBlockAndInsertIfThenElse(AxAyCmp, RetPhi, &ThenBB, &ElseBB);

  auto SavedInsertPt = B.GetInsertPoint();

  // Build remainder computation for "then" branch
  //
  // The ordered comparison ensures that ax and ay are not NaNs
  // in the then-branch. Furthermore, y cannot be an infinity and the
  // check at the end of the function ensures that the result will not
  // be used if x is an infinity.
  FastMathFlags ComputeFMF = FMF;
  ComputeFMF.setNoInfs();
  ComputeFMF.setNoNaNs();

  B.SetInsertPoint(ThenBB);
  // Pass the strengthened flags derived above. Previously the unmodified FMF
  // was passed, which left ComputeFMF unused.
  buildRemainderComputation(Ax, Ay, X, RetPhi, ComputeFMF);
  B.CreateBr(RetPhi->getParent());

  // Build "else"-branch
  B.SetInsertPoint(ElseBB);
  buildElseBranch(Ax, Ay, X, RetPhi);
  B.CreateBr(RetPhi->getParent());

  B.SetInsertPoint(SavedInsertPt);

  return Ret;
}
/// Replace the frem instruction \p I by an inline expansion and erase it.
///
/// The instruction's fast-math flags, with allow-reciprocal and
/// allow-contract cleared, are installed on the builder used for the
/// expansion. If the approximate-functions flag is set the approximate
/// expansion is emitted, otherwise the precise one; \p SQ is forwarded to the
/// precise expansion. Always returns true.
static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
  LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');

  Type *FRemTy = I.getType();
  assert(FRemExpander::canExpandType(FRemTy) &&
         "Expected supported floating point type");

  FastMathFlags Flags = I.getFastMathFlags();
  // TODO Make use of those flags for optimization?
  Flags.setAllowReciprocal(false);
  Flags.setAllowContract(false);

  IRBuilder<> Builder(&I);
  Builder.setFastMathFlags(Flags);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  const FRemExpander Expander = FRemExpander::create(Builder, FRemTy);
  Value *Numerator = I.getOperand(0);
  Value *Denominator = I.getOperand(1);

  Value *Expansion = nullptr;
  if (Flags.approxFunc())
    Expansion = Expander.buildApproxFRem(Numerator, Denominator);
  else
    Expansion = Expander.buildFRem(Numerator, Denominator, SQ);

  I.replaceAllUsesWith(Expansion);
  Expansion->takeName(&I);
  I.eraseFromParent();
  return true;
}
// clang-format off: preserve formatting of the following example
/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
/// the generated code. This currently generates code similarly to compiler-rt's
/// implementations.
///
/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
/// entry:
/// %0 = bitcast float %a to i32
/// %conv.i = zext i32 %0 to i64
/// %tobool.not = icmp sgt i32 %0, -1
/// %conv = select i1 %tobool.not, i64 1, i64 -1
/// %and = lshr i64 %conv.i, 23
/// %shr = and i64 %and, 255
/// %and2 = and i64 %conv.i, 8388607
/// %or = or i64 %and2, 8388608
/// %cmp = icmp ult i64 %shr, 127
/// br i1 %cmp, label %cleanup, label %if.end
///
/// if.end: ; preds = %entry
/// %sub = add nuw nsw i64 %shr, 4294967169
/// %conv5 = and i64 %sub, 4294967232
/// %cmp6.not = icmp eq i64 %conv5, 0
/// br i1 %cmp6.not, label %if.end12, label %if.then8
///
/// if.then8: ; preds = %if.end
/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
/// br label %cleanup
///
/// if.end12: ; preds = %if.end
/// %cmp13 = icmp ult i64 %shr, 150
/// br i1 %cmp13, label %if.then15, label %if.else
///
/// if.then15: ; preds = %if.end12
/// %sub16 = sub nuw nsw i64 150, %shr
/// %shr17 = lshr i64 %or, %sub16
/// %mul = mul nsw i64 %shr17, %conv
/// br label %cleanup
///
/// if.else: ; preds = %if.end12
/// %sub18 = add nsw i64 %shr, -150
/// %shl = shl i64 %or, %sub18
/// %mul19 = mul nsw i64 %shl, %conv
/// br label %cleanup
///
/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
/// ret i64 %retval.0
/// }
///
/// Replace fp to integer with generated code.
// Replaces \p FPToI (operand 0 is the floating-point input, the result type
// is the integer type) by explicit integer arithmetic on the bit pattern of
// the input, spread over several new basic blocks, and erases \p FPToI.
// \p IsSaturating adds NaN / out-of-range handling that clamps the result;
// \p IsSigned selects signed result handling (sign multiply, signed clamps).
static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned) {
// clang-format on
IRBuilder<> Builder(FPToI);
auto *FloatVal = FPToI->getOperand(0);
IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
// Number of explicitly stored mantissa bits (the width without the implicit
// leading bit).
unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
// FIXME: fp16's range is covered by i32. So `fptoi half` can convert
// to i32 first following a sext/zext to target integer type.
// NOTE(review): this shortcut dispatches on FPToI->getOpcode() and does not
// look at IsSaturating or IsSigned. Any instruction whose opcode is not
// FPToUI takes the fptosi + sext path. Confirm that callers never reach this
// point with IsSaturating set for a half input.
Value *A1 = nullptr;
if (FloatVal->getType()->isHalfTy() && BitWidth >= 32) {
if (FPToI->getOpcode() == Instruction::FPToUI) {
Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
A1 = Builder.CreateZExt(A0, IntTy);
} else { // FPToSI
Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
A1 = Builder.CreateSExt(A0, IntTy);
}
FPToI->replaceAllUsesWith(A1);
FPToI->dropAllReferences();
FPToI->eraseFromParent();
return;
}
// fp80 conversion is implemented by fpext to fp128 first then do the
// conversion. A stored mantissa width of 63 is therefore replaced by 112.
FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
// Width of the integer that holds the bit pattern, rounded up to a power of
// two.
unsigned FloatWidth =
PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
// Remaining bits after removing the stored mantissa and the sign bit.
unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
IntegerType *FloatIntTy = Builder.getIntNTy(FloatWidth);
// The implicit leading one sits directly above the stored mantissa bits.
Value *ImplicitBit = ConstantInt::get(
FloatIntTy, APInt::getOneBitSet(FloatWidth, FPMantissaWidth));
Value *SignificandMask = ConstantInt::get(
FloatIntTy, APInt::getLowBitsSet(FloatWidth, FPMantissaWidth));
// Split the block at the instruction and create the blocks of the expansion
// in front of the continuation block `End`.
BasicBlock *Entry = Builder.GetInsertBlock();
Function *F = Entry->getParent();
Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
// NOTE(review): these two pointers are only assigned, and only read, when
// IsSaturating is true; they are otherwise left uninitialized.
BasicBlock *CheckSaturateBB, *SaturateBB;
BasicBlock *End =
Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
if (IsSaturating) {
CheckSaturateBB = BasicBlock::Create(Builder.getContext(),
"fp-to-i-if-check.saturate", F, End);
SaturateBB =
BasicBlock::Create(Builder.getContext(), "fp-to-i-if-saturate", F, End);
}
BasicBlock *CheckExpSizeBB = BasicBlock::Create(
Builder.getContext(), "fp-to-i-if-check.exp.size", F, End);
BasicBlock *ExpSmallBB =
BasicBlock::Create(Builder.getContext(), "fp-to-i-if-exp.small", F, End);
BasicBlock *ExpLargeBB =
BasicBlock::Create(Builder.getContext(), "fp-to-i-if-exp.large", F, End);
// Drop the terminator of Entry left behind by the split; a conditional
// branch is emitted in its place below.
Entry->getTerminator()->eraseFromParent();
// entry:
Builder.SetInsertPoint(Entry);
// We're going to introduce branches on the value, so freeze it.
if (!isGuaranteedNotToBeUndefOrPoison(FloatVal))
FloatVal = Builder.CreateFreeze(FloatVal);
// fp80 conversion is implemented by fpext to fp128 first then do the
// conversion.
if (FloatVal->getType()->isX86_FP80Ty())
FloatVal =
Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
// ARep is the raw bit pattern of the (possibly extended) input.
Value *ARep = Builder.CreateBitCast(FloatVal, FloatIntTy);
// PosOrNeg and Sign are only assigned, and only read, when IsSigned is true.
Value *PosOrNeg, *Sign;
if (IsSigned) {
// True when the bit pattern, read as a signed integer, is non-negative.
PosOrNeg =
Builder.CreateICmpSGT(ARep, ConstantInt::getSigned(FloatIntTy, -1));
// +1 or -1; the magnitude computed below is multiplied by it.
Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
ConstantInt::getSigned(IntTy, -1), "sign");
}
// Extract the biased exponent field.
Value *And =
Builder.CreateLShr(ARep, Builder.getIntN(FloatWidth, FPMantissaWidth));
Value *BiasedExp = Builder.CreateAnd(
And, Builder.getIntN(FloatWidth, (1 << ExponentWidth) - 1), "biased.exp");
// Stored mantissa bits with the implicit leading bit or'ed in.
Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
Value *Significand = Builder.CreateOr(Abs, ImplicitBit, "significand");
// A biased exponent below the bias means the unbiased exponent is negative;
// the result is then 0.
Value *ZeroResultCond = Builder.CreateICmpULT(
BiasedExp, Builder.getIntN(FloatWidth, ExponentBias), "exp.is.negative");
if (IsSaturating) {
// When saturating, NaN inputs also produce 0, and so do negative inputs of
// an unsigned conversion.
Value *IsNaN = Builder.CreateFCmpUNO(FloatVal, FloatVal, "is.nan");
ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNaN);
if (!IsSigned) {
Value *IsNeg = Builder.CreateIsNeg(ARep);
ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNeg);
}
}
Builder.CreateCondBr(ZeroResultCond, End,
IsSaturating ? CheckSaturateBB : CheckExpSizeBB);
// Saturated is only assigned, and only read, when IsSaturating is true.
Value *Saturated;
if (IsSaturating) {
// check.saturate:
Builder.SetInsertPoint(CheckSaturateBB);
// Saturate when the biased exponent is at least
// ExponentBias + BitWidth - IsSigned.
Value *Cmp3 = Builder.CreateICmpUGE(
BiasedExp, ConstantInt::getSigned(
FloatIntTy, static_cast<int64_t>(ExponentBias +
BitWidth - IsSigned)));
Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);
// saturate:
Builder.SetInsertPoint(SaturateBB);
if (IsSigned) {
// Signed: clamp to the signed maximum or minimum depending on the sign.
Value *SignedMax =
ConstantInt::get(IntTy, APInt::getSignedMaxValue(BitWidth));
Value *SignedMin =
ConstantInt::get(IntTy, APInt::getSignedMinValue(BitWidth));
Saturated =
Builder.CreateSelect(PosOrNeg, SignedMax, SignedMin, "saturated");
} else {
// Unsigned: clamp to all ones.
Saturated = ConstantInt::getAllOnesValue(IntTy);
}
Builder.CreateBr(End);
}
// check.exp.size:
Builder.SetInsertPoint(CheckExpSizeBB);
// Decide whether the significand has to be shifted right (exponent smaller
// than the mantissa width) or left.
Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
BiasedExp, Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth),
"exp.smaller.mantissa.width");
Builder.CreateCondBr(ExpSmallerMantissaWidth, ExpSmallBB, ExpLargeBB);
// exp.small:
Builder.SetInsertPoint(ExpSmallBB);
// Right shift amount: (ExponentBias + FPMantissaWidth) - BiasedExp.
Value *Sub13 = Builder.CreateSub(
Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth), BiasedExp);
Value *ExpSmallRes =
Builder.CreateZExtOrTrunc(Builder.CreateLShr(Significand, Sub13), IntTy);
if (IsSigned)
ExpSmallRes = Builder.CreateMul(ExpSmallRes, Sign);
Builder.CreateBr(End);
// exp.large:
Builder.SetInsertPoint(ExpLargeBB);
// Left shift amount: BiasedExp - (ExponentBias + FPMantissaWidth).
Value *Sub15 = Builder.CreateAdd(
BiasedExp,
ConstantInt::getSigned(
FloatIntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
Value *SignificandCast = Builder.CreateZExtOrTrunc(Significand, IntTy);
Value *ExpLargeRes = Builder.CreateShl(
SignificandCast, Builder.CreateZExtOrTrunc(Sub15, IntTy));
if (IsSigned)
ExpLargeRes = Builder.CreateMul(ExpLargeRes, Sign);
Builder.CreateBr(End);
// cleanup:
Builder.SetInsertPoint(End, End->begin());
// Merge the results of all paths; the edge from Entry carries the zero
// result.
PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 3 + IsSaturating);
if (IsSaturating)
Retval0->addIncoming(Saturated, SaturateBB);
Retval0->addIncoming(ExpSmallRes, ExpSmallBB);
Retval0->addIncoming(ExpLargeRes, ExpLargeBB);
Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
FPToI->replaceAllUsesWith(Retval0);
FPToI->dropAllReferences();
FPToI->eraseFromParent();
}
// clang-format off: preserve formatting of the following example
/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
/// the generated code. This currently generates code similarly to compiler-rt's
/// implementations. This implementation has an implicit assumption that integer
/// width is larger than fp.
///
/// An example IR generated from compiler-rt/floatdisf.c looks like below:
/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
/// entry:
/// %cmp = icmp eq i64 %a, 0
/// br i1 %cmp, label %return, label %if.end
///
/// if.end: ; preds = %entry
/// %shr = ashr i64 %a, 63
/// %xor = xor i64 %shr, %a
/// %sub = sub nsw i64 %xor, %shr
/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
/// %cast = trunc i64 %0 to i32
/// %sub1 = sub nuw nsw i32 64, %cast
/// %sub2 = xor i32 %cast, 63
/// %cmp3 = icmp ult i32 %cast, 40
/// br i1 %cmp3, label %if.then4, label %if.else
///
/// if.then4: ; preds = %if.end
/// switch i32 %sub1, label %sw.default [
/// i32 25, label %sw.bb
/// i32 26, label %sw.epilog
/// ]
///
/// sw.bb: ; preds = %if.then4
/// %shl = shl i64 %sub, 1
/// br label %sw.epilog
///
/// sw.default: ; preds = %if.then4
/// %sub5 = sub nsw i64 38, %0
/// %sh_prom = and i64 %sub5, 4294967295
/// %shr6 = lshr i64 %sub, %sh_prom
/// %shr9 = lshr i64 274877906943, %0
/// %and = and i64 %shr9, %sub
/// %cmp10 = icmp ne i64 %and, 0
/// %conv11 = zext i1 %cmp10 to i64
/// %or = or i64 %shr6, %conv11
/// br label %sw.epilog
///
/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
/// %1 = lshr i64 %a.addr.0, 2
/// %2 = and i64 %1, 1
/// %or16 = or i64 %2, %a.addr.0
/// %inc = add nsw i64 %or16, 1
/// %3 = and i64 %inc, 67108864
/// %tobool.not = icmp eq i64 %3, 0
/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
/// %spec.select = ashr i64 %inc, %spec.select.v
/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
/// br label %if.end26
///
/// if.else: ; preds = %if.end
/// %sub23 = add nuw nsw i64 %0, 4294967256
/// %sh_prom24 = and i64 %sub23, 4294967295
/// %shl25 = shl i64 %sub, %sh_prom24
/// br label %if.end26
///
/// if.end26: ; preds = %sw.epilog,
/// %if.else
/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
/// %conv27 = trunc i64 %shr to i32
/// %and28 = and i32 %conv27, -2147483648
/// %add = shl nuw nsw i32 %e.0, 23
/// %shl29 = add nuw nsw i32 %add, 1065353216
/// %conv31 = trunc i64 %a.addr.1 to i32
/// %and32 = and i32 %conv31, 8388607
/// %or30 = or i32 %and32, %and28
/// %or33 = or i32 %or30, %shl29
/// %4 = bitcast i32 %or33 to float
/// br label %return
///
/// return: ; preds = %entry,
/// %if.end26
/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
/// ret float %retval.0
/// }
///
/// Replace integer to fp with generated code.
static void expandIToFP(Instruction *IToFP) {
// clang-format on
// Expands the scalar integer-to-FP conversion IToFP into explicit integer
// arithmetic plus control flow, replaces all of its uses with the result and
// erases it. Signedness is derived from the opcode (SIToFP vs. anything
// else); runImpl only passes UIToFP/SIToFP here.
//
// The builder starts out inserting immediately before IToFP.
IRBuilder<> Builder(IToFP);
auto *IntVal = IToFP->getOperand(0);
// cast<> requires a scalar integer operand; vector conversions are
// scalarized before they are queued for expansion.
IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
// getFPMantissaWidth() minus one, e.g. 23 for float and 52 for double. The
// special cases below key off this value.
unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
// The fp80 conversion is implemented as a conversion to fp128 first,
// followed by an fptrunc to fp80.
FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
// FIXME: As there are no related builtins in compiler-rt, narrow FP types
// are currently converted through fp32 and then truncated: a mantissa width
// of 10 (half) or 7 (presumably bfloat, see the isBFloatTy() case at the
// end) is widened to the fp32 value of 23.
FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
// Width of the integer in which the FP bit pattern is assembled
// (e.g. 23 -> 32, 52 -> 64, 112 -> 128).
unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
// We're going to introduce branches on the value, so freeze it.
if (!isGuaranteedNotToBeUndefOrPoison(IntVal))
IntVal = Builder.CreateFreeze(IntVal);
// The expansion below assumes that int width >= float width. Zero or sign
// extend the integer accordingly.
if (BitWidth < FloatWidth) {
BitWidth = FloatWidth;
IntTy = Builder.getIntNTy(BitWidth);
IntVal = Builder.CreateIntCast(IntVal, IntTy, IsSigned);
}
// 1 << (FPMantissaWidth + 3): mask used in sw.epilog (see A3) to test that
// single bit of the incremented value.
Value *Temp1 =
Builder.CreateShl(Builder.getIntN(BitWidth, 1),
Builder.getIntN(BitWidth, FPMantissaWidth + 3));
// Split the current block at the insertion point (IToFP): IToFP and
// everything after it move to the new "itofp-return" block. The other blocks
// are created empty and placed before that block.
BasicBlock *Entry = Builder.GetInsertBlock();
Function *F = Entry->getParent();
// Append "itofp-entry" to whatever name the block already had.
Entry->setName(Twine(Entry->getName(), "itofp-entry"));
BasicBlock *End =
Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
BasicBlock *IfEnd =
BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
BasicBlock *IfThen4 =
BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
BasicBlock *SwBB =
BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
BasicBlock *SwDefault =
BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
BasicBlock *SwEpilog =
BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
BasicBlock *IfThen20 =
BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
BasicBlock *IfElse =
BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
BasicBlock *IfEnd26 =
BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
// Drop the terminator left in Entry by the split; a conditional branch is
// emitted in its place below.
Entry->getTerminator()->eraseFromParent();
Function *CTLZ =
Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
// Passed as the second (i1) argument of the ctlz call below.
ConstantInt *True = Builder.getTrue();
// entry:
// A zero input branches straight to the return block, where the result PHI
// supplies a positive zero for this edge.
Builder.SetInsertPoint(Entry);
Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
Builder.CreateCondBr(Cmp, End, IfEnd);
// if.end:
// Shr is IntVal arithmetically shifted right by BitWidth - 1 (all ones when
// the top bit is set, zero otherwise); (IntVal ^ Shr) - Shr is then the
// magnitude of IntVal. Sub is only consumed on the signed path; the unsigned
// path uses IntVal directly.
Builder.SetInsertPoint(IfEnd);
Value *Shr =
Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
Value *Xor = Builder.CreateXor(Shr, IntVal);
Value *Sub = Builder.CreateSub(Xor, Shr);
Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
// The leading-zero-count arithmetic is done in 32 bits (using Cast), except
// for 128-bit float patterns where it stays in the full integer width (using
// Call).
int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
// Sub1 = BitWidth - clz, i.e. the number of significant bits.
// Sub2 = BitWidth - 1 - clz, i.e. the index of the most significant set bit.
Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
FloatWidth == 128 ? Call : Cast);
Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
FloatWidth == 128 ? Call : Cast);
// More significant bits than FPMantissaWidth + 1: take the path that narrows
// the value (if.then4); otherwise the value is only shifted left (if.else).
Value *Cmp3 = Builder.CreateICmpSGT(
Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
// if.then4:
// Bring the value to FPMantissaWidth + 3 significant bits: shift left by one
// (sw.bb), leave it unchanged (direct edge to sw.epilog), or shift right
// (sw.default).
Builder.SetInsertPoint(IfThen4);
SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
// sw.bb:
Builder.SetInsertPoint(SwBB);
Value *Shl =
Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
Builder.CreateBr(SwEpilog);
// sw.default:
// Sub5 = BitWidth - FPMantissaWidth - 3 - clz is the right-shift amount.
// Shr9 is an all-ones value shifted right by clz + FPMantissaWidth + 3; it
// masks the bits that the right shift discards. If any of them is set, bit 0
// of the shifted value is set as well (Conv11), so the information is not
// lost.
Builder.SetInsertPoint(SwDefault);
Value *Sub5 = Builder.CreateSub(
Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
FloatWidth == 128 ? Call : Cast);
Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
FloatWidth == 128 ? Sub5 : ShProm);
Value *Sub8 =
Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
FloatWidth == 128 ? Sub8 : ShProm9);
Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
Value *Or = Builder.CreateOr(Shr6, Conv11);
Builder.CreateBr(SwEpilog);
// sw.epilog:
// Merge the three variants, OR bit 2 of the value into bit 0, add one and
// shift right by two. This looks like round-to-nearest-even on the two extra
// low bits - NOTE(review): confirm against the compiler-rt routine this was
// derived from.
Builder.SetInsertPoint(SwEpilog);
PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
AAddr0->addIncoming(Or, SwDefault);
AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
AAddr0->addIncoming(Shl, SwBB);
Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
Value *Conv16 = Builder.CreateZExt(A2, IntTy);
Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
Value *Shr18 = nullptr;
if (IsSigned)
Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
else
Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
// PosOrNeg is true when bit FPMantissaWidth + 3 of Inc is clear, i.e. the
// increment did not carry into that bit. In that case the values computed
// here feed if.end26 directly; otherwise if.then20 recomputes them with one
// more bit shifted out.
Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
// Low FloatWidth bits of the shifted value.
Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
// Second PHI input for if.end26: Sub2 as i64 for 128-bit patterns, otherwise
// the bits of the shifted value from bit 32 upwards as i32.
Value *ExtractT64 = nullptr;
if (FloatWidth > 80)
ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
else
ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
// if.then20
// Same extraction as above, but shifting Inc right by three and using Sub1
// instead of Sub2.
Builder.SetInsertPoint(IfThen20);
Value *Shr21 = nullptr;
if (IsSigned)
Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
else
Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
Value *ExtractT62 = nullptr;
if (FloatWidth > 80)
ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
else
ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
Builder.CreateBr(IfEnd26);
// if.else:
// The value has at most FPMantissaWidth + 1 significant bits: shift it left
// by clz - (BitWidth - FPMantissaWidth - 1) so that its most significant set
// bit ends up at bit index FPMantissaWidth.
Builder.SetInsertPoint(IfElse);
Value *Sub24 = Builder.CreateAdd(
FloatWidth == 128 ? Call : Cast,
ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
-(int)(BitWidth - FPMantissaWidth - 1)));
Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
FloatWidth == 128 ? Sub24 : ShProm25);
Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
Value *ExtractT66 = nullptr;
if (FloatWidth > 80)
ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
else
ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
Builder.CreateBr(IfEnd26);
// if.end26:
// Merge the three paths and assemble the bit pattern of the result.
// AAddr1Off0 carries the low FloatWidth bits of the adjusted value.
Builder.SetInsertPoint(IfEnd26);
PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
AAddr1Off0->addIncoming(ExtractT, IfThen20);
AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
AAddr1Off0->addIncoming(ExtractT61, IfElse);
// Only created when the pattern is wider than 32 bits: an i64 carrying
// Sub1/Sub2 for 128-bit patterns, otherwise an i32 carrying the bits of the
// adjusted value from bit 32 upwards.
PHINode *AAddr1Off32 = nullptr;
if (FloatWidth > 32) {
AAddr1Off32 =
Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
AAddr1Off32->addIncoming(ExtractT62, IfThen20);
AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
AAddr1Off32->addIncoming(ExtractT66, IfElse);
}
// For patterns of at most 80 bits, E0 selects Sub1 (if.then20) or Sub2
// (other paths); it is shifted into the exponent position below.
PHINode *E0 = nullptr;
if (FloatWidth <= 80) {
E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
E0->addIncoming(Sub1, IfThen20);
E0->addIncoming(Sub2, SwEpilog);
E0->addIncoming(Sub2, IfElse);
}
// And29 isolates a single bit of Shr (the sign information of IntVal):
// bit 63 for 128-bit patterns (shifted up by 64 later), otherwise bit 31 of
// the truncated value.
Value *And29 = nullptr;
if (FloatWidth > 80) {
Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
Builder.getIntN(BitWidth, 63));
And29 = Builder.CreateAnd(Shr, Temp2, "and29");
} else {
Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
And29 = Builder.CreateAnd(
Conv28, ConstantInt::get(Builder.getContext(), APInt::getSignMask(32)));
}
// TempMod is the shift that places the exponent within the upper word of the
// pattern (FPMantissaWidth % 32, plus 32 when that word is 64 bits wide).
// The constant added after the shift is ((1 << k) - 1) << TempMod, e.g.
// 127 << 23 == 1065353216 for float.
unsigned TempMod = FPMantissaWidth % 32;
Value *And34 = nullptr;
Value *Shl30 = nullptr;
if (FloatWidth > 80) {
TempMod += 32;
Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
Shl30 = Builder.CreateAdd(
Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
} else {
Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
Shl30 = Builder.CreateAdd(
Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
// Keep only the mantissa bits that live in this 32-bit word.
And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
Builder.getInt32((1 << TempMod) - 1));
}
// Or35 combines the pieces. For 128-bit patterns this is the complete
// pattern (sign and exponent in the upper 64 bits, low FPMantissaWidth bits
// from AAddr1Off0). Otherwise it is a 32-bit word; the sign bit is only
// merged in for signed conversions.
Value *Or35 = nullptr;
if (FloatWidth > 80) {
Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
Value *Or31 = Builder.CreateOr(And29Trunc, And34);
Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
Builder.getIntN(128, FPMantissaWidth));
Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
Or35 = Builder.CreateOr(Or34, A6);
} else {
Value *Or31 = Builder.CreateOr(And34, And29);
Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
}
// Turn the integer pattern into a value of the destination FP type.
Value *A4 = nullptr;
if (IToFP->getType()->isDoubleTy()) {
// Or35 is the upper 32-bit word; combine it with the low 32 bits of
// AAddr1Off0 to form the 64-bit pattern.
Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
Value *And1 =
Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
Value *Or1 = Builder.CreateOr(Shl1, And1);
A4 = Builder.CreateBitCast(Or1, IToFP->getType());
} else if (IToFP->getType()->isX86_FP80Ty()) {
// The pattern was built for fp128; reinterpret it and truncate to fp80.
Value *A40 =
Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
} else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
// Deal with the "half" and "bfloat" cases: the pattern was built for float,
// so reinterpret it and truncate. This is a workaround since there is
// currently no floattihf.c to refer to.
Value *A40 =
Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
} else // float type
A4 = Builder.CreateBitCast(Or35, IToFP->getType());
Builder.CreateBr(End);
// return:
// The result is the assembled value, or positive zero when the entry block
// branched here directly because the input was zero.
Builder.SetInsertPoint(End, End->begin());
PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
Retval0->addIncoming(A4, IfEnd26);
Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
// Redirect all users to the PHI and delete the original conversion.
IToFP->replaceAllUsesWith(Retval0);
IToFP->dropAllReferences();
IToFP->eraseFromParent();
}
/// Replace the fixed-vector instruction \p I with one scalar operation per
/// element, reassembled into a vector with insertelement.
///
/// Supported instructions are binary operators, casts, and the saturating
/// fp-to-int intrinsics (llvm.fptoui.sat / llvm.fptosi.sat). The latter are
/// accepted for expansion by runImpl for fixed-vector types as well, so they
/// must be scalarizable here instead of running into the unreachable below.
///
/// Every scalar operation that is an Instruction (i.e. was not folded by the
/// builder) inherits the IR flags of \p I and is appended to \p Worklist so
/// that it gets expanded afterwards. \p I itself is erased.
static void scalarize(Instruction *I,
                      SmallVectorImpl<Instruction *> &Worklist) {
  VectorType *VTy = cast<FixedVectorType>(I->getType());

  IRBuilder<> Builder(I);

  unsigned NumElements = VTy->getElementCount().getFixedValue();
  Value *Result = PoisonValue::get(VTy);
  for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
    Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);

    Value *NewOp = nullptr;
    if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
      NewOp = Builder.CreateBinOp(
          BinOp->getOpcode(), Ext,
          Builder.CreateExtractElement(I->getOperand(1), Idx));
    } else if (auto *CastI = dyn_cast<CastInst>(I)) {
      NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
                                 I->getType()->getScalarType());
    } else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
      // Only the single-operand saturating conversions are expected here;
      // operand 0 of the call is the floating-point argument.
      Intrinsic::ID IID = II->getIntrinsicID();
      if (IID != Intrinsic::fptoui_sat && IID != Intrinsic::fptosi_sat)
        llvm_unreachable("Unsupported intrinsic");
      NewOp =
          Builder.CreateIntrinsic(I->getType()->getScalarType(), IID, {Ext});
    } else {
      llvm_unreachable("Unsupported instruction type");
    }

    Result = Builder.CreateInsertElement(Result, NewOp, Idx);
    if (auto *ScalarizedI = dyn_cast<Instruction>(NewOp)) {
      ScalarizedI->copyIRFlags(I, true);
      Worklist.push_back(ScalarizedI);
    }
  }

  I->replaceAllUsesWith(Result);
  I->dropAllReferences();
  I->eraseFromParent();
}
/// Queue \p I for expansion. An instruction whose first operand is a vector
/// is handed to scalarize(), which queues the per-element instructions it
/// creates; anything else is pushed onto \p Worklist unchanged.
static void addToWorklist(Instruction &I,
                          SmallVector<Instruction *, 4> &Worklist) {
  Type *FirstOperandTy = I.getOperand(0)->getType();
  if (!FirstOperandTy->isVectorTy()) {
    Worklist.push_back(&I);
    return;
  }
  scalarize(&I, Worklist);
}
/// Common implementation behind both pass-manager wrappers.
///
/// Collects every instruction of \p F that the target (or the command-line
/// overrides) cannot lower, scalarizing vector instructions on the way, and
/// then expands the collected instructions one by one. \p AC, when non-null,
/// is made available to the frem expansion through a SimplifyQuery.
///
/// \returns true if at least one instruction was selected for expansion.
static bool runImpl(Function &F, const TargetLowering &TLI,
                    const LibcallLoweringInfo &Libcalls, AssumptionCache *AC) {
  SmallVector<Instruction *, 4> Worklist;

  // Start from the target's limits; a command-line option that differs from
  // IntegerType::MAX_INT_BITS replaces the corresponding limit.
  unsigned MaxLegalFpConvertBitWidth =
      TLI.getMaxLargeFPConvertBitWidthSupported();
  if (ExpandFpConvertBits != IntegerType::MAX_INT_BITS)
    MaxLegalFpConvertBitWidth = ExpandFpConvertBits;

  unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
  if (ExpandDivRemBits != IntegerType::MAX_INT_BITS)
    MaxLegalDivRemBitWidth = ExpandDivRemBits;

  // A limit at (or above) MAX_INT_BITS turns the respective expansion off.
  const bool DisableExpandLargeFp =
      MaxLegalFpConvertBitWidth >= IntegerType::MAX_INT_BITS;
  const bool DisableExpandLargeDivRem =
      MaxLegalDivRemBitWidth >= IntegerType::MAX_INT_BITS;
  const bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);

  if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
    return false;

  // True if the (scalar) integer type underlying Ty is wider than Limit.
  auto IsWiderThan = [](Type *Ty, unsigned Limit) {
    return cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >
           Limit;
  };

  auto ShouldHandleInst = [&](Instruction &I) {
    Type *Ty = I.getType();
    // TODO: This pass doesn't handle scalable vectors.
    if (Ty->isScalableTy())
      return false;

    switch (I.getOpcode()) {
    case Instruction::FRem:
      return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);

    case Instruction::FPToUI:
    case Instruction::FPToSI:
      // The integer side of the conversion is the result type.
      return !DisableExpandLargeFp &&
             IsWiderThan(Ty, MaxLegalFpConvertBitWidth);

    case Instruction::UIToFP:
    case Instruction::SIToFP:
      // The integer side of the conversion is the operand type.
      return !DisableExpandLargeFp &&
             IsWiderThan(I.getOperand(0)->getType(),
                         MaxLegalFpConvertBitWidth);

    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      // Power-of-2 divisors are handled inside the expansion (via efficient
      // shift/mask sequences) rather than being excluded here, so that
      // backends that cannot lower wide div/rem even for powers of two
      // (e.g. when DAGCombiner is disabled) still get valid lowered code.
      return !DisableExpandLargeDivRem &&
             IsWiderThan(Ty, MaxLegalDivRemBitWidth);

    case Instruction::Call: {
      // Of all calls, only the saturating fp-to-int intrinsics qualify.
      auto *II = dyn_cast<IntrinsicInst>(&I);
      if (!II)
        return false;
      Intrinsic::ID IID = II->getIntrinsicID();
      if (IID != Intrinsic::fptoui_sat && IID != Intrinsic::fptosi_sat)
        return false;
      return !DisableExpandLargeFp &&
             IsWiderThan(Ty, MaxLegalFpConvertBitWidth);
    }

    default:
      return false;
    }
  };

  // Phase 1: collect. The iterator is advanced before the instruction is
  // processed because scalarization erases the instruction.
  bool Modified = false;
  auto It = inst_begin(&F);
  auto End = inst_end(F);
  while (It != End) {
    Instruction &I = *It;
    ++It;
    if (ShouldHandleInst(I)) {
      addToWorklist(I, Worklist);
      Modified = true;
    }
  }

  // Phase 2: expand, last queued instruction first.
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();

    switch (I->getOpcode()) {
    case Instruction::FRem: {
      // Only provide a SimplifyQuery when an assumption cache is available.
      std::optional<SimplifyQuery> SQ;
      if (AC) {
        SQ.emplace(I->getModule()->getDataLayout(), I);
        SQ->AC = AC;
      }
      expandFRem(cast<BinaryOperator>(*I), SQ);
      break;
    }

    case Instruction::FPToUI:
    case Instruction::FPToSI:
      expandFPToI(I, /*IsSaturating=*/false,
                  /*IsSigned=*/I->getOpcode() == Instruction::FPToSI);
      break;

    case Instruction::UIToFP:
    case Instruction::SIToFP:
      expandIToFP(I);
      break;

    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem: {
      auto *BO = cast<BinaryOperator>(I);
      // TODO: isConstantPowerOfTwo does not handle vector constants, so
      // vector div/rem by a power-of-2 splat goes through the generic path.
      if (isConstantPowerOfTwo(BO->getOperand(1), isSigned(BO->getOpcode()))) {
        expandPow2DivRem(BO);
        break;
      }
      const bool IsDivision = BO->getOpcode() == Instruction::UDiv ||
                              BO->getOpcode() == Instruction::SDiv;
      if (IsDivision)
        expandDivision(BO);
      else
        expandRemainder(BO);
      break;
    }

    case Instruction::Call: {
      auto *II = cast<IntrinsicInst>(I);
      assert(II->getIntrinsicID() == Intrinsic::fptoui_sat ||
             II->getIntrinsicID() == Intrinsic::fptosi_sat);
      const bool SignedSat = II->getIntrinsicID() == Intrinsic::fptosi_sat;
      expandFPToI(I, /*IsSaturating=*/true, /*IsSigned=*/SignedSat);
      break;
    }

    default:
      break;
    }
  }

  return Modified;
}
namespace {
/// Legacy pass manager wrapper: gathers the analyses runImpl() needs and
/// forwards to it.
class ExpandIRInstsLegacyPass : public FunctionPass {
  /// Optimization level the pass was created with. At CodeGenOptLevel::None
  /// no AssumptionCache is requested or used.
  CodeGenOptLevel OptLevel;

public:
  static char ID;

  ExpandIRInstsLegacyPass(CodeGenOptLevel OptLevel)
      : FunctionPass(ID), OptLevel(OptLevel) {}

  /// Default construction behaves like CodeGenOptLevel::None.
  ExpandIRInstsLegacyPass() : ExpandIRInstsLegacyPass(CodeGenOptLevel::None) {}

  /// Runs the expansion on \p F and returns whether the function changed.
  bool runOnFunction(Function &F) override {
    auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
    const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F);
    auto *TLI = Subtarget->getTargetLowering();
    AssumptionCache *AC = nullptr;

    const LibcallLoweringInfo &Libcalls =
        getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
            *F.getParent(), *Subtarget);

    // The assumption cache is only used when optimizing, and never for
    // functions marked optnone.
    if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
      AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    return runImpl(F, *TLI, Libcalls, AC);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LibcallLoweringInfoWrapper>();
    AU.addRequired<TargetPassConfig>();
    // Must match the condition under which runOnFunction() queries it.
    if (OptLevel != CodeGenOptLevel::None)
      AU.addRequired<AssumptionCacheTracker>();
    AU.addPreserved<AAResultsWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};
} // namespace
// Stores the address of TM and the optimization level; both are read later by
// run() and printPipeline(). The pass keeps only a pointer to TM.
ExpandIRInstsPass::ExpandIRInstsPass(const TargetMachine &TM,
CodeGenOptLevel OptLevel)
: TM(&TM), OptLevel(OptLevel) {}
/// Prints the pass name as produced by the PassInfoMixin base, followed by
/// the optimization level as a pass parameter in the form "<O" + level + ">".
void ExpandIRInstsPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  using MixinT = PassInfoMixin<ExpandIRInstsPass>;
  MixinT *Base = static_cast<MixinT *>(this);
  Base->printPipeline(OS, MapClassName2PassName);

  const int Level = static_cast<int>(OptLevel);
  OS << '<' << "O" << Level << '>';
}
/// New pass manager entry point: gathers the analyses runImpl() needs and
/// forwards to it.
///
/// The libcall lowering information must already be cached at module level;
/// if it is not, an error is emitted on the context and \p F is left
/// untouched.
///
/// \returns PreservedAnalyses::none() if \p F was changed, all() otherwise.
PreservedAnalyses ExpandIRInstsPass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
  auto &TLI = *STI->getTargetLowering();

  // Mirror the legacy pass: the assumption cache is only used when
  // optimizing, and never for functions marked optnone.
  AssumptionCache *AC = nullptr;
  if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
    AC = &FAM.getResult<AssumptionAnalysis>(F);

  auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);

  const LibcallLoweringModuleAnalysisResult *LibcallLowering =
      MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());

  if (!LibcallLowering) {
    F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
                             "' analysis required");
    return PreservedAnalyses::all();
  }

  const LibcallLoweringInfo &Libcalls =
      LibcallLowering->getLibcallLowering(*STI);

  return runImpl(F, TLI, Libcalls, AC) ? PreservedAnalyses::none()
                                       : PreservedAnalyses::all();
}
char ExpandIRInstsLegacyPass::ID = 0;

// Legacy pass registration. The description is spelled identically in the
// BEGIN and END macros so the two cannot drift apart; the pass covers more
// than floating-point instructions.
INITIALIZE_PASS_BEGIN(ExpandIRInstsLegacyPass, "expand-ir-insts",
                      "Expand IR instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper)
INITIALIZE_PASS_END(ExpandIRInstsLegacyPass, "expand-ir-insts",
                    "Expand IR instructions", false, false)
// Factory for the legacy pass manager: returns a newly allocated
// ExpandIRInstsLegacyPass configured with OptLevel.
FunctionPass *llvm::createExpandIRInstsPass(CodeGenOptLevel OptLevel) {
return new ExpandIRInstsLegacyPass(OptLevel);
}