[RISCV] Fix IDiv/IRem scheduling data for RV32 cores that use the SiFive7 model (#187331)
The integer division and remainder instructions on a 32-bit core that uses the SiFive7 scheduling model should have the same latency and throughput as their word counterparts on a 64-bit SiFive7 core. This patch fixes those scheduling entries by adding a new SchedPred that predicates on `Feature64Bit` to toggle the SchedVariant that is attached to the affected integer division / remainder instructions.
This commit is contained in:
@@ -23,6 +23,10 @@ def VLDSX0Pred
|
||||
// is enabled.
|
||||
def SingleElementVecFP64SchedPred : FeatureSchedPredicate<TuneHasSingleElementVecFP64>;
|
||||
|
||||
// This scheduling predicate is true when the subtarget is RV32 (i.e. Feature64Bit
|
||||
// is not enabled).
|
||||
def RV32SchedPred : NotSchedPred<FeatureSchedPredicate<Feature64Bit>>;
|
||||
|
||||
// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
|
||||
def isSEXT_W
|
||||
: TIIPredicate<"isSEXT_W",
|
||||
|
||||
@@ -346,21 +346,30 @@ multiclass SiFive7WriteResBase<int VLEN,
|
||||
def : WriteRes<WriteIMul32, [PipeB]>;
|
||||
}
|
||||
|
||||
// Integer division
|
||||
def : WriteRes<WriteIDiv, [PipeB, IDiv]> {
|
||||
// IDiv / IRem in a 32-bit core should have the same latency and throughput
|
||||
// as IDiv32 / IRem32 in a 64-bit core.
|
||||
def SiFive7RV64IDivRemSchedWriteRes : SchedWriteRes<[PipeB, IDiv]> {
|
||||
let Latency = 66;
|
||||
let ReleaseAtCycles = [1, 65];
|
||||
}
|
||||
def SiFive7RV32IDivRemSchedWriteRes : SchedWriteRes<[PipeB, IDiv]> {
|
||||
let Latency = 34;
|
||||
let ReleaseAtCycles = [1, 33];
|
||||
}
|
||||
def SiFive7IDivRemSchedWriteVariant : SchedWriteVariant<[
|
||||
SchedVar<RV32SchedPred, [!cast<SchedWriteRes>(NAME # "SiFive7RV32IDivRemSchedWriteRes")]>,
|
||||
SchedVar<NoSchedPred, [!cast<SchedWriteRes>(NAME # "SiFive7RV64IDivRemSchedWriteRes")]>
|
||||
]>;
|
||||
|
||||
// Integer division
|
||||
def : SchedAlias<WriteIDiv, !cast<SchedWrite>(NAME # "SiFive7IDivRemSchedWriteVariant")>;
|
||||
def : WriteRes<WriteIDiv32, [PipeB, IDiv]> {
|
||||
let Latency = 34;
|
||||
let ReleaseAtCycles = [1, 33];
|
||||
}
|
||||
|
||||
// Integer remainder
|
||||
def : WriteRes<WriteIRem, [PipeB, IDiv]> {
|
||||
let Latency = 66;
|
||||
let ReleaseAtCycles = [1, 65];
|
||||
}
|
||||
def : SchedAlias<WriteIRem, !cast<SchedWrite>(NAME # "SiFive7IDivRemSchedWriteVariant")>;
|
||||
def : WriteRes<WriteIRem32, [PipeB, IDiv]> {
|
||||
let Latency = 34;
|
||||
let ReleaseAtCycles = [1, 33];
|
||||
|
||||
10
llvm/test/tools/llvm-mca/RISCV/Inputs/mul-div-rv32.s
Normal file
10
llvm/test/tools/llvm-mca/RISCV/Inputs/mul-div-rv32.s
Normal file
@@ -0,0 +1,10 @@
|
||||
# Input instructions for the 'M' extension.
|
||||
|
||||
mul a0, a0, a0
|
||||
mulh a0, a0, a0
|
||||
mulhu a0, a0, a0
|
||||
mulhsu a0, a0, a0
|
||||
div a0, a1, a2
|
||||
divu a0, a1, a2
|
||||
rem a0, a1, a2
|
||||
remu a0, a1, a2
|
||||
59
llvm/test/tools/llvm-mca/RISCV/SiFive7/mul-div-rv32.test
Normal file
59
llvm/test/tools/llvm-mca/RISCV/SiFive7/mul-div-rv32.test
Normal file
@@ -0,0 +1,59 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=riscv32 -mcpu=sifive-e76 -iterations=1 -instruction-tables=full %p/../Inputs/mul-div-rv32.s | FileCheck %s
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - VLEN512SiFive7FDiv:1
|
||||
# CHECK-NEXT: [1] - VLEN512SiFive7IDiv:1
|
||||
# CHECK-NEXT: [2] - VLEN512SiFive7PipeA:1
|
||||
# CHECK-NEXT: [3] - VLEN512SiFive7PipeAB:2 VLEN512SiFive7PipeA, VLEN512SiFive7PipeB
|
||||
# CHECK-NEXT: [4] - VLEN512SiFive7PipeB:1
|
||||
# CHECK-NEXT: [5] - VLEN512SiFive7VA1:1
|
||||
# CHECK-NEXT: [6] - VLEN512SiFive7VCQ:1
|
||||
# CHECK-NEXT: [7] - VLEN512SiFive7VL:1
|
||||
# CHECK-NEXT: [8] - VLEN512SiFive7VS:1
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||
# CHECK-NEXT: [7]: Bypass Latency
|
||||
# CHECK-NEXT: [8]: Resources (<Name> | <Name>[<ReleaseAtCycle>] | <Name>[<AcquireAtCycle>,<ReleaseAtCycle])
|
||||
# CHECK-NEXT: [9]: LLVM Opcode Name
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
# CHECK-NEXT: 1 3 1.00 3 VLEN512SiFive7PipeAB,VLEN512SiFive7PipeB MUL mul a0, a0, a0
|
||||
# CHECK-NEXT: 1 3 1.00 3 VLEN512SiFive7PipeAB,VLEN512SiFive7PipeB MULH mulh a0, a0, a0
|
||||
# CHECK-NEXT: 1 3 1.00 3 VLEN512SiFive7PipeAB,VLEN512SiFive7PipeB MULHU mulhu a0, a0, a0
|
||||
# CHECK-NEXT: 1 3 1.00 3 VLEN512SiFive7PipeAB,VLEN512SiFive7PipeB MULHSU mulhsu a0, a0, a0
|
||||
# CHECK-NEXT: 1 34 33.00 34 VLEN512SiFive7IDiv[33],VLEN512SiFive7PipeAB,VLEN512SiFive7PipeB DIV div a0, a1, a2
|
||||
# CHECK-NEXT: 1 34 33.00 34 VLEN512SiFive7IDiv[33],VLEN512SiFive7PipeAB,VLEN512SiFive7PipeB DIVU divu a0, a1, a2
|
||||
# CHECK-NEXT: 1 34 33.00 34 VLEN512SiFive7IDiv[33],VLEN512SiFive7PipeAB,VLEN512SiFive7PipeB REM rem a0, a1, a2
|
||||
# CHECK-NEXT: 1 34 33.00 34 VLEN512SiFive7IDiv[33],VLEN512SiFive7PipeAB,VLEN512SiFive7PipeB REMU remu a0, a1, a2
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - VLEN512SiFive7FDiv
|
||||
# CHECK-NEXT: [1] - VLEN512SiFive7IDiv
|
||||
# CHECK-NEXT: [2] - VLEN512SiFive7PipeA
|
||||
# CHECK-NEXT: [3] - VLEN512SiFive7PipeB
|
||||
# CHECK-NEXT: [4] - VLEN512SiFive7VA1
|
||||
# CHECK-NEXT: [5] - VLEN512SiFive7VCQ
|
||||
# CHECK-NEXT: [6] - VLEN512SiFive7VL
|
||||
# CHECK-NEXT: [7] - VLEN512SiFive7VS
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
|
||||
# CHECK-NEXT: - 132.00 - 8.00 - - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
|
||||
# CHECK-NEXT: - - - 1.00 - - - - mul a0, a0, a0
|
||||
# CHECK-NEXT: - - - 1.00 - - - - mulh a0, a0, a0
|
||||
# CHECK-NEXT: - - - 1.00 - - - - mulhu a0, a0, a0
|
||||
# CHECK-NEXT: - - - 1.00 - - - - mulhsu a0, a0, a0
|
||||
# CHECK-NEXT: - 33.00 - 1.00 - - - - div a0, a1, a2
|
||||
# CHECK-NEXT: - 33.00 - 1.00 - - - - divu a0, a1, a2
|
||||
# CHECK-NEXT: - 33.00 - 1.00 - - - - rem a0, a1, a2
|
||||
# CHECK-NEXT: - 33.00 - 1.00 - - - - remu a0, a1, a2
|
||||
Reference in New Issue
Block a user