This block of code is currently conditional on the fusions being enabled, but as far as I can tell it does no harm to enable it generally. The net effect is that generically compiled code runs slightly better on machines with this fusion. The actual motivation is merely to stop confusing myself when I see the sequence in code; the register allocator's choice to sometimes blow two registers instead of one is just generally weird, and my eyes spot it when scanning disassembly. (Note that this is just the regalloc hint; the scheduling changes remain conditional, and probably should remain so.)
57 lines
2.0 KiB
LLVM
57 lines
2.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+zcmp,+e -target-abi ilp32e -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32

; Codegen test: riscv32 with the Zcmp and E extensions, ilp32e ABI, machine
; verifier enabled (see the llc invocation above).
;
; What the RV32 assertions in @func pin down:
;   * Prologue: `cm.push {ra, s0-s1}, -16` saves ra/s0/s1, and a separate
;     `addi sp, sp, -8` then allocates two more 4-byte slots; the CFA offset
;     goes 16 -> 24 accordingly.
;   * a4 (%conv14) and a2 (%incdec.ptr) are spilled to those slots before the
;     loop and reloaded after the call to __mulsi3.
;   * Epilogue: `addi sp, sp, 8` undoes the extra allocation (CFA offset back
;     to 16) before `cm.popret {ra, s0-s1}, 16` restores and returns.
;
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; Multiply routine called directly by the test body below.
declare i32 @__mulsi3(i32, i32)

; NOTE(review): attribute group #0 is referenced here but no definition of it
; is visible in this file; confirm that is intentional.
define ptr @func(ptr %s, i32 %_c, ptr %incdec.ptr, i1 %0, i8 %conv14) #0 {
; RV32-LABEL: func:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    cm.push {ra, s0-s1}, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    .cfi_offset ra, -12
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -4
; RV32-NEXT:    addi sp, sp, -8
; RV32-NEXT:    .cfi_def_cfa_offset 24
; RV32-NEXT:    sw a4, 4(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw a2, 0(sp) # 4-byte Folded Spill
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    mv s1, a0
; RV32-NEXT:    li a0, 1
; RV32-NEXT:    andi a3, a3, 1
; RV32-NEXT:  .LBB0_1: # %while.body
; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32-NEXT:    mv s0, a0
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    bnez a3, .LBB0_1
; RV32-NEXT:  # %bb.2: # %while.end
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    call __mulsi3
; RV32-NEXT:    sw a0, 0(zero)
; RV32-NEXT:    andi s0, s0, 1
; RV32-NEXT:    lw a0, 0(sp) # 4-byte Folded Reload
; RV32-NEXT:    add s0, s0, a0
; RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
; RV32-NEXT:    sb a0, 0(s0)
; RV32-NEXT:    mv a0, s1
; RV32-NEXT:    addi sp, sp, 8
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    cm.popret {ra, s0-s1}, 16
entry:
  br label %while.body

while.body:                                       ; preds = %while.body, %entry
  ; 1 when entered from %entry, 0 on every back-edge iteration.
  %n.addr.042 = phi i32 [ 1, %entry ], [ 0, %while.body ]
  ; Loops for as long as the i1 argument %0 is true.
  br i1 %0, label %while.body, label %while.end

while.end:                                        ; preds = %while.body
  ; 16843009 == 0x01010101.
  %mul_result = call i32 @__mulsi3(i32 %_c, i32 16843009)
  ; The product is stored to address 0 (null).
  store i32 %mul_result, ptr null, align 4
  ; Low bit of the loop phi selects byte offset 0 or 1 from %incdec.ptr.
  %1 = and i32 %n.addr.042, 1
  %scevgep = getelementptr i8, ptr %incdec.ptr, i32 %1
  store i8 %conv14, ptr %scevgep, align 1
  ret ptr %s
}