Files
llvm-project/llvm/test/CodeGen/Mips/unalignedload.ll
JaberwockySeamonstah 458e9c452c Prevent undefined behavior caused by combination of branch and load delay slots on MIPS1 (#185427)
Under certain conditions, LLVM's `MipsDelaySlotFiller` fills a branch
delay slot with an instruction that itself requires a load delay slot.
However, `MipsDelaySlotFiller` does not check the filling instruction
for hazards, which leads to code like this:
```asm
	beqz	$1, $BB0_5
	lbu	$2, %lo(_RNvCs5jWYnRsDZoD_3app13CONTROLLERS_A)($2)
# --- Some other instructions
$BB0_5:
	andi	$1, $2, 1
```
The `lbu` was moved into the branch delay slot, but it has a load delay
slot of its own, so when the branch to `$BB0_5` is taken the value of
`$2` is not yet ready for the `andi` there, which leads to undefined
behavior.

This PR proposes declaring instructions that have a load delay slot as
hazardous for the branch delay slot, on `MIPS1` only. This keeps load
instructions out of the branch delay slot, at the cost of a slight
loss of optimization.

Ideally, when a load instruction is placed in a branch delay slot, we
would check whether its target register is used by the following
instruction and by the instruction at the branch destination. Code for
this check is already in place from a previous PR (`bool
MipsInstrInfo::SafeInLoadDelaySlot(const MachineInstr &MIInSlot, const
MachineInstr &LoadMI) const`); however, I am not experienced enough with
LLVM to identify the `MachineInstr`s required for that ideal
solution.

If I could get some feedback about this I might be able to stitch it in.

The original issue came from Rust and is described in [Rust issue
150676](https://github.com/rust-lang/rust/issues/150676). It was then
raised in the LLVM project as [issue
180639](https://github.com/llvm/llvm-project/issues/180639#issuecomment-3874380424)
and discussed in [this forum
thread](https://discourse.llvm.org/t/where-to-start-fixing-an-opt-pass-for-mips1/89857).

Co-authored-by: Jaby <jaby@william.zone>
2026-04-21 09:32:35 +08:00

255 lines
9.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=mipsel-elf -mcpu=mips32 -relocation-model=pic | FileCheck %s -check-prefixes=MIPS32-EL
; RUN: llc < %s -mtriple=mips-elf -mcpu=mips32 -relocation-model=pic | FileCheck %s -check-prefixes=MIPS32-EB
; RUN: llc < %s -mtriple=mipsel-elf -mcpu=mips32r2 -relocation-model=pic | FileCheck %s -check-prefixes=MIPS32-EL
; RUN: llc < %s -mtriple=mips-elf -mcpu=mips32r2 -relocation-model=pic | FileCheck %s -check-prefixes=MIPS32-EB
; RUN: llc < %s -mtriple=mipsel-elf -mcpu=mips32r6 -relocation-model=pic | FileCheck %s -check-prefixes=MIPS32R6-EL
; RUN: llc < %s -mtriple=mips-elf -mcpu=mips32r6 -relocation-model=pic | FileCheck %s -check-prefixes=MIPS32R6-EB
; RUN: llc < %s -mtriple=mipsel-sony-psx -mcpu=mips1 -relocation-model=pic | FileCheck %s -check-prefixes=MIPS1-PSX
; Aggregate types passed by value in the tests below:
;   %struct.S2 - two consecutive %struct.S1 members
;   %struct.S1 - two i8 fields
;   %struct.S4 - a single [7 x i8] array
%struct.S2 = type { %struct.S1, %struct.S1 }
%struct.S1 = type { i8, i8 }
%struct.S4 = type { [7 x i8] }
; Both globals are zero-initialized and declared with align 1, i.e. only
; byte alignment is specified for them.
@s2 = common global %struct.S2 zeroinitializer, align 1
@s4 = common global %struct.S4 zeroinitializer, align 1
; bar1 passes the second %struct.S1 member of @s2 (GEP indices 0, 1) to @foo2
; as a byval argument. In the expected output for every configuration the
; member is read starting at offset 2 from the address of s2 and handed to
; the callee in $4: the MIPS32 and MIPS1 variants combine two lbu loads,
; while the MIPS32R6 variants use a single lhu. In the MIPS1 PSX output a
; nop follows each load whose destination register is read by the very next
; instruction.
define void @bar1() nounwind {
; MIPS32-EL-LABEL: bar1:
; MIPS32-EL: # %bb.0: # %entry
; MIPS32-EL-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-EL-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-EL-NEXT: addiu $sp, $sp, -24
; MIPS32-EL-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32-EL-NEXT: addu $gp, $2, $25
; MIPS32-EL-NEXT: lw $1, %got(s2)($gp)
; MIPS32-EL-NEXT: lbu $2, 2($1)
; MIPS32-EL-NEXT: lbu $1, 3($1)
; MIPS32-EL-NEXT: sll $1, $1, 8
; MIPS32-EL-NEXT: lw $25, %call16(foo2)($gp)
; MIPS32-EL-NEXT: .reloc $tmp0, R_MIPS_JALR, foo2
; MIPS32-EL-NEXT: $tmp0:
; MIPS32-EL-NEXT: jalr $25
; MIPS32-EL-NEXT: or $4, $1, $2
; MIPS32-EL-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS32-EL-NEXT: jr $ra
; MIPS32-EL-NEXT: addiu $sp, $sp, 24
;
; MIPS32-EB-LABEL: bar1:
; MIPS32-EB: # %bb.0: # %entry
; MIPS32-EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-EB-NEXT: addiu $sp, $sp, -24
; MIPS32-EB-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32-EB-NEXT: addu $gp, $2, $25
; MIPS32-EB-NEXT: lw $1, %got(s2)($gp)
; MIPS32-EB-NEXT: lbu $2, 3($1)
; MIPS32-EB-NEXT: sll $2, $2, 16
; MIPS32-EB-NEXT: lbu $1, 2($1)
; MIPS32-EB-NEXT: sll $1, $1, 24
; MIPS32-EB-NEXT: lw $25, %call16(foo2)($gp)
; MIPS32-EB-NEXT: .reloc $tmp0, R_MIPS_JALR, foo2
; MIPS32-EB-NEXT: $tmp0:
; MIPS32-EB-NEXT: jalr $25
; MIPS32-EB-NEXT: or $4, $1, $2
; MIPS32-EB-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS32-EB-NEXT: jr $ra
; MIPS32-EB-NEXT: addiu $sp, $sp, 24
;
; MIPS32R6-EL-LABEL: bar1:
; MIPS32R6-EL: # %bb.0: # %entry
; MIPS32R6-EL-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-EL-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-EL-NEXT: addiu $sp, $sp, -24
; MIPS32R6-EL-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32R6-EL-NEXT: addu $gp, $2, $25
; MIPS32R6-EL-NEXT: lw $1, %got(s2)($gp)
; MIPS32R6-EL-NEXT: lhu $4, 2($1)
; MIPS32R6-EL-NEXT: lw $25, %call16(foo2)($gp)
; MIPS32R6-EL-NEXT: .reloc $tmp0, R_MIPS_JALR, foo2
; MIPS32R6-EL-NEXT: $tmp0:
; MIPS32R6-EL-NEXT: jalrc $25
; MIPS32R6-EL-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS32R6-EL-NEXT: jr $ra
; MIPS32R6-EL-NEXT: addiu $sp, $sp, 24
;
; MIPS32R6-EB-LABEL: bar1:
; MIPS32R6-EB: # %bb.0: # %entry
; MIPS32R6-EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-EB-NEXT: addiu $sp, $sp, -24
; MIPS32R6-EB-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32R6-EB-NEXT: addu $gp, $2, $25
; MIPS32R6-EB-NEXT: lw $1, %got(s2)($gp)
; MIPS32R6-EB-NEXT: lhu $1, 2($1)
; MIPS32R6-EB-NEXT: lw $25, %call16(foo2)($gp)
; MIPS32R6-EB-NEXT: .reloc $tmp0, R_MIPS_JALR, foo2
; MIPS32R6-EB-NEXT: $tmp0:
; MIPS32R6-EB-NEXT: jalr $25
; MIPS32R6-EB-NEXT: sll $4, $1, 16
; MIPS32R6-EB-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS32R6-EB-NEXT: jr $ra
; MIPS32R6-EB-NEXT: addiu $sp, $sp, 24
;
; MIPS1-PSX-LABEL: bar1:
; MIPS1-PSX: # %bb.0: # %entry
; MIPS1-PSX-NEXT: lui $2, %hi(_gp_disp)
; MIPS1-PSX-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS1-PSX-NEXT: addiu $sp, $sp, -24
; MIPS1-PSX-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS1-PSX-NEXT: addu $gp, $2, $25
; MIPS1-PSX-NEXT: lw $1, %got(s2)($gp)
; MIPS1-PSX-NEXT: nop
; MIPS1-PSX-NEXT: lbu $2, 2($1)
; MIPS1-PSX-NEXT: lbu $1, 3($1)
; MIPS1-PSX-NEXT: nop
; MIPS1-PSX-NEXT: sll $1, $1, 8
; MIPS1-PSX-NEXT: lw $25, %call16(foo2)($gp)
; MIPS1-PSX-NEXT: nop
; MIPS1-PSX-NEXT: .reloc $tmp0, R_MIPS_JALR, foo2
; MIPS1-PSX-NEXT: $tmp0:
; MIPS1-PSX-NEXT: jalr $25
; MIPS1-PSX-NEXT: or $4, $1, $2
; MIPS1-PSX-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS1-PSX-NEXT: nop
; MIPS1-PSX-NEXT: jr $ra
; MIPS1-PSX-NEXT: addiu $sp, $sp, 24
entry:
tail call void @foo2(ptr byval(%struct.S1) getelementptr inbounds (%struct.S2, ptr @s2, i32 0, i32 1)) nounwind
ret void
}
; FIXME: We should be able to do better than this using lhu
; bar2 passes the whole 7-byte @s4 to @foo4 as a byval argument. In the
; expected output for every configuration, bytes 0-3 of s4 end up in $4 and
; bytes 4-6 in $5: the MIPS32 and MIPS1 variants use an lwl/lwr pair plus
; three lbu loads, while the MIPS32R6 variants use lw, lhu and lbu. In the
; MIPS1 PSX output a nop follows each load whose destination register is read
; by the very next instruction (including the lwl that feeds the lwr).
define void @bar2() nounwind {
; MIPS32-EL-LABEL: bar2:
; MIPS32-EL: # %bb.0: # %entry
; MIPS32-EL-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-EL-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-EL-NEXT: addiu $sp, $sp, -24
; MIPS32-EL-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32-EL-NEXT: addu $gp, $2, $25
; MIPS32-EL-NEXT: lw $1, %got(s4)($gp)
; MIPS32-EL-NEXT: lwl $4, 3($1)
; MIPS32-EL-NEXT: lwr $4, 0($1)
; MIPS32-EL-NEXT: lbu $2, 4($1)
; MIPS32-EL-NEXT: lbu $3, 5($1)
; MIPS32-EL-NEXT: sll $3, $3, 8
; MIPS32-EL-NEXT: or $2, $3, $2
; MIPS32-EL-NEXT: lbu $1, 6($1)
; MIPS32-EL-NEXT: sll $1, $1, 16
; MIPS32-EL-NEXT: lw $25, %call16(foo4)($gp)
; MIPS32-EL-NEXT: .reloc $tmp1, R_MIPS_JALR, foo4
; MIPS32-EL-NEXT: $tmp1:
; MIPS32-EL-NEXT: jalr $25
; MIPS32-EL-NEXT: or $5, $2, $1
; MIPS32-EL-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS32-EL-NEXT: jr $ra
; MIPS32-EL-NEXT: addiu $sp, $sp, 24
;
; MIPS32-EB-LABEL: bar2:
; MIPS32-EB: # %bb.0: # %entry
; MIPS32-EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32-EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32-EB-NEXT: addiu $sp, $sp, -24
; MIPS32-EB-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32-EB-NEXT: addu $gp, $2, $25
; MIPS32-EB-NEXT: lw $1, %got(s4)($gp)
; MIPS32-EB-NEXT: lwl $4, 0($1)
; MIPS32-EB-NEXT: lbu $2, 5($1)
; MIPS32-EB-NEXT: lwr $4, 3($1)
; MIPS32-EB-NEXT: sll $2, $2, 16
; MIPS32-EB-NEXT: lbu $3, 4($1)
; MIPS32-EB-NEXT: sll $3, $3, 24
; MIPS32-EB-NEXT: or $2, $3, $2
; MIPS32-EB-NEXT: lbu $1, 6($1)
; MIPS32-EB-NEXT: sll $1, $1, 8
; MIPS32-EB-NEXT: lw $25, %call16(foo4)($gp)
; MIPS32-EB-NEXT: .reloc $tmp1, R_MIPS_JALR, foo4
; MIPS32-EB-NEXT: $tmp1:
; MIPS32-EB-NEXT: jalr $25
; MIPS32-EB-NEXT: or $5, $2, $1
; MIPS32-EB-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS32-EB-NEXT: jr $ra
; MIPS32-EB-NEXT: addiu $sp, $sp, 24
;
; MIPS32R6-EL-LABEL: bar2:
; MIPS32R6-EL: # %bb.0: # %entry
; MIPS32R6-EL-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-EL-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-EL-NEXT: addiu $sp, $sp, -24
; MIPS32R6-EL-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32R6-EL-NEXT: addu $gp, $2, $25
; MIPS32R6-EL-NEXT: lw $1, %got(s4)($gp)
; MIPS32R6-EL-NEXT: lhu $2, 4($1)
; MIPS32R6-EL-NEXT: lbu $3, 6($1)
; MIPS32R6-EL-NEXT: sll $3, $3, 16
; MIPS32R6-EL-NEXT: lw $4, 0($1)
; MIPS32R6-EL-NEXT: lw $25, %call16(foo4)($gp)
; MIPS32R6-EL-NEXT: .reloc $tmp1, R_MIPS_JALR, foo4
; MIPS32R6-EL-NEXT: $tmp1:
; MIPS32R6-EL-NEXT: jalr $25
; MIPS32R6-EL-NEXT: or $5, $2, $3
; MIPS32R6-EL-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS32R6-EL-NEXT: jr $ra
; MIPS32R6-EL-NEXT: addiu $sp, $sp, 24
;
; MIPS32R6-EB-LABEL: bar2:
; MIPS32R6-EB: # %bb.0: # %entry
; MIPS32R6-EB-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6-EB-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS32R6-EB-NEXT: addiu $sp, $sp, -24
; MIPS32R6-EB-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32R6-EB-NEXT: addu $gp, $2, $25
; MIPS32R6-EB-NEXT: lw $1, %got(s4)($gp)
; MIPS32R6-EB-NEXT: lbu $2, 6($1)
; MIPS32R6-EB-NEXT: sll $2, $2, 8
; MIPS32R6-EB-NEXT: lhu $3, 4($1)
; MIPS32R6-EB-NEXT: sll $3, $3, 16
; MIPS32R6-EB-NEXT: lw $4, 0($1)
; MIPS32R6-EB-NEXT: lw $25, %call16(foo4)($gp)
; MIPS32R6-EB-NEXT: .reloc $tmp1, R_MIPS_JALR, foo4
; MIPS32R6-EB-NEXT: $tmp1:
; MIPS32R6-EB-NEXT: jalr $25
; MIPS32R6-EB-NEXT: or $5, $3, $2
; MIPS32R6-EB-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS32R6-EB-NEXT: jr $ra
; MIPS32R6-EB-NEXT: addiu $sp, $sp, 24
;
; MIPS1-PSX-LABEL: bar2:
; MIPS1-PSX: # %bb.0: # %entry
; MIPS1-PSX-NEXT: lui $2, %hi(_gp_disp)
; MIPS1-PSX-NEXT: addiu $2, $2, %lo(_gp_disp)
; MIPS1-PSX-NEXT: addiu $sp, $sp, -24
; MIPS1-PSX-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS1-PSX-NEXT: addu $gp, $2, $25
; MIPS1-PSX-NEXT: lw $1, %got(s4)($gp)
; MIPS1-PSX-NEXT: nop
; MIPS1-PSX-NEXT: lwl $4, 3($1)
; MIPS1-PSX-NEXT: nop
; MIPS1-PSX-NEXT: lwr $4, 0($1)
; MIPS1-PSX-NEXT: lbu $2, 4($1)
; MIPS1-PSX-NEXT: lbu $3, 5($1)
; MIPS1-PSX-NEXT: nop
; MIPS1-PSX-NEXT: sll $3, $3, 8
; MIPS1-PSX-NEXT: or $2, $3, $2
; MIPS1-PSX-NEXT: lbu $1, 6($1)
; MIPS1-PSX-NEXT: nop
; MIPS1-PSX-NEXT: sll $1, $1, 16
; MIPS1-PSX-NEXT: lw $25, %call16(foo4)($gp)
; MIPS1-PSX-NEXT: nop
; MIPS1-PSX-NEXT: .reloc $tmp1, R_MIPS_JALR, foo4
; MIPS1-PSX-NEXT: $tmp1:
; MIPS1-PSX-NEXT: jalr $25
; MIPS1-PSX-NEXT: or $5, $2, $1
; MIPS1-PSX-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; MIPS1-PSX-NEXT: nop
; MIPS1-PSX-NEXT: jr $ra
; MIPS1-PSX-NEXT: addiu $sp, $sp, 24
entry:
tail call void @foo4(ptr byval(%struct.S4) @s4) nounwind
ret void
}
; External callees used by bar1 and bar2; each takes its struct argument
; through a byval pointer (%struct.S1 for foo2, %struct.S4 for foo4).
declare void @foo2(ptr byval(%struct.S1))
declare void @foo4(ptr byval(%struct.S4))