cranelift: always spill i32 with i64 stores #2840

Merged 1 commit on Apr 14, 2021
13 changes: 13 additions & 0 deletions cranelift/codegen/src/machinst/abi_impl.rs
@@ -1213,6 +1213,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);

// When reloading from a spill slot, we might have lost information about the real
// integer type. For instance, on the x64 backend, a zero-extension can become spurious
// and be optimized into a move, causing vregs of types I32 and I64 to share the same
// coalescing equivalency class. As a result, such a value can be spilled as an I32 and
// later reloaded as an I64; to make sure the high bits are always defined, always do a
// word-sized store in this case.
let ty = if ty.is_int() && ty.bytes() < M::word_bytes() {
M::word_type()
} else {
ty
};

gen_store_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), from_regs, ty)
}

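The widening rule added above can be sketched in isolation. The helper below is purely illustrative: `spill_store_bytes`, its parameters, and the `main` driver are hypothetical stand-ins for `ty.is_int()`, `ty.bytes()`, and `M::word_bytes()` from the diff, not part of the Cranelift API.

```rust
/// Hypothetical sketch of the spill-type widening decision in the patch.
/// `is_int` and `ty_bytes` stand in for `ty.is_int()` / `ty.bytes()`, and
/// `word_bytes` for `M::word_bytes()`.
fn spill_store_bytes(is_int: bool, ty_bytes: u32, word_bytes: u32) -> u32 {
    if is_int && ty_bytes < word_bytes {
        // Narrow integer spills are widened to a full machine word so the
        // slot's high bits are always defined when it is reloaded as an I64.
        word_bytes
    } else {
        ty_bytes
    }
}

fn main() {
    // On x86-64 (8-byte word), an I32 spill now stores 8 bytes...
    assert_eq!(spill_store_bytes(true, 4, 8), 8);
    // ...while word-sized integers and non-integer types are left alone.
    assert_eq!(spill_store_bytes(true, 8, 8), 8);
    assert_eq!(spill_store_bytes(false, 16, 8), 16);
}
```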
@@ -0,0 +1,127 @@
test compile
target x86_64 machinst

;; The goal of this test is to ensure that stack spills of an integer value
;; whose width is less than the machine word's size cause the full word to be
;; stored, and not only the lower bits.

;; Because an unsigned extension can be transformed into a simple move, the
;; source vreg of the extension operation can be coalesced with its
;; destination vreg; if that vreg happens to be spilled, then the reload may
;; use a different, larger size. (A standalone sketch of this failure mode
;; follows the test.)

function %f0(i32, i32, i32) -> i64 {
fn0 = %g(i32) -> i64

; check: pushq %rbp
; nextln: movq %rsp, %rbp
; nextln: subq $$64, %rsp

;; Stash all the callee-saved registers.

; nextln: movq %r12, 16(%rsp)
; nextln: movq %r13, 24(%rsp)
; nextln: movq %r14, 32(%rsp)
; nextln: movq %rbx, 40(%rsp)
; nextln: movq %r15, 48(%rsp)

block0(v0: i32, v1: i32, v2: i32):
;; First, create enough virtual registers so that the call instructions
;; cause at least one of them to be spilled onto the stack.

v3 = iadd.i32 v0, v1
v4 = iadd.i32 v1, v2
v5 = iadd.i32 v0, v2
v6 = iadd.i32 v3, v0
v7 = iadd.i32 v4, v0
v8 = iadd.i32 v5, v0

; nextln: movq %rdi, %r12
; nextln: addl %esi, %r12d
; nextln: movq %rsi, %r13
; nextln: addl %edx, %r13d
; nextln: movq %rdi, %r14
; nextln: addl %edx, %r14d
; nextln: movq %r12, %rbx
; nextln: addl %edi, %ebx
; nextln: movq %r13, %r15
; nextln: addl %edi, %r15d
; nextln: movq %r14, %rsi

;; This should be movq below, not movl.
; nextln: movq %rsi, rsp(0 + virtual offset)

; nextln: movslq rsp(0 + virtual offset), %rsi
; nextln: addl %edi, %esi

;; Insert an effectful instruction so that the live ranges of the adds and
;; uextends are split here, and to prevent the uextends from being emitted
;; before the call. This effectively causes the above i32 to be spilled as
;; an i32, and not as a full i64.

v300 = call fn0(v0)

;; This should be movq below, not movl.
; nextln: movq %rsi, rsp(0 + virtual offset)

; nextln: load_ext_name %g+0, %rsi
; nextln: call *%rsi

v31 = uextend.i64 v3
v41 = uextend.i64 v4
v51 = uextend.i64 v5
v61 = uextend.i64 v6
v71 = uextend.i64 v7
v81 = uextend.i64 v8

;; None of the uextends are generated here yet.

;; At this point, I'd expect this second call below not to be necessary, but
;; if it is removed, the uextend is applied before the call and the i64 is
;; spilled (then reloaded), so the bug doesn't appear. So an additional call
;; it is!

v100 = call fn0(v3)

; nextln: movq %r12, %rsi
; nextln: movq %rsi, rsp(8 + virtual offset)
; nextln: nop len=0
; nextln: movq %r12, %rdi
; nextln: load_ext_name %g+0, %rsi
; nextln: call *%rsi

;; Cause reloads of all the values. Most are in registers, but one of them
;; is on the stack. Make sure they're all used in the final computation.

v101 = iadd.i64 v100, v31
v102 = iadd.i64 v101, v41
v103 = iadd.i64 v102, v51
v104 = iadd.i64 v103, v61
v105 = iadd.i64 v104, v71
v200 = iadd.i64 v105, v81

; nextln: movq %rax, %rsi
; nextln: movq rsp(8 + virtual offset), %rdi
; nextln: addq %rdi, %rsi
; nextln: addq %r13, %rsi
; nextln: addq %r14, %rsi
; nextln: addq %rbx, %rsi
; nextln: addq %r15, %rsi

;; The reload operates on a full word, so it uses movq.
; nextln: movq rsp(0 + virtual offset), %rdi

; nextln: addq %rdi, %rsi
; nextln: movq %rsi, %rax
; nextln: movq 16(%rsp), %r12
; nextln: movq 24(%rsp), %r13
; nextln: movq 32(%rsp), %r14
; nextln: movq 40(%rsp), %rbx
; nextln: movq 48(%rsp), %r15
; nextln: addq $$64, %rsp

return v200
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
}
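To make the failure mode exercised by the test above concrete, here is a minimal, self-contained sketch (plain Rust, nothing Cranelift-specific) of why a 32-bit store into an 8-byte spill slot followed by a 64-bit reload can observe undefined high bits, while a word-sized store cannot. The byte array simply models a spill slot; it is an illustration, not the codegen itself.

```rust
fn main() {
    // An 8-byte spill slot whose upper half still holds stale data from a
    // previous occupant (standing in for "undefined" bits).
    let mut slot = [0u8; 8];
    slot[4..].copy_from_slice(&0xdead_beefu32.to_le_bytes());

    // Old behaviour: a 32-bit spill store only overwrites the low 4 bytes...
    slot[..4].copy_from_slice(&7u32.to_le_bytes());
    // ...so a 64-bit reload (the movq the test checks for) sees garbage in
    // the high bits instead of the expected zero-extended value.
    assert_ne!(u64::from_le_bytes(slot), 7u64);

    // New behaviour: the spill is widened to a word-sized store, so the
    // whole slot is defined and the 64-bit reload returns the right value.
    slot.copy_from_slice(&u64::from(7u32).to_le_bytes());
    assert_eq!(u64::from_le_bytes(slot), 7u64);
}
```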