From 41dc1b085796bbb0decae4701e54f5b08007d135 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 10 Jul 2025 16:03:52 +0100 Subject: [PATCH 1/2] Don't pass the tail-call scratch through the register allocator Regalloc3 doesn't support def operands on return instructions and this doesn't interact with register allocation anyways, so hard-code the use of r11 directly in `emit_return_call_common_sequence`. --- cranelift/codegen/src/isa/x64/inst/emit.rs | 4 +- cranelift/codegen/src/isa/x64/inst/mod.rs | 22 +- cranelift/codegen/src/isa/x64/lower/isle.rs | 2 - .../isa/x64/return-call-indirect.clif | 190 +++++++++--------- .../filetests/isa/x64/return-call.clif | 188 ++++++++--------- .../filetests/isa/x64/tail-call-conv.clif | 2 +- 6 files changed, 197 insertions(+), 211 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 518457282a25..edb1f78ef31d 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1984,7 +1984,9 @@ fn emit_return_call_common_sequence( but the current implementation relies on them being present" ); - let tmp = call_info.tmp.to_writable_reg(); + // Hard-coded register which doesn't conflict with function arguments or + // callee-saved registers. + let tmp = Writable::from_reg(regs::r11()); for inst in X64ABIMachineSpec::gen_clobber_restore(CallConv::Tail, &info.flags, state.frame_layout()) diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 523da371b2d0..3e52302c93e9 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -50,9 +50,6 @@ pub struct ReturnCallInfo { /// The in-register arguments and their constraints. pub uses: CallArgList, - - /// A temporary for use when moving the return address. - pub tmp: WritableGpr, } #[test] @@ -676,11 +673,9 @@ impl PrettyPrint for Inst { let ReturnCallInfo { uses, new_stack_arg_size, - tmp, dest, } = &**info; - let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8); - let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size}) tmp={tmp}"); + let mut s = format!("return_call_known {dest:?} ({new_stack_arg_size})"); for ret in uses { let preg = regs::show_reg(ret.preg); let vreg = pretty_print_reg(ret.vreg, 8); @@ -693,13 +688,10 @@ impl PrettyPrint for Inst { let ReturnCallInfo { uses, new_stack_arg_size, - tmp, dest, } = &**info; let callee = pretty_print_reg(*dest, 8); - let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8); - let mut s = - format!("return_call_unknown {callee} ({new_stack_arg_size}) tmp={tmp}"); + let mut s = format!("return_call_unknown {callee} ({new_stack_arg_size})"); for ret in uses { let preg = regs::show_reg(ret.preg); let vreg = pretty_print_reg(ret.vreg, 8); @@ -1133,10 +1125,7 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { } Inst::ReturnCallKnown { info } => { - let ReturnCallInfo { - dest, uses, tmp, .. - } = &mut **info; - collector.reg_fixed_def(tmp, regs::r11()); + let ReturnCallInfo { dest, uses, .. } = &mut **info; // Same as in the `Inst::CallKnown` branch. debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack)); for CallArgPair { vreg, preg } in uses { @@ -1145,9 +1134,7 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { } Inst::ReturnCallUnknown { info } => { - let ReturnCallInfo { - dest, uses, tmp, .. - } = &mut **info; + let ReturnCallInfo { dest, uses, .. } = &mut **info; // TODO(https://github.com/bytecodealliance/regalloc2/issues/145): // This shouldn't be a fixed register constraint, but it's not clear how to @@ -1156,7 +1143,6 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { // safe to use. collector.reg_fixed_use(dest, regs::r10()); - collector.reg_fixed_def(tmp, regs::r11()); for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); } diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 60a5ccd3d901..e91a4e7f9df4 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -131,7 +131,6 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { Box::new(ReturnCallInfo { dest, uses, - tmp: self.lower_ctx.temp_writable_gpr(), new_stack_arg_size, }) } @@ -150,7 +149,6 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { Box::new(ReturnCallInfo { dest, uses, - tmp: self.lower_ctx.temp_writable_gpr(), new_stack_arg_size, }) } diff --git a/cranelift/filetests/filetests/isa/x64/return-call-indirect.clif b/cranelift/filetests/filetests/isa/x64/return-call-indirect.clif index ca226e7fe055..e1901aa59f22 100644 --- a/cranelift/filetests/filetests/isa/x64/return-call-indirect.clif +++ b/cranelift/filetests/filetests/isa/x64/return-call-indirect.clif @@ -43,7 +43,7 @@ block0(v0: i64): ; movq %rsp, %rbp ; block0: ; load_ext_name %callee_i64+0, %r10 -; return_call_unknown %r10 (0) tmp=%r11 %rdi=%rdi +; return_call_unknown %r10 (0) %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 @@ -71,7 +71,7 @@ block0(v0: i64): ; movq %rsp, %rbp ; block0: ; load_ext_name %callee_i64+0, %r10 -; return_call_unknown %r10 (0) tmp=%r11 %rdi=%rdi +; return_call_unknown %r10 (0) %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 @@ -140,7 +140,7 @@ block0(v0: f64): ; movq %rsp, %rbp ; block0: ; load_ext_name %callee_f64+0, %r10 -; return_call_unknown %r10 (0) tmp=%r11 %xmm0=%xmm0 +; return_call_unknown %r10 (0) %xmm0=%xmm0 ; ; Disassembled: ; block0: ; offset 0x0 @@ -196,7 +196,7 @@ block0(v0: i8): ; movq %rsp, %rbp ; block0: ; load_ext_name %callee_i8+0, %r10 -; return_call_unknown %r10 (0) tmp=%r11 %rdi=%rdi +; return_call_unknown %r10 (0) %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 @@ -272,62 +272,62 @@ block0: ; movq %r9, +0x48(%rsp) ; movl $0x23, %r9d ; movq %r9, +0x40(%rsp) -; movl $0x28, %eax -; movl $0x2d, %r10d -; movl $0x32, %r11d -; movl $0x37, %r13d -; movl $0x3c, %r14d -; movl $0x41, %r15d -; movl $0x46, %ebx -; movl $0x4b, %r12d +; movl $0x28, %esi +; movq %rsi, +0x38(%rsp) +; movl $0x2d, %eax +; movl $0x32, %r10d +; movl $0x37, %r12d +; movl $0x3c, %r13d +; movl $0x41, %r14d +; movl $0x46, %r15d +; movl $0x4b, %ebx ; movl $0x50, %edi ; movl $0x55, %esi -; movq %rsi, +0x38(%rsp) ; movl $0x5a, %edx ; movl $0x5f, %ecx ; movl $0x64, %r8d ; movl $0x69, %r9d -; movl $0x6e, %esi -; movq %rsi, +0x30(%rsp) -; movl $0x73, %esi -; movq %rsi, +0x28(%rsp) -; movl $0x78, %esi -; movq %rsi, +0x20(%rsp) -; movl $0x7d, %esi -; movq %rsi, +0x18(%rsp) -; movl $0x82, %esi -; movq %rsi, +0x10(%rsp) -; movl $0x87, %esi -; movq %rsi, +8(%rsp) -; load_ext_name %tail_callee_stack_args+0, %rsi -; movq %rsi, +(%rsp) -; movq %rax, +-0xa0(%rbp) -; movq %r10, +-0x98(%rbp) -; movq %r11, +-0x90(%rbp) -; movq %r13, +-0x88(%rbp) -; movq %r14, +-0x80(%rbp) -; movq %r15, +-0x78(%rbp) -; movq %rbx, +-0x70(%rbp) -; movq %r12, +-0x68(%rbp) +; movl $0x6e, %r11d +; movq %r11, +0x30(%rsp) +; movl $0x73, %r11d +; movq %r11, +0x28(%rsp) +; movl $0x78, %r11d +; movq %r11, +0x20(%rsp) +; movl $0x7d, %r11d +; movq %r11, +0x18(%rsp) +; movl $0x82, %r11d +; movq %r11, +0x10(%rsp) +; movl $0x87, %r11d +; movq %r11, +8(%rsp) +; load_ext_name %tail_callee_stack_args+0, %r11 +; movq %r11, +(%rsp) +; movq +0x38(%rsp), %r11 +; movq %r11, +-0xa0(%rbp) +; movq %rax, +-0x98(%rbp) +; movq %r10, +-0x90(%rbp) +; movq %r12, +-0x88(%rbp) +; movq %r13, +-0x80(%rbp) +; movq %r14, +-0x78(%rbp) +; movq %r15, +-0x70(%rbp) +; movq %rbx, +-0x68(%rbp) ; movq %rdi, +-0x60(%rbp) -; movq +0x38(%rsp), %rdi -; movq %rdi, +-0x58(%rbp) +; movq %rsi, +-0x58(%rbp) ; movq %rdx, +-0x50(%rbp) ; movq %rcx, +-0x48(%rbp) ; movq %r8, +-0x40(%rbp) ; movq %r9, +-0x38(%rbp) -; movq +0x30(%rsp), %rsi -; movq %rsi, +-0x30(%rbp) -; movq +0x28(%rsp), %rsi -; movq %rsi, +-0x28(%rbp) -; movq +0x20(%rsp), %rsi -; movq %rsi, +-0x20(%rbp) -; movq +0x18(%rsp), %rsi -; movq %rsi, +-0x18(%rbp) -; movq +0x10(%rsp), %rsi -; movq %rsi, +-0x10(%rbp) -; movq +8(%rsp), %rsi -; movq %rsi, +-8(%rbp) +; movq +0x30(%rsp), %r11 +; movq %r11, +-0x30(%rbp) +; movq +0x28(%rsp), %r11 +; movq %r11, +-0x28(%rbp) +; movq +0x20(%rsp), %r11 +; movq %r11, +-0x20(%rbp) +; movq +0x18(%rsp), %r11 +; movq %r11, +-0x18(%rbp) +; movq +0x10(%rsp), %r11 +; movq %r11, +-0x10(%rbp) +; movq +8(%rsp), %r11 +; movq %r11, +-8(%rbp) ; movq +0x50(%rsp), %rcx ; movq +0x58(%rsp), %rdx ; movq +0x60(%rsp), %rsi @@ -335,7 +335,7 @@ block0: ; movq +0x48(%rsp), %r8 ; movq +0x40(%rsp), %r9 ; movq +(%rsp), %r10 -; return_call_unknown %r10 (160) tmp=%r11 %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9 +; return_call_unknown %r10 (160) %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9 ; ; Disassembled: ; block0: ; offset 0x0 @@ -366,62 +366,62 @@ block0: ; movq %r9, 0x48(%rsp) ; movl $0x23, %r9d ; movq %r9, 0x40(%rsp) -; movl $0x28, %eax -; movl $0x2d, %r10d -; movl $0x32, %r11d -; movl $0x37, %r13d -; movl $0x3c, %r14d -; movl $0x41, %r15d -; movl $0x46, %ebx -; movl $0x4b, %r12d +; movl $0x28, %esi +; movq %rsi, 0x38(%rsp) +; movl $0x2d, %eax +; movl $0x32, %r10d +; movl $0x37, %r12d +; movl $0x3c, %r13d +; movl $0x41, %r14d +; movl $0x46, %r15d +; movl $0x4b, %ebx ; movl $0x50, %edi ; movl $0x55, %esi -; movq %rsi, 0x38(%rsp) ; movl $0x5a, %edx ; movl $0x5f, %ecx ; movl $0x64, %r8d ; movl $0x69, %r9d -; movl $0x6e, %esi -; movq %rsi, 0x30(%rsp) -; movl $0x73, %esi -; movq %rsi, 0x28(%rsp) -; movl $0x78, %esi -; movq %rsi, 0x20(%rsp) -; movl $0x7d, %esi -; movq %rsi, 0x18(%rsp) -; movl $0x82, %esi -; movq %rsi, 0x10(%rsp) -; movl $0x87, %esi -; movq %rsi, 8(%rsp) -; movabsq $0, %rsi ; reloc_external Abs8 %tail_callee_stack_args 0 -; movq %rsi, (%rsp) -; movq %rax, 0x10(%rbp) -; movq %r10, 0x18(%rbp) -; movq %r11, 0x20(%rbp) -; movq %r13, 0x28(%rbp) -; movq %r14, 0x30(%rbp) -; movq %r15, 0x38(%rbp) -; movq %rbx, 0x40(%rbp) -; movq %r12, 0x48(%rbp) +; movl $0x6e, %r11d +; movq %r11, 0x30(%rsp) +; movl $0x73, %r11d +; movq %r11, 0x28(%rsp) +; movl $0x78, %r11d +; movq %r11, 0x20(%rsp) +; movl $0x7d, %r11d +; movq %r11, 0x18(%rsp) +; movl $0x82, %r11d +; movq %r11, 0x10(%rsp) +; movl $0x87, %r11d +; movq %r11, 8(%rsp) +; movabsq $0, %r11 ; reloc_external Abs8 %tail_callee_stack_args 0 +; movq %r11, (%rsp) +; movq 0x38(%rsp), %r11 +; movq %r11, 0x10(%rbp) +; movq %rax, 0x18(%rbp) +; movq %r10, 0x20(%rbp) +; movq %r12, 0x28(%rbp) +; movq %r13, 0x30(%rbp) +; movq %r14, 0x38(%rbp) +; movq %r15, 0x40(%rbp) +; movq %rbx, 0x48(%rbp) ; movq %rdi, 0x50(%rbp) -; movq 0x38(%rsp), %rdi -; movq %rdi, 0x58(%rbp) +; movq %rsi, 0x58(%rbp) ; movq %rdx, 0x60(%rbp) ; movq %rcx, 0x68(%rbp) ; movq %r8, 0x70(%rbp) ; movq %r9, 0x78(%rbp) -; movq 0x30(%rsp), %rsi -; movq %rsi, 0x80(%rbp) -; movq 0x28(%rsp), %rsi -; movq %rsi, 0x88(%rbp) -; movq 0x20(%rsp), %rsi -; movq %rsi, 0x90(%rbp) -; movq 0x18(%rsp), %rsi -; movq %rsi, 0x98(%rbp) -; movq 0x10(%rsp), %rsi -; movq %rsi, 0xa0(%rbp) -; movq 8(%rsp), %rsi -; movq %rsi, 0xa8(%rbp) +; movq 0x30(%rsp), %r11 +; movq %r11, 0x80(%rbp) +; movq 0x28(%rsp), %r11 +; movq %r11, 0x88(%rbp) +; movq 0x20(%rsp), %r11 +; movq %r11, 0x90(%rbp) +; movq 0x18(%rsp), %r11 +; movq %r11, 0x98(%rbp) +; movq 0x10(%rsp), %r11 +; movq %r11, 0xa0(%rbp) +; movq 8(%rsp), %r11 +; movq %r11, 0xa8(%rbp) ; movq 0x50(%rsp), %rcx ; movq 0x58(%rsp), %rdx ; movq 0x60(%rsp), %rsi diff --git a/cranelift/filetests/filetests/isa/x64/return-call.clif b/cranelift/filetests/filetests/isa/x64/return-call.clif index 9d5f7ddc00f6..92e07054f9f3 100644 --- a/cranelift/filetests/filetests/isa/x64/return-call.clif +++ b/cranelift/filetests/filetests/isa/x64/return-call.clif @@ -41,7 +41,7 @@ block0(v0: i64): ; movq %rsp, %rbp ; block0: ; load_ext_name %callee_i64+0, %r10 -; return_call_unknown %r10 (0) tmp=%r11 %rdi=%rdi +; return_call_unknown %r10 (0) %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 @@ -66,7 +66,7 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; return_call_known TestCase(%callee_i64) (0) tmp=%r11 %rdi=%rdi +; return_call_known TestCase(%callee_i64) (0) %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 @@ -132,7 +132,7 @@ block0(v0: f64): ; movq %rsp, %rbp ; block0: ; load_ext_name %callee_f64+0, %r10 -; return_call_unknown %r10 (0) tmp=%r11 %xmm0=%xmm0 +; return_call_unknown %r10 (0) %xmm0=%xmm0 ; ; Disassembled: ; block0: ; offset 0x0 @@ -186,7 +186,7 @@ block0(v0: i8): ; movq %rsp, %rbp ; block0: ; load_ext_name %callee_i8+0, %r10 -; return_call_unknown %r10 (0) tmp=%r11 %rdi=%rdi +; return_call_unknown %r10 (0) %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 @@ -243,7 +243,7 @@ block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v ; movq %rcx, %rsi ; movq %r10, %rdx ; movq %r11, %rcx -; return_call_known TestCase(%one_stack_arg) (16) tmp=%r11 %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9 +; return_call_known TestCase(%one_stack_arg) (16) %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9 ; ; Disassembled: ; block0: ; offset 0x0 @@ -279,7 +279,7 @@ block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v ; movq %rsp, %rbp ; block0: ; movq +-0x10(%rbp), %rdi -; return_call_known TestCase(%callee_i8) (0) tmp=%r11 %rdi=%rdi +; return_call_known TestCase(%callee_i8) (0) %rdi=%rdi ; ; Disassembled: ; block0: ; offset 0x0 @@ -323,7 +323,7 @@ block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32): ; movq %rcx, %rdx ; movq %r8, %rcx ; movq %r10, %r8 -; return_call_known TestCase(%call_one_stack_arg) (32) tmp=%r11 %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9 +; return_call_known TestCase(%call_one_stack_arg) (32) %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9 ; ; Disassembled: ; block0: ; offset 0x0 @@ -437,60 +437,60 @@ block0: ; movq %r9, +0x40(%rsp) ; movl $0x23, %r9d ; movq %r9, +0x38(%rsp) -; movl $0x28, %eax -; movl $0x2d, %r10d -; movl $0x32, %r11d -; movl $0x37, %r13d -; movl $0x3c, %r14d -; movl $0x41, %r15d -; movl $0x46, %ebx -; movl $0x4b, %r12d +; movl $0x28, %esi +; movq %rsi, +0x30(%rsp) +; movl $0x2d, %eax +; movl $0x32, %r10d +; movl $0x37, %r12d +; movl $0x3c, %r13d +; movl $0x41, %r14d +; movl $0x46, %r15d +; movl $0x4b, %ebx ; movl $0x50, %edi ; movl $0x55, %esi -; movq %rsi, +0x30(%rsp) ; movl $0x5a, %edx ; movl $0x5f, %ecx ; movl $0x64, %r8d ; movl $0x69, %r9d -; movl $0x6e, %esi -; movq %rsi, +0x28(%rsp) -; movl $0x73, %esi -; movq %rsi, +0x20(%rsp) -; movl $0x78, %esi -; movq %rsi, +0x18(%rsp) -; movl $0x7d, %esi -; movq %rsi, +0x10(%rsp) -; movl $0x82, %esi -; movq %rsi, +8(%rsp) -; movl $0x87, %esi -; movq %rsi, +(%rsp) -; movq %rax, +-0xa0(%rbp) -; movq %r10, +-0x98(%rbp) -; movq %r11, +-0x90(%rbp) -; movq %r13, +-0x88(%rbp) -; movq %r14, +-0x80(%rbp) -; movq %r15, +-0x78(%rbp) -; movq %rbx, +-0x70(%rbp) -; movq %r12, +-0x68(%rbp) +; movl $0x6e, %r11d +; movq %r11, +0x28(%rsp) +; movl $0x73, %r11d +; movq %r11, +0x20(%rsp) +; movl $0x78, %r11d +; movq %r11, +0x18(%rsp) +; movl $0x7d, %r11d +; movq %r11, +0x10(%rsp) +; movl $0x82, %r11d +; movq %r11, +8(%rsp) +; movl $0x87, %r11d +; movq %r11, +(%rsp) +; movq +0x30(%rsp), %r11 +; movq %r11, +-0xa0(%rbp) +; movq %rax, +-0x98(%rbp) +; movq %r10, +-0x90(%rbp) +; movq %r12, +-0x88(%rbp) +; movq %r13, +-0x80(%rbp) +; movq %r14, +-0x78(%rbp) +; movq %r15, +-0x70(%rbp) +; movq %rbx, +-0x68(%rbp) ; movq %rdi, +-0x60(%rbp) -; movq +0x30(%rsp), %rdi -; movq %rdi, +-0x58(%rbp) +; movq %rsi, +-0x58(%rbp) ; movq %rdx, +-0x50(%rbp) ; movq %rcx, +-0x48(%rbp) ; movq %r8, +-0x40(%rbp) ; movq %r9, +-0x38(%rbp) -; movq +0x28(%rsp), %rsi -; movq %rsi, +-0x30(%rbp) -; movq +0x20(%rsp), %rsi -; movq %rsi, +-0x28(%rbp) -; movq +0x18(%rsp), %rsi -; movq %rsi, +-0x20(%rbp) -; movq +0x10(%rsp), %rsi -; movq %rsi, +-0x18(%rbp) -; movq +8(%rsp), %rsi -; movq %rsi, +-0x10(%rbp) -; movq +(%rsp), %rsi -; movq %rsi, +-8(%rbp) +; movq +0x28(%rsp), %r11 +; movq %r11, +-0x30(%rbp) +; movq +0x20(%rsp), %r11 +; movq %r11, +-0x28(%rbp) +; movq +0x18(%rsp), %r11 +; movq %r11, +-0x20(%rbp) +; movq +0x10(%rsp), %r11 +; movq %r11, +-0x18(%rbp) +; movq +8(%rsp), %r11 +; movq %r11, +-0x10(%rbp) +; movq +(%rsp), %r11 +; movq %r11, +-8(%rbp) ; load_ext_name %tail_callee_stack_args+0, %r10 ; movq +0x48(%rsp), %rcx ; movq +0x50(%rsp), %rdx @@ -498,7 +498,7 @@ block0: ; movq +0x60(%rsp), %rdi ; movq +0x40(%rsp), %r8 ; movq +0x38(%rsp), %r9 -; return_call_unknown %r10 (160) tmp=%r11 %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9 +; return_call_unknown %r10 (160) %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9 ; ; Disassembled: ; block0: ; offset 0x0 @@ -529,60 +529,60 @@ block0: ; movq %r9, 0x40(%rsp) ; movl $0x23, %r9d ; movq %r9, 0x38(%rsp) -; movl $0x28, %eax -; movl $0x2d, %r10d -; movl $0x32, %r11d -; movl $0x37, %r13d -; movl $0x3c, %r14d -; movl $0x41, %r15d -; movl $0x46, %ebx -; movl $0x4b, %r12d +; movl $0x28, %esi +; movq %rsi, 0x30(%rsp) +; movl $0x2d, %eax +; movl $0x32, %r10d +; movl $0x37, %r12d +; movl $0x3c, %r13d +; movl $0x41, %r14d +; movl $0x46, %r15d +; movl $0x4b, %ebx ; movl $0x50, %edi ; movl $0x55, %esi -; movq %rsi, 0x30(%rsp) ; movl $0x5a, %edx ; movl $0x5f, %ecx ; movl $0x64, %r8d ; movl $0x69, %r9d -; movl $0x6e, %esi -; movq %rsi, 0x28(%rsp) -; movl $0x73, %esi -; movq %rsi, 0x20(%rsp) -; movl $0x78, %esi -; movq %rsi, 0x18(%rsp) -; movl $0x7d, %esi -; movq %rsi, 0x10(%rsp) -; movl $0x82, %esi -; movq %rsi, 8(%rsp) -; movl $0x87, %esi -; movq %rsi, (%rsp) -; movq %rax, 0x10(%rbp) -; movq %r10, 0x18(%rbp) -; movq %r11, 0x20(%rbp) -; movq %r13, 0x28(%rbp) -; movq %r14, 0x30(%rbp) -; movq %r15, 0x38(%rbp) -; movq %rbx, 0x40(%rbp) -; movq %r12, 0x48(%rbp) +; movl $0x6e, %r11d +; movq %r11, 0x28(%rsp) +; movl $0x73, %r11d +; movq %r11, 0x20(%rsp) +; movl $0x78, %r11d +; movq %r11, 0x18(%rsp) +; movl $0x7d, %r11d +; movq %r11, 0x10(%rsp) +; movl $0x82, %r11d +; movq %r11, 8(%rsp) +; movl $0x87, %r11d +; movq %r11, (%rsp) +; movq 0x30(%rsp), %r11 +; movq %r11, 0x10(%rbp) +; movq %rax, 0x18(%rbp) +; movq %r10, 0x20(%rbp) +; movq %r12, 0x28(%rbp) +; movq %r13, 0x30(%rbp) +; movq %r14, 0x38(%rbp) +; movq %r15, 0x40(%rbp) +; movq %rbx, 0x48(%rbp) ; movq %rdi, 0x50(%rbp) -; movq 0x30(%rsp), %rdi -; movq %rdi, 0x58(%rbp) +; movq %rsi, 0x58(%rbp) ; movq %rdx, 0x60(%rbp) ; movq %rcx, 0x68(%rbp) ; movq %r8, 0x70(%rbp) ; movq %r9, 0x78(%rbp) -; movq 0x28(%rsp), %rsi -; movq %rsi, 0x80(%rbp) -; movq 0x20(%rsp), %rsi -; movq %rsi, 0x88(%rbp) -; movq 0x18(%rsp), %rsi -; movq %rsi, 0x90(%rbp) -; movq 0x10(%rsp), %rsi -; movq %rsi, 0x98(%rbp) -; movq 8(%rsp), %rsi -; movq %rsi, 0xa0(%rbp) -; movq (%rsp), %rsi -; movq %rsi, 0xa8(%rbp) +; movq 0x28(%rsp), %r11 +; movq %r11, 0x80(%rbp) +; movq 0x20(%rsp), %r11 +; movq %r11, 0x88(%rbp) +; movq 0x18(%rsp), %r11 +; movq %r11, 0x90(%rbp) +; movq 0x10(%rsp), %r11 +; movq %r11, 0x98(%rbp) +; movq 8(%rsp), %r11 +; movq %r11, 0xa0(%rbp) +; movq (%rsp), %r11 +; movq %r11, 0xa8(%rbp) ; movabsq $0, %r10 ; reloc_external Abs8 %tail_callee_stack_args 0 ; movq 0x48(%rsp), %rcx ; movq 0x50(%rsp), %rdx diff --git a/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif b/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif index d3bcfb2260dd..e510bf10f406 100644 --- a/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/tail-call-conv.clif @@ -916,7 +916,7 @@ block0(v0: f64, v1: f64, v2: i8, v3: i32, v4: i128, v5: i32, v6: i128, v7: i32, ; movq %rsp, %rbp ; block0: ; load_ext_name %callee_simple+0, %r10 -; return_call_unknown %r10 (0) tmp=%r11 +; return_call_unknown %r10 (0) ; ; Disassembled: ; block0: ; offset 0x0 From 753ee922a6bf0cf93a52bc858a80817b187bf187 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 10 Jul 2025 16:32:28 +0100 Subject: [PATCH 2/2] Add regalloc3 as an option for Cranelift's register allocator --- Cargo.lock | 57 +++++++++++++++++-- Cargo.toml | 3 + cranelift/codegen/meta/src/shared/settings.rs | 2 +- cranelift/codegen/src/isa/x64/inst/emit.rs | 3 + cranelift/codegen/src/machinst/compile.rs | 1 + crates/cli-flags/src/opt.rs | 1 + crates/fuzzing/src/generators/config.rs | 2 + crates/wasmtime/src/config.rs | 4 ++ 8 files changed, 68 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c016acb9986..cd5d768e568f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -52,6 +52,12 @@ version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" +[[package]] +name = "allocator-api2" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78200ac3468a57d333cd0ea5dd398e25111194dcacd49208afca95c629a6311d" + [[package]] name = "ambient-authority" version = "0.0.2" @@ -258,6 +264,17 @@ dependencies = [ "generic-array", ] +[[package]] +name = "brie-tree" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2314e51eefedcdef28777b80cdf66293635ffe1017ebb047b7ea46f1860eabf7" +dependencies = [ + "allocator-api2 0.3.0", + "cfg-if", + "nonmax", +] + [[package]] name = "bstr" version = "1.6.0" @@ -275,7 +292,7 @@ version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" dependencies = [ - "allocator-api2", + "allocator-api2 0.2.20", ] [[package]] @@ -1662,6 +1679,8 @@ version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" dependencies = [ + "allocator-api2 0.2.20", + "equivalent", "foldhash", "serde", ] @@ -2411,6 +2430,12 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nonmax" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "610a5acd306ec67f907abe5567859a3c693fb9886eb1f012ab8f2a47bef3db51" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -2607,6 +2632,15 @@ dependencies = [ "openvino-finder", ] +[[package]] +name = "ordered-float" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2c1f9f56e534ac6a9b8a4600bdf0f530fb393b5f393e7b4d03489c3cf0c3f01" +dependencies = [ + "num-traits", +] + [[package]] name = "ort" version = "2.0.0-rc.2" @@ -2897,18 +2931,33 @@ dependencies = [ [[package]] name = "regalloc2" version = "0.12.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5216b1837de2149f8bc8e6d5f88a9326b63b8c836ed58ce4a0a29ec736a59734" +source = "git+https://github.com/Amanieu/regalloc2.git?branch=regalloc3#2b124549e47a203dd623e1d0d299d7f6a8e08746" dependencies = [ - "allocator-api2", + "allocator-api2 0.2.20", "bumpalo", "hashbrown 0.15.2", "log", + "regalloc3", "rustc-hash", "serde", "smallvec", ] +[[package]] +name = "regalloc3" +version = "0.1.0" +source = "git+https://github.com/Amanieu/regalloc3.git#c4a2b2767a94565557ca2b55aacb97e598fb9950" +dependencies = [ + "anyhow", + "bitflags 2.6.0", + "brie-tree", + "hashbrown 0.15.2", + "log", + "ordered-float", + "rustc-hash", + "smallvec", +] + [[package]] name = "regex" version = "1.9.1" diff --git a/Cargo.toml b/Cargo.toml index 51317e9ecd3e..b3aec27bfe47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -619,3 +619,6 @@ debug = "line-tables-only" inherits = "release" codegen-units = 1 lto = true + +[patch.crates-io] +regalloc2 = { git = "https://github.com/Amanieu/regalloc2.git", branch = "regalloc3" } diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs index ff0a0e0700bc..1c658c5270ed 100644 --- a/cranelift/codegen/meta/src/shared/settings.rs +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -43,7 +43,7 @@ pub(crate) fn define() -> SettingGroup { have adequate support for the kinds of allocations required by exception handling (https://github.com/bytecodealliance/regalloc2/issues/217). "#, - vec!["backtracking"], + vec!["backtracking", "regalloc3"], ); settings.add_enum( diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index edb1f78ef31d..8147edd92901 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1987,6 +1987,9 @@ fn emit_return_call_common_sequence( // Hard-coded register which doesn't conflict with function arguments or // callee-saved registers. let tmp = Writable::from_reg(regs::r11()); + for pair in &call_info.uses { + debug_assert_ne!(pair.preg, regs::r11()); + } for inst in X64ABIMachineSpec::gen_clobber_restore(CallConv::Tail, &info.flags, state.frame_layout()) diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index 507ac126a3d1..dddd9a979f43 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -66,6 +66,7 @@ pub fn compile( options.algorithm = match b.flags().regalloc_algorithm() { RegallocAlgorithm::Backtracking => Algorithm::Ion, + RegallocAlgorithm::Regalloc3 => Algorithm::Regalloc3, // Note: single-pass is currently disabled // (https://github.com/bytecodealliance/regalloc2/issues/217). }; diff --git a/crates/cli-flags/src/opt.rs b/crates/cli-flags/src/opt.rs index b57afccf15ff..02b5d990b133 100644 --- a/crates/cli-flags/src/opt.rs +++ b/crates/cli-flags/src/opt.rs @@ -482,6 +482,7 @@ impl WasmtimeOptionValue for wasmtime::RegallocAlgorithm { fn parse(val: Option<&str>) -> Result { match String::parse(val)?.as_str() { "backtracking" => Ok(wasmtime::RegallocAlgorithm::Backtracking), + "regalloc3" => Ok(wasmtime::RegallocAlgorithm::Regalloc3), other => bail!( "unknown regalloc algorithm`{}`, only backtracking,single-pass accepted", other diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs index 0545956facf4..437717c97e35 100644 --- a/crates/fuzzing/src/generators/config.rs +++ b/crates/fuzzing/src/generators/config.rs @@ -806,6 +806,7 @@ impl OptLevel { enum RegallocAlgorithm { Backtracking, SinglePass, + Regalloc3, } impl RegallocAlgorithm { @@ -819,6 +820,7 @@ impl RegallocAlgorithm { // `arbitrary` mappings, we keep the `RegallocAlgorithm` // enum as it is and remap here to `Backtracking`. RegallocAlgorithm::SinglePass => wasmtime::RegallocAlgorithm::Backtracking, + RegallocAlgorithm::Regalloc3 => wasmtime::RegallocAlgorithm::Regalloc3, } } } diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 7ea83dc19a55..ab4d1714ee25 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -1296,6 +1296,7 @@ impl Config { pub fn cranelift_regalloc_algorithm(&mut self, algo: RegallocAlgorithm) -> &mut Self { let val = match algo { RegallocAlgorithm::Backtracking => "backtracking", + RegallocAlgorithm::Regalloc3 => "regalloc3", }; self.compiler_config .settings @@ -2873,6 +2874,9 @@ pub enum RegallocAlgorithm { /// results in better register utilization, producing fewer spills /// and moves, but can cause super-linear compile runtime. Backtracking, + + /// New experimental register allocator. + Regalloc3, } /// Select which profiling technique to support.