Skip to content

ASpace asserts when covering exactly one element #1019

@iximeow

Description

@iximeow

I've been looking at the cores from @askfongjojo in #827 (comment) and the new ones are a different issue than originally reported in #827. figuring this out has mostly been an exercise in trusting what the computer is telling me, but straightforward enough..

so, the stack from Angela:

> $C ! demangle
fffff6ffe7bff2d0 libc.so.1`_lwp_kill+0xa()
fffff6ffe7bff300 libc.so.1`raise+0x22(6)
fffff6ffe7bff350 libc.so.1`abort+0x58()
fffff6ffe7bff360 ~std::sys::pal::unix::abort_internal::h515b31030a784ec5+8()
fffff6ffe7bff370 ~std::process::abort::h263ae6dd5f7f06e2+8()
fffff6ffe7bff380 ~__rustc::__rust_abort+8()
fffff6ffe7bff390 ~__rustc::__rust_start_panic+8()
fffff6ffe7bff3f0 __rustc::rust_panic+0xd()
fffff6ffe7bff4b0 std::panicking::rust_panic_with_hook::h24c1a837a734501c+0x224()
fffff6ffe7bff4f0 std::panicking::begin_panic_handler::{{closure}}::hebb980b0c684b90d+0x65()
fffff6ffe7bff500 ~std::sys::backtrace::__rust_end_short_backtrace::h4b651dba38b6cba9+8()
fffff6ffe7bff530 ~__rustc::rust_begin_unwind+0x1b()
fffff6ffe7bff560 ~core::panicking::panic_fmt::h717aa56ae600c892+0x1e()
fffff6ffe7bff5b0 ~core::panicking::panic::hf52b9b74c708168c+0x3a()
fffff6ffe7bff5c0 std::sync::poison::once::Once::call_once::_$u7b$$u7b$closure$u7d$$u7d$::h4aa1595ac930ccb3 (.llvm.7595959982290774484)+0x2c()
fffff6ffe7bff670 std::sys::sync::once::queue::Once::call::had06d6a18838cd97+0x1d6()
fffff6ffe7bff6a0 <propolis::hw::virtio::pci::ISR_STATUS_REGS as core::ops::deref::Deref>::deref::h5e01a69cbf2f7574+0x59()
fffff6ffe7bff7e0 propolis::hw::virtio::pci::<impl propolis::hw::pci::device::Device for D>::bar_rw::{{closure}}::h6ee0fa26971537b7+0xd9d()
fffff6ffe7bff910 propolis::hw::virtio::pci::<impl propolis::hw::pci::device::Device for D>::bar_rw::hb0b53e6e7d03f639+0x7ef()
fffff6ffe7bff9c0 propolis::mmio::MmioBus::handle_read::haa344c48dc3ccf7c+0x15a()
fffff6ffe7bffa20 propolis::vcpu::Vcpu::process_vmexit::hf993cbdc50a0a950+0x2b8()
fffff6ffe7bffe40 propolis_server::vcpu_tasks::VcpuTasks::vcpu_loop::h92946b7e82fb6cd8+0x346()
fffff6ffe7bffe80 std::sys::backtrace::__rust_begin_short_backtrace::h81c12d026840fca3+0x51()
fffff6ffe7bfff80 core::ops::function::FnOnce::call_once{{vtable.shim}}::hf6a98e36db4d0c78+0xf9()
fffff6ffe7bfffb0 std::sys::pal::unix::thread::Thread::new::thread_start::h128a92fdf19e3e8a+0x2f()
fffff6ffe7bfffe0 libc.so.1`_thrp_setup+0x77(fffff6ffeef4b240)
fffff6ffe7bffff0 libc.so.1`_lwp_start()

we've lost the panic message but the failure is kind of funky so I'm not sure it would have been as helpful as it seemed. importantly

<propolis::hw::virtio::pci::ISR_STATUS_REGS as core::ops::deref::Deref>::deref+h5e01a69cbf2f7574+0x59()

is from the lazy_static wrapping ISR_STATUS_REGS. that deref impl is from this macro:

            fn deref(&self) -> &$T {
                #[inline(always)]
                fn __static_ref_initialize() -> $T { $e }

                #[inline(always)]
                fn __stability() -> &'static $T {
                    __lazy_static_create!(LAZY, $T);
                    LAZY.get(__static_ref_initialize)
                }
                __stability()
            }

seeing Once::call next in the stack means we're at least trying to initialize the lazy static (presumably get() is partially inlined from libstd here). the call there left us at 0x1d6 in that function, so from the disassembly here we can see the call was the one at 0x1d3:

_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1a6:movq   %r12,-0x40(%rbp)
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1aa:movq   $0x2,-0x38(%rbp)
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1b2:cmpl   $0x2,%ecx
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1b5:movq   $0x0,-0x78(%rbp)
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1bd:sete   -0x70(%rbp)
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1c1:leaq   -0x78(%rbp),%rsi
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1c5:movq   -0x88(%rbp),%rdi
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1cc:movq   -0x90(%rbp),%rax
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1d3:call   *0x20(%rax)                              ; <--- we panic in this
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1d6:movq   -0x78(%rbp),%rax
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1da:movq   %rax,-0x38(%rbp)
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1de:leaq   -0x40(%rbp),%rdi
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1e2:call   +0x109   <_ZN82_$LT$std..sys..sync..once..queue..WaiterQueue$u20$as$u20$core..ops..drop..Drop$GT$4drop17h7118157b0702c865E>
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1e7:addq   $0x78,%rsp
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1eb:popq   %rbx
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1ec:popq   %r12
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1ee:popq   %r13
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1f0:popq   %r14
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1f2:popq   %r15
_ZN3std3sys4sync4once5queue4Once4call17had06d6a18838cd97E+0x1f4:popq   %rbp

but everything here is consistent with us calling the closure to initialize the lazy static. relevant libstd bits are roughly here but not that important. the called function is what panics. mildly touched-up disassembly:

_ZN3std4sync6poison4once4Once9call_once28_$u7b$$u7b$closure$u7d$$u7d$17h4aa1595ac930ccb3E.llvm.7595959982290774484:

    pushq  %rbp
    movq   %rsp,%rbp
    movq   (%rdi),%rax
    cmpq   $0x0,(%rax)
    movq   $0x0,(%rax)
    je     +0x18    <...+0x2c>
    leaq   +0x1d7b9f0(%rip),%rdi    <anon.ac10ca7ef16c1cbfea35dcb4a534876c.67.llvm.16204478908247632396>
    leaq   +0x21f28fe(%rip),%rdx    <propolis-server`anon.ac10ca7ef16c1cbfea35dcb4a534876c.69.llvm.16204478908247632396>
    movl   $0x1d,%esi
    call   +0x1d31424       <_ZN4core9panicking5panic17hf52b9b74c708168cE>

...+0x2c:
    leaq   +0x2208145(%rip),%rdi    <0x3b1cbb8>
    call   +0x1d30fe8       <_ZN4core6option13unwrap_failed17h68556bd79dde9961E>

so we.. either panic, or an unwrap fails?! this closure is from std:

self.inner.call(true, &mut |p| f.take().unwrap()(p));

so that's where the unwrap_failed comes from, but the assembly here really is "if the unwrap didn't fail, panic". it turns out what happened here is that rustc const-eval'd the "happy" path into just being a panic. what the closure means is to initialize ISR_STATUS_REGS:

    static ref ISR_STATUS_REGS: RegMap<IsrStatusReg> = {
        let layout = [
            (IsrStatusReg::IsrStatus, 1),
        ];
        RegMap::create_packed(ISR_STATUS_REG_SIZE, &layout, None)
    };

and if that can be const-eval'd into a panic, lets look at create_packed:

    pub fn create_packed(
        size: usize,
        regdef: &[(ID, usize)],
        resv_reg: Option<ID>,
    ) -> Self {
        let mut map = RegMap::new(size);
        ...

and RegMap::new():

    pub const fn new(len: usize) -> Self {
        Self { len, space: ASpace::new(0, len - 1) }
    }

which is the first danger sign: len is 1 because the RegMap covers exactly one u8 register. onward to ASpace::new:

    /// Create a instance with inclusive range [`start`, `end`]
    ///
    /// # Panics
    ///
    /// - Panics if start >= end.
    pub const fn new(start: usize, end: usize) -> ASpace<T> {
        assert!(start < end);
        Self { start, end, map: BTreeMap::new() }
    }

welp. it's an inclusive address space, but the asserts if start == end which just means the address space has exactly one space in it.

so touching a RegMap of exactly one item will immediately crash propolis, and now we have one because ISR_STATUS_REGS got peeled out. not great.

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething that isn't working.

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions