From e3522676f84876cf3bbf5100c0dae5996987986e Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Wed, 31 Dec 2025 20:44:23 -0800 Subject: [PATCH 1/4] Check and parse PT_INTERP in ELF --- src/process/exec.rs | 59 ++++++++++++++++++++++++++++++---------- src/process/exec/auxv.rs | 2 ++ 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/src/process/exec.rs b/src/process/exec.rs index 551a0497..a7415750 100644 --- a/src/process/exec.rs +++ b/src/process/exec.rs @@ -13,7 +13,7 @@ use crate::{ }; use alloc::{string::String, vec}; use alloc::{string::ToString, sync::Arc, vec::Vec}; -use auxv::{AT_NULL, AT_PAGESZ, AT_PHDR, AT_PHENT, AT_PHNUM, AT_RANDOM}; +use auxv::{AT_BASE, AT_ENTRY, AT_NULL, AT_PAGESZ, AT_PHDR, AT_PHENT, AT_PHNUM, AT_RANDOM}; use core::{ffi::c_char, mem, slice}; use libkernel::{ UserAddressSpace, VirtualMemory, @@ -48,26 +48,18 @@ pub async fn kernel_exec( argv: Vec, envp: Vec, ) -> Result<()> { + // Read ELF header let mut buf = [0u8; core::mem::size_of::>()]; - let mut auxv = Vec::new(); - inode.read_at(0, &mut buf).await?; let elf = elf::FileHeader64::::parse(buf.as_slice()) .map_err(|_| ExecError::InvalidElfFormat)?; let endian = elf.endian().unwrap(); - // Push program header params. 
- auxv.push(AT_PHNUM); - auxv.push(elf.e_phnum.get(endian) as _); - auxv.push(AT_PHENT); - auxv.push(elf.e_phentsize(endian) as _); - - let mut ph_buf = vec![ - 0u8; - elf.e_phnum.get(endian) as usize * elf.e_phentsize.get(endian) as usize - + elf.e_phoff.get(endian) as usize - ]; + // Read full program header table + let ph_table_size = elf.e_phnum.get(endian) as usize * elf.e_phentsize.get(endian) as usize + + elf.e_phoff.get(endian) as usize; + let mut ph_buf = vec![0u8; ph_table_size]; inode.read_at(0, &mut ph_buf).await?; @@ -75,6 +67,40 @@ pub async fn kernel_exec( .program_headers(endian, ph_buf.as_slice()) .map_err(|_| ExecError::InvalidPHdrFormat)?; + // Detect PT_INTERP (dynamic linker) if present + let mut interp_path: Option = None; + for hdr in hdrs { + if hdr.p_type(endian) == elf::PT_INTERP { + let off = hdr.p_offset(endian) as usize; + let filesz = hdr.p_filesz(endian) as usize; + if filesz == 0 { + break; + } + + let mut ibuf = vec![0u8; filesz]; + inode.read_at(off as u64, &mut ibuf).await?; + + let len = ibuf.iter().position(|&b| b == 0).unwrap_or(filesz); + let s = core::str::from_utf8(&ibuf[..len]).map_err(|_| ExecError::InvalidElfFormat)?; + interp_path = Some(s.to_string()); + break; + } + } + + if let Some(path) = interp_path { + panic!("Dynamic linker not supported yet: {}", path); + // return exec_with_interp(inode, &elf, endian, &ph_buf, &hdrs, path, argv, envp).await; + } + + // static ELF ... 
+ let mut auxv = Vec::new(); + + // Push program header params (for the main executable) + auxv.push(AT_PHNUM); + auxv.push(elf.e_phnum.get(endian) as _); + auxv.push(AT_PHENT); + auxv.push(elf.e_phentsize(endian) as _); + let mut vmas = Vec::new(); let mut highest_addr = 0; @@ -85,7 +111,7 @@ pub async fn kernel_exec( vmas.push(VMArea::from_pheader(inode.clone(), *hdr, endian)); if hdr.p_offset.get(endian) == 0 { - // TODO: poteintally more validation that this VA will contain + // TODO: potentially more validation that this VA will contain // the program headers. auxv.push(AT_PHDR); auxv.push(hdr.p_vaddr.get(endian) + elf.e_phoff.get(endian)); @@ -99,6 +125,9 @@ pub async fn kernel_exec( } } + auxv.push(AT_ENTRY); + auxv.push(elf.e_entry(endian) as u64); + vmas.push(VMArea::new( VirtMemoryRegion::new(VA::from_value(STACK_START), STACK_SZ), VMAreaKind::Anon, diff --git a/src/process/exec/auxv.rs b/src/process/exec/auxv.rs index 5be556d7..9a0339f9 100644 --- a/src/process/exec/auxv.rs +++ b/src/process/exec/auxv.rs @@ -3,4 +3,6 @@ pub const AT_PHDR: u64 = 3; pub const AT_PHENT: u64 = 4; pub const AT_PHNUM: u64 = 5; pub const AT_PAGESZ: u64 = 6; +pub const AT_BASE: u64 = 7; +pub const AT_ENTRY: u64 = 9; pub const AT_RANDOM: u64 = 25; From afc23fe90693f22035dcd151a6aca5bf0831c891 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Wed, 31 Dec 2025 23:35:11 -0800 Subject: [PATCH 2/4] exec with interpreter --- .../src/memory/proc_vm/memory_map/mod.rs | 2 +- libkernel/src/memory/proc_vm/mod.rs | 15 +- libkernel/src/memory/proc_vm/vmarea.rs | 13 +- src/memory/mmap.rs | 9 +- src/process/exec.rs | 140 +++++++++++++----- 5 files changed, 131 insertions(+), 48 deletions(-) diff --git a/libkernel/src/memory/proc_vm/memory_map/mod.rs b/libkernel/src/memory/proc_vm/memory_map/mod.rs index 7498ab1d..95c35d1f 100644 --- a/libkernel/src/memory/proc_vm/memory_map/mod.rs +++ b/libkernel/src/memory/proc_vm/memory_map/mod.rs @@ -13,7 +13,7 @@ const MMAP_BASE: usize = 0x4000_0000_0000; 
/// Manages mappings in a process's address space. pub struct MemoryMap { - vmas: BTreeMap, + pub(super) vmas: BTreeMap, address_space: AS, } diff --git a/libkernel/src/memory/proc_vm/mod.rs b/libkernel/src/memory/proc_vm/mod.rs index d1a71e21..ecfb5b2b 100644 --- a/libkernel/src/memory/proc_vm/mod.rs +++ b/libkernel/src/memory/proc_vm/mod.rs @@ -48,10 +48,21 @@ impl ProcessVM { Ok(Self { mm, brk }) } - pub fn from_map(map: MemoryMap, brk: VA) -> Self { + pub fn from_map(map: MemoryMap) -> Self { + // Last entry will be the VMA with the highest address. + let brk = map + .vmas + .last_key_value() + .expect("No VMAs in map") + .1 + .region + .end_address() + // VMAs should already be page-aligned, but just in case. + .align_up(PAGE_SIZE); + Self { mm: map, - brk: VirtMemoryRegion::new(brk.align_up(PAGE_SIZE), 0), + brk: VirtMemoryRegion::new(brk, 0), } } diff --git a/libkernel/src/memory/proc_vm/vmarea.rs b/libkernel/src/memory/proc_vm/vmarea.rs index d3a8d44c..928608f3 100644 --- a/libkernel/src/memory/proc_vm/vmarea.rs +++ b/libkernel/src/memory/proc_vm/vmarea.rs @@ -172,7 +172,7 @@ impl VMAreaKind { /// managing a process's memory layout. #[derive(Clone, PartialEq)] pub struct VMArea { - pub(super) region: VirtMemoryRegion, + pub region: VirtMemoryRegion, pub(super) kind: VMAreaKind, pub(super) permissions: VMAPermissions, } @@ -205,11 +205,15 @@ impl VMArea { /// * `f`: A handle to the ELF file's inode. /// * `hdr`: The ELF program header (`LOAD` segment) to create the VMA from. /// * `endian`: The endianness of the ELF file, for correctly parsing header fields. + /// * `address_bias`: A bias added to the VAs of the segment. 
pub fn from_pheader( f: Arc, hdr: ProgramHeader64, endian: E, + address_bias: Option, ) -> VMArea { + let address_bias = address_bias.unwrap_or(0); + let mut permissions = VMAPermissions { read: false, write: false, @@ -229,7 +233,7 @@ impl VMArea { } let mappable_region = VirtMemoryRegion::new( - VA::from_value(hdr.p_vaddr(endian) as usize), + VA::from_value(hdr.p_vaddr(endian) as usize + address_bias), hdr.p_memsz(endian) as usize, ) .to_mappable_region(); @@ -446,6 +450,11 @@ impl VMArea { VMAreaKind::Anon => new_vma, } } + + /// Return the virtual memory region managed by this VMA. + pub fn region(&self) -> VirtMemoryRegion { + self.region + } } #[cfg(test)] diff --git a/src/memory/mmap.rs b/src/memory/mmap.rs index 74c28095..46ee046f 100644 --- a/src/memory/mmap.rs +++ b/src/memory/mmap.rs @@ -17,7 +17,6 @@ const PROT_READ: u64 = 1; const PROT_WRITE: u64 = 2; const PROT_EXEC: u64 = 4; -const MAP_FILE: u64 = 0x0000; const MAP_SHARED: u64 = 0x0001; const MAP_PRIVATE: u64 = 0x0002; const MAP_FIXED: u64 = 0x0010; @@ -86,9 +85,10 @@ pub async fn sys_mmap( let requested_len = len as usize; - let kind = if flags & (MAP_ANON | MAP_ANONYMOUS) != 0 { + let kind = if (flags & (MAP_ANON | MAP_ANONYMOUS)) != 0 { VMAreaKind::Anon - } else if flags == MAP_FILE { + } else { + // File-backed mapping: require a valid fd and use the provided offset. let fd = current_task() .fd_table .lock_save_irq() @@ -98,9 +98,6 @@ pub async fn sys_mmap( let inode = fd.inode().ok_or(KernelError::BadFd)?; VMAreaKind::new_file(inode, offset, len) - } else { - // One of MAP_FILE or MAP_ANONYMOUS must be set. 
- return Err(KernelError::InvalidValue); }; let address_request = if addr.is_null() { diff --git a/src/process/exec.rs b/src/process/exec.rs index a7415750..67299f1a 100644 --- a/src/process/exec.rs +++ b/src/process/exec.rs @@ -31,6 +31,8 @@ use libkernel::{ region::VirtMemoryRegion, }, }; +use object::Endian; +use object::elf::{ET_DYN, ProgramHeader64}; use object::{ LittleEndian, elf::{self, PT_LOAD}, @@ -39,10 +41,44 @@ use object::{ mod auxv; +const LINKER_BIAS: usize = 0x0000_7000_0000_0000; +const PROG_BIAS: usize = 0x0000_5000_0000_0000; + const STACK_END: usize = 0x0000_8000_0000_0000; const STACK_SZ: usize = 0x2000 * 0x400; const STACK_START: usize = STACK_END - STACK_SZ; +/// Process a set of program headers from an ELF. Create VMAs for all `PT_LOAD` +/// segments, optionally applying `bias` to the load address. +/// +/// If a VMA was found that contains the headers themselves, the address of the +/// *VMA* is returned. +fn process_prog_headers( + hdrs: &[ProgramHeader64], + vmas: &mut Vec, + bias: Option, + elf_file: Arc, + endian: E, +) -> Option { + let mut hdr_addr = None; + + for hdr in hdrs { + if hdr.p_type(endian) == PT_LOAD { + let vma = VMArea::from_pheader(elf_file.clone(), *hdr, endian, bias); + + // Find PHDR: assume the segment with p_offset == 0 contains the + // headers. 
+ if hdr.p_offset.get(endian) == 0 { + hdr_addr = Some(vma.region().start_address()); + } + + vmas.push(vma); + } + } + + hdr_addr +} + pub async fn kernel_exec( inode: Arc, argv: Vec, @@ -69,7 +105,7 @@ pub async fn kernel_exec( // Detect PT_INTERP (dynamic linker) if present let mut interp_path: Option = None; - for hdr in hdrs { + for hdr in hdrs.iter() { if hdr.p_type(endian) == elf::PT_INTERP { let off = hdr.p_offset(endian) as usize; let filesz = hdr.p_filesz(endian) as usize; @@ -87,46 +123,45 @@ pub async fn kernel_exec( } } - if let Some(path) = interp_path { - panic!("Dynamic linker not supported yet: {}", path); - // return exec_with_interp(inode, &elf, endian, &ph_buf, &hdrs, path, argv, envp).await; - } + // Set up a program bias for PIE. + let main_bias = if elf.e_type.get(endian) == ET_DYN { + Some(PROG_BIAS) + } else { + None + }; - // static ELF ... - let mut auxv = Vec::new(); - - // Push program header params (for the main executable) - auxv.push(AT_PHNUM); - auxv.push(elf.e_phnum.get(endian) as _); - auxv.push(AT_PHENT); - auxv.push(elf.e_phentsize(endian) as _); + let mut auxv = vec![ + AT_PHNUM, + elf.e_phnum.get(endian) as _, + AT_PHENT, + elf.e_phentsize(endian) as _, + ]; let mut vmas = Vec::new(); - let mut highest_addr = 0; - for hdr in hdrs { - let kind = hdr.p_type(endian); + // Process the binary program headers. + if let Some(hdr_addr) = process_prog_headers(hdrs, &mut vmas, main_bias, inode.clone(), endian) + { + auxv.push(AT_PHDR); + auxv.push(hdr_addr.add_bytes(elf.e_phoff(endian) as _).value() as _); + } - if kind == PT_LOAD { - vmas.push(VMArea::from_pheader(inode.clone(), *hdr, endian)); + let main_entry = VA::from_value(elf.e_entry(endian) as usize + main_bias.unwrap_or(0)); - if hdr.p_offset.get(endian) == 0 { - // TODO: potentially more validation that this VA will contain - // the program headers. 
- auxv.push(AT_PHDR); - auxv.push(hdr.p_vaddr.get(endian) + elf.e_phoff.get(endian)); - } + // AT_ENTRY is the same in the static and interp case. + auxv.push(AT_ENTRY); + auxv.push(main_entry.value() as _); - let mapping_end = hdr.p_vaddr(endian) + hdr.p_memsz(endian); + let entry_addr = if let Some(path) = interp_path { + auxv.push(AT_BASE); + auxv.push(LINKER_BIAS as _); - if mapping_end > highest_addr { - highest_addr = mapping_end; - } - } - } - - auxv.push(AT_ENTRY); - auxv.push(elf.e_entry(endian) as u64); + // Returns the entry address of the interp program. + process_interp(path, &mut vmas).await? + } else { + // Otherwise, it's just the binary itself. + main_entry + }; vmas.push(VMArea::new( VirtMemoryRegion::new(VA::from_value(STACK_START), STACK_SZ), @@ -135,12 +170,10 @@ pub async fn kernel_exec( )); let mut mem_map = MemoryMap::from_vmas(vmas)?; - let stack_ptr = setup_user_stack(&mut mem_map, &argv, &envp, auxv)?; - let user_ctx = - ArchImpl::new_user_context(VA::from_value(elf.e_entry(endian) as usize), stack_ptr); - let mut vm = ProcessVM::from_map(mem_map, VA::from_value(highest_addr as usize)); + let user_ctx = ArchImpl::new_user_context(entry_addr, stack_ptr); + let mut vm = ProcessVM::from_map(mem_map); // We don't have to worry about actually calling for a full context switch // here. Parts of the old process that are replaced will go out of scope and @@ -274,6 +307,39 @@ fn setup_user_stack( Ok(VA::from_value(final_sp_val)) } +// Dynamic linker path: map PT_INTERP interpreter and return start address of +// the interpreter program. +async fn process_interp(interp_path: String, vmas: &mut Vec) -> Result { + // Resolve interpreter path from root; this assumes interp_path is absolute. 
+ let task = current_task_shared(); + let path = Path::new(&interp_path); + let interp_inode = VFS.resolve_path(path, VFS.root_inode(), &task).await?; + + // Parse interpreter ELF header + let mut hdr_buf = [0u8; core::mem::size_of::>()]; + interp_inode.read_at(0, &mut hdr_buf).await?; + let interp_elf = elf::FileHeader64::::parse(&hdr_buf[..]) + .map_err(|_| ExecError::InvalidElfFormat)?; + let iendian = interp_elf.endian().unwrap(); + + // Read interpreter program headers + let interp_ph_table_size = interp_elf.e_phnum.get(iendian) as usize + * interp_elf.e_phentsize.get(iendian) as usize + + interp_elf.e_phoff.get(iendian) as usize; + let mut interp_ph_buf = vec![0u8; interp_ph_table_size]; + interp_inode.read_at(0, &mut interp_ph_buf).await?; + let interp_hdrs = interp_elf + .program_headers(iendian, &interp_ph_buf[..]) + .map_err(|_| ExecError::InvalidPHdrFormat)?; + + // Build VMAs for interpreter + process_prog_headers(interp_hdrs, vmas, Some(LINKER_BIAS), interp_inode, iendian); + + let interp_entry = VA::from_value(LINKER_BIAS + interp_elf.e_entry(iendian) as usize); + + Ok(interp_entry) +} + pub async fn sys_execve( path: TUA, mut usr_argv: TUA>, From 7d60dec31df9f96d15f0b271759d0014352875d9 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Mon, 5 Jan 2026 20:06:19 +0000 Subject: [PATCH 3/4] libkernel: ext4: add inode id to metadata Add the proper inode and fs id to the `FileAttr` struct returned by `getattr`. This prevents the dynamic loader from skipping files which it considers to be hard-links (share the same inode ID). 
--- libkernel/src/fs/filesystems/ext4/mod.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libkernel/src/fs/filesystems/ext4/mod.rs b/libkernel/src/fs/filesystems/ext4/mod.rs index b1abe1f7..a6ea3b44 100644 --- a/libkernel/src/fs/filesystems/ext4/mod.rs +++ b/libkernel/src/fs/filesystems/ext4/mod.rs @@ -171,7 +171,12 @@ impl Inode for Ext4Inode { } async fn getattr(&self) -> Result { - Ok(self.inner.metadata.clone().into()) + let mut attrs: FileAttr = self.inner.metadata.clone().into(); + let fs = self.fs_ref.upgrade().ok_or(FsError::InvalidFs)?; + + attrs.id = InodeId::from_fsid_and_inodeid(fs.id(), self.inner.index.get() as _); + + Ok(attrs) } async fn lookup(&self, name: &str) -> Result> { From abbebc20b317fa32017f1a3e88158f4460110ff9 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Tue, 6 Jan 2026 09:44:40 +0000 Subject: [PATCH 4/4] syscalls: prctl: add stub impl Implement PR_CAPBSET_READ with a very basic implementation. --- etc/syscalls_linux_aarch64.md | 2 +- src/arch/arm64/exceptions/syscall.rs | 2 ++ src/process/mod.rs | 1 + src/process/prctl.rs | 21 +++++++++++++++++++++ 4 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 src/process/prctl.rs diff --git a/etc/syscalls_linux_aarch64.md b/etc/syscalls_linux_aarch64.md index a820752c..1ce08173 100644 --- a/etc/syscalls_linux_aarch64.md +++ b/etc/syscalls_linux_aarch64.md @@ -167,7 +167,7 @@ | 0xa4 (164) | setrlimit | (unsigned int resource, struct rlimit *rlim) | __arm64_sys_setrlimit | false | | 0xa5 (165) | getrusage | (int who, struct rusage *ru) | __arm64_sys_getrusage | false | | 0xa6 (166) | umask | (int mask) | __arm64_sys_umask | true | -| 0xa7 (167) | prctl | (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) | __arm64_sys_prctl | false | +| 0xa7 (167) | prctl | (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) | __arm64_sys_prctl | stub | | 0xa8 (168) | getcpu | (unsigned 
*cpup, unsigned *nodep, struct getcpu_cache *unused) | __arm64_sys_getcpu | false | | 0xa9 (169) | gettimeofday | (struct __kernel_old_timeval *tv, struct timezone *tz) | __arm64_sys_gettimeofday | dummy | | 0xaa (170) | settimeofday | (struct __kernel_old_timeval *tv, struct timezone *tz) | __arm64_sys_settimeofday | false | diff --git a/src/arch/arm64/exceptions/syscall.rs b/src/arch/arm64/exceptions/syscall.rs index 50a2f123..6c231ffc 100644 --- a/src/arch/arm64/exceptions/syscall.rs +++ b/src/arch/arm64/exceptions/syscall.rs @@ -53,6 +53,7 @@ use crate::{ fcntl::sys_fcntl, select::{sys_ppoll, sys_pselect6}, }, + prctl::sys_prctl, sleep::sys_nanosleep, thread_group::{ Pgid, @@ -342,6 +343,7 @@ pub async fn handle_syscall() { 0xa0 => sys_uname(TUA::from_value(arg1 as _)).await, 0xa3 => Err(KernelError::InvalidValue), 0xa6 => sys_umask(arg1 as _).map_err(|e| match e {}), + 0xa7 => sys_prctl(arg1 as _, arg2 as _), 0xa9 => sys_gettimeofday(TUA::from_value(arg1 as _), TUA::from_value(arg2 as _)).await, 0xac => sys_getpid().map_err(|e| match e {}), 0xad => sys_getppid().map_err(|e| match e {}), diff --git a/src/process/mod.rs b/src/process/mod.rs index 1770c411..dbcf6b76 100644 --- a/src/process/mod.rs +++ b/src/process/mod.rs @@ -18,6 +18,7 @@ pub mod exec; pub mod exit; pub mod fd_table; pub mod owned; +pub mod prctl; pub mod sleep; pub mod thread_group; pub mod threading; diff --git a/src/process/prctl.rs b/src/process/prctl.rs new file mode 100644 index 00000000..bb2c1986 --- /dev/null +++ b/src/process/prctl.rs @@ -0,0 +1,21 @@ +use libkernel::error::{KernelError, Result}; + +const PR_CAPBSET_READ: i32 = 23; +const CAP_MAX: usize = 40; + +fn pr_read_capset(what: usize) -> Result { + // Validate the argument + if what > CAP_MAX { + return Err(KernelError::InvalidValue); + } + + // Assume we have *all* the capabilities. 
+ Ok(1) +} + +pub fn sys_prctl(op: i32, arg1: usize) -> Result { + match op { + PR_CAPBSET_READ => pr_read_capset(arg1), + _ => todo!("prctl op: {}", op), + } +}