From 35efecad76ffb97ca44dbb5f21c4fe13e7ce465b Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Tue, 30 Dec 2025 09:33:47 +0000 Subject: [PATCH 1/4] process: split `Task` into `Task` (shared) and `OwnedTask` (local) This commit refactors the core process representation to decouple "Identity/Resources" from "Execution/Scheduling". Previously, a monolithic `Task` struct wrapped in `Arc>` caused lock contention during hot scheduling paths and conflated shared state with CPU-local state. The `Task` struct has been split into: 1. `Task` (Shared): Holds process-wide resources (VM, FileTable, Credentials). Managed via `Arc` and internal fine-grained locking. 2. `OwnedTask` (Private): Holds execution state (Context, v_runtime, signal mask). Strictly owned by a specific CPU (via the Scheduler) and accessed lock-free. Key changes: * Scheduler: chedState` now owns tasks via `Box`. - Transitions between `run_queue` and `running_task` involve strictly moving ownership of the Box, ensuring pointer stability. - The EEVDF comparison logic now explicitly handles comparisons between the queued candidates and the currently running task (which is not in the queue). * Current Task Access: - `current()` now returns a `CurrentTaskGuard` which: 1. Disables preemption (preventing context switches while holding the reference). 2. Performs a runtime borrow check (panic on double-mutable borrow). 3. Dereferences a cached Per-CPU raw pointer for O(1) access. --- src/arch/arm64/boot/memory.rs | 2 +- src/arch/arm64/exceptions/mod.rs | 6 +- src/arch/arm64/exceptions/syscall.rs | 16 +- src/arch/arm64/memory/fault.rs | 4 +- src/arch/arm64/mod.rs | 4 +- src/arch/arm64/proc/idle.rs | 6 +- src/arch/arm64/proc/signal.rs | 7 +- src/arch/mod.rs | 3 +- src/console/chardev.rs | 2 +- src/console/tty.rs | 2 +- src/console/tty/cooker.rs | 2 +- src/drivers/fs/proc.rs | 2 +- src/drivers/timer/mod.rs | 14 - src/fs/dir.rs | 6 +- src/fs/mod.rs | 52 ++- src/fs/pipe.rs | 2 +- src/fs/syscalls/at/access.rs | 6 +- src/fs/syscalls/at/chmod.rs | 6 +- src/fs/syscalls/at/chown.rs | 6 +- src/fs/syscalls/at/link.rs | 18 +- src/fs/syscalls/at/mkdir.rs | 6 +- src/fs/syscalls/at/mod.rs | 6 +- src/fs/syscalls/at/open.rs | 8 +- src/fs/syscalls/at/readlink.rs | 7 +- src/fs/syscalls/at/rename.rs | 9 +- src/fs/syscalls/at/stat.rs | 6 +- src/fs/syscalls/at/statx.rs | 6 +- src/fs/syscalls/at/symlink.rs | 6 +- src/fs/syscalls/at/unlink.rs | 7 +- src/fs/syscalls/at/utime.rs | 9 +- src/fs/syscalls/chdir.rs | 14 +- src/fs/syscalls/chmod.rs | 4 +- src/fs/syscalls/chown.rs | 4 +- src/fs/syscalls/close.rs | 2 +- src/fs/syscalls/ioctl.rs | 2 +- src/fs/syscalls/iov.rs | 2 +- src/fs/syscalls/rw.rs | 2 +- src/fs/syscalls/seek.rs | 2 +- src/fs/syscalls/splice.rs | 2 +- src/fs/syscalls/stat.rs | 2 +- src/fs/syscalls/sync.rs | 8 +- src/fs/syscalls/trunc.rs | 11 +- src/interrupts/cpu_messenger.rs | 6 +- src/kernel/cpu_id.rs | 20 + src/kernel/mod.rs | 1 + src/kernel/power.rs | 4 +- src/main.rs | 11 +- src/memory/brk.rs | 2 +- src/memory/fault.rs | 2 +- src/memory/mmap.rs | 2 +- src/process/caps.rs | 8 +- src/process/clone.rs | 65 ++- src/process/creds.rs | 8 +- src/process/exec.rs | 20 +- src/process/exit.rs | 10 +- src/process/fd_table/dup.rs | 2 +- src/process/fd_table/fcntl.rs | 4 +- src/process/fd_table/select.rs | 6 +- src/process/mod.rs | 138 +----- src/process/owned.rs | 131 ++++++ src/process/thread_group/pid.rs | 2 +- src/process/thread_group/rsrc_lim.rs | 2 +- src/process/thread_group/signal/kill.rs | 2 +- src/process/thread_group/signal/sigaction.rs | 6 +- .../thread_group/signal/sigaltstack.rs | 2 +- .../thread_group/signal/sigprocmask.rs | 9 +- src/process/thread_group/umask.rs | 2 +- src/process/thread_group/wait.rs | 4 +- src/process/threading/futex/key.rs | 2 +- src/process/threading/mod.rs | 10 +- src/sched/current.rs | 112 +++++ src/sched/mod.rs | 408 ++++-------------- src/sched/runqueue.rs | 176 ++++++++ src/sched/sched_task.rs | 142 ++++++ src/sched/uspc_ret.rs | 83 ++-- src/sched/waker.rs | 4 + 76 files changed, 961 insertions(+), 736 deletions(-) create mode 100644 src/kernel/cpu_id.rs create mode 100644 src/process/owned.rs create mode 100644 src/sched/current.rs create mode 100644 src/sched/runqueue.rs create mode 100644 src/sched/sched_task.rs diff --git a/src/arch/arm64/boot/memory.rs b/src/arch/arm64/boot/memory.rs index 87999b37..fc2b071e 100644 --- a/src/arch/arm64/boot/memory.rs +++ b/src/arch/arm64/boot/memory.rs @@ -21,7 +21,7 @@ use libkernel::{ }; use log::info; -const KERNEL_STACK_SZ: usize = 64 * 1024; // 32 KiB +const KERNEL_STACK_SZ: usize = 256 * 1024; // 32 KiB pub const KERNEL_STACK_PG_ORDER: usize = (KERNEL_STACK_SZ / PAGE_SIZE).ilog2() as usize; const KERNEL_HEAP_SZ: usize = 64 * 1024 * 1024; // 64 MiB diff --git a/src/arch/arm64/exceptions/mod.rs b/src/arch/arm64/exceptions/mod.rs index 52c716bd..ef7558d7 100644 --- a/src/arch/arm64/exceptions/mod.rs +++ b/src/arch/arm64/exceptions/mod.rs @@ -6,7 +6,7 @@ use crate::{ arch::ArchImpl, interrupts::get_interrupt_root, ksym_pa, - sched::{current_task, uspc_ret::dispatch_userspace_task}, + sched::{current::current_task, uspc_ret::dispatch_userspace_task}, spawn_kernel_work, }; use aarch64_cpu::registers::{CPACR_EL1, ReadWriteable, VBAR_EL1}; @@ -144,7 +144,7 @@ extern "C" fn el1_serror_spx(state: &mut ExceptionState) { #[unsafe(no_mangle)] extern "C" fn el0_sync(state_ptr: *mut ExceptionState) -> *const ExceptionState { - current_task().ctx.lock_save_irq().save_user_ctx(state_ptr); + current_task().ctx.save_user_ctx(state_ptr); let state = unsafe { state_ptr.as_ref().unwrap() }; @@ -174,7 +174,7 @@ extern "C" fn el0_sync(state_ptr: *mut ExceptionState) -> *const ExceptionState #[unsafe(no_mangle)] extern "C" fn el0_irq(state: *mut ExceptionState) -> *mut ExceptionState { - current_task().ctx.lock_save_irq().save_user_ctx(state); + current_task().ctx.save_user_ctx(state); match get_interrupt_root() { Some(ref im) => im.handle_interrupt(), diff --git a/src/arch/arm64/exceptions/syscall.rs b/src/arch/arm64/exceptions/syscall.rs index fb32be0f..50a2f123 100644 --- a/src/arch/arm64/exceptions/syscall.rs +++ b/src/arch/arm64/exceptions/syscall.rs @@ -69,7 +69,7 @@ use crate::{ }, threading::{futex::sys_futex, sys_set_robust_list, sys_set_tid_address}, }, - sched::{current_task, sys_sched_yield}, + sched::{current::current_task, sys_sched_yield}, }; use alloc::boxed::Box; use libkernel::{ @@ -78,10 +78,10 @@ use libkernel::{ }; pub async fn handle_syscall() { - let task = current_task(); - let (nr, arg1, arg2, arg3, arg4, arg5, arg6) = { - let ctx = task.ctx.lock_save_irq(); + let mut task = current_task(); + + let ctx = &mut task.ctx; let state = ctx.user(); ( @@ -314,8 +314,8 @@ pub async fn handle_syscall() { } 0x8b => { // Special case for sys_rt_sigreturn - task.ctx - .lock_save_irq() + current_task() + .ctx .put_signal_work(Box::pin(ArchImpl::do_signal_return())); return; @@ -449,7 +449,7 @@ pub async fn handle_syscall() { } _ => panic!( "Unhandled syscall 0x{nr:x}, PC: 0x{:x}", - current_task().ctx.lock_save_irq().user().elr_el1 + current_task().ctx.user().elr_el1 ), }; @@ -458,5 +458,5 @@ pub async fn handle_syscall() { Err(e) => kern_err_to_syscall(e), }; - task.ctx.lock_save_irq().user_mut().x[0] = ret_val.cast_unsigned() as u64; + current_task().ctx.user_mut().x[0] = ret_val.cast_unsigned() as u64; } diff --git a/src/arch/arm64/memory/fault.rs b/src/arch/arm64/memory/fault.rs index 297a8d99..ff7ee47f 100644 --- a/src/arch/arm64/memory/fault.rs +++ b/src/arch/arm64/memory/fault.rs @@ -9,7 +9,7 @@ use crate::{ memory::uaccess::UAccessResult, }, memory::fault::{FaultResolution, handle_demand_fault, handle_protection_fault}, - sched::{current_task, spawn_kernel_work}, + sched::{current::current_task, spawn_kernel_work}, }; use alloc::boxed::Box; use libkernel::{ @@ -111,7 +111,7 @@ pub fn handle_mem_fault(exception: Exception, info: AbortIss) { "SIGSEGV on process {} {:?} PC: {:x}", current_task().process.tgid, exception, - current_task().ctx.lock_save_irq().user().elr_el1 + current_task().ctx.user().elr_el1 ), // If the page fault involves sleepy kernel work, we can // spawn that work on the process, since there is no other diff --git a/src/arch/arm64/mod.rs b/src/arch/arm64/mod.rs index 5f372466..7a33b0dc 100644 --- a/src/arch/arm64/mod.rs +++ b/src/arch/arm64/mod.rs @@ -21,6 +21,7 @@ use memory::{ use crate::{ process::{ Task, + owned::OwnedTask, thread_group::signal::{SigId, ksigaction::UserspaceSigAction}, }, sync::SpinLock, @@ -37,6 +38,7 @@ mod proc; pub mod psci; pub struct Aarch64 {} + impl CpuOps for Aarch64 { fn id() -> usize { MPIDR_EL1.read(MPIDR_EL1::Aff0) as _ @@ -109,7 +111,7 @@ impl Arch for Aarch64 { proc::context_switch(new); } - fn create_idle_task() -> Task { + fn create_idle_task() -> OwnedTask { proc::idle::create_idle_task() } diff --git a/src/arch/arm64/proc/idle.rs b/src/arch/arm64/proc/idle.rs index e6a53a5f..7d80fb25 100644 --- a/src/arch/arm64/proc/idle.rs +++ b/src/arch/arm64/proc/idle.rs @@ -1,7 +1,7 @@ use crate::{ arch::{ArchImpl, arm64::exceptions::ExceptionState}, memory::{PageOffsetTranslator, page::ClaimedPage}, - process::Task, + process::owned::OwnedTask, }; use core::arch::global_asm; use libkernel::{ @@ -16,7 +16,7 @@ use libkernel::{ global_asm!(include_str!("idle.s")); -pub fn create_idle_task() -> Task { +pub fn create_idle_task() -> OwnedTask { let code_page = ClaimedPage::alloc_zeroed().unwrap().leak(); let code_addr = VA::from_value(0xd00d0000); @@ -60,5 +60,5 @@ pub fn create_idle_task() -> Task { VMAPermissions::rx(), ); - Task::create_idle_task(addr_space, ctx, code_map) + OwnedTask::create_idle_task(addr_space, ctx, code_map) } diff --git a/src/arch/arm64/proc/signal.rs b/src/arch/arm64/proc/signal.rs index 2ab5f01c..5e784aa3 100644 --- a/src/arch/arm64/proc/signal.rs +++ b/src/arch/arm64/proc/signal.rs @@ -4,7 +4,7 @@ use crate::{ process::thread_group::signal::{ SigId, ksigaction::UserspaceSigAction, sigaction::SigActionFlags, }, - sched::current_task, + sched::current::current_task, }; use libkernel::{ error::Result, @@ -29,7 +29,7 @@ pub async fn do_signal(id: SigId, sa: UserspaceSigAction) -> Result Result Result { let task = current_task(); - let sig_frame_addr: TUA = - TUA::from_value(task.ctx.lock_save_irq().user().sp_el0 as _); + let sig_frame_addr: TUA = TUA::from_value(task.ctx.user().sp_el0 as _); let sig_frame = copy_from_user(sig_frame_addr).await?; diff --git a/src/arch/mod.rs b/src/arch/mod.rs index 243bcab3..4234bbc0 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -10,6 +10,7 @@ use crate::process::{ Task, + owned::OwnedTask, thread_group::signal::{SigId, ksigaction::UserspaceSigAction}, }; use alloc::sync::Arc; @@ -39,7 +40,7 @@ pub trait Arch: CpuOps + VirtualMemory { fn context_switch(new: Arc); /// Construct a new idle task. - fn create_idle_task() -> Task; + fn create_idle_task() -> OwnedTask; /// Powers off the machine. Implementations must never return. fn power_off() -> !; diff --git a/src/console/chardev.rs b/src/console/chardev.rs index 871b0a04..ca384145 100644 --- a/src/console/chardev.rs +++ b/src/console/chardev.rs @@ -6,7 +6,7 @@ use crate::{ fs::open_file::OpenFile, kernel_driver, process::fd_table::Fd, - sched::current_task, + sched::current::current_task, }; use alloc::{string::ToString, sync::Arc}; use libkernel::{ diff --git a/src/console/tty.rs b/src/console/tty.rs index 5ccaa128..b48493b7 100644 --- a/src/console/tty.rs +++ b/src/console/tty.rs @@ -3,7 +3,7 @@ use crate::{ kernel::kpipe::KPipe, memory::uaccess::{copy_from_user, copy_from_user_slice, copy_to_user}, process::thread_group::Pgid, - sched::current_task, + sched::current::current_task, sync::SpinLock, }; use alloc::{boxed::Box, sync::Arc}; diff --git a/src/console/tty/cooker.rs b/src/console/tty/cooker.rs index f7c854a6..444459e3 100644 --- a/src/console/tty/cooker.rs +++ b/src/console/tty/cooker.rs @@ -6,7 +6,7 @@ use crate::kernel::kpipe::KPipe; use crate::process::thread_group::Pgid; use crate::process::thread_group::signal::SigId; use crate::process::thread_group::signal::kill::send_signal_to_pg; -use crate::sched::current_task; +use crate::sched::current::current_task; use crate::sync::{CondVar, SpinLock}; use alloc::{sync::Arc, vec::Vec}; use libkernel::error::Result; diff --git a/src/drivers/fs/proc.rs b/src/drivers/fs/proc.rs index 9361221f..24a421af 100644 --- a/src/drivers/fs/proc.rs +++ b/src/drivers/fs/proc.rs @@ -2,7 +2,7 @@ use crate::process::find_task_by_descriptor; use crate::process::thread_group::Tgid; -use crate::sched::current_task; +use crate::sched::current::current_task; use crate::sync::OnceLock; use crate::{ drivers::{Driver, FilesystemDriver}, diff --git a/src/drivers/timer/mod.rs b/src/drivers/timer/mod.rs index ab12f1fe..94048dec 100644 --- a/src/drivers/timer/mod.rs +++ b/src/drivers/timer/mod.rs @@ -263,20 +263,6 @@ pub fn schedule_preempt(when: Instant) { } } -pub fn schedule_force_preempt() { - // Schedule a preemption event if none are scheduled - let when = now().unwrap() + Duration::from_millis(5); - - if let Some(next_event) = WAKEUP_Q.borrow().peek() - && next_event.when <= when - { - // An event is already scheduled before our forced preemption - return; - } - - schedule_preempt(when); -} - static SYS_TIMER: OnceLock> = OnceLock::new(); per_cpu! { diff --git a/src/fs/dir.rs b/src/fs/dir.rs index 4713dd36..c6b16d28 100644 --- a/src/fs/dir.rs +++ b/src/fs/dir.rs @@ -9,7 +9,9 @@ use libkernel::{ }; use ringbuf::Arc; -use crate::{memory::uaccess::copy_to_user_slice, process::fd_table::Fd, sched::current_task}; +use crate::{ + memory::uaccess::copy_to_user_slice, process::fd_table::Fd, sched::current::current_task_shared, +}; use super::{fops::FileOps, open_file::FileCtx}; @@ -132,7 +134,7 @@ struct Dirent64Hdr { } pub async fn sys_getdents64(fd: Fd, mut ubuf: UA, size: u32) -> Result { - let task = current_task(); + let task = current_task_shared(); let file = task .fd_table .lock_save_irq() diff --git a/src/fs/mod.rs b/src/fs/mod.rs index c6e0f459..983abfac 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,22 +1,23 @@ -use alloc::borrow::ToOwned; -use alloc::boxed::Box; -use alloc::{collections::btree_map::BTreeMap, sync::Arc}; +use crate::{ + drivers::{DM, Driver}, + process::Task, + sync::SpinLock, +}; +use alloc::{borrow::ToOwned, boxed::Box, collections::btree_map::BTreeMap, sync::Arc, vec::Vec}; use async_trait::async_trait; use core::sync::atomic::{AtomicU64, Ordering}; use dir::DirFile; -use libkernel::error::{FsError, KernelError, Result}; -use libkernel::fs::attr::FilePermissions; -use libkernel::fs::path::Path; -use libkernel::fs::{BlockDevice, FS_ID_START, FileType, Filesystem, Inode, InodeId, OpenFlags}; -use libkernel::proc::caps::CapabilitiesFlags; +use libkernel::{ + error::{FsError, KernelError, Result}, + fs::{ + BlockDevice, FS_ID_START, FileType, Filesystem, Inode, InodeId, OpenFlags, + attr::FilePermissions, path::Path, + }, + proc::caps::CapabilitiesFlags, +}; use open_file::OpenFile; use reg::RegFile; -use crate::drivers::{DM, Driver}; -use crate::process::Task; -use crate::sync::SpinLock; -use alloc::vec::Vec; - pub mod dir; pub mod fops; pub mod open_file; @@ -182,7 +183,7 @@ impl VFS { &self, path: &Path, root: Arc, - task: Arc, + task: &Arc, ) -> Result> { let root = if path.is_absolute() { task.root.lock_save_irq().0.clone() // use the task's root inode, in case a custom chroot was set @@ -199,7 +200,7 @@ impl VFS { &self, path: &Path, root: Arc, - task: Arc, + task: &Arc, ) -> Result> { let root = if path.is_absolute() { task.root.lock_save_irq().0.clone() @@ -306,10 +307,10 @@ impl VFS { flags: OpenFlags, root: Arc, mode: FilePermissions, - task: Arc, + task: &Arc, ) -> Result> { // Attempt to resolve the full path first. - let resolve_result = self.resolve_path(path, root.clone(), task.clone()).await; + let resolve_result = self.resolve_path(path, root.clone(), task).await; let target_inode = match resolve_result { // The file/directory exists. @@ -413,10 +414,10 @@ impl VFS { path: &Path, root: Arc, mode: FilePermissions, - task: Arc, + task: &Arc, ) -> Result<()> { // Try to resolve the target directory first. - match self.resolve_path(path, root.clone(), task.clone()).await { + match self.resolve_path(path, root.clone(), task).await { // The path already exists, this is an error. Ok(_) => Err(FsError::AlreadyExists.into()), @@ -457,12 +458,10 @@ impl VFS { path: &Path, root: Arc, remove_dir: bool, - task: Arc, + task: &Arc, ) -> Result<()> { // First, resolve the target inode so we can inspect its type. - let target_inode = self - .resolve_path_nofollow(path, root.clone(), task.clone()) - .await?; + let target_inode = self.resolve_path_nofollow(path, root.clone(), task).await?; let attr = target_inode.getattr().await?; @@ -480,8 +479,7 @@ impl VFS { // Determine the parent directory inode in which to perform the unlink. let parent_inode = if let Some(parent_path) = path.parent() { - self.resolve_path(parent_path, root.clone(), task.clone()) - .await? + self.resolve_path(parent_path, root.clone(), task).await? } else { root.clone() }; @@ -527,9 +525,9 @@ impl VFS { target: &Path, link: &Path, root: Arc, - task: Arc, + task: &Arc, ) -> Result<()> { - match self.resolve_path(link, root.clone(), task.clone()).await { + match self.resolve_path(link, root.clone(), task).await { Ok(_) => Err(FsError::AlreadyExists.into()), Err(KernelError::Fs(FsError::NotFound)) => { let name = link.file_name().ok_or(FsError::InvalidInput)?; diff --git a/src/fs/pipe.rs b/src/fs/pipe.rs index a0ed5610..536fecea 100644 --- a/src/fs/pipe.rs +++ b/src/fs/pipe.rs @@ -2,7 +2,7 @@ use crate::{ kernel::kpipe::KPipe, memory::uaccess::copy_to_user, process::{fd_table::Fd, thread_group::signal::SigId}, - sched::current_task, + sched::current::current_task, sync::CondVar, }; use alloc::{boxed::Box, sync::Arc}; diff --git a/src/fs/syscalls/at/access.rs b/src/fs/syscalls/at/access.rs index 0d74023a..81da295f 100644 --- a/src/fs/syscalls/at/access.rs +++ b/src/fs/syscalls/at/access.rs @@ -1,7 +1,7 @@ use super::{AtFlags, resolve_at_start_node}; use crate::{ fs::syscalls::at::resolve_path_flags, memory::uaccess::cstr::UserCStr, process::fd_table::Fd, - sched::current_task, + sched::current::current_task_shared, }; use core::ffi::c_char; use libkernel::{ @@ -17,12 +17,12 @@ pub async fn sys_faccessat(dirfd: Fd, path: TUA, mode: i32) -> Result, mode: i32, flags: i32) -> Result { let mut buf = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); let access_mode = AccessMode::from_bits_retain(mode); let path = Path::new(UserCStr::from_ptr(path).copy_from_user(&mut buf).await?); let at_flags = AtFlags::from_bits_retain(flags); let start_node = resolve_at_start_node(dirfd, path, at_flags).await?; - let node = resolve_path_flags(dirfd, path, start_node, task.clone(), at_flags).await?; + let node = resolve_path_flags(dirfd, path, start_node, &task, at_flags).await?; // If mode is F_OK (value 0), the check is for the file's existence. // Reaching this point means we found the file, so we can return success. diff --git a/src/fs/syscalls/at/chmod.rs b/src/fs/syscalls/at/chmod.rs index 931e0764..b5c00c3d 100644 --- a/src/fs/syscalls/at/chmod.rs +++ b/src/fs/syscalls/at/chmod.rs @@ -12,7 +12,7 @@ use crate::{ fs::syscalls::at::{AtFlags, resolve_at_start_node, resolve_path_flags}, memory::uaccess::cstr::UserCStr, process::{Task, fd_table::Fd}, - sched::current_task, + sched::current::current_task_shared, }; pub fn can_chmod(task: Arc, uid: Uid) -> bool { @@ -25,12 +25,12 @@ pub async fn sys_fchmodat(dirfd: Fd, path: TUA, mode: u16, flags: i32) - let mut buf = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); let path = Path::new(UserCStr::from_ptr(path).copy_from_user(&mut buf).await?); let start_node = resolve_at_start_node(dirfd, path, flags).await?; let mode = FilePermissions::from_bits_retain(mode); - let node = resolve_path_flags(dirfd, path, start_node, task.clone(), flags).await?; + let node = resolve_path_flags(dirfd, path, start_node, &task, flags).await?; let mut attr = node.getattr().await?; if !can_chmod(task, attr.uid) { diff --git a/src/fs/syscalls/at/chown.rs b/src/fs/syscalls/at/chown.rs index 5a331a15..ccf94b49 100644 --- a/src/fs/syscalls/at/chown.rs +++ b/src/fs/syscalls/at/chown.rs @@ -14,7 +14,7 @@ use crate::{ fs::syscalls::at::{AtFlags, resolve_at_start_node, resolve_path_flags}, memory::uaccess::cstr::UserCStr, process::fd_table::Fd, - sched::current_task, + sched::current::current_task_shared, }; pub async fn sys_fchownat( @@ -26,12 +26,12 @@ pub async fn sys_fchownat( ) -> Result { let mut buf = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); let flags = AtFlags::from_bits_retain(flags); let path = Path::new(UserCStr::from_ptr(path).copy_from_user(&mut buf).await?); let start_node = resolve_at_start_node(dirfd, path, flags).await?; - let node = resolve_path_flags(dirfd, path, start_node, task.clone(), flags).await?; + let node = resolve_path_flags(dirfd, path, start_node, &task, flags).await?; let mut attr = node.getattr().await?; { diff --git a/src/fs/syscalls/at/link.rs b/src/fs/syscalls/at/link.rs index f22f54d3..eb87edb0 100644 --- a/src/fs/syscalls/at/link.rs +++ b/src/fs/syscalls/at/link.rs @@ -14,7 +14,7 @@ use crate::{ }, memory::uaccess::cstr::UserCStr, process::fd_table::Fd, - sched::current_task, + sched::current::current_task_shared, }; pub async fn sys_linkat( @@ -27,7 +27,7 @@ pub async fn sys_linkat( let mut buf = [0; 1024]; let mut buf2 = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); let mut flags = AtFlags::from_bits_retain(flags); // following symlinks is implied for any other syscall. @@ -60,14 +60,8 @@ pub async fn sys_linkat( let old_start_node = resolve_at_start_node(old_dirfd, old_path, flags).await?; let new_start_node = resolve_at_start_node(new_dirfd, new_path, flags).await?; - let target_inode = resolve_path_flags( - old_dirfd, - old_path, - old_start_node.clone(), - task.clone(), - flags, - ) - .await?; + let target_inode = + resolve_path_flags(old_dirfd, old_path, old_start_node.clone(), &task, flags).await?; let attr = target_inode.getattr().await?; @@ -77,7 +71,7 @@ pub async fn sys_linkat( // newpath does not follow flags, and doesnt follow symlinks either if VFS - .resolve_path_nofollow(new_path, new_start_node.clone(), task.clone()) + .resolve_path_nofollow(new_path, new_start_node.clone(), &task) .await .is_ok() { @@ -86,7 +80,7 @@ pub async fn sys_linkat( // parent newpath should follow symlinks though let parent_inode = if let Some(parent) = new_path.parent() { - VFS.resolve_path(parent, new_start_node, task).await? + VFS.resolve_path(parent, new_start_node, &task).await? } else { new_start_node }; diff --git a/src/fs/syscalls/at/mkdir.rs b/src/fs/syscalls/at/mkdir.rs index 8cc3c38c..9afa3961 100644 --- a/src/fs/syscalls/at/mkdir.rs +++ b/src/fs/syscalls/at/mkdir.rs @@ -1,8 +1,8 @@ -use crate::current_task; use crate::fs::VFS; use crate::fs::syscalls::at::{AtFlags, resolve_at_start_node}; use crate::memory::uaccess::cstr::UserCStr; use crate::process::fd_table::Fd; +use crate::sched::current::current_task_shared; use core::ffi::c_char; use libkernel::fs::attr::FilePermissions; use libkernel::fs::path::Path; @@ -15,11 +15,11 @@ pub async fn sys_mkdirat( ) -> libkernel::error::Result { let mut buf = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); let path = Path::new(UserCStr::from_ptr(path).copy_from_user(&mut buf).await?); let start_node = resolve_at_start_node(dirfd, path, AtFlags::empty()).await?; let mode = FilePermissions::from_bits_retain(mode); - VFS.mkdir(path, start_node, mode, task.clone()).await?; + VFS.mkdir(path, start_node, mode, &task).await?; Ok(0) } diff --git a/src/fs/syscalls/at/mod.rs b/src/fs/syscalls/at/mod.rs index 5fc5e214..5fcc74d4 100644 --- a/src/fs/syscalls/at/mod.rs +++ b/src/fs/syscalls/at/mod.rs @@ -1,7 +1,7 @@ use crate::{ fs::{DummyInode, VFS}, process::{Task, fd_table::Fd}, - sched::current_task, + sched::current::current_task_shared, }; use alloc::sync::Arc; use libkernel::{ @@ -41,7 +41,7 @@ async fn resolve_at_start_node(dirfd: Fd, path: &Path, flags: AtFlags) -> Result // just return a dummy, since it'll operate on dirfd anyways return Ok(Arc::new(DummyInode {})); } - let task = current_task(); + let task = current_task_shared(); let start_node: Arc = if path.is_absolute() { // Absolute path ignores dirfd. @@ -73,7 +73,7 @@ async fn resolve_path_flags( dirfd: Fd, path: &Path, root: Arc, - task: Arc, + task: &Arc, flags: AtFlags, ) -> Result> { // simply return the inode that dirfd refers to diff --git a/src/fs/syscalls/at/open.rs b/src/fs/syscalls/at/open.rs index 0674ac28..feb92e26 100644 --- a/src/fs/syscalls/at/open.rs +++ b/src/fs/syscalls/at/open.rs @@ -2,7 +2,7 @@ use crate::{ fs::{VFS, syscalls::at::AtFlags}, memory::uaccess::cstr::UserCStr, process::fd_table::Fd, - sched::current_task, + sched::current::current_task_shared, }; use core::ffi::c_char; use libkernel::{ @@ -16,15 +16,13 @@ use super::resolve_at_start_node; pub async fn sys_openat(dirfd: Fd, path: TUA, flags: u32, mode: u16) -> Result { let mut buf = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); let flags = OpenFlags::from_bits_truncate(flags); let path = Path::new(UserCStr::from_ptr(path).copy_from_user(&mut buf).await?); let start_node = resolve_at_start_node(dirfd, path, AtFlags::empty()).await?; let mode = FilePermissions::from_bits_retain(mode); - let file = VFS - .open(path, flags, start_node, mode, task.clone()) - .await?; + let file = VFS.open(path, flags, start_node, mode, &task).await?; let fd = task.fd_table.lock_save_irq().insert(file)?; diff --git a/src/fs/syscalls/at/readlink.rs b/src/fs/syscalls/at/readlink.rs index d213c76f..53d0d522 100644 --- a/src/fs/syscalls/at/readlink.rs +++ b/src/fs/syscalls/at/readlink.rs @@ -5,7 +5,7 @@ use crate::{ }, memory::uaccess::{copy_to_user_slice, cstr::UserCStr}, process::fd_table::Fd, - sched::current_task, + sched::current::current_task_shared, }; use core::{cmp::min, ffi::c_char}; use libkernel::{ @@ -17,7 +17,7 @@ use libkernel::{ pub async fn sys_readlinkat(dirfd: Fd, path: TUA, buf: UA, size: usize) -> Result { let mut path_buf = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); let path = Path::new( UserCStr::from_ptr(path) .copy_from_user(&mut path_buf) @@ -28,8 +28,7 @@ pub async fn sys_readlinkat(dirfd: Fd, path: TUA, buf: UA, size: usize) let name = path.file_name().ok_or(FsError::InvalidInput)?; let parent = if let Some(p) = path.parent() { - VFS.resolve_path_nofollow(p, start.clone(), task.clone()) - .await? + VFS.resolve_path_nofollow(p, start.clone(), &task).await? } else { start }; diff --git a/src/fs/syscalls/at/rename.rs b/src/fs/syscalls/at/rename.rs index 0a191734..b6237e58 100644 --- a/src/fs/syscalls/at/rename.rs +++ b/src/fs/syscalls/at/rename.rs @@ -14,7 +14,7 @@ use crate::{ }, memory::uaccess::cstr::UserCStr, process::fd_table::Fd, - sched::current_task, + sched::current::current_task_shared, }; // from linux/fcntl.h @@ -53,7 +53,8 @@ pub async fn sys_renameat2( let mut buf = [0; 1024]; let mut buf2 = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); + let old_path = Path::new( UserCStr::from_ptr(old_path) .copy_from_user(&mut buf) @@ -71,14 +72,14 @@ pub async fn sys_renameat2( let new_start_node = resolve_at_start_node(new_dirfd, new_path, AtFlags::empty()).await?; let old_parent_inode = if let Some(parent_path) = old_path.parent() { - VFS.resolve_path(parent_path, old_start_node.clone(), task.clone()) + VFS.resolve_path(parent_path, old_start_node.clone(), &task) .await? } else { old_start_node.clone() }; let new_parent_inode = if let Some(parent_path) = new_path.parent() { - VFS.resolve_path(parent_path, new_start_node.clone(), task.clone()) + VFS.resolve_path(parent_path, new_start_node.clone(), &task) .await? } else { new_start_node.clone() diff --git a/src/fs/syscalls/at/stat.rs b/src/fs/syscalls/at/stat.rs index bde5298f..77ae9114 100644 --- a/src/fs/syscalls/at/stat.rs +++ b/src/fs/syscalls/at/stat.rs @@ -1,8 +1,8 @@ use crate::{ - current_task, fs::syscalls::at::{resolve_at_start_node, resolve_path_flags}, memory::uaccess::{UserCopyable, copy_to_user, cstr::UserCStr}, process::fd_table::Fd, + sched::current::current_task_shared, }; use core::ffi::c_char; use libkernel::{ @@ -75,7 +75,7 @@ pub async fn sys_newfstatat( ) -> Result { let mut buf = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); let flags = AtFlags::from_bits_truncate(flags); let path = Path::new(UserCStr::from_ptr(path).copy_from_user(&mut buf).await?); @@ -84,7 +84,7 @@ pub async fn sys_newfstatat( Err(err) if err != KernelError::NotSupported => panic!("{err}"), Err(err) => return Err(err), }; - let node = resolve_path_flags(dirfd, path, start_node, task.clone(), flags).await?; + let node = resolve_path_flags(dirfd, path, start_node, &task, flags).await?; let attr = node.getattr().await?; diff --git a/src/fs/syscalls/at/statx.rs b/src/fs/syscalls/at/statx.rs index e233b2ce..ed736ee9 100644 --- a/src/fs/syscalls/at/statx.rs +++ b/src/fs/syscalls/at/statx.rs @@ -1,11 +1,11 @@ use crate::{ - current_task, fs::{ VFS, syscalls::at::{resolve_at_start_node, resolve_path_flags}, }, memory::uaccess::{UserCopyable, copy_to_user, cstr::UserCStr}, process::fd_table::Fd, + sched::current::current_task_shared, }; use core::{ffi::c_char, time::Duration}; use libkernel::{error::Result, fs::path::Path, memory::address::TUA}; @@ -124,13 +124,13 @@ pub async fn sys_statx( ) -> Result { let mut buf = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); let flags = AtFlags::from_bits_truncate(flags); let mask = StatXMask::from_bits_truncate(mask); let path = Path::new(UserCStr::from_ptr(path).copy_from_user(&mut buf).await?); let start_node = resolve_at_start_node(dirfd, path, flags).await?; - let node = resolve_path_flags(dirfd, path, start_node, task.clone(), flags).await?; + let node = resolve_path_flags(dirfd, path, start_node, &task, flags).await?; let attr = node.getattr().await?; diff --git a/src/fs/syscalls/at/symlink.rs b/src/fs/syscalls/at/symlink.rs index d9aa33e8..635b5e8f 100644 --- a/src/fs/syscalls/at/symlink.rs +++ b/src/fs/syscalls/at/symlink.rs @@ -9,7 +9,7 @@ use crate::{ }, memory::uaccess::cstr::UserCStr, process::fd_table::Fd, - sched::current_task, + sched::current::current_task_shared, }; pub async fn sys_symlinkat( @@ -20,7 +20,7 @@ pub async fn sys_symlinkat( let mut buf = [0; 1024]; let mut buf2 = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); let source = Path::new( UserCStr::from_ptr(old_name) .copy_from_user(&mut buf) @@ -33,7 +33,7 @@ pub async fn sys_symlinkat( ); let start_node = resolve_at_start_node(new_dirfd, target, AtFlags::empty()).await?; - VFS.symlink(source, target, start_node, task).await?; + VFS.symlink(source, target, start_node, &task).await?; Ok(0) } diff --git a/src/fs/syscalls/at/unlink.rs b/src/fs/syscalls/at/unlink.rs index 658f5cb0..a4315b21 100644 --- a/src/fs/syscalls/at/unlink.rs +++ b/src/fs/syscalls/at/unlink.rs @@ -3,13 +3,13 @@ use core::ffi::c_char; use libkernel::{error::Result, fs::path::Path, memory::address::TUA}; use crate::{ - current_task, fs::{ VFS, syscalls::at::{AtFlags, resolve_at_start_node}, }, memory::uaccess::cstr::UserCStr, process::fd_table::Fd, + sched::current::current_task_shared, }; // As defined in linux/fcntl.h ─ enables directory removal via unlinkat. @@ -25,7 +25,7 @@ pub async fn sys_unlinkat(dirfd: Fd, path: TUA, flags: u32) -> Result, flags: u32) -> Result, flags: i32, ) -> Result { - let task = current_task(); + let task = current_task_shared(); // linux specifically uses NULL path to indicate futimens, see utimensat(2) let node = if path.is_null() { @@ -45,7 +46,7 @@ pub async fn sys_utimensat( let flags = AtFlags::from_bits_retain(flags); let start_node = resolve_at_start_node(dirfd, path, flags).await?; - resolve_path_flags(dirfd, path, start_node, task.clone(), flags).await? + resolve_path_flags(dirfd, path, start_node, &task, flags).await? }; let mut attr = node.getattr().await?; diff --git a/src/fs/syscalls/chdir.rs b/src/fs/syscalls/chdir.rs index 6707f120..9f33369a 100644 --- a/src/fs/syscalls/chdir.rs +++ b/src/fs/syscalls/chdir.rs @@ -2,7 +2,7 @@ use crate::{ fs::VFS, memory::uaccess::{copy_to_user_slice, cstr::UserCStr}, process::fd_table::Fd, - sched::current_task, + sched::current::current_task_shared, }; use alloc::{borrow::ToOwned, ffi::CString, string::ToString}; use core::{ffi::c_char, str::FromStr}; @@ -14,7 +14,7 @@ use libkernel::{ }; pub async fn sys_getcwd(buf: UA, len: usize) -> Result { - let task = current_task(); + let task = current_task_shared(); let path = task.cwd.lock_save_irq().1.as_str().to_string(); let cstr = CString::from_str(&path).map_err(|_| KernelError::InvalidValue)?; let slice = cstr.as_bytes_with_nul(); @@ -32,11 +32,11 @@ pub async fn sys_chdir(path: TUA) -> Result { let mut buf = [0; 1024]; let path = Path::new(UserCStr::from_ptr(path).copy_from_user(&mut buf).await?); - let task = current_task(); + let task = current_task_shared(); let current_path = task.cwd.lock_save_irq().0.clone(); let new_path = task.cwd.lock_save_irq().1.join(path); - let node = VFS.resolve_path(path, current_path, task.clone()).await?; + let node = VFS.resolve_path(path, current_path, &task).await?; *task.cwd.lock_save_irq() = (node, new_path); @@ -44,7 +44,7 @@ pub async fn sys_chdir(path: TUA) -> Result { } pub async fn sys_chroot(path: TUA) -> Result { - let task = current_task(); + let task = current_task_shared(); task.creds .lock_save_irq() .caps() @@ -56,7 +56,7 @@ pub async fn sys_chroot(path: TUA) -> Result { let current_path = task.root.lock_save_irq().0.clone(); let new_path = task.root.lock_save_irq().1.join(path); - let node = VFS.resolve_path(path, current_path, task.clone()).await?; + let node = VFS.resolve_path(path, current_path, &task).await?; *task.root.lock_save_irq() = (node, new_path); @@ -64,7 +64,7 @@ pub async fn sys_chroot(path: TUA) -> Result { } pub async fn sys_fchdir(fd: Fd) -> Result { - let task = current_task(); + let task = current_task_shared(); let file = task .fd_table .lock_save_irq() diff --git a/src/fs/syscalls/chmod.rs b/src/fs/syscalls/chmod.rs index 8ebb2bca..713dd7ea 100644 --- a/src/fs/syscalls/chmod.rs +++ b/src/fs/syscalls/chmod.rs @@ -4,10 +4,10 @@ use libkernel::{ fs::attr::FilePermissions, }; -use crate::{process::fd_table::Fd, sched::current_task}; +use crate::{process::fd_table::Fd, sched::current::current_task_shared}; pub async fn sys_fchmod(fd: Fd, mode: u16) -> Result { - let task = current_task(); + let task = current_task_shared(); let file = task .fd_table .lock_save_irq() diff --git a/src/fs/syscalls/chown.rs b/src/fs/syscalls/chown.rs index 51d22eec..d52d482c 100644 --- a/src/fs/syscalls/chown.rs +++ b/src/fs/syscalls/chown.rs @@ -6,10 +6,10 @@ use libkernel::{ }, }; -use crate::{process::fd_table::Fd, sched::current_task}; +use crate::{process::fd_table::Fd, sched::current::current_task_shared}; pub async fn sys_fchown(fd: Fd, owner: i32, group: i32) -> Result { - let task = current_task(); + let task = current_task_shared(); let file = task .fd_table .lock_save_irq() diff --git a/src/fs/syscalls/close.rs b/src/fs/syscalls/close.rs index b9617f38..047d34db 100644 --- a/src/fs/syscalls/close.rs +++ b/src/fs/syscalls/close.rs @@ -1,4 +1,4 @@ -use crate::{process::fd_table::Fd, sched::current_task}; +use crate::{process::fd_table::Fd, sched::current::current_task}; use alloc::sync::Arc; use libkernel::error::{KernelError, Result}; diff --git a/src/fs/syscalls/ioctl.rs b/src/fs/syscalls/ioctl.rs index 8d91ac4f..d52aa1dc 100644 --- a/src/fs/syscalls/ioctl.rs +++ b/src/fs/syscalls/ioctl.rs @@ -1,4 +1,4 @@ -use crate::{process::fd_table::Fd, sched::current_task}; +use crate::{process::fd_table::Fd, sched::current::current_task}; use libkernel::error::{KernelError, Result}; pub async fn sys_ioctl(fd: Fd, request: usize, arg: usize) -> Result { diff --git a/src/fs/syscalls/iov.rs b/src/fs/syscalls/iov.rs index 0a334c30..491eb2ba 100644 --- a/src/fs/syscalls/iov.rs +++ b/src/fs/syscalls/iov.rs @@ -1,7 +1,7 @@ use crate::{ memory::uaccess::{UserCopyable, copy_obj_array_from_user}, process::fd_table::Fd, - sched::current_task, + sched::current::current_task, }; use libkernel::{ error::{KernelError, Result}, diff --git a/src/fs/syscalls/rw.rs b/src/fs/syscalls/rw.rs index 960bbe6f..7839beff 100644 --- a/src/fs/syscalls/rw.rs +++ b/src/fs/syscalls/rw.rs @@ -1,4 +1,4 @@ -use crate::{process::fd_table::Fd, sched::current_task}; +use crate::{process::fd_table::Fd, sched::current::current_task}; use libkernel::{ error::{KernelError, Result}, memory::address::UA, diff --git a/src/fs/syscalls/seek.rs b/src/fs/syscalls/seek.rs index ef8fd4a7..7bea05e6 100644 --- a/src/fs/syscalls/seek.rs +++ b/src/fs/syscalls/seek.rs @@ -1,4 +1,4 @@ -use crate::{process::fd_table::Fd, sched::current_task}; +use crate::{process::fd_table::Fd, sched::current::current_task}; use libkernel::{ error::{KernelError, Result}, fs::SeekFrom, diff --git a/src/fs/syscalls/splice.rs b/src/fs/syscalls/splice.rs index 1e232658..151dbe90 100644 --- a/src/fs/syscalls/splice.rs +++ b/src/fs/syscalls/splice.rs @@ -1,4 +1,4 @@ -use crate::{kernel::kpipe::KPipe, process::fd_table::Fd, sched::current_task}; +use crate::{kernel::kpipe::KPipe, process::fd_table::Fd, sched::current::current_task}; use alloc::sync::Arc; use libkernel::{ error::{KernelError, Result}, diff --git a/src/fs/syscalls/stat.rs b/src/fs/syscalls/stat.rs index 595ac47b..deb21cf7 100644 --- a/src/fs/syscalls/stat.rs +++ b/src/fs/syscalls/stat.rs @@ -1,6 +1,6 @@ use super::at::stat::Stat; use crate::memory::uaccess::copy_to_user; -use crate::{process::fd_table::Fd, sched::current_task}; +use crate::{process::fd_table::Fd, sched::current::current_task}; use libkernel::error::Result; use libkernel::{error::KernelError, memory::address::TUA}; diff --git a/src/fs/syscalls/sync.rs b/src/fs/syscalls/sync.rs index cf257f4d..09fb08c8 100644 --- a/src/fs/syscalls/sync.rs +++ b/src/fs/syscalls/sync.rs @@ -1,6 +1,6 @@ use libkernel::error::{KernelError, Result}; -use crate::{fs::VFS, process::fd_table::Fd, sched::current_task}; +use crate::{fs::VFS, process::fd_table::Fd, sched::current::current_task_shared}; pub async fn sys_sync() -> Result { VFS.sync_all().await?; @@ -8,7 +8,7 @@ pub async fn sys_sync() -> Result { } pub async fn sys_syncfs(fd: Fd) -> Result { - let task = current_task(); + let task = current_task_shared(); let inode = task .fd_table @@ -23,7 +23,7 @@ pub async fn sys_syncfs(fd: Fd) -> Result { } pub async fn sys_fsync(fd: Fd) -> Result { - let task = current_task(); + let task = current_task_shared(); let inode = task .fd_table @@ -38,7 +38,7 @@ pub async fn sys_fsync(fd: Fd) -> Result { } pub async fn sys_fdatasync(fd: Fd) -> Result { - let task = current_task(); + let task = current_task_shared(); let inode = task .fd_table diff --git a/src/fs/syscalls/trunc.rs b/src/fs/syscalls/trunc.rs index 3bc97556..1bd9df01 100644 --- a/src/fs/syscalls/trunc.rs +++ b/src/fs/syscalls/trunc.rs @@ -1,6 +1,11 @@ use core::ffi::c_char; -use crate::{fs::VFS, memory::uaccess::cstr::UserCStr, process::fd_table::Fd, sched::current_task}; +use crate::{ + fs::VFS, + memory::uaccess::cstr::UserCStr, + process::fd_table::Fd, + sched::current::{current_task, current_task_shared}, +}; use libkernel::{ error::{KernelError, Result}, fs::{OpenFlags, attr::FilePermissions, path::Path}, @@ -10,7 +15,7 @@ use libkernel::{ pub async fn sys_truncate(path: TUA, new_size: usize) -> Result { let mut buf = [0; 1024]; - let task = current_task(); + let task = current_task_shared(); let path = Path::new(UserCStr::from_ptr(path).copy_from_user(&mut buf).await?); let root = task.root.lock_save_irq().0.clone(); @@ -20,7 +25,7 @@ pub async fn sys_truncate(path: TUA, new_size: usize) -> Result { OpenFlags::O_WRONLY, root, FilePermissions::empty(), - task, + &task, ) .await?; diff --git a/src/interrupts/cpu_messenger.rs b/src/interrupts/cpu_messenger.rs index 37437f95..c2e4d084 100644 --- a/src/interrupts/cpu_messenger.rs +++ b/src/interrupts/cpu_messenger.rs @@ -3,7 +3,7 @@ use super::{ ClaimedInterrupt, InterruptConfig, InterruptDescriptor, InterruptHandler, get_interrupt_root, }; -use crate::process::Task; +use crate::process::owned::OwnedTask; use crate::{ arch::ArchImpl, drivers::Driver, @@ -11,6 +11,7 @@ use crate::{ sched, sync::{OnceLock, SpinLock}, }; +use alloc::boxed::Box; use alloc::{sync::Arc, vec::Vec}; use libkernel::{ CpuOps, @@ -18,9 +19,8 @@ use libkernel::{ }; use log::{info, warn}; -#[derive(Clone)] pub enum Message { - PutTask(Arc), + PutTask(Box), Ping(u32), } diff --git a/src/kernel/cpu_id.rs b/src/kernel/cpu_id.rs new file mode 100644 index 00000000..86cbfc7d --- /dev/null +++ b/src/kernel/cpu_id.rs @@ -0,0 +1,20 @@ +use libkernel::CpuOps; + +use crate::arch::ArchImpl; + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct CpuId(usize); + +impl CpuId { + pub fn this() -> CpuId { + CpuId(ArchImpl::id()) + } + + pub fn from_value(id: usize) -> Self { + Self(id) + } + + pub fn value(&self) -> usize { + self.0 + } +} diff --git a/src/kernel/mod.rs b/src/kernel/mod.rs index 7796e659..175c7de1 100644 --- a/src/kernel/mod.rs +++ b/src/kernel/mod.rs @@ -1,3 +1,4 @@ +pub mod cpu_id; pub mod kpipe; pub mod power; pub mod rand; diff --git a/src/kernel/power.rs b/src/kernel/power.rs index 569ab60c..b1402bf2 100644 --- a/src/kernel/power.rs +++ b/src/kernel/power.rs @@ -1,11 +1,11 @@ -use crate::{ArchImpl, arch::Arch, sched::current_task}; +use crate::{ArchImpl, arch::Arch, sched::current::current_task_shared}; use libkernel::{ error::{KernelError, Result}, proc::caps::CapabilitiesFlags, }; pub async fn sys_reboot(magic: u32, magic2: u32, op: u32, _arg: usize) -> Result { - current_task() + current_task_shared() .creds .lock_save_irq() .caps() diff --git a/src/main.rs b/src/main.rs index 940cdf1f..1e7eed66 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ #![no_main] #![feature(used_with_arg)] #![feature(likely_unlikely)] +#![feature(box_as_ptr)] use alloc::{ boxed::Box, @@ -27,7 +28,9 @@ use libkernel::{ }; use log::{error, warn}; use process::ctx::UserCtx; -use sched::{current_task, sched_init, spawn_kernel_work, uspc_ret::dispatch_userspace_task}; +use sched::{ + current::current_task_shared, sched_init, spawn_kernel_work, uspc_ret::dispatch_userspace_task, +}; extern crate alloc; @@ -122,7 +125,7 @@ async fn launch_init(opts: KOptions) { .await .expect("Unable to find init"); - let task = current_task(); + let task = current_task_shared(); // Ensure that the exec() call applies to init. assert!(task.process.tgid.is_init()); @@ -138,7 +141,7 @@ async fn launch_init(opts: KOptions) { OpenFlags::O_RDWR, VFS.root_inode(), FilePermissions::empty(), - task.clone(), + &task, ) .await .expect("Could not open console for init process"); @@ -158,6 +161,8 @@ async fn launch_init(opts: KOptions) { .expect("Could not clone FD"); } + drop(task); + process::exec::kernel_exec(inode, vec![init.as_str().to_string()], vec![]) .await .expect("Could not launch init process"); diff --git a/src/memory/brk.rs b/src/memory/brk.rs index f94db23c..4c535eb0 100644 --- a/src/memory/brk.rs +++ b/src/memory/brk.rs @@ -2,7 +2,7 @@ use core::convert::Infallible; use libkernel::memory::address::VA; -use crate::sched::current_task; +use crate::sched::current::current_task; /// Handles the `brk` system call. /// diff --git a/src/memory/fault.rs b/src/memory/fault.rs index 785b50be..d501ff38 100644 --- a/src/memory/fault.rs +++ b/src/memory/fault.rs @@ -1,4 +1,4 @@ -use crate::{process::ProcVM, sched::current_task}; +use crate::{process::ProcVM, sched::current::current_task}; use alloc::boxed::Box; use libkernel::{ PageInfo, UserAddressSpace, diff --git a/src/memory/mmap.rs b/src/memory/mmap.rs index 7ad313e2..74c28095 100644 --- a/src/memory/mmap.rs +++ b/src/memory/mmap.rs @@ -1,6 +1,6 @@ use core::sync::atomic::{AtomicUsize, Ordering}; -use crate::{process::fd_table::Fd, sched::current_task}; +use crate::{process::fd_table::Fd, sched::current::current_task}; use libkernel::{ error::{KernelError, Result}, memory::{ diff --git a/src/process/caps.rs b/src/process/caps.rs index 893656a8..f5a1cdcc 100644 --- a/src/process/caps.rs +++ b/src/process/caps.rs @@ -1,9 +1,9 @@ use crate::{ - current_task, memory::uaccess::{ UserCopyable, copy_from_user, copy_obj_array_from_user, copy_objs_to_user, copy_to_user, }, process::TASK_LIST, + sched::current::current_task_shared, }; use libkernel::{ error::{KernelError, Result}, @@ -53,7 +53,7 @@ pub async fn sys_capget(hdrp: TUA, datap: TUA) -> Re let mut header = copy_from_user(hdrp).await?; let task = if header.pid == 0 { - current_task() + current_task_shared() } else { TASK_LIST .lock_save_irq() @@ -85,9 +85,9 @@ pub async fn sys_capget(hdrp: TUA, datap: TUA) -> Re pub async fn sys_capset(hdrp: TUA, datap: TUA) -> Result { let mut header = copy_from_user(hdrp).await?; - let caller_caps = current_task().creds.lock_save_irq().caps(); + let caller_caps = current_task_shared().creds.lock_save_irq().caps(); let task = if header.pid == 0 { - current_task() + current_task_shared() } else { caller_caps.check_capable(CapabilitiesFlags::CAP_SETPCAP)?; TASK_LIST diff --git a/src/process/clone.rs b/src/process/clone.rs index 795863b2..77b46ae1 100644 --- a/src/process/clone.rs +++ b/src/process/clone.rs @@ -1,11 +1,13 @@ +use super::owned::OwnedTask; use super::{ctx::Context, thread_group::signal::SigSet}; +use crate::kernel::cpu_id::CpuId; use crate::memory::uaccess::copy_to_user; -use crate::sched::CpuId; use crate::{ process::{TASK_LIST, Task, TaskState}, - sched::{self, current_task}, + sched::{self, current::current_task}, sync::SpinLock, }; +use alloc::boxed::Box; use bitflags::bitflags; use libkernel::memory::address::TUA; use libkernel::{ @@ -55,7 +57,7 @@ pub async fn sys_clone( let new_task = { let current_task = current_task(); - let mut user_ctx = *current_task.ctx.lock_save_irq().user(); + let mut user_ctx = *current_task.ctx.user(); // TODO: Make this arch indepdenant. The child returns '0' on clone. user_ctx.x[0] = 0; @@ -127,52 +129,47 @@ pub async fn sys_clone( let creds = current_task.creds.lock_save_irq().clone(); - let new_sigmask = *current_task.sig_mask.lock_save_irq(); - - Task { - tid, - comm: Arc::new(SpinLock::new(*current_task.comm.lock_save_irq())), - process: tg, - vm, - fd_table: files, - cwd, - root, - creds: SpinLock::new(creds), - ctx: SpinLock::new(Context::from_user_ctx(user_ctx)), + let new_sigmask = current_task.sig_mask; + + OwnedTask { + ctx: Context::from_user_ctx(user_ctx), priority: current_task.priority, - sig_mask: SpinLock::new(new_sigmask), - pending_signals: SpinLock::new(SigSet::empty()), - v_runtime: SpinLock::new(0), - v_eligible: SpinLock::new(0), - v_deadline: SpinLock::new(0), - exec_start: SpinLock::new(None), - deadline: SpinLock::new(*current_task.deadline.lock_save_irq()), - state: Arc::new(SpinLock::new(TaskState::Runnable)), - last_run: SpinLock::new(None), - robust_list: SpinLock::new(None), - child_tid_ptr: SpinLock::new(if !child_tidptr.is_null() { + sig_mask: new_sigmask, + pending_signals: SigSet::empty(), + robust_list: None, + child_tid_ptr: if !child_tidptr.is_null() { Some(child_tidptr) } else { None + }, + t_shared: Arc::new(Task { + tid, + comm: Arc::new(SpinLock::new(*current_task.comm.lock_save_irq())), + process: tg, + vm, + fd_table: files, + cwd, + root, + creds: SpinLock::new(creds), + state: Arc::new(SpinLock::new(TaskState::Runnable)), + last_cpu: SpinLock::new(CpuId::this()), }), - last_cpu: SpinLock::new(CpuId::this()), } }; let tid = new_task.tid; - let task = Arc::new(new_task); - TASK_LIST .lock_save_irq() - .insert(task.descriptor(), Arc::downgrade(&task)); + .insert(new_task.descriptor(), Arc::downgrade(&new_task.t_shared)); - sched::insert_task_cross_cpu(task.clone()); - - task.process + new_task + .process .tasks .lock_save_irq() - .insert(tid, Arc::downgrade(&task)); + .insert(tid, Arc::downgrade(&new_task.t_shared)); + + sched::insert_task_cross_cpu(Box::new(new_task)); // Honour CLONE_*SETTID semantics for the parent and (shared-VM) child. if flags.contains(CloneFlags::CLONE_PARENT_SETTID) && !parent_tidptr.is_null() { diff --git a/src/process/creds.rs b/src/process/creds.rs index 4680256d..6c4e88ad 100644 --- a/src/process/creds.rs +++ b/src/process/creds.rs @@ -2,7 +2,7 @@ use core::convert::Infallible; use crate::{ memory::uaccess::{UserCopyable, copy_to_user}, - sched::current_task, + sched::current::current_task, }; use libkernel::{ error::Result, @@ -100,8 +100,7 @@ pub fn sys_gettid() -> core::result::Result { } pub async fn sys_getresuid(ruid: TUA, euid: TUA, suid: TUA) -> Result { - let task = current_task(); - let creds = task.creds.lock_save_irq().clone(); + let creds = current_task().creds.lock_save_irq().clone(); copy_to_user(ruid, creds.uid).await?; copy_to_user(euid, creds.euid).await?; @@ -111,8 +110,7 @@ pub async fn sys_getresuid(ruid: TUA, euid: TUA, suid: TUA) -> Re } pub async fn sys_getresgid(rgid: TUA, egid: TUA, sgid: TUA) -> Result { - let task = current_task(); - let creds = task.creds.lock_save_irq().clone(); + let creds = current_task().creds.lock_save_irq().clone(); copy_to_user(rgid, creds.gid).await?; copy_to_user(egid, creds.egid).await?; diff --git a/src/process/exec.rs b/src/process/exec.rs index 0260ec58..551a0497 100644 --- a/src/process/exec.rs +++ b/src/process/exec.rs @@ -1,13 +1,15 @@ +use crate::ArchImpl; use crate::process::Comm; +use crate::sched::current::current_task_shared; use crate::{ - arch::{Arch, ArchImpl}, + arch::Arch, fs::VFS, memory::{ page::ClaimedPage, uaccess::{copy_from_user, cstr::UserCStr}, }, - process::{TaskState, ctx::Context, thread_group::signal::SignalState}, - sched::current_task, + process::{ctx::Context, thread_group::signal::SignalState}, + sched::current::current_task, }; use alloc::{string::String, vec}; use alloc::{string::ToString, sync::Arc, vec::Vec}; @@ -119,13 +121,13 @@ pub async fn kernel_exec( let new_comm = argv.first().map(|s| Comm::new(s.as_str())); - let current_task = current_task(); + let mut current_task = current_task(); if let Some(new_comm) = new_comm { *current_task.comm.lock_save_irq() = new_comm; } - *current_task.ctx.lock_save_irq() = Context::from_user_ctx(user_ctx); - *current_task.state.lock_save_irq() = TaskState::Runnable; + + current_task.ctx = Context::from_user_ctx(user_ctx); *current_task.vm.lock_save_irq() = vm; *current_task.process.signals.lock_save_irq() = SignalState::new_default(); @@ -248,6 +250,7 @@ pub async fn sys_execve( mut usr_argv: TUA>, mut usr_env: TUA>, ) -> Result { + let task = current_task_shared(); let mut buf = [0; 1024]; let mut argv = Vec::new(); let mut envp = Vec::new(); @@ -276,11 +279,8 @@ pub async fn sys_execve( usr_env = usr_env.add_objs(1); } - let task = current_task(); let path = Path::new(UserCStr::from_ptr(path).copy_from_user(&mut buf).await?); - let inode = VFS - .resolve_path(path, VFS.root_inode(), task.clone()) - .await?; + let inode = VFS.resolve_path(path, VFS.root_inode(), &task).await?; kernel_exec(inode, argv, envp).await?; diff --git a/src/process/exit.rs b/src/process/exit.rs index 9faf4bb3..bfe0a276 100644 --- a/src/process/exit.rs +++ b/src/process/exit.rs @@ -3,8 +3,8 @@ use super::{ thread_group::{ProcessState, Tgid, ThreadGroup, signal::SigId, wait::ChildState}, threading::futex::{self, key::FutexKey}, }; -use crate::memory::uaccess::copy_to_user; -use crate::sched::current_task; +use crate::sched::current::current_task; +use crate::{memory::uaccess::copy_to_user, sched::current::current_task_shared}; use alloc::vec::Vec; use libkernel::error::Result; use log::warn; @@ -102,10 +102,9 @@ pub fn sys_exit_group(exit_code: usize) -> Result { } pub async fn sys_exit(exit_code: usize) -> Result { - let task = current_task(); - // Honour CLONE_CHILD_CLEARTID: clear the user TID word and futex-wake any waiters. - let ptr = task.child_tid_ptr.lock_save_irq().take(); + let ptr = current_task().child_tid_ptr.take(); + if let Some(ptr) = ptr { copy_to_user(ptr, 0u32).await?; @@ -116,6 +115,7 @@ pub async fn sys_exit(exit_code: usize) -> Result { } } + let task = current_task_shared(); let process = Arc::clone(&task.process); let mut tasks_lock = process.tasks.lock_save_irq(); diff --git a/src/process/fd_table/dup.rs b/src/process/fd_table/dup.rs index 5f6ed78a..22b38fd2 100644 --- a/src/process/fd_table/dup.rs +++ b/src/process/fd_table/dup.rs @@ -1,4 +1,4 @@ -use crate::sched::current_task; +use crate::sched::current::current_task; use libkernel::{ error::{KernelError, Result}, fs::OpenFlags, diff --git a/src/process/fd_table/fcntl.rs b/src/process/fd_table/fcntl.rs index d67765e8..0ffe3f41 100644 --- a/src/process/fd_table/fcntl.rs +++ b/src/process/fd_table/fcntl.rs @@ -1,7 +1,7 @@ use bitflags::Flags; use libkernel::error::{KernelError, Result}; -use crate::{process::fd_table::FdFlags, sched::current_task}; +use crate::{process::fd_table::FdFlags, sched::current::current_task_shared}; use super::Fd; @@ -12,7 +12,7 @@ const F_GETFL: u32 = 3; // Get file status flags. const F_SETFL: u32 = 4; // Set file status flags. pub async fn sys_fcntl(fd: Fd, op: u32, arg: usize) -> Result { - let task = current_task(); + let task = current_task_shared(); match op { F_DUPFD => todo!(), diff --git a/src/process/fd_table/select.rs b/src/process/fd_table/select.rs index 997293b5..520c32f8 100644 --- a/src/process/fd_table/select.rs +++ b/src/process/fd_table/select.rs @@ -12,7 +12,7 @@ use crate::{ UserCopyable, copy_from_user, copy_obj_array_from_user, copy_objs_to_user, copy_to_user, }, process::thread_group::signal::SigSet, - sched::current_task, + sched::current::current_task_shared, }; use super::Fd; @@ -68,7 +68,7 @@ pub async fn sys_pselect6( timeout: TUA, _mask: TUA, ) -> Result { - let task = current_task(); + let task = current_task_shared(); let mut read_fd_set = copy_from_user(readfds).await?; @@ -168,7 +168,7 @@ pub async fn sys_ppoll( _sigmask: TUA, _sigset_len: usize, ) -> Result { - let task = current_task(); + let task = current_task_shared(); let mut poll_fds = copy_obj_array_from_user(ufds, nfds as _).await?; diff --git a/src/process/mod.rs b/src/process/mod.rs index 0eaee497..b21a71be 100644 --- a/src/process/mod.rs +++ b/src/process/mod.rs @@ -1,33 +1,14 @@ -use crate::drivers::timer::Instant; -use crate::process::threading::RobustListHead; -use crate::sched::CpuId; -use crate::{ - arch::{Arch, ArchImpl}, - fs::DummyInode, - sync::SpinLock, -}; +use crate::{arch::ArchImpl, kernel::cpu_id::CpuId, sync::SpinLock}; use alloc::{ collections::btree_map::BTreeMap, sync::{Arc, Weak}, }; use core::fmt::Display; use creds::Credentials; -use ctx::{Context, UserCtx}; use fd_table::FileDescriptorTable; -use libkernel::memory::address::TUA; use libkernel::{VirtualMemory, fs::Inode}; -use libkernel::{ - fs::pathbuf::PathBuf, - memory::{ - address::VA, - proc_vm::{ProcessVM, vmarea::VMArea}, - }, -}; -use thread_group::{ - Tgid, ThreadGroup, - builder::ThreadGroupBuilder, - signal::{SigId, SigSet, SignalState}, -}; +use libkernel::{fs::pathbuf::PathBuf, memory::proc_vm::ProcessVM}; +use thread_group::{Tgid, ThreadGroup}; pub mod caps; pub mod clone; @@ -36,6 +17,7 @@ pub mod ctx; pub mod exec; pub mod exit; pub mod fd_table; +pub mod owned; pub mod sleep; pub mod thread_group; pub mod threading; @@ -52,6 +34,10 @@ impl Tid { pub fn from_tgid(tgid: Tgid) -> Self { Self(tgid.0) } + + fn idle_for_cpu() -> Tid { + Self(CpuId::this().value() as _) + } } /// A unqiue identifier for any task in the current system. @@ -166,11 +152,6 @@ impl Comm { } } -/// Scheduler base weight to ensure tasks always have a strictly positive -/// scheduling weight. The value is added to a task's priority to obtain its -/// effective weight (`w_i` in EEVDF paper). -pub const SCHED_WEIGHT_BASE: i32 = 1024; - pub struct Task { pub tid: Tid, pub comm: Arc>, @@ -180,114 +161,15 @@ pub struct Task { pub root: Arc, PathBuf)>>, pub creds: SpinLock, pub fd_table: Arc>, - pub ctx: SpinLock, - pub sig_mask: SpinLock, - pub pending_signals: SpinLock, - pub v_runtime: SpinLock, - /// Virtual time at which the task becomes eligible (v_ei). - pub v_eligible: SpinLock, - /// Virtual deadline (v_di) used by the EEVDF scheduler. - pub v_deadline: SpinLock, - pub exec_start: SpinLock>, - pub deadline: SpinLock>, - pub priority: i8, - pub last_run: SpinLock>, pub state: Arc>, - pub robust_list: SpinLock>>, - pub child_tid_ptr: SpinLock>>, pub last_cpu: SpinLock, } impl Task { - pub fn create_idle_task( - addr_space: ::ProcessAddressSpace, - user_ctx: UserCtx, - code_map: VMArea, - ) -> Self { - // SAFETY: The code page will have been mapped corresponding to the VMA. - let vm = unsafe { ProcessVM::from_vma_and_address_space(code_map, addr_space) }; - - let thread_group_builder = ThreadGroupBuilder::new(Tgid::idle()) - .with_sigstate(Arc::new(SpinLock::new(SignalState::new_ignore()))); - - Self { - tid: Tid(0), - comm: Arc::new(SpinLock::new(Comm::new("idle"))), - process: thread_group_builder.build(), - state: Arc::new(SpinLock::new(TaskState::Runnable)), - priority: i8::MIN, - cwd: Arc::new(SpinLock::new((Arc::new(DummyInode {}), PathBuf::new()))), - root: Arc::new(SpinLock::new((Arc::new(DummyInode {}), PathBuf::new()))), - creds: SpinLock::new(Credentials::new_root()), - ctx: SpinLock::new(Context::from_user_ctx(user_ctx)), - vm: Arc::new(SpinLock::new(vm)), - sig_mask: SpinLock::new(SigSet::empty()), - pending_signals: SpinLock::new(SigSet::empty()), - v_runtime: SpinLock::new(0), - v_eligible: SpinLock::new(0), - v_deadline: SpinLock::new(0), - exec_start: SpinLock::new(None), - deadline: SpinLock::new(None), - fd_table: Arc::new(SpinLock::new(FileDescriptorTable::new())), - last_run: SpinLock::new(None), - robust_list: SpinLock::new(None), - child_tid_ptr: SpinLock::new(None), - last_cpu: SpinLock::new(CpuId::this()), - } - } - - pub fn create_init_task() -> Self { - Self { - tid: Tid(1), - comm: Arc::new(SpinLock::new(Comm::new("init"))), - process: ThreadGroupBuilder::new(Tgid::init()).build(), - state: Arc::new(SpinLock::new(TaskState::Runnable)), - cwd: Arc::new(SpinLock::new((Arc::new(DummyInode {}), PathBuf::new()))), - root: Arc::new(SpinLock::new((Arc::new(DummyInode {}), PathBuf::new()))), - creds: SpinLock::new(Credentials::new_root()), - vm: Arc::new(SpinLock::new( - ProcessVM::empty().expect("Could not create init process's VM"), - )), - fd_table: Arc::new(SpinLock::new(FileDescriptorTable::new())), - pending_signals: SpinLock::new(SigSet::empty()), - v_runtime: SpinLock::new(0), - v_eligible: SpinLock::new(0), - v_deadline: SpinLock::new(0), - exec_start: SpinLock::new(None), - deadline: SpinLock::new(None), - sig_mask: SpinLock::new(SigSet::empty()), - priority: 0, - ctx: SpinLock::new(Context::from_user_ctx( - ::new_user_context(VA::null(), VA::null()), - )), - last_run: SpinLock::new(None), - robust_list: SpinLock::new(None), - child_tid_ptr: SpinLock::new(None), - last_cpu: SpinLock::new(CpuId::this()), - } - } - pub fn is_idle_task(&self) -> bool { self.process.tgid.is_idle() } - pub fn priority(&self) -> i8 { - self.priority - } - - /// Compute this task's scheduling weight. - /// - /// weight = priority + SCHED_WEIGHT_BASE - /// The sum is clamped to a minimum of 1 - pub fn weight(&self) -> u32 { - let w = self.priority as i32 + SCHED_WEIGHT_BASE; - if w <= 0 { 1 } else { w as u32 } - } - - pub fn set_priority(&mut self, priority: i8) { - self.priority = priority; - } - pub fn pgid(&self) -> Tgid { self.process.tgid } @@ -301,10 +183,6 @@ impl Task { pub fn descriptor(&self) -> TaskDescriptor { TaskDescriptor::from_tgid_tid(self.process.tgid, self.tid) } - - pub fn raise_task_signal(&self, signal: SigId) { - self.pending_signals.lock_save_irq().insert(signal.into()); - } } pub fn find_task_by_descriptor(descriptor: &TaskDescriptor) -> Option> { diff --git a/src/process/owned.rs b/src/process/owned.rs new file mode 100644 index 00000000..494a9423 --- /dev/null +++ b/src/process/owned.rs @@ -0,0 +1,131 @@ +use core::ops::Deref; + +use super::{ + Comm, Task, TaskState, Tid, + creds::Credentials, + ctx::{Context, UserCtx}, + fd_table::FileDescriptorTable, + thread_group::{ + Tgid, + builder::ThreadGroupBuilder, + signal::{SigId, SigSet, SignalState}, + }, + threading::RobustListHead, +}; +use crate::{ + arch::{Arch, ArchImpl}, + fs::DummyInode, + kernel::cpu_id::CpuId, + sync::SpinLock, +}; +use alloc::sync::Arc; +use libkernel::{ + VirtualMemory, + fs::pathbuf::PathBuf, + memory::{ + address::{TUA, VA}, + proc_vm::{ProcessVM, vmarea::VMArea}, + }, +}; + +/// Task state which is exclusively owned by this CPU/runqueue, it is not shared +/// between other tasks and can therefore be access lock-free. +pub struct OwnedTask { + pub ctx: Context, + pub sig_mask: SigSet, + pub pending_signals: SigSet, + pub priority: i8, + pub robust_list: Option>, + pub child_tid_ptr: Option>, + pub t_shared: Arc, +} + +unsafe impl Send for OwnedTask {} +unsafe impl Sync for OwnedTask {} + +impl Deref for OwnedTask { + type Target = Task; + + fn deref(&self) -> &Self::Target { + &self.t_shared + } +} + +impl OwnedTask { + pub fn create_idle_task( + addr_space: ::ProcessAddressSpace, + user_ctx: UserCtx, + code_map: VMArea, + ) -> Self { + // SAFETY: The code page will have been mapped corresponding to the VMA. + let vm = unsafe { ProcessVM::from_vma_and_address_space(code_map, addr_space) }; + + let thread_group_builder = ThreadGroupBuilder::new(Tgid::idle()) + .with_sigstate(Arc::new(SpinLock::new(SignalState::new_ignore()))); + + let task = Task { + tid: Tid::idle_for_cpu(), + comm: Arc::new(SpinLock::new(Comm::new("idle"))), + process: thread_group_builder.build(), + state: Arc::new(SpinLock::new(TaskState::Runnable)), + cwd: Arc::new(SpinLock::new((Arc::new(DummyInode {}), PathBuf::new()))), + root: Arc::new(SpinLock::new((Arc::new(DummyInode {}), PathBuf::new()))), + creds: SpinLock::new(Credentials::new_root()), + vm: Arc::new(SpinLock::new(vm)), + fd_table: Arc::new(SpinLock::new(FileDescriptorTable::new())), + last_cpu: SpinLock::new(CpuId::this()), + }; + + Self { + priority: i8::MIN, + ctx: Context::from_user_ctx(user_ctx), + sig_mask: SigSet::empty(), + pending_signals: SigSet::empty(), + robust_list: None, + child_tid_ptr: None, + t_shared: Arc::new(task), + } + } + + pub fn create_init_task() -> Self { + let task = Task { + tid: Tid(1), + comm: Arc::new(SpinLock::new(Comm::new("init"))), + process: ThreadGroupBuilder::new(Tgid::init()).build(), + state: Arc::new(SpinLock::new(TaskState::Runnable)), + cwd: Arc::new(SpinLock::new((Arc::new(DummyInode {}), PathBuf::new()))), + root: Arc::new(SpinLock::new((Arc::new(DummyInode {}), PathBuf::new()))), + creds: SpinLock::new(Credentials::new_root()), + vm: Arc::new(SpinLock::new( + ProcessVM::empty().expect("Could not create init process's VM"), + )), + fd_table: Arc::new(SpinLock::new(FileDescriptorTable::new())), + last_cpu: SpinLock::new(CpuId::this()), + }; + + Self { + pending_signals: SigSet::empty(), + sig_mask: SigSet::empty(), + priority: 0, + ctx: Context::from_user_ctx(::new_user_context( + VA::null(), + VA::null(), + )), + robust_list: None, + child_tid_ptr: None, + t_shared: Arc::new(task), + } + } + + pub fn priority(&self) -> i8 { + self.priority + } + + pub fn set_priority(&mut self, priority: i8) { + self.priority = priority; + } + + pub fn raise_task_signal(&mut self, signal: SigId) { + self.pending_signals.insert(signal.into()); + } +} diff --git a/src/process/thread_group/pid.rs b/src/process/thread_group/pid.rs index c5f6ea71..77e1dbd6 100644 --- a/src/process/thread_group/pid.rs +++ b/src/process/thread_group/pid.rs @@ -1,6 +1,6 @@ use libkernel::error::{KernelError, Result}; -use crate::sched::current_task; +use crate::sched::current::current_task; use core::convert::Infallible; use super::{Pgid, Tgid, ThreadGroup}; diff --git a/src/process/thread_group/rsrc_lim.rs b/src/process/thread_group/rsrc_lim.rs index 891c25d6..93be1481 100644 --- a/src/process/thread_group/rsrc_lim.rs +++ b/src/process/thread_group/rsrc_lim.rs @@ -6,7 +6,7 @@ use libkernel::{ use crate::{ memory::uaccess::{UserCopyable, copy_from_user, copy_to_user}, process::thread_group::{TG_LIST, Tgid}, - sched::current_task, + sched::current::current_task, }; use super::pid::PidT; diff --git a/src/process/thread_group/signal/kill.rs b/src/process/thread_group/signal/kill.rs index a1b04ebe..4d1d9e7c 100644 --- a/src/process/thread_group/signal/kill.rs +++ b/src/process/thread_group/signal/kill.rs @@ -3,7 +3,7 @@ use crate::{ Tid, thread_group::{Pgid, Tgid, ThreadGroup, pid::PidT}, }, - sched::current_task, + sched::current::current_task, }; use super::{SigId, uaccess::UserSigId}; diff --git a/src/process/thread_group/signal/sigaction.rs b/src/process/thread_group/signal/sigaction.rs index 16fa998f..fa2148b9 100644 --- a/src/process/thread_group/signal/sigaction.rs +++ b/src/process/thread_group/signal/sigaction.rs @@ -3,7 +3,7 @@ use libkernel::error::{KernelError, Result}; use libkernel::memory::address::TUA; use crate::memory::uaccess::{UserCopyable, copy_from_user, copy_to_user}; -use crate::sched::current_task; +use crate::sched::current::current_task; use super::ksigaction::UserspaceSigAction; use super::uaccess::UserSigId; @@ -93,8 +93,6 @@ pub async fn sys_rt_sigaction( oact: TUA, sigsetsize: usize, ) -> Result { - let task = current_task(); - if sigsetsize != size_of::() { Err(KernelError::InvalidValue)? } @@ -112,6 +110,8 @@ pub async fn sys_rt_sigaction( }; let old_action = { + let task = current_task(); + let sigstate = task.process.signals.lock_save_irq(); let mut action_table = sigstate.action.lock_save_irq(); let old_action = action_table[sig]; diff --git a/src/process/thread_group/signal/sigaltstack.rs b/src/process/thread_group/signal/sigaltstack.rs index 6ac61ecb..12706efd 100644 --- a/src/process/thread_group/signal/sigaltstack.rs +++ b/src/process/thread_group/signal/sigaltstack.rs @@ -1,6 +1,6 @@ use crate::{ memory::uaccess::{UserCopyable, copy_from_user, copy_to_user}, - sched::current_task, + sched::current::current_task, }; use bitflags::bitflags; use libkernel::{ diff --git a/src/process/thread_group/signal/sigprocmask.rs b/src/process/thread_group/signal/sigprocmask.rs index d8947901..a516c5d0 100644 --- a/src/process/thread_group/signal/sigprocmask.rs +++ b/src/process/thread_group/signal/sigprocmask.rs @@ -1,5 +1,5 @@ use crate::memory::uaccess::{copy_from_user, copy_to_user}; -use crate::sched::current_task; +use crate::sched::current::current_task; use libkernel::error::{KernelError, Result}; use libkernel::memory::address::TUA; @@ -26,9 +26,8 @@ pub async fn sys_rt_sigprocmask( }; let old_sigmask = { - let task = current_task(); - let mut sigmask = task.sig_mask.lock_save_irq(); - let old_sigmask = *sigmask; + let mut task = current_task(); + let old_sigmask = task.sig_mask; if let Some(set) = set { let mut new_sigmask = match how { @@ -41,7 +40,7 @@ pub async fn sys_rt_sigprocmask( // SIGSTOP and SIGKILL can never be masked. new_sigmask = new_sigmask.union(UNMASKABLE_SIGNALS); - *sigmask = new_sigmask; + task.sig_mask = new_sigmask; } old_sigmask diff --git a/src/process/thread_group/umask.rs b/src/process/thread_group/umask.rs index 9efa199d..f68874ea 100644 --- a/src/process/thread_group/umask.rs +++ b/src/process/thread_group/umask.rs @@ -1,6 +1,6 @@ use core::convert::Infallible; -use crate::sched::current_task; +use crate::sched::current::current_task; pub fn sys_umask(new_umask: u32) -> core::result::Result { let task = current_task(); diff --git a/src/process/thread_group/wait.rs b/src/process/thread_group/wait.rs index 222c638d..73e6fb9a 100644 --- a/src/process/thread_group/wait.rs +++ b/src/process/thread_group/wait.rs @@ -1,6 +1,6 @@ use crate::clock::timespec::TimeSpec; use crate::memory::uaccess::copy_to_user; -use crate::sched::current_task; +use crate::sched::current::current_task_shared; use crate::sync::CondVar; use alloc::collections::btree_map::BTreeMap; use bitflags::Flags; @@ -137,7 +137,7 @@ pub async fn sys_wait4( return Err(KernelError::NotSupported); } - let task = current_task(); + let task = current_task_shared(); let (tgid, child_state) = task .process diff --git a/src/process/threading/futex/key.rs b/src/process/threading/futex/key.rs index cdcc08c7..2dea7f00 100644 --- a/src/process/threading/futex/key.rs +++ b/src/process/threading/futex/key.rs @@ -1,4 +1,4 @@ -use crate::sched::current_task; +use crate::sched::current::current_task; use libkernel::UserAddressSpace; use libkernel::error::{KernelError, Result}; use libkernel::memory::address::{TUA, VA}; diff --git a/src/process/threading/mod.rs b/src/process/threading/mod.rs index e679a57b..c189cf08 100644 --- a/src/process/threading/mod.rs +++ b/src/process/threading/mod.rs @@ -1,7 +1,7 @@ use core::ffi::c_long; use core::mem::size_of; -use crate::sched::current_task; +use crate::sched::current::current_task; use libkernel::{ error::{KernelError, Result}, memory::address::TUA, @@ -10,9 +10,9 @@ use libkernel::{ pub mod futex; pub fn sys_set_tid_address(tidptr: TUA) -> Result { - let task = current_task(); + let mut task = current_task(); - *task.child_tid_ptr.lock_save_irq() = Some(tidptr); + task.child_tid_ptr = Some(tidptr); Ok(task.tid.value() as _) } @@ -36,8 +36,8 @@ pub async fn sys_set_robust_list(head: TUA, len: usize) -> Resul return Err(KernelError::InvalidValue); } - let task = current_task(); - task.robust_list.lock_save_irq().replace(head); + let mut task = current_task(); + task.robust_list.replace(head); Ok(0) } diff --git a/src/sched/current.rs b/src/sched/current.rs new file mode 100644 index 00000000..67bcf57d --- /dev/null +++ b/src/sched/current.rs @@ -0,0 +1,112 @@ +use crate::{ + per_cpu, + process::{Task, owned::OwnedTask}, +}; +use alloc::{boxed::Box, sync::Arc}; +use core::{ + cell::Cell, + marker::PhantomData, + ops::{Deref, DerefMut}, + ptr, +}; + +per_cpu! { + pub(super) static CUR_TASK_PTR: CurrentTaskPtr = CurrentTaskPtr::new; +} + +pub(super) struct CurrentTaskPtr { + pub(super) ptr: Cell<*mut OwnedTask>, + pub(super) borrowed: Cell, +} + +unsafe impl Send for CurrentTaskPtr {} + +pub struct CurrentTaskGuard<'a> { + task: &'a mut OwnedTask, + _marker: PhantomData<*const ()>, +} + +impl Deref for CurrentTaskGuard<'_> { + type Target = OwnedTask; + + fn deref(&self) -> &Self::Target { + self.task + } +} + +impl DerefMut for CurrentTaskGuard<'_> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.task + } +} + +impl<'a> Drop for CurrentTaskGuard<'a> { + fn drop(&mut self) { + CUR_TASK_PTR.borrow().borrowed.set(false); + } +} + +impl CurrentTaskPtr { + pub const fn new() -> Self { + Self { + ptr: Cell::new(ptr::null_mut()), + borrowed: Cell::new(false), + } + } + + pub fn current(&self) -> CurrentTaskGuard<'static> { + if self.borrowed.get() { + panic!("Double mutable borrow of current task!"); + } + + self.borrowed.set(true); + + unsafe { + let ptr = self.ptr.get(); + + CurrentTaskGuard { + task: &mut *ptr, + _marker: PhantomData, + } + } + } + + pub(super) fn set_current(&self, task: &mut Box) { + self.ptr.set(Box::as_mut_ptr(task)); + } +} + +/// Returns a mutable reference to the CPU-local private task state +/// (`OwnedTask`). +/// +/// # Panics +/// +/// Panics if the current task is already borrowed on this CPU (reentrancy bug). +/// This usually happens if you call `current_task()` and then call a function +/// that also calls `current_task()` without dropping the first guard. +/// +/// # Critical Section +/// +/// This function disables preemption. You must drop the returned guard before +/// attempting to sleep, yield, or await. +#[track_caller] +pub fn current_task() -> CurrentTaskGuard<'static> { + CUR_TASK_PTR.borrow_mut().current() +} + +/// Returns a shared reference to the Process Identity (`Task`). +/// +/// Use this for accessing shared resources like: +/// - File Descriptors +/// - Virtual Memory (Page Tables) +/// - Current Working Directory +/// - Credentials / PID / Thread Group +/// +/// # Execution Context +/// +/// This function creates a temporary `CurrentTaskGuard` just long enough to +/// clone the `Arc`, then drops it. It is safe to await or yield after calling +/// this function, as it does not hold the CPU lock. +pub fn current_task_shared() -> Arc { + current_task().t_shared.clone() +} diff --git a/src/sched/mod.rs b/src/sched/mod.rs index 686df048..87b65003 100644 --- a/src/sched/mod.rs +++ b/src/sched/mod.rs @@ -1,40 +1,34 @@ -use crate::drivers::timer::{Instant, now, schedule_force_preempt, schedule_preempt}; +use crate::arch::ArchImpl; +use crate::drivers::timer::{Instant, now}; use crate::interrupts::cpu_messenger::{Message, message_cpu}; +use crate::kernel::cpu_id::CpuId; +use crate::process::owned::OwnedTask; use crate::{ - arch::{Arch, ArchImpl}, + arch::Arch, per_cpu, - process::{TASK_LIST, Task, TaskDescriptor, TaskState}, - sync::OnceLock, + process::{TASK_LIST, TaskDescriptor, TaskState}, }; use alloc::{boxed::Box, collections::btree_map::BTreeMap, sync::Arc}; -use core::cmp::Ordering; use core::sync::atomic::AtomicUsize; use core::time::Duration; -use libkernel::{CpuOps, UserAddressSpace, error::Result}; - +use current::{CUR_TASK_PTR, current_task}; +use libkernel::{UserAddressSpace, error::Result}; +use log::warn; +use runqueue::{RunQueue, SwitchResult}; +use sched_task::SchedulableTask; + +pub mod current; +mod runqueue; +pub mod sched_task; pub mod uspc_ret; pub mod waker; -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub struct CpuId(usize); - -impl CpuId { - pub fn this() -> CpuId { - CpuId(ArchImpl::id()) - } - - pub fn value(&self) -> usize { - self.0 - } -} - -// TODO: arbitrary cap. per_cpu! { static SCHED_STATE: SchedState = SchedState::new; } -/// Default time-slice (in milliseconds) assigned to runnable tasks. -const DEFAULT_TIME_SLICE_MS: u64 = 4; +/// Default time-slice assigned to runnable tasks. +const DEFAULT_TIME_SLICE: Duration = Duration::from_millis(4); /// Fixed-point configuration for virtual-time accounting. /// We now use a 65.63 format (65 integer bits, 63 fractional bits) as @@ -45,6 +39,11 @@ pub const VT_ONE: u128 = 1u128 << VT_FIXED_SHIFT; /// Two virtual-time instants whose integer parts differ by no more than this constant are considered equal. pub const VCLOCK_EPSILON: u128 = VT_ONE; +/// Scheduler base weight to ensure tasks always have a strictly positive +/// scheduling weight. The value is added to a task's priority to obtain its +/// effective weight (`w_i` in EEVDF paper). +pub const SCHED_WEIGHT_BASE: i32 = 1024; + /// Schedule a new task. /// /// This function is the core of the kernel's scheduler. It is responsible for @@ -65,6 +64,7 @@ pub const VCLOCK_EPSILON: u128 = VT_ONE; /// Nothing, but the CPU context will be set to the next runnable task. See /// `userspace_return` for how this is invoked. fn schedule() { + // Reentrancy Check if SCHED_STATE.try_borrow_mut().is_none() { log::warn!( "Scheduler reentrancy detected on CPU {}", @@ -72,59 +72,46 @@ fn schedule() { ); return; } - // Mark the current task as runnable so it's considered for scheduling in - // the next time-slice. - { - let task = current_task(); - let mut task_state = task.state.lock_save_irq(); - if *task_state == TaskState::Running { - *task_state = TaskState::Runnable; - } + let mut sched = SCHED_STATE.borrow_mut(); + + // Update Clocks + let now_inst = now().expect("System timer not initialised"); + + sched.advance_vclock(now_inst); + + if let Some(current) = sched.run_q.current_mut() { + current.tick(now_inst); } - let previous_task = current_task(); - let mut sched_state = SCHED_STATE.borrow_mut(); + // Select Next Task + let next_task_desc = sched.run_q.find_next_runnable_desc(sched.vclock); - // Bring the virtual clock up-to-date so that eligibility tests use the - // most recent value. - let now_inst = now().expect("System timer not initialised"); - sched_state.advance_vclock(now_inst); - - let next_task = sched_state.find_next_runnable_task(); - // if previous_task.tid != next_task.tid { - // let runnable_tasks = sched_state - // .run_queue - // .values() - // .filter(|t| *t.state.lock_save_irq() == TaskState::Runnable) - // .count(); - // if matches!(*previous_task.state.lock_save_irq(), TaskState::Sleeping | TaskState::Finished) { - // log::debug!( - // "CPU {} scheduling switch due to removal from run queue: {} -> {} (runnable tasks: {runnable_tasks})", - // CpuId::this().value(), - // previous_task.tid.value(), - // next_task.tid.value(), - // ); - // } else { - // log::debug!( - // "CPU {} scheduling switch: {} -> {} (runnable tasks: {runnable_tasks})", - // CpuId::this().value(), - // previous_task.tid.value(), - // next_task.tid.value() - // ); - // } - // } - - sched_state - .switch_to_task(Some(previous_task), next_task.clone()) - .expect("Could not schedule next task"); + match sched.run_q.switch_tasks(next_task_desc, now_inst) { + SwitchResult::AlreadyRunning => { + // Nothing to do. + return; + } + SwitchResult::Blocked { old_task } => { + // If the blocked task has finished, allow it to drop here so it's + // resources are released. + if !old_task.state.lock_save_irq().is_finished() { + sched.wait_q.insert(old_task.descriptor(), old_task); + } + } + // fall-thru. + SwitchResult::Preempted => {} + } + + // Update all context since the task has switched. + if let Some(new_current) = sched.run_q.current_mut() { + ArchImpl::context_switch(new_current.t_shared.clone()); + CUR_TASK_PTR.borrow_mut().set_current(&mut new_current.task); + } } pub fn spawn_kernel_work(fut: impl Future + 'static + Send) { - current_task() - .ctx - .lock_save_irq() - .put_kernel_work(Box::pin(fut)); + current_task().ctx.put_kernel_work(Box::pin(fut)); } #[cfg(feature = "smp")] @@ -134,7 +121,7 @@ fn get_next_cpu() -> CpuId { let cpu_count = ArchImpl::cpu_count(); let cpu_id = NEXT_CPU.fetch_add(1, core::sync::atomic::Ordering::Relaxed) % cpu_count; - CpuId(cpu_id) + CpuId::from_value(cpu_id) } #[cfg(not(feature = "smp"))] @@ -143,12 +130,14 @@ fn get_next_cpu() -> CpuId { } /// Insert the given task onto a CPU's run queue. -pub fn insert_task(task: Arc) { - SCHED_STATE.borrow_mut().add_task(task); +pub fn insert_task(task: Box) { + SCHED_STATE + .borrow_mut() + .insert_into_runq(SchedulableTask::new(task)); } #[cfg(feature = "smp")] -pub fn insert_task_cross_cpu(task: Arc) { +pub fn insert_task_cross_cpu(task: Box) { let cpu = get_next_cpu(); if cpu == CpuId::this() { insert_task(task); @@ -158,21 +147,16 @@ pub fn insert_task_cross_cpu(task: Arc) { } #[cfg(not(feature = "smp"))] -pub fn insert_task_cross_cpu(task: Arc) { +pub fn insert_task_cross_cpu(task: Box) { insert_task(task); } pub struct SchedState { - /// Task that is currently running on this CPU (if any). - running_task: Option>, - // TODO: To be changed to virtual-deadline key for better performance - // TODO: Use a red-black tree for better performance. - pub run_queue: BTreeMap>, + run_q: RunQueue, + wait_q: BTreeMap>, /// Per-CPU virtual clock (fixed-point 65.63 stored in a u128). /// Expressed in virtual-time units as defined by the EEVDF paper. vclock: u128, - /// Cached sum of weights of all tasks in the run queue (`sum w_i`). - total_weight: u64, /// Real-time moment when `vclock` was last updated. last_update: Option, } @@ -180,80 +164,11 @@ pub struct SchedState { unsafe impl Send for SchedState {} impl SchedState { - /// Inserts `task` into this CPU's run-queue and updates all EEVDF - /// accounting information (eligible time, virtual deadline and the cached - /// weight sum). - pub fn add_task(&mut self, task: Arc) { - // Always advance the virtual clock first so that eligibility and - // deadline calculations for the incoming task are based on the most - // recent time stamp. - let now_inst = now().expect("System timer not initialised"); - self.advance_vclock(now_inst); - - let desc = task.descriptor(); - - if self.run_queue.contains_key(&desc) { - return; - } - - // A freshly enqueued task becomes eligible immediately. - *task.v_eligible.lock_save_irq() = self.vclock; - - // Grant it an initial virtual deadline proportional to its weight. - let q_ns: u128 = (DEFAULT_TIME_SLICE_MS as u128) * 1_000_000; - let v_delta = (q_ns << VT_FIXED_SHIFT) / task.weight() as u128; - let new_v_deadline = self.vclock + v_delta; - *task.v_deadline.lock_save_irq() = new_v_deadline; - - // Since the task is not executing yet, its exec_start must be `None`. - *task.exec_start.lock_save_irq() = None; - - if !task.is_idle_task() { - self.total_weight = self.total_weight.saturating_add(task.weight() as u64); - } - - // Decide whether the currently-running task must be preempted - // immediately. - let newcomer_eligible = { - let v_e = *task.v_eligible.lock_save_irq(); - v_e.saturating_sub(self.vclock) <= VCLOCK_EPSILON - }; - let preempt_now = if newcomer_eligible { - if let Some(ref current) = self.running_task { - let current_deadline = *current.v_deadline.lock_save_irq(); - new_v_deadline < current_deadline - } else { - true - } - } else { - false - }; - - self.run_queue.insert(desc, task); - - // Arm an immediate preemption timer so that the interrupt - // handler will force the actual context switch as soon as possible. - if preempt_now { - schedule_preempt(now_inst + Duration::from_nanos(1)); - } - } - - /// Removes a task given its descriptor and subtracts its weight from the - /// cached `total_weight`. Missing descriptors are ignored. - pub fn remove_task_with_weight(&mut self, desc: &TaskDescriptor) { - if let Some(task) = self.run_queue.remove(desc) { - if task.is_idle_task() { - panic!("Cannot remove the idle task"); - } - self.total_weight = self.total_weight.saturating_sub(task.weight() as u64); - } - } pub const fn new() -> Self { Self { - running_task: None, - run_queue: BTreeMap::new(), + run_q: RunQueue::new(), + wait_q: BTreeMap::new(), vclock: 0, - total_weight: 0, last_update: None, } } @@ -266,161 +181,35 @@ impl SchedState { fn advance_vclock(&mut self, now_inst: Instant) { if let Some(prev) = self.last_update { let delta_real = now_inst - prev; - if self.total_weight > 0 { + if self.run_q.weight() > 0 { let delta_vt = - ((delta_real.as_nanos()) << VT_FIXED_SHIFT) / self.total_weight as u128; + ((delta_real.as_nanos()) << VT_FIXED_SHIFT) / self.run_q.weight() as u128; self.vclock = self.vclock.saturating_add(delta_vt); } } self.last_update = Some(now_inst); } - fn switch_to_task( - &mut self, - previous_task: Option>, - next_task: Arc, - ) -> Result<()> { - let now_inst = now().expect("System timer not initialised"); - // Update the virtual clock before we do any other accounting. - self.advance_vclock(now_inst); - - if let Some(ref prev_task) = previous_task { - *prev_task.last_run.lock_save_irq() = Some(now_inst); - } - - if let Some(ref prev_task) = previous_task - && Arc::ptr_eq(&next_task, prev_task) - { - // Ensure the task state is running. - *next_task.state.lock_save_irq() = TaskState::Running; - // TODO: Fix hack - if next_task.is_idle_task() { - schedule_force_preempt(); - } - return Ok(()); - } - - // Update vruntime, clear exec_start and assign a new eligible virtual deadline - // for the previous task. - if let Some(ref prev_task) = previous_task { - // Compute how much virtual time the task actually consumed. - let delta_vt = if let Some(start) = *prev_task.exec_start.lock_save_irq() { - let delta = now_inst - start; - let w = prev_task.weight() as u128; - let dv = ((delta.as_nanos() as u128) << VT_FIXED_SHIFT) / w; - *prev_task.v_runtime.lock_save_irq() += dv; - dv - } else { - 0 - }; - *prev_task.exec_start.lock_save_irq() = None; - - // Advance its eligible time by the virtual run time it just used - // (EEVDF: v_ei += t_used / w_i). - *prev_task.v_eligible.lock_save_irq() += delta_vt; - - // Re-issue a virtual deadline - let q_ns: u128 = (DEFAULT_TIME_SLICE_MS as u128) * 1_000_000; - let v_delta = (q_ns << VT_FIXED_SHIFT) / prev_task.weight() as u128; - let v_ei = *prev_task.v_eligible.lock_save_irq(); - *prev_task.v_deadline.lock_save_irq() = v_ei + v_delta; - } - - *next_task.exec_start.lock_save_irq() = Some(now_inst); - *next_task.last_cpu.lock_save_irq() = CpuId::this(); - - // Make sure the task possesses an eligible virtual deadline. If none is set - // (or the previous one has elapsed), we hand out a brand-new one. - { - let mut deadline_guard = next_task.deadline.lock_save_irq(); - // Refresh deadline if none is set or the previous deadline has elapsed. - if deadline_guard - .is_none_or(|d| d <= now_inst + Duration::from_millis(DEFAULT_TIME_SLICE_MS)) - { - *deadline_guard = Some(now_inst + Duration::from_millis(DEFAULT_TIME_SLICE_MS)); - } - if let Some(d) = *deadline_guard { - // log::debug!( - // "CPU {}: Next task {} has deadline in {}ms", - // CpuId::this().value(), - // next_task.tid.value(), - // (d - now_inst).as_millis() - // ); - schedule_preempt(d); - } - } - - *next_task.state.lock_save_irq() = TaskState::Running; - - // Update the scheduler's state to reflect the new running task. - self.running_task = Some(next_task.clone()); + fn insert_into_runq(&mut self, task: Box) { + let now = now().expect("systimer not running"); - // Perform the architecture-specific context switch. - ArchImpl::context_switch(next_task); + self.advance_vclock(now); - Ok(()) + self.run_q.enqueue_task(task, self.vclock); } - fn find_next_runnable_task(&self) -> Arc { - let idle_task = self - .run_queue - .get(&TaskDescriptor::this_cpus_idle()) - .expect("Every runqueue should have an idle task"); - - self.run_queue - .values() - // We only care about processes that are ready to run. - .filter(|candidate_proc| { - let state = *candidate_proc.state.lock_save_irq(); - let eligible_vt = *candidate_proc.v_eligible.lock_save_irq(); - state == TaskState::Runnable - && !candidate_proc.is_idle_task() - // Allow a small epsilon tolerance to compensate for rounding - && eligible_vt.saturating_sub(self.vclock) <= VCLOCK_EPSILON - }) - .min_by(|proc1, proc2| { - if proc1.is_idle_task() { - return Ordering::Greater; - } else if proc2.is_idle_task() { - return Ordering::Less; - } - let vd1 = *proc1.v_deadline.lock_save_irq(); - let vd2 = *proc2.v_deadline.lock_save_irq(); - - vd1.cmp(&vd2).then_with(|| { - let vr1 = *proc1.v_runtime.lock_save_irq(); - let vr2 = *proc2.v_runtime.lock_save_irq(); - - vr1.cmp(&vr2).then_with(|| { - let last_run1 = proc1.last_run.lock_save_irq(); - let last_run2 = proc2.last_run.lock_save_irq(); - - match (*last_run1, *last_run2) { - (Some(t1), Some(t2)) => t1.cmp(&t2), - (Some(_), None) => Ordering::Less, - (None, Some(_)) => Ordering::Greater, - (None, None) => Ordering::Equal, - } - }) - }) - }) - .unwrap_or(idle_task) - .clone() + pub fn wakeup(&mut self, desc: TaskDescriptor) { + if let Some(task) = self.wait_q.remove(&desc) { + self.insert_into_runq(task); + } else { + warn!("Spurious wakeup for task {:?}", desc); + } } } -pub fn current_task() -> Arc { - SCHED_STATE - .borrow() - .running_task - .as_ref() - .expect("Current task called before initial task created") - .clone() -} - pub fn sched_init() { - let idle_task = get_idle_task(); - let init_task = Arc::new(Task::create_init_task()); + let idle_task = ArchImpl::create_idle_task(); + let init_task = OwnedTask::create_init_task(); init_task .vm @@ -429,44 +218,25 @@ pub fn sched_init() { .address_space_mut() .activate(); - *init_task.state.lock_save_irq() = TaskState::Running; - SCHED_STATE.borrow_mut().running_task = Some(idle_task.clone()); + *init_task.state.lock_save_irq() = TaskState::Runnable; { let mut task_list = TASK_LIST.lock_save_irq(); - task_list.insert(idle_task.descriptor(), Arc::downgrade(&idle_task)); - task_list.insert(init_task.descriptor(), Arc::downgrade(&init_task)); + task_list.insert(idle_task.descriptor(), Arc::downgrade(&idle_task.t_shared)); + task_list.insert(init_task.descriptor(), Arc::downgrade(&init_task.t_shared)); } - insert_task(idle_task); - insert_task(init_task.clone()); + insert_task(Box::new(idle_task)); + insert_task(Box::new(init_task)); - SCHED_STATE - .borrow_mut() - .switch_to_task(None, init_task) - .expect("Failed to switch to init task"); + schedule(); } pub fn sched_init_secondary() { - let idle_task = get_idle_task(); - SCHED_STATE.borrow_mut().running_task = Some(idle_task.clone()); - - // Important to ensure that the idle task is in the TASK_LIST for this CPU. - insert_task(idle_task.clone()); - - SCHED_STATE - .borrow_mut() - .switch_to_task(None, idle_task) - .expect("Failed to switch to idle task"); -} - -fn get_idle_task() -> Arc { - static IDLE_TASK: OnceLock> = OnceLock::new(); + let idle_task = ArchImpl::create_idle_task(); - IDLE_TASK - .get_or_init(|| Arc::new(ArchImpl::create_idle_task())) - .clone() + insert_task(Box::new(idle_task)); } pub fn sys_sched_yield() -> Result { diff --git a/src/sched/runqueue.rs b/src/sched/runqueue.rs new file mode 100644 index 00000000..b5ec9944 --- /dev/null +++ b/src/sched/runqueue.rs @@ -0,0 +1,176 @@ +use core::cmp::Ordering; + +use crate::{ + drivers::timer::Instant, + process::{TaskDescriptor, TaskState}, +}; +use alloc::{boxed::Box, collections::btree_map::BTreeMap}; +use log::warn; + +use super::{VCLOCK_EPSILON, sched_task::SchedulableTask}; + +/// The result of a requested task switch. +pub enum SwitchResult { + /// The requested task is already running. No changes made. + AlreadyRunning, + /// A switch occurred. The previous task was Runnable and has been + /// re-queued. + Preempted, + /// A switch occurred. The previous task is Blocked (or Finished) and + /// ownership is returned to the caller (to handle sleep/wait queues). + Blocked { old_task: Box }, +} + +/// A simple weight-tracking runqueue. +/// +/// Invariants: +/// 1. `total_weight` = Sum(queue tasks) + Weight(running_task) (excluding the idle task). +/// 2. `running_task` is NOT in `queue`. +pub struct RunQueue { + total_weight: u64, + pub(super) queue: BTreeMap>, + pub(super) running_task: Option>, +} + +impl RunQueue { + pub const fn new() -> Self { + Self { + total_weight: 0, + queue: BTreeMap::new(), + running_task: None, + } + } + + fn insert_task(&mut self, task: Box) { + if !task.is_idle_task() { + self.total_weight = self.total_weight.saturating_add(task.weight() as u64); + } + + if let Some(old_task) = self.queue.insert(task.descriptor(), task) { + // Handle the edge case where we overwrite a task. If we replaced + // someone, we must subtract their weight to avoid accounting drift. + warn!("Overwrote active task {:?}", old_task.descriptor()); + self.total_weight = self.total_weight.saturating_sub(old_task.weight() as u64); + } + } + + pub fn switch_tasks(&mut self, next_task: TaskDescriptor, now_inst: Instant) -> SwitchResult { + if let Some(current) = self.current() { + if current.descriptor() == next_task { + return SwitchResult::AlreadyRunning; + } + } + + let mut new_task = match self.queue.remove(&next_task) { + Some(t) => t, + None => { + warn!("Task {:?} not found for switch.", next_task); + return SwitchResult::AlreadyRunning; + } + }; + + new_task.about_to_execute(now_inst); + + // Perform the swap. + if let Some(old_task) = self.running_task.replace(new_task) { + let state = *old_task.state.lock_save_irq(); + + match state { + TaskState::Running | TaskState::Runnable => { + // Update state to strictly Runnable + *old_task.state.lock_save_irq() = TaskState::Runnable; + + self.queue.insert(old_task.descriptor(), old_task); + + return SwitchResult::Preempted; + } + _ => { + self.total_weight = self.total_weight.saturating_sub(old_task.weight() as u64); + + return SwitchResult::Blocked { old_task }; + } + } + } + + // If there was no previous task (e.g., boot up), it counts as a + // Preemption. + SwitchResult::Preempted + } + + pub fn weight(&self) -> u64 { + self.total_weight + } + + pub fn current(&self) -> Option<&Box> { + self.running_task.as_ref() + } + + pub fn current_mut(&mut self) -> Option<&mut Box> { + self.running_task.as_mut() + } + + fn fallback_current_or_idle(&self) -> TaskDescriptor { + if let Some(ref current) = self.running_task { + let s = *current.state.lock_save_irq(); + if !current.is_idle_task() && (s == TaskState::Runnable || s == TaskState::Running) { + return current.descriptor(); + } + } + + TaskDescriptor::this_cpus_idle() + } + + /// Returns the Descriptor of the best task to run next. This compares the + /// best task in the run_queue against the currently running task. + pub fn find_next_runnable_desc(&self, vclock: u128) -> TaskDescriptor { + // Find the best candidate from the Run Queue + let best_queued_entry = self + .queue + .iter() + .filter(|(_, task)| { + !task.is_idle_task() && task.v_eligible.saturating_sub(vclock) <= VCLOCK_EPSILON + }) + .min_by(|(_, t1), (_, t2)| t1.compare_with(t2)); + + let (best_queued_desc, best_queued_task) = match best_queued_entry { + Some((d, t)) => (*d, t), + // If runqueue is empty (or no eligible tasks), we might just run + // current or idle. + None => return self.fallback_current_or_idle(), + }; + + // Compare against the current task + if let Some(current) = self.current() { + // If current is not Runnable (e.g. it blocked, yielded, or + // finished), it cannot win. + let current_state = *current.state.lock_save_irq(); + if current_state != TaskState::Runnable && current_state != TaskState::Running { + return best_queued_desc; + } + + // compare current vs challenger + match current.compare_with(best_queued_task) { + Ordering::Less | Ordering::Equal => { + // Current is better (has earlier deadline) or equal. Keep + // running current. + return current.descriptor(); + } + Ordering::Greater => { + // Queued task is better. Switch. + return best_queued_desc; + } + } + } + + best_queued_desc + } + + /// Inserts `task` into this CPU's run-queue and updates all EEVDF + /// accounting information (eligible time, virtual deadline and the cached + /// weight sum). + pub fn enqueue_task(&mut self, mut new_task: Box, vclock: u128) { + new_task.inserting_into_runqueue(vclock); + + self.insert_task(new_task); + } +} diff --git a/src/sched/sched_task.rs b/src/sched/sched_task.rs new file mode 100644 index 00000000..161fcfe9 --- /dev/null +++ b/src/sched/sched_task.rs @@ -0,0 +1,142 @@ +use core::{ + cmp::Ordering, + ops::{Deref, DerefMut}, +}; + +use alloc::boxed::Box; + +use crate::{ + drivers::timer::{Instant, schedule_preempt}, + kernel::cpu_id::CpuId, + process::{TaskState, owned::OwnedTask}, +}; + +use super::{DEFAULT_TIME_SLICE, SCHED_WEIGHT_BASE, VT_FIXED_SHIFT}; + +pub struct SchedulableTask { + pub task: Box, + pub v_runtime: u128, + /// Virtual time at which the task becomes eligible (v_ei). + pub v_eligible: u128, + /// Virtual deadline (v_di) used by the EEVDF scheduler. + pub v_deadline: u128, + pub exec_start: Option, + pub deadline: Option, + pub last_run: Option, +} + +impl Deref for SchedulableTask { + type Target = OwnedTask; + + fn deref(&self) -> &Self::Target { + &self.task + } +} + +impl DerefMut for SchedulableTask { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.task + } +} + +impl SchedulableTask { + pub fn new(task: Box) -> Box { + Box::new(Self { + task, + v_runtime: 0, + v_eligible: 0, + v_deadline: 0, + exec_start: None, + deadline: None, + last_run: None, + }) + } + + /// Update accounting info for this task given the latest time. + pub fn tick(&mut self, now: Instant) { + let delta_vt = if let Some(start) = self.exec_start { + let delta = now - start; + let w = self.weight() as u128; + let dv = ((delta.as_nanos() as u128) << VT_FIXED_SHIFT) / w; + self.v_runtime = dv; + dv + } else { + 0 + }; + + // Advance its eligible time by the virtual run time it just used + // (EEVDF: v_ei += t_used / w_i). + self.v_eligible += delta_vt; + + // Re-issue a virtual deadline + let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos(); + let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128; + let v_ei = self.v_eligible; + self.v_deadline = v_ei + v_delta; + } + + /// Compute this task's scheduling weight. + /// + /// weight = priority + SCHED_WEIGHT_BASE + /// The sum is clamped to a minimum of 1 + pub fn weight(&self) -> u32 { + let w = self.task.priority as i32 + SCHED_WEIGHT_BASE; + if w <= 0 { 1 } else { w as u32 } + } + + pub fn compare_with(&self, other: &Self) -> core::cmp::Ordering { + if self.is_idle_task() { + return Ordering::Greater; + } + + if other.is_idle_task() { + return Ordering::Less; + } + + self.v_deadline + .cmp(&other.v_deadline) + .then_with(|| self.v_runtime.cmp(&other.v_runtime)) + // If completely equal, prefer the one that hasn't run in a while? + // Or prefer the one already running to avoid cache thrashing? + // Usually irrelevant for EEVDF but strict ordering is good for + // stability. + .then_with(|| match (self.last_run, other.last_run) { + (Some(a), Some(b)) => a.cmp(&b), + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (None, None) => Ordering::Equal, + }) + } + + /// Update accounting information when the task is about to be inserted into + /// a runqueue. + pub fn inserting_into_runqueue(&mut self, vclock: u128) { + // A freshly enqueued task becomes eligible immediately. + self.v_eligible = vclock; + + // Grant it an initial virtual deadline proportional to its weight. + let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos(); + let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128; + let new_v_deadline = vclock + v_delta; + self.v_deadline = new_v_deadline; + + // Since the task is not executing yet, its exec_start must be `None`. + self.exec_start = None; + } + + /// Setup task accounting info such that it is about to be executed. + pub fn about_to_execute(&mut self, now: Instant) { + self.exec_start = Some(now); + *self.last_cpu.lock_save_irq() = CpuId::this(); + *self.state.lock_save_irq() = TaskState::Running; + + // Deadline logic + if self.deadline.is_none_or(|d| d <= now + DEFAULT_TIME_SLICE) { + self.deadline = Some(now + DEFAULT_TIME_SLICE); + } + + if let Some(d) = self.deadline { + schedule_preempt(d); + } + } +} diff --git a/src/sched/uspc_ret.rs b/src/sched/uspc_ret.rs index 5def468d..85481e4b 100644 --- a/src/sched/uspc_ret.rs +++ b/src/sched/uspc_ret.rs @@ -1,5 +1,4 @@ -use super::{SCHED_STATE, current_task, schedule, waker::create_waker}; -use crate::process::TASK_LIST; +use super::{current::current_task, schedule, waker::create_waker}; use crate::{ arch::{Arch, ArchImpl}, process::{ @@ -83,21 +82,26 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { state = State::ProcessKernelWork; } State::ProcessKernelWork => { - let task = current_task(); - // First, let's handle signals. If there is any scheduled signal // work (this has to be async to handle faults, etc). - let signal_work = task.ctx.lock_save_irq().take_signal_work(); + let (signal_work, desc, is_idle) = { + let mut task = current_task(); + ( + task.ctx.take_signal_work(), + task.descriptor(), + task.is_idle_task(), + ) + }; + if let Some(mut signal_work) = signal_work { - if task.is_idle_task() { + if is_idle { panic!("Signal processing for idle task"); } match signal_work .as_mut() - .poll(&mut core::task::Context::from_waker(&create_waker( - task.descriptor(), - ))) { + .poll(&mut core::task::Context::from_waker(&create_waker(desc))) + { Poll::Ready(Ok(state)) => { // Signal actioning is complete. Return to userspace. unsafe { ptr::copy_nonoverlapping(&state as _, ctx, 1) }; @@ -114,7 +118,9 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { continue; } Poll::Pending => { - task.ctx.lock_save_irq().put_signal_work(signal_work); + let mut task = current_task(); + + task.ctx.put_signal_work(signal_work); let mut task_state = task.state.lock_save_irq(); match *task_state { @@ -145,29 +151,24 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { } // Now let's handle any kernel work that's been spawned for this task. - let kern_work = task.ctx.lock_save_irq().take_kernel_work(); + let kern_work = current_task().ctx.take_kernel_work(); if let Some(mut kern_work) = kern_work { - if task.is_idle_task() { + if is_idle { panic!("Idle process should never have kernel work"); } match kern_work .as_mut() - .poll(&mut core::task::Context::from_waker(&create_waker( - task.descriptor(), - ))) { + .poll(&mut core::task::Context::from_waker(&create_waker(desc))) + { Poll::Ready(()) => { - // If the task just exited (entered the finished state), - // don't return to it's userspace, instead, find another - // task to execute, removing this task from the - // runqueue, reaping it's resouces. - if task.state.lock_save_irq().is_finished() { - SCHED_STATE - .borrow_mut() - .remove_task_with_weight(&task.descriptor()); - let mut task_list = TASK_LIST.lock_save_irq(); - task_list.remove(&task.descriptor()); + let task = current_task(); + // If the task just exited (entered the finished + // state), don't return to it's userspace, instead, + // find another task to execute, removing this task + // from the runqueue, reaping it's resouces. + if task.state.lock_save_irq().is_finished() { state = State::PickNewTask; continue; } @@ -180,12 +181,14 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { continue; } Poll::Pending => { + let mut task = current_task(); + // Kernel work hasn't finished. A wake up should // have been scheduled by the future. Replace the // kernel work context back into the task, set it's // state to sleeping so it's not scheduled again and // search for another task to execute. - task.ctx.lock_save_irq().put_kernel_work(kern_work); + task.ctx.put_kernel_work(kern_work); let mut task_state = task.state.lock_save_irq(); match *task_state { @@ -215,21 +218,22 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { // No kernel work. Check for any pending signals. // We never handle signals for the idle task. - if task.is_idle_task() { + if current_task().is_idle_task() { state = State::ReturnToUserspace; continue; } // See if there are any signals we need to action. - let task = current_task(); - let mut pending_task_sigs = task.pending_signals.lock_save_irq(); - let mask = task.sig_mask.lock_save_irq(); - if let Some((id, action)) = task - .process - .signals - .lock_save_irq() - .action_signal(*mask, &mut pending_task_sigs) - { + if let Some((id, action)) = { + let task = current_task(); + let mut pending_task_sigs = task.pending_signals; + + let mask = task.sig_mask; + task.process + .signals + .lock_save_irq() + .action_signal(mask, &mut pending_task_sigs) + } { match action { KSignalAction::Term | KSignalAction::Core => { // Terminate the process, and find a new task. @@ -239,6 +243,8 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { continue; } KSignalAction::Stop => { + let task = current_task(); + // Default action: stop (suspend) the entire process. let process = &task.process; @@ -265,6 +271,7 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { continue; } KSignalAction::Continue => { + let task = current_task(); let process = &task.process; // Wake up all sleeping threads in the process. @@ -297,7 +304,7 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { KSignalAction::Userspace(id, action) => { let fut = ArchImpl::do_signal(id, action); - task.ctx.lock_save_irq().put_signal_work(Box::pin(fut)); + current_task().ctx.put_signal_work(Box::pin(fut)); state = State::ProcessKernelWork; continue; @@ -310,7 +317,7 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { State::ReturnToUserspace => { // Real user-space return now. - current_task().ctx.lock_save_irq().restore_user_ctx(ctx); + current_task().ctx.restore_user_ctx(ctx); return; } } diff --git a/src/sched/waker.rs b/src/sched/waker.rs index e8d4e870..5129ba8a 100644 --- a/src/sched/waker.rs +++ b/src/sched/waker.rs @@ -1,6 +1,8 @@ use crate::process::{TASK_LIST, TaskDescriptor, TaskState}; use core::task::{RawWaker, RawWakerVTable, Waker}; +use super::SCHED_STATE; + unsafe fn clone_waker(data: *const ()) -> RawWaker { RawWaker::new(data, &VTABLE) } @@ -17,6 +19,8 @@ unsafe fn wake_waker(data: *const ()) { // If the task has been put to sleep, then wake it up. TaskState::Sleeping => { *state = TaskState::Runnable; + + SCHED_STATE.borrow_mut().wakeup(desc); } // If the task is running, mark it so it doesn't actually go to // sleep when poll returns. This covers the small race-window From fca835b573f5fe655857cd36af27cfa806f493ef Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Thu, 1 Jan 2026 00:09:48 -0800 Subject: [PATCH 2/4] fix scheduling bug --- src/sched/sched_task.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/sched/sched_task.rs b/src/sched/sched_task.rs index 161fcfe9..c5636b69 100644 --- a/src/sched/sched_task.rs +++ b/src/sched/sched_task.rs @@ -54,25 +54,27 @@ impl SchedulableTask { /// Update accounting info for this task given the latest time. pub fn tick(&mut self, now: Instant) { - let delta_vt = if let Some(start) = self.exec_start { + let dv_increment = if let Some(start) = self.exec_start { let delta = now - start; let w = self.weight() as u128; - let dv = ((delta.as_nanos() as u128) << VT_FIXED_SHIFT) / w; - self.v_runtime = dv; - dv + ((delta.as_nanos()) << VT_FIXED_SHIFT) / w } else { 0 }; + self.v_runtime = self.v_runtime.saturating_add(dv_increment); + // Advance its eligible time by the virtual run time it just used // (EEVDF: v_ei += t_used / w_i). - self.v_eligible += delta_vt; + self.v_eligible = self.v_eligible.saturating_add(dv_increment); // Re-issue a virtual deadline let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos(); let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128; let v_ei = self.v_eligible; self.v_deadline = v_ei + v_delta; + + self.exec_start = Some(now); } /// Compute this task's scheduling weight. From 41dec05046551ea40dc4aafe34735f7a5831f6fc Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Thu, 1 Jan 2026 21:07:04 +0000 Subject: [PATCH 3/4] sched: implement fast-path return Previously, the scheduler unconditionally performed a full runqueue search:(`find_next_runnable_desc`) on every invocation, including every timer tick and syscall return. This resulted in unnecessary overhead. This change introduces a "lazy preemption" model: 1. Fast-Path Optimization: `do_schedule` now checks if the current task is valid, is not the Idle task, and still has virtual budget remaining. If these conditions are met and `force_resched` is not set, the scheduler returns immediately without locking the runqueue. 2. Preemption & Idle Handling: - `insert_into_runq` now sets `force_resched` if the new task has an earlier deadline than the current task, or if the current task is Idle. - The Idle task is explicitly excluded from the fast-path to ensure immediate context switching when new work arrives. --- src/sched/mod.rs | 116 +++++++++++++++++++++++++++------------- src/sched/runqueue.rs | 41 ++++++-------- src/sched/sched_task.rs | 32 +++++++---- src/sched/uspc_ret.rs | 6 ++- 4 files changed, 123 insertions(+), 72 deletions(-) diff --git a/src/sched/mod.rs b/src/sched/mod.rs index 87b65003..974521e2 100644 --- a/src/sched/mod.rs +++ b/src/sched/mod.rs @@ -73,41 +73,13 @@ fn schedule() { return; } - let mut sched = SCHED_STATE.borrow_mut(); - - // Update Clocks - let now_inst = now().expect("System timer not initialised"); - - sched.advance_vclock(now_inst); - - if let Some(current) = sched.run_q.current_mut() { - current.tick(now_inst); - } - - // Select Next Task - let next_task_desc = sched.run_q.find_next_runnable_desc(sched.vclock); - - match sched.run_q.switch_tasks(next_task_desc, now_inst) { - SwitchResult::AlreadyRunning => { - // Nothing to do. - return; - } - SwitchResult::Blocked { old_task } => { - // If the blocked task has finished, allow it to drop here so it's - // resources are released. - if !old_task.state.lock_save_irq().is_finished() { - sched.wait_q.insert(old_task.descriptor(), old_task); - } - } - // fall-thru. - SwitchResult::Preempted => {} - } + SCHED_STATE.borrow_mut().do_schedule(); +} - // Update all context since the task has switched. - if let Some(new_current) = sched.run_q.current_mut() { - ArchImpl::context_switch(new_current.t_shared.clone()); - CUR_TASK_PTR.borrow_mut().set_current(&mut new_current.task); - } +/// Set the force resched task for this CPU. This ensures that the next time +/// schedule() is called a full run of the schduling algorithm will occur. +fn force_resched() { + SCHED_STATE.borrow_mut().force_resched = true; } pub fn spawn_kernel_work(fut: impl Future + 'static + Send) { @@ -159,6 +131,8 @@ pub struct SchedState { vclock: u128, /// Real-time moment when `vclock` was last updated. last_update: Option, + /// Force a reschedule. + force_resched: bool, } unsafe impl Send for SchedState {} @@ -170,6 +144,7 @@ impl SchedState { wait_q: BTreeMap::new(), vclock: 0, last_update: None, + force_resched: false, } } @@ -190,12 +165,24 @@ impl SchedState { self.last_update = Some(now_inst); } - fn insert_into_runq(&mut self, task: Box) { + fn insert_into_runq(&mut self, mut new_task: Box) { let now = now().expect("systimer not running"); self.advance_vclock(now); - self.run_q.enqueue_task(task, self.vclock); + new_task.inserting_into_runqueue(self.vclock); + + if let Some(current) = self.run_q.current() { + // We force a reschedule if: + // + // We are currently idling, OR The new task has an earlier deadline + // than the current task. + if current.is_idle_task() || new_task.v_deadline < current.v_deadline { + self.force_resched = true; + } + } + + self.run_q.enqueue_task(new_task); } pub fn wakeup(&mut self, desc: TaskDescriptor) { @@ -205,6 +192,63 @@ impl SchedState { warn!("Spurious wakeup for task {:?}", desc); } } + + pub fn do_schedule(&mut self) { + // Update Clocks + let now_inst = now().expect("System timer not initialised"); + + self.advance_vclock(now_inst); + + let mut needs_resched = self.force_resched; + + if let Some(current) = self.run_q.current_mut() { + // If the current task is IDLE, we always want to proceed to the + // scheduler core to see if a real task has arrived. + if current.is_idle_task() { + needs_resched = true; + } + else if current.tick(now_inst) { + // Otherwise, check if the real task expired + needs_resched = true; + } + } else { + needs_resched = true; + } + + if !needs_resched { + // Fast Path: Only return if we have a valid task, it has budget, + // AND it's not the idle task. + return; + } + + // Reset the force flag for next time. + self.force_resched = false; + + // Select Next Task. + let next_task_desc = self.run_q.find_next_runnable_desc(self.vclock); + + match self.run_q.switch_tasks(next_task_desc, now_inst) { + SwitchResult::AlreadyRunning => { + // Nothing to do. + return; + } + SwitchResult::Blocked { old_task } => { + // If the blocked task has finished, allow it to drop here so it's + // resources are released. + if !old_task.state.lock_save_irq().is_finished() { + self.wait_q.insert(old_task.descriptor(), old_task); + } + } + // fall-thru. + SwitchResult::Preempted => {} + } + + // Update all context since the task has switched. + if let Some(new_current) = self.run_q.current_mut() { + ArchImpl::context_switch(new_current.t_shared.clone()); + CUR_TASK_PTR.borrow_mut().set_current(&mut new_current.task); + } + } } pub fn sched_init() { diff --git a/src/sched/runqueue.rs b/src/sched/runqueue.rs index b5ec9944..ef79a587 100644 --- a/src/sched/runqueue.rs +++ b/src/sched/runqueue.rs @@ -41,24 +41,11 @@ impl RunQueue { } } - fn insert_task(&mut self, task: Box) { - if !task.is_idle_task() { - self.total_weight = self.total_weight.saturating_add(task.weight() as u64); - } - - if let Some(old_task) = self.queue.insert(task.descriptor(), task) { - // Handle the edge case where we overwrite a task. If we replaced - // someone, we must subtract their weight to avoid accounting drift. - warn!("Overwrote active task {:?}", old_task.descriptor()); - self.total_weight = self.total_weight.saturating_sub(old_task.weight() as u64); - } - } - pub fn switch_tasks(&mut self, next_task: TaskDescriptor, now_inst: Instant) -> SwitchResult { - if let Some(current) = self.current() { - if current.descriptor() == next_task { - return SwitchResult::AlreadyRunning; - } + if let Some(current) = self.current() + && current.descriptor() == next_task + { + return SwitchResult::AlreadyRunning; } let mut new_task = match self.queue.remove(&next_task) { @@ -101,6 +88,7 @@ impl RunQueue { self.total_weight } + #[allow(clippy::borrowed_box)] pub fn current(&self) -> Option<&Box> { self.running_task.as_ref() } @@ -112,7 +100,7 @@ impl RunQueue { fn fallback_current_or_idle(&self) -> TaskDescriptor { if let Some(ref current) = self.running_task { let s = *current.state.lock_save_irq(); - if !current.is_idle_task() && (s == TaskState::Runnable || s == TaskState::Running) { + if !current.is_idle_task() && (s == TaskState::Runnable || s == TaskState::Running) { return current.descriptor(); } } @@ -165,12 +153,17 @@ impl RunQueue { best_queued_desc } - /// Inserts `task` into this CPU's run-queue and updates all EEVDF - /// accounting information (eligible time, virtual deadline and the cached - /// weight sum). - pub fn enqueue_task(&mut self, mut new_task: Box, vclock: u128) { - new_task.inserting_into_runqueue(vclock); + /// Inserts `task` into this CPU's run-queue. + pub fn enqueue_task(&mut self, new_task: Box) { + if !new_task.is_idle_task() { + self.total_weight = self.total_weight.saturating_add(new_task.weight() as u64); + } - self.insert_task(new_task); + if let Some(old_task) = self.queue.insert(new_task.descriptor(), new_task) { + // Handle the edge case where we overwrite a task. If we replaced + // someone, we must subtract their weight to avoid accounting drift. + warn!("Overwrote active task {:?}", old_task.descriptor()); + self.total_weight = self.total_weight.saturating_sub(old_task.weight() as u64); + } } } diff --git a/src/sched/sched_task.rs b/src/sched/sched_task.rs index c5636b69..6050f6f8 100644 --- a/src/sched/sched_task.rs +++ b/src/sched/sched_task.rs @@ -52,8 +52,16 @@ impl SchedulableTask { }) } - /// Update accounting info for this task given the latest time. - pub fn tick(&mut self, now: Instant) { + /// Re-issue a virtual deadline + pub fn replenish_deadline(&mut self) { + let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos(); + let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128; + self.v_deadline = self.v_eligible + v_delta; + } + + /// Update accounting info for this task given the latest time. Returns + /// `true` when we should try to reschedule another task, `false` otherwise. + pub fn tick(&mut self, now: Instant) -> bool { let dv_increment = if let Some(start) = self.exec_start { let delta = now - start; let w = self.weight() as u128; @@ -68,13 +76,18 @@ impl SchedulableTask { // (EEVDF: v_ei += t_used / w_i). self.v_eligible = self.v_eligible.saturating_add(dv_increment); - // Re-issue a virtual deadline - let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos(); - let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128; - let v_ei = self.v_eligible; - self.v_deadline = v_ei + v_delta; - self.exec_start = Some(now); + + // Has the task exceeded its deadline? + if self.v_eligible >= self.v_deadline { + self.replenish_deadline(); + + true + } else { + // Task still has budget. Do nothing. Return to userspace + // immediately. + false + } } /// Compute this task's scheduling weight. @@ -119,8 +132,7 @@ impl SchedulableTask { // Grant it an initial virtual deadline proportional to its weight. let q_ns: u128 = DEFAULT_TIME_SLICE.as_nanos(); let v_delta = (q_ns << VT_FIXED_SHIFT) / self.weight() as u128; - let new_v_deadline = vclock + v_delta; - self.v_deadline = new_v_deadline; + self.v_deadline = vclock + v_delta; // Since the task is not executing yet, its exec_start must be `None`. self.exec_start = None; diff --git a/src/sched/uspc_ret.rs b/src/sched/uspc_ret.rs index 85481e4b..99c13c9d 100644 --- a/src/sched/uspc_ret.rs +++ b/src/sched/uspc_ret.rs @@ -1,4 +1,4 @@ -use super::{current::current_task, schedule, waker::create_waker}; +use super::{current::current_task, force_resched, schedule, waker::create_waker}; use crate::{ arch::{Arch, ArchImpl}, process::{ @@ -128,7 +128,8 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { // task. // Task is currently running or is runnable and will now sleep. TaskState::Running | TaskState::Runnable => { - *task_state = TaskState::Sleeping + force_resched(); + *task_state = TaskState::Sleeping; } // If we were woken between the future returning // `Poll::Pending` and acquiring the lock above, @@ -194,6 +195,7 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { match *task_state { // Task is runnable or running, put it to sleep. TaskState::Running | TaskState::Runnable => { + force_resched(); *task_state = TaskState::Sleeping } // If we were woken between the future returning From 37138e1ee7ed7a9398d0d2b5698e0735a1893ef9 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Thu, 1 Jan 2026 22:52:13 +0000 Subject: [PATCH 4/4] sched, messenger: get SMP working with sched changes Add various fixes to get an SMP version of `usertest` working with SMP. --- src/arch/arm64/boot/mod.rs | 7 +------ src/interrupts/cpu_messenger.rs | 27 ++++++++++++++------------- src/process/mod.rs | 2 +- src/sched/mod.rs | 19 ++++++++++++++----- src/sched/uspc_ret.rs | 3 +++ src/sched/waker.rs | 27 ++++++++++++++++++++------- 6 files changed, 53 insertions(+), 32 deletions(-) diff --git a/src/arch/arm64/boot/mod.rs b/src/arch/arm64/boot/mod.rs index 82c5e66d..276ae5d5 100644 --- a/src/arch/arm64/boot/mod.rs +++ b/src/arch/arm64/boot/mod.rs @@ -10,10 +10,7 @@ use crate::{ fdt_prober::{probe_for_fdt_devices, set_fdt_va}, init::run_initcalls, }, - interrupts::{ - cpu_messenger::{Message, cpu_messenger_init, message_cpu}, - get_interrupt_root, - }, + interrupts::{cpu_messenger::cpu_messenger_init, get_interrupt_root}, kmain, memory::{INITAL_ALLOCATOR, PAGE_ALLOC}, sched::{sched_init_secondary, uspc_ret::dispatch_userspace_task}, @@ -132,8 +129,6 @@ fn arch_init_stage2(frame: *mut ExceptionState) -> *mut ExceptionState { boot_secondaries(); // Prove that we can send IPIs through the messenger. - let _ = message_cpu(1, Message::Ping(ArchImpl::id() as _)); - frame } diff --git a/src/interrupts/cpu_messenger.rs b/src/interrupts/cpu_messenger.rs index c2e4d084..cb938ed0 100644 --- a/src/interrupts/cpu_messenger.rs +++ b/src/interrupts/cpu_messenger.rs @@ -1,8 +1,11 @@ //! A module for sending messages between CPUs, utilising IPIs. +use core::task::Waker; + use super::{ ClaimedInterrupt, InterruptConfig, InterruptDescriptor, InterruptHandler, get_interrupt_root, }; +use crate::kernel::cpu_id::CpuId; use crate::process::owned::OwnedTask; use crate::{ arch::ArchImpl, @@ -17,11 +20,11 @@ use libkernel::{ CpuOps, error::{KernelError, Result}, }; -use log::{info, warn}; +use log::warn; pub enum Message { PutTask(Box), - Ping(u32), + WakeupTask(Waker), } struct CpuMessenger { @@ -37,21 +40,19 @@ impl Driver for CpuMessenger { impl InterruptHandler for CpuMessenger { fn handle_irq(&self, _desc: InterruptDescriptor) { - let message = CPU_MESSENGER + while let Some(message) = CPU_MESSENGER .get() .unwrap() .mailboxes .lock_save_irq() .get(ArchImpl::id()) .unwrap() - .try_pop(); - - match message { - Some(Message::PutTask(task)) => sched::insert_task(task), - Some(Message::Ping(cpu_id)) => { - info!("CPU {} recieved ping from CPU {}", ArchImpl::id(), cpu_id) + .try_pop() + { + match message { + Message::PutTask(task) => sched::insert_task(task), + Message::WakeupTask(waker) => waker.wake(), } - None => warn!("Spurious CPU IPI"), } } } @@ -86,19 +87,19 @@ pub fn cpu_messenger_init(num_cpus: usize) { } } -pub fn message_cpu(cpu_id: usize, message: Message) -> Result<()> { +pub fn message_cpu(cpu_id: CpuId, message: Message) -> Result<()> { let messenger = CPU_MESSENGER.get().ok_or(KernelError::InvalidValue)?; let irq = get_interrupt_root().ok_or(KernelError::InvalidValue)?; messenger .mailboxes .lock_save_irq() - .get(cpu_id) + .get(cpu_id.value()) .ok_or(KernelError::InvalidValue)? .try_push(message) .map_err(|_| KernelError::NoMemory)?; - irq.raise_ipi(cpu_id); + irq.raise_ipi(cpu_id.value()); Ok(()) } diff --git a/src/process/mod.rs b/src/process/mod.rs index b21a71be..1770c411 100644 --- a/src/process/mod.rs +++ b/src/process/mod.rs @@ -56,7 +56,7 @@ impl TaskDescriptor { pub fn this_cpus_idle() -> Self { Self { tgid: Tgid(0), - tid: Tid(0), + tid: Tid(CpuId::this().value() as _), } } diff --git a/src/sched/mod.rs b/src/sched/mod.rs index 974521e2..fd78ec91 100644 --- a/src/sched/mod.rs +++ b/src/sched/mod.rs @@ -114,7 +114,7 @@ pub fn insert_task_cross_cpu(task: Box) { if cpu == CpuId::this() { insert_task(task); } else { - message_cpu(cpu.value(), Message::PutTask(task)).expect("Failed to send task to CPU"); + message_cpu(cpu, Message::PutTask(task)).expect("Failed to send task to CPU"); } } @@ -174,7 +174,7 @@ impl SchedState { if let Some(current) = self.run_q.current() { // We force a reschedule if: - // + // // We are currently idling, OR The new task has an earlier deadline // than the current task. if current.is_idle_task() || new_task.v_deadline < current.v_deadline { @@ -189,7 +189,11 @@ impl SchedState { if let Some(task) = self.wait_q.remove(&desc) { self.insert_into_runq(task); } else { - warn!("Spurious wakeup for task {:?}", desc); + warn!( + "Spurious wakeup for task {:?} on CPU {:?}", + desc, + CpuId::this().value() + ); } } @@ -206,8 +210,7 @@ impl SchedState { // scheduler core to see if a real task has arrived. if current.is_idle_task() { needs_resched = true; - } - else if current.tick(now_inst) { + } else if current.tick(now_inst) { // Otherwise, check if the real task expired needs_resched = true; } @@ -218,6 +221,12 @@ impl SchedState { if !needs_resched { // Fast Path: Only return if we have a valid task, it has budget, // AND it's not the idle task. + // + // Ensure that, in a debug build, we are only taking the fast-path + // on a *running* task. + if let Some(current) = self.run_q.current_mut() { + debug_assert_eq!(*current.state.lock_save_irq(), TaskState::Running); + } return; } diff --git a/src/sched/uspc_ret.rs b/src/sched/uspc_ret.rs index 99c13c9d..72d37fef 100644 --- a/src/sched/uspc_ret.rs +++ b/src/sched/uspc_ret.rs @@ -170,6 +170,9 @@ pub fn dispatch_userspace_task(ctx: *mut UserCtx) { // find another task to execute, removing this task // from the runqueue, reaping it's resouces. if task.state.lock_save_irq().is_finished() { + // Ensure we don't take the fast-path sched exit + // for a finished task. + force_resched(); state = State::PickNewTask; continue; } diff --git a/src/sched/waker.rs b/src/sched/waker.rs index 5129ba8a..2ae8d073 100644 --- a/src/sched/waker.rs +++ b/src/sched/waker.rs @@ -1,4 +1,8 @@ -use crate::process::{TASK_LIST, TaskDescriptor, TaskState}; +use crate::{ + interrupts::cpu_messenger::{Message, message_cpu}, + kernel::cpu_id::CpuId, + process::{TASK_LIST, TaskDescriptor, TaskState}, +}; use core::task::{RawWaker, RawWakerVTable, Waker}; use super::SCHED_STATE; @@ -11,16 +15,25 @@ unsafe fn clone_waker(data: *const ()) -> RawWaker { unsafe fn wake_waker(data: *const ()) { let desc = TaskDescriptor::from_ptr(data); - if let Some(task) = TASK_LIST.lock_save_irq().get(&desc) - && let Some(task) = task.upgrade() - { + let task = TASK_LIST + .lock_save_irq() + .get(&desc) + .and_then(|x| x.upgrade()); + + if let Some(task) = task { let mut state = task.state.lock_save_irq(); + let locus = *task.last_cpu.lock_save_irq(); + match *state { // If the task has been put to sleep, then wake it up. TaskState::Sleeping => { - *state = TaskState::Runnable; - - SCHED_STATE.borrow_mut().wakeup(desc); + if locus == CpuId::this() { + *state = TaskState::Runnable; + SCHED_STATE.borrow_mut().wakeup(desc); + } else { + message_cpu(locus, Message::WakeupTask(create_waker(desc))) + .expect("Could not wakeup task on other CPU"); + } } // If the task is running, mark it so it doesn't actually go to // sleep when poll returns. This covers the small race-window