diff --git a/Cargo.lock b/Cargo.lock index d4cf149..1050f46 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -228,7 +234,7 @@ version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn", @@ -758,6 +764,7 @@ dependencies = [ "opentelemetry", "opentelemetry-otlp", "opentelemetry_sdk", + "ouroboros", "rand", "reqwest", "reqwest-middleware", @@ -839,6 +846,12 @@ dependencies = [ "hashbrown 0.16.1", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -1497,6 +1510,30 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ouroboros" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0f050db9c44b97a94723127e6be766ac5c340c48f2c4bb3ffa11713744be59" +dependencies = [ + "aliasable", + "ouroboros_macro", + "static_assertions", +] + +[[package]] +name = "ouroboros_macro" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c7028bdd3d43083f6d8d4d5187680d0d3560d54df4cc9d752005268b41e64d0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn", +] + [[package]] name = "page_size" version = "0.6.0" @@ -1623,6 +1660,19 @@ dependencies = [ "unicode-ident", 
] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "version_check", + "yansi", +] + [[package]] name = "prost" version = "0.13.5" @@ -2312,6 +2362,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -2865,6 +2921,12 @@ dependencies = [ "rustversion", ] +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "vt100" version = "0.16.2" @@ -3309,7 +3371,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "wit-parser", ] @@ -3320,7 +3382,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "indexmap 2.13.0", "prettyplease", "syn", @@ -3387,6 +3449,12 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "yoke" 
version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index d837f7f..dcf7b55 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,7 @@ tracing-indicatif = "0.3.14" opentelemetry = { version = "0.29" } opentelemetry_sdk = { version = "0.29", features = ["rt-tokio"] } opentelemetry-otlp = { version = "0.29", default-features = false, features = ["http-proto", "trace", "reqwest-blocking-client"] } +ouroboros = "0.18" tracing-opentelemetry = { version = "0.30" } hashlink = "0.11.0" diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs new file mode 100644 index 0000000..36eaade --- /dev/null +++ b/lib/cache/async_backed.rs @@ -0,0 +1,266 @@ +//! Concurrent deduplication cache for async computations. +//! +//! Given a key and an async factory, ensures the factory runs at most once per key. Subsequent +//! callers for the same key await the already-in-flight computation via a [`Shared`] future, +//! avoiding the race conditions inherent in `Notify`-based signalling. +//! +//! Note that this cache does not support automatic eviction. + +use std::{fmt::Debug, future::Future, hash::Hash, pin::Pin}; + +use futures::FutureExt as _; +use futures::future::Shared; + +/// Two-state slot: `InFlight` while a factory future is running, then promoted to `Ready` once +/// the future completes. The caller that inserted the `InFlight` variant is responsible for +/// awaiting the `Shared` future and performing the promotion to `Ready`. +enum Slot { + InFlight(Shared + Send>>>), + Ready(V), +} + +/// Deduplicating async cache. +/// +/// If [`get_or_init`](Self::get_or_init) is called concurrently for the same key, only one +/// invocation of the factory runs. All callers receive a clone of the result. 
+pub struct FutureBackedCache { + map: scc::HashMap>, +} + +impl Default for FutureBackedCache +where + K: Eq + Hash, + V: Clone + Send + 'static, +{ + fn default() -> Self { + Self { + map: scc::HashMap::default(), + } + } +} + +impl FutureBackedCache +where + K: Eq + Hash + Debug + Clone + Send + Sync + 'static, + V: Clone + Send + Sync + 'static, +{ + /// Get the cached value for `key`, or initialize it by running `factory`. + /// + /// If another caller is already computing the value for this key, this awaits the in-flight + /// computation instead of spawning a duplicate. + pub async fn get_or_init(&self, key: K, factory: F) -> V + where + F: FnOnce() -> Fut, + Fut: Future + Send + 'static, + { + // Fast path: value already cached. Uses a shared read lock on the bucket, released + // immediately after the closure returns. + let existing = self + .map + .read_async(&key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(shared) => Err(shared.clone()), + }) + .await; + + match existing { + Some(Ok(v)) => return v, + Some(Err(shared)) => return shared.await, + None => {} + } + + // Slow path: use entry_async for atomic check-and-insert. This acquires an exclusive lock + // on the bucket, so no other caller can race between our check and insert. + let shared = match self.map.entry_async(key.clone()).await { + scc::hash_map::Entry::Occupied(occ) => match occ.get() { + Slot::Ready(v) => return v.clone(), + Slot::InFlight(shared) => shared.clone(), + }, + scc::hash_map::Entry::Vacant(vac) => { + let boxed: Pin + Send>> = Box::pin(factory()); + let shared = boxed.shared(); + let ret = shared.clone(); + vac.insert_entry(Slot::InFlight(shared)); + ret + } + }; + + let val: V = shared.await; + + // Promote to Ready so future callers hit the fast path and the Shared machinery can be + // dropped. 
+ self.map + .update_async(&key, |_, slot| { + if matches!(slot, Slot::InFlight(_)) { + *slot = Slot::Ready(val.clone()); + } + }) + .await; + + val + } + + /// Like [`get_or_init`](Self::get_or_init), but for fallible factories. + /// + /// If the factory returns `Ok(v)`, the value is cached and returned. If it returns `Err(e)`, + /// **nothing is cached** and the error is propagated to the caller. + /// + /// Unlike `get_or_init`, concurrent callers are **not** deduplicated — each caller that + /// finds the key absent will invoke the factory independently. However, if a value was + /// previously cached (by either `get_or_init` or a successful `get_or_try_init`), it is + /// returned immediately without calling the factory. + /// + /// Note: the returned value may have been produced by a concurrent caller's factory, not the + /// one supplied to this call. + pub async fn get_or_try_init(&self, key: K, factory: F) -> Result + where + F: FnOnce() -> Fut, + Fut: Future> + Send + 'static, + { + // Fast path: value already cached or in-flight from an infallible init. + let existing = self + .map + .read_async(&key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(shared) => Err(shared.clone()), + }) + .await; + + match existing { + Some(Ok(v)) => return Ok(v), + Some(Err(shared)) => return Ok(shared.await), + None => {} + } + + // Run the fallible factory (not deduplicated). + let val = factory().await?; + + // Attempt to cache. If another caller raced us and already inserted, + // return the existing value and discard ours. + match self.map.entry_async(key).await { + scc::hash_map::Entry::Occupied(occ) => match occ.get() { + Slot::Ready(v) => Ok(v.clone()), + // A concurrent `get_or_init` started an in-flight computation + // while our factory was running. 
+ Slot::InFlight(shared) => Ok(shared.clone().await), + }, + scc::hash_map::Entry::Vacant(vac) => { + vac.insert_entry(Slot::Ready(val.clone())); + Ok(val) + } + } + } + + /// Get the cached value for `key` if it exists. + /// + /// - If the value is `Ready`, returns `Some(v)` immediately. + /// - If the value is `InFlight`, awaits the in-flight computation and returns `Some(v)`. + /// - If the key is absent, returns `None`. + pub async fn get(&self, key: &K) -> Option { + let existing = self + .map + .read_async(key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(shared) => Err(shared.clone()), + }) + .await; + + match existing { + Some(Ok(v)) => Some(v), + Some(Err(shared)) => Some(shared.await), + None => None, + } + } + + /// Returns the number of entries in the cache (both `Ready` and `InFlight`). + #[must_use] + pub fn len(&self) -> usize { + self.map.len() + } + + /// Returns `true` if the cache contains no entries. + #[must_use] + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + /// Synchronously insert a value, overwriting any existing entry. + /// + /// Suitable for seeding the cache before async operations begin (e.g. + /// inside an ouroboros builder where async is unavailable). + pub fn insert_sync(&self, key: K, value: V) { + drop(self.map.insert_sync(key, Slot::Ready(value))); + } + + /// Synchronously remove the entry for `key`, returning `true` if it was present. + /// + /// Suitable for use in contexts where async is not available (e.g. inside + /// [`StatelessDrop::delete`](crate::drop_ward::StatelessDrop::delete)). 
+ pub fn remove_sync(&self, key: &K) -> bool { + self.map.remove_sync(key).is_some() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn try_init_ok_caches_value() { + let cache = FutureBackedCache::::default(); + let result: Result = cache + .get_or_try_init(1, || async { Ok("hello".to_owned()) }) + .await; + assert_eq!(result.unwrap(), "hello", "should return Ok value"); + + // Value should now be cached (get returns it without factory) + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "hello", "value should be in cache"); + } + + #[tokio::test] + async fn try_init_err_does_not_cache() { + let cache = FutureBackedCache::::default(); + let result: Result = cache.get_or_try_init(1, || async { Err("boom") }).await; + assert_eq!(result.unwrap_err(), "boom", "should return the error"); + + // Cache should be empty — error was not stored + assert!(cache.is_empty(), "cache should have no entries after error"); + assert!(cache.get(&1).await.is_none(), "key should not exist"); + } + + #[tokio::test] + async fn try_init_err_then_retry_ok() { + let cache = FutureBackedCache::::default(); + + // First call: factory fails + let r1: Result = cache.get_or_try_init(1, || async { Err("fail") }).await; + assert!(r1.is_err(), "first call should fail"); + + // Second call: factory succeeds + let r2: Result = cache + .get_or_try_init(1, || async { Ok("recovered".to_owned()) }) + .await; + assert_eq!(r2.unwrap(), "recovered", "retry should succeed"); + + // Value should now be cached + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "recovered"); + } + + #[tokio::test] + async fn try_init_returns_value_cached_by_init() { + let cache = FutureBackedCache::::default(); + + // Populate via infallible get_or_init + cache + .get_or_init(1, || async { "from_init".to_owned() }) + .await; + + // get_or_try_init should return the cached value without running factory + let result: Result = cache + .get_or_try_init(1, || async { 
panic!("factory should not run") }) + .await; + assert_eq!(result.unwrap(), "from_init"); + } +} diff --git a/lib/cache/mod.rs b/lib/cache/mod.rs index e0c1c97..5c48ee2 100644 --- a/lib/cache/mod.rs +++ b/lib/cache/mod.rs @@ -1,3 +1,5 @@ +/// Async-backed cache implementation. +pub mod async_backed; /// Cache eviction policies. pub mod eviction; /// File-backed cache implementation. diff --git a/lib/drop_ward.rs b/lib/drop_ward.rs new file mode 100644 index 0000000..4922e13 --- /dev/null +++ b/lib/drop_ward.rs @@ -0,0 +1,133 @@ +//! Automatic, type-directed cleanup driven by reference counting. +//! +//! [`DropWard`] tracks how many live references exist for a given key and invokes a cleanup +//! callback when a key's count reaches zero. The cleanup logic is selected at the type level +//! through a zero-sized "tag" type that implements [`StatelessDrop`], keeping the ward itself +//! generic over *what* it manages without storing per-key values. +//! +//! This is designed for resources whose lifecycle is bound to an external context (e.g. GPU device +//! handles, connection pools, graphics pipelines) where Rust's built-in `Drop` cannot be used +//! because cleanup requires access to that context. +//! +//! # Design rationale +//! +//! The tag type `T` is constrained to be zero-sized. It exists only to carry the [`StatelessDrop`] +//! implementation at the type level — no `T` value is ever constructed or stored. This means a +//! single `DropWard` instance adds no per-key overhead beyond the key and its `usize` count. +//! +//! # Example +//! +//! ```ignore +//! struct GpuTextureDrop; +//! +//! impl StatelessDrop for GpuTextureDrop { +//! fn delete(device: &wgpu::Device, _key: &TextureId) { +//! // e.g. flush a deferred-destruction queue +//! device.poll(wgpu::Maintain::Wait); +//! } +//! } +//! +//! let mut ward: DropWard = DropWard::new(device); +//! +//! ward.inc(texture_id); // → 1 +//! ward.inc(texture_id); // → 2 +//! ward.dec(&texture_id); // → Some(1) +//! 
ward.dec(&texture_id); // → Some(0), calls GpuTextureDrop::delete(&device, &texture_id) +//! ``` + +use std::marker::PhantomData; + +use rustc_hash::FxHashMap; + +/// Type-level hook for cleanup that requires an external context. +/// +/// Implement this on a zero-sized tag type. The tag is never instantiated — it only selects which +/// `delete` implementation a [`DropWard`] will call. +pub trait StatelessDrop { + /// Called exactly once when a key's reference count reaches zero. + /// + /// `ctx` is the shared context owned by the [`DropWard`]. `key` is the key whose count just + /// reached zero. This callback fires synchronously inside [`DropWard::dec`]; avoid blocking or + /// panicking if the ward is used on a hot path. + fn delete(ctx: &Ctx, key: &K); +} + +/// A reference-counted key set that triggers [`StatelessDrop::delete`] on the associated context +/// when any key's count drops to zero. +/// +/// # Type parameters +/// +/// - `Ctx` — shared context passed to `T::delete` (e.g. a device handle). +/// - `K` — the key type being reference-counted. +/// - `T` — a **zero-sized** tag type carrying the cleanup logic. +/// Will fail to compile if `size_of::() != 0`. +/// +/// # Concurrency +/// +/// Not thread-safe. All access requires `&mut self`. Wrap in a `Mutex` or similar if shared across +/// threads. +/// +#[derive(Debug, Clone)] +pub struct DropWard { + map: FxHashMap, + ctx: Ctx, + _marker: PhantomData, +} + +impl DropWard +where + K: Eq + std::hash::Hash, + T: StatelessDrop, +{ + /// Compile-time guard: `T` must be zero-sized. + const _ASSERT_ZST: () = assert!(size_of::() == 0, "T must be zero-sized"); + + /// Create a new ward that will pass `ctx` to `T::delete` on cleanup. + pub fn new(ctx: Ctx) -> Self { + Self { + map: FxHashMap::default(), + ctx, + _marker: PhantomData, + } + } + + /// Increment the reference count for `key`, inserting it with a count + /// of 1 if it does not exist. + /// + /// Returns the count **after** incrementing. 
+ pub fn inc(&mut self, key: K) -> usize { + *self + .map + .entry(key) + .and_modify(|count| *count += 1) + .or_insert(1) + } + + fn dec_by(&mut self, key: &K, by: usize) -> Option { + let curr = *self.map.get(key)?; + let new_count = curr.saturating_sub(by); + if new_count == 0 { + self.map.remove(key); + T::delete(&self.ctx, key); + } else if let Some(slot) = self.map.get_mut(key) { + *slot = new_count; + } + Some(new_count) + } + + /// Decrement the reference count for `key`. + /// + /// If the count reaches zero, the key is removed and `T::delete` is + /// called synchronously with the ward's context. Returns `Some(0)` in + /// this case — the key will no longer be tracked. + /// + /// Returns `None` if `key` was not present (no-op). + pub fn dec(&mut self, key: &K) -> Option { + self.dec_by(key, 1) + } + + /// Decrement the reference count for `key` by `count`. + pub fn dec_count(&mut self, key: &K, count: usize) -> Option { + self.dec_by(key, count) + } +} diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs new file mode 100644 index 0000000..9d836ae --- /dev/null +++ b/lib/fs/async_fs.rs @@ -0,0 +1,423 @@ +//! Async `INode` Table which supports concurrent access and modification. + +use std::ffi::{OsStr, OsString}; +use std::future::Future; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; + +use bytes::Bytes; + +use crate::cache::async_backed::FutureBackedCache; +use crate::drop_ward::StatelessDrop; +use crate::fs::{ + AsyncFsStats, DirEntry, FileHandle, INode, INodeType, InodeAddr, LoadedAddr, OpenFlags, + dcache::DCache, +}; + +/// A reader for an open file, returned by [`FsDataProvider::open`]. +/// +/// Implementors provide the actual data for read operations. Dropping the +/// reader releases any resources held for the open file. +pub trait FileReader: Send + Sync + 'static { + /// Read up to `size` bytes starting at byte `offset`. 
+ fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send; +} + +/// A data provider for [`AsyncFs`] that fetches inode data on cache misses. +pub trait FsDataProvider: Clone + Send + Sync + 'static { + /// The reader type returned by [`open`](Self::open). + type Reader: FileReader; + + /// Look up a child inode by name within the given parent directory. + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) -> impl Future> + Send; + + /// List all children of a directory. + /// + /// Called by [`AsyncFs::readdir`] on a cache miss. The returned + /// children are inserted into the directory cache and inode table + /// so subsequent reads are served from cache. + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send; + + /// Open a file and return a reader for subsequent read calls. + fn open( + &self, + inode: INode, + flags: OpenFlags, + ) -> impl Future> + Send; +} + +/// Zero-sized tag whose [`StatelessDrop`] implementation automatically evicts +/// an inode from the inode table when its reference count reaches zero. +pub struct InodeForget; + +impl<'a> StatelessDrop<&'a FutureBackedCache, InodeAddr> for InodeForget { + fn delete(inode_table: &&'a FutureBackedCache, addr: &InodeAddr) { + inode_table.remove_sync(addr); + } +} + +/// A looked-up inode whose lifetime must be managed by the caller. +/// +/// Each `TrackedINode` returned by [`AsyncFs::lookup`] represents one +/// reference that the FUSE kernel holds. The caller must balance it by +/// decrementing the [`InodeLifecycle`] ward when the kernel sends `forget`. +#[derive(Debug, Clone, Copy)] +pub struct TrackedINode { + /// The resolved inode data. + pub inode: INode, +} + +/// An open file that provides read access. +/// +/// Returned by [`AsyncFs::open`]. The caller owns this handle and uses +/// [`read`](Self::read) to fetch data. Dropping the handle releases +/// the underlying reader when the last `Arc` clone is gone. 
+#[derive(Debug, Clone)] +pub struct OpenFile { + /// The raw file handle number, suitable for returning to the FUSE kernel. + pub fh: FileHandle, + /// The reader backing this open file. + pub reader: Arc, +} + +impl OpenFile { + /// Read up to `size` bytes starting at byte `offset`. + pub async fn read(&self, offset: u64, size: u32) -> Result { + self.reader.read(offset, size).await + } +} + +mod inode_lifecycle_impl { + #![allow(clippy::future_not_send, clippy::mem_forget)] + use ouroboros::self_referencing; + + use crate::cache::async_backed::FutureBackedCache; + use crate::drop_ward::DropWard; + use crate::fs::InodeAddr; + + use super::{INode, InodeForget}; + + /// Co-located inode table and reference-count ward. + /// + /// The ward borrows the table directly (no `Arc`) via `ouroboros`. + /// When `dec` reaches zero for a key, [`InodeForget::delete`] synchronously + /// removes that inode from the table. + #[self_referencing] + pub struct InodeLifecycle { + pub(super) table: FutureBackedCache, + #[borrows(table)] + #[not_covariant] + pub(super) ward: + DropWard<&'this FutureBackedCache, InodeAddr, InodeForget>, + } + + impl InodeLifecycle { + /// Create a new lifecycle managing the given inode table. + pub fn from_table(table: FutureBackedCache) -> Self { + Self::new(table, |tbl| DropWard::new(tbl)) + } + } +} + +pub use inode_lifecycle_impl::InodeLifecycle; + +impl InodeLifecycle { + /// Increment the reference count for an inode address. + pub fn inc(&mut self, addr: InodeAddr) -> usize { + self.with_ward_mut(|ward| ward.inc(addr)) + } + + /// Decrement the reference count for an inode address. + /// + /// When the count reaches zero, the inode is automatically evicted + /// from the table via [`InodeForget::delete`]. + pub fn dec(&mut self, addr: &InodeAddr) -> Option { + self.with_ward_mut(|ward| ward.dec(addr)) + } + + /// Decrement the reference count by `count`. + /// + /// When the count reaches zero, the inode is automatically evicted. 
+ pub fn dec_count(&mut self, addr: &InodeAddr, count: usize) -> Option { + self.with_ward_mut(|ward| ward.dec_count(addr, count)) + } + + /// Read-only access to the underlying inode table. + #[must_use] + pub fn table(&self) -> &FutureBackedCache { + self.borrow_table() + } +} + +/// An asynchronous filesystem cache mapping `InodeAddr` to `INode`. +/// +/// Uses two [`FutureBackedCache`] layers: +/// - `inode_table` stores resolved inodes by address, used by [`loaded_inode`](Self::loaded_inode). +/// - `lookup_cache` stores lookup results by `(parent_addr, name)`, ensuring `dp.lookup()` is only +/// called on a true cache miss (not already cached or in-flight). +/// +/// The [`DCache`] sits in front as a synchronous fast path mapping `(parent, name)` to child addr. +pub struct AsyncFs<'tbl, DP: FsDataProvider> { + /// Canonical addr -> `INode` map. Used by `loaded_inode()` to retrieve inodes by address. + inode_table: &'tbl FutureBackedCache, + + /// Deduplicating lookup cache keyed by `(parent_addr, child_name)`. The factory is + /// `dp.lookup()`, so the data provider is only called on a true cache miss. + lookup_cache: FutureBackedCache<(InodeAddr, OsString), INode>, + + /// Directory entry cache, mapping `(parent, name)` to child inode address. + directory_cache: DCache, + + /// The data provider used to fetch inode data on cache misses. + data_provider: DP, + + /// Monotonically increasing file handle counter. Starts at 1 (0 is reserved). + next_fh: AtomicU64, + + /// Tracks which directories have had their children fetched via `dp.readdir`. + readdir_populated: FutureBackedCache, +} + +impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { + /// Create a new `AsyncFs`, seeding the root inode into the table. 
+ pub async fn new( + data_provider: DP, + root: INode, + inode_table: &'tbl FutureBackedCache, + ) -> Self { + inode_table + .get_or_init(root.addr, || async move { root }) + .await; + + Self { + inode_table, + lookup_cache: FutureBackedCache::default(), + directory_cache: DCache::new(), + data_provider, + next_fh: AtomicU64::new(1), + readdir_populated: FutureBackedCache::default(), + } + } + + /// Create a new `AsyncFs`, assuming the root inode is already in the table. + /// + /// This synchronous constructor is needed for ouroboros builders where + /// async is unavailable. The caller must ensure the root inode has already + /// been inserted into `inode_table` (e.g. via [`FutureBackedCache::insert_sync`]). + #[must_use] + pub fn new_preseeded( + data_provider: DP, + inode_table: &'tbl FutureBackedCache, + ) -> Self { + Self { + inode_table, + lookup_cache: FutureBackedCache::default(), + directory_cache: DCache::new(), + data_provider, + next_fh: AtomicU64::new(1), + readdir_populated: FutureBackedCache::default(), + } + } + + /// Get the total number of inodes currently stored in the inode table. + #[must_use] + pub fn inode_count(&self) -> usize { + self.inode_table.len() + } + + /// Return filesystem statistics. + /// + /// Reports the current inode count from the cache. Block-related + /// fields default to values appropriate for a virtual read-only + /// filesystem (4 KiB blocks, no free space). + #[must_use] + pub fn statfs(&self) -> AsyncFsStats { + AsyncFsStats { + block_size: 4096, + total_blocks: 0, + free_blocks: 0, + available_blocks: 0, + total_inodes: self.inode_count() as u64, + free_inodes: 0, + max_filename_length: 255, + } + } + + /// Asynchronously look up an inode by name within a parent directory. + /// + /// Resolution order: + /// 1. Directory cache (synchronous fast path) + /// 2. Lookup cache (`get_or_try_init` — calls `dp.lookup()` only on a true miss) + /// 3. 
On success, populates inode table and directory cache + pub async fn lookup( + &self, + parent: LoadedAddr, + name: &OsStr, + ) -> Result { + let parent_ino = self.loaded_inode(parent).await?; + debug_assert!( + matches!(parent_ino.itype, INodeType::Directory), + "parent inode should be a directory" + ); + + if let Some(dentry) = self.directory_cache.lookup(parent, name) + && let Some(inode) = self.inode_table.get(&dentry.ino.0).await + { + return Ok(TrackedINode { inode }); + } + // Inode was evicted from the table — fall through to the slow path. + + let name_owned = name.to_os_string(); + let name_for_cache = name_owned.clone(); + let lookup_key = (parent.0, name_owned.clone()); + let dp = self.data_provider.clone(); + + let child = self + .lookup_cache + .get_or_try_init(lookup_key, || async move { + dp.lookup(parent_ino, &name_owned).await + }) + .await?; + + self.inode_table + .get_or_init(child.addr, || async move { child }) + .await; + + self.directory_cache + .insert( + parent, + name_for_cache, + LoadedAddr(child.addr), + matches!(child.itype, INodeType::Directory), + ) + .await; + + Ok(TrackedINode { inode: child }) + } + + /// Retrieve an inode that is expected to already be loaded. + /// + /// If the inode is currently in-flight (being loaded by another caller), this awaits + /// completion. Returns an error if the inode is not in the table at all. + pub async fn loaded_inode(&self, addr: LoadedAddr) -> Result { + self.inode_table.get(&addr.0).await.ok_or_else(|| { + tracing::error!( + inode = ?addr.0, + "inode not found in table — this is a programming bug" + ); + std::io::Error::from_raw_os_error(libc::ENOENT) + }) + } + + /// Return the attributes of the inode at `addr`. + /// + /// This is the getattr entry point for the filesystem. Returns the + /// cached [`INode`] directly — callers at the FUSE boundary are + /// responsible for converting to `fuser::FileAttr`. 
+ pub async fn getattr(&self, addr: LoadedAddr) -> Result { + self.loaded_inode(addr).await + } + + /// Open a file for reading. + /// + /// Validates the inode is not a directory, delegates to the data provider + /// to create a [`FileReader`], and returns an [`OpenFile`] that the caller + /// owns. Reads go through [`OpenFile::read`]. + pub async fn open( + &self, + addr: LoadedAddr, + flags: OpenFlags, + ) -> Result, std::io::Error> { + let inode = self.loaded_inode(addr).await?; + if inode.itype == INodeType::Directory { + return Err(std::io::Error::from_raw_os_error(libc::EISDIR)); + } + let reader = self.data_provider.open(inode, flags).await?; + let fh = self.next_fh.fetch_add(1, Ordering::Relaxed); + Ok(OpenFile { + fh, + reader: Arc::new(reader), + }) + } + + /// Iterate directory entries for `parent`, starting from `offset`. + /// + /// On the first call for a given parent, fetches the directory listing + /// from the data provider and populates the directory cache and inode + /// table. Subsequent calls serve entries directly from cache. + /// + /// Entries are yielded in name-sorted order. For each entry, `filler` is + /// called with the [`DirEntry`] and the next offset value. If `filler` + /// returns `true` (indicating the caller's buffer is full), iteration + /// stops early. + /// + /// # Concurrency + /// + /// The `readdir_populated` check-then-populate is **not** atomic. If two + /// concurrent callers invoke `readdir` for the same parent, both may call + /// `dp.readdir()` and insert duplicate children. This is safe when the + /// caller serializes access (e.g. via `&mut self` on the `Fs` trait). + /// + /// TODO(MES-746): Implement `opendir` and `releasedir` to snapshot directory contents and + /// avoid racing with `lookup`/`createfile`. 
+ pub async fn readdir( + &self, + parent: LoadedAddr, + offset: u64, + mut filler: impl FnMut(DirEntry<'_>, u64) -> bool, + ) -> Result<(), std::io::Error> { + let parent_inode = self.loaded_inode(parent).await?; + if parent_inode.itype != INodeType::Directory { + return Err(std::io::Error::from_raw_os_error(libc::ENOTDIR)); + } + + // Populate the directory cache on first readdir for this parent. + if self.readdir_populated.get(&parent).await.is_none() { + let children = self.data_provider.readdir(parent_inode).await?; + for (name, child_inode) in children { + self.inode_table + .get_or_init(child_inode.addr, || async move { child_inode }) + .await; + self.directory_cache + .insert( + parent, + name, + LoadedAddr(child_inode.addr), + child_inode.itype == INodeType::Directory, + ) + .await; + } + self.readdir_populated + .get_or_init(parent, || async {}) + .await; + } + + let mut children = self.directory_cache.readdir(parent).await; + children.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + + #[expect( + clippy::cast_possible_truncation, + reason = "offset fits in usize on supported 64-bit platforms" + )] + for (i, (name, dvalue)) in children.iter().enumerate().skip(offset as usize) { + let inode = self.loaded_inode(dvalue.ino).await?; + let next_offset = (i + 1) as u64; + if filler(DirEntry { name, inode }, next_offset) { + break; + } + } + + Ok(()) + } +} diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs new file mode 100644 index 0000000..5138e80 --- /dev/null +++ b/lib/fs/dcache.rs @@ -0,0 +1,65 @@ +use std::ffi::{OsStr, OsString}; + +use crate::fs::LoadedAddr; + +/// Cached metadata for a directory entry. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DValue { + /// Inode address of this entry. + pub ino: LoadedAddr, + /// Whether this entry is itself a directory. + pub is_dir: bool, +} + +/// In-memory directory entry cache mapping `(parent, name)` to child metadata. +/// +/// Backed by [`scc::HashMap`] for atomic upsert on insert. 
The `readdir` +/// implementation scans the entire map and filters by parent — this is O(n) +/// over the cache size rather than O(log n + k) with an ordered index, but +/// guarantees that `insert` never creates a window where an entry is absent. +#[derive(Default)] +pub struct DCache { + cache: scc::HashMap<(LoadedAddr, OsString), DValue>, +} + +impl DCache { + /// Creates an empty directory cache. + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Looks up a single child entry by parent inode and name. + #[must_use] + pub fn lookup(&self, parent_ino: LoadedAddr, name: &OsStr) -> Option { + let key = (parent_ino, name.to_os_string()); + self.cache.read_sync(&key, |_, v| v.clone()) + } + + /// Atomically inserts or overwrites a child entry in the cache. + pub async fn insert( + &self, + parent_ino: LoadedAddr, + name: OsString, + ino: LoadedAddr, + is_dir: bool, + ) { + let key = (parent_ino, name); + let value = DValue { ino, is_dir }; + self.cache.upsert_async(key, value).await; + } + + /// Returns all cached children of `parent_ino` as `(name, value)` pairs. + pub async fn readdir(&self, parent_ino: LoadedAddr) -> Vec<(OsString, DValue)> { + let mut entries = Vec::new(); + self.cache + .iter_async(|key, value| { + if key.0 == parent_ino { + entries.push((key.1.clone(), value.clone())); + } + true + }) + .await; + entries + } +} diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs new file mode 100644 index 0000000..59619c0 --- /dev/null +++ b/lib/fs/fuser.rs @@ -0,0 +1,421 @@ +//! FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`AsyncFs`](super::async_fs::AsyncFs). + +use std::collections::HashMap; +use std::ffi::OsStr; +use std::sync::Arc; + +use super::async_fs::{FileReader as _, FsDataProvider}; +use super::{FileHandle, INode, INodeType, InodeAddr, LoadedAddr, OpenFlags}; +use crate::cache::async_backed::FutureBackedCache; +use tracing::{debug, error, instrument}; + +/// Wrapper converting [`std::io::Error`] to errno. 
+#[derive(Debug, thiserror::Error)] +#[error("{0}")] +struct FuseIoError(std::io::Error); + +#[expect( + clippy::wildcard_enum_match_arm, + reason = "ErrorKind is non_exhaustive; EIO is the safe default" +)] +impl From for i32 { + fn from(e: FuseIoError) -> Self { + e.0.raw_os_error().unwrap_or_else(|| match e.0.kind() { + std::io::ErrorKind::NotFound => libc::ENOENT, + std::io::ErrorKind::PermissionDenied => libc::EACCES, + std::io::ErrorKind::AlreadyExists => libc::EEXIST, + _ => libc::EIO, + }) + } +} + +/// Error for read operations. +#[derive(Debug, thiserror::Error)] +enum FuseReadError { + /// The file handle was not open. + #[error("file handle not open")] + NotOpen, + /// An I/O error occurred during the read. + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), +} + +impl From for i32 { + fn from(e: FuseReadError) -> Self { + match e { + FuseReadError::NotOpen => libc::EBADF, + FuseReadError::Io(ref io) => io.raw_os_error().unwrap_or(libc::EIO), + } + } +} + +/// Error for release operations. +#[derive(Debug, thiserror::Error)] +enum FuseReleaseError { + /// The file handle was not open. + #[error("file handle not open")] + NotOpen, +} + +impl From for i32 { + fn from(e: FuseReleaseError) -> Self { + match e { + FuseReleaseError::NotOpen => libc::EBADF, + } + } +} + +mod inner { + #![allow(clippy::future_not_send, clippy::mem_forget)] + + use ouroboros::self_referencing; + + use crate::cache::async_backed::FutureBackedCache; + use crate::drop_ward::DropWard; + use crate::fs::async_fs::{AsyncFs, FsDataProvider, InodeForget}; + use crate::fs::{INode, InodeAddr}; + + /// Self-referential struct holding the inode table, refcount ward, and `AsyncFs`. + /// + /// Both `ward` and `fs` borrow from `table`. The ward manages inode + /// refcounts; the fs serves lookup/readdir/open/read operations. 
+ #[self_referencing] + pub(super) struct FuseBridgeInner { + table: FutureBackedCache, + #[borrows(table)] + #[not_covariant] + ward: DropWard<&'this FutureBackedCache, InodeAddr, InodeForget>, + #[borrows(table)] + #[covariant] + fs: AsyncFs<'this, DP>, + } + + impl FuseBridgeInner { + pub(super) fn create(table: FutureBackedCache, provider: DP) -> Self { + FuseBridgeInnerBuilder { + table, + ward_builder: |tbl| DropWard::new(tbl), + fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), + } + .build() + } + + pub(super) fn get_fs(&self) -> &AsyncFs<'_, DP> { + self.borrow_fs() + } + + pub(super) fn ward_inc(&mut self, addr: InodeAddr) -> usize { + self.with_ward_mut(|ward| ward.inc(addr)) + } + + pub(super) fn ward_dec_count(&mut self, addr: InodeAddr, count: usize) -> Option { + self.with_ward_mut(|ward| ward.dec_count(&addr, count)) + } + } +} + +use inner::FuseBridgeInner; + +/// Convert an `INode` to the fuser-specific `FileAttr`. +fn inode_to_fuser_attr(inode: &INode, block_size: u32) -> fuser::FileAttr { + fuser::FileAttr { + ino: inode.addr, + size: inode.size, + blocks: inode.size.div_ceil(512), + atime: inode.last_modified_at, + mtime: inode.last_modified_at, + ctime: inode.last_modified_at, + crtime: inode.create_time, + kind: inode_type_to_fuser(inode.itype), + perm: inode.permissions.bits(), + nlink: 1, + uid: inode.uid, + gid: inode.gid, + rdev: 0, + blksize: block_size, + flags: 0, + } +} + +#[expect( + clippy::wildcard_enum_match_arm, + reason = "INodeType is non_exhaustive; File is the safe default" +)] +fn inode_type_to_fuser(itype: INodeType) -> fuser::FileType { + match itype { + INodeType::Directory => fuser::FileType::Directory, + INodeType::Symlink => fuser::FileType::Symlink, + _ => fuser::FileType::RegularFile, + } +} + +const BLOCK_SIZE: u32 = 4096; + +/// Bridges a generic [`FsDataProvider`] to the [`fuser::Filesystem`] trait. 
+/// +/// Owns a self-referential inode table + ward + [`AsyncFs`](super::async_fs::AsyncFs), +/// plus an open-file map and a tokio runtime handle for blocking on async ops. +pub struct FuserAdapter { + inner: FuseBridgeInner, + open_files: HashMap>, + runtime: tokio::runtime::Handle, +} + +impl FuserAdapter { + // TODO(markovejnovic): This low TTL is really not ideal. It slows us down a lot, since the + // kernel has to ask us for every single lookup all the time. + // + // I think a better implementation is to implement + // + // notify_inval_inode(ino, offset, len) + // notify_inval_entry(parent_ino, name) + // + // These two functions can be used to invalidate specific entries in the kernel cache when we + // know they have changed. This would allow us to set a much higher TTL here. + const SHAMEFUL_TTL: std::time::Duration = std::time::Duration::from_secs(1); + + /// Create a new adapter from a pre-seeded inode table and data provider. + /// + /// The `table` must already have the root inode inserted. 
+ pub fn new( + table: FutureBackedCache, + provider: DP, + runtime: tokio::runtime::Handle, + ) -> Self { + Self { + inner: FuseBridgeInner::create(table, provider), + open_files: HashMap::new(), + runtime, + } + } +} + +impl fuser::Filesystem for FuserAdapter { + #[instrument(name = "FuserAdapter::lookup", skip(self, _req, reply))] + fn lookup( + &mut self, + _req: &fuser::Request<'_>, + parent: u64, + name: &OsStr, + reply: fuser::ReplyEntry, + ) { + let result = self.runtime.block_on(async { + let tracked = self + .inner + .get_fs() + .lookup(LoadedAddr(parent), name) + .await + .map_err(FuseIoError)?; + self.inner.ward_inc(tracked.inode.addr); + Ok::<_, FuseIoError>(tracked.inode) + }); + match result { + Ok(inode) => { + let f_attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); + debug!(?f_attr, "replying..."); + reply.entry(&Self::SHAMEFUL_TTL, &f_attr, 0); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[instrument(name = "FuserAdapter::getattr", skip(self, _req, _fh, reply))] + fn getattr( + &mut self, + _req: &fuser::Request<'_>, + ino: u64, + _fh: Option, + reply: fuser::ReplyAttr, + ) { + let result = self.runtime.block_on(async { + self.inner + .get_fs() + .getattr(LoadedAddr(ino)) + .await + .map_err(FuseIoError) + }); + match result { + Ok(inode) => { + let attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); + debug!(?attr, "replying..."); + reply.attr(&Self::SHAMEFUL_TTL, &attr); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[instrument(name = "FuserAdapter::readdir", skip(self, _req, _fh, offset, reply))] + fn readdir( + &mut self, + _req: &fuser::Request<'_>, + ino: u64, + _fh: u64, + offset: i64, + mut reply: fuser::ReplyDirectory, + ) { + let offset_u64 = offset.cast_unsigned(); + let result = self.runtime.block_on(async { + let mut entries = Vec::new(); + self.inner + .get_fs() + .readdir(LoadedAddr(ino), offset_u64, |de, _next_offset| { + 
entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype)); + false + }) + .await + .map_err(FuseIoError)?; + Ok::<_, FuseIoError>(entries) + }); + + let entries = match result { + Ok(entries) => entries, + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + return; + } + }; + + #[expect( + clippy::cast_possible_truncation, + reason = "offset fits in usize on supported 64-bit platforms" + )] + for (i, (entry_ino, entry_name, entry_itype)) in entries.iter().enumerate() { + let kind = inode_type_to_fuser(*entry_itype); + let abs_idx = offset_u64 as usize + i + 1; + let Ok(idx): Result = abs_idx.try_into() else { + error!("Directory entry index {} too large for fuser", abs_idx); + reply.error(libc::EIO); + return; + }; + + debug!(?entry_name, ino = entry_ino, "adding entry to reply..."); + if reply.add(*entry_ino, idx, kind, entry_name) { + debug!("buffer full for now, stopping readdir"); + break; + } + } + + debug!("finalizing reply..."); + reply.ok(); + } + + #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] + fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { + let flags = OpenFlags::from_bits_truncate(flags); + let result = self.runtime.block_on(async { + let open_file = self + .inner + .get_fs() + .open(LoadedAddr(ino), flags) + .await + .map_err(FuseIoError)?; + let fh = open_file.fh; + self.open_files.insert(fh, Arc::clone(&open_file.reader)); + Ok::<_, FuseIoError>(fh) + }); + match result { + Ok(fh) => { + debug!(handle = fh, "replying..."); + reply.opened(fh, 0); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[instrument( + name = "FuserAdapter::read", + skip(self, _req, _ino, fh, offset, size, _flags, _lock_owner, reply) + )] + fn read( + &mut self, + _req: &fuser::Request<'_>, + _ino: u64, + fh: u64, + offset: i64, + size: u32, + _flags: i32, + _lock_owner: Option, + reply: fuser::ReplyData, + ) { + 
let result: Result<_, FuseReadError> = self.runtime.block_on(async { + let reader = self.open_files.get(&fh).ok_or(FuseReadError::NotOpen)?; + Ok(reader.read(offset.cast_unsigned(), size).await?) + }); + match result { + Ok(data) => { + debug!(read_bytes = data.len(), "replying..."); + reply.data(&data); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[instrument( + name = "FuserAdapter::release", + skip(self, _req, _ino, fh, _flags, _lock_owner, _flush, reply) + )] + fn release( + &mut self, + _req: &fuser::Request<'_>, + _ino: u64, + fh: u64, + _flags: i32, + _lock_owner: Option, + _flush: bool, + reply: fuser::ReplyEmpty, + ) { + let result: Result<_, FuseReleaseError> = self + .open_files + .remove(&fh) + .map(|_| ()) + .ok_or(FuseReleaseError::NotOpen); + match result { + Ok(()) => { + debug!("replying ok"); + reply.ok(); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[expect( + clippy::cast_possible_truncation, + reason = "nlookups fits in usize on supported 64-bit platforms" + )] + #[instrument(name = "FuserAdapter::forget", skip(self, _req, nlookup))] + fn forget(&mut self, _req: &fuser::Request<'_>, ino: u64, nlookup: u64) { + self.inner.ward_dec_count(ino, nlookup as usize); + } + + #[instrument(name = "FuserAdapter::statfs", skip(self, _req, _ino, reply))] + fn statfs(&mut self, _req: &fuser::Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { + let stats = self.inner.get_fs().statfs(); + debug!(?stats, "replying..."); + reply.statfs( + stats.total_blocks, + stats.free_blocks, + stats.available_blocks, + stats.total_inodes, + stats.free_inodes, + stats.block_size, + stats.max_filename_length, + 0, + ); + } +} diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs new file mode 100644 index 0000000..e8f971b --- /dev/null +++ b/lib/fs/mod.rs @@ -0,0 +1,188 @@ +//! Useful filesystem generalizations. +/// Async filesystem cache with concurrent inode management. 
+pub mod async_fs; +/// Directory entry cache for fast parent-child lookups. +pub mod dcache; +/// FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`async_fs::AsyncFs`]. +pub mod fuser; + +pub use async_fs::{InodeForget, InodeLifecycle, OpenFile, TrackedINode}; + +use std::ffi::OsStr; +use std::time::SystemTime; + +use bitflags::bitflags; + +/// Type representing an inode identifier. +pub type InodeAddr = u64; + +/// Represents an inode address that has been loaded into the inode table. +/// +/// This newtype wrapper distinguishes inode addresses that are known to exist +/// in the [`async_fs::AsyncFs`] inode table from raw [`InodeAddr`] values. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct LoadedAddr(pub InodeAddr); + +/// Type representing a file handle. +pub type FileHandle = u64; + +bitflags! { + /// Permission bits for an inode, similar to Unix file permissions. + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct InodePerms: u16 { + /// Other: execute permission. + const OTHER_EXECUTE = 1 << 0; + /// Other: write permission. + const OTHER_WRITE = 1 << 1; + /// Other: read permission. + const OTHER_READ = 1 << 2; + + /// Group: execute permission. + const GROUP_EXECUTE = 1 << 3; + /// Group: write permission. + const GROUP_WRITE = 1 << 4; + /// Group: read permission. + const GROUP_READ = 1 << 5; + + /// Owner: execute permission. + const OWNER_EXECUTE = 1 << 6; + /// Owner: write permission. + const OWNER_WRITE = 1 << 7; + /// Owner: read permission. + const OWNER_READ = 1 << 8; + + /// Sticky bit. + const STICKY = 1 << 9; + /// Set-group-ID bit. + const SETGID = 1 << 10; + /// Set-user-ID bit. + const SETUID = 1 << 11; + + /// Other: read, write, and execute. + const OTHER_RWX = Self::OTHER_READ.bits() + | Self::OTHER_WRITE.bits() + | Self::OTHER_EXECUTE.bits(); + /// Group: read, write, and execute. 
+ const GROUP_RWX = Self::GROUP_READ.bits() + | Self::GROUP_WRITE.bits() + | Self::GROUP_EXECUTE.bits(); + /// Owner: read, write, and execute. + const OWNER_RWX = Self::OWNER_READ.bits() + | Self::OWNER_WRITE.bits() + | Self::OWNER_EXECUTE.bits(); + } +} + +bitflags! { + /// Flags for opening a file, similar to Unix open(2) flags. + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct OpenFlags: i32 { + /// Open for reading only. + const RDONLY = libc::O_RDONLY; + /// Open for writing only. + const WRONLY = libc::O_WRONLY; + /// Open for reading and writing. + const RDWR = libc::O_RDWR; + + /// Append on each write. + const APPEND = libc::O_APPEND; + /// Truncate to zero length. + const TRUNC = libc::O_TRUNC; + /// Create file if it does not exist. + const CREAT = libc::O_CREAT; + /// Error if file already exists (with `CREAT`). + const EXCL = libc::O_EXCL; + + /// Non-blocking mode. + const NONBLOCK = libc::O_NONBLOCK; + /// Synchronous writes. + const SYNC = libc::O_SYNC; + /// Synchronous data integrity writes. + const DSYNC = libc::O_DSYNC; + /// Do not follow symlinks. + const NOFOLLOW = libc::O_NOFOLLOW; + /// Set close-on-exec. + const CLOEXEC = libc::O_CLOEXEC; + /// Fail if not a directory. + const DIRECTORY = libc::O_DIRECTORY; + + /// Do not update access time (Linux only). + #[cfg(target_os = "linux")] + const NOATIME = libc::O_NOATIME; + } +} + +/// The type of an inode entry in the filesystem. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum INodeType { + /// A regular file. + File, + /// A directory. + Directory, + /// A symbolic link. + Symlink, +} + +/// Representation of an inode. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct INode { + /// The address of this inode, which serves as its unique identifier. + pub addr: InodeAddr, + /// The permissions associated with this inode, represented as a bitfield. + pub permissions: InodePerms, + /// The user ID of the owner of this inode. 
+ pub uid: u32, + /// The group ID of the owner of this inode. + pub gid: u32, + /// The time this inode was created at. + pub create_time: SystemTime, + /// The time this inode was last modified at. + pub last_modified_at: SystemTime, + /// The parent inode address, if any. This is `None` for the root inode. + pub parent: Option, + /// The size of the file represented by this inode, in bytes. + pub size: u64, + /// Additional information about the type of this inode (e.g., file vs directory). + pub itype: INodeType, +} + +impl INode { + /// Check if this inode is the root inode (i.e., has no parent). + #[must_use] + pub fn is_root(&self) -> bool { + self.parent.is_none() + } +} + +/// A directory entry yielded by [`async_fs::AsyncFs::readdir`]. +/// +/// Borrows the entry name from the directory cache's iteration buffer. +#[derive(Debug, Clone, Copy)] +pub struct DirEntry<'a> { + /// The name of this entry within its parent directory. + pub name: &'a OsStr, + /// The full inode data for this entry. + pub inode: INode, +} + +/// Filesystem statistics returned by [`async_fs::AsyncFs::statfs`]. +/// +/// Block-related sizes are in units of `block_size` bytes. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct AsyncFsStats { + /// Filesystem block size (bytes). + pub block_size: u32, + /// Total number of data blocks. + pub total_blocks: u64, + /// Number of free blocks. + pub free_blocks: u64, + /// Number of blocks available to unprivileged users. + pub available_blocks: u64, + /// Total number of file nodes (inodes). + pub total_inodes: u64, + /// Number of free file nodes. + pub free_inodes: u64, + /// Maximum filename length (bytes). + pub max_filename_length: u32, +} diff --git a/lib/lib.rs b/lib/lib.rs index f7388bd..40b1e8f 100644 --- a/lib/lib.rs +++ b/lib/lib.rs @@ -2,4 +2,7 @@ /// Caching primitives for git-fs. pub mod cache; +pub mod drop_ward; +/// Filesystem abstractions and caching layers. 
+pub mod fs; pub mod io; diff --git a/src/daemon.rs b/src/daemon.rs index dac2d05..44659f3 100644 --- a/src/daemon.rs +++ b/src/daemon.rs @@ -14,9 +14,13 @@ mod managed_fuse { use nix::errno::Errno; + use git_fs::cache::async_backed::FutureBackedCache; + use git_fs::fs::{INode, INodeType, InodePerms}; + use super::{MesaFS, OrgConfig, app_config, debug, error}; - use crate::fs::fuser::FuserAdapter; + use crate::fs::mescloud::MesaFsProvider; use fuser::BackgroundSession; + use git_fs::fs::fuser::FuserAdapter; pub struct FuseCoreScope { _session: BackgroundSession, @@ -44,7 +48,24 @@ mod managed_fuse { api_key: org.api_key.clone(), }); let mesa_fs = MesaFS::new(orgs, (config.uid, config.gid), &config.cache); - let fuse_adapter = FuserAdapter::new(mesa_fs, handle); + + let table = FutureBackedCache::default(); + let now = std::time::SystemTime::now(); + let root = INode { + addr: 1, + permissions: InodePerms::from_bits_truncate(0o755), + uid: config.uid, + gid: config.gid, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, + }; + table.insert_sync(1, root); + + let provider = MesaFsProvider::new(mesa_fs, handle.clone()); + let fuse_adapter = FuserAdapter::new(table, provider, handle); let mount_opts = [ fuser::MountOption::FSName("git-fs".to_owned()), fuser::MountOption::RO, diff --git a/src/fs/fuser.rs b/src/fs/fuser.rs deleted file mode 100644 index 86ddabb..0000000 --- a/src/fs/fuser.rs +++ /dev/null @@ -1,351 +0,0 @@ -use std::ffi::OsStr; - -use crate::fs::r#trait::{CommonFileAttr, DirEntryType, FileAttr, Fs, LockOwner, OpenFlags}; -use tracing::{debug, error, instrument}; - -impl From for fuser::FileAttr { - fn from(val: FileAttr) -> Self { - fn common_to_fuser(common: CommonFileAttr) -> fuser::FileAttr { - fuser::FileAttr { - ino: common.ino, - size: 0, - blocks: 0, - atime: common.atime, - mtime: common.mtime, - ctime: common.ctime, - crtime: common.crtime, - kind: fuser::FileType::RegularFile, - perm: 
common.perm.bits(), - nlink: common.nlink, - uid: common.uid, - gid: common.gid, - rdev: 0, - blksize: common.blksize, - flags: 0, - } - } - - match val { - FileAttr::RegularFile { - common, - size, - blocks, - } => { - let mut attr = common_to_fuser(common); - attr.size = size; - attr.blocks = blocks; - attr.kind = fuser::FileType::RegularFile; - attr - } - FileAttr::Directory { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::Directory; - attr - } - FileAttr::Symlink { common, size } => { - let mut attr = common_to_fuser(common); - attr.size = size; - attr.kind = fuser::FileType::Symlink; - attr - } - FileAttr::CharDevice { common, rdev } => { - let mut attr = common_to_fuser(common); - debug_assert!(u32::try_from(rdev).is_ok(), "rdev value {rdev} too large"); - attr.rdev = rdev - .try_into() - .map_err(|_| { - error!("rdev value {rdev} too large for fuser::FileAttr"); - }) - .unwrap_or(0); - attr.kind = fuser::FileType::CharDevice; - attr - } - FileAttr::BlockDevice { common, rdev } => { - let mut attr = common_to_fuser(common); - debug_assert!(u32::try_from(rdev).is_ok(), "rdev value {rdev} too large"); - attr.rdev = rdev - .try_into() - .map_err(|_| { - error!("rdev value {rdev} too large for fuser::FileAttr"); - }) - .unwrap_or(0); - attr.kind = fuser::FileType::BlockDevice; - attr - } - FileAttr::NamedPipe { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::NamedPipe; - attr - } - FileAttr::Socket { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::Socket; - attr - } - } - } -} - -impl From for fuser::FileType { - fn from(val: DirEntryType) -> Self { - match val { - DirEntryType::RegularFile => Self::RegularFile, - DirEntryType::Directory => Self::Directory, - DirEntryType::Symlink => Self::Symlink, - DirEntryType::CharDevice => Self::CharDevice, - DirEntryType::BlockDevice => Self::BlockDevice, - DirEntryType::NamedPipe => Self::NamedPipe, - 
DirEntryType::Socket => Self::Socket, - } - } -} - -impl From for OpenFlags { - fn from(val: i32) -> Self { - Self::from_bits_truncate(val) - } -} - -pub struct FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - fs: F, - runtime: tokio::runtime::Handle, -} - -impl FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - // TODO(markovejnovic): This low TTL is really not ideal. It slows us down a lot, since the - // kernel has to ask us for every single lookup all the time. - // - // I think a better implementation is to implement - // - // notify_inval_inode(ino, offset, len) - // notify_inval_entry(parent_ino, name) - // - // These two functions can be used to invalidate specific entries in the kernel cache when we - // know they have changed. This would allow us to set a much higher TTL here. - const SHAMEFUL_TTL: std::time::Duration = std::time::Duration::from_secs(1); - - pub fn new(fs: F, runtime: tokio::runtime::Handle) -> Self { - Self { fs, runtime } - } -} - -impl fuser::Filesystem for FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - #[instrument(name = "FuserAdapter::lookup", skip(self, _req, reply))] - fn lookup( - &mut self, - _req: &fuser::Request<'_>, - parent: u64, - name: &OsStr, - reply: fuser::ReplyEntry, - ) { - match self.runtime.block_on(self.fs.lookup(parent, name)) { - Ok(attr) => { - // TODO(markovejnovic): Passing generation = 0 here is a recipe for disaster. - // Someone with A LOT of files will likely see inode reuse which will lead to a - // disaster. 
- let f_attr: fuser::FileAttr = attr.into(); - debug!(?f_attr, "replying..."); - reply.entry(&Self::SHAMEFUL_TTL, &f_attr, 0); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::getattr", skip(self, _req, fh, reply))] - fn getattr( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: Option, - reply: fuser::ReplyAttr, - ) { - match self.runtime.block_on(self.fs.getattr(ino, fh)) { - Ok(attr) => { - debug!(?attr, "replying..."); - reply.attr(&Self::SHAMEFUL_TTL, &attr.into()); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::readdir", skip(self, _req, _fh, offset, reply))] - fn readdir( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - _fh: u64, - offset: i64, - mut reply: fuser::ReplyDirectory, - ) { - let entries = match self.runtime.block_on(self.fs.readdir(ino)) { - Ok(entries) => entries, - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - return; - } - }; - - #[expect( - clippy::cast_possible_truncation, - reason = "fuser offset is i64 but always non-negative" - )] - for (i, entry) in entries - .iter() - .enumerate() - .skip(offset.cast_unsigned() as usize) - { - let kind: fuser::FileType = entry.kind.into(); - let Ok(idx): Result = (i + 1).try_into() else { - error!("Directory entry index {} too large for fuser", i + 1); - reply.error(libc::EIO); - return; - }; - - debug!(?entry, "adding entry to reply..."); - if reply.add(entry.ino, idx, kind, &entry.name) { - debug!("buffer full for now, stopping readdir"); - break; - } - } - - debug!("finalizing reply..."); - reply.ok(); - } - - #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] - fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { - match self.runtime.block_on(self.fs.open(ino, flags.into())) { - Ok(open_file) => { - debug!(handle = 
open_file.handle, "replying..."); - reply.opened(open_file.handle, 0); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument( - name = "FuserAdapter::read", - skip(self, _req, fh, offset, size, flags, lock_owner, reply) - )] - fn read( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: u64, - offset: i64, - size: u32, - flags: i32, - lock_owner: Option, - reply: fuser::ReplyData, - ) { - let flags: OpenFlags = flags.into(); - let lock_owner = lock_owner.map(LockOwner); - match self.runtime.block_on(self.fs.read( - ino, - fh, - offset.cast_unsigned(), - size, - flags, - lock_owner, - )) { - Ok(data) => { - debug!(read_bytes = data.len(), "replying..."); - reply.data(&data); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::release", skip(self, _req, _lock_owner, reply))] - fn release( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: u64, - flags: i32, - _lock_owner: Option, - flush: bool, - reply: fuser::ReplyEmpty, - ) { - match self - .runtime - .block_on(self.fs.release(ino, fh, flags.into(), flush)) - { - Ok(()) => { - debug!("replying ok"); - reply.ok(); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::forget", skip(self, _req, nlookup))] - fn forget(&mut self, _req: &fuser::Request<'_>, ino: u64, nlookup: u64) { - self.runtime.block_on(self.fs.forget(ino, nlookup)); - } - - #[instrument(name = "FuserAdapter::statfs", skip(self, _req, _ino, reply))] - fn statfs(&mut self, _req: &fuser::Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { - self.runtime.block_on(async { - match self.fs.statfs().await { - Ok(statvfs) => { - debug!(?statvfs, "replying..."); - reply.statfs( - statvfs.total_blocks, - statvfs.free_blocks, - statvfs.available_blocks, - statvfs.total_inodes, - statvfs.free_inodes, - statvfs.block_size, - 
statvfs.max_filename_length, - 0, - ); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.raw_os_error().unwrap_or(libc::EIO)); - } - } - }); - } -} diff --git a/src/fs/icache/async_cache.rs b/src/fs/icache/async_cache.rs deleted file mode 100644 index 84003da..0000000 --- a/src/fs/icache/async_cache.rs +++ /dev/null @@ -1,1410 +0,0 @@ -//! Async inode cache with InFlight/Available state machine. - -use std::future::Future; - -use scc::HashMap as ConcurrentHashMap; -use tokio::sync::watch; - -use tracing::{instrument, trace, warn}; - -use crate::fs::r#trait::Inode; - -use super::IcbLike; - -/// State of an entry in the async inode cache. -pub enum IcbState { - /// Entry is being loaded; waiters clone the receiver and `.changed().await`. - /// - /// The channel carries `()` rather than the resolved value because the map - /// is the single source of truth: ICBs are mutated in-place (rc, attrs) so - /// a snapshot in the channel would immediately go stale. Sender-drop also - /// gives us implicit, leak-proof signalling on both success and error paths. - InFlight(watch::Receiver<()>), - /// Entry is ready for use. - Available(I), -} - -impl IcbState { - /// Consume `self`, returning the inner value if `Available`, or `None` if `InFlight`. - fn into_available(self) -> Option { - match self { - Self::Available(inner) => Some(inner), - Self::InFlight(_) => None, - } - } -} - -/// Trait for resolving an inode to its control block. -/// -/// Implementations act as a "promise" that an ICB will eventually be produced -/// for a given inode. The cache calls `resolve` when it needs to populate a -/// missing entry. -pub trait IcbResolver: Send + Sync { - /// The inode control block type this resolver produces. - type Icb: IcbLike + Send + Sync; - /// Error type returned when resolution fails. - type Error: Send; - - /// Resolve an inode to a fully-populated control block. 
- /// - /// - `stub`: `Some(icb)` if upgrading an existing stub entry, `None` if creating - /// from scratch. The stub typically has `parent` and `path` set but `attr` missing. - /// - `cache`: reference to the cache, useful for walking parent chains to build paths. - fn resolve( - &self, - ino: Inode, - stub: Option, - cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized; -} - -/// Async, concurrency-safe inode cache. -/// -/// All methods take `&self` — internal synchronization is provided by -/// `scc::HashMap` (sharded lock-free map). -pub struct AsyncICache { - resolver: R, - inode_table: ConcurrentHashMap>, -} - -impl AsyncICache { - /// Create a new cache with a root ICB at `root_ino` (rc = 1). - pub fn new(resolver: R, root_ino: Inode, root_path: impl Into) -> Self { - let table = ConcurrentHashMap::new(); - // insert_sync is infallible for a fresh map - drop(table.insert_sync( - root_ino, - IcbState::Available(R::Icb::new_root(root_path.into())), - )); - Self { - resolver, - inode_table: table, - } - } - - /// Number of entries (`InFlight` + `Available`) in the table. - pub fn inode_count(&self) -> usize { - self.inode_table.len() - } - - /// Wait until `ino` is `Available`. - /// Returns `true` if the entry exists and is Available, - /// `false` if the entry does not exist. - #[instrument(name = "AsyncICache::wait_for_available", skip(self))] - async fn wait_for_available(&self, ino: Inode) -> bool { - loop { - let rx = self - .inode_table - .read_async(&ino, |_, s| match s { - IcbState::InFlight(rx) => Some(rx.clone()), - IcbState::Available(_) => None, - }) - .await; - - match rx { - None => return false, // key missing - Some(None) => return true, // Available - Some(Some(mut rx)) => { - // Wait for the resolver to complete (or fail/drop sender). - // changed() returns Err(RecvError) when sender is dropped, - // which is fine — it means resolution finished. 
- let _ = rx.changed().await; - // Loop back — the entry might be InFlight again if another - // resolution cycle started between our wakeup and re-read. - } - } - } - } - - /// Check whether `ino` has an entry in the table (either `InFlight` or `Available`). - /// - /// This is a non-blocking, synchronous check. It does **not** wait for - /// `InFlight` entries to resolve. - pub fn contains(&self, ino: Inode) -> bool { - self.inode_table.contains_sync(&ino) - } - - /// Read an ICB via closure. **Awaits** if `InFlight`. - /// Returns `None` if `ino` doesn't exist. - #[instrument(name = "AsyncICache::get_icb", skip(self, f))] - // `Sync` is required because `f` is held across `.await` points in the - // loop body; for the resulting future to be `Send`, the captured closure - // must be `Sync` (clippy::future_not_send). - pub async fn get_icb( - &self, - ino: Inode, - f: impl Fn(&R::Icb) -> T + Send + Sync, - ) -> Option { - loop { - if !self.wait_for_available(ino).await { - return None; - } - let result = self - .inode_table - .read_async(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .await; - match result { - Some(Some(val)) => return Some(val), - Some(None) => {} // was InFlight, retry - None => return None, // key missing - } - } - } - - /// Mutate an ICB via closure. **Awaits** if `InFlight`. - /// Returns `None` if `ino` doesn't exist. 
- #[instrument(name = "AsyncICache::get_icb_mut", skip(self, f))] - pub async fn get_icb_mut( - &self, - ino: Inode, - mut f: impl FnMut(&mut R::Icb) -> T + Send, - ) -> Option { - loop { - if !self.wait_for_available(ino).await { - return None; - } - let result = self - .inode_table - .update_async(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .await; - match result { - Some(Some(val)) => return Some(val), - Some(None) => {} // was InFlight, retry - None => return None, // key missing - } - } - } - - /// Insert an ICB directly as `Available`. If the entry is currently - /// `InFlight`, waits for resolution before overwriting. - #[instrument(name = "AsyncICache::insert_icb", skip(self, icb))] - pub async fn insert_icb(&self, ino: Inode, icb: R::Icb) { - use scc::hash_map::Entry; - let mut icb = Some(icb); - loop { - match self.inode_table.entry_async(ino).await { - Entry::Vacant(vac) => { - let val = icb - .take() - .unwrap_or_else(|| unreachable!("icb consumed more than once")); - vac.insert_entry(IcbState::Available(val)); - return; - } - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - IcbState::Available(_) => { - let val = icb - .take() - .unwrap_or_else(|| unreachable!("icb consumed more than once")); - *occ.get_mut() = IcbState::Available(val); - return; - } - }, - } - } - } - - /// Get-or-insert pattern. If `ino` exists (awaits `InFlight`), runs `then` - /// on it. If absent, calls `factory` to create, inserts, then runs `then`. - /// - /// Both `factory` and `then` are `FnOnce` — wrapped in `Option` internally - /// to satisfy the borrow checker across the await-loop. 
- #[instrument(name = "AsyncICache::entry_or_insert_icb", skip(self, factory, then))] - pub async fn entry_or_insert_icb( - &self, - ino: Inode, - factory: impl FnOnce() -> R::Icb, - then: impl FnOnce(&mut R::Icb) -> T, - ) -> T { - use scc::hash_map::Entry; - let mut factory = Some(factory); - let mut then_fn = Some(then); - - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - return t(icb); - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); // release shard lock before awaiting - let _ = rx.changed().await; - } - }, - Entry::Vacant(vac) => { - let f = factory - .take() - .unwrap_or_else(|| unreachable!("factory consumed more than once")); - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - let mut icb = f(); - let result = t(&mut icb); - vac.insert_entry(IcbState::Available(icb)); - return result; - } - } - } - } - - /// Write an ICB back to the table only if the entry still exists. - /// - /// If the entry was evicted (vacant) during resolution, the result is - /// silently dropped — this prevents resurrecting entries that a concurrent - /// `forget` has already removed. - async fn write_back_if_present(&self, ino: Inode, icb: R::Icb) { - use scc::hash_map::Entry; - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => { - *occ.get_mut() = IcbState::Available(icb); - } - Entry::Vacant(_) => { - tracing::debug!( - ino, - "resolved inode was evicted during resolution, dropping result" - ); - } - } - } - - /// Look up `ino`. If `Available` and fully resolved, run `then` and return - /// `Ok(T)`. If `Available` but `needs_resolve()` is true (stub), extract - /// the stub, resolve it, cache the result, then run `then`. 
If absent, call - /// the resolver to fetch the ICB, cache it, then run `then`. If another task - /// is already resolving this inode (`InFlight`), wait for it. - /// - /// Returns `Err(R::Error)` if resolution fails. On error the `InFlight` - /// entry is removed so subsequent calls can retry. - #[instrument(name = "AsyncICache::get_or_resolve", skip(self, then))] - pub async fn get_or_resolve( - &self, - ino: Inode, - then: impl FnOnce(&R::Icb) -> T, - ) -> Result { - use scc::hash_map::Entry; - - let mut then_fn = Some(then); - - // Fast path: Available and fully resolved - { - let hit = self - .inode_table - .read_async(&ino, |_, s| match s { - IcbState::Available(icb) if !icb.needs_resolve() => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - Some(t(icb)) - } - IcbState::InFlight(_) | IcbState::Available(_) => None, - }) - .await; - if let Some(Some(r)) = hit { - return Ok(r); - } - } - - // Slow path: missing, InFlight, or stub needing resolution - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) if !icb.needs_resolve() => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - return Ok(t(icb)); - } - IcbState::Available(_) => { - // Stub needing resolution — extract stub, replace with InFlight - let (tx, rx) = watch::channel(()); - let old = std::mem::replace(occ.get_mut(), IcbState::InFlight(rx)); - let stub = old.into_available().unwrap_or_else(|| { - unreachable!("matched Available arm, replaced value must be Available") - }); - let fallback = stub.clone(); - drop(occ); // release shard lock before awaiting - - match self.resolver.resolve(ino, Some(stub), self).await { - Ok(icb) => { - let t = then_fn.take().unwrap_or_else(|| { - unreachable!("then_fn consumed more than once") - }); - let result = t(&icb); - self.write_back_if_present(ino, icb).await; - drop(tx); - 
return Ok(result); - } - Err(e) => { - if fallback.rc() > 0 { - self.write_back_if_present(ino, fallback).await; - } else { - self.inode_table.remove_async(&ino).await; - } - drop(tx); - return Err(e); - } - } - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - }, - Entry::Vacant(vac) => { - let (tx, rx) = watch::channel(()); - vac.insert_entry(IcbState::InFlight(rx)); - - match self.resolver.resolve(ino, None, self).await { - Ok(icb) => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - let result = t(&icb); - self.write_back_if_present(ino, icb).await; - drop(tx); - return Ok(result); - } - Err(e) => { - self.inode_table.remove_async(&ino).await; - drop(tx); - return Err(e); - } - } - } - } - } - } - - /// Increment rc. **Awaits** `InFlight`. - /// - /// Returns `None` if the inode does not exist or was evicted concurrently. - /// This can happen when a concurrent `forget` removes the entry between the - /// caller's insert/cache and this `inc_rc` call, or when a concurrent - /// `get_or_resolve` swaps the entry to `InFlight` and the entry is then - /// evicted on resolution failure. Callers in FUSE `lookup` paths should - /// treat `None` as a lookup failure to avoid ref-count leaks (the kernel - /// would hold a reference the cache no longer tracks). - #[instrument(name = "AsyncICache::inc_rc", skip(self))] - pub async fn inc_rc(&self, ino: Inode) -> Option { - loop { - if !self.wait_for_available(ino).await { - warn!(ino, "inc_rc: inode not in table"); - return None; - } - let result = self - .inode_table - .update_async(&ino, |_, state| match state { - IcbState::Available(icb) => { - *icb.rc_mut() += 1; - Some(icb.rc()) - } - IcbState::InFlight(_) => None, - }) - .await - .flatten(); - - match result { - Some(rc) => return Some(rc), - None => { - // Entry was concurrently replaced with InFlight or evicted. 
- if !self.contains(ino) { - warn!(ino, "inc_rc: inode evicted concurrently"); - return None; - } - // Entry exists but became InFlight — retry. - } - } - } - } - - /// Decrement rc by `nlookups`. If rc drops to zero, evicts and returns - /// the ICB. **Awaits** `InFlight` entries. - #[instrument(name = "AsyncICache::forget", skip(self))] - pub async fn forget(&self, ino: Inode, nlookups: u64) -> Option { - use scc::hash_map::Entry; - - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) => { - if icb.rc() <= nlookups { - trace!(ino, "evicting inode"); - let (_, state) = occ.remove_entry(); - return state.into_available(); - } - *icb.rc_mut() -= nlookups; - trace!(ino, new_rc = icb.rc(), "decremented rc"); - return None; - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - }, - Entry::Vacant(_) => { - warn!(ino, "forget on unknown inode"); - return None; - } - } - } - } - - /// Synchronous mutable access to an `Available` entry. - /// Does **not** wait for `InFlight`. Intended for initialization. - pub fn get_icb_mut_sync(&self, ino: Inode, f: impl FnOnce(&mut R::Icb) -> T) -> Option { - self.inode_table - .update_sync(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .flatten() - } - - /// Iterate over all `Available` entries (skips `InFlight`). - /// Async-safe iteration using `iter_async` to avoid contention on single-threaded runtimes. 
- pub async fn for_each(&self, mut f: impl FnMut(&Inode, &R::Icb)) { - self.inode_table - .iter_async(|ino, state| { - if let IcbState::Available(icb) = state { - f(ino, icb); - } - true // continue iteration - }) - .await; - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::collections::HashMap as StdHashMap; - use std::path::PathBuf; - use std::sync::atomic::Ordering; - use std::sync::{Arc, Mutex}; - - #[derive(Debug, Clone, PartialEq)] - struct TestIcb { - rc: u64, - path: PathBuf, - resolved: bool, - } - - impl IcbLike for TestIcb { - fn new_root(path: PathBuf) -> Self { - Self { - rc: 1, - path, - resolved: true, - } - } - fn rc(&self) -> u64 { - self.rc - } - fn rc_mut(&mut self) -> &mut u64 { - &mut self.rc - } - fn needs_resolve(&self) -> bool { - !self.resolved - } - } - - struct TestResolver { - responses: Mutex>>, - } - - impl TestResolver { - fn new() -> Self { - Self { - responses: Mutex::new(StdHashMap::new()), - } - } - - fn add(&self, ino: Inode, icb: TestIcb) { - self.responses - .lock() - .expect("test mutex") - .insert(ino, Ok(icb)); - } - - fn add_err(&self, ino: Inode, err: impl Into) { - self.responses - .lock() - .expect("test mutex") - .insert(ino, Err(err.into())); - } - } - - impl IcbResolver for TestResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - let result = self - .responses - .lock() - .expect("test mutex") - .remove(&ino) - .unwrap_or_else(|| Err(format!("no response for inode {ino}"))); - async move { result } - } - } - - fn test_cache() -> AsyncICache { - AsyncICache::new(TestResolver::new(), 1, "/root") - } - - fn test_cache_with(resolver: TestResolver) -> AsyncICache { - AsyncICache::new(resolver, 1, "/root") - } - - #[tokio::test] - async fn contains_returns_true_for_root() { - let cache = test_cache(); - assert!(cache.contains(1), "root should exist"); - } - - #[tokio::test] - async fn 
contains_returns_false_for_missing() { - let cache = test_cache(); - assert!(!cache.contains(999), "missing inode should not exist"); - } - - #[tokio::test] - async fn contains_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/test".into(), - resolved: true, - }, - ); - let cache = Arc::new(test_cache_with(resolver)); - - // Trigger resolve in background - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.get_or_resolve(42, |_| ()).await }); - - handle - .await - .expect("task panicked") - .expect("resolve failed"); - assert!(cache.contains(42), "should be true after resolve"); - } - - #[tokio::test] - async fn new_creates_root_entry() { - let cache = test_cache(); - assert_eq!(cache.inode_count(), 1, "should have exactly 1 entry"); - } - - #[tokio::test] - async fn get_icb_returns_value() { - let cache = test_cache(); - let path = cache.get_icb(1, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/root"))); - } - - #[tokio::test] - async fn get_icb_returns_none_for_missing() { - let cache = test_cache(); - let result = cache.get_icb(999, IcbLike::rc).await; - assert_eq!(result, None, "missing inode should return None"); - } - - #[tokio::test] - async fn get_icb_mut_modifies_value() { - let cache = test_cache(); - cache - .get_icb_mut(1, |icb| { - *icb.rc_mut() += 10; - }) - .await; - let rc = cache.get_icb(1, IcbLike::rc).await; - assert_eq!(rc, Some(11), "root starts at rc=1, +10 = 11"); - } - - #[tokio::test] - async fn get_icb_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/loaded".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - // Resolve inode 42 - cache - .get_or_resolve(42, |_| ()) - .await - .expect("resolve failed"); - - let path = cache.get_icb(42, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/loaded"))); - 
} - - #[tokio::test] - async fn insert_icb_adds_entry() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/foo".into(), - resolved: true, - }, - ) - .await; - assert!(cache.contains(42), "inserted entry should exist"); - assert_eq!(cache.inode_count(), 2, "root + inserted = 2"); - } - - #[tokio::test] - async fn insert_icb_does_not_clobber_inflight() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - // Spawn insert_icb in background — should wait for InFlight to resolve - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { - cache2 - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/inserted".into(), - resolved: true, - }, - ) - .await; - }); - - // Give insert_icb time to start waiting - tokio::task::yield_now().await; - - // Complete the InFlight from the resolver side (write directly) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }), - ) - .await; - drop(tx); // signal watchers - - handle.await.expect("task panicked"); - - // After insert_icb completes, it should have overwritten the resolved value - let path = cache.get_icb(42, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/inserted"))); - } - - #[tokio::test] - async fn entry_or_insert_creates_new() { - let cache = test_cache(); - let rc = cache - .entry_or_insert_icb( - 42, - || TestIcb { - rc: 0, - path: "/new".into(), - resolved: true, - }, - |icb| { - *icb.rc_mut() += 1; - icb.rc() - }, - ) - .await; - assert_eq!(rc, 1, "factory creates rc=0, then +1 = 1"); - } - - #[tokio::test] - async fn entry_or_insert_returns_existing() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/existing".into(), - resolved: true, - }, - ) - .await; - - let rc = cache - .entry_or_insert_icb( - 42, - || 
panic!("factory should not be called"), - |icb| icb.rc(), - ) - .await; - assert_eq!(rc, 5, "existing entry rc should be 5"); - } - - #[tokio::test] - async fn entry_or_insert_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = Arc::new(test_cache_with(resolver)); - - // Start resolve in background - let cache2 = Arc::clone(&cache); - let resolve_handle = tokio::spawn(async move { cache2.get_or_resolve(42, |_| ()).await }); - - // Wait for resolve to finish - resolve_handle - .await - .expect("task panicked") - .expect("resolve failed"); - - // Now entry_or_insert should find the existing entry - let rc = cache - .entry_or_insert_icb( - 42, - || panic!("factory should not be called"), - |icb| icb.rc(), - ) - .await; - assert_eq!(rc, 1, "should find the resolved entry"); - } - - #[tokio::test] - async fn inc_rc_increments() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }, - ) - .await; - let new_rc = cache.inc_rc(42).await; - assert_eq!(new_rc, Some(2), "rc 1 + 1 = 2"); - } - - #[tokio::test] - async fn forget_decrements_rc() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/a".into(), - resolved: true, - }, - ) - .await; - - let evicted = cache.forget(42, 2).await; - assert!(evicted.is_none(), "rc 5 - 2 = 3, should not evict"); - - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(3), "rc should be 3 after forget(2)"); - } - - #[tokio::test] - async fn forget_evicts_when_rc_drops_to_zero() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 3, - path: "/a".into(), - resolved: true, - }, - ) - .await; - - let evicted = cache.forget(42, 3).await; - assert!(evicted.is_some(), "rc 3 - 3 = 0, should evict"); - assert!(!cache.contains(42), "evicted entry should be gone"); - 
assert_eq!(cache.inode_count(), 1, "only root remains"); - } - - #[tokio::test] - async fn forget_unknown_inode_returns_none() { - let cache = test_cache(); - let evicted = cache.forget(999, 1).await; - assert!(evicted.is_none(), "unknown inode should return None"); - } - - #[tokio::test] - async fn for_each_iterates_available_entries() { - let cache = test_cache(); - cache - .insert_icb( - 2, - TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }, - ) - .await; - cache - .insert_icb( - 3, - TestIcb { - rc: 1, - path: "/b".into(), - resolved: true, - }, - ) - .await; - - let mut seen = std::collections::HashSet::new(); - cache - .for_each(|ino, _icb| { - seen.insert(*ino); - }) - .await; - assert_eq!(seen.len(), 3, "should see all 3 entries"); - assert!(seen.contains(&1), "should contain root"); - assert!(seen.contains(&2), "should contain inode 2"); - assert!(seen.contains(&3), "should contain inode 3"); - } - - #[tokio::test] - async fn for_each_skips_inflight() { - let cache = test_cache(); - // Directly insert an InFlight entry for testing iteration - let (_tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let mut count = 0; - cache - .for_each(|_, _| { - count += 1; - }) - .await; - assert_eq!(count, 1, "only root, not the InFlight entry"); - } - - #[tokio::test] - async fn wait_does_not_miss_signal_on_immediate_complete() { - let cache = Arc::new(test_cache()); - - // Insert InFlight manually, then immediately complete before anyone waits - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - // Complete before any waiter (simulate resolver by writing directly) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/fast".into(), - resolved: true, - }), - ) - .await; - drop(tx); - - assert!(cache.contains(42), "entry should exist in table"); - } - - // -- get_or_resolve tests -- - - 
#[tokio::test] - async fn get_or_resolve_returns_existing() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/existing".into(), - resolved: true, - }, - ) - .await; - - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/existing"))); - } - - #[tokio::test] - async fn get_or_resolve_resolves_missing() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/resolved"))); - // Should now be cached - assert!(cache.contains(42)); - } - - #[tokio::test] - async fn get_or_resolve_propagates_error() { - let resolver = TestResolver::new(); - resolver.add_err(42, "network error"); - let cache = test_cache_with(resolver); - - let result: Result = - cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(result, Err("network error".to_owned())); - // Entry should be cleaned up on error - assert!(!cache.contains(42)); - } - - struct CountingResolver { - count: Arc, - } - - impl IcbResolver for CountingResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - self.count.fetch_add(1, Ordering::SeqCst); - async { - tokio::task::yield_now().await; - Ok(TestIcb { - rc: 1, - path: "/coalesced".into(), - resolved: true, - }) - } - } - } - - #[tokio::test] - async fn get_or_resolve_coalesces_concurrent_requests() { - use std::sync::atomic::AtomicUsize; - - let resolve_count = Arc::new(AtomicUsize::new(0)); - - let cache = Arc::new(AsyncICache::new( - CountingResolver { - count: Arc::clone(&resolve_count), - }, - 1, - "/root", - )); - - let mut handles = Vec::new(); - for _ in 0..5 { - let c = Arc::clone(&cache); - 
handles.push(tokio::spawn(async move { - c.get_or_resolve(42, |icb| icb.path.clone()).await - })); - } - - for h in handles { - assert_eq!( - h.await.expect("task panicked"), - Ok(PathBuf::from("/coalesced")), - ); - } - - // Resolver should only have been called ONCE (not 5 times) - assert_eq!( - resolve_count.load(Ordering::SeqCst), - 1, - "should coalesce to 1 resolve call" - ); - } - - #[test] - fn icb_state_into_available_returns_inner() { - let state = IcbState::Available(TestIcb { - rc: 1, - path: "/test".into(), - resolved: true, - }); - assert!(state.into_available().is_some()); - } - - #[test] - fn icb_state_into_available_returns_none_for_inflight() { - let (_tx, rx) = watch::channel(()); - let state: IcbState = IcbState::InFlight(rx); - assert!(state.into_available().is_none()); - } - - #[tokio::test] - async fn get_or_resolve_resolves_stub_entry() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - // Insert unresolved stub - cache - .insert_icb( - 42, - TestIcb { - rc: 0, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // get_or_resolve should trigger resolution because needs_resolve() == true - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/resolved"))); - } - - #[tokio::test] - async fn forget_handles_inflight_entry() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.forget(42, 1).await }); - - // Give forget time to start waiting - tokio::task::yield_now().await; - - // Simulate resolver completing (write directly to inode_table) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 3, - path: "/inflight".into(), - 
resolved: true, - }), - ) - .await; - drop(tx); - - let evicted = handle.await.expect("task panicked"); - assert!(evicted.is_none(), "rc=3 - 1 = 2, should not evict"); - - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(2), "rc should be 2 after forget(1) on rc=3"); - } - - #[tokio::test] - async fn get_or_resolve_error_preserves_stub_with_nonzero_rc() { - let resolver = TestResolver::new(); - resolver.add_err(42, "resolve failed"); - let cache = test_cache_with(resolver); - - // Insert a stub with rc=2 (simulates a looked-up entry needing resolution) - cache - .insert_icb( - 42, - TestIcb { - rc: 2, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // get_or_resolve should fail - let result: Result = - cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert!(result.is_err(), "should propagate resolver error"); - - // The stub should be preserved since rc > 0 - assert!(cache.contains(42), "entry with rc=2 should survive error"); - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(2), "rc should be preserved"); - } - - #[tokio::test] - async fn inc_rc_missing_inode_returns_none() { - let cache = test_cache(); - assert_eq!(cache.inc_rc(999).await, None); - } - - #[tokio::test] - async fn inc_rc_waits_for_inflight() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.inc_rc(42).await }); - - // Simulate resolver completing by writing directly to inode_table - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }), - ) - .await; - drop(tx); - - let result = handle - .await - .unwrap_or_else(|e| panic!("task panicked: {e}")); - assert_eq!( - result, - Some(2), - "waited for Available, then incremented 1 -> 2" - ); - } - - #[tokio::test] - async fn 
inc_rc_returns_none_after_concurrent_eviction() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.inc_rc(42).await }); - - // Evict instead of completing - cache.inode_table.remove_async(&42).await; - drop(tx); - - let result = handle - .await - .unwrap_or_else(|e| panic!("task panicked: {e}")); - assert_eq!(result, None, "evicted entry should return None"); - } - - /// Resolver that pauses mid-resolution via a `Notify`, allowing the test - /// to interleave a `forget` while the resolve future is suspended. - struct SlowResolver { - /// Signalled by the resolver once it has started (so the test knows - /// resolution is in progress). - started: Arc, - /// The resolver waits on this before returning (the test signals it - /// after calling `forget`). - proceed: Arc, - } - - impl IcbResolver for SlowResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - let started = Arc::clone(&self.started); - let proceed = Arc::clone(&self.proceed); - async move { - started.notify_one(); - proceed.notified().await; - Ok(TestIcb { - rc: 1, - path: "/slow-resolved".into(), - resolved: true, - }) - } - } - } - - /// Regression test: `get_icb` must survive the entry cycling back to - /// `InFlight` between when `wait_for_available` returns and when - /// `read_async` runs. The loop in `get_icb` should retry and eventually - /// return the final resolved value. - #[tokio::test] - async fn wait_for_available_retries_on_re_inflight() { - let cache = Arc::new(test_cache()); - let ino: Inode = 42; - - // Phase 1: insert an InFlight entry. 
- let (tx1, rx1) = watch::channel(()); - cache - .inode_table - .upsert_async(ino, IcbState::InFlight(rx1)) - .await; - - // Spawn get_icb — it will wait for InFlight to resolve. - let cache_get = Arc::clone(&cache); - let get_handle = - tokio::spawn(async move { cache_get.get_icb(ino, |icb| icb.path.clone()).await }); - - // Give get_icb time to start waiting on the watch channel. - tokio::task::yield_now().await; - - // Phase 1 complete: transition to Available briefly, then immediately - // back to InFlight (simulates get_or_resolve finding a stub and - // re-entering InFlight for a second resolution). - let (tx2, rx2) = watch::channel(()); - cache - .inode_table - .upsert_async(ino, IcbState::InFlight(rx2)) - .await; - // Signal phase-1 watchers so get_icb wakes up; it will re-read the - // entry and find InFlight again, then loop back to wait. - drop(tx1); - - // Give get_icb time to re-enter the wait loop. - tokio::task::yield_now().await; - - // Phase 2 complete: write the final resolved value. - cache - .inode_table - .upsert_async( - ino, - IcbState::Available(TestIcb { - rc: 1, - path: "/fully-resolved".into(), - resolved: true, - }), - ) - .await; - drop(tx2); - - // get_icb should return the final resolved value (not None). - let result = get_handle.await.expect("get_icb task panicked"); - assert_eq!( - result, - Some(PathBuf::from("/fully-resolved")), - "get_icb must survive re-InFlight and return the final resolved value" - ); - } - - /// Regression test: an entry evicted by `forget` during an in-progress - /// `get_or_resolve` must NOT be resurrected when resolution completes. 
- #[tokio::test] - async fn get_or_resolve_does_not_resurrect_evicted_entry() { - let started = Arc::new(tokio::sync::Notify::new()); - let proceed = Arc::new(tokio::sync::Notify::new()); - - let cache = Arc::new(AsyncICache::new( - SlowResolver { - started: Arc::clone(&started), - proceed: Arc::clone(&proceed), - }, - 1, - "/root", - )); - - let ino: Inode = 42; - - // Insert a stub with rc=1 (simulates a looked-up, unresolved entry). - cache - .insert_icb( - ino, - TestIcb { - rc: 1, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // Spawn get_or_resolve which will trigger slow resolution. - let cache2 = Arc::clone(&cache); - let resolve_handle = - tokio::spawn(async move { cache2.get_or_resolve(ino, |icb| icb.path.clone()).await }); - - // Wait until the resolver has started (entry is now InFlight). - started.notified().await; - - // Evict the entry while resolution is in progress. - // forget waits for InFlight, so we need to complete resolution for - // forget to proceed. Instead, remove the InFlight entry directly to - // simulate a concurrent eviction (e.g., by another path that already - // removed the entry). - cache.inode_table.remove_async(&ino).await; - - // Let the resolver finish. - proceed.notify_one(); - - // Wait for get_or_resolve to complete. - drop(resolve_handle.await.expect("task panicked")); - - // The entry must NOT have been resurrected by write_back_if_present. - assert!( - !cache.contains(ino), - "evicted entry must not be resurrected after resolution completes" - ); - } -} diff --git a/src/fs/icache/bridge.rs b/src/fs/icache/bridge.rs deleted file mode 100644 index e674a56..0000000 --- a/src/fs/icache/bridge.rs +++ /dev/null @@ -1,138 +0,0 @@ -use crate::fs::r#trait::{FileAttr, FileHandle, Inode}; - -/// Bidirectional bridge for both inodes and file handles between two Fs layers. -/// -/// Convention: **left = outer (caller), right = inner (callee)**. -/// `forward(left)` → right, `backward(right)` → left. 
-pub struct HashMapBridge { - inode_map: bimap::BiMap, - fh_map: bimap::BiMap, -} - -impl HashMapBridge { - pub fn new() -> Self { - Self { - inode_map: bimap::BiMap::new(), - fh_map: bimap::BiMap::new(), - } - } - - // ── Inode methods ──────────────────────────────────────────────────── - - pub fn insert_inode(&mut self, left: Inode, right: Inode) { - self.inode_map.insert(left, right); - } - - /// Look up right→left, or allocate a new left inode if unmapped. - pub fn backward_or_insert_inode( - &mut self, - right: Inode, - allocate: impl FnOnce() -> Inode, - ) -> Inode { - if let Some(&left) = self.inode_map.get_by_right(&right) { - left - } else { - let left = allocate(); - self.inode_map.insert(left, right); - left - } - } - - /// Look up left→right, or allocate a new right inode if unmapped. - pub fn forward_or_insert_inode( - &mut self, - left: Inode, - allocate: impl FnOnce() -> Inode, - ) -> Inode { - if let Some(&right) = self.inode_map.get_by_left(&left) { - right - } else { - let right = allocate(); - self.inode_map.insert(left, right); - right - } - } - - /// Remove an inode mapping by its left (outer) key. - pub fn remove_inode_by_left(&mut self, left: Inode) { - self.inode_map.remove_by_left(&left); - } - - /// Look up left→right directly. - pub fn inode_map_get_by_left(&self, left: Inode) -> Option<&Inode> { - self.inode_map.get_by_left(&left) - } - - /// Rewrite the `ino` field in a [`FileAttr`] from right (inner) to left (outer) namespace. 
- pub fn attr_backward(&self, attr: FileAttr) -> FileAttr { - let backward = |ino: Inode| -> Inode { - if let Some(&left) = self.inode_map.get_by_right(&ino) { - left - } else { - tracing::warn!( - inner_ino = ino, - "attr_backward: no bridge mapping, using raw inner inode" - ); - ino - } - }; - rewrite_attr_ino(attr, backward) - } - - // ── File handle methods ────────────────────────────────────────────── - - pub fn insert_fh(&mut self, left: FileHandle, right: FileHandle) { - self.fh_map.insert(left, right); - } - - pub fn fh_forward(&self, left: FileHandle) -> Option { - self.fh_map.get_by_left(&left).copied() - } - - /// Remove a file handle mapping by its left (outer) key. - pub fn remove_fh_by_left(&mut self, left: FileHandle) { - self.fh_map.remove_by_left(&left); - } -} - -/// Rewrite the `ino` field in a [`FileAttr`] using the given translation function. -fn rewrite_attr_ino(attr: FileAttr, translate: impl Fn(Inode) -> Inode) -> FileAttr { - match attr { - FileAttr::RegularFile { - mut common, - size, - blocks, - } => { - common.ino = translate(common.ino); - FileAttr::RegularFile { - common, - size, - blocks, - } - } - FileAttr::Directory { mut common } => { - common.ino = translate(common.ino); - FileAttr::Directory { common } - } - FileAttr::Symlink { mut common, size } => { - common.ino = translate(common.ino); - FileAttr::Symlink { common, size } - } - FileAttr::CharDevice { mut common, rdev } => { - common.ino = translate(common.ino); - FileAttr::CharDevice { common, rdev } - } - FileAttr::BlockDevice { mut common, rdev } => { - common.ino = translate(common.ino); - FileAttr::BlockDevice { common, rdev } - } - FileAttr::NamedPipe { mut common } => { - common.ino = translate(common.ino); - FileAttr::NamedPipe { common } - } - FileAttr::Socket { mut common } => { - common.ino = translate(common.ino); - FileAttr::Socket { common } - } - } -} diff --git a/src/fs/icache/file_table.rs b/src/fs/icache/file_table.rs deleted file mode 100644 index 
332a6ff..0000000 --- a/src/fs/icache/file_table.rs +++ /dev/null @@ -1,22 +0,0 @@ -use std::sync::atomic::{AtomicU64, Ordering}; - -use crate::fs::r#trait::FileHandle; - -/// Monotonically increasing file handle allocator. -#[must_use] -pub struct FileTable { - next_fh: AtomicU64, -} - -impl FileTable { - pub fn new() -> Self { - Self { - next_fh: AtomicU64::new(1), - } - } - - #[must_use] - pub fn allocate(&self) -> FileHandle { - self.next_fh.fetch_add(1, Ordering::Relaxed) - } -} diff --git a/src/fs/icache/inode_factory.rs b/src/fs/icache/inode_factory.rs deleted file mode 100644 index 1a60338..0000000 --- a/src/fs/icache/inode_factory.rs +++ /dev/null @@ -1,19 +0,0 @@ -use crate::fs::r#trait::Inode; -use std::sync::atomic::{AtomicU64, Ordering}; - -/// Monotonically increasing inode allocator. -pub struct InodeFactory { - next_inode: AtomicU64, -} - -impl InodeFactory { - pub fn new(start: Inode) -> Self { - Self { - next_inode: AtomicU64::new(start), - } - } - - pub fn allocate(&self) -> Inode { - self.next_inode.fetch_add(1, Ordering::Relaxed) - } -} diff --git a/src/fs/icache/mod.rs b/src/fs/icache/mod.rs deleted file mode 100644 index 2ccd80b..0000000 --- a/src/fs/icache/mod.rs +++ /dev/null @@ -1,21 +0,0 @@ -//! Generic directory cache and inode management primitives. - -pub mod async_cache; -pub mod bridge; -mod file_table; -mod inode_factory; - -pub use async_cache::AsyncICache; -pub use async_cache::IcbResolver; -pub use file_table::FileTable; -pub use inode_factory::InodeFactory; - -/// Common interface for inode control block types usable with `ICache`. -pub trait IcbLike: Clone { - /// Create an ICB with rc=1, the given path, and no children. - fn new_root(path: std::path::PathBuf) -> Self; - fn rc(&self) -> u64; - fn rc_mut(&mut self) -> &mut u64; - /// Returns true if this entry needs resolution (e.g., attr not yet fetched). 
- fn needs_resolve(&self) -> bool; -} diff --git a/src/fs/mescloud/common.rs b/src/fs/mescloud/common.rs index 340b588..3c63b78 100644 --- a/src/fs/mescloud/common.rs +++ b/src/fs/mescloud/common.rs @@ -1,12 +1,12 @@ //! Shared types and helpers used by both `MesaFS` and `RepoFs`. +use std::ffi::{OsStr, OsString}; + +use bytes::Bytes; +use git_fs::fs::{FileHandle, INode, INodeType, InodeAddr, OpenFlags as LibOpenFlags}; use mesa_dev::low_level::apis; use thiserror::Error; -use crate::fs::r#trait::{FileAttr, Inode}; - -pub(super) use super::icache::InodeControlBlock; - /// A concrete error type that preserves the structure of `mesa_dev::low_level::apis::Error` /// without the generic parameter. #[derive(Debug, Error)] @@ -51,50 +51,22 @@ pub enum LookupError { #[error("inode not found")] InodeNotFound, - #[error("file does not exist")] - FileDoesNotExist, - #[error("remote mesa error")] RemoteMesaError(#[from] MesaApiError), } -impl From for i32 { - fn from(e: LookupError) -> Self { - match e { - LookupError::InodeNotFound | LookupError::FileDoesNotExist => libc::ENOENT, - LookupError::RemoteMesaError(_) => libc::EIO, - } - } -} - #[derive(Debug, Error)] pub enum GetAttrError { #[error("inode not found")] InodeNotFound, } -impl From for i32 { - fn from(e: GetAttrError) -> Self { - match e { - GetAttrError::InodeNotFound => libc::ENOENT, - } - } -} - -#[derive(Debug, Error)] +#[derive(Debug, Clone, Copy, Error)] pub enum OpenError { #[error("inode not found")] InodeNotFound, } -impl From for i32 { - fn from(e: OpenError) -> Self { - match e { - OpenError::InodeNotFound => libc::ENOENT, - } - } -} - #[derive(Debug, Error)] pub enum ReadError { #[error("file not open")] @@ -113,17 +85,6 @@ pub enum ReadError { Base64Decode(#[from] base64::DecodeError), } -impl From for i32 { - fn from(e: ReadError) -> Self { - match e { - ReadError::FileNotOpen => libc::EBADF, - ReadError::InodeNotFound => libc::ENOENT, - ReadError::RemoteMesaError(_) | ReadError::Base64Decode(_) => 
libc::EIO, - ReadError::NotAFile => libc::EISDIR, - } - } -} - #[derive(Debug, Error)] pub enum ReadDirError { #[error("inode not found")] @@ -143,18 +104,7 @@ impl From for ReadDirError { fn from(e: LookupError) -> Self { match e { LookupError::RemoteMesaError(api) => Self::RemoteMesaError(api), - LookupError::InodeNotFound | LookupError::FileDoesNotExist => Self::InodeNotFound, - } - } -} - -impl From for i32 { - fn from(e: ReadDirError) -> Self { - match e { - ReadDirError::InodeNotFound => libc::ENOENT, - ReadDirError::RemoteMesaError(_) => libc::EIO, - ReadDirError::NotADirectory => libc::ENOTDIR, - ReadDirError::NotPermitted => libc::EPERM, + LookupError::InodeNotFound => Self::InodeNotFound, } } } @@ -165,18 +115,39 @@ pub enum ReleaseError { FileNotOpen, } -impl From for i32 { - fn from(e: ReleaseError) -> Self { - match e { - ReleaseError::FileNotOpen => libc::EBADF, - } - } +/// A directory entry for readdir results, using lib types. +pub struct FsDirEntry { + pub ino: InodeAddr, + pub name: OsString, + pub itype: INodeType, } -/// Allows a parent compositor to peek at cached attrs from a child filesystem. +/// Trait for child filesystems composed by [`CompositeFs`](super::composite::CompositeFs). +/// +/// Uses lib types (`INode`, `InodeAddr`) directly — no conversion to/from `FileAttr`. +/// Replaces the old `Fs + InodeCachePeek` bound. #[async_trait::async_trait] -pub(super) trait InodeCachePeek { - async fn peek_attr(&self, ino: Inode) -> Option; +pub(super) trait ChildFs: Send + Sync { + /// Look up a child by name within the given parent directory. + async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result; + + /// List all children of a directory, returning full `INode` data for each. + async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError>; + + /// Open a file for reading. + async fn open(&mut self, ino: InodeAddr, flags: LibOpenFlags) -> Result; + + /// Read data from an open file. 
+ async fn read( + &mut self, + ino: InodeAddr, + fh: FileHandle, + offset: u64, + size: u32, + ) -> Result; + + /// Release (close) a file handle. + async fn release(&mut self, ino: InodeAddr, fh: FileHandle) -> Result<(), ReleaseError>; } #[cfg(test)] @@ -189,12 +160,6 @@ mod tests { assert!(matches!(err, ReadDirError::InodeNotFound)); } - #[test] - fn lookup_file_does_not_exist_converts_to_readdir_inode_not_found() { - let err: ReadDirError = LookupError::FileDoesNotExist.into(); - assert!(matches!(err, ReadDirError::InodeNotFound)); - } - #[test] fn lookup_remote_error_converts_to_readdir_remote_error() { let api_err = MesaApiError::Response { diff --git a/src/fs/mescloud/composite.rs b/src/fs/mescloud/composite.rs index 6dbac25..bb840f9 100644 --- a/src/fs/mescloud/composite.rs +++ b/src/fs/mescloud/composite.rs @@ -1,308 +1,461 @@ use std::collections::HashMap; use std::ffi::OsStr; +use std::sync::atomic::{AtomicU64, Ordering}; use bytes::Bytes; -use tracing::{instrument, trace, warn}; - -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, OpenFlags, +use git_fs::cache::async_backed::FutureBackedCache; +use git_fs::fs::dcache::DCache; +use git_fs::fs::{ + AsyncFsStats, FileHandle, INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags, }; +use rustc_hash::FxHashMap; +use tracing::{instrument, trace}; use super::common::{ - GetAttrError, InodeCachePeek, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, + ChildFs, FsDirEntry, GetAttrError, LookupError, OpenError, ReadDirError, ReadError, + ReleaseError, }; -use super::icache::{InodeControlBlock, MescloudICache}; -/// A child filesystem slot: inner filesystem + bidirectional inode/fh bridge. +/// Bidirectional inode mapping between outer (composite) and inner (child) address spaces. +/// +/// Convention: **outer = left, inner = right**. 
+pub(super) struct InodeBridge { + map: bimap::BiMap, +} + +impl InodeBridge { + pub fn new() -> Self { + Self { + map: bimap::BiMap::new(), + } + } + + pub fn insert(&mut self, outer: InodeAddr, inner: InodeAddr) { + self.map.insert(outer, inner); + } + + pub fn forward(&self, outer: InodeAddr) -> Option { + self.map.get_by_left(&outer).copied() + } + + #[expect(dead_code, reason = "will be needed by future callers")] + pub fn backward(&self, inner: InodeAddr) -> Option { + self.map.get_by_right(&inner).copied() + } + + /// Look up inner->outer, or allocate a new outer address if unmapped. + pub fn backward_or_insert( + &mut self, + inner: InodeAddr, + allocate: impl FnOnce() -> InodeAddr, + ) -> InodeAddr { + if let Some(&outer) = self.map.get_by_right(&inner) { + outer + } else { + let outer = allocate(); + self.map.insert(outer, inner); + outer + } + } + + pub fn remove_by_outer(&mut self, outer: InodeAddr) { + self.map.remove_by_left(&outer); + } + + #[expect(dead_code, reason = "will be needed by future callers")] + pub fn get_inner(&self, outer: InodeAddr) -> Option<&InodeAddr> { + self.map.get_by_left(&outer) + } +} + pub(super) struct ChildSlot { pub inner: Inner, - pub bridge: HashMapBridge, + pub bridge: InodeBridge, } -/// Layered filesystem that presents multiple child filesystems under a single -/// inode namespace. -/// -/// `MesaCloud`'s filesystem is a hierarchy of compositions: -/// -/// ```text -/// MesaFS (CompositeFs<_, OrgFs>) -/// └─ OrgFs (CompositeFs<_, RepoFs>) -/// └─ RepoFs (leaf — backed by git) -/// ``` -/// -/// Each child filesystem numbers its inodes starting from 1, so the composite -/// maintains a bidirectional inode/file-handle bridge per child (see -/// [`ChildSlot`]) to translate between the outer namespace visible to FUSE and -/// each child's internal namespace. 
-pub(super) struct CompositeFs -where - R: IcbResolver, -{ - pub icache: MescloudICache, - pub file_table: FileTable, - pub readdir_buf: Vec, - /// Maps outer inode to index into `slots` for child-root inodes. - pub child_inodes: HashMap, - /// Maps every translated outer inode to its owning slot index. - pub inode_to_slot: HashMap, - pub slots: Vec>, +/// Tracks an open file: which child slot owns it and the inner fh. +struct OpenFileEntry { + slot_idx: usize, + inner_ino: InodeAddr, + inner_fh: FileHandle, +} + +pub(super) struct CompositeFs { + pub(super) inode_table: FutureBackedCache, + pub(super) directory_cache: DCache, + readdir_populated: FutureBackedCache, + next_ino: AtomicU64, + next_fh: AtomicU64, + refcounts: FxHashMap, + pub(super) readdir_buf: Vec, + open_files: HashMap, + pub(super) child_inodes: HashMap, + pub(super) inode_to_slot: HashMap, + pub(super) slots: Vec>, + fs_owner: (u32, u32), + block_size: u32, } -impl CompositeFs -where - R: IcbResolver, - Inner: Fs< - LookupError = LookupError, - GetAttrError = GetAttrError, - OpenError = OpenError, - ReadError = ReadError, - ReaddirError = ReadDirError, - ReleaseError = ReleaseError, - > + InodeCachePeek - + Send - + Sync, -{ - /// Look up which child slot owns an inode via direct map. 
- #[instrument(name = "CompositeFs::slot_for_inode", skip(self))] - pub fn slot_for_inode(&self, ino: Inode) -> Option { +impl CompositeFs { + pub const ROOT_INO: InodeAddr = 1; + + pub fn new(fs_owner: (u32, u32), block_size: u32) -> Self { + let inode_table = FutureBackedCache::default(); + let now = std::time::SystemTime::now(); + let root = INode { + addr: Self::ROOT_INO, + permissions: InodePerms::from_bits_truncate(0o755), + uid: fs_owner.0, + gid: fs_owner.1, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, + }; + inode_table.insert_sync(Self::ROOT_INO, root); + + let mut refcounts = FxHashMap::default(); + refcounts.insert(Self::ROOT_INO, 1); + + Self { + inode_table, + directory_cache: DCache::new(), + readdir_populated: FutureBackedCache::default(), + next_ino: AtomicU64::new(Self::ROOT_INO + 1), + next_fh: AtomicU64::new(1), + refcounts, + readdir_buf: Vec::new(), + open_files: HashMap::new(), + child_inodes: HashMap::new(), + inode_to_slot: HashMap::new(), + slots: Vec::new(), + fs_owner, + block_size, + } + } + + pub fn allocate_inode(&self) -> InodeAddr { + self.next_ino.fetch_add(1, Ordering::Relaxed) + } + + pub fn fs_owner(&self) -> (u32, u32) { + self.fs_owner + } + + #[expect(dead_code, reason = "available for future use")] + pub fn block_size(&self) -> u32 { + self.block_size + } + + pub fn add_child(&mut self, inner: Inner, child_root_ino: InodeAddr) -> InodeAddr { + self.add_child_with_parent(inner, child_root_ino, Self::ROOT_INO) + } + + pub fn cache_inode(&self, inode: INode) { + self.inode_table.insert_sync(inode.addr, inode); + } + + /// Insert the inode into the table and initialise its refcount to zero. + /// + /// The caller is responsible for bumping the refcount via [`inc_rc`](Self::inc_rc). 
+ pub fn cache_inode_and_init_rc(&mut self, inode: INode) { + let addr = inode.addr; + self.inode_table.insert_sync(addr, inode); + self.refcounts.entry(addr).or_insert(0); + } + + pub fn inc_rc(&mut self, addr: InodeAddr) -> Option { + let rc = self.refcounts.get_mut(&addr)?; + *rc += 1; + Some(*rc) + } + + pub fn slot_for_inode(&self, ino: InodeAddr) -> Option { self.inode_to_slot.get(&ino).copied() } - /// Allocate an outer file handle and map it through the bridge. - #[must_use] - pub fn alloc_fh(&mut self, slot_idx: usize, inner_fh: FileHandle) -> FileHandle { - let fh = self.file_table.allocate(); - self.slots[slot_idx].bridge.insert_fh(fh, inner_fh); - fh + /// Like [`add_child`](Self::add_child) but sets a custom parent inode + /// instead of always using `ROOT_INO`. + pub fn add_child_with_parent( + &mut self, + inner: Inner, + child_root_ino: InodeAddr, + parent_ino: InodeAddr, + ) -> InodeAddr { + let outer_ino = self.allocate_inode(); + let now = std::time::SystemTime::now(); + let inode = INode { + addr: outer_ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.fs_owner.0, + gid: self.fs_owner.1, + create_time: now, + last_modified_at: now, + parent: Some(parent_ino), + size: 0, + itype: INodeType::Directory, + }; + self.inode_table.insert_sync(outer_ino, inode); + + let mut bridge = InodeBridge::new(); + bridge.insert(outer_ino, child_root_ino); + + let idx = self.slots.len(); + self.slots.push(ChildSlot { inner, bridge }); + self.child_inodes.insert(outer_ino, idx); + self.inode_to_slot.insert(outer_ino, idx); + + outer_ino } +} - /// Translate an inner inode to an outer inode, allocating if needed. - /// Also inserts a stub ICB into the outer icache when the inode is new. 
- #[instrument(name = "CompositeFs::translate_inner_ino", skip(self, name))] - pub async fn translate_inner_ino( +impl CompositeFs { + #[instrument(name = "CompositeFs::delegated_lookup", skip(self, name))] + pub async fn delegated_lookup( &mut self, - slot_idx: usize, - inner_ino: Inode, - parent_outer_ino: Inode, + parent: InodeAddr, name: &OsStr, - ) -> Inode { - let outer_ino = self.slots[slot_idx] + ) -> Result { + // Fast path: DCache hit + inode still in table + if let Some(dentry) = self.directory_cache.lookup(LoadedAddr(parent), name) + && let Some(inode) = self.inode_table.get(&dentry.ino.0).await + { + *self.refcounts.entry(inode.addr).or_insert(0) += 1; + return Ok(inode); + } + + // Slow path: delegate to child + let idx = self + .inode_to_slot + .get(&parent) + .copied() + .ok_or(LookupError::InodeNotFound)?; + let inner_parent = self.slots[idx] .bridge - .backward_or_insert_inode(inner_ino, || self.icache.allocate_inode()); - self.inode_to_slot.insert(outer_ino, slot_idx); - self.icache - .entry_or_insert_icb( - outer_ino, - || InodeControlBlock { - rc: 0, - path: name.into(), - parent: Some(parent_outer_ino), - attr: None, - children: None, - }, - |_| {}, + .forward(parent) + .ok_or(LookupError::InodeNotFound)?; + let inner_inode = self.slots[idx].inner.lookup(inner_parent, name).await?; + + let next_ino = &self.next_ino; + let outer_ino = self.slots[idx] + .bridge + .backward_or_insert(inner_inode.addr, || { + next_ino.fetch_add(1, Ordering::Relaxed) + }); + self.inode_to_slot.insert(outer_ino, idx); + + let remapped = INode { + addr: outer_ino, + ..inner_inode + }; + self.inode_table + .get_or_init(outer_ino, || async move { remapped }) + .await; + + let is_dir = matches!(inner_inode.itype, INodeType::Directory); + self.directory_cache + .insert( + LoadedAddr(parent), + name.to_os_string(), + LoadedAddr(outer_ino), + is_dir, ) .await; - outer_ino + + *self.refcounts.entry(outer_ino).or_insert(0) += 1; + let rc = self.refcounts[&outer_ino]; + 
trace!( + outer_ino, + inner_ino = inner_inode.addr, + rc, + "lookup: resolved via delegation" + ); + + Ok(remapped) + } + + #[instrument(name = "CompositeFs::delegated_readdir", skip(self))] + pub async fn delegated_readdir( + &mut self, + ino: InodeAddr, + ) -> Result<&[FsDirEntry], ReadDirError> { + let idx = self + .inode_to_slot + .get(&ino) + .copied() + .ok_or(ReadDirError::InodeNotFound)?; + + if self.readdir_populated.get(&LoadedAddr(ino)).await.is_none() { + let inner_ino = self.slots[idx] + .bridge + .forward(ino) + .ok_or(ReadDirError::InodeNotFound)?; + let inner_entries = self.slots[idx].inner.readdir(inner_ino).await?; + + for (name, child_inode) in &inner_entries { + let next_ino = &self.next_ino; + let outer_child = self.slots[idx] + .bridge + .backward_or_insert(child_inode.addr, || { + next_ino.fetch_add(1, Ordering::Relaxed) + }); + self.inode_to_slot.insert(outer_child, idx); + + let remapped = INode { + addr: outer_child, + ..*child_inode + }; + self.inode_table + .get_or_init(outer_child, || async move { remapped }) + .await; + + let is_dir = matches!(child_inode.itype, INodeType::Directory); + self.directory_cache + .insert( + LoadedAddr(ino), + name.clone(), + LoadedAddr(outer_child), + is_dir, + ) + .await; + } + + self.readdir_populated + .get_or_init(LoadedAddr(ino), || async {}) + .await; + } + + let mut children = self.directory_cache.readdir(LoadedAddr(ino)).await; + children.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + + let mut entries = Vec::with_capacity(children.len()); + for (name, dvalue) in &children { + if let Some(inode) = self.inode_table.get(&dvalue.ino.0).await { + entries.push(FsDirEntry { + ino: inode.addr, + name: name.clone(), + itype: inode.itype, + }); + } + } + + self.readdir_buf = entries; + Ok(&self.readdir_buf) } - /// Get cached file attributes for an inode. 
#[instrument(name = "CompositeFs::delegated_getattr", skip(self))] - pub async fn delegated_getattr(&self, ino: Inode) -> Result { - self.icache.get_attr(ino).await.ok_or_else(|| { - warn!(ino, "getattr on unknown inode"); - GetAttrError::InodeNotFound - }) + pub async fn delegated_getattr(&self, ino: InodeAddr) -> Result { + self.inode_table + .get(&ino) + .await + .ok_or(GetAttrError::InodeNotFound) + } + + #[expect(dead_code, reason = "will be needed by future callers")] + #[must_use] + pub fn delegated_statfs(&self) -> AsyncFsStats { + AsyncFsStats { + block_size: self.block_size, + total_blocks: 0, + free_blocks: 0, + available_blocks: 0, + total_inodes: self.inode_table.len() as u64, + free_inodes: 0, + max_filename_length: 255, + } } - /// Find slot, forward inode, delegate to inner, allocate outer file handle. #[instrument(name = "CompositeFs::delegated_open", skip(self))] pub async fn delegated_open( &mut self, - ino: Inode, + ino: InodeAddr, flags: OpenFlags, - ) -> Result { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "open on inode not belonging to any child"); - OpenError::InodeNotFound - })?; + ) -> Result { + let idx = self + .inode_to_slot + .get(&ino) + .copied() + .ok_or(OpenError::InodeNotFound)?; let inner_ino = self.slots[idx] .bridge - .forward_or_insert_inode(ino, || unreachable!("open: ino should be mapped")); - let inner_open = self.slots[idx].inner.open(inner_ino, flags).await?; - let outer_fh = self.alloc_fh(idx, inner_open.handle); - trace!( - ino, + .forward(ino) + .ok_or(OpenError::InodeNotFound)?; + let inner_fh = self.slots[idx].inner.open(inner_ino, flags).await?; + + let outer_fh = self.next_fh.fetch_add(1, Ordering::Relaxed); + self.open_files.insert( outer_fh, - inner_fh = inner_open.handle, - "open: assigned file handle" + OpenFileEntry { + slot_idx: idx, + inner_ino, + inner_fh, + }, ); - Ok(OpenFile { - handle: outer_fh, - options: inner_open.options, - }) + + trace!(ino, outer_fh, inner_fh, "open: 
assigned fh"); + Ok(outer_fh) } - /// Find slot, forward inode and file handle, delegate read to inner. - #[expect(clippy::too_many_arguments, reason = "mirrors fuser read API")] #[instrument(name = "CompositeFs::delegated_read", skip(self))] pub async fn delegated_read( &mut self, - ino: Inode, fh: FileHandle, offset: u64, size: u32, - flags: OpenFlags, - lock_owner: Option, ) -> Result { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "read on inode not belonging to any child"); - ReadError::InodeNotFound - })?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("read: ino should be mapped")); - let inner_fh = self.slots[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "read: no fh mapping found"); - ReadError::FileNotOpen - })?; - self.slots[idx] + let entry = self.open_files.get(&fh).ok_or(ReadError::FileNotOpen)?; + let slot_idx = entry.slot_idx; + let inner_ino = entry.inner_ino; + let inner_fh = entry.inner_fh; + self.slots[slot_idx] .inner - .read(inner_ino, inner_fh, offset, size, flags, lock_owner) + .read(inner_ino, inner_fh, offset, size) .await } - /// Find slot, forward inode and file handle, delegate release to inner, - /// then clean up the file handle mapping. 
#[instrument(name = "CompositeFs::delegated_release", skip(self))] - pub async fn delegated_release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "release on inode not belonging to any child"); - ReleaseError::FileNotOpen - })?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("release: ino should be mapped")); - let inner_fh = self.slots[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "release: no fh mapping found"); - ReleaseError::FileNotOpen - })?; - let result = self.slots[idx] + pub async fn delegated_release(&mut self, fh: FileHandle) -> Result<(), ReleaseError> { + let entry = self + .open_files + .remove(&fh) + .ok_or(ReleaseError::FileNotOpen)?; + let result = self.slots[entry.slot_idx] .inner - .release(inner_ino, inner_fh, flags, flush) + .release(entry.inner_ino, entry.inner_fh) .await; - self.slots[idx].bridge.remove_fh_by_left(fh); - trace!(ino, fh, "release: cleaned up fh mapping"); + trace!(fh, "release: cleaned up fh mapping"); result } - /// Propagate forget to the inner filesystem, evict from icache, and clean - /// up bridge mappings. Returns `true` if the inode was evicted. + /// Returns `true` if the inode was evicted. /// - /// Child-root inodes (those in `child_inodes`) do NOT propagate forget to - /// the inner filesystem: the inner root's `rc=1` is an initialization - /// invariant unrelated to outer FUSE lookup counts. Propagating would - /// evict the inner root, breaking all subsequent operations on that child. + /// The composite only manages its own refcounts and inode table. + /// Inner filesystem inodes are managed by the inner FS itself through + /// its own lifecycle; the composite does not propagate forget to children. 
+ #[expect(dead_code, reason = "will be needed by future callers")] #[must_use] #[instrument(name = "CompositeFs::delegated_forget", skip(self))] - pub async fn delegated_forget(&mut self, ino: Inode, nlookups: u64) -> bool { - let slot_idx = self.slot_for_inode(ino); - let is_child_root = self.child_inodes.contains_key(&ino); - if !is_child_root - && let Some(idx) = slot_idx - && let Some(&inner_ino) = self.slots[idx].bridge.inode_map_get_by_left(ino) - { - self.slots[idx].inner.forget(inner_ino, nlookups).await; - } - if self.icache.forget(ino, nlookups).await.is_some() { - self.child_inodes.remove(&ino); - self.inode_to_slot.remove(&ino); - if let Some(idx) = slot_idx { - self.slots[idx].bridge.remove_inode_by_left(ino); + pub fn delegated_forget(&mut self, ino: InodeAddr, nlookups: u64) -> bool { + let slot_idx = self.inode_to_slot.get(&ino).copied(); + + if let Some(rc) = self.refcounts.get_mut(&ino) { + *rc = rc.saturating_sub(nlookups); + if *rc > 0 { + return false; } - true + self.refcounts.remove(&ino); } else { - false + return false; } - } - - /// Return filesystem statistics from the icache. - #[must_use] - pub fn delegated_statfs(&self) -> FilesystemStats { - self.icache.statfs() - } - - /// Delegation branch for lookup when the parent is owned by a child slot. 
- #[instrument(name = "CompositeFs::delegated_lookup", skip(self, name))] - pub async fn delegated_lookup( - &mut self, - parent: Inode, - name: &OsStr, - ) -> Result { - let idx = self - .slot_for_inode(parent) - .ok_or(LookupError::InodeNotFound)?; - let inner_parent = self.slots[idx] - .bridge - .forward_or_insert_inode(parent, || unreachable!("lookup: parent should be mapped")); - let inner_attr = self.slots[idx].inner.lookup(inner_parent, name).await?; - let inner_ino = inner_attr.common().ino; - let outer_ino = self.translate_inner_ino(idx, inner_ino, parent, name).await; - let outer_attr = self.slots[idx].bridge.attr_backward(inner_attr); - self.icache.cache_attr(outer_ino, outer_attr).await; - // None means the entry was concurrently evicted; fail the lookup so - // the kernel doesn't hold a ref the cache no longer tracks. - let rc = self - .icache - .inc_rc(outer_ino) - .await - .ok_or(LookupError::InodeNotFound)?; - trace!(outer_ino, inner_ino, rc, "lookup: resolved via delegation"); - Ok(outer_attr) - } - /// Delegation branch for readdir when the inode is owned by a child slot. 
- #[instrument(name = "CompositeFs::delegated_readdir", skip(self))] - pub async fn delegated_readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - let idx = self - .slot_for_inode(ino) - .ok_or(ReadDirError::InodeNotFound)?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("readdir: ino should be mapped")); - let inner_entries = self.slots[idx].inner.readdir(inner_ino).await?; - let inner_entries: Vec = inner_entries.to_vec(); - let evicted = self.icache.evict_zero_rc_children(ino).await; - for evicted_ino in evicted { - if let Some(slot) = self.inode_to_slot.remove(&evicted_ino) { - self.slots[slot].bridge.remove_inode_by_left(evicted_ino); - } - self.child_inodes.remove(&evicted_ino); + self.inode_table.remove_sync(&ino); + self.child_inodes.remove(&ino); + self.inode_to_slot.remove(&ino); + if let Some(idx) = slot_idx { + self.slots[idx].bridge.remove_by_outer(ino); } - let mut outer_entries = Vec::with_capacity(inner_entries.len()); - for entry in &inner_entries { - let outer_child_ino = self - .translate_inner_ino(idx, entry.ino, ino, &entry.name) - .await; - if let Some(inner_attr) = self.slots[idx].inner.peek_attr(entry.ino).await { - let outer_attr = self.slots[idx].bridge.attr_backward(inner_attr); - self.icache.cache_attr(outer_child_ino, outer_attr).await; - } - outer_entries.push(DirEntry { - ino: outer_child_ino, - name: entry.name.clone(), - kind: entry.kind, - }); - } - self.readdir_buf = outer_entries; - Ok(&self.readdir_buf) + + true } } diff --git a/src/fs/mescloud/icache.rs b/src/fs/mescloud/icache.rs deleted file mode 100644 index 15f1f5d..0000000 --- a/src/fs/mescloud/icache.rs +++ /dev/null @@ -1,437 +0,0 @@ -//! Mescloud-specific inode control block, helpers, and directory cache wrapper. 
- -use std::ffi::OsStr; -use std::time::SystemTime; - -use crate::fs::icache::{AsyncICache, IcbLike, IcbResolver, InodeFactory}; -use crate::fs::r#trait::{ - CommonFileAttr, DirEntryType, FileAttr, FilesystemStats, Inode, Permissions, -}; - -/// Inode control block for mescloud filesystem layers. -#[derive(Clone)] -pub struct InodeControlBlock { - pub parent: Option, - pub rc: u64, - pub path: std::path::PathBuf, - /// Cached file attributes from the last lookup. - pub attr: Option, - /// Cached directory children from the resolver (directories only). - pub children: Option>, -} - -impl IcbLike for InodeControlBlock { - fn new_root(path: std::path::PathBuf) -> Self { - Self { - rc: 1, - parent: None, - path, - attr: None, - children: None, - } - } - - fn rc(&self) -> u64 { - self.rc - } - - fn rc_mut(&mut self) -> &mut u64 { - &mut self.rc - } - - fn needs_resolve(&self) -> bool { - match self.attr { - None => true, - Some(FileAttr::Directory { .. }) => self.children.is_none(), - Some(_) => false, - } - } -} - -/// Calculate the number of blocks needed for a given size. -pub fn blocks_of_size(block_size: u32, size: u64) -> u64 { - size.div_ceil(u64::from(block_size)) -} - -/// Free function -- usable by both `MescloudICache` and resolvers. -pub fn make_common_file_attr( - ino: Inode, - perm: u16, - atime: SystemTime, - mtime: SystemTime, - fs_owner: (u32, u32), - block_size: u32, -) -> CommonFileAttr { - CommonFileAttr { - ino, - atime, - mtime, - ctime: SystemTime::UNIX_EPOCH, - crtime: SystemTime::UNIX_EPOCH, - perm: Permissions::from_bits_truncate(perm), - nlink: 1, - uid: fs_owner.0, - gid: fs_owner.1, - blksize: block_size, - } -} - -/// Mescloud-specific directory cache wrapper over `AsyncICache`. -pub struct MescloudICache> { - inner: AsyncICache, - inode_factory: InodeFactory, - fs_owner: (u32, u32), - block_size: u32, -} - -impl> MescloudICache { - /// Create a new `MescloudICache`. Initializes root ICB (rc=1), caches root dir attr. 
- pub fn new(resolver: R, root_ino: Inode, fs_owner: (u32, u32), block_size: u32) -> Self { - let cache = Self { - inner: AsyncICache::new(resolver, root_ino, "/"), - inode_factory: InodeFactory::new(root_ino + 1), - fs_owner, - block_size, - }; - - // Set root directory attr synchronously during initialization - let now = SystemTime::now(); - let root_attr = FileAttr::Directory { - common: make_common_file_attr(root_ino, 0o755, now, now, fs_owner, block_size), - }; - cache.inner.get_icb_mut_sync(root_ino, |icb| { - icb.attr = Some(root_attr); - }); - - cache - } - - // -- Delegated from AsyncICache (async) -- - - pub fn contains(&self, ino: Inode) -> bool { - self.inner.contains(ino) - } - - pub async fn get_icb( - &self, - ino: Inode, - // `Sync` required: see comment on `AsyncICache::get_icb`. - f: impl Fn(&InodeControlBlock) -> T + Send + Sync, - ) -> Option { - self.inner.get_icb(ino, f).await - } - - pub async fn insert_icb(&self, ino: Inode, icb: InodeControlBlock) { - self.inner.insert_icb(ino, icb).await; - } - - pub async fn entry_or_insert_icb( - &self, - ino: Inode, - factory: impl FnOnce() -> InodeControlBlock, - then: impl FnOnce(&mut InodeControlBlock) -> T, - ) -> T { - self.inner.entry_or_insert_icb(ino, factory, then).await - } - - pub async fn inc_rc(&self, ino: Inode) -> Option { - self.inner.inc_rc(ino).await - } - - pub async fn forget(&self, ino: Inode, nlookups: u64) -> Option { - self.inner.forget(ino, nlookups).await - } - - pub async fn get_or_resolve( - &self, - ino: Inode, - then: impl FnOnce(&InodeControlBlock) -> T, - ) -> Result { - self.inner.get_or_resolve(ino, then).await - } - - // -- Domain-specific -- - - /// Allocate a new inode number. 
- pub fn allocate_inode(&self) -> Inode { - self.inode_factory.allocate() - } - - pub async fn get_attr(&self, ino: Inode) -> Option { - self.inner.get_icb(ino, |icb| icb.attr).await.flatten() - } - - pub async fn cache_attr(&self, ino: Inode, attr: FileAttr) { - self.inner - .get_icb_mut(ino, |icb| { - icb.attr = Some(attr); - }) - .await; - } - - pub fn fs_owner(&self) -> (u32, u32) { - self.fs_owner - } - - pub fn block_size(&self) -> u32 { - self.block_size - } - - pub fn statfs(&self) -> FilesystemStats { - FilesystemStats { - block_size: self.block_size, - fragment_size: u64::from(self.block_size), - total_blocks: 0, - free_blocks: 0, - available_blocks: 0, - total_inodes: self.inner.inode_count() as u64, - free_inodes: 0, - available_inodes: 0, - filesystem_id: 0, - mount_flags: 0, - max_filename_length: 255, - } - } - - /// Evict all `Available` children of `parent` that have `rc == 0`. - /// Returns the list of evicted inode numbers so callers can clean up - /// associated state (e.g., bridge mappings, slot tracking). - pub async fn evict_zero_rc_children(&self, parent: Inode) -> Vec { - let mut to_evict = Vec::new(); - self.inner - .for_each(|&ino, icb| { - if icb.rc == 0 && icb.parent == Some(parent) { - to_evict.push(ino); - } - }) - .await; - let mut evicted = Vec::new(); - for ino in to_evict { - if self.inner.forget(ino, 0).await.is_some() { - evicted.push(ino); - } - } - evicted - } - - /// Find an existing child by (parent, name) or allocate a new inode. - /// If new, inserts a stub ICB (parent+path set, attr=None, children=None, rc=0). - /// Does NOT bump rc. Returns the inode number. - /// - /// # Safety invariant - /// - /// The `for_each` scan and `insert_icb` are **not** atomic. If two callers - /// race with the same `(parent, name)`, both may allocate distinct inodes - /// for the same logical child. This is currently safe because all callers - /// go through `&mut self` on the owning `Fs` implementation. 
- pub async fn ensure_child_ino(&self, parent: Inode, name: &OsStr) -> Inode { - // Search for existing child by parent + name - let mut existing_ino = None; - self.inner - .for_each(|&ino, icb| { - if icb.parent == Some(parent) && icb.path.as_os_str() == name { - existing_ino = Some(ino); - } - }) - .await; - - if let Some(ino) = existing_ino { - return ino; - } - - // Allocate new inode and insert stub - let ino = self.inode_factory.allocate(); - self.inner - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: name.into(), - parent: Some(parent), - attr: None, - children: None, - }, - ) - .await; - ino - } -} - -#[cfg(test)] -mod tests { - use std::future::Future; - - use super::*; - use crate::fs::icache::async_cache::AsyncICache; - use crate::fs::r#trait::DirEntryType; - - fn dummy_dir_attr(ino: Inode) -> FileAttr { - let now = SystemTime::now(); - FileAttr::Directory { - common: make_common_file_attr(ino, 0o755, now, now, (0, 0), 4096), - } - } - - fn dummy_file_attr(ino: Inode) -> FileAttr { - let now = SystemTime::now(); - FileAttr::RegularFile { - common: make_common_file_attr(ino, 0o644, now, now, (0, 0), 4096), - size: 100, - blocks: 1, - } - } - - #[test] - fn needs_resolve_stub_returns_true() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 0, - path: "stub".into(), - attr: None, - children: None, - }; - assert!(icb.needs_resolve()); - } - - #[test] - fn needs_resolve_file_with_attr_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "file.txt".into(), - attr: Some(dummy_file_attr(2)), - children: None, - }; - assert!(!icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_without_children_returns_true() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "dir".into(), - attr: Some(dummy_dir_attr(3)), - children: None, - }; - assert!(icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_with_children_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - 
rc: 1, - path: "dir".into(), - attr: Some(dummy_dir_attr(3)), - children: Some(vec![("README.md".to_owned(), DirEntryType::RegularFile)]), - }; - assert!(!icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_with_empty_children_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "empty-dir".into(), - attr: Some(dummy_dir_attr(4)), - children: Some(vec![]), - }; - assert!(!icb.needs_resolve()); - } - - struct NoOpResolver; - - impl IcbResolver for NoOpResolver { - type Icb = InodeControlBlock; - type Error = std::convert::Infallible; - - #[expect( - clippy::manual_async_fn, - reason = "must match IcbResolver trait signature" - )] - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - async { unreachable!("NoOpResolver should not be called") } - } - } - - fn test_mescloud_cache() -> MescloudICache { - MescloudICache::new(NoOpResolver, 1, (0, 0), 4096) - } - - #[tokio::test] - async fn evict_zero_rc_children_removes_stubs() { - let cache = test_mescloud_cache(); - - // Insert stubs as children of root (ino=1) with rc=0 - cache - .insert_icb( - 10, - InodeControlBlock { - rc: 0, - path: "child_a".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - cache - .insert_icb( - 11, - InodeControlBlock { - rc: 0, - path: "child_b".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - - // Insert a child with rc > 0 — should survive - cache - .insert_icb( - 12, - InodeControlBlock { - rc: 1, - path: "active".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - - // Insert a stub under a different parent — should survive - cache - .insert_icb( - 20, - InodeControlBlock { - rc: 0, - path: "other".into(), - parent: Some(12), - attr: None, - children: None, - }, - ) - .await; - - let evicted = cache.evict_zero_rc_children(1).await; - assert_eq!(evicted.len(), 2, "should evict 2 zero-rc children of root"); - 
- assert!(!cache.contains(10), "child_a should be evicted"); - assert!(!cache.contains(11), "child_b should be evicted"); - assert!(cache.contains(12), "active child should survive"); - assert!( - cache.contains(20), - "child of different parent should survive" - ); - } -} diff --git a/src/fs/mescloud/mod.rs b/src/fs/mescloud/mod.rs index 1a3cce8..db0319c 100644 --- a/src/fs/mescloud/mod.rs +++ b/src/fs/mescloud/mod.rs @@ -1,24 +1,22 @@ -use std::collections::HashMap; -use std::ffi::OsStr; +use std::ffi::{OsStr, OsString}; use std::future::Future; +use std::sync::Arc; use std::time::SystemTime; use bytes::Bytes; +use git_fs::fs::{FileHandle, INode, INodeType, InodeAddr, InodePerms, OpenFlags}; use mesa_dev::MesaClient; use opentelemetry::propagation::Injector; use secrecy::ExposeSecret as _; -use tracing::{Instrument as _, instrument, trace, warn}; +use tracing::{instrument, trace, warn}; use tracing_opentelemetry::OpenTelemetrySpanExt as _; use crate::app_config::CacheConfig; -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, - OpenFlags, -}; -use composite::{ChildSlot, CompositeFs}; +pub use common::FsDirEntry; +use composite::CompositeFs; + +pub use common::{GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; #[cfg(feature = "staging")] const MESA_API_BASE_URL: &str = "https://staging.depot.mesa.dev/api/v1"; @@ -27,17 +25,11 @@ const MESA_API_BASE_URL: &str = "https://depot.mesa.dev/api/v1"; mod common; mod composite; -use common::InodeControlBlock; -pub use common::{GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; - -use icache as mescloud_icache; -use icache::MescloudICache; mod org; pub use org::OrgConfig; use org::OrgFs; -pub mod icache; pub mod repo; struct HeaderInjector<'a>(&'a mut reqwest::header::HeaderMap); @@ -89,50 
+81,6 @@ fn build_mesa_client(api_key: &str) -> MesaClient { .build() } -struct MesaResolver { - fs_owner: (u32, u32), - block_size: u32, -} - -impl IcbResolver for MesaResolver { - type Icb = InodeControlBlock; - type Error = std::convert::Infallible; - - fn resolve( - &self, - ino: Inode, - stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { - let fs_owner = self.fs_owner; - let block_size = self.block_size; - async move { - let stub = stub.unwrap_or_else(|| InodeControlBlock { - parent: None, - path: "/".into(), - rc: 0, - attr: None, - children: None, - }); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }; - Ok(InodeControlBlock { - attr: Some(attr), - children: Some(vec![]), - ..stub - }) - } - .instrument(tracing::info_span!("MesaResolver::resolve", ino)) - } -} - /// Classifies an inode by its role in the mesa hierarchy. enum InodeRole { /// The filesystem root (ino == 1). @@ -146,11 +94,11 @@ enum InodeRole { /// Composes multiple [`OrgFs`] instances, each with its own inode namespace, /// delegating to [`CompositeFs`] for inode/fh translation at each boundary. pub struct MesaFS { - composite: CompositeFs, + composite: CompositeFs, } impl MesaFS { - const ROOT_NODE_INO: Inode = 1; + const ROOT_NODE_INO: InodeAddr = CompositeFs::::ROOT_INO; const BLOCK_SIZE: u32 = 4096; /// Create a new `MesaFS` instance. 
@@ -160,38 +108,17 @@ impl MesaFS { fs_owner: (u32, u32), cache: &CacheConfig, ) -> Self { - let resolver = MesaResolver { - fs_owner, - block_size: Self::BLOCK_SIZE, - }; - Self { - composite: CompositeFs { - icache: MescloudICache::new( - resolver, - Self::ROOT_NODE_INO, - fs_owner, - Self::BLOCK_SIZE, - ), - file_table: FileTable::new(), - readdir_buf: Vec::new(), - child_inodes: HashMap::new(), - inode_to_slot: HashMap::new(), - slots: orgs - .map(|org_conf| { - let client = build_mesa_client(org_conf.api_key.expose_secret()); - let org = OrgFs::new(org_conf.name, client, fs_owner, cache.clone()); - ChildSlot { - inner: org, - bridge: HashMapBridge::new(), - } - }) - .collect(), - }, + let mut composite = CompositeFs::new(fs_owner, Self::BLOCK_SIZE); + for org_conf in orgs { + let client = build_mesa_client(org_conf.api_key.expose_secret()); + let org = OrgFs::new(org_conf.name, client, fs_owner, cache.clone()); + composite.add_child(org, OrgFs::ROOT_INO); } + Self { composite } } /// Classify an inode by its role. - fn inode_role(&self, ino: Inode) -> Option { + fn inode_role(&self, ino: InodeAddr) -> Option { if ino == Self::ROOT_NODE_INO { return Some(InodeRole::Root); } @@ -205,10 +132,8 @@ impl MesaFS { } /// Ensure a mesa-level inode exists for the org at `org_idx`. - /// Seeds the bridge with (`mesa_org_ino`, `OrgFs::ROOT_INO`). /// Does NOT bump rc. - async fn ensure_org_inode(&mut self, org_idx: usize) -> (Inode, FileAttr) { - // Check if an inode already exists. 
+ async fn ensure_org_inode(&mut self, org_idx: usize) -> (InodeAddr, INode) { let existing_ino = self .composite .child_inodes @@ -217,104 +142,62 @@ impl MesaFS { .map(|(&ino, _)| ino); if let Some(existing_ino) = existing_ino { - if let Some(attr) = self.composite.icache.get_attr(existing_ino).await { - let rc = self - .composite - .icache - .get_icb(existing_ino, |icb| icb.rc) - .await - .unwrap_or(0); + if let Ok(inode) = self.composite.delegated_getattr(existing_ino).await { trace!( ino = existing_ino, - org_idx, rc, "ensure_org_inode: reusing existing inode" - ); - return (existing_ino, attr); - } - if self.composite.icache.contains(existing_ino) { - // ICB exists but attr missing — rebuild and cache. - warn!( - ino = existing_ino, - org_idx, "ensure_org_inode: attr missing, rebuilding" + org_idx, "ensure_org_inode: reusing existing inode" ); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - existing_ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(existing_ino, attr).await; - return (existing_ino, attr); + return (existing_ino, inode); } - // ICB was evicted — clean up stale tracking entries. 
warn!( ino = existing_ino, - org_idx, "ensure_org_inode: ICB evicted, cleaning up stale entry" + org_idx, "ensure_org_inode: evicted, rebuilding" ); - self.composite.child_inodes.remove(&existing_ino); - self.composite.inode_to_slot.remove(&existing_ino); + let now = SystemTime::now(); + let inode = INode { + addr: existing_ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(Self::ROOT_NODE_INO), + size: 0, + itype: INodeType::Directory, + }; + self.composite.cache_inode(inode); + self.composite.inode_to_slot.insert(existing_ino, org_idx); + self.composite.child_inodes.insert(existing_ino, org_idx); + return (existing_ino, inode); } - // Allocate new. + warn!( + org_idx, + "ensure_org_inode: no child_inodes entry for org slot" + ); let org_name = self.composite.slots[org_idx].inner.name().to_owned(); - let ino = self.composite.icache.allocate_inode(); - trace!(ino, org_idx, org = %org_name, "ensure_org_inode: allocated new inode"); - + let ino = self.composite.allocate_inode(); let now = SystemTime::now(); - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: org_name.as_str().into(), - parent: Some(Self::ROOT_NODE_INO), - attr: None, - children: None, - }, - ) - .await; - + let inode = INode { + addr: ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(Self::ROOT_NODE_INO), + size: 0, + itype: INodeType::Directory, + }; + self.composite.cache_inode(inode); self.composite.child_inodes.insert(ino, org_idx); self.composite.inode_to_slot.insert(ino, org_idx); - - // Reset bridge (may have stale mappings from a previous eviction cycle) - // and seed: mesa org-root <-> OrgFs::ROOT_INO. 
- self.composite.slots[org_idx].bridge = HashMapBridge::new(); - self.composite.slots[org_idx] - .bridge - .insert_inode(ino, OrgFs::ROOT_INO); - - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) + trace!(ino, org_idx, org = %org_name, "ensure_org_inode: allocated new inode"); + (ino, inode) } -} - -#[async_trait::async_trait] -impl Fs for MesaFS { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; #[instrument(name = "MesaFS::lookup", skip(self))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { + pub async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; match role { InodeRole::Root => { @@ -327,31 +210,23 @@ impl Fs for MesaFS { .ok_or(LookupError::InodeNotFound)?; trace!(org = org_name, "lookup: matched org"); - let (ino, attr) = self.ensure_org_inode(org_idx).await; - let rc = self - .composite - .icache + let (ino, inode) = self.ensure_org_inode(org_idx).await; + self.composite .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; - trace!(ino, org = org_name, rc, "lookup: resolved org inode"); - Ok(attr) + Ok(inode) } InodeRole::OrgOwned => self.composite.delegated_lookup(parent, name).await, } } #[instrument(name = "MesaFS::getattr", skip(self))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { + pub async fn getattr(&self, ino: InodeAddr) -> Result { self.composite.delegated_getattr(ino).await } #[instrument(name = "MesaFS::readdir", skip(self))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { + pub async fn readdir(&mut 
self, ino: InodeAddr) -> Result<&[FsDirEntry], ReadDirError> { let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; match role { InodeRole::Root => { @@ -365,11 +240,11 @@ impl Fs for MesaFS { let mut entries = Vec::with_capacity(org_info.len()); for (org_idx, name) in &org_info { - let (org_ino, _) = self.ensure_org_inode(*org_idx).await; - entries.push(DirEntry { - ino: org_ino, + let (entry_ino, _) = self.ensure_org_inode(*org_idx).await; + entries.push(FsDirEntry { + ino: entry_ino, name: name.clone().into(), - kind: DirEntryType::Directory, + itype: INodeType::Directory, }); } @@ -382,45 +257,169 @@ impl Fs for MesaFS { } #[instrument(name = "MesaFS::open", skip(self))] - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result { + pub async fn open( + &mut self, + ino: InodeAddr, + flags: OpenFlags, + ) -> Result { self.composite.delegated_open(ino, flags).await } #[instrument(name = "MesaFS::read", skip(self))] - async fn read( + pub async fn read( &mut self, - ino: Inode, fh: FileHandle, offset: u64, size: u32, - flags: OpenFlags, - lock_owner: Option, ) -> Result { - self.composite - .delegated_read(ino, fh, offset, size, flags, lock_owner) - .await + self.composite.delegated_read(fh, offset, size).await } #[instrument(name = "MesaFS::release", skip(self))] - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - self.composite - .delegated_release(ino, fh, flags, flush) - .await + pub async fn release(&mut self, fh: FileHandle) -> Result<(), ReleaseError> { + self.composite.delegated_release(fh).await } +} - #[instrument(name = "MesaFS::forget", skip(self))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - // MesaFS has no extra state to clean up on eviction (unlike OrgFs::owner_inodes). - let _ = self.composite.delegated_forget(ino, nlookups).await; +/// A file reader that delegates reads to `MesaFS` through a shared mutex. 
+/// +/// When dropped, spawns an async task to release the internal file handle. +pub struct MesaFsReader { + inner: Arc>, + fh: FileHandle, + handle: tokio::runtime::Handle, +} + +impl git_fs::fs::async_fs::FileReader for MesaFsReader { + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + let fh = self.fh; + async move { + let mut guard = inner.lock().await; + guard + .read(fh, offset, size) + .await + .map_err(|e| std::io::Error::other(e.to_string())) + } + } +} + +impl Drop for MesaFsReader { + fn drop(&mut self) { + let inner = Arc::clone(&self.inner); + let fh = self.fh; + self.handle.spawn(async move { + let mut guard = inner.lock().await; + let _ = guard.release(fh).await; + }); + } +} + +/// A [`FsDataProvider`](git_fs::fs::async_fs::FsDataProvider) that wraps +/// `MesaFS` behind a shared mutex. +#[derive(Clone)] +pub struct MesaFsProvider { + inner: Arc>, + handle: tokio::runtime::Handle, +} + +impl MesaFsProvider { + /// Create a new provider wrapping the given `MesaFS`. 
+ pub fn new(mesa_fs: MesaFS, handle: tokio::runtime::Handle) -> Self { + Self { + inner: Arc::new(tokio::sync::Mutex::new(mesa_fs)), + handle, + } + } +} + +fn lookup_error_to_io(e: LookupError) -> std::io::Error { + match e { + LookupError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), + LookupError::RemoteMesaError(api) => std::io::Error::other(api.to_string()), + } +} + +fn readdir_error_to_io(e: ReadDirError) -> std::io::Error { + match e { + ReadDirError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), + ReadDirError::NotADirectory => std::io::Error::from_raw_os_error(libc::ENOTDIR), + ReadDirError::NotPermitted => std::io::Error::from_raw_os_error(libc::EPERM), + ReadDirError::RemoteMesaError(api) => std::io::Error::other(api.to_string()), } +} - async fn statfs(&mut self) -> Result { - Ok(self.composite.delegated_statfs()) +fn open_error_to_io(e: OpenError) -> std::io::Error { + match e { + OpenError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), + } +} + +impl git_fs::fs::async_fs::FsDataProvider for MesaFsProvider { + type Reader = MesaFsReader; + + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + let name = name.to_os_string(); + async move { + let mut guard = inner.lock().await; + guard + .lookup(parent.addr, &name) + .await + .map_err(lookup_error_to_io) + } + } + + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send { + let inner = Arc::clone(&self.inner); + async move { + let mut guard = inner.lock().await; + let dir_entries: Vec<(OsString, InodeAddr)> = { + let entries = guard + .readdir(parent.addr) + .await + .map_err(readdir_error_to_io)?; + entries.iter().map(|e| (e.name.clone(), e.ino)).collect() + }; + let mut result = Vec::with_capacity(dir_entries.len()); + for (name, ino) in dir_entries { + if let Ok(inode) = guard.getattr(ino).await { + result.push((name, inode)); + } + } + 
Ok(result) + } + } + + fn open( + &self, + inode: INode, + flags: OpenFlags, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + let handle = self.handle.clone(); + async move { + let mut guard = inner.lock().await; + let fh = guard + .open(inode.addr, flags) + .await + .map_err(open_error_to_io)?; + Ok(MesaFsReader { + inner: Arc::clone(&inner), + fh, + handle, + }) + } } } diff --git a/src/fs/mescloud/org.rs b/src/fs/mescloud/org.rs index 1f3b8b5..5d3f34f 100644 --- a/src/fs/mescloud/org.rs +++ b/src/fs/mescloud/org.rs @@ -1,73 +1,19 @@ use std::collections::HashMap; -use std::ffi::OsStr; -use std::future::Future; +use std::ffi::{OsStr, OsString}; use std::time::SystemTime; use bytes::Bytes; use futures::TryStreamExt as _; +use git_fs::fs::{FileHandle, INode, INodeType, InodeAddr, InodePerms, OpenFlags}; use mesa_dev::MesaClient; use secrecy::SecretString; -use tracing::{Instrument as _, instrument, trace, warn}; - -pub use super::common::{ - GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, -}; -use super::common::{InodeControlBlock, MesaApiError}; -use super::composite::{ChildSlot, CompositeFs}; -use super::icache as mescloud_icache; -use super::icache::MescloudICache; +use tracing::{instrument, trace, warn}; + +use super::common::{ChildFs, MesaApiError}; +pub use super::common::{LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; +use super::composite::CompositeFs; use super::repo::RepoFs; use crate::app_config::CacheConfig; -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, - OpenFlags, -}; - -pub(super) struct OrgResolver { - fs_owner: (u32, u32), - block_size: u32, -} - -impl IcbResolver for OrgResolver { - type Icb = InodeControlBlock; - type Error = LookupError; - - fn resolve( - &self, - ino: Inode, - stub: Option, - 
_cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { - let fs_owner = self.fs_owner; - let block_size = self.block_size; - async move { - let stub = stub.unwrap_or_else(|| InodeControlBlock { - parent: None, - path: "/".into(), - rc: 0, - attr: None, - children: None, - }); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }; - Ok(InodeControlBlock { - attr: Some(attr), - children: Some(vec![]), - ..stub - }) - } - .instrument(tracing::info_span!("OrgResolver::resolve", ino)) - } -} #[derive(Debug, Clone)] pub struct OrgConfig { @@ -81,7 +27,7 @@ enum InodeRole { OrgRoot, /// A virtual owner directory (github only). OwnerDir, - /// An inode owned by some repo. + /// An inode owned by some repo (either a child-root or delegated). RepoOwned, } @@ -92,14 +38,14 @@ enum InodeRole { pub struct OrgFs { name: String, client: MesaClient, - composite: CompositeFs, + composite: CompositeFs, /// Maps org-level owner-dir inodes to owner name (github only). - owner_inodes: HashMap, + owner_inodes: HashMap, cache_config: CacheConfig, } impl OrgFs { - pub(crate) const ROOT_INO: Inode = 1; + pub(crate) const ROOT_INO: InodeAddr = CompositeFs::::ROOT_INO; const BLOCK_SIZE: u32 = 4096; /// The name of the organization. @@ -123,31 +69,14 @@ impl OrgFs { /// Ensure an inode exists for a virtual owner directory (github only). Does NOT bump rc. /// TODO(MES-674): Cleanup "special" casing for github. 
- async fn ensure_owner_inode(&mut self, owner: &str) -> (Inode, FileAttr) { + async fn ensure_owner_inode(&mut self, owner: &str) -> (InodeAddr, INode) { // Check existing let mut stale_ino = None; for (&ino, existing_owner) in &self.owner_inodes { if existing_owner == owner { - if let Some(attr) = self.composite.icache.get_attr(ino).await { - return (ino, attr); - } - if self.composite.icache.contains(ino) { - // ICB exists but attr missing — rebuild and cache - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(ino, attr).await; - return (ino, attr); + if let Ok(inode) = self.composite.delegated_getattr(ino).await { + return (ino, inode); } - // ICB was evicted — mark for cleanup stale_ino = Some(ino); break; } @@ -156,35 +85,22 @@ impl OrgFs { self.owner_inodes.remove(&ino); } - // Allocate new - let ino = self.composite.icache.allocate_inode(); + let ino = self.composite.allocate_inode(); let now = SystemTime::now(); - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: owner.into(), - parent: Some(Self::ROOT_INO), - attr: None, - children: None, - }, - ) - .await; - self.owner_inodes.insert(ino, owner.to_owned()); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), + let inode = INode { + addr: ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(Self::ROOT_INO), + size: 0, + itype: INodeType::Directory, }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) + self.composite.cache_inode_and_init_rc(inode); + 
self.owner_inodes.insert(ino, owner.to_owned()); + (ino, inode) } #[must_use] @@ -194,28 +110,17 @@ impl OrgFs { fs_owner: (u32, u32), cache_config: CacheConfig, ) -> Self { - let resolver = OrgResolver { - fs_owner, - block_size: Self::BLOCK_SIZE, - }; Self { name, client, - composite: CompositeFs { - icache: MescloudICache::new(resolver, Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), - file_table: FileTable::new(), - readdir_buf: Vec::new(), - child_inodes: HashMap::new(), - inode_to_slot: HashMap::new(), - slots: Vec::new(), - }, + composite: CompositeFs::new(fs_owner, Self::BLOCK_SIZE), owner_inodes: HashMap::new(), cache_config, } } /// Classify an inode by its role. - fn inode_role(&self, ino: Inode) -> Option { + fn inode_role(&self, ino: InodeAddr) -> Option { if ino == Self::ROOT_INO { return Some(InodeRole::OrgRoot); } @@ -242,144 +147,92 @@ impl OrgFs { repo_name: &str, display_name: &str, default_branch: &str, - parent_ino: Inode, - ) -> (Inode, FileAttr) { + parent_ino: InodeAddr, + ) -> (InodeAddr, INode) { // Check existing repos. for (&ino, &idx) in &self.composite.child_inodes { if self.composite.slots[idx].inner.repo_name() == repo_name { - if let Some(attr) = self.composite.icache.get_attr(ino).await { - let rc = self - .composite - .icache - .get_icb(ino, |icb| icb.rc) - .await - .unwrap_or(0); - trace!(ino, repo = repo_name, rc, "ensure_repo_inode: reusing"); - return (ino, attr); + if let Ok(inode) = self.composite.delegated_getattr(ino).await { + trace!(ino, repo = repo_name, "ensure_repo_inode: reusing"); + return (ino, inode); } warn!( ino, repo = repo_name, "ensure_repo_inode: attr missing, rebuilding" ); - return self.make_repo_dir_attr(ino).await; + return self.make_repo_dir_inode(ino); } } - // Check for orphaned slot (slot exists but not in child_inodes). 
- if let Some(idx) = self - .composite - .slots - .iter() - .position(|s| s.inner.repo_name() == repo_name) - { - return self.register_repo_slot(idx, display_name, parent_ino).await; - } - - // Allocate truly new slot. - let ino = self.composite.icache.allocate_inode(); - trace!( - ino, - repo = repo_name, - "ensure_repo_inode: allocated new inode" - ); - - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: display_name.into(), - parent: Some(parent_ino), - attr: None, - children: None, - }, - ) - .await; - + // Create new RepoFs and register as child. let repo = RepoFs::new( self.client.clone(), self.name.clone(), repo_name.to_owned(), default_branch.to_owned(), - self.composite.icache.fs_owner(), - // TODO(markovejnovic): Unnecessary clone. Refactoring for clearer ownership semantics - // would be ideal. + self.composite.fs_owner(), self.cache_config.clone(), ) .await; - let mut bridge = HashMapBridge::new(); - bridge.insert_inode(ino, RepoFs::ROOT_INO); - - let idx = self.composite.slots.len(); - self.composite.slots.push(ChildSlot { - inner: repo, - bridge, - }); - self.composite.child_inodes.insert(ino, idx); - self.composite.inode_to_slot.insert(ino, idx); - - self.make_repo_dir_attr(ino).await - } - - /// Allocate a new inode, register it in an existing (orphaned) slot, and - /// return `(ino, attr)`. - async fn register_repo_slot( - &mut self, - idx: usize, - display_name: &str, - parent_ino: Inode, - ) -> (Inode, FileAttr) { - let ino = self.composite.icache.allocate_inode(); - trace!(ino, idx, "register_repo_slot: reusing orphaned slot"); + let outer_ino = self + .composite + .add_child_with_parent(repo, RepoFs::ROOT_INO, parent_ino); + trace!( + ino = outer_ino, + repo = repo_name, + "ensure_repo_inode: allocated new inode" + ); + // Register in directory cache so readdir sees it. 
self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: display_name.into(), - parent: Some(parent_ino), - attr: None, - children: None, - }, + .directory_cache + .insert( + git_fs::fs::LoadedAddr(parent_ino), + OsString::from(display_name), + git_fs::fs::LoadedAddr(outer_ino), + true, ) .await; - warn!( - ino, - idx, - "register_repo_slot: resetting bridge for orphaned slot; \ - inner filesystem will not receive forget for stale inode mappings" - ); - self.composite.slots[idx].bridge = HashMapBridge::new(); - self.composite.slots[idx] - .bridge - .insert_inode(ino, RepoFs::ROOT_INO); - self.composite.child_inodes.insert(ino, idx); - self.composite.inode_to_slot.insert(ino, idx); - - self.make_repo_dir_attr(ino).await + let inode = self + .composite + .delegated_getattr(outer_ino) + .await + .unwrap_or_else(|_| { + let now = SystemTime::now(); + INode { + addr: outer_ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(parent_ino), + size: 0, + itype: INodeType::Directory, + } + }); + (outer_ino, inode) } - /// Build and cache a directory attr for `ino`, returning `(ino, attr)`. - async fn make_repo_dir_attr(&self, ino: Inode) -> (Inode, FileAttr) { + /// Build and cache a directory inode for `ino`, returning `(ino, inode)`. 
+ fn make_repo_dir_inode(&self, ino: InodeAddr) -> (InodeAddr, INode) { let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), + let inode = INode { + addr: ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) + self.composite.cache_inode(inode); + (ino, inode) } /// Fetch a repo by name via the API. @@ -398,62 +251,36 @@ impl OrgFs { } #[async_trait::async_trait] -impl super::common::InodeCachePeek for OrgFs { - async fn peek_attr(&self, ino: Inode) -> Option { - self.composite.icache.get_attr(ino).await - } -} - -#[async_trait::async_trait] -impl Fs for OrgFs { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; - +impl ChildFs for OrgFs { #[instrument(name = "OrgFs::lookup", skip(self), fields(org = %self.name))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { + async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; match role { InodeRole::OrgRoot => { - // TODO(MES-674): Cleanup "special" casing for github. let name_str = name.to_str().ok_or(LookupError::InodeNotFound)?; if self.is_github() { - // name is an owner like "torvalds" — create lazily, no API validation. 
trace!(owner = name_str, "lookup: resolving github owner dir"); - let (ino, attr) = self.ensure_owner_inode(name_str).await; + let (ino, inode) = self.ensure_owner_inode(name_str).await; self.composite - .icache .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; - Ok(attr) + Ok(inode) } else { - // Children of org root are repos. trace!(repo = name_str, "lookup: resolving repo"); - - // Validate repo exists via API. let repo = self.wait_for_sync(name_str).await?; - - let (ino, attr) = self + let (ino, inode) = self .ensure_repo_inode(name_str, name_str, &repo.default_branch, Self::ROOT_INO) .await; let rc = self .composite - .icache .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; trace!(ino, repo = name_str, rc, "lookup: resolved repo inode"); - Ok(attr) + Ok(inode) } } InodeRole::OwnerDir => { - // TODO(MES-674): Cleanup "special" casing for github. - // Parent is an owner dir, name is a repo like "linux". let owner = self .owner_inodes .get(&parent) @@ -464,49 +291,32 @@ impl Fs for OrgFs { let encoded = Self::encode_github_repo_name(&full_decoded); trace!( - owner = %owner, - repo = repo_name_str, - encoded = %encoded, + owner = %owner, repo = repo_name_str, encoded = %encoded, "lookup: resolving github repo via owner dir" ); - // Validate via API (uses encoded name). 
let repo = self.wait_for_sync(&encoded).await?; - - let (ino, attr) = self + let (ino, inode) = self .ensure_repo_inode(&encoded, repo_name_str, &repo.default_branch, parent) .await; self.composite - .icache .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; - Ok(attr) + Ok(inode) } InodeRole::RepoOwned => self.composite.delegated_lookup(parent, name).await, } } - #[instrument(name = "OrgFs::getattr", skip(self), fields(org = %self.name))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { - self.composite.delegated_getattr(ino).await - } - #[instrument(name = "OrgFs::readdir", skip(self), fields(org = %self.name))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { + async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError> { let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; match role { InodeRole::OrgRoot => { - // TODO(MES-674): Cleanup "special" casing for github. if self.is_github() { return Err(ReadDirError::NotPermitted); } - // List repos via API. let repos: Vec = self .client .org(&self.name) @@ -528,70 +338,59 @@ impl Fs for OrgFs { let mut entries = Vec::with_capacity(repo_infos.len()); for (repo_name, default_branch) in &repo_infos { - let (repo_ino, _) = self + let (_, inode) = self .ensure_repo_inode(repo_name, repo_name, default_branch, Self::ROOT_INO) .await; - entries.push(DirEntry { - ino: repo_ino, - name: repo_name.clone().into(), - kind: DirEntryType::Directory, - }); + entries.push((OsString::from(repo_name), inode)); } - self.composite.readdir_buf = entries; - Ok(&self.composite.readdir_buf) - } - InodeRole::OwnerDir if self.is_github() => { - // TODO(MES-674): Cleanup "special" casing for github. 
- Err(ReadDirError::NotPermitted) + Ok(entries) } + InodeRole::OwnerDir if self.is_github() => Err(ReadDirError::NotPermitted), InodeRole::OwnerDir => Err(ReadDirError::NotADirectory), - InodeRole::RepoOwned => self.composite.delegated_readdir(ino).await, + InodeRole::RepoOwned => { + let (uid, gid) = self.composite.fs_owner(); + let inner_entries = self.composite.delegated_readdir(ino).await?; + let entries = inner_entries + .iter() + .map(|e| { + let inode = INode { + addr: e.ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid, + gid, + create_time: SystemTime::now(), + last_modified_at: SystemTime::now(), + parent: Some(ino), + size: 0, + itype: e.itype, + }; + (e.name.clone(), inode) + }) + .collect(); + Ok(entries) + } } } #[instrument(name = "OrgFs::open", skip(self), fields(org = %self.name))] - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result { + async fn open(&mut self, ino: InodeAddr, flags: OpenFlags) -> Result { self.composite.delegated_open(ino, flags).await } #[instrument(name = "OrgFs::read", skip(self), fields(org = %self.name))] async fn read( &mut self, - ino: Inode, + _ino: InodeAddr, fh: FileHandle, offset: u64, size: u32, - flags: OpenFlags, - lock_owner: Option, ) -> Result { - self.composite - .delegated_read(ino, fh, offset, size, flags, lock_owner) - .await + self.composite.delegated_read(fh, offset, size).await } #[instrument(name = "OrgFs::release", skip(self), fields(org = %self.name))] - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - self.composite - .delegated_release(ino, fh, flags, flush) - .await - } - - #[instrument(name = "OrgFs::forget", skip(self), fields(org = %self.name))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - let evicted = self.composite.delegated_forget(ino, nlookups).await; - if evicted { - self.owner_inodes.remove(&ino); - } - } - - async fn statfs(&mut self) -> Result { - 
Ok(self.composite.delegated_statfs()) + async fn release(&mut self, _ino: InodeAddr, fh: FileHandle) -> Result<(), ReleaseError> { + self.composite.delegated_release(fh).await } } diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index 11b334a..acff3d0 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -2,197 +2,436 @@ //! //! This module directly accesses the mesa repo through the Rust SDK, on a per-repo basis. +use std::collections::HashMap; +use std::ffi::OsString; use std::future::Future; -use std::{collections::HashMap, ffi::OsStr, path::PathBuf, time::SystemTime}; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::SystemTime; +use std::{ffi::OsStr, path::PathBuf}; use base64::Engine as _; use bytes::Bytes; use mesa_dev::MesaClient; use mesa_dev::low_level::content::{Content, DirEntry as MesaDirEntry}; use num_traits::cast::ToPrimitive as _; -use tracing::{Instrument as _, instrument, trace, warn}; +use tracing::warn; use git_fs::cache::fcache::FileCache; use git_fs::cache::traits::{AsyncReadableCache as _, AsyncWritableCache as _}; +use git_fs::fs::async_fs::{FileReader, FsDataProvider}; +use git_fs::fs::{ + INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags as AsyncOpenFlags, +}; use crate::app_config::CacheConfig; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FileOpenOptions, FilesystemStats, Fs, Inode, - LockOwner, OpenFile, OpenFlags, -}; use super::common::MesaApiError; -pub use super::common::{ - GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, -}; -use super::icache as mescloud_icache; -use super::icache::{InodeControlBlock, MescloudICache}; +pub use super::common::{LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; + +fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { + match &e { + MesaApiError::Response { status, .. 
} if *status == 404 => { + std::io::Error::from_raw_os_error(libc::ENOENT) + } + MesaApiError::Reqwest(_) + | MesaApiError::ReqwestMiddleware(_) + | MesaApiError::Serde(_) + | MesaApiError::SerdePath(_) + | MesaApiError::Io(_) + | MesaApiError::Response { .. } => std::io::Error::other(e), + } +} -pub(super) struct RepoResolver { +#[derive(Clone)] +pub(super) struct MesRepoProvider { + inner: Arc, +} + +struct MesRepoProviderInner { client: MesaClient, org_name: String, repo_name: String, ref_: String, fs_owner: (u32, u32), - block_size: u32, + next_addr: AtomicU64, + /// Maps inode addresses to repo-relative paths (e.g., "src/main.rs"). + /// Root directory maps to an empty `PathBuf`. + path_map: scc::HashMap, + file_cache: Option>>, +} + +impl MesRepoProvider { + pub(super) fn new( + client: MesaClient, + org_name: String, + repo_name: String, + ref_: String, + fs_owner: (u32, u32), + file_cache: Option>>, + ) -> Self { + Self { + inner: Arc::new(MesRepoProviderInner { + client, + org_name, + repo_name, + ref_, + fs_owner, + next_addr: AtomicU64::new(2), // 1 is reserved for root + path_map: scc::HashMap::new(), + file_cache, + }), + } + } + + /// Store the path for the root inode address. + pub(super) fn seed_root_path(&self, root_addr: InodeAddr) { + // Root maps to empty PathBuf (no path prefix for API calls) + drop(self.inner.path_map.insert_sync(root_addr, PathBuf::new())); + } + + /// Remove the path entry for an inode. Intended for forget/cleanup; currently unused until child forget is implemented. + #[expect(dead_code, reason = "will be needed when child forget is implemented")] + pub(super) fn remove_path(&self, addr: InodeAddr) { + self.inner.path_map.remove_sync(&addr); + } + + /// The name of the repository. 
+ pub(super) fn repo_name(&self) -> &str { + &self.inner.repo_name + } } -impl IcbResolver for RepoResolver { - type Icb = InodeControlBlock; - type Error = LookupError; +impl FsDataProvider for MesRepoProvider { + type Reader = MesFileReader; + + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + let name = name.to_os_string(); + async move { + let parent_path = inner + .path_map + .get_async(&parent.addr) + .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let child_path = parent_path.join(&name); + let child_path_str = child_path.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })?; + + let content = inner + .client + .org(&inner.org_name) + .repos() + .at(&inner.repo_name) + .content() + .get(Some(inner.ref_.as_str()), Some(child_path_str), Some(1u64)) + .await + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let now = SystemTime::now(); + let (uid, gid) = inner.fs_owner; + + let (itype, size) = match &content { + Content::File(f) => (INodeType::File, f.size.to_u64().unwrap_or(0)), + Content::Symlink(s) => (INodeType::File, s.size.to_u64().unwrap_or(0)), + Content::Dir(_) => (INodeType::Directory, 0), + }; + + let perms = if itype == INodeType::Directory { + InodePerms::from_bits_truncate(0o755) + } else { + InodePerms::from_bits_truncate(0o644) + }; + + let addr = inner.next_addr.fetch_add(1, Ordering::Relaxed); + drop(inner.path_map.insert_async(addr, child_path).await); + + Ok(INode { + addr, + permissions: perms, + uid, + gid, + create_time: now, + last_modified_at: now, + parent: Some(parent.addr), + size, + itype, + }) + } + } + + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send { + let inner = Arc::clone(&self.inner); + async move { + let parent_path = inner + .path_map + .get_async(&parent.addr) 
+ .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let api_path = if parent_path.as_os_str().is_empty() { + None + } else { + Some( + parent_path + .to_str() + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })? + .to_owned(), + ) + }; + + let content = inner + .client + .org(&inner.org_name) + .repos() + .at(&inner.repo_name) + .content() + .get(Some(inner.ref_.as_str()), api_path.as_deref(), Some(1u64)) + .await + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let dir = match content { + Content::Dir(d) => d, + Content::File(_) | Content::Symlink(_) => { + return Err(std::io::Error::from_raw_os_error(libc::ENOTDIR)); + } + }; + + let now = SystemTime::now(); + let (uid, gid) = inner.fs_owner; + let mut entries = Vec::with_capacity(dir.entries.len()); + + for entry in dir.entries { + let (name, itype, size) = match entry { + MesaDirEntry::File(f) => { + let Some(name) = f.name else { continue }; + (name, INodeType::File, f.size.to_u64().unwrap_or(0)) + } + MesaDirEntry::Symlink(s) => { + let Some(name) = s.name else { continue }; + (name, INodeType::File, s.size.to_u64().unwrap_or(0)) + } + MesaDirEntry::Dir(d) => { + let Some(name) = d.name else { continue }; + (name, INodeType::Directory, 0) + } + }; + + let perms = if itype == INodeType::Directory { + InodePerms::from_bits_truncate(0o755) + } else { + InodePerms::from_bits_truncate(0o644) + }; + + let addr = inner.next_addr.fetch_add(1, Ordering::Relaxed); + let child_path = parent_path.join(&name); + drop(inner.path_map.insert_async(addr, child_path).await); + + let inode = INode { + addr, + permissions: perms, + uid, + gid, + create_time: now, + last_modified_at: now, + parent: Some(parent.addr), + size, + itype, + }; + + entries.push((OsString::from(name), inode)); + } + + Ok(entries) + } + } - fn resolve( + fn open( &self, - ino: Inode, - stub: Option, - 
cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { + inode: INode, + _flags: AsyncOpenFlags, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + async move { + let path = inner + .path_map + .get_async(&inode.addr) + .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + Ok(MesFileReader { + client: inner.client.clone(), + org_name: inner.org_name.clone(), + repo_name: inner.repo_name.clone(), + ref_: inner.ref_.clone(), + path, + file_cache: inner.file_cache.clone(), + inode_addr: inode.addr, + }) + } + } +} + +pub(super) struct MesFileReader { + client: MesaClient, + org_name: String, + repo_name: String, + ref_: String, + path: PathBuf, + file_cache: Option>>, + inode_addr: InodeAddr, +} + +impl FileReader for MesFileReader { + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send { let client = self.client.clone(); let org_name = self.org_name.clone(); let repo_name = self.repo_name.clone(); let ref_ = self.ref_.clone(); - let fs_owner = self.fs_owner; - let block_size = self.block_size; + let path = self.path.clone(); + let file_cache = self.file_cache.clone(); + let inode_addr = self.inode_addr; async move { - let stub = stub.ok_or(LookupError::InodeNotFound)?; - let file_path = build_repo_path(stub.parent, &stub.path, cache, RepoFs::ROOT_INO).await; - - // Non-root inodes must have a resolvable path. - if stub.parent.is_some() && file_path.is_none() { - return Err(LookupError::InodeNotFound); + // Try the file cache first. + if let Some(cache) = &file_cache + && let Some(data) = cache.get(&inode_addr).await + { + let start = usize::try_from(offset) + .unwrap_or(data.len()) + .min(data.len()); + let end = start.saturating_add(size as usize).min(data.len()); + return Ok(Bytes::copy_from_slice(&data[start..end])); } + // Cache miss -- fetch from the Mesa API. 
+ let path_str = path.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })?; + + let api_path = if path_str.is_empty() { + None + } else { + Some(path_str) + }; + let content = client .org(&org_name) .repos() .at(&repo_name) .content() - .get(Some(ref_.as_str()), file_path.as_deref(), Some(1u64)) + .get(Some(ref_.as_str()), api_path, None) .await - .map_err(MesaApiError::from)?; - - let now = SystemTime::now(); - let attr = match &content { - Content::File(f) => { - let size = f.size.to_u64().unwrap_or(0); - FileAttr::RegularFile { - common: mescloud_icache::make_common_file_attr( - ino, 0o644, now, now, fs_owner, block_size, - ), - size, - blocks: mescloud_icache::blocks_of_size(block_size, size), - } + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let encoded_content = match content { + Content::File(f) => f.content.unwrap_or_default(), + Content::Symlink(s) => s.content.unwrap_or_default(), + Content::Dir(_) => { + return Err(std::io::Error::from_raw_os_error(libc::EISDIR)); } - Content::Symlink(s) => { - let size = s.size.to_u64().unwrap_or(0); - FileAttr::RegularFile { - common: mescloud_icache::make_common_file_attr( - ino, 0o644, now, now, fs_owner, block_size, - ), - size, - blocks: mescloud_icache::blocks_of_size(block_size, size), - } - } - Content::Dir(_) => FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }, }; - let children = match content { - Content::Dir(d) => Some( - d.entries - .into_iter() - .filter_map(|e| { - let (name, kind) = match e { - MesaDirEntry::File(f) => (f.name?, DirEntryType::RegularFile), - // TODO(MES-712): return DirEntryType::Symlink once readlink is wired up. 
- MesaDirEntry::Symlink(s) => (s.name?, DirEntryType::RegularFile), - MesaDirEntry::Dir(d) => (d.name?, DirEntryType::Directory), - }; - Some((name, kind)) - }) - .collect(), - ), - Content::File(_) | Content::Symlink(_) => None, - }; + let decoded = base64::engine::general_purpose::STANDARD + .decode(&encoded_content) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - Ok(InodeControlBlock { - parent: stub.parent, - path: stub.path, - rc: stub.rc, - attr: Some(attr), - children, - }) + let start = usize::try_from(offset) + .unwrap_or(decoded.len()) + .min(decoded.len()); + let end = start.saturating_add(size as usize).min(decoded.len()); + let result = Bytes::copy_from_slice(&decoded[start..end]); + + // Store the decoded content in the cache for future reads. + if let Some(cache) = &file_cache + && let Err(e) = cache.insert(&inode_addr, decoded).await + { + warn!(error = ?e, inode_addr, "failed to cache file content"); + } + + Ok(result) } - .instrument(tracing::info_span!("RepoResolver::resolve", ino)) } } -/// Walk the parent chain in the cache to build the repo-relative path. -/// Returns `None` for the root inode (maps to `path=None` in the mesa content API). -async fn build_repo_path( - parent: Option, - name: &std::path::Path, - cache: &AsyncICache, - root_ino: Inode, -) -> Option { - /// Maximum parent-chain depth before bailing out. Prevents infinite loops - /// if a bug creates a cycle in the parent pointers. 
- const MAX_DEPTH: usize = 1024; - - let parent = parent?; - if parent == root_ino { - return name.to_str().map(String::from); +mod repo_fs_inner { + #![allow(clippy::future_not_send, clippy::mem_forget)] + use git_fs::cache::async_backed::FutureBackedCache; + use git_fs::fs::async_fs::AsyncFs; + use git_fs::fs::{INode, InodeAddr}; + use ouroboros::self_referencing; + + use super::MesRepoProvider; + + #[self_referencing] + pub struct RepoFsInner { + pub(super) inode_table: FutureBackedCache, + #[borrows(inode_table)] + #[covariant] + pub(super) fs: AsyncFs<'this, MesRepoProvider>, } - let mut components = vec![name.to_path_buf()]; - let mut current = parent; - for _ in 0..MAX_DEPTH { - if current == root_ino { - break; + impl RepoFsInner { + pub fn create( + inode_table: FutureBackedCache, + provider: MesRepoProvider, + ) -> Self { + RepoFsInnerBuilder { + inode_table, + fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), + } + .build() } - let (path, next_parent) = cache - .get_icb(current, |icb| (icb.path.clone(), icb.parent)) - .await?; - components.push(path); - current = next_parent?; } - if current != root_ino { - tracing::warn!("build_repo_path: exceeded MAX_DEPTH={MAX_DEPTH}, possible parent cycle"); - return None; - } - components.reverse(); - let joined: PathBuf = components.iter().collect(); - joined.to_str().map(String::from) } +use repo_fs_inner::RepoFsInner; /// A filesystem rooted at a single mesa repository. /// -/// Implements [`Fs`] for navigating files and directories within one repo. -/// Does not handle organizations or multi-repo hierarchy — that is [`super::MesaFS`]'s job. +/// Wraps [`AsyncFs`] via ouroboros to co-locate the inode table +/// and the filesystem that borrows it. Implements [`super::common::ChildFs`] as a thin adapter. 
pub struct RepoFs { - client: MesaClient, - org_name: String, - repo_name: String, - ref_: String, - - icache: MescloudICache, - file_table: FileTable, - readdir_buf: Vec, - open_files: HashMap, - file_cache: Option>, + inner: RepoFsInner, + /// Reference counts for inodes held by the kernel. + refcounts: rustc_hash::FxHashMap, + /// Open file handles mapped to readers. + open_files: HashMap>, + /// Provider clone for accessing `repo_name` and `path_map` cleanup. + provider: MesRepoProvider, } impl RepoFs { - pub(crate) const ROOT_INO: Inode = 1; - const BLOCK_SIZE: u32 = 4096; + pub(crate) const ROOT_INO: InodeAddr = 1; /// Create a new `RepoFs` for a specific org and repo. pub async fn new( @@ -203,24 +442,15 @@ impl RepoFs { fs_owner: (u32, u32), cache_config: CacheConfig, ) -> Self { - let resolver = RepoResolver { - client: client.clone(), - org_name: org_name.clone(), - repo_name: repo_name.clone(), - ref_: ref_.clone(), - fs_owner, - block_size: Self::BLOCK_SIZE, - }; - let file_cache = match cache_config.max_size { Some(max_size) if max_size.as_u64() > 0 => { let cache_dir = cache_config.path.join(&org_name).join(&repo_name); let max_bytes = max_size.as_u64().try_into().unwrap_or(usize::MAX); match FileCache::new(&cache_dir, max_bytes).await { - Ok(cache) => Some(cache), + Ok(cache) => Some(Arc::new(cache)), Err(e) => { warn!(error = ?e, org = %org_name, repo = %repo_name, - "failed to create file cache, continuing without caching",); + "failed to create file cache, continuing without caching"); None } } @@ -228,317 +458,140 @@ impl RepoFs { _ => None, }; + let provider = + MesRepoProvider::new(client, org_name, repo_name, ref_, fs_owner, file_cache); + provider.seed_root_path(Self::ROOT_INO); + + let root = INode { + addr: Self::ROOT_INO, + permissions: InodePerms::from_bits_truncate(0o755), + uid: fs_owner.0, + gid: fs_owner.1, + create_time: SystemTime::now(), + last_modified_at: SystemTime::now(), + parent: None, + size: 0, + itype: 
INodeType::Directory, + }; + + let inode_table = git_fs::cache::async_backed::FutureBackedCache::default(); + inode_table.insert_sync(root.addr, root); + + let inner = RepoFsInner::create(inode_table, provider.clone()); + + let mut refcounts = rustc_hash::FxHashMap::default(); + refcounts.insert(Self::ROOT_INO, 1); + Self { - client, - org_name, - repo_name, - ref_, - icache: MescloudICache::new(resolver, Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), - file_table: FileTable::new(), - readdir_buf: Vec::new(), + inner, + refcounts, open_files: HashMap::new(), - file_cache, + provider, } } /// The name of the repository this filesystem is rooted at. pub(crate) fn repo_name(&self) -> &str { - &self.repo_name - } - - /// Build the repo-relative path for an inode by walking up the parent chain. - /// - /// Returns `None` for the root inode (the repo top-level maps to `path=None` in the - /// mesa content API). - async fn path_of_inode(&self, ino: Inode) -> Option { - /// Maximum parent-chain depth before bailing out. 
- const MAX_DEPTH: usize = 1024; - - if ino == Self::ROOT_INO { - return None; - } - - let mut components = Vec::new(); - let mut current = ino; - for _ in 0..MAX_DEPTH { - if current == Self::ROOT_INO { - break; - } - let (path, parent) = self - .icache - .get_icb(current, |icb| (icb.path.clone(), icb.parent)) - .await?; - components.push(path); - current = parent?; - } - if current != Self::ROOT_INO { - tracing::warn!( - ino, - "path_of_inode: exceeded MAX_DEPTH={MAX_DEPTH}, possible parent cycle" - ); - return None; - } - components.reverse(); - let joined: PathBuf = components.iter().collect(); - joined.to_str().map(String::from) + self.provider.repo_name() } } -#[async_trait::async_trait] -impl super::common::InodeCachePeek for RepoFs { - async fn peek_attr(&self, ino: Inode) -> Option { - self.icache.get_attr(ino).await - } +#[expect( + clippy::wildcard_enum_match_arm, + reason = "mapping all ErrorKind variants is impractical; EIO is the sensible default" +)] +fn io_error_to_errno(e: &std::io::Error) -> i32 { + e.raw_os_error().unwrap_or_else(|| match e.kind() { + std::io::ErrorKind::NotFound => libc::ENOENT, + std::io::ErrorKind::PermissionDenied => libc::EACCES, + std::io::ErrorKind::AlreadyExists => libc::EEXIST, + _ => libc::EIO, + }) } #[async_trait::async_trait] -impl Fs for RepoFs { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; - - #[instrument(name = "RepoFs::lookup", skip(self), fields(repo = %self.repo_name))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { - debug_assert!( - self.icache.contains(parent), - "lookup: parent inode {parent} not in inode table" - ); - - let ino = self.icache.ensure_child_ino(parent, name).await; - let attr = self - .icache - .get_or_resolve(ino, |icb| icb.attr) - .await? 
- .ok_or(LookupError::InodeNotFound)?; - - let rc = self - .icache - .inc_rc(ino) +impl super::common::ChildFs for RepoFs { + async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { + let tracked = self + .inner + .borrow_fs() + .lookup(LoadedAddr(parent), name) .await - .ok_or(LookupError::InodeNotFound)?; - trace!(ino, ?name, rc, "resolved inode"); - Ok(attr) - } - - #[instrument(name = "RepoFs::getattr", skip(self), fields(repo = %self.repo_name))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { - self.icache.get_attr(ino).await.ok_or_else(|| { - warn!(ino, "getattr on unknown inode"); - GetAttrError::InodeNotFound - }) + .map_err(|e| { + if io_error_to_errno(&e) == libc::ENOENT { + LookupError::InodeNotFound + } else { + LookupError::RemoteMesaError(MesaApiError::Io(e)) + } + })?; + *self.refcounts.entry(tracked.inode.addr).or_insert(0) += 1; + Ok(tracked.inode) } - #[instrument(name = "RepoFs::readdir", skip(self), fields(repo = %self.repo_name))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - debug_assert!( - self.icache.contains(ino), - "readdir: inode {ino} not in inode table" - ); - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::Directory { .. }) | None - ), - "readdir: inode {ino} has non-directory cached attr" - ); - - let children = self - .icache - .get_or_resolve(ino, |icb| icb.children.clone()) - .await? - .ok_or(ReadDirError::NotADirectory)?; - - trace!( - ino, - count = children.len(), - "readdir: resolved directory listing from icache" - ); - - self.icache.evict_zero_rc_children(ino).await; - - let mut entries = Vec::with_capacity(children.len()); - for (name, kind) in &children { - let child_ino = self.icache.ensure_child_ino(ino, OsStr::new(name)).await; - // Only cache directory attrs in readdir. File attrs are left as - // None so that lookup triggers the resolver to fetch the real file - // size. 
Caching placeholder file attrs (size=0) would poison - // needs_resolve(), preventing resolution on subsequent lookups. - if *kind == DirEntryType::Directory { - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - child_ino, - 0o755, - now, - now, - self.icache.fs_owner(), - self.icache.block_size(), - ), - }; - self.icache.cache_attr(child_ino, attr).await; - } - entries.push(DirEntry { - ino: child_ino, - name: name.clone().into(), - kind: *kind, - }); - } - - self.readdir_buf = entries; - Ok(&self.readdir_buf) + async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError> { + let mut entries = Vec::new(); + self.inner + .borrow_fs() + .readdir(LoadedAddr(ino), 0, |de, _offset| { + entries.push((de.name.to_os_string(), de.inode)); + false + }) + .await + .map_err(|e| { + if io_error_to_errno(&e) == libc::ENOTDIR { + ReadDirError::NotADirectory + } else if io_error_to_errno(&e) == libc::ENOENT { + ReadDirError::InodeNotFound + } else { + ReadDirError::RemoteMesaError(MesaApiError::Io(e)) + } + })?; + Ok(entries) } - #[instrument(name = "RepoFs::open", skip(self), fields(repo = %self.repo_name))] - async fn open(&mut self, ino: Inode, _flags: OpenFlags) -> Result { - if !self.icache.contains(ino) { - warn!(ino, "open on unknown inode"); - return Err(OpenError::InodeNotFound); - } - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::RegularFile { .. 
}) | None - ), - "open: inode {ino} has non-file cached attr" - ); - let fh = self.file_table.allocate(); - self.open_files.insert(fh, ino); - trace!(ino, fh, "assigned file handle"); - Ok(OpenFile { - handle: fh, - options: FileOpenOptions::empty(), - }) + async fn open( + &mut self, + ino: InodeAddr, + flags: AsyncOpenFlags, + ) -> Result { + let open_file = self + .inner + .borrow_fs() + .open(LoadedAddr(ino), flags) + .await + .map_err(|_| OpenError::InodeNotFound)?; + self.open_files + .insert(open_file.fh, Arc::clone(&open_file.reader)); + Ok(open_file.fh) } - #[instrument(name = "RepoFs::read", skip(self), fields(repo = %self.repo_name))] async fn read( &mut self, - ino: Inode, - fh: FileHandle, + _ino: InodeAddr, + fh: git_fs::fs::FileHandle, offset: u64, size: u32, - _flags: OpenFlags, - _lock_owner: Option, ) -> Result { - let &file_ino = self.open_files.get(&fh).ok_or_else(|| { - warn!(fh, "read on unknown file handle"); - ReadError::FileNotOpen - })?; - debug_assert!( - file_ino == ino, - "read: file handle {fh} maps to inode {file_ino}, but caller passed inode {ino}" - ); - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::RegularFile { .. }) | None - ), - "read: inode {ino} has non-file cached attr" - ); - - // Try the file cache first. - if let Some(cache) = &self.file_cache - && let Some(data) = cache.get(&ino).await - { - let start = usize::try_from(offset) - .unwrap_or(data.len()) - .min(data.len()); - let end = start.saturating_add(size as usize).min(data.len()); - trace!( - ino, - fh, - cached = true, - decoded_len = data.len(), - start, - end, - "read content" - ); - return Ok(Bytes::copy_from_slice(&data[start..end])); - } - - // Cache miss — fetch from the Mesa API. 
- let file_path = self.path_of_inode(ino).await; - - if ino != Self::ROOT_INO && file_path.is_none() { - warn!(ino, "read: path_of_inode returned None for non-root inode"); - return Err(ReadError::InodeNotFound); - } - - let content = self - .client - .org(&self.org_name) - .repos() - .at(&self.repo_name) - .content() - .get(Some(self.ref_.as_str()), file_path.as_deref(), None) - .await - .map_err(MesaApiError::from)?; - - let encoded_content = match content { - Content::File(f) => f.content.unwrap_or_default(), - // TODO(MES-712): return ReadError::NotAFile once symlinks are surfaced as - // DirEntryType::Symlink, and implement readlink to return the link target. - Content::Symlink(s) => s.content.unwrap_or_default(), - Content::Dir(_) => return Err(ReadError::NotAFile), - }; - - let decoded = base64::engine::general_purpose::STANDARD.decode(&encoded_content)?; - - let start = usize::try_from(offset) - .unwrap_or(decoded.len()) - .min(decoded.len()); - let end = start.saturating_add(size as usize).min(decoded.len()); - let result = Bytes::copy_from_slice(&decoded[start..end]); - trace!(ino, fh, cached = false, path = ?file_path, decoded_len = decoded.len(), start, end, "read content"); - - // Store the decoded content in the cache for future reads. 
- if let Some(cache) = &self.file_cache - && let Err(e) = cache.insert(&ino, decoded).await - { - warn!(error = ?e, ino, "failed to cache file content"); - } - - Ok(result) + let reader = self.open_files.get(&fh).ok_or(ReadError::FileNotOpen)?; + reader.read(offset, size).await.map_err(|e| { + if io_error_to_errno(&e) == libc::EISDIR { + ReadError::NotAFile + } else if io_error_to_errno(&e) == libc::ENOENT { + ReadError::InodeNotFound + } else { + ReadError::RemoteMesaError(MesaApiError::Io(e)) + } + }) } - #[instrument(name = "RepoFs::release", skip(self), fields(repo = %self.repo_name))] async fn release( &mut self, - ino: Inode, - fh: FileHandle, - _flags: OpenFlags, - _flush: bool, + _ino: InodeAddr, + fh: git_fs::fs::FileHandle, ) -> Result<(), ReleaseError> { - let released_ino = self.open_files.remove(&fh).ok_or_else(|| { - warn!(fh, "release on unknown file handle"); - ReleaseError::FileNotOpen - })?; - debug_assert!( - released_ino == ino, - "release: file handle {fh} mapped to inode {released_ino}, but caller passed inode {ino}" - ); - trace!(ino = released_ino, fh, "closed file handle"); + self.open_files + .remove(&fh) + .ok_or(ReleaseError::FileNotOpen)?; Ok(()) } - - #[instrument(name = "RepoFs::forget", skip(self), fields(repo = %self.repo_name))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - debug_assert!( - self.icache.contains(ino), - "forget: inode {ino} not in inode table" - ); - - self.icache.forget(ino, nlookups).await; - } - - async fn statfs(&mut self) -> Result { - Ok(self.icache.statfs()) - } } diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 003e1b0..a696e56 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,4 +1 @@ -pub mod fuser; -pub mod icache; pub mod mescloud; -pub mod r#trait; diff --git a/src/fs/trait.rs b/src/fs/trait.rs deleted file mode 100644 index f4d9852..0000000 --- a/src/fs/trait.rs +++ /dev/null @@ -1,375 +0,0 @@ -//! Generic trait for implementing filesystems. -//! -//! 
Note that this is a slightly cleaner interface than directly using fuser. The whole point of -//! this is to abstract away fuser-specific details. -use async_trait::async_trait; -use std::{ - ffi::{OsStr, OsString}, - time::{Duration, SystemTime}, -}; -use tracing::error; - -use bitflags::bitflags; -use bytes::Bytes; - -/// Type representing an inode. -pub type Inode = u64; - -pub type FileHandle = u64; - -/// An opaque lock owner identifier provided by the kernel. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct LockOwner(pub u64); - -bitflags! { - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub struct Permissions: u16 { - // Other - const OTHER_EXECUTE = 1 << 0; - const OTHER_WRITE = 1 << 1; - const OTHER_READ = 1 << 2; - - // Group - const GROUP_EXECUTE = 1 << 3; - const GROUP_WRITE = 1 << 4; - const GROUP_READ = 1 << 5; - - // Owner - const OWNER_EXECUTE = 1 << 6; - const OWNER_WRITE = 1 << 7; - const OWNER_READ = 1 << 8; - - // Special bits - const STICKY = 1 << 9; - const SETGID = 1 << 10; - const SETUID = 1 << 11; - - const OTHER_RWX = Self::OTHER_READ.bits() - | Self::OTHER_WRITE.bits() - | Self::OTHER_EXECUTE.bits(); - const GROUP_RWX = Self::GROUP_READ.bits() - | Self::GROUP_WRITE.bits() - | Self::GROUP_EXECUTE.bits(); - const OWNER_RWX = Self::OWNER_READ.bits() - | Self::OWNER_WRITE.bits() - | Self::OWNER_EXECUTE.bits(); - } -} - -bitflags! 
{ - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub struct OpenFlags: i32 { - // Access modes (mutually exclusive) - const RDONLY = libc::O_RDONLY; - const WRONLY = libc::O_WRONLY; - const RDWR = libc::O_RDWR; - - // Creation/status flags - const APPEND = libc::O_APPEND; - const TRUNC = libc::O_TRUNC; - const CREAT = libc::O_CREAT; - const EXCL = libc::O_EXCL; - - // Behavior flags - const NONBLOCK = libc::O_NONBLOCK; - const SYNC = libc::O_SYNC; - const DSYNC = libc::O_DSYNC; - const NOFOLLOW = libc::O_NOFOLLOW; - const CLOEXEC = libc::O_CLOEXEC; - const DIRECTORY = libc::O_DIRECTORY; - - #[cfg(target_os = "linux")] - const NOATIME = libc::O_NOATIME; - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct CommonFileAttr { - pub ino: Inode, - pub atime: SystemTime, - pub mtime: SystemTime, - pub ctime: SystemTime, - pub crtime: SystemTime, - pub perm: Permissions, - pub nlink: u32, - pub uid: u32, - pub gid: u32, - pub blksize: u32, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum FileAttr { - RegularFile { - common: CommonFileAttr, - size: u64, - blocks: u64, - }, - Directory { - common: CommonFileAttr, - }, - Symlink { - common: CommonFileAttr, - size: u64, - }, - CharDevice { - common: CommonFileAttr, - rdev: u64, - }, - BlockDevice { - common: CommonFileAttr, - rdev: u64, - }, - NamedPipe { - common: CommonFileAttr, - }, - Socket { - common: CommonFileAttr, - }, -} - -impl FileAttr { - pub fn common(&self) -> &CommonFileAttr { - match self { - Self::RegularFile { common, .. } - | Self::Directory { common } - | Self::Symlink { common, .. } - | Self::CharDevice { common, .. } - | Self::BlockDevice { common, .. } - | Self::NamedPipe { common } - | Self::Socket { common } => common, - } - } -} - -bitflags! 
{ - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub (crate) struct FileOpenOptions: u32 { - const DIRECT_IO = 1 << 0; - const KEEP_CACHE = 1 << 1; - const NONSEEKABLE = 1 << 2; - const STREAM = 1 << 4; - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct OpenFile { - pub handle: FileHandle, - pub options: FileOpenOptions, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum DirEntryType { - RegularFile, - Directory, - Symlink, - CharDevice, - BlockDevice, - NamedPipe, - Socket, -} - -impl TryFrom for FileAttr { - type Error = (); - - #[expect( - clippy::cast_possible_truncation, - reason = "metadata mode/nlink/blksize narrowing is intentional" - )] - #[expect( - clippy::cast_sign_loss, - reason = "nsecs from MetadataExt is always in [0, 999_999_999]" - )] - fn try_from(meta: std::fs::Metadata) -> Result { - use std::os::unix::fs::FileTypeExt as _; - use std::os::unix::fs::MetadataExt as _; - - fn to_systime(secs: i64, nsecs: i64) -> SystemTime { - if secs >= 0 { - std::time::UNIX_EPOCH + Duration::new(secs.cast_unsigned(), nsecs as u32) - } else { - // nsecs is always in [0, 999_999_999] from MetadataExt. - // For negative secs, subtract whole seconds then add back nsecs. 
- std::time::UNIX_EPOCH - Duration::from_secs((-secs).cast_unsigned()) - + Duration::from_nanos(nsecs.cast_unsigned()) - } - } - - let common_attr = CommonFileAttr { - ino: meta.ino(), - atime: to_systime(meta.atime(), meta.atime_nsec()), - mtime: to_systime(meta.mtime(), meta.mtime_nsec()), - ctime: to_systime(meta.ctime(), meta.ctime_nsec()), - crtime: to_systime(0, 0), // Not available in std::fs::Metadata - perm: Permissions::from_bits_truncate(meta.mode() as u16), - nlink: meta.nlink() as u32, - uid: meta.uid(), - gid: meta.gid(), - blksize: meta.blksize() as u32, - }; - - let ft = meta.file_type(); - if ft.is_file() { - Ok(Self::RegularFile { - common: common_attr, - size: meta.len(), - blocks: meta.blocks(), - }) - } else if ft.is_dir() { - Ok(Self::Directory { - common: common_attr, - }) - } else if ft.is_symlink() { - Ok(Self::Symlink { - common: common_attr, - size: meta.len(), - }) - } else if ft.is_char_device() { - Ok(Self::CharDevice { - common: common_attr, - rdev: meta.rdev(), - }) - } else if ft.is_block_device() { - Ok(Self::BlockDevice { - common: common_attr, - rdev: meta.rdev(), - }) - } else if ft.is_fifo() { - Ok(Self::NamedPipe { - common: common_attr, - }) - } else if ft.is_socket() { - Ok(Self::Socket { - common: common_attr, - }) - } else { - debug_assert!( - false, - "Unknown file type encountered in FileAttr conversion" - ); - Err(()) - } - } -} - -impl From for DirEntryType { - fn from(attr: FileAttr) -> Self { - match attr { - FileAttr::RegularFile { .. } => Self::RegularFile, - FileAttr::Directory { .. } => Self::Directory, - FileAttr::Symlink { .. } => Self::Symlink, - FileAttr::CharDevice { .. } => Self::CharDevice, - FileAttr::BlockDevice { .. } => Self::BlockDevice, - FileAttr::NamedPipe { .. } => Self::NamedPipe, - FileAttr::Socket { .. 
} => Self::Socket, - } - } -} - -impl TryFrom for DirEntryType { - type Error = (); - - fn try_from(ft: std::fs::FileType) -> Result { - use std::os::unix::fs::FileTypeExt as _; - - if ft.is_file() { - Ok(Self::RegularFile) - } else if ft.is_dir() { - Ok(Self::Directory) - } else if ft.is_symlink() { - Ok(Self::Symlink) - } else if ft.is_char_device() { - Ok(Self::CharDevice) - } else if ft.is_block_device() { - Ok(Self::BlockDevice) - } else if ft.is_fifo() { - Ok(Self::NamedPipe) - } else if ft.is_socket() { - Ok(Self::Socket) - } else { - debug_assert!( - false, - "Unknown file type encountered in DirEntryType conversion" - ); - error!(ft = ?ft, "Unknown file type encountered in DirEntryType conversion"); - Err(()) - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DirEntry { - pub ino: Inode, - // TODO(markovejnovic): This OsString is hella expensive - pub name: OsString, - pub kind: DirEntryType, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct FilesystemStats { - pub block_size: u32, - pub fragment_size: u64, - pub total_blocks: u64, - pub free_blocks: u64, - pub available_blocks: u64, - pub total_inodes: u64, - pub free_inodes: u64, - pub available_inodes: u64, - pub filesystem_id: u64, - pub mount_flags: u32, - pub max_filename_length: u32, -} - -#[async_trait] -pub trait Fs { - type LookupError: std::error::Error; - type GetAttrError: std::error::Error; - type OpenError: std::error::Error; - type ReadError: std::error::Error; - type ReaddirError: std::error::Error; - type ReleaseError: std::error::Error; - - /// For each lookup call made by the kernel, it expects the icache to be updated with the - /// returned `FileAttr`. - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result; - - /// Can be called in two contexts -- the file is not open (in which case `fh` is `None`), - /// or the file is open (in which case `fh` is `Some`). 
- async fn getattr( - &mut self, - ino: Inode, - fh: Option, - ) -> Result; - - /// Read the contents of a directory. - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], Self::ReaddirError>; - - /// Open a file for reading. - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result; - - /// Read data from an open file. - #[expect(clippy::too_many_arguments, reason = "mirrors fuser read API")] - async fn read( - &mut self, - ino: Inode, - fh: FileHandle, - offset: u64, - size: u32, - flags: OpenFlags, - lock_owner: Option, - ) -> Result; - - /// Called when the kernel closes a file handle. - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), Self::ReleaseError>; - - /// Called when the kernel is done with an inode. - async fn forget(&mut self, ino: Inode, nlookups: u64); - - /// Get filesystem statistics. - async fn statfs(&mut self) -> Result; -}