From 745f25addb2960a2fc6ff1841b2329925af687a9 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 17 Feb 2026 20:08:14 -0800 Subject: [PATCH 01/24] Mega Cleanup --- CLAUDE.md | 5 + Cargo.lock | 74 +- Cargo.toml | 1 + lib/cache/async_backed.rs | 391 +++++++++ lib/cache/mod.rs | 2 + lib/drop_ward.rs | 133 +++ lib/fs/async_fs.rs | 432 ++++++++++ lib/fs/dcache.rs | 65 ++ lib/fs/fuser.rs | 425 ++++++++++ lib/fs/mod.rs | 188 +++++ lib/lib.rs | 3 + src/daemon.rs | 25 +- src/fs/fuser.rs | 351 -------- src/fs/icache/async_cache.rs | 1410 -------------------------------- src/fs/icache/bridge.rs | 138 ---- src/fs/icache/file_table.rs | 22 - src/fs/icache/inode_factory.rs | 19 - src/fs/icache/mod.rs | 21 - src/fs/mescloud/common.rs | 106 +-- src/fs/mescloud/composite.rs | 634 ++++++++------ src/fs/mescloud/icache.rs | 437 ---------- src/fs/mescloud/mod.rs | 440 +++++----- src/fs/mescloud/org.rs | 449 +++------- src/fs/mescloud/repo.rs | 903 ++++++++++---------- src/fs/mod.rs | 3 - src/fs/trait.rs | 375 --------- tests/async_fs_correctness.rs | 609 ++++++++++++++ tests/common/async_fs_mocks.rs | 104 +++ tests/common/mod.rs | 4 +- 29 files changed, 3706 insertions(+), 4063 deletions(-) create mode 100644 lib/cache/async_backed.rs create mode 100644 lib/drop_ward.rs create mode 100644 lib/fs/async_fs.rs create mode 100644 lib/fs/dcache.rs create mode 100644 lib/fs/fuser.rs create mode 100644 lib/fs/mod.rs delete mode 100644 src/fs/fuser.rs delete mode 100644 src/fs/icache/async_cache.rs delete mode 100644 src/fs/icache/bridge.rs delete mode 100644 src/fs/icache/file_table.rs delete mode 100644 src/fs/icache/inode_factory.rs delete mode 100644 src/fs/icache/mod.rs delete mode 100644 src/fs/mescloud/icache.rs delete mode 100644 src/fs/trait.rs create mode 100644 tests/async_fs_correctness.rs create mode 100644 tests/common/async_fs_mocks.rs diff --git a/CLAUDE.md b/CLAUDE.md index 9ba3f68b..653c07a6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -43,6 +43,11 @@ cargo fmt --all && 
cargo clippy --all-targets --all-features -- -D warnings && c - Channels: `tokio::sync::mpsc` for multi-producer, `tokio::sync::oneshot` for request-response - Never block the async runtime — offload blocking work with `tokio::task::spawn_blocking` +## Testing + +- Avoid writing tests in-line in the same file as production code; use separate `tests/` directory + for tests. + ## Dependencies - Check for existing deps with `cargo tree` before adding new crates diff --git a/Cargo.lock b/Cargo.lock index d4cf1499..1050f46b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -228,7 +234,7 @@ version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn", @@ -758,6 +764,7 @@ dependencies = [ "opentelemetry", "opentelemetry-otlp", "opentelemetry_sdk", + "ouroboros", "rand", "reqwest", "reqwest-middleware", @@ -839,6 +846,12 @@ dependencies = [ "hashbrown 0.16.1", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "heck" version = "0.5.0" @@ -1497,6 +1510,30 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ouroboros" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0f050db9c44b97a94723127e6be766ac5c340c48f2c4bb3ffa11713744be59" +dependencies = [ + "aliasable", 
+ "ouroboros_macro", + "static_assertions", +] + +[[package]] +name = "ouroboros_macro" +version = "0.18.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c7028bdd3d43083f6d8d4d5187680d0d3560d54df4cc9d752005268b41e64d0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn", +] + [[package]] name = "page_size" version = "0.6.0" @@ -1623,6 +1660,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "version_check", + "yansi", +] + [[package]] name = "prost" version = "0.13.5" @@ -2312,6 +2362,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -2865,6 +2921,12 @@ dependencies = [ "rustversion", ] +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "vt100" version = "0.16.2" @@ -3309,7 +3371,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" dependencies = [ "anyhow", - "heck", + "heck 0.5.0", "wit-parser", ] @@ -3320,7 +3382,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", - "heck", + 
"heck 0.5.0", "indexmap 2.13.0", "prettyplease", "syn", @@ -3387,6 +3449,12 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "yoke" version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index d837f7fe..dcf7b555 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,7 @@ tracing-indicatif = "0.3.14" opentelemetry = { version = "0.29" } opentelemetry_sdk = { version = "0.29", features = ["rt-tokio"] } opentelemetry-otlp = { version = "0.29", default-features = false, features = ["http-proto", "trace", "reqwest-blocking-client"] } +ouroboros = "0.18" tracing-opentelemetry = { version = "0.30" } hashlink = "0.11.0" diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs new file mode 100644 index 00000000..c3fddd05 --- /dev/null +++ b/lib/cache/async_backed.rs @@ -0,0 +1,391 @@ +//! Concurrent deduplication cache for async computations. +//! +//! Given a key and an async factory, ensures the factory runs at most once per key. Subsequent +//! callers for the same key await the already-in-flight computation via a [`Shared`] future, +//! avoiding the race conditions inherent in `Notify`-based signalling. +//! +//! Note that this cache does not support automatic eviction. + +use std::panic::AssertUnwindSafe; +use std::{fmt::Debug, future::Future, hash::Hash, pin::Pin}; + +use futures::FutureExt as _; +use futures::future::Shared; + +type SharedFut = Shared> + Send>>>; + +/// Two-state slot: `InFlight` while a factory future is running, then promoted to `Ready` once +/// the future completes. 
+/// +/// The `InFlight` variant holds a `Shared<..., Output = Option>` where `None` signals that the +/// factory panicked (caught by `catch_unwind`). On `None`, callers remove the entry and retry. +enum Slot { + InFlight(SharedFut), + Ready(V), +} + +/// Deduplicating async cache. +/// +/// If [`get_or_init`](Self::get_or_init) is called concurrently for the same key, only one +/// invocation of the factory runs. All callers receive a clone of the result. +pub struct FutureBackedCache { + map: scc::HashMap>, +} + +impl Default for FutureBackedCache +where + K: Eq + Hash, + V: Clone + Send + 'static, +{ + fn default() -> Self { + Self { + map: scc::HashMap::default(), + } + } +} + +impl FutureBackedCache +where + K: Eq + Hash + Debug + Clone + Send + Sync + 'static, + V: Clone + Send + Sync + 'static, +{ + /// Get the cached value for `key`, or initialize it by running `factory`. + /// + /// If another caller is already computing the value for this key, this awaits the in-flight + /// computation instead of spawning a duplicate. If the factory panics, the entry is removed + /// and the next caller retries with a fresh factory invocation. + /// + /// # Panics + /// + /// Panics if this caller joins an in-flight factory that itself panicked (i.e. the caller + /// lost the race to insert a fresh entry after the poisoned slot was removed). + pub async fn get_or_init(&self, key: K, factory: F) -> V + where + F: FnOnce() -> Fut, + Fut: Future + Send + 'static, + { + // Fast path: value already cached. + let existing = self + .map + .read_async(&key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(shared) => Err(shared.clone()), + }) + .await; + + match existing { + Some(Ok(v)) => return v, + Some(Err(shared)) => { + if let Some(v) = self.await_shared(&key, shared).await { + return v; + } + // Factory panicked; entry removed. Fall through to re-insert below. + } + None => {} + } + + // Slow path: use entry_async for atomic check-and-insert. 
+ let shared = match self.map.entry_async(key.clone()).await { + scc::hash_map::Entry::Occupied(occ) => match occ.get() { + Slot::Ready(v) => return v.clone(), + Slot::InFlight(shared) => shared.clone(), + }, + scc::hash_map::Entry::Vacant(vac) => { + let shared = Self::make_shared(factory); + let ret = shared.clone(); + vac.insert_entry(Slot::InFlight(shared)); + ret + } + }; + + if let Some(v) = self.await_shared(&key, shared).await { + return v; + } + + panic!("FutureBackedCache: joined an in-flight factory that panicked for key {key:?}"); + } + + /// Like [`get_or_init`](Self::get_or_init), but for fallible factories. + /// + /// If the factory returns `Ok(v)`, the value is cached and returned. If it returns `Err(e)`, + /// **nothing is cached** and the error is propagated to the caller. + /// + /// Unlike `get_or_init`, concurrent callers are **not** deduplicated — each caller that + /// finds the key absent will invoke the factory independently. However, if a value was + /// previously cached (by either `get_or_init` or a successful `get_or_try_init`), it is + /// returned immediately without calling the factory. + pub async fn get_or_try_init(&self, key: K, factory: F) -> Result + where + F: FnOnce() -> Fut, + Fut: Future> + Send + 'static, + { + // Fast path: value already cached or in-flight from an infallible init. + let existing = self + .map + .read_async(&key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(shared) => Err(shared.clone()), + }) + .await; + + match existing { + Some(Ok(v)) => return Ok(v), + Some(Err(shared)) => { + if let Some(v) = self.await_shared(&key, shared).await { + return Ok(v); + } + // Factory panicked; entry was removed. Fall through to run our own factory. + } + None => {} + } + + // Run the fallible factory (not deduplicated). + let val = factory().await?; + + // Attempt to cache. If another caller raced us and already inserted, + // return the existing value and discard ours. 
+ match self.map.entry_async(key).await { + scc::hash_map::Entry::Occupied(occ) => match occ.get() { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(shared) => Ok(self + .await_shared(occ.key(), shared.clone()) + .await + .unwrap_or(val)), + }, + scc::hash_map::Entry::Vacant(vac) => { + vac.insert_entry(Slot::Ready(val.clone())); + Ok(val) + } + } + } + + /// Get the cached value for `key` if it exists. + /// + /// - If the value is `Ready`, returns `Some(v)` immediately. + /// - If the value is `InFlight`, awaits the in-flight computation and returns `Some(v)`. + /// - If the key is absent, returns `None`. + /// - If the in-flight factory panicked, returns `None` (and removes the poisoned entry). + pub async fn get(&self, key: &K) -> Option { + let existing = self + .map + .read_async(key, |_, slot| match slot { + Slot::Ready(v) => Ok(v.clone()), + Slot::InFlight(shared) => Err(shared.clone()), + }) + .await; + + match existing { + Some(Ok(v)) => Some(v), + Some(Err(shared)) => self.await_shared(key, shared).await, + None => None, + } + } + + /// Await a `Shared` future, handle promotion to `Ready`, and handle panic recovery. + /// + /// Returns `Some(v)` on success. Returns `None` if the factory panicked, after removing + /// the poisoned entry from the map. + async fn await_shared(&self, key: &K, shared: SharedFut) -> Option { + let mut guard = PromoteGuard { + map: &self.map, + key, + value: None, + }; + + let result = shared.await; + + if let Some(v) = result { + guard.value = Some(v.clone()); + + self.map + .update_async(key, |_, slot| { + if matches!(slot, Slot::InFlight(_)) { + *slot = Slot::Ready(v.clone()); + } + }) + .await; + + guard.value = None; + Some(v) + } else { + // Factory panicked. Remove the poisoned InFlight entry so the next caller + // can retry. 
+ drop( + self.map + .remove_if_sync(key, |slot| matches!(slot, Slot::InFlight(_))), + ); + None + } + } + + /// Wrap a factory future in `catch_unwind`, producing a `Shared` with `Output = Option`. + fn make_shared(factory: F) -> SharedFut + where + F: FnOnce() -> Fut, + Fut: Future + Send + 'static, + { + let fut = AssertUnwindSafe(factory()).catch_unwind(); + let boxed: Pin> + Send>> = + Box::pin(async move { fut.await.ok() }); + boxed.shared() + } + + /// Returns the number of entries in the cache (both `Ready` and `InFlight`). + #[must_use] + pub fn len(&self) -> usize { + self.map.len() + } + + /// Returns `true` if the cache contains no entries. + #[must_use] + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + /// Synchronously insert a value, overwriting any existing entry. + /// + /// Suitable for seeding the cache before async operations begin (e.g. + /// inside an ouroboros builder where async is unavailable). + pub fn insert_sync(&self, key: K, value: V) { + drop(self.map.insert_sync(key, Slot::Ready(value))); + } + + /// Synchronously remove the entry for `key`, returning `true` if it was present. + /// + /// Suitable for use in contexts where async is not available (e.g. inside + /// [`StatelessDrop::delete`](crate::drop_ward::StatelessDrop::delete)). + pub fn remove_sync(&self, key: &K) -> bool { + self.map.remove_sync(key).is_some() + } +} + +/// Drop guard that synchronously promotes an `InFlight` entry to `Ready` if the caller +/// is cancelled between `shared.await` completing and the async promotion running. +/// +/// Set `value = None` to defuse after successful promotion. 
+struct PromoteGuard<'a, K, V> +where + K: Eq + Hash, + V: Clone + Send + Sync + 'static, +{ + map: &'a scc::HashMap>, + key: &'a K, + value: Option, +} + +impl Drop for PromoteGuard<'_, K, V> +where + K: Eq + Hash, + V: Clone + Send + Sync + 'static, +{ + fn drop(&mut self) { + if let Some(v) = self.value.take() { + self.map.update_sync(self.key, |_, slot| { + if matches!(slot, Slot::InFlight(_)) { + *slot = Slot::Ready(v); + } + }); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn try_init_ok_caches_value() { + let cache = FutureBackedCache::::default(); + let result: Result = cache + .get_or_try_init(1, || async { Ok("hello".to_owned()) }) + .await; + assert_eq!(result.unwrap(), "hello", "should return Ok value"); + + // Value should now be cached (get returns it without factory) + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "hello", "value should be in cache"); + } + + #[tokio::test] + async fn try_init_err_does_not_cache() { + let cache = FutureBackedCache::::default(); + let result: Result = cache.get_or_try_init(1, || async { Err("boom") }).await; + assert_eq!(result.unwrap_err(), "boom", "should return the error"); + + // Cache should be empty — error was not stored + assert!(cache.is_empty(), "cache should have no entries after error"); + assert!(cache.get(&1).await.is_none(), "key should not exist"); + } + + #[tokio::test] + async fn try_init_err_then_retry_ok() { + let cache = FutureBackedCache::::default(); + + // First call: factory fails + let r1: Result = cache.get_or_try_init(1, || async { Err("fail") }).await; + assert!(r1.is_err(), "first call should fail"); + + // Second call: factory succeeds + let r2: Result = cache + .get_or_try_init(1, || async { Ok("recovered".to_owned()) }) + .await; + assert_eq!(r2.unwrap(), "recovered", "retry should succeed"); + + // Value should now be cached + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "recovered"); + } + + #[tokio::test] + 
async fn try_init_returns_value_cached_by_init() { + let cache = FutureBackedCache::::default(); + + // Populate via infallible get_or_init + cache + .get_or_init(1, || async { "from_init".to_owned() }) + .await; + + // get_or_try_init should return the cached value without running factory + let result: Result = cache + .get_or_try_init(1, || async { panic!("factory should not run") }) + .await; + assert_eq!(result.unwrap(), "from_init"); + } + + #[tokio::test] + async fn panic_in_factory_is_recovered() { + use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; + + let cache = Arc::new(FutureBackedCache::::default()); + let call_count = Arc::new(AtomicUsize::new(0)); + + // Spawn a task whose factory panics. tokio::spawn catches the panic. + let cache2 = Arc::clone(&cache); + let call_count2 = Arc::clone(&call_count); + let handle = tokio::spawn(async move { + cache2 + .get_or_init(1, || { + call_count2.fetch_add(1, Ordering::Relaxed); + async { panic!("boom") } + }) + .await + }); + // The spawned task panics internally; JoinHandle returns Err. + assert!(handle.await.is_err(), "task should have panicked"); + + // The key should NOT be permanently bricked. A new caller should succeed. + let v = cache + .get_or_init(1, || { + call_count.fetch_add(1, Ordering::Relaxed); + async { "recovered".to_owned() } + }) + .await; + assert_eq!(v, "recovered", "should recover after panic"); + assert_eq!( + call_count.load(Ordering::Relaxed), + 2, + "factory called twice" + ); + } +} diff --git a/lib/cache/mod.rs b/lib/cache/mod.rs index e0c1c97f..5c48ee22 100644 --- a/lib/cache/mod.rs +++ b/lib/cache/mod.rs @@ -1,3 +1,5 @@ +/// Async-backed cache implementation. +pub mod async_backed; /// Cache eviction policies. pub mod eviction; /// File-backed cache implementation. diff --git a/lib/drop_ward.rs b/lib/drop_ward.rs new file mode 100644 index 00000000..4922e13c --- /dev/null +++ b/lib/drop_ward.rs @@ -0,0 +1,133 @@ +//! 
Automatic, type-directed cleanup driven by reference counting. +//! +//! [`DropWard`] tracks how many live references exist for a given key and invokes a cleanup +//! callback when a key's count reaches zero. The cleanup logic is selected at the type level +//! through a zero-sized "tag" type that implements [`StatelessDrop`], keeping the ward itself +//! generic over *what* it manages without storing per-key values. +//! +//! This is designed for resources whose lifecycle is bound to an external context (e.g. GPU device +//! handles, connection pools, graphics pipelines) where Rust's built-in `Drop` cannot be used +//! because cleanup requires access to that context. +//! +//! # Design rationale +//! +//! The tag type `T` is constrained to be zero-sized. It exists only to carry the [`StatelessDrop`] +//! implementation at the type level — no `T` value is ever constructed or stored. This means a +//! single `DropWard` instance adds no per-key overhead beyond the key and its `usize` count. +//! +//! # Example +//! +//! ```ignore +//! struct GpuTextureDrop; +//! +//! impl StatelessDrop for GpuTextureDrop { +//! fn delete(device: &wgpu::Device, _key: &TextureId) { +//! // e.g. flush a deferred-destruction queue +//! device.poll(wgpu::Maintain::Wait); +//! } +//! } +//! +//! let mut ward: DropWard = DropWard::new(device); +//! +//! ward.inc(texture_id); // → 1 +//! ward.inc(texture_id); // → 2 +//! ward.dec(&texture_id); // → Some(1) +//! ward.dec(&texture_id); // → Some(0), calls GpuTextureDrop::delete(&device, &texture_id) +//! ``` + +use std::marker::PhantomData; + +use rustc_hash::FxHashMap; + +/// Type-level hook for cleanup that requires an external context. +/// +/// Implement this on a zero-sized tag type. The tag is never instantiated — it only selects which +/// `delete` implementation a [`DropWard`] will call. +pub trait StatelessDrop { + /// Called exactly once when a key's reference count reaches zero. 
+ /// + /// `ctx` is the shared context owned by the [`DropWard`]. `key` is the key whose count just + /// reached zero. This callback fires synchronously inside [`DropWard::dec`]; avoid blocking or + /// panicking if the ward is used on a hot path. + fn delete(ctx: &Ctx, key: &K); +} + +/// A reference-counted key set that triggers [`StatelessDrop::delete`] on the associated context +/// when any key's count drops to zero. +/// +/// # Type parameters +/// +/// - `Ctx` — shared context passed to `T::delete` (e.g. a device handle). +/// - `K` — the key type being reference-counted. +/// - `T` — a **zero-sized** tag type carrying the cleanup logic. +/// Will fail to compile if `size_of::() != 0`. +/// +/// # Concurrency +/// +/// Not thread-safe. All access requires `&mut self`. Wrap in a `Mutex` or similar if shared across +/// threads. +/// +#[derive(Debug, Clone)] +pub struct DropWard { + map: FxHashMap, + ctx: Ctx, + _marker: PhantomData, +} + +impl DropWard +where + K: Eq + std::hash::Hash, + T: StatelessDrop, +{ + /// Compile-time guard: `T` must be zero-sized. + const _ASSERT_ZST: () = assert!(size_of::() == 0, "T must be zero-sized"); + + /// Create a new ward that will pass `ctx` to `T::delete` on cleanup. + pub fn new(ctx: Ctx) -> Self { + Self { + map: FxHashMap::default(), + ctx, + _marker: PhantomData, + } + } + + /// Increment the reference count for `key`, inserting it with a count + /// of 1 if it does not exist. + /// + /// Returns the count **after** incrementing. + pub fn inc(&mut self, key: K) -> usize { + *self + .map + .entry(key) + .and_modify(|count| *count += 1) + .or_insert(1) + } + + fn dec_by(&mut self, key: &K, by: usize) -> Option { + let curr = *self.map.get(key)?; + let new_count = curr.saturating_sub(by); + if new_count == 0 { + self.map.remove(key); + T::delete(&self.ctx, key); + } else if let Some(slot) = self.map.get_mut(key) { + *slot = new_count; + } + Some(new_count) + } + + /// Decrement the reference count for `key`. 
+ /// + /// If the count reaches zero, the key is removed and `T::delete` is + /// called synchronously with the ward's context. Returns `Some(0)` in + /// this case — the key will no longer be tracked. + /// + /// Returns `None` if `key` was not present (no-op). + pub fn dec(&mut self, key: &K) -> Option { + self.dec_by(key, 1) + } + + /// Decrement the reference count for `key` by `count`. + pub fn dec_count(&mut self, key: &K, count: usize) -> Option { + self.dec_by(key, count) + } +} diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs new file mode 100644 index 00000000..7626578f --- /dev/null +++ b/lib/fs/async_fs.rs @@ -0,0 +1,432 @@ +//! Async `INode` Table which supports concurrent access and modification. + +use std::ffi::{OsStr, OsString}; +use std::future::Future; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; + +use bytes::Bytes; + +use crate::cache::async_backed::FutureBackedCache; +use crate::drop_ward::StatelessDrop; +use crate::fs::{ + AsyncFsStats, DirEntry, FileHandle, INode, INodeType, InodeAddr, LoadedAddr, OpenFlags, + dcache::DCache, +}; + +/// A reader for an open file, returned by [`FsDataProvider::open`]. +/// +/// Implementors provide the actual data for read operations. The FUSE +/// adapter calls [`close`](Self::close) to release resources explicitly. +pub trait FileReader: Send + Sync + 'static { + /// Read up to `size` bytes starting at byte `offset`. + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send; + + /// Release any resources held by this reader. + /// + /// Called explicitly by the FUSE adapter during `release`. Implementations + /// that hold inner file handles should release them here. The default + /// implementation is a no-op. + fn close(&self) -> impl Future> + Send { + async { Ok(()) } + } +} + +/// A data provider for [`AsyncFs`] that fetches inode data on cache misses. 
+pub trait FsDataProvider: Clone + Send + Sync + 'static { + /// The reader type returned by [`open`](Self::open). + type Reader: FileReader; + + /// Look up a child inode by name within the given parent directory. + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) -> impl Future> + Send; + + /// List all children of a directory. + /// + /// Called by [`AsyncFs::readdir`] on a cache miss. The returned + /// children are inserted into the directory cache and inode table + /// so subsequent reads are served from cache. + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send; + + /// Open a file and return a reader for subsequent read calls. + fn open( + &self, + inode: INode, + flags: OpenFlags, + ) -> impl Future> + Send; +} + +/// Zero-sized tag whose [`StatelessDrop`] implementation automatically evicts +/// an inode from the inode table when its reference count reaches zero. +pub struct InodeForget; + +impl<'a> StatelessDrop<&'a FutureBackedCache, InodeAddr> for InodeForget { + fn delete(inode_table: &&'a FutureBackedCache, addr: &InodeAddr) { + inode_table.remove_sync(addr); + } +} + +/// A looked-up inode whose lifetime must be managed by the caller. +/// +/// Each `TrackedINode` returned by [`AsyncFs::lookup`] represents one +/// reference that the FUSE kernel holds. The caller must balance it by +/// decrementing the [`InodeLifecycle`] ward when the kernel sends `forget`. +#[derive(Debug, Clone, Copy)] +pub struct TrackedINode { + /// The resolved inode data. + pub inode: INode, +} + +/// An open file that provides read access. +/// +/// Returned by [`AsyncFs::open`]. The caller owns this handle and uses +/// [`read`](Self::read) to fetch data. Dropping the handle releases +/// the underlying reader when the last `Arc` clone is gone. +#[derive(Debug, Clone)] +pub struct OpenFile { + /// The raw file handle number, suitable for returning to the FUSE kernel. + pub fh: FileHandle, + /// The reader backing this open file. 
+ pub reader: Arc, +} + +impl OpenFile { + /// Read up to `size` bytes starting at byte `offset`. + pub async fn read(&self, offset: u64, size: u32) -> Result { + self.reader.read(offset, size).await + } +} + +mod inode_lifecycle_impl { + #![allow(clippy::future_not_send, clippy::mem_forget)] + use ouroboros::self_referencing; + + use crate::cache::async_backed::FutureBackedCache; + use crate::drop_ward::DropWard; + use crate::fs::InodeAddr; + + use super::{INode, InodeForget}; + + /// Co-located inode table and reference-count ward. + /// + /// The ward borrows the table directly (no `Arc`) via `ouroboros`. + /// When `dec` reaches zero for a key, [`InodeForget::delete`] synchronously + /// removes that inode from the table. + #[self_referencing] + pub struct InodeLifecycle { + pub(super) table: FutureBackedCache, + #[borrows(table)] + #[not_covariant] + pub(super) ward: + DropWard<&'this FutureBackedCache, InodeAddr, InodeForget>, + } + + impl InodeLifecycle { + /// Create a new lifecycle managing the given inode table. + pub fn from_table(table: FutureBackedCache) -> Self { + Self::new(table, |tbl| DropWard::new(tbl)) + } + } +} + +pub use inode_lifecycle_impl::InodeLifecycle; + +impl InodeLifecycle { + /// Increment the reference count for an inode address. + pub fn inc(&mut self, addr: InodeAddr) -> usize { + self.with_ward_mut(|ward| ward.inc(addr)) + } + + /// Decrement the reference count for an inode address. + /// + /// When the count reaches zero, the inode is automatically evicted + /// from the table via [`InodeForget::delete`]. + pub fn dec(&mut self, addr: &InodeAddr) -> Option { + self.with_ward_mut(|ward| ward.dec(addr)) + } + + /// Decrement the reference count by `count`. + /// + /// When the count reaches zero, the inode is automatically evicted. + pub fn dec_count(&mut self, addr: &InodeAddr, count: usize) -> Option { + self.with_ward_mut(|ward| ward.dec_count(addr, count)) + } + + /// Read-only access to the underlying inode table. 
+    #[must_use]
+    pub fn table(&self) -> &FutureBackedCache<InodeAddr, INode> {
+        self.borrow_table()
+    }
+}
+
+/// An asynchronous filesystem cache mapping `InodeAddr` to `INode`.
+///
+/// Uses two [`FutureBackedCache`] layers:
+/// - `inode_table` stores resolved inodes by address, used by [`loaded_inode`](Self::loaded_inode).
+/// - `lookup_cache` stores lookup results by `(parent_addr, name)`, ensuring `dp.lookup()` is only
+///   called on a true cache miss (not already cached or in-flight).
+///
+/// The [`DCache`] sits in front as a synchronous fast path mapping `(parent, name)` to child addr.
+pub struct AsyncFs<'tbl, DP: FsDataProvider> {
+    /// Canonical addr -> `INode` map. Used by `loaded_inode()` to retrieve inodes by address.
+    inode_table: &'tbl FutureBackedCache<InodeAddr, INode>,
+
+    /// Deduplicating lookup cache keyed by `(parent_addr, child_name)`. The factory is
+    /// `dp.lookup()`, so the data provider is only called on a true cache miss.
+    lookup_cache: FutureBackedCache<(InodeAddr, OsString), INode>,
+
+    /// Directory entry cache, mapping `(parent, name)` to child inode address.
+    directory_cache: DCache,
+
+    /// The data provider used to fetch inode data on cache misses.
+    data_provider: DP,
+
+    /// Monotonically increasing file handle counter. Starts at 1 (0 is reserved).
+    next_fh: AtomicU64,
+
+    /// Tracks which directories have had their children fetched via `dp.readdir`.
+    readdir_populated: FutureBackedCache<LoadedAddr, ()>,
+}
+
+impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> {
+    /// Create a new `AsyncFs`, seeding the root inode into the table.
+    pub async fn new(
+        data_provider: DP,
+        root: INode,
+        inode_table: &'tbl FutureBackedCache<InodeAddr, INode>,
+    ) -> Self {
+        inode_table
+            .get_or_init(root.addr, || async move { root })
+            .await;
+
+        Self {
+            inode_table,
+            lookup_cache: FutureBackedCache::default(),
+            directory_cache: DCache::new(),
+            data_provider,
+            next_fh: AtomicU64::new(1),
+            readdir_populated: FutureBackedCache::default(),
+        }
+    }
+
+    /// Create a new `AsyncFs`, assuming the root inode is already in the table.
+    ///
+    /// This synchronous constructor is needed for ouroboros builders where
+    /// async is unavailable. The caller must ensure the root inode has already
+    /// been inserted into `inode_table` (e.g. via [`FutureBackedCache::insert_sync`]).
+    #[must_use]
+    pub fn new_preseeded(
+        data_provider: DP,
+        inode_table: &'tbl FutureBackedCache<InodeAddr, INode>,
+    ) -> Self {
+        Self {
+            inode_table,
+            lookup_cache: FutureBackedCache::default(),
+            directory_cache: DCache::new(),
+            data_provider,
+            next_fh: AtomicU64::new(1),
+            readdir_populated: FutureBackedCache::default(),
+        }
+    }
+
+    /// Get the total number of inodes currently stored in the inode table.
+    #[must_use]
+    pub fn inode_count(&self) -> usize {
+        self.inode_table.len()
+    }
+
+    /// Return filesystem statistics.
+    ///
+    /// Reports the current inode count from the cache. Block-related
+    /// fields default to values appropriate for a virtual read-only
+    /// filesystem (4 KiB blocks, no free space).
+    #[must_use]
+    pub fn statfs(&self) -> AsyncFsStats {
+        AsyncFsStats {
+            block_size: 4096,
+            total_blocks: 0,
+            free_blocks: 0,
+            available_blocks: 0,
+            total_inodes: self.inode_count() as u64,
+            free_inodes: 0,
+            max_filename_length: 255,
+        }
+    }
+
+    /// Asynchronously look up an inode by name within a parent directory.
+    ///
+    /// Resolution order:
+    /// 1. Directory cache (synchronous fast path)
+    /// 2. Lookup cache (`get_or_try_init` — calls `dp.lookup()` only on a true miss)
+    /// 3. On success, populates inode table and directory cache
+    pub async fn lookup(
+        &self,
+        parent: LoadedAddr,
+        name: &OsStr,
+    ) -> Result<TrackedINode, std::io::Error> {
+        let parent_ino = self.loaded_inode(parent).await?;
+        debug_assert!(
+            matches!(parent_ino.itype, INodeType::Directory),
+            "parent inode should be a directory"
+        );
+
+        if let Some(dentry) = self.directory_cache.lookup(parent, name)
+            && let Some(inode) = self.inode_table.get(&dentry.ino.0).await
+        {
+            return Ok(TrackedINode { inode });
+        }
+        // Inode was evicted from the table — fall through to the slow path.
+
+        let name_owned = name.to_os_string();
+        let name_for_cache = name_owned.clone();
+        let lookup_key = (parent.0, name_owned.clone());
+        let dp = self.data_provider.clone();
+
+        let child = self
+            .lookup_cache
+            .get_or_try_init(lookup_key, || async move {
+                dp.lookup(parent_ino, &name_owned).await
+            })
+            .await?;
+
+        self.inode_table
+            .get_or_init(child.addr, || async move { child })
+            .await;
+
+        self.directory_cache
+            .insert(
+                parent,
+                name_for_cache,
+                LoadedAddr(child.addr),
+                matches!(child.itype, INodeType::Directory),
+            )
+            .await;
+
+        Ok(TrackedINode { inode: child })
+    }
+
+    /// Retrieve an inode that is expected to already be loaded.
+    ///
+    /// If the inode is currently in-flight (being loaded by another caller), this awaits
+    /// completion. Returns an error if the inode is not in the table at all.
+    pub async fn loaded_inode(&self, addr: LoadedAddr) -> Result<INode, std::io::Error> {
+        self.inode_table.get(&addr.0).await.ok_or_else(|| {
+            tracing::error!(
+                inode = ?addr.0,
+                "inode not found in table — this is a programming bug"
+            );
+            std::io::Error::from_raw_os_error(libc::ENOENT)
+        })
+    }
+
+    /// Return the attributes of the inode at `addr`.
+    ///
+    /// This is the getattr entry point for the filesystem. Returns the
+    /// cached [`INode`] directly — callers at the FUSE boundary are
+    /// responsible for converting to `fuser::FileAttr`.
+    pub async fn getattr(&self, addr: LoadedAddr) -> Result<INode, std::io::Error> {
+        self.loaded_inode(addr).await
+    }
+
+    /// Open a file for reading.
+    ///
+    /// Validates the inode is not a directory, delegates to the data provider
+    /// to create a [`FileReader`], and returns an [`OpenFile`] that the caller
+    /// owns. Reads go through [`OpenFile::read`].
+    pub async fn open(
+        &self,
+        addr: LoadedAddr,
+        flags: OpenFlags,
+    ) -> Result<OpenFile<DP::Reader>, std::io::Error> {
+        let inode = self.loaded_inode(addr).await?;
+        if inode.itype == INodeType::Directory {
+            return Err(std::io::Error::from_raw_os_error(libc::EISDIR));
+        }
+        let reader = self.data_provider.open(inode, flags).await?;
+        let fh = self.next_fh.fetch_add(1, Ordering::Relaxed);
+        Ok(OpenFile {
+            fh,
+            reader: Arc::new(reader),
+        })
+    }
+
+    /// Iterate directory entries for `parent`, starting from `offset`.
+    ///
+    /// On the first call for a given parent, fetches the directory listing
+    /// from the data provider and populates the directory cache and inode
+    /// table. Subsequent calls serve entries directly from cache.
+    ///
+    /// Entries are yielded in name-sorted order. For each entry, `filler` is
+    /// called with the [`DirEntry`] and the next offset value. If `filler`
+    /// returns `true` (indicating the caller's buffer is full), iteration
+    /// stops early.
+    ///
+    /// # Concurrency
+    ///
+    /// The `readdir_populated` check-then-populate is **not** atomic. If two
+    /// concurrent callers invoke `readdir` for the same parent, both may call
+    /// `dp.readdir()` and insert duplicate children. This is safe when the
+    /// caller serializes access (e.g. via `&mut self` on the `Fs` trait).
+    ///
+    /// TODO(MES-746): Implement `opendir` and `releasedir` to snapshot directory contents and
+    /// avoid racing with `lookup`/`createfile`.
+ pub async fn readdir( + &self, + parent: LoadedAddr, + offset: u64, + mut filler: impl FnMut(DirEntry<'_>, u64) -> bool, + ) -> Result<(), std::io::Error> { + let parent_inode = self.loaded_inode(parent).await?; + if parent_inode.itype != INodeType::Directory { + return Err(std::io::Error::from_raw_os_error(libc::ENOTDIR)); + } + + // Populate the directory cache on first readdir for this parent. + if self.readdir_populated.get(&parent).await.is_none() { + let children = self.data_provider.readdir(parent_inode).await?; + for (name, child_inode) in children { + self.inode_table + .get_or_init(child_inode.addr, || async move { child_inode }) + .await; + self.directory_cache + .insert( + parent, + name, + LoadedAddr(child_inode.addr), + child_inode.itype == INodeType::Directory, + ) + .await; + } + self.readdir_populated + .get_or_init(parent, || async {}) + .await; + } + + let mut children = self.directory_cache.readdir(parent).await; + children.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + + #[expect( + clippy::cast_possible_truncation, + reason = "offset fits in usize on supported 64-bit platforms" + )] + for (i, (name, dvalue)) in children.iter().enumerate().skip(offset as usize) { + let inode = self.loaded_inode(dvalue.ino).await?; + let next_offset = (i + 1) as u64; + if filler(DirEntry { name, inode }, next_offset) { + break; + } + } + + Ok(()) + } +} diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs new file mode 100644 index 00000000..5138e802 --- /dev/null +++ b/lib/fs/dcache.rs @@ -0,0 +1,65 @@ +use std::ffi::{OsStr, OsString}; + +use crate::fs::LoadedAddr; + +/// Cached metadata for a directory entry. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DValue { + /// Inode address of this entry. + pub ino: LoadedAddr, + /// Whether this entry is itself a directory. + pub is_dir: bool, +} + +/// In-memory directory entry cache mapping `(parent, name)` to child metadata. +/// +/// Backed by [`scc::HashMap`] for atomic upsert on insert. 
The `readdir`
+/// implementation scans the entire map and filters by parent — this is O(n)
+/// over the cache size rather than O(log n + k) with an ordered index, but
+/// guarantees that `insert` never creates a window where an entry is absent.
+#[derive(Default)]
+pub struct DCache {
+    cache: scc::HashMap<(LoadedAddr, OsString), DValue>,
+}
+
+impl DCache {
+    /// Creates an empty directory cache.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Looks up a single child entry by parent inode and name.
+    #[must_use]
+    pub fn lookup(&self, parent_ino: LoadedAddr, name: &OsStr) -> Option<DValue> {
+        let key = (parent_ino, name.to_os_string());
+        self.cache.read_sync(&key, |_, v| v.clone())
+    }
+
+    /// Atomically inserts or overwrites a child entry in the cache.
+    pub async fn insert(
+        &self,
+        parent_ino: LoadedAddr,
+        name: OsString,
+        ino: LoadedAddr,
+        is_dir: bool,
+    ) {
+        let key = (parent_ino, name);
+        let value = DValue { ino, is_dir };
+        self.cache.upsert_async(key, value).await;
+    }
+
+    /// Returns all cached children of `parent_ino` as `(name, value)` pairs.
+    pub async fn readdir(&self, parent_ino: LoadedAddr) -> Vec<(OsString, DValue)> {
+        let mut entries = Vec::new();
+        self.cache
+            .iter_async(|key, value| {
+                if key.0 == parent_ino {
+                    entries.push((key.1.clone(), value.clone()));
+                }
+                true
+            })
+            .await;
+        entries
+    }
+}
diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs
new file mode 100644
index 00000000..50042a24
--- /dev/null
+++ b/lib/fs/fuser.rs
@@ -0,0 +1,425 @@
+//! FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`AsyncFs`](super::async_fs::AsyncFs).
+
+use std::collections::HashMap;
+use std::ffi::OsStr;
+use std::sync::Arc;
+
+use super::async_fs::{FileReader as _, FsDataProvider};
+use super::{FileHandle, INode, INodeType, InodeAddr, LoadedAddr, OpenFlags};
+use crate::cache::async_backed::FutureBackedCache;
+use tracing::{debug, error, instrument};
+
+/// Wrapper converting [`std::io::Error`] to errno.
+#[derive(Debug, thiserror::Error)]
+#[error("{0}")]
+struct FuseIoError(std::io::Error);
+
+#[expect(
+    clippy::wildcard_enum_match_arm,
+    reason = "ErrorKind is non_exhaustive; EIO is the safe default"
+)]
+impl From<FuseIoError> for i32 {
+    fn from(e: FuseIoError) -> Self {
+        e.0.raw_os_error().unwrap_or_else(|| match e.0.kind() {
+            std::io::ErrorKind::NotFound => libc::ENOENT,
+            std::io::ErrorKind::PermissionDenied => libc::EACCES,
+            std::io::ErrorKind::AlreadyExists => libc::EEXIST,
+            _ => libc::EIO,
+        })
+    }
+}
+
+/// Error for read operations.
+#[derive(Debug, thiserror::Error)]
+enum FuseReadError {
+    /// The file handle was not open.
+    #[error("file handle not open")]
+    NotOpen,
+    /// An I/O error occurred during the read.
+    #[error("I/O error: {0}")]
+    Io(#[from] std::io::Error),
+}
+
+impl From<FuseReadError> for i32 {
+    fn from(e: FuseReadError) -> Self {
+        match e {
+            FuseReadError::NotOpen => libc::EBADF,
+            FuseReadError::Io(ref io) => io.raw_os_error().unwrap_or(libc::EIO),
+        }
+    }
+}
+
+/// Error for release operations.
+#[derive(Debug, thiserror::Error)]
+enum FuseReleaseError {
+    /// The file handle was not open.
+    #[error("file handle not open")]
+    NotOpen,
+}
+
+impl From<FuseReleaseError> for i32 {
+    fn from(e: FuseReleaseError) -> Self {
+        match e {
+            FuseReleaseError::NotOpen => libc::EBADF,
+        }
+    }
+}
+
+mod inner {
+    #![allow(clippy::future_not_send, clippy::mem_forget)]
+
+    use ouroboros::self_referencing;
+
+    use crate::cache::async_backed::FutureBackedCache;
+    use crate::drop_ward::DropWard;
+    use crate::fs::async_fs::{AsyncFs, FsDataProvider, InodeForget};
+    use crate::fs::{INode, InodeAddr};
+
+    /// Self-referential struct holding the inode table, refcount ward, and `AsyncFs`.
+    ///
+    /// Both `ward` and `fs` borrow from `table`. The ward manages inode
+    /// refcounts; the fs serves lookup/readdir/open/read operations.
+    #[self_referencing]
+    pub(super) struct FuseBridgeInner<DP: FsDataProvider> {
+        table: FutureBackedCache<InodeAddr, INode>,
+        #[borrows(table)]
+        #[not_covariant]
+        ward: DropWard<&'this FutureBackedCache<InodeAddr, INode>, InodeAddr, InodeForget>,
+        #[borrows(table)]
+        #[covariant]
+        fs: AsyncFs<'this, DP>,
+    }
+
+    impl<DP: FsDataProvider> FuseBridgeInner<DP> {
+        pub(super) fn create(table: FutureBackedCache<InodeAddr, INode>, provider: DP) -> Self {
+            FuseBridgeInnerBuilder {
+                table,
+                ward_builder: |tbl| DropWard::new(tbl),
+                fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl),
+            }
+            .build()
+        }
+
+        pub(super) fn get_fs(&self) -> &AsyncFs<'_, DP> {
+            self.borrow_fs()
+        }
+
+        pub(super) fn ward_inc(&mut self, addr: InodeAddr) -> usize {
+            self.with_ward_mut(|ward| ward.inc(addr))
+        }
+
+        pub(super) fn ward_dec_count(&mut self, addr: InodeAddr, count: usize) -> Option<usize> {
+            self.with_ward_mut(|ward| ward.dec_count(&addr, count))
+        }
+    }
+}
+
+use inner::FuseBridgeInner;
+
+/// Convert an `INode` to the fuser-specific `FileAttr`.
+fn inode_to_fuser_attr(inode: &INode, block_size: u32) -> fuser::FileAttr {
+    fuser::FileAttr {
+        ino: inode.addr,
+        size: inode.size,
+        blocks: inode.size.div_ceil(512),
+        atime: inode.last_modified_at,
+        mtime: inode.last_modified_at,
+        ctime: inode.last_modified_at,
+        crtime: inode.create_time,
+        kind: inode_type_to_fuser(inode.itype),
+        perm: inode.permissions.bits(),
+        nlink: 1,
+        uid: inode.uid,
+        gid: inode.gid,
+        rdev: 0,
+        blksize: block_size,
+        flags: 0,
+    }
+}
+
+#[expect(
+    clippy::wildcard_enum_match_arm,
+    reason = "INodeType is non_exhaustive; File is the safe default"
+)]
+fn inode_type_to_fuser(itype: INodeType) -> fuser::FileType {
+    match itype {
+        INodeType::Directory => fuser::FileType::Directory,
+        INodeType::Symlink => fuser::FileType::Symlink,
+        _ => fuser::FileType::RegularFile,
+    }
+}
+
+const BLOCK_SIZE: u32 = 4096;
+
+/// Bridges a generic [`FsDataProvider`] to the [`fuser::Filesystem`] trait.
+///
+/// Owns a self-referential inode table + ward + [`AsyncFs`](super::async_fs::AsyncFs),
+/// plus an open-file map and a tokio runtime handle for blocking on async ops.
+pub struct FuserAdapter<DP: FsDataProvider> {
+    inner: FuseBridgeInner<DP>,
+    open_files: HashMap<FileHandle, Arc<DP::Reader>>,
+    runtime: tokio::runtime::Handle,
+}
+
+impl<DP: FsDataProvider> FuserAdapter<DP> {
+    // TODO(markovejnovic): This low TTL is really not ideal. It slows us down a lot, since the
+    // kernel has to ask us for every single lookup all the time.
+    //
+    // I think a better implementation is to implement
+    //
+    //     notify_inval_inode(ino, offset, len)
+    //     notify_inval_entry(parent_ino, name)
+    //
+    // These two functions can be used to invalidate specific entries in the kernel cache when we
+    // know they have changed. This would allow us to set a much higher TTL here.
+    const SHAMEFUL_TTL: std::time::Duration = std::time::Duration::from_secs(1);
+
+    /// Create a new adapter from a pre-seeded inode table and data provider.
+    ///
+    /// The `table` must already have the root inode inserted.
+    pub fn new(
+        table: FutureBackedCache<InodeAddr, INode>,
+        provider: DP,
+        runtime: tokio::runtime::Handle,
+    ) -> Self {
+        Self {
+            inner: FuseBridgeInner::create(table, provider),
+            open_files: HashMap::new(),
+            runtime,
+        }
+    }
+}
+
+impl<DP: FsDataProvider> fuser::Filesystem for FuserAdapter<DP> {
+    #[instrument(name = "FuserAdapter::lookup", skip(self, _req, reply))]
+    fn lookup(
+        &mut self,
+        _req: &fuser::Request<'_>,
+        parent: u64,
+        name: &OsStr,
+        reply: fuser::ReplyEntry,
+    ) {
+        let result = self.runtime.block_on(async {
+            let tracked = self
+                .inner
+                .get_fs()
+                .lookup(LoadedAddr(parent), name)
+                .await
+                .map_err(FuseIoError)?;
+            self.inner.ward_inc(tracked.inode.addr);
+            Ok::<_, FuseIoError>(tracked.inode)
+        });
+        match result {
+            Ok(inode) => {
+                let f_attr = inode_to_fuser_attr(&inode, BLOCK_SIZE);
+                debug!(?f_attr, "replying...");
+                reply.entry(&Self::SHAMEFUL_TTL, &f_attr, 0);
+            }
+            Err(e) => {
+                debug!(error = %e, "replying error");
+                reply.error(e.into());
+            }
+        }
+    }
+
+    #[instrument(name = "FuserAdapter::getattr", skip(self, _req, _fh, reply))]
+    fn getattr(
+        &mut self,
+        _req: &fuser::Request<'_>,
+        ino: u64,
+        _fh: Option<u64>,
+        reply: fuser::ReplyAttr,
+    ) {
+        let result = self.runtime.block_on(async {
+            self.inner
+                .get_fs()
+                .getattr(LoadedAddr(ino))
+                .await
+                .map_err(FuseIoError)
+        });
+        match result {
+            Ok(inode) => {
+                let attr = inode_to_fuser_attr(&inode, BLOCK_SIZE);
+                debug!(?attr, "replying...");
+                reply.attr(&Self::SHAMEFUL_TTL, &attr);
+            }
+            Err(e) => {
+                debug!(error = %e, "replying error");
+                reply.error(e.into());
+            }
+        }
+    }
+
+    #[instrument(name = "FuserAdapter::readdir", skip(self, _req, _fh, offset, reply))]
+    fn readdir(
+        &mut self,
+        _req: &fuser::Request<'_>,
+        ino: u64,
+        _fh: u64,
+        offset: i64,
+        mut reply: fuser::ReplyDirectory,
+    ) {
+        let offset_u64 = offset.cast_unsigned();
+        let result = self.runtime.block_on(async {
+            let mut entries = Vec::new();
+            self.inner
+                .get_fs()
+                .readdir(LoadedAddr(ino), offset_u64, |de, _next_offset| {
+                    entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype));
+                    false
+                })
+                .await
+                .map_err(FuseIoError)?;
+            Ok::<_, FuseIoError>(entries)
+        });
+
+        let entries = match result {
+            Ok(entries) => entries,
+            Err(e) => {
+                debug!(error = %e, "replying error");
+                reply.error(e.into());
+                return;
+            }
+        };
+
+        #[expect(
+            clippy::cast_possible_truncation,
+            reason = "offset fits in usize on supported 64-bit platforms"
+        )]
+        for (i, (entry_ino, entry_name, entry_itype)) in entries.iter().enumerate() {
+            let kind = inode_type_to_fuser(*entry_itype);
+            let abs_idx = offset_u64 as usize + i + 1;
+            let Ok(idx): Result<i64, _> = abs_idx.try_into() else {
+                error!("Directory entry index {} too large for fuser", abs_idx);
+                reply.error(libc::EIO);
+                return;
+            };
+
+            debug!(?entry_name, ino = entry_ino, "adding entry to reply...");
+            if reply.add(*entry_ino, idx, kind, entry_name) {
+                debug!("buffer full for now, stopping readdir");
+                break;
+            }
+        }
+
+        debug!("finalizing reply...");
+        reply.ok();
+    }
+
+    #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))]
+    fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) {
+        let flags = OpenFlags::from_bits_truncate(flags);
+        let result = self.runtime.block_on(async {
+            let open_file = self
+                .inner
+                .get_fs()
+                .open(LoadedAddr(ino), flags)
+                .await
+                .map_err(FuseIoError)?;
+            let fh = open_file.fh;
+            self.open_files.insert(fh, Arc::clone(&open_file.reader));
+            Ok::<_, FuseIoError>(fh)
+        });
+        match result {
+            Ok(fh) => {
+                debug!(handle = fh, "replying...");
+                reply.opened(fh, 0);
+            }
+            Err(e) => {
+                debug!(error = %e, "replying error");
+                reply.error(e.into());
+            }
+        }
+    }
+
+    #[instrument(
+        name = "FuserAdapter::read",
+        skip(self, _req, _ino, fh, offset, size, _flags, _lock_owner, reply)
+    )]
+    fn read(
+        &mut self,
+        _req: &fuser::Request<'_>,
+        _ino: u64,
+        fh: u64,
+        offset: i64,
+        size: u32,
+        _flags: i32,
+        _lock_owner: Option<u64>,
+        reply: fuser::ReplyData,
+    ) {
let result: Result<_, FuseReadError> = self.runtime.block_on(async { + let reader = self.open_files.get(&fh).ok_or(FuseReadError::NotOpen)?; + Ok(reader.read(offset.cast_unsigned(), size).await?) + }); + match result { + Ok(data) => { + debug!(read_bytes = data.len(), "replying..."); + reply.data(&data); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[instrument( + name = "FuserAdapter::release", + skip(self, _req, _ino, fh, _flags, _lock_owner, _flush, reply) + )] + fn release( + &mut self, + _req: &fuser::Request<'_>, + _ino: u64, + fh: u64, + _flags: i32, + _lock_owner: Option, + _flush: bool, + reply: fuser::ReplyEmpty, + ) { + let result: Result<_, FuseReleaseError> = match self.open_files.remove(&fh) { + Some(reader) => { + if let Err(e) = self.runtime.block_on(reader.close()) { + debug!(error = %e, "reader close reported error"); + } + Ok(()) + } + None => Err(FuseReleaseError::NotOpen), + }; + match result { + Ok(()) => { + debug!("replying ok"); + reply.ok(); + } + Err(e) => { + debug!(error = %e, "replying error"); + reply.error(e.into()); + } + } + } + + #[expect( + clippy::cast_possible_truncation, + reason = "nlookups fits in usize on supported 64-bit platforms" + )] + #[instrument(name = "FuserAdapter::forget", skip(self, _req, nlookup))] + fn forget(&mut self, _req: &fuser::Request<'_>, ino: u64, nlookup: u64) { + self.inner.ward_dec_count(ino, nlookup as usize); + } + + #[instrument(name = "FuserAdapter::statfs", skip(self, _req, _ino, reply))] + fn statfs(&mut self, _req: &fuser::Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { + let stats = self.inner.get_fs().statfs(); + debug!(?stats, "replying..."); + reply.statfs( + stats.total_blocks, + stats.free_blocks, + stats.available_blocks, + stats.total_inodes, + stats.free_inodes, + stats.block_size, + stats.max_filename_length, + 0, + ); + } +} diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs new file mode 100644 index 00000000..e8f971b4 --- 
/dev/null +++ b/lib/fs/mod.rs @@ -0,0 +1,188 @@ +//! Useful filesystem generalizations. +/// Async filesystem cache with concurrent inode management. +pub mod async_fs; +/// Directory entry cache for fast parent-child lookups. +pub mod dcache; +/// FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`async_fs::AsyncFs`]. +pub mod fuser; + +pub use async_fs::{InodeForget, InodeLifecycle, OpenFile, TrackedINode}; + +use std::ffi::OsStr; +use std::time::SystemTime; + +use bitflags::bitflags; + +/// Type representing an inode identifier. +pub type InodeAddr = u64; + +/// Represents an inode address that has been loaded into the inode table. +/// +/// This newtype wrapper distinguishes inode addresses that are known to exist +/// in the [`async_fs::AsyncFs`] inode table from raw [`InodeAddr`] values. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct LoadedAddr(pub InodeAddr); + +/// Type representing a file handle. +pub type FileHandle = u64; + +bitflags! { + /// Permission bits for an inode, similar to Unix file permissions. + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct InodePerms: u16 { + /// Other: execute permission. + const OTHER_EXECUTE = 1 << 0; + /// Other: write permission. + const OTHER_WRITE = 1 << 1; + /// Other: read permission. + const OTHER_READ = 1 << 2; + + /// Group: execute permission. + const GROUP_EXECUTE = 1 << 3; + /// Group: write permission. + const GROUP_WRITE = 1 << 4; + /// Group: read permission. + const GROUP_READ = 1 << 5; + + /// Owner: execute permission. + const OWNER_EXECUTE = 1 << 6; + /// Owner: write permission. + const OWNER_WRITE = 1 << 7; + /// Owner: read permission. + const OWNER_READ = 1 << 8; + + /// Sticky bit. + const STICKY = 1 << 9; + /// Set-group-ID bit. + const SETGID = 1 << 10; + /// Set-user-ID bit. + const SETUID = 1 << 11; + + /// Other: read, write, and execute. 
+ const OTHER_RWX = Self::OTHER_READ.bits() + | Self::OTHER_WRITE.bits() + | Self::OTHER_EXECUTE.bits(); + /// Group: read, write, and execute. + const GROUP_RWX = Self::GROUP_READ.bits() + | Self::GROUP_WRITE.bits() + | Self::GROUP_EXECUTE.bits(); + /// Owner: read, write, and execute. + const OWNER_RWX = Self::OWNER_READ.bits() + | Self::OWNER_WRITE.bits() + | Self::OWNER_EXECUTE.bits(); + } +} + +bitflags! { + /// Flags for opening a file, similar to Unix open(2) flags. + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct OpenFlags: i32 { + /// Open for reading only. + const RDONLY = libc::O_RDONLY; + /// Open for writing only. + const WRONLY = libc::O_WRONLY; + /// Open for reading and writing. + const RDWR = libc::O_RDWR; + + /// Append on each write. + const APPEND = libc::O_APPEND; + /// Truncate to zero length. + const TRUNC = libc::O_TRUNC; + /// Create file if it does not exist. + const CREAT = libc::O_CREAT; + /// Error if file already exists (with `CREAT`). + const EXCL = libc::O_EXCL; + + /// Non-blocking mode. + const NONBLOCK = libc::O_NONBLOCK; + /// Synchronous writes. + const SYNC = libc::O_SYNC; + /// Synchronous data integrity writes. + const DSYNC = libc::O_DSYNC; + /// Do not follow symlinks. + const NOFOLLOW = libc::O_NOFOLLOW; + /// Set close-on-exec. + const CLOEXEC = libc::O_CLOEXEC; + /// Fail if not a directory. + const DIRECTORY = libc::O_DIRECTORY; + + /// Do not update access time (Linux only). + #[cfg(target_os = "linux")] + const NOATIME = libc::O_NOATIME; + } +} + +/// The type of an inode entry in the filesystem. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum INodeType { + /// A regular file. + File, + /// A directory. + Directory, + /// A symbolic link. + Symlink, +} + +/// Representation of an inode. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct INode { + /// The address of this inode, which serves as its unique identifier. 
+    pub addr: InodeAddr,
+    /// The permissions associated with this inode, represented as a bitfield.
+    pub permissions: InodePerms,
+    /// The user ID of the owner of this inode.
+    pub uid: u32,
+    /// The group ID of the owner of this inode.
+    pub gid: u32,
+    /// The time this inode was created at.
+    pub create_time: SystemTime,
+    /// The time this inode was last modified at.
+    pub last_modified_at: SystemTime,
+    /// The parent inode address, if any. This is `None` for the root inode.
+    pub parent: Option<InodeAddr>,
+    /// The size of the file represented by this inode, in bytes.
+    pub size: u64,
+    /// Additional information about the type of this inode (e.g., file vs directory).
+    pub itype: INodeType,
+}
+
+impl INode {
+    /// Check if this inode is the root inode (i.e., has no parent).
+    #[must_use]
+    pub fn is_root(&self) -> bool {
+        self.parent.is_none()
+    }
+}
+
+/// A directory entry yielded by [`async_fs::AsyncFs::readdir`].
+///
+/// Borrows the entry name from the directory cache's iteration buffer.
+#[derive(Debug, Clone, Copy)]
+pub struct DirEntry<'a> {
+    /// The name of this entry within its parent directory.
+    pub name: &'a OsStr,
+    /// The full inode data for this entry.
+    pub inode: INode,
+}
+
+/// Filesystem statistics returned by [`async_fs::AsyncFs::statfs`].
+///
+/// Block-related sizes are in units of `block_size` bytes.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct AsyncFsStats {
+    /// Filesystem block size (bytes).
+    pub block_size: u32,
+    /// Total number of data blocks.
+    pub total_blocks: u64,
+    /// Number of free blocks.
+    pub free_blocks: u64,
+    /// Number of blocks available to unprivileged users.
+    pub available_blocks: u64,
+    /// Total number of file nodes (inodes).
+    pub total_inodes: u64,
+    /// Number of free file nodes.
+    pub free_inodes: u64,
+    /// Maximum filename length (bytes).
+ pub max_filename_length: u32, +} diff --git a/lib/lib.rs b/lib/lib.rs index f7388bd5..40b1e8f2 100644 --- a/lib/lib.rs +++ b/lib/lib.rs @@ -2,4 +2,7 @@ /// Caching primitives for git-fs. pub mod cache; +pub mod drop_ward; +/// Filesystem abstractions and caching layers. +pub mod fs; pub mod io; diff --git a/src/daemon.rs b/src/daemon.rs index dac2d052..0a7a9f31 100644 --- a/src/daemon.rs +++ b/src/daemon.rs @@ -14,9 +14,13 @@ mod managed_fuse { use nix::errno::Errno; + use git_fs::cache::async_backed::FutureBackedCache; + use git_fs::fs::{INode, INodeType, InodePerms}; + use super::{MesaFS, OrgConfig, app_config, debug, error}; - use crate::fs::fuser::FuserAdapter; + use crate::fs::mescloud::MesaFsProvider; use fuser::BackgroundSession; + use git_fs::fs::fuser::FuserAdapter; pub struct FuseCoreScope { _session: BackgroundSession, @@ -44,7 +48,24 @@ mod managed_fuse { api_key: org.api_key.clone(), }); let mesa_fs = MesaFS::new(orgs, (config.uid, config.gid), &config.cache); - let fuse_adapter = FuserAdapter::new(mesa_fs, handle); + + let table = FutureBackedCache::default(); + let now = std::time::SystemTime::now(); + let root = INode { + addr: 1, + permissions: InodePerms::from_bits_truncate(0o755), + uid: config.uid, + gid: config.gid, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, + }; + table.insert_sync(1, root); + + let provider = MesaFsProvider::new(mesa_fs); + let fuse_adapter = FuserAdapter::new(table, provider, handle); let mount_opts = [ fuser::MountOption::FSName("git-fs".to_owned()), fuser::MountOption::RO, diff --git a/src/fs/fuser.rs b/src/fs/fuser.rs deleted file mode 100644 index 86ddabb6..00000000 --- a/src/fs/fuser.rs +++ /dev/null @@ -1,351 +0,0 @@ -use std::ffi::OsStr; - -use crate::fs::r#trait::{CommonFileAttr, DirEntryType, FileAttr, Fs, LockOwner, OpenFlags}; -use tracing::{debug, error, instrument}; - -impl From for fuser::FileAttr { - fn from(val: FileAttr) -> Self { - fn 
common_to_fuser(common: CommonFileAttr) -> fuser::FileAttr { - fuser::FileAttr { - ino: common.ino, - size: 0, - blocks: 0, - atime: common.atime, - mtime: common.mtime, - ctime: common.ctime, - crtime: common.crtime, - kind: fuser::FileType::RegularFile, - perm: common.perm.bits(), - nlink: common.nlink, - uid: common.uid, - gid: common.gid, - rdev: 0, - blksize: common.blksize, - flags: 0, - } - } - - match val { - FileAttr::RegularFile { - common, - size, - blocks, - } => { - let mut attr = common_to_fuser(common); - attr.size = size; - attr.blocks = blocks; - attr.kind = fuser::FileType::RegularFile; - attr - } - FileAttr::Directory { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::Directory; - attr - } - FileAttr::Symlink { common, size } => { - let mut attr = common_to_fuser(common); - attr.size = size; - attr.kind = fuser::FileType::Symlink; - attr - } - FileAttr::CharDevice { common, rdev } => { - let mut attr = common_to_fuser(common); - debug_assert!(u32::try_from(rdev).is_ok(), "rdev value {rdev} too large"); - attr.rdev = rdev - .try_into() - .map_err(|_| { - error!("rdev value {rdev} too large for fuser::FileAttr"); - }) - .unwrap_or(0); - attr.kind = fuser::FileType::CharDevice; - attr - } - FileAttr::BlockDevice { common, rdev } => { - let mut attr = common_to_fuser(common); - debug_assert!(u32::try_from(rdev).is_ok(), "rdev value {rdev} too large"); - attr.rdev = rdev - .try_into() - .map_err(|_| { - error!("rdev value {rdev} too large for fuser::FileAttr"); - }) - .unwrap_or(0); - attr.kind = fuser::FileType::BlockDevice; - attr - } - FileAttr::NamedPipe { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::NamedPipe; - attr - } - FileAttr::Socket { common } => { - let mut attr = common_to_fuser(common); - attr.kind = fuser::FileType::Socket; - attr - } - } - } -} - -impl From for fuser::FileType { - fn from(val: DirEntryType) -> Self { - match val { - 
DirEntryType::RegularFile => Self::RegularFile, - DirEntryType::Directory => Self::Directory, - DirEntryType::Symlink => Self::Symlink, - DirEntryType::CharDevice => Self::CharDevice, - DirEntryType::BlockDevice => Self::BlockDevice, - DirEntryType::NamedPipe => Self::NamedPipe, - DirEntryType::Socket => Self::Socket, - } - } -} - -impl From for OpenFlags { - fn from(val: i32) -> Self { - Self::from_bits_truncate(val) - } -} - -pub struct FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - fs: F, - runtime: tokio::runtime::Handle, -} - -impl FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - // TODO(markovejnovic): This low TTL is really not ideal. It slows us down a lot, since the - // kernel has to ask us for every single lookup all the time. - // - // I think a better implementation is to implement - // - // notify_inval_inode(ino, offset, len) - // notify_inval_entry(parent_ino, name) - // - // These two functions can be used to invalidate specific entries in the kernel cache when we - // know they have changed. This would allow us to set a much higher TTL here. 
- const SHAMEFUL_TTL: std::time::Duration = std::time::Duration::from_secs(1); - - pub fn new(fs: F, runtime: tokio::runtime::Handle) -> Self { - Self { fs, runtime } - } -} - -impl fuser::Filesystem for FuserAdapter -where - F::LookupError: Into, - F::GetAttrError: Into, - F::OpenError: Into, - F::ReadError: Into, - F::ReaddirError: Into, - F::ReleaseError: Into, -{ - #[instrument(name = "FuserAdapter::lookup", skip(self, _req, reply))] - fn lookup( - &mut self, - _req: &fuser::Request<'_>, - parent: u64, - name: &OsStr, - reply: fuser::ReplyEntry, - ) { - match self.runtime.block_on(self.fs.lookup(parent, name)) { - Ok(attr) => { - // TODO(markovejnovic): Passing generation = 0 here is a recipe for disaster. - // Someone with A LOT of files will likely see inode reuse which will lead to a - // disaster. - let f_attr: fuser::FileAttr = attr.into(); - debug!(?f_attr, "replying..."); - reply.entry(&Self::SHAMEFUL_TTL, &f_attr, 0); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::getattr", skip(self, _req, fh, reply))] - fn getattr( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: Option, - reply: fuser::ReplyAttr, - ) { - match self.runtime.block_on(self.fs.getattr(ino, fh)) { - Ok(attr) => { - debug!(?attr, "replying..."); - reply.attr(&Self::SHAMEFUL_TTL, &attr.into()); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::readdir", skip(self, _req, _fh, offset, reply))] - fn readdir( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - _fh: u64, - offset: i64, - mut reply: fuser::ReplyDirectory, - ) { - let entries = match self.runtime.block_on(self.fs.readdir(ino)) { - Ok(entries) => entries, - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - return; - } - }; - - #[expect( - clippy::cast_possible_truncation, - reason = "fuser offset is i64 but always 
non-negative" - )] - for (i, entry) in entries - .iter() - .enumerate() - .skip(offset.cast_unsigned() as usize) - { - let kind: fuser::FileType = entry.kind.into(); - let Ok(idx): Result = (i + 1).try_into() else { - error!("Directory entry index {} too large for fuser", i + 1); - reply.error(libc::EIO); - return; - }; - - debug!(?entry, "adding entry to reply..."); - if reply.add(entry.ino, idx, kind, &entry.name) { - debug!("buffer full for now, stopping readdir"); - break; - } - } - - debug!("finalizing reply..."); - reply.ok(); - } - - #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] - fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { - match self.runtime.block_on(self.fs.open(ino, flags.into())) { - Ok(open_file) => { - debug!(handle = open_file.handle, "replying..."); - reply.opened(open_file.handle, 0); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument( - name = "FuserAdapter::read", - skip(self, _req, fh, offset, size, flags, lock_owner, reply) - )] - fn read( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: u64, - offset: i64, - size: u32, - flags: i32, - lock_owner: Option, - reply: fuser::ReplyData, - ) { - let flags: OpenFlags = flags.into(); - let lock_owner = lock_owner.map(LockOwner); - match self.runtime.block_on(self.fs.read( - ino, - fh, - offset.cast_unsigned(), - size, - flags, - lock_owner, - )) { - Ok(data) => { - debug!(read_bytes = data.len(), "replying..."); - reply.data(&data); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::release", skip(self, _req, _lock_owner, reply))] - fn release( - &mut self, - _req: &fuser::Request<'_>, - ino: u64, - fh: u64, - flags: i32, - _lock_owner: Option, - flush: bool, - reply: fuser::ReplyEmpty, - ) { - match self - .runtime - .block_on(self.fs.release(ino, fh, flags.into(), 
flush)) - { - Ok(()) => { - debug!("replying ok"); - reply.ok(); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); - } - } - } - - #[instrument(name = "FuserAdapter::forget", skip(self, _req, nlookup))] - fn forget(&mut self, _req: &fuser::Request<'_>, ino: u64, nlookup: u64) { - self.runtime.block_on(self.fs.forget(ino, nlookup)); - } - - #[instrument(name = "FuserAdapter::statfs", skip(self, _req, _ino, reply))] - fn statfs(&mut self, _req: &fuser::Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { - self.runtime.block_on(async { - match self.fs.statfs().await { - Ok(statvfs) => { - debug!(?statvfs, "replying..."); - reply.statfs( - statvfs.total_blocks, - statvfs.free_blocks, - statvfs.available_blocks, - statvfs.total_inodes, - statvfs.free_inodes, - statvfs.block_size, - statvfs.max_filename_length, - 0, - ); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.raw_os_error().unwrap_or(libc::EIO)); - } - } - }); - } -} diff --git a/src/fs/icache/async_cache.rs b/src/fs/icache/async_cache.rs deleted file mode 100644 index 84003da3..00000000 --- a/src/fs/icache/async_cache.rs +++ /dev/null @@ -1,1410 +0,0 @@ -//! Async inode cache with InFlight/Available state machine. - -use std::future::Future; - -use scc::HashMap as ConcurrentHashMap; -use tokio::sync::watch; - -use tracing::{instrument, trace, warn}; - -use crate::fs::r#trait::Inode; - -use super::IcbLike; - -/// State of an entry in the async inode cache. -pub enum IcbState { - /// Entry is being loaded; waiters clone the receiver and `.changed().await`. - /// - /// The channel carries `()` rather than the resolved value because the map - /// is the single source of truth: ICBs are mutated in-place (rc, attrs) so - /// a snapshot in the channel would immediately go stale. Sender-drop also - /// gives us implicit, leak-proof signalling on both success and error paths. - InFlight(watch::Receiver<()>), - /// Entry is ready for use. 
- Available(I), -} - -impl IcbState { - /// Consume `self`, returning the inner value if `Available`, or `None` if `InFlight`. - fn into_available(self) -> Option { - match self { - Self::Available(inner) => Some(inner), - Self::InFlight(_) => None, - } - } -} - -/// Trait for resolving an inode to its control block. -/// -/// Implementations act as a "promise" that an ICB will eventually be produced -/// for a given inode. The cache calls `resolve` when it needs to populate a -/// missing entry. -pub trait IcbResolver: Send + Sync { - /// The inode control block type this resolver produces. - type Icb: IcbLike + Send + Sync; - /// Error type returned when resolution fails. - type Error: Send; - - /// Resolve an inode to a fully-populated control block. - /// - /// - `stub`: `Some(icb)` if upgrading an existing stub entry, `None` if creating - /// from scratch. The stub typically has `parent` and `path` set but `attr` missing. - /// - `cache`: reference to the cache, useful for walking parent chains to build paths. - fn resolve( - &self, - ino: Inode, - stub: Option, - cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized; -} - -/// Async, concurrency-safe inode cache. -/// -/// All methods take `&self` — internal synchronization is provided by -/// `scc::HashMap` (sharded lock-free map). -pub struct AsyncICache { - resolver: R, - inode_table: ConcurrentHashMap>, -} - -impl AsyncICache { - /// Create a new cache with a root ICB at `root_ino` (rc = 1). - pub fn new(resolver: R, root_ino: Inode, root_path: impl Into) -> Self { - let table = ConcurrentHashMap::new(); - // insert_sync is infallible for a fresh map - drop(table.insert_sync( - root_ino, - IcbState::Available(R::Icb::new_root(root_path.into())), - )); - Self { - resolver, - inode_table: table, - } - } - - /// Number of entries (`InFlight` + `Available`) in the table. - pub fn inode_count(&self) -> usize { - self.inode_table.len() - } - - /// Wait until `ino` is `Available`. 
- /// Returns `true` if the entry exists and is Available, - /// `false` if the entry does not exist. - #[instrument(name = "AsyncICache::wait_for_available", skip(self))] - async fn wait_for_available(&self, ino: Inode) -> bool { - loop { - let rx = self - .inode_table - .read_async(&ino, |_, s| match s { - IcbState::InFlight(rx) => Some(rx.clone()), - IcbState::Available(_) => None, - }) - .await; - - match rx { - None => return false, // key missing - Some(None) => return true, // Available - Some(Some(mut rx)) => { - // Wait for the resolver to complete (or fail/drop sender). - // changed() returns Err(RecvError) when sender is dropped, - // which is fine — it means resolution finished. - let _ = rx.changed().await; - // Loop back — the entry might be InFlight again if another - // resolution cycle started between our wakeup and re-read. - } - } - } - } - - /// Check whether `ino` has an entry in the table (either `InFlight` or `Available`). - /// - /// This is a non-blocking, synchronous check. It does **not** wait for - /// `InFlight` entries to resolve. - pub fn contains(&self, ino: Inode) -> bool { - self.inode_table.contains_sync(&ino) - } - - /// Read an ICB via closure. **Awaits** if `InFlight`. - /// Returns `None` if `ino` doesn't exist. - #[instrument(name = "AsyncICache::get_icb", skip(self, f))] - // `Sync` is required because `f` is held across `.await` points in the - // loop body; for the resulting future to be `Send`, the captured closure - // must be `Sync` (clippy::future_not_send). 
- pub async fn get_icb( - &self, - ino: Inode, - f: impl Fn(&R::Icb) -> T + Send + Sync, - ) -> Option { - loop { - if !self.wait_for_available(ino).await { - return None; - } - let result = self - .inode_table - .read_async(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .await; - match result { - Some(Some(val)) => return Some(val), - Some(None) => {} // was InFlight, retry - None => return None, // key missing - } - } - } - - /// Mutate an ICB via closure. **Awaits** if `InFlight`. - /// Returns `None` if `ino` doesn't exist. - #[instrument(name = "AsyncICache::get_icb_mut", skip(self, f))] - pub async fn get_icb_mut( - &self, - ino: Inode, - mut f: impl FnMut(&mut R::Icb) -> T + Send, - ) -> Option { - loop { - if !self.wait_for_available(ino).await { - return None; - } - let result = self - .inode_table - .update_async(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .await; - match result { - Some(Some(val)) => return Some(val), - Some(None) => {} // was InFlight, retry - None => return None, // key missing - } - } - } - - /// Insert an ICB directly as `Available`. If the entry is currently - /// `InFlight`, waits for resolution before overwriting. 
- #[instrument(name = "AsyncICache::insert_icb", skip(self, icb))] - pub async fn insert_icb(&self, ino: Inode, icb: R::Icb) { - use scc::hash_map::Entry; - let mut icb = Some(icb); - loop { - match self.inode_table.entry_async(ino).await { - Entry::Vacant(vac) => { - let val = icb - .take() - .unwrap_or_else(|| unreachable!("icb consumed more than once")); - vac.insert_entry(IcbState::Available(val)); - return; - } - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - IcbState::Available(_) => { - let val = icb - .take() - .unwrap_or_else(|| unreachable!("icb consumed more than once")); - *occ.get_mut() = IcbState::Available(val); - return; - } - }, - } - } - } - - /// Get-or-insert pattern. If `ino` exists (awaits `InFlight`), runs `then` - /// on it. If absent, calls `factory` to create, inserts, then runs `then`. - /// - /// Both `factory` and `then` are `FnOnce` — wrapped in `Option` internally - /// to satisfy the borrow checker across the await-loop. 
- #[instrument(name = "AsyncICache::entry_or_insert_icb", skip(self, factory, then))] - pub async fn entry_or_insert_icb( - &self, - ino: Inode, - factory: impl FnOnce() -> R::Icb, - then: impl FnOnce(&mut R::Icb) -> T, - ) -> T { - use scc::hash_map::Entry; - let mut factory = Some(factory); - let mut then_fn = Some(then); - - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - return t(icb); - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); // release shard lock before awaiting - let _ = rx.changed().await; - } - }, - Entry::Vacant(vac) => { - let f = factory - .take() - .unwrap_or_else(|| unreachable!("factory consumed more than once")); - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - let mut icb = f(); - let result = t(&mut icb); - vac.insert_entry(IcbState::Available(icb)); - return result; - } - } - } - } - - /// Write an ICB back to the table only if the entry still exists. - /// - /// If the entry was evicted (vacant) during resolution, the result is - /// silently dropped — this prevents resurrecting entries that a concurrent - /// `forget` has already removed. - async fn write_back_if_present(&self, ino: Inode, icb: R::Icb) { - use scc::hash_map::Entry; - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => { - *occ.get_mut() = IcbState::Available(icb); - } - Entry::Vacant(_) => { - tracing::debug!( - ino, - "resolved inode was evicted during resolution, dropping result" - ); - } - } - } - - /// Look up `ino`. If `Available` and fully resolved, run `then` and return - /// `Ok(T)`. If `Available` but `needs_resolve()` is true (stub), extract - /// the stub, resolve it, cache the result, then run `then`. 
If absent, call - /// the resolver to fetch the ICB, cache it, then run `then`. If another task - /// is already resolving this inode (`InFlight`), wait for it. - /// - /// Returns `Err(R::Error)` if resolution fails. On error the `InFlight` - /// entry is removed so subsequent calls can retry. - #[instrument(name = "AsyncICache::get_or_resolve", skip(self, then))] - pub async fn get_or_resolve( - &self, - ino: Inode, - then: impl FnOnce(&R::Icb) -> T, - ) -> Result { - use scc::hash_map::Entry; - - let mut then_fn = Some(then); - - // Fast path: Available and fully resolved - { - let hit = self - .inode_table - .read_async(&ino, |_, s| match s { - IcbState::Available(icb) if !icb.needs_resolve() => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - Some(t(icb)) - } - IcbState::InFlight(_) | IcbState::Available(_) => None, - }) - .await; - if let Some(Some(r)) = hit { - return Ok(r); - } - } - - // Slow path: missing, InFlight, or stub needing resolution - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) if !icb.needs_resolve() => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - return Ok(t(icb)); - } - IcbState::Available(_) => { - // Stub needing resolution — extract stub, replace with InFlight - let (tx, rx) = watch::channel(()); - let old = std::mem::replace(occ.get_mut(), IcbState::InFlight(rx)); - let stub = old.into_available().unwrap_or_else(|| { - unreachable!("matched Available arm, replaced value must be Available") - }); - let fallback = stub.clone(); - drop(occ); // release shard lock before awaiting - - match self.resolver.resolve(ino, Some(stub), self).await { - Ok(icb) => { - let t = then_fn.take().unwrap_or_else(|| { - unreachable!("then_fn consumed more than once") - }); - let result = t(&icb); - self.write_back_if_present(ino, icb).await; - drop(tx); - 
return Ok(result); - } - Err(e) => { - if fallback.rc() > 0 { - self.write_back_if_present(ino, fallback).await; - } else { - self.inode_table.remove_async(&ino).await; - } - drop(tx); - return Err(e); - } - } - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - }, - Entry::Vacant(vac) => { - let (tx, rx) = watch::channel(()); - vac.insert_entry(IcbState::InFlight(rx)); - - match self.resolver.resolve(ino, None, self).await { - Ok(icb) => { - let t = then_fn - .take() - .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); - let result = t(&icb); - self.write_back_if_present(ino, icb).await; - drop(tx); - return Ok(result); - } - Err(e) => { - self.inode_table.remove_async(&ino).await; - drop(tx); - return Err(e); - } - } - } - } - } - } - - /// Increment rc. **Awaits** `InFlight`. - /// - /// Returns `None` if the inode does not exist or was evicted concurrently. - /// This can happen when a concurrent `forget` removes the entry between the - /// caller's insert/cache and this `inc_rc` call, or when a concurrent - /// `get_or_resolve` swaps the entry to `InFlight` and the entry is then - /// evicted on resolution failure. Callers in FUSE `lookup` paths should - /// treat `None` as a lookup failure to avoid ref-count leaks (the kernel - /// would hold a reference the cache no longer tracks). - #[instrument(name = "AsyncICache::inc_rc", skip(self))] - pub async fn inc_rc(&self, ino: Inode) -> Option { - loop { - if !self.wait_for_available(ino).await { - warn!(ino, "inc_rc: inode not in table"); - return None; - } - let result = self - .inode_table - .update_async(&ino, |_, state| match state { - IcbState::Available(icb) => { - *icb.rc_mut() += 1; - Some(icb.rc()) - } - IcbState::InFlight(_) => None, - }) - .await - .flatten(); - - match result { - Some(rc) => return Some(rc), - None => { - // Entry was concurrently replaced with InFlight or evicted. 
- if !self.contains(ino) { - warn!(ino, "inc_rc: inode evicted concurrently"); - return None; - } - // Entry exists but became InFlight — retry. - } - } - } - } - - /// Decrement rc by `nlookups`. If rc drops to zero, evicts and returns - /// the ICB. **Awaits** `InFlight` entries. - #[instrument(name = "AsyncICache::forget", skip(self))] - pub async fn forget(&self, ino: Inode, nlookups: u64) -> Option { - use scc::hash_map::Entry; - - loop { - match self.inode_table.entry_async(ino).await { - Entry::Occupied(mut occ) => match occ.get_mut() { - IcbState::Available(icb) => { - if icb.rc() <= nlookups { - trace!(ino, "evicting inode"); - let (_, state) = occ.remove_entry(); - return state.into_available(); - } - *icb.rc_mut() -= nlookups; - trace!(ino, new_rc = icb.rc(), "decremented rc"); - return None; - } - IcbState::InFlight(rx) => { - let mut rx = rx.clone(); - drop(occ); - let _ = rx.changed().await; - } - }, - Entry::Vacant(_) => { - warn!(ino, "forget on unknown inode"); - return None; - } - } - } - } - - /// Synchronous mutable access to an `Available` entry. - /// Does **not** wait for `InFlight`. Intended for initialization. - pub fn get_icb_mut_sync(&self, ino: Inode, f: impl FnOnce(&mut R::Icb) -> T) -> Option { - self.inode_table - .update_sync(&ino, |_, state| match state { - IcbState::Available(icb) => Some(f(icb)), - IcbState::InFlight(_) => None, - }) - .flatten() - } - - /// Iterate over all `Available` entries (skips `InFlight`). - /// Async-safe iteration using `iter_async` to avoid contention on single-threaded runtimes. 
- pub async fn for_each(&self, mut f: impl FnMut(&Inode, &R::Icb)) { - self.inode_table - .iter_async(|ino, state| { - if let IcbState::Available(icb) = state { - f(ino, icb); - } - true // continue iteration - }) - .await; - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::collections::HashMap as StdHashMap; - use std::path::PathBuf; - use std::sync::atomic::Ordering; - use std::sync::{Arc, Mutex}; - - #[derive(Debug, Clone, PartialEq)] - struct TestIcb { - rc: u64, - path: PathBuf, - resolved: bool, - } - - impl IcbLike for TestIcb { - fn new_root(path: PathBuf) -> Self { - Self { - rc: 1, - path, - resolved: true, - } - } - fn rc(&self) -> u64 { - self.rc - } - fn rc_mut(&mut self) -> &mut u64 { - &mut self.rc - } - fn needs_resolve(&self) -> bool { - !self.resolved - } - } - - struct TestResolver { - responses: Mutex>>, - } - - impl TestResolver { - fn new() -> Self { - Self { - responses: Mutex::new(StdHashMap::new()), - } - } - - fn add(&self, ino: Inode, icb: TestIcb) { - self.responses - .lock() - .expect("test mutex") - .insert(ino, Ok(icb)); - } - - fn add_err(&self, ino: Inode, err: impl Into) { - self.responses - .lock() - .expect("test mutex") - .insert(ino, Err(err.into())); - } - } - - impl IcbResolver for TestResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - let result = self - .responses - .lock() - .expect("test mutex") - .remove(&ino) - .unwrap_or_else(|| Err(format!("no response for inode {ino}"))); - async move { result } - } - } - - fn test_cache() -> AsyncICache { - AsyncICache::new(TestResolver::new(), 1, "/root") - } - - fn test_cache_with(resolver: TestResolver) -> AsyncICache { - AsyncICache::new(resolver, 1, "/root") - } - - #[tokio::test] - async fn contains_returns_true_for_root() { - let cache = test_cache(); - assert!(cache.contains(1), "root should exist"); - } - - #[tokio::test] - async fn 
contains_returns_false_for_missing() { - let cache = test_cache(); - assert!(!cache.contains(999), "missing inode should not exist"); - } - - #[tokio::test] - async fn contains_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/test".into(), - resolved: true, - }, - ); - let cache = Arc::new(test_cache_with(resolver)); - - // Trigger resolve in background - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.get_or_resolve(42, |_| ()).await }); - - handle - .await - .expect("task panicked") - .expect("resolve failed"); - assert!(cache.contains(42), "should be true after resolve"); - } - - #[tokio::test] - async fn new_creates_root_entry() { - let cache = test_cache(); - assert_eq!(cache.inode_count(), 1, "should have exactly 1 entry"); - } - - #[tokio::test] - async fn get_icb_returns_value() { - let cache = test_cache(); - let path = cache.get_icb(1, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/root"))); - } - - #[tokio::test] - async fn get_icb_returns_none_for_missing() { - let cache = test_cache(); - let result = cache.get_icb(999, IcbLike::rc).await; - assert_eq!(result, None, "missing inode should return None"); - } - - #[tokio::test] - async fn get_icb_mut_modifies_value() { - let cache = test_cache(); - cache - .get_icb_mut(1, |icb| { - *icb.rc_mut() += 10; - }) - .await; - let rc = cache.get_icb(1, IcbLike::rc).await; - assert_eq!(rc, Some(11), "root starts at rc=1, +10 = 11"); - } - - #[tokio::test] - async fn get_icb_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/loaded".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - // Resolve inode 42 - cache - .get_or_resolve(42, |_| ()) - .await - .expect("resolve failed"); - - let path = cache.get_icb(42, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/loaded"))); - 
} - - #[tokio::test] - async fn insert_icb_adds_entry() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/foo".into(), - resolved: true, - }, - ) - .await; - assert!(cache.contains(42), "inserted entry should exist"); - assert_eq!(cache.inode_count(), 2, "root + inserted = 2"); - } - - #[tokio::test] - async fn insert_icb_does_not_clobber_inflight() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - // Spawn insert_icb in background — should wait for InFlight to resolve - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { - cache2 - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/inserted".into(), - resolved: true, - }, - ) - .await; - }); - - // Give insert_icb time to start waiting - tokio::task::yield_now().await; - - // Complete the InFlight from the resolver side (write directly) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }), - ) - .await; - drop(tx); // signal watchers - - handle.await.expect("task panicked"); - - // After insert_icb completes, it should have overwritten the resolved value - let path = cache.get_icb(42, |icb| icb.path.clone()).await; - assert_eq!(path, Some(PathBuf::from("/inserted"))); - } - - #[tokio::test] - async fn entry_or_insert_creates_new() { - let cache = test_cache(); - let rc = cache - .entry_or_insert_icb( - 42, - || TestIcb { - rc: 0, - path: "/new".into(), - resolved: true, - }, - |icb| { - *icb.rc_mut() += 1; - icb.rc() - }, - ) - .await; - assert_eq!(rc, 1, "factory creates rc=0, then +1 = 1"); - } - - #[tokio::test] - async fn entry_or_insert_returns_existing() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/existing".into(), - resolved: true, - }, - ) - .await; - - let rc = cache - .entry_or_insert_icb( - 42, - || 
panic!("factory should not be called"), - |icb| icb.rc(), - ) - .await; - assert_eq!(rc, 5, "existing entry rc should be 5"); - } - - #[tokio::test] - async fn entry_or_insert_after_resolver_completes() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = Arc::new(test_cache_with(resolver)); - - // Start resolve in background - let cache2 = Arc::clone(&cache); - let resolve_handle = tokio::spawn(async move { cache2.get_or_resolve(42, |_| ()).await }); - - // Wait for resolve to finish - resolve_handle - .await - .expect("task panicked") - .expect("resolve failed"); - - // Now entry_or_insert should find the existing entry - let rc = cache - .entry_or_insert_icb( - 42, - || panic!("factory should not be called"), - |icb| icb.rc(), - ) - .await; - assert_eq!(rc, 1, "should find the resolved entry"); - } - - #[tokio::test] - async fn inc_rc_increments() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }, - ) - .await; - let new_rc = cache.inc_rc(42).await; - assert_eq!(new_rc, Some(2), "rc 1 + 1 = 2"); - } - - #[tokio::test] - async fn forget_decrements_rc() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 5, - path: "/a".into(), - resolved: true, - }, - ) - .await; - - let evicted = cache.forget(42, 2).await; - assert!(evicted.is_none(), "rc 5 - 2 = 3, should not evict"); - - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(3), "rc should be 3 after forget(2)"); - } - - #[tokio::test] - async fn forget_evicts_when_rc_drops_to_zero() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 3, - path: "/a".into(), - resolved: true, - }, - ) - .await; - - let evicted = cache.forget(42, 3).await; - assert!(evicted.is_some(), "rc 3 - 3 = 0, should evict"); - assert!(!cache.contains(42), "evicted entry should be gone"); - 
assert_eq!(cache.inode_count(), 1, "only root remains"); - } - - #[tokio::test] - async fn forget_unknown_inode_returns_none() { - let cache = test_cache(); - let evicted = cache.forget(999, 1).await; - assert!(evicted.is_none(), "unknown inode should return None"); - } - - #[tokio::test] - async fn for_each_iterates_available_entries() { - let cache = test_cache(); - cache - .insert_icb( - 2, - TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }, - ) - .await; - cache - .insert_icb( - 3, - TestIcb { - rc: 1, - path: "/b".into(), - resolved: true, - }, - ) - .await; - - let mut seen = std::collections::HashSet::new(); - cache - .for_each(|ino, _icb| { - seen.insert(*ino); - }) - .await; - assert_eq!(seen.len(), 3, "should see all 3 entries"); - assert!(seen.contains(&1), "should contain root"); - assert!(seen.contains(&2), "should contain inode 2"); - assert!(seen.contains(&3), "should contain inode 3"); - } - - #[tokio::test] - async fn for_each_skips_inflight() { - let cache = test_cache(); - // Directly insert an InFlight entry for testing iteration - let (_tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let mut count = 0; - cache - .for_each(|_, _| { - count += 1; - }) - .await; - assert_eq!(count, 1, "only root, not the InFlight entry"); - } - - #[tokio::test] - async fn wait_does_not_miss_signal_on_immediate_complete() { - let cache = Arc::new(test_cache()); - - // Insert InFlight manually, then immediately complete before anyone waits - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - // Complete before any waiter (simulate resolver by writing directly) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/fast".into(), - resolved: true, - }), - ) - .await; - drop(tx); - - assert!(cache.contains(42), "entry should exist in table"); - } - - // -- get_or_resolve tests -- - - 
#[tokio::test] - async fn get_or_resolve_returns_existing() { - let cache = test_cache(); - cache - .insert_icb( - 42, - TestIcb { - rc: 1, - path: "/existing".into(), - resolved: true, - }, - ) - .await; - - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/existing"))); - } - - #[tokio::test] - async fn get_or_resolve_resolves_missing() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/resolved"))); - // Should now be cached - assert!(cache.contains(42)); - } - - #[tokio::test] - async fn get_or_resolve_propagates_error() { - let resolver = TestResolver::new(); - resolver.add_err(42, "network error"); - let cache = test_cache_with(resolver); - - let result: Result = - cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(result, Err("network error".to_owned())); - // Entry should be cleaned up on error - assert!(!cache.contains(42)); - } - - struct CountingResolver { - count: Arc, - } - - impl IcbResolver for CountingResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - self.count.fetch_add(1, Ordering::SeqCst); - async { - tokio::task::yield_now().await; - Ok(TestIcb { - rc: 1, - path: "/coalesced".into(), - resolved: true, - }) - } - } - } - - #[tokio::test] - async fn get_or_resolve_coalesces_concurrent_requests() { - use std::sync::atomic::AtomicUsize; - - let resolve_count = Arc::new(AtomicUsize::new(0)); - - let cache = Arc::new(AsyncICache::new( - CountingResolver { - count: Arc::clone(&resolve_count), - }, - 1, - "/root", - )); - - let mut handles = Vec::new(); - for _ in 0..5 { - let c = Arc::clone(&cache); - 
handles.push(tokio::spawn(async move { - c.get_or_resolve(42, |icb| icb.path.clone()).await - })); - } - - for h in handles { - assert_eq!( - h.await.expect("task panicked"), - Ok(PathBuf::from("/coalesced")), - ); - } - - // Resolver should only have been called ONCE (not 5 times) - assert_eq!( - resolve_count.load(Ordering::SeqCst), - 1, - "should coalesce to 1 resolve call" - ); - } - - #[test] - fn icb_state_into_available_returns_inner() { - let state = IcbState::Available(TestIcb { - rc: 1, - path: "/test".into(), - resolved: true, - }); - assert!(state.into_available().is_some()); - } - - #[test] - fn icb_state_into_available_returns_none_for_inflight() { - let (_tx, rx) = watch::channel(()); - let state: IcbState = IcbState::InFlight(rx); - assert!(state.into_available().is_none()); - } - - #[tokio::test] - async fn get_or_resolve_resolves_stub_entry() { - let resolver = TestResolver::new(); - resolver.add( - 42, - TestIcb { - rc: 1, - path: "/resolved".into(), - resolved: true, - }, - ); - let cache = test_cache_with(resolver); - - // Insert unresolved stub - cache - .insert_icb( - 42, - TestIcb { - rc: 0, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // get_or_resolve should trigger resolution because needs_resolve() == true - let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert_eq!(path, Ok(PathBuf::from("/resolved"))); - } - - #[tokio::test] - async fn forget_handles_inflight_entry() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.forget(42, 1).await }); - - // Give forget time to start waiting - tokio::task::yield_now().await; - - // Simulate resolver completing (write directly to inode_table) - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 3, - path: "/inflight".into(), - 
resolved: true, - }), - ) - .await; - drop(tx); - - let evicted = handle.await.expect("task panicked"); - assert!(evicted.is_none(), "rc=3 - 1 = 2, should not evict"); - - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(2), "rc should be 2 after forget(1) on rc=3"); - } - - #[tokio::test] - async fn get_or_resolve_error_preserves_stub_with_nonzero_rc() { - let resolver = TestResolver::new(); - resolver.add_err(42, "resolve failed"); - let cache = test_cache_with(resolver); - - // Insert a stub with rc=2 (simulates a looked-up entry needing resolution) - cache - .insert_icb( - 42, - TestIcb { - rc: 2, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // get_or_resolve should fail - let result: Result = - cache.get_or_resolve(42, |icb| icb.path.clone()).await; - assert!(result.is_err(), "should propagate resolver error"); - - // The stub should be preserved since rc > 0 - assert!(cache.contains(42), "entry with rc=2 should survive error"); - let rc = cache.get_icb(42, IcbLike::rc).await; - assert_eq!(rc, Some(2), "rc should be preserved"); - } - - #[tokio::test] - async fn inc_rc_missing_inode_returns_none() { - let cache = test_cache(); - assert_eq!(cache.inc_rc(999).await, None); - } - - #[tokio::test] - async fn inc_rc_waits_for_inflight() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.inc_rc(42).await }); - - // Simulate resolver completing by writing directly to inode_table - cache - .inode_table - .upsert_async( - 42, - IcbState::Available(TestIcb { - rc: 1, - path: "/a".into(), - resolved: true, - }), - ) - .await; - drop(tx); - - let result = handle - .await - .unwrap_or_else(|e| panic!("task panicked: {e}")); - assert_eq!( - result, - Some(2), - "waited for Available, then incremented 1 -> 2" - ); - } - - #[tokio::test] - async fn 
inc_rc_returns_none_after_concurrent_eviction() { - let cache = Arc::new(test_cache()); - let (tx, rx) = watch::channel(()); - cache - .inode_table - .upsert_async(42, IcbState::InFlight(rx)) - .await; - - let cache2 = Arc::clone(&cache); - let handle = tokio::spawn(async move { cache2.inc_rc(42).await }); - - // Evict instead of completing - cache.inode_table.remove_async(&42).await; - drop(tx); - - let result = handle - .await - .unwrap_or_else(|e| panic!("task panicked: {e}")); - assert_eq!(result, None, "evicted entry should return None"); - } - - /// Resolver that pauses mid-resolution via a `Notify`, allowing the test - /// to interleave a `forget` while the resolve future is suspended. - struct SlowResolver { - /// Signalled by the resolver once it has started (so the test knows - /// resolution is in progress). - started: Arc, - /// The resolver waits on this before returning (the test signals it - /// after calling `forget`). - proceed: Arc, - } - - impl IcbResolver for SlowResolver { - type Icb = TestIcb; - type Error = String; - - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - let started = Arc::clone(&self.started); - let proceed = Arc::clone(&self.proceed); - async move { - started.notify_one(); - proceed.notified().await; - Ok(TestIcb { - rc: 1, - path: "/slow-resolved".into(), - resolved: true, - }) - } - } - } - - /// Regression test: `get_icb` must survive the entry cycling back to - /// `InFlight` between when `wait_for_available` returns and when - /// `read_async` runs. The loop in `get_icb` should retry and eventually - /// return the final resolved value. - #[tokio::test] - async fn wait_for_available_retries_on_re_inflight() { - let cache = Arc::new(test_cache()); - let ino: Inode = 42; - - // Phase 1: insert an InFlight entry. 
- let (tx1, rx1) = watch::channel(()); - cache - .inode_table - .upsert_async(ino, IcbState::InFlight(rx1)) - .await; - - // Spawn get_icb — it will wait for InFlight to resolve. - let cache_get = Arc::clone(&cache); - let get_handle = - tokio::spawn(async move { cache_get.get_icb(ino, |icb| icb.path.clone()).await }); - - // Give get_icb time to start waiting on the watch channel. - tokio::task::yield_now().await; - - // Phase 1 complete: transition to Available briefly, then immediately - // back to InFlight (simulates get_or_resolve finding a stub and - // re-entering InFlight for a second resolution). - let (tx2, rx2) = watch::channel(()); - cache - .inode_table - .upsert_async(ino, IcbState::InFlight(rx2)) - .await; - // Signal phase-1 watchers so get_icb wakes up; it will re-read the - // entry and find InFlight again, then loop back to wait. - drop(tx1); - - // Give get_icb time to re-enter the wait loop. - tokio::task::yield_now().await; - - // Phase 2 complete: write the final resolved value. - cache - .inode_table - .upsert_async( - ino, - IcbState::Available(TestIcb { - rc: 1, - path: "/fully-resolved".into(), - resolved: true, - }), - ) - .await; - drop(tx2); - - // get_icb should return the final resolved value (not None). - let result = get_handle.await.expect("get_icb task panicked"); - assert_eq!( - result, - Some(PathBuf::from("/fully-resolved")), - "get_icb must survive re-InFlight and return the final resolved value" - ); - } - - /// Regression test: an entry evicted by `forget` during an in-progress - /// `get_or_resolve` must NOT be resurrected when resolution completes. 
- #[tokio::test] - async fn get_or_resolve_does_not_resurrect_evicted_entry() { - let started = Arc::new(tokio::sync::Notify::new()); - let proceed = Arc::new(tokio::sync::Notify::new()); - - let cache = Arc::new(AsyncICache::new( - SlowResolver { - started: Arc::clone(&started), - proceed: Arc::clone(&proceed), - }, - 1, - "/root", - )); - - let ino: Inode = 42; - - // Insert a stub with rc=1 (simulates a looked-up, unresolved entry). - cache - .insert_icb( - ino, - TestIcb { - rc: 1, - path: "/stub".into(), - resolved: false, - }, - ) - .await; - - // Spawn get_or_resolve which will trigger slow resolution. - let cache2 = Arc::clone(&cache); - let resolve_handle = - tokio::spawn(async move { cache2.get_or_resolve(ino, |icb| icb.path.clone()).await }); - - // Wait until the resolver has started (entry is now InFlight). - started.notified().await; - - // Evict the entry while resolution is in progress. - // forget waits for InFlight, so we need to complete resolution for - // forget to proceed. Instead, remove the InFlight entry directly to - // simulate a concurrent eviction (e.g., by another path that already - // removed the entry). - cache.inode_table.remove_async(&ino).await; - - // Let the resolver finish. - proceed.notify_one(); - - // Wait for get_or_resolve to complete. - drop(resolve_handle.await.expect("task panicked")); - - // The entry must NOT have been resurrected by write_back_if_present. - assert!( - !cache.contains(ino), - "evicted entry must not be resurrected after resolution completes" - ); - } -} diff --git a/src/fs/icache/bridge.rs b/src/fs/icache/bridge.rs deleted file mode 100644 index e674a564..00000000 --- a/src/fs/icache/bridge.rs +++ /dev/null @@ -1,138 +0,0 @@ -use crate::fs::r#trait::{FileAttr, FileHandle, Inode}; - -/// Bidirectional bridge for both inodes and file handles between two Fs layers. -/// -/// Convention: **left = outer (caller), right = inner (callee)**. -/// `forward(left)` → right, `backward(right)` → left. 
-pub struct HashMapBridge { - inode_map: bimap::BiMap, - fh_map: bimap::BiMap, -} - -impl HashMapBridge { - pub fn new() -> Self { - Self { - inode_map: bimap::BiMap::new(), - fh_map: bimap::BiMap::new(), - } - } - - // ── Inode methods ──────────────────────────────────────────────────── - - pub fn insert_inode(&mut self, left: Inode, right: Inode) { - self.inode_map.insert(left, right); - } - - /// Look up right→left, or allocate a new left inode if unmapped. - pub fn backward_or_insert_inode( - &mut self, - right: Inode, - allocate: impl FnOnce() -> Inode, - ) -> Inode { - if let Some(&left) = self.inode_map.get_by_right(&right) { - left - } else { - let left = allocate(); - self.inode_map.insert(left, right); - left - } - } - - /// Look up left→right, or allocate a new right inode if unmapped. - pub fn forward_or_insert_inode( - &mut self, - left: Inode, - allocate: impl FnOnce() -> Inode, - ) -> Inode { - if let Some(&right) = self.inode_map.get_by_left(&left) { - right - } else { - let right = allocate(); - self.inode_map.insert(left, right); - right - } - } - - /// Remove an inode mapping by its left (outer) key. - pub fn remove_inode_by_left(&mut self, left: Inode) { - self.inode_map.remove_by_left(&left); - } - - /// Look up left→right directly. - pub fn inode_map_get_by_left(&self, left: Inode) -> Option<&Inode> { - self.inode_map.get_by_left(&left) - } - - /// Rewrite the `ino` field in a [`FileAttr`] from right (inner) to left (outer) namespace. 
- pub fn attr_backward(&self, attr: FileAttr) -> FileAttr { - let backward = |ino: Inode| -> Inode { - if let Some(&left) = self.inode_map.get_by_right(&ino) { - left - } else { - tracing::warn!( - inner_ino = ino, - "attr_backward: no bridge mapping, using raw inner inode" - ); - ino - } - }; - rewrite_attr_ino(attr, backward) - } - - // ── File handle methods ────────────────────────────────────────────── - - pub fn insert_fh(&mut self, left: FileHandle, right: FileHandle) { - self.fh_map.insert(left, right); - } - - pub fn fh_forward(&self, left: FileHandle) -> Option { - self.fh_map.get_by_left(&left).copied() - } - - /// Remove a file handle mapping by its left (outer) key. - pub fn remove_fh_by_left(&mut self, left: FileHandle) { - self.fh_map.remove_by_left(&left); - } -} - -/// Rewrite the `ino` field in a [`FileAttr`] using the given translation function. -fn rewrite_attr_ino(attr: FileAttr, translate: impl Fn(Inode) -> Inode) -> FileAttr { - match attr { - FileAttr::RegularFile { - mut common, - size, - blocks, - } => { - common.ino = translate(common.ino); - FileAttr::RegularFile { - common, - size, - blocks, - } - } - FileAttr::Directory { mut common } => { - common.ino = translate(common.ino); - FileAttr::Directory { common } - } - FileAttr::Symlink { mut common, size } => { - common.ino = translate(common.ino); - FileAttr::Symlink { common, size } - } - FileAttr::CharDevice { mut common, rdev } => { - common.ino = translate(common.ino); - FileAttr::CharDevice { common, rdev } - } - FileAttr::BlockDevice { mut common, rdev } => { - common.ino = translate(common.ino); - FileAttr::BlockDevice { common, rdev } - } - FileAttr::NamedPipe { mut common } => { - common.ino = translate(common.ino); - FileAttr::NamedPipe { common } - } - FileAttr::Socket { mut common } => { - common.ino = translate(common.ino); - FileAttr::Socket { common } - } - } -} diff --git a/src/fs/icache/file_table.rs b/src/fs/icache/file_table.rs deleted file mode 100644 index 
332a6ffb..00000000 --- a/src/fs/icache/file_table.rs +++ /dev/null @@ -1,22 +0,0 @@ -use std::sync::atomic::{AtomicU64, Ordering}; - -use crate::fs::r#trait::FileHandle; - -/// Monotonically increasing file handle allocator. -#[must_use] -pub struct FileTable { - next_fh: AtomicU64, -} - -impl FileTable { - pub fn new() -> Self { - Self { - next_fh: AtomicU64::new(1), - } - } - - #[must_use] - pub fn allocate(&self) -> FileHandle { - self.next_fh.fetch_add(1, Ordering::Relaxed) - } -} diff --git a/src/fs/icache/inode_factory.rs b/src/fs/icache/inode_factory.rs deleted file mode 100644 index 1a603388..00000000 --- a/src/fs/icache/inode_factory.rs +++ /dev/null @@ -1,19 +0,0 @@ -use crate::fs::r#trait::Inode; -use std::sync::atomic::{AtomicU64, Ordering}; - -/// Monotonically increasing inode allocator. -pub struct InodeFactory { - next_inode: AtomicU64, -} - -impl InodeFactory { - pub fn new(start: Inode) -> Self { - Self { - next_inode: AtomicU64::new(start), - } - } - - pub fn allocate(&self) -> Inode { - self.next_inode.fetch_add(1, Ordering::Relaxed) - } -} diff --git a/src/fs/icache/mod.rs b/src/fs/icache/mod.rs deleted file mode 100644 index 2ccd80bd..00000000 --- a/src/fs/icache/mod.rs +++ /dev/null @@ -1,21 +0,0 @@ -//! Generic directory cache and inode management primitives. - -pub mod async_cache; -pub mod bridge; -mod file_table; -mod inode_factory; - -pub use async_cache::AsyncICache; -pub use async_cache::IcbResolver; -pub use file_table::FileTable; -pub use inode_factory::InodeFactory; - -/// Common interface for inode control block types usable with `ICache`. -pub trait IcbLike: Clone { - /// Create an ICB with rc=1, the given path, and no children. - fn new_root(path: std::path::PathBuf) -> Self; - fn rc(&self) -> u64; - fn rc_mut(&mut self) -> &mut u64; - /// Returns true if this entry needs resolution (e.g., attr not yet fetched). 
- fn needs_resolve(&self) -> bool; -} diff --git a/src/fs/mescloud/common.rs b/src/fs/mescloud/common.rs index 340b5887..6e9c8bf8 100644 --- a/src/fs/mescloud/common.rs +++ b/src/fs/mescloud/common.rs @@ -1,12 +1,12 @@ //! Shared types and helpers used by both `MesaFS` and `RepoFs`. +use std::ffi::{OsStr, OsString}; + +use bytes::Bytes; +use git_fs::fs::{FileHandle, INode, InodeAddr, OpenFlags as LibOpenFlags}; use mesa_dev::low_level::apis; use thiserror::Error; -use crate::fs::r#trait::{FileAttr, Inode}; - -pub(super) use super::icache::InodeControlBlock; - /// A concrete error type that preserves the structure of `mesa_dev::low_level::apis::Error` /// without the generic parameter. #[derive(Debug, Error)] @@ -51,50 +51,22 @@ pub enum LookupError { #[error("inode not found")] InodeNotFound, - #[error("file does not exist")] - FileDoesNotExist, - #[error("remote mesa error")] RemoteMesaError(#[from] MesaApiError), } -impl From for i32 { - fn from(e: LookupError) -> Self { - match e { - LookupError::InodeNotFound | LookupError::FileDoesNotExist => libc::ENOENT, - LookupError::RemoteMesaError(_) => libc::EIO, - } - } -} - #[derive(Debug, Error)] pub enum GetAttrError { #[error("inode not found")] InodeNotFound, } -impl From for i32 { - fn from(e: GetAttrError) -> Self { - match e { - GetAttrError::InodeNotFound => libc::ENOENT, - } - } -} - -#[derive(Debug, Error)] +#[derive(Debug, Clone, Copy, Error)] pub enum OpenError { #[error("inode not found")] InodeNotFound, } -impl From for i32 { - fn from(e: OpenError) -> Self { - match e { - OpenError::InodeNotFound => libc::ENOENT, - } - } -} - #[derive(Debug, Error)] pub enum ReadError { #[error("file not open")] @@ -113,17 +85,6 @@ pub enum ReadError { Base64Decode(#[from] base64::DecodeError), } -impl From for i32 { - fn from(e: ReadError) -> Self { - match e { - ReadError::FileNotOpen => libc::EBADF, - ReadError::InodeNotFound => libc::ENOENT, - ReadError::RemoteMesaError(_) | ReadError::Base64Decode(_) => libc::EIO, 
- ReadError::NotAFile => libc::EISDIR, - } - } -} - #[derive(Debug, Error)] pub enum ReadDirError { #[error("inode not found")] @@ -143,18 +104,7 @@ impl From for ReadDirError { fn from(e: LookupError) -> Self { match e { LookupError::RemoteMesaError(api) => Self::RemoteMesaError(api), - LookupError::InodeNotFound | LookupError::FileDoesNotExist => Self::InodeNotFound, - } - } -} - -impl From for i32 { - fn from(e: ReadDirError) -> Self { - match e { - ReadDirError::InodeNotFound => libc::ENOENT, - ReadDirError::RemoteMesaError(_) => libc::EIO, - ReadDirError::NotADirectory => libc::ENOTDIR, - ReadDirError::NotPermitted => libc::EPERM, + LookupError::InodeNotFound => Self::InodeNotFound, } } } @@ -165,18 +115,38 @@ pub enum ReleaseError { FileNotOpen, } -impl From for i32 { - fn from(e: ReleaseError) -> Self { - match e { - ReleaseError::FileNotOpen => libc::EBADF, - } - } +/// A directory entry for readdir results, using lib types. +pub struct FsDirEntry { + pub ino: InodeAddr, + pub name: OsString, } -/// Allows a parent compositor to peek at cached attrs from a child filesystem. +/// Trait for child filesystems composed by [`CompositeFs`](super::composite::CompositeFs). +/// +/// Uses lib types (`INode`, `InodeAddr`) directly — no conversion to/from `FileAttr`. +/// Replaces the old `Fs + InodeCachePeek` bound. #[async_trait::async_trait] -pub(super) trait InodeCachePeek { - async fn peek_attr(&self, ino: Inode) -> Option; +pub(super) trait ChildFs: Send + Sync { + /// Look up a child by name within the given parent directory. + async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result; + + /// List all children of a directory, returning full `INode` data for each. + async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError>; + + /// Open a file for reading. + async fn open(&mut self, ino: InodeAddr, flags: LibOpenFlags) -> Result; + + /// Read data from an open file. 
+ async fn read( + &mut self, + ino: InodeAddr, + fh: FileHandle, + offset: u64, + size: u32, + ) -> Result; + + /// Release (close) a file handle. + async fn release(&mut self, ino: InodeAddr, fh: FileHandle) -> Result<(), ReleaseError>; } #[cfg(test)] @@ -189,12 +159,6 @@ mod tests { assert!(matches!(err, ReadDirError::InodeNotFound)); } - #[test] - fn lookup_file_does_not_exist_converts_to_readdir_inode_not_found() { - let err: ReadDirError = LookupError::FileDoesNotExist.into(); - assert!(matches!(err, ReadDirError::InodeNotFound)); - } - #[test] fn lookup_remote_error_converts_to_readdir_remote_error() { let api_err = MesaApiError::Response { diff --git a/src/fs/mescloud/composite.rs b/src/fs/mescloud/composite.rs index 6dbac250..3356b7b5 100644 --- a/src/fs/mescloud/composite.rs +++ b/src/fs/mescloud/composite.rs @@ -1,308 +1,460 @@ use std::collections::HashMap; use std::ffi::OsStr; +use std::sync::atomic::{AtomicU64, Ordering}; use bytes::Bytes; -use tracing::{instrument, trace, warn}; - -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, OpenFlags, +use git_fs::cache::async_backed::FutureBackedCache; +use git_fs::fs::dcache::DCache; +use git_fs::fs::{ + AsyncFsStats, FileHandle, INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags, }; +use rustc_hash::FxHashMap; +use tracing::{instrument, trace}; use super::common::{ - GetAttrError, InodeCachePeek, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, + ChildFs, FsDirEntry, GetAttrError, LookupError, OpenError, ReadDirError, ReadError, + ReleaseError, }; -use super::icache::{InodeControlBlock, MescloudICache}; -/// A child filesystem slot: inner filesystem + bidirectional inode/fh bridge. +/// Bidirectional inode mapping between outer (composite) and inner (child) address spaces. +/// +/// Convention: **outer = left, inner = right**. 
+pub(super) struct InodeBridge { + map: bimap::BiMap, +} + +impl InodeBridge { + pub fn new() -> Self { + Self { + map: bimap::BiMap::new(), + } + } + + pub fn insert(&mut self, outer: InodeAddr, inner: InodeAddr) { + self.map.insert(outer, inner); + } + + pub fn forward(&self, outer: InodeAddr) -> Option { + self.map.get_by_left(&outer).copied() + } + + #[expect(dead_code, reason = "will be needed by future callers")] + pub fn backward(&self, inner: InodeAddr) -> Option { + self.map.get_by_right(&inner).copied() + } + + /// Look up inner->outer, or allocate a new outer address if unmapped. + pub fn backward_or_insert( + &mut self, + inner: InodeAddr, + allocate: impl FnOnce() -> InodeAddr, + ) -> InodeAddr { + if let Some(&outer) = self.map.get_by_right(&inner) { + outer + } else { + let outer = allocate(); + self.map.insert(outer, inner); + outer + } + } + + pub fn remove_by_outer(&mut self, outer: InodeAddr) { + self.map.remove_by_left(&outer); + } + + #[expect(dead_code, reason = "will be needed by future callers")] + pub fn get_inner(&self, outer: InodeAddr) -> Option<&InodeAddr> { + self.map.get_by_left(&outer) + } +} + pub(super) struct ChildSlot { pub inner: Inner, - pub bridge: HashMapBridge, + pub bridge: InodeBridge, } -/// Layered filesystem that presents multiple child filesystems under a single -/// inode namespace. -/// -/// `MesaCloud`'s filesystem is a hierarchy of compositions: -/// -/// ```text -/// MesaFS (CompositeFs<_, OrgFs>) -/// └─ OrgFs (CompositeFs<_, RepoFs>) -/// └─ RepoFs (leaf — backed by git) -/// ``` -/// -/// Each child filesystem numbers its inodes starting from 1, so the composite -/// maintains a bidirectional inode/file-handle bridge per child (see -/// [`ChildSlot`]) to translate between the outer namespace visible to FUSE and -/// each child's internal namespace. 
-pub(super) struct CompositeFs -where - R: IcbResolver, -{ - pub icache: MescloudICache, - pub file_table: FileTable, - pub readdir_buf: Vec, - /// Maps outer inode to index into `slots` for child-root inodes. - pub child_inodes: HashMap, - /// Maps every translated outer inode to its owning slot index. - pub inode_to_slot: HashMap, - pub slots: Vec>, +/// Tracks an open file: which child slot owns it and the inner fh. +struct OpenFileEntry { + slot_idx: usize, + inner_ino: InodeAddr, + inner_fh: FileHandle, +} + +pub(super) struct CompositeFs { + pub(super) inode_table: FutureBackedCache, + pub(super) directory_cache: DCache, + readdir_populated: FutureBackedCache, + next_ino: AtomicU64, + next_fh: AtomicU64, + refcounts: FxHashMap, + pub(super) readdir_buf: Vec, + open_files: HashMap, + pub(super) child_inodes: HashMap, + pub(super) inode_to_slot: HashMap, + pub(super) slots: Vec>, + fs_owner: (u32, u32), + block_size: u32, } -impl CompositeFs -where - R: IcbResolver, - Inner: Fs< - LookupError = LookupError, - GetAttrError = GetAttrError, - OpenError = OpenError, - ReadError = ReadError, - ReaddirError = ReadDirError, - ReleaseError = ReleaseError, - > + InodeCachePeek - + Send - + Sync, -{ - /// Look up which child slot owns an inode via direct map. 
- #[instrument(name = "CompositeFs::slot_for_inode", skip(self))] - pub fn slot_for_inode(&self, ino: Inode) -> Option { +impl CompositeFs { + pub const ROOT_INO: InodeAddr = 1; + + pub fn new(fs_owner: (u32, u32), block_size: u32) -> Self { + let inode_table = FutureBackedCache::default(); + let now = std::time::SystemTime::now(); + let root = INode { + addr: Self::ROOT_INO, + permissions: InodePerms::from_bits_truncate(0o755), + uid: fs_owner.0, + gid: fs_owner.1, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, + }; + inode_table.insert_sync(Self::ROOT_INO, root); + + let mut refcounts = FxHashMap::default(); + refcounts.insert(Self::ROOT_INO, 1); + + Self { + inode_table, + directory_cache: DCache::new(), + readdir_populated: FutureBackedCache::default(), + next_ino: AtomicU64::new(Self::ROOT_INO + 1), + next_fh: AtomicU64::new(1), + refcounts, + readdir_buf: Vec::new(), + open_files: HashMap::new(), + child_inodes: HashMap::new(), + inode_to_slot: HashMap::new(), + slots: Vec::new(), + fs_owner, + block_size, + } + } + + pub fn allocate_inode(&self) -> InodeAddr { + self.next_ino.fetch_add(1, Ordering::Relaxed) + } + + pub fn fs_owner(&self) -> (u32, u32) { + self.fs_owner + } + + #[expect(dead_code, reason = "available for future use")] + pub fn block_size(&self) -> u32 { + self.block_size + } + + pub fn add_child(&mut self, inner: Inner, child_root_ino: InodeAddr) -> InodeAddr { + self.add_child_with_parent(inner, child_root_ino, Self::ROOT_INO) + } + + pub fn cache_inode(&self, inode: INode) { + self.inode_table.insert_sync(inode.addr, inode); + } + + /// Insert the inode into the table and initialise its refcount to zero. + /// + /// The caller is responsible for bumping the refcount via [`inc_rc`](Self::inc_rc). 
+ pub fn cache_inode_and_init_rc(&mut self, inode: INode) { + let addr = inode.addr; + self.inode_table.insert_sync(addr, inode); + self.refcounts.entry(addr).or_insert(0); + } + + pub fn inc_rc(&mut self, addr: InodeAddr) -> Option { + let rc = self.refcounts.get_mut(&addr)?; + *rc += 1; + Some(*rc) + } + + pub fn slot_for_inode(&self, ino: InodeAddr) -> Option { self.inode_to_slot.get(&ino).copied() } - /// Allocate an outer file handle and map it through the bridge. - #[must_use] - pub fn alloc_fh(&mut self, slot_idx: usize, inner_fh: FileHandle) -> FileHandle { - let fh = self.file_table.allocate(); - self.slots[slot_idx].bridge.insert_fh(fh, inner_fh); - fh + /// Like [`add_child`](Self::add_child) but sets a custom parent inode + /// instead of always using `ROOT_INO`. + pub fn add_child_with_parent( + &mut self, + inner: Inner, + child_root_ino: InodeAddr, + parent_ino: InodeAddr, + ) -> InodeAddr { + let outer_ino = self.allocate_inode(); + let now = std::time::SystemTime::now(); + let inode = INode { + addr: outer_ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.fs_owner.0, + gid: self.fs_owner.1, + create_time: now, + last_modified_at: now, + parent: Some(parent_ino), + size: 0, + itype: INodeType::Directory, + }; + self.inode_table.insert_sync(outer_ino, inode); + + let mut bridge = InodeBridge::new(); + bridge.insert(outer_ino, child_root_ino); + + let idx = self.slots.len(); + self.slots.push(ChildSlot { inner, bridge }); + self.child_inodes.insert(outer_ino, idx); + self.inode_to_slot.insert(outer_ino, idx); + + outer_ino } +} - /// Translate an inner inode to an outer inode, allocating if needed. - /// Also inserts a stub ICB into the outer icache when the inode is new. 
- #[instrument(name = "CompositeFs::translate_inner_ino", skip(self, name))] - pub async fn translate_inner_ino( +impl CompositeFs { + #[instrument(name = "CompositeFs::delegated_lookup", skip(self, name))] + pub async fn delegated_lookup( &mut self, - slot_idx: usize, - inner_ino: Inode, - parent_outer_ino: Inode, + parent: InodeAddr, name: &OsStr, - ) -> Inode { - let outer_ino = self.slots[slot_idx] + ) -> Result { + // Fast path: DCache hit + inode still in table + if let Some(dentry) = self.directory_cache.lookup(LoadedAddr(parent), name) + && let Some(inode) = self.inode_table.get(&dentry.ino.0).await + { + *self.refcounts.entry(inode.addr).or_insert(0) += 1; + return Ok(inode); + } + + // Slow path: delegate to child + let idx = self + .inode_to_slot + .get(&parent) + .copied() + .ok_or(LookupError::InodeNotFound)?; + let inner_parent = self.slots[idx] .bridge - .backward_or_insert_inode(inner_ino, || self.icache.allocate_inode()); - self.inode_to_slot.insert(outer_ino, slot_idx); - self.icache - .entry_or_insert_icb( - outer_ino, - || InodeControlBlock { - rc: 0, - path: name.into(), - parent: Some(parent_outer_ino), - attr: None, - children: None, - }, - |_| {}, + .forward(parent) + .ok_or(LookupError::InodeNotFound)?; + let inner_inode = self.slots[idx].inner.lookup(inner_parent, name).await?; + + let next_ino = &self.next_ino; + let outer_ino = self.slots[idx] + .bridge + .backward_or_insert(inner_inode.addr, || { + next_ino.fetch_add(1, Ordering::Relaxed) + }); + self.inode_to_slot.insert(outer_ino, idx); + + let remapped = INode { + addr: outer_ino, + ..inner_inode + }; + self.inode_table + .get_or_init(outer_ino, || async move { remapped }) + .await; + + let is_dir = matches!(inner_inode.itype, INodeType::Directory); + self.directory_cache + .insert( + LoadedAddr(parent), + name.to_os_string(), + LoadedAddr(outer_ino), + is_dir, ) .await; - outer_ino + + *self.refcounts.entry(outer_ino).or_insert(0) += 1; + let rc = self.refcounts[&outer_ino]; + 
trace!( + outer_ino, + inner_ino = inner_inode.addr, + rc, + "lookup: resolved via delegation" + ); + + Ok(remapped) + } + + #[instrument(name = "CompositeFs::delegated_readdir", skip(self))] + pub async fn delegated_readdir( + &mut self, + ino: InodeAddr, + ) -> Result<&[FsDirEntry], ReadDirError> { + let idx = self + .inode_to_slot + .get(&ino) + .copied() + .ok_or(ReadDirError::InodeNotFound)?; + + if self.readdir_populated.get(&LoadedAddr(ino)).await.is_none() { + let inner_ino = self.slots[idx] + .bridge + .forward(ino) + .ok_or(ReadDirError::InodeNotFound)?; + let inner_entries = self.slots[idx].inner.readdir(inner_ino).await?; + + for (name, child_inode) in &inner_entries { + let next_ino = &self.next_ino; + let outer_child = self.slots[idx] + .bridge + .backward_or_insert(child_inode.addr, || { + next_ino.fetch_add(1, Ordering::Relaxed) + }); + self.inode_to_slot.insert(outer_child, idx); + + let remapped = INode { + addr: outer_child, + ..*child_inode + }; + self.inode_table + .get_or_init(outer_child, || async move { remapped }) + .await; + + let is_dir = matches!(child_inode.itype, INodeType::Directory); + self.directory_cache + .insert( + LoadedAddr(ino), + name.clone(), + LoadedAddr(outer_child), + is_dir, + ) + .await; + } + + self.readdir_populated + .get_or_init(LoadedAddr(ino), || async {}) + .await; + } + + let mut children = self.directory_cache.readdir(LoadedAddr(ino)).await; + children.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); + + let mut entries = Vec::with_capacity(children.len()); + for (name, dvalue) in &children { + if let Some(inode) = self.inode_table.get(&dvalue.ino.0).await { + entries.push(FsDirEntry { + ino: inode.addr, + name: name.clone(), + }); + } + } + + self.readdir_buf = entries; + Ok(&self.readdir_buf) } - /// Get cached file attributes for an inode. 
#[instrument(name = "CompositeFs::delegated_getattr", skip(self))] - pub async fn delegated_getattr(&self, ino: Inode) -> Result { - self.icache.get_attr(ino).await.ok_or_else(|| { - warn!(ino, "getattr on unknown inode"); - GetAttrError::InodeNotFound - }) + pub async fn delegated_getattr(&self, ino: InodeAddr) -> Result { + self.inode_table + .get(&ino) + .await + .ok_or(GetAttrError::InodeNotFound) + } + + #[expect(dead_code, reason = "will be needed by future callers")] + #[must_use] + pub fn delegated_statfs(&self) -> AsyncFsStats { + AsyncFsStats { + block_size: self.block_size, + total_blocks: 0, + free_blocks: 0, + available_blocks: 0, + total_inodes: self.inode_table.len() as u64, + free_inodes: 0, + max_filename_length: 255, + } } - /// Find slot, forward inode, delegate to inner, allocate outer file handle. #[instrument(name = "CompositeFs::delegated_open", skip(self))] pub async fn delegated_open( &mut self, - ino: Inode, + ino: InodeAddr, flags: OpenFlags, - ) -> Result { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "open on inode not belonging to any child"); - OpenError::InodeNotFound - })?; + ) -> Result { + let idx = self + .inode_to_slot + .get(&ino) + .copied() + .ok_or(OpenError::InodeNotFound)?; let inner_ino = self.slots[idx] .bridge - .forward_or_insert_inode(ino, || unreachable!("open: ino should be mapped")); - let inner_open = self.slots[idx].inner.open(inner_ino, flags).await?; - let outer_fh = self.alloc_fh(idx, inner_open.handle); - trace!( - ino, + .forward(ino) + .ok_or(OpenError::InodeNotFound)?; + let inner_fh = self.slots[idx].inner.open(inner_ino, flags).await?; + + let outer_fh = self.next_fh.fetch_add(1, Ordering::Relaxed); + self.open_files.insert( outer_fh, - inner_fh = inner_open.handle, - "open: assigned file handle" + OpenFileEntry { + slot_idx: idx, + inner_ino, + inner_fh, + }, ); - Ok(OpenFile { - handle: outer_fh, - options: inner_open.options, - }) + + trace!(ino, outer_fh, inner_fh, "open: 
assigned fh"); + Ok(outer_fh) } - /// Find slot, forward inode and file handle, delegate read to inner. - #[expect(clippy::too_many_arguments, reason = "mirrors fuser read API")] #[instrument(name = "CompositeFs::delegated_read", skip(self))] pub async fn delegated_read( &mut self, - ino: Inode, fh: FileHandle, offset: u64, size: u32, - flags: OpenFlags, - lock_owner: Option, ) -> Result { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "read on inode not belonging to any child"); - ReadError::InodeNotFound - })?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("read: ino should be mapped")); - let inner_fh = self.slots[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "read: no fh mapping found"); - ReadError::FileNotOpen - })?; - self.slots[idx] + let entry = self.open_files.get(&fh).ok_or(ReadError::FileNotOpen)?; + let slot_idx = entry.slot_idx; + let inner_ino = entry.inner_ino; + let inner_fh = entry.inner_fh; + self.slots[slot_idx] .inner - .read(inner_ino, inner_fh, offset, size, flags, lock_owner) + .read(inner_ino, inner_fh, offset, size) .await } - /// Find slot, forward inode and file handle, delegate release to inner, - /// then clean up the file handle mapping. 
#[instrument(name = "CompositeFs::delegated_release", skip(self))] - pub async fn delegated_release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - let idx = self.slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "release on inode not belonging to any child"); - ReleaseError::FileNotOpen - })?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("release: ino should be mapped")); - let inner_fh = self.slots[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "release: no fh mapping found"); - ReleaseError::FileNotOpen - })?; - let result = self.slots[idx] + pub async fn delegated_release(&mut self, fh: FileHandle) -> Result<(), ReleaseError> { + let entry = self + .open_files + .remove(&fh) + .ok_or(ReleaseError::FileNotOpen)?; + let result = self.slots[entry.slot_idx] .inner - .release(inner_ino, inner_fh, flags, flush) + .release(entry.inner_ino, entry.inner_fh) .await; - self.slots[idx].bridge.remove_fh_by_left(fh); - trace!(ino, fh, "release: cleaned up fh mapping"); + trace!(fh, "release: cleaned up fh mapping"); result } - /// Propagate forget to the inner filesystem, evict from icache, and clean - /// up bridge mappings. Returns `true` if the inode was evicted. + /// Returns `true` if the inode was evicted. /// - /// Child-root inodes (those in `child_inodes`) do NOT propagate forget to - /// the inner filesystem: the inner root's `rc=1` is an initialization - /// invariant unrelated to outer FUSE lookup counts. Propagating would - /// evict the inner root, breaking all subsequent operations on that child. + /// The composite only manages its own refcounts and inode table. + /// Inner filesystem inodes are managed by the inner FS itself through + /// its own lifecycle; the composite does not propagate forget to children. 
+ #[expect(dead_code, reason = "will be needed by future callers")] #[must_use] #[instrument(name = "CompositeFs::delegated_forget", skip(self))] - pub async fn delegated_forget(&mut self, ino: Inode, nlookups: u64) -> bool { - let slot_idx = self.slot_for_inode(ino); - let is_child_root = self.child_inodes.contains_key(&ino); - if !is_child_root - && let Some(idx) = slot_idx - && let Some(&inner_ino) = self.slots[idx].bridge.inode_map_get_by_left(ino) - { - self.slots[idx].inner.forget(inner_ino, nlookups).await; - } - if self.icache.forget(ino, nlookups).await.is_some() { - self.child_inodes.remove(&ino); - self.inode_to_slot.remove(&ino); - if let Some(idx) = slot_idx { - self.slots[idx].bridge.remove_inode_by_left(ino); + pub fn delegated_forget(&mut self, ino: InodeAddr, nlookups: u64) -> bool { + let slot_idx = self.inode_to_slot.get(&ino).copied(); + + if let Some(rc) = self.refcounts.get_mut(&ino) { + *rc = rc.saturating_sub(nlookups); + if *rc > 0 { + return false; } - true + self.refcounts.remove(&ino); } else { - false + return false; } - } - - /// Return filesystem statistics from the icache. - #[must_use] - pub fn delegated_statfs(&self) -> FilesystemStats { - self.icache.statfs() - } - - /// Delegation branch for lookup when the parent is owned by a child slot. 
- #[instrument(name = "CompositeFs::delegated_lookup", skip(self, name))] - pub async fn delegated_lookup( - &mut self, - parent: Inode, - name: &OsStr, - ) -> Result { - let idx = self - .slot_for_inode(parent) - .ok_or(LookupError::InodeNotFound)?; - let inner_parent = self.slots[idx] - .bridge - .forward_or_insert_inode(parent, || unreachable!("lookup: parent should be mapped")); - let inner_attr = self.slots[idx].inner.lookup(inner_parent, name).await?; - let inner_ino = inner_attr.common().ino; - let outer_ino = self.translate_inner_ino(idx, inner_ino, parent, name).await; - let outer_attr = self.slots[idx].bridge.attr_backward(inner_attr); - self.icache.cache_attr(outer_ino, outer_attr).await; - // None means the entry was concurrently evicted; fail the lookup so - // the kernel doesn't hold a ref the cache no longer tracks. - let rc = self - .icache - .inc_rc(outer_ino) - .await - .ok_or(LookupError::InodeNotFound)?; - trace!(outer_ino, inner_ino, rc, "lookup: resolved via delegation"); - Ok(outer_attr) - } - /// Delegation branch for readdir when the inode is owned by a child slot. 
- #[instrument(name = "CompositeFs::delegated_readdir", skip(self))] - pub async fn delegated_readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - let idx = self - .slot_for_inode(ino) - .ok_or(ReadDirError::InodeNotFound)?; - let inner_ino = self.slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("readdir: ino should be mapped")); - let inner_entries = self.slots[idx].inner.readdir(inner_ino).await?; - let inner_entries: Vec = inner_entries.to_vec(); - let evicted = self.icache.evict_zero_rc_children(ino).await; - for evicted_ino in evicted { - if let Some(slot) = self.inode_to_slot.remove(&evicted_ino) { - self.slots[slot].bridge.remove_inode_by_left(evicted_ino); - } - self.child_inodes.remove(&evicted_ino); + self.inode_table.remove_sync(&ino); + self.child_inodes.remove(&ino); + self.inode_to_slot.remove(&ino); + if let Some(idx) = slot_idx { + self.slots[idx].bridge.remove_by_outer(ino); } - let mut outer_entries = Vec::with_capacity(inner_entries.len()); - for entry in &inner_entries { - let outer_child_ino = self - .translate_inner_ino(idx, entry.ino, ino, &entry.name) - .await; - if let Some(inner_attr) = self.slots[idx].inner.peek_attr(entry.ino).await { - let outer_attr = self.slots[idx].bridge.attr_backward(inner_attr); - self.icache.cache_attr(outer_child_ino, outer_attr).await; - } - outer_entries.push(DirEntry { - ino: outer_child_ino, - name: entry.name.clone(), - kind: entry.kind, - }); - } - self.readdir_buf = outer_entries; - Ok(&self.readdir_buf) + + true } } diff --git a/src/fs/mescloud/icache.rs b/src/fs/mescloud/icache.rs deleted file mode 100644 index 15f1f5d7..00000000 --- a/src/fs/mescloud/icache.rs +++ /dev/null @@ -1,437 +0,0 @@ -//! Mescloud-specific inode control block, helpers, and directory cache wrapper. 
- -use std::ffi::OsStr; -use std::time::SystemTime; - -use crate::fs::icache::{AsyncICache, IcbLike, IcbResolver, InodeFactory}; -use crate::fs::r#trait::{ - CommonFileAttr, DirEntryType, FileAttr, FilesystemStats, Inode, Permissions, -}; - -/// Inode control block for mescloud filesystem layers. -#[derive(Clone)] -pub struct InodeControlBlock { - pub parent: Option, - pub rc: u64, - pub path: std::path::PathBuf, - /// Cached file attributes from the last lookup. - pub attr: Option, - /// Cached directory children from the resolver (directories only). - pub children: Option>, -} - -impl IcbLike for InodeControlBlock { - fn new_root(path: std::path::PathBuf) -> Self { - Self { - rc: 1, - parent: None, - path, - attr: None, - children: None, - } - } - - fn rc(&self) -> u64 { - self.rc - } - - fn rc_mut(&mut self) -> &mut u64 { - &mut self.rc - } - - fn needs_resolve(&self) -> bool { - match self.attr { - None => true, - Some(FileAttr::Directory { .. }) => self.children.is_none(), - Some(_) => false, - } - } -} - -/// Calculate the number of blocks needed for a given size. -pub fn blocks_of_size(block_size: u32, size: u64) -> u64 { - size.div_ceil(u64::from(block_size)) -} - -/// Free function -- usable by both `MescloudICache` and resolvers. -pub fn make_common_file_attr( - ino: Inode, - perm: u16, - atime: SystemTime, - mtime: SystemTime, - fs_owner: (u32, u32), - block_size: u32, -) -> CommonFileAttr { - CommonFileAttr { - ino, - atime, - mtime, - ctime: SystemTime::UNIX_EPOCH, - crtime: SystemTime::UNIX_EPOCH, - perm: Permissions::from_bits_truncate(perm), - nlink: 1, - uid: fs_owner.0, - gid: fs_owner.1, - blksize: block_size, - } -} - -/// Mescloud-specific directory cache wrapper over `AsyncICache`. -pub struct MescloudICache> { - inner: AsyncICache, - inode_factory: InodeFactory, - fs_owner: (u32, u32), - block_size: u32, -} - -impl> MescloudICache { - /// Create a new `MescloudICache`. Initializes root ICB (rc=1), caches root dir attr. 
- pub fn new(resolver: R, root_ino: Inode, fs_owner: (u32, u32), block_size: u32) -> Self { - let cache = Self { - inner: AsyncICache::new(resolver, root_ino, "/"), - inode_factory: InodeFactory::new(root_ino + 1), - fs_owner, - block_size, - }; - - // Set root directory attr synchronously during initialization - let now = SystemTime::now(); - let root_attr = FileAttr::Directory { - common: make_common_file_attr(root_ino, 0o755, now, now, fs_owner, block_size), - }; - cache.inner.get_icb_mut_sync(root_ino, |icb| { - icb.attr = Some(root_attr); - }); - - cache - } - - // -- Delegated from AsyncICache (async) -- - - pub fn contains(&self, ino: Inode) -> bool { - self.inner.contains(ino) - } - - pub async fn get_icb( - &self, - ino: Inode, - // `Sync` required: see comment on `AsyncICache::get_icb`. - f: impl Fn(&InodeControlBlock) -> T + Send + Sync, - ) -> Option { - self.inner.get_icb(ino, f).await - } - - pub async fn insert_icb(&self, ino: Inode, icb: InodeControlBlock) { - self.inner.insert_icb(ino, icb).await; - } - - pub async fn entry_or_insert_icb( - &self, - ino: Inode, - factory: impl FnOnce() -> InodeControlBlock, - then: impl FnOnce(&mut InodeControlBlock) -> T, - ) -> T { - self.inner.entry_or_insert_icb(ino, factory, then).await - } - - pub async fn inc_rc(&self, ino: Inode) -> Option { - self.inner.inc_rc(ino).await - } - - pub async fn forget(&self, ino: Inode, nlookups: u64) -> Option { - self.inner.forget(ino, nlookups).await - } - - pub async fn get_or_resolve( - &self, - ino: Inode, - then: impl FnOnce(&InodeControlBlock) -> T, - ) -> Result { - self.inner.get_or_resolve(ino, then).await - } - - // -- Domain-specific -- - - /// Allocate a new inode number. 
- pub fn allocate_inode(&self) -> Inode { - self.inode_factory.allocate() - } - - pub async fn get_attr(&self, ino: Inode) -> Option { - self.inner.get_icb(ino, |icb| icb.attr).await.flatten() - } - - pub async fn cache_attr(&self, ino: Inode, attr: FileAttr) { - self.inner - .get_icb_mut(ino, |icb| { - icb.attr = Some(attr); - }) - .await; - } - - pub fn fs_owner(&self) -> (u32, u32) { - self.fs_owner - } - - pub fn block_size(&self) -> u32 { - self.block_size - } - - pub fn statfs(&self) -> FilesystemStats { - FilesystemStats { - block_size: self.block_size, - fragment_size: u64::from(self.block_size), - total_blocks: 0, - free_blocks: 0, - available_blocks: 0, - total_inodes: self.inner.inode_count() as u64, - free_inodes: 0, - available_inodes: 0, - filesystem_id: 0, - mount_flags: 0, - max_filename_length: 255, - } - } - - /// Evict all `Available` children of `parent` that have `rc == 0`. - /// Returns the list of evicted inode numbers so callers can clean up - /// associated state (e.g., bridge mappings, slot tracking). - pub async fn evict_zero_rc_children(&self, parent: Inode) -> Vec { - let mut to_evict = Vec::new(); - self.inner - .for_each(|&ino, icb| { - if icb.rc == 0 && icb.parent == Some(parent) { - to_evict.push(ino); - } - }) - .await; - let mut evicted = Vec::new(); - for ino in to_evict { - if self.inner.forget(ino, 0).await.is_some() { - evicted.push(ino); - } - } - evicted - } - - /// Find an existing child by (parent, name) or allocate a new inode. - /// If new, inserts a stub ICB (parent+path set, attr=None, children=None, rc=0). - /// Does NOT bump rc. Returns the inode number. - /// - /// # Safety invariant - /// - /// The `for_each` scan and `insert_icb` are **not** atomic. If two callers - /// race with the same `(parent, name)`, both may allocate distinct inodes - /// for the same logical child. This is currently safe because all callers - /// go through `&mut self` on the owning `Fs` implementation. 
- pub async fn ensure_child_ino(&self, parent: Inode, name: &OsStr) -> Inode { - // Search for existing child by parent + name - let mut existing_ino = None; - self.inner - .for_each(|&ino, icb| { - if icb.parent == Some(parent) && icb.path.as_os_str() == name { - existing_ino = Some(ino); - } - }) - .await; - - if let Some(ino) = existing_ino { - return ino; - } - - // Allocate new inode and insert stub - let ino = self.inode_factory.allocate(); - self.inner - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: name.into(), - parent: Some(parent), - attr: None, - children: None, - }, - ) - .await; - ino - } -} - -#[cfg(test)] -mod tests { - use std::future::Future; - - use super::*; - use crate::fs::icache::async_cache::AsyncICache; - use crate::fs::r#trait::DirEntryType; - - fn dummy_dir_attr(ino: Inode) -> FileAttr { - let now = SystemTime::now(); - FileAttr::Directory { - common: make_common_file_attr(ino, 0o755, now, now, (0, 0), 4096), - } - } - - fn dummy_file_attr(ino: Inode) -> FileAttr { - let now = SystemTime::now(); - FileAttr::RegularFile { - common: make_common_file_attr(ino, 0o644, now, now, (0, 0), 4096), - size: 100, - blocks: 1, - } - } - - #[test] - fn needs_resolve_stub_returns_true() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 0, - path: "stub".into(), - attr: None, - children: None, - }; - assert!(icb.needs_resolve()); - } - - #[test] - fn needs_resolve_file_with_attr_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "file.txt".into(), - attr: Some(dummy_file_attr(2)), - children: None, - }; - assert!(!icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_without_children_returns_true() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "dir".into(), - attr: Some(dummy_dir_attr(3)), - children: None, - }; - assert!(icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_with_children_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - 
rc: 1, - path: "dir".into(), - attr: Some(dummy_dir_attr(3)), - children: Some(vec![("README.md".to_owned(), DirEntryType::RegularFile)]), - }; - assert!(!icb.needs_resolve()); - } - - #[test] - fn needs_resolve_dir_with_empty_children_returns_false() { - let icb = InodeControlBlock { - parent: Some(1), - rc: 1, - path: "empty-dir".into(), - attr: Some(dummy_dir_attr(4)), - children: Some(vec![]), - }; - assert!(!icb.needs_resolve()); - } - - struct NoOpResolver; - - impl IcbResolver for NoOpResolver { - type Icb = InodeControlBlock; - type Error = std::convert::Infallible; - - #[expect( - clippy::manual_async_fn, - reason = "must match IcbResolver trait signature" - )] - fn resolve( - &self, - _ino: Inode, - _stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send { - async { unreachable!("NoOpResolver should not be called") } - } - } - - fn test_mescloud_cache() -> MescloudICache { - MescloudICache::new(NoOpResolver, 1, (0, 0), 4096) - } - - #[tokio::test] - async fn evict_zero_rc_children_removes_stubs() { - let cache = test_mescloud_cache(); - - // Insert stubs as children of root (ino=1) with rc=0 - cache - .insert_icb( - 10, - InodeControlBlock { - rc: 0, - path: "child_a".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - cache - .insert_icb( - 11, - InodeControlBlock { - rc: 0, - path: "child_b".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - - // Insert a child with rc > 0 — should survive - cache - .insert_icb( - 12, - InodeControlBlock { - rc: 1, - path: "active".into(), - parent: Some(1), - attr: None, - children: None, - }, - ) - .await; - - // Insert a stub under a different parent — should survive - cache - .insert_icb( - 20, - InodeControlBlock { - rc: 0, - path: "other".into(), - parent: Some(12), - attr: None, - children: None, - }, - ) - .await; - - let evicted = cache.evict_zero_rc_children(1).await; - assert_eq!(evicted.len(), 2, "should evict 2 zero-rc children of root"); - 
- assert!(!cache.contains(10), "child_a should be evicted"); - assert!(!cache.contains(11), "child_b should be evicted"); - assert!(cache.contains(12), "active child should survive"); - assert!( - cache.contains(20), - "child of different parent should survive" - ); - } -} diff --git a/src/fs/mescloud/mod.rs b/src/fs/mescloud/mod.rs index 1a3cce80..15a70725 100644 --- a/src/fs/mescloud/mod.rs +++ b/src/fs/mescloud/mod.rs @@ -1,24 +1,23 @@ -use std::collections::HashMap; -use std::ffi::OsStr; +use std::ffi::{OsStr, OsString}; use std::future::Future; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; use std::time::SystemTime; use bytes::Bytes; +use git_fs::fs::{FileHandle, INode, INodeType, InodeAddr, InodePerms, OpenFlags}; use mesa_dev::MesaClient; use opentelemetry::propagation::Injector; use secrecy::ExposeSecret as _; -use tracing::{Instrument as _, instrument, trace, warn}; +use tracing::{instrument, trace, warn}; use tracing_opentelemetry::OpenTelemetrySpanExt as _; use crate::app_config::CacheConfig; -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, - OpenFlags, -}; -use composite::{ChildSlot, CompositeFs}; +pub use common::FsDirEntry; +use composite::CompositeFs; + +pub use common::{GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; #[cfg(feature = "staging")] const MESA_API_BASE_URL: &str = "https://staging.depot.mesa.dev/api/v1"; @@ -27,17 +26,11 @@ const MESA_API_BASE_URL: &str = "https://depot.mesa.dev/api/v1"; mod common; mod composite; -use common::InodeControlBlock; -pub use common::{GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; - -use icache as mescloud_icache; -use icache::MescloudICache; mod org; pub use org::OrgConfig; use org::OrgFs; -pub mod icache; pub mod repo; struct 
HeaderInjector<'a>(&'a mut reqwest::header::HeaderMap); @@ -89,50 +82,6 @@ fn build_mesa_client(api_key: &str) -> MesaClient { .build() } -struct MesaResolver { - fs_owner: (u32, u32), - block_size: u32, -} - -impl IcbResolver for MesaResolver { - type Icb = InodeControlBlock; - type Error = std::convert::Infallible; - - fn resolve( - &self, - ino: Inode, - stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { - let fs_owner = self.fs_owner; - let block_size = self.block_size; - async move { - let stub = stub.unwrap_or_else(|| InodeControlBlock { - parent: None, - path: "/".into(), - rc: 0, - attr: None, - children: None, - }); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }; - Ok(InodeControlBlock { - attr: Some(attr), - children: Some(vec![]), - ..stub - }) - } - .instrument(tracing::info_span!("MesaResolver::resolve", ino)) - } -} - /// Classifies an inode by its role in the mesa hierarchy. enum InodeRole { /// The filesystem root (ino == 1). @@ -146,11 +95,11 @@ enum InodeRole { /// Composes multiple [`OrgFs`] instances, each with its own inode namespace, /// delegating to [`CompositeFs`] for inode/fh translation at each boundary. pub struct MesaFS { - composite: CompositeFs, + composite: CompositeFs, } impl MesaFS { - const ROOT_NODE_INO: Inode = 1; + const ROOT_NODE_INO: InodeAddr = CompositeFs::::ROOT_INO; const BLOCK_SIZE: u32 = 4096; /// Create a new `MesaFS` instance. 
@@ -160,38 +109,17 @@ impl MesaFS { fs_owner: (u32, u32), cache: &CacheConfig, ) -> Self { - let resolver = MesaResolver { - fs_owner, - block_size: Self::BLOCK_SIZE, - }; - Self { - composite: CompositeFs { - icache: MescloudICache::new( - resolver, - Self::ROOT_NODE_INO, - fs_owner, - Self::BLOCK_SIZE, - ), - file_table: FileTable::new(), - readdir_buf: Vec::new(), - child_inodes: HashMap::new(), - inode_to_slot: HashMap::new(), - slots: orgs - .map(|org_conf| { - let client = build_mesa_client(org_conf.api_key.expose_secret()); - let org = OrgFs::new(org_conf.name, client, fs_owner, cache.clone()); - ChildSlot { - inner: org, - bridge: HashMapBridge::new(), - } - }) - .collect(), - }, + let mut composite = CompositeFs::new(fs_owner, Self::BLOCK_SIZE); + for org_conf in orgs { + let client = build_mesa_client(org_conf.api_key.expose_secret()); + let org = OrgFs::new(org_conf.name, client, fs_owner, cache.clone()); + composite.add_child(org, OrgFs::ROOT_INO); } + Self { composite } } /// Classify an inode by its role. - fn inode_role(&self, ino: Inode) -> Option { + fn inode_role(&self, ino: InodeAddr) -> Option { if ino == Self::ROOT_NODE_INO { return Some(InodeRole::Root); } @@ -205,10 +133,8 @@ impl MesaFS { } /// Ensure a mesa-level inode exists for the org at `org_idx`. - /// Seeds the bridge with (`mesa_org_ino`, `OrgFs::ROOT_INO`). /// Does NOT bump rc. - async fn ensure_org_inode(&mut self, org_idx: usize) -> (Inode, FileAttr) { - // Check if an inode already exists. 
+ async fn ensure_org_inode(&mut self, org_idx: usize) -> (InodeAddr, INode) { let existing_ino = self .composite .child_inodes @@ -217,104 +143,62 @@ impl MesaFS { .map(|(&ino, _)| ino); if let Some(existing_ino) = existing_ino { - if let Some(attr) = self.composite.icache.get_attr(existing_ino).await { - let rc = self - .composite - .icache - .get_icb(existing_ino, |icb| icb.rc) - .await - .unwrap_or(0); + if let Ok(inode) = self.composite.delegated_getattr(existing_ino).await { trace!( ino = existing_ino, - org_idx, rc, "ensure_org_inode: reusing existing inode" - ); - return (existing_ino, attr); - } - if self.composite.icache.contains(existing_ino) { - // ICB exists but attr missing — rebuild and cache. - warn!( - ino = existing_ino, - org_idx, "ensure_org_inode: attr missing, rebuilding" + org_idx, "ensure_org_inode: reusing existing inode" ); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - existing_ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(existing_ino, attr).await; - return (existing_ino, attr); + return (existing_ino, inode); } - // ICB was evicted — clean up stale tracking entries. 
warn!( ino = existing_ino, - org_idx, "ensure_org_inode: ICB evicted, cleaning up stale entry" + org_idx, "ensure_org_inode: evicted, rebuilding" ); - self.composite.child_inodes.remove(&existing_ino); - self.composite.inode_to_slot.remove(&existing_ino); + let now = SystemTime::now(); + let inode = INode { + addr: existing_ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(Self::ROOT_NODE_INO), + size: 0, + itype: INodeType::Directory, + }; + self.composite.cache_inode(inode); + self.composite.inode_to_slot.insert(existing_ino, org_idx); + self.composite.child_inodes.insert(existing_ino, org_idx); + return (existing_ino, inode); } - // Allocate new. + warn!( + org_idx, + "ensure_org_inode: no child_inodes entry for org slot" + ); let org_name = self.composite.slots[org_idx].inner.name().to_owned(); - let ino = self.composite.icache.allocate_inode(); - trace!(ino, org_idx, org = %org_name, "ensure_org_inode: allocated new inode"); - + let ino = self.composite.allocate_inode(); let now = SystemTime::now(); - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: org_name.as_str().into(), - parent: Some(Self::ROOT_NODE_INO), - attr: None, - children: None, - }, - ) - .await; - + let inode = INode { + addr: ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(Self::ROOT_NODE_INO), + size: 0, + itype: INodeType::Directory, + }; + self.composite.cache_inode(inode); self.composite.child_inodes.insert(ino, org_idx); self.composite.inode_to_slot.insert(ino, org_idx); - - // Reset bridge (may have stale mappings from a previous eviction cycle) - // and seed: mesa org-root <-> OrgFs::ROOT_INO. 
- self.composite.slots[org_idx].bridge = HashMapBridge::new(); - self.composite.slots[org_idx] - .bridge - .insert_inode(ino, OrgFs::ROOT_INO); - - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) + trace!(ino, org_idx, org = %org_name, "ensure_org_inode: allocated new inode"); + (ino, inode) } -} - -#[async_trait::async_trait] -impl Fs for MesaFS { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; #[instrument(name = "MesaFS::lookup", skip(self))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { + pub async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; match role { InodeRole::Root => { @@ -327,31 +211,23 @@ impl Fs for MesaFS { .ok_or(LookupError::InodeNotFound)?; trace!(org = org_name, "lookup: matched org"); - let (ino, attr) = self.ensure_org_inode(org_idx).await; - let rc = self - .composite - .icache + let (ino, inode) = self.ensure_org_inode(org_idx).await; + self.composite .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; - trace!(ino, org = org_name, rc, "lookup: resolved org inode"); - Ok(attr) + Ok(inode) } InodeRole::OrgOwned => self.composite.delegated_lookup(parent, name).await, } } #[instrument(name = "MesaFS::getattr", skip(self))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { + pub async fn getattr(&self, ino: InodeAddr) -> Result { self.composite.delegated_getattr(ino).await } #[instrument(name = "MesaFS::readdir", skip(self))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { + pub async fn readdir(&mut 
self, ino: InodeAddr) -> Result<&[FsDirEntry], ReadDirError> { let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; match role { InodeRole::Root => { @@ -365,11 +241,10 @@ impl Fs for MesaFS { let mut entries = Vec::with_capacity(org_info.len()); for (org_idx, name) in &org_info { - let (org_ino, _) = self.ensure_org_inode(*org_idx).await; - entries.push(DirEntry { - ino: org_ino, + let (entry_ino, _) = self.ensure_org_inode(*org_idx).await; + entries.push(FsDirEntry { + ino: entry_ino, name: name.clone().into(), - kind: DirEntryType::Directory, }); } @@ -382,45 +257,178 @@ impl Fs for MesaFS { } #[instrument(name = "MesaFS::open", skip(self))] - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result { + pub async fn open( + &mut self, + ino: InodeAddr, + flags: OpenFlags, + ) -> Result { self.composite.delegated_open(ino, flags).await } #[instrument(name = "MesaFS::read", skip(self))] - async fn read( + pub async fn read( &mut self, - ino: Inode, fh: FileHandle, offset: u64, size: u32, - flags: OpenFlags, - lock_owner: Option, ) -> Result { - self.composite - .delegated_read(ino, fh, offset, size, flags, lock_owner) - .await + self.composite.delegated_read(fh, offset, size).await } #[instrument(name = "MesaFS::release", skip(self))] - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - self.composite - .delegated_release(ino, fh, flags, flush) - .await + pub async fn release(&mut self, fh: FileHandle) -> Result<(), ReleaseError> { + self.composite.delegated_release(fh).await } +} + +/// A file reader that delegates reads to `MesaFS` through a shared mutex. +/// +/// Resources are released via [`FileReader::close`](git_fs::fs::async_fs::FileReader::close), +/// which is called by the FUSE adapter during `release`. Dropping without +/// calling `close()` emits a diagnostic warning. 
+pub struct MesaFsReader { + inner: Arc>, + fh: FileHandle, + closed: AtomicBool, +} - #[instrument(name = "MesaFS::forget", skip(self))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - // MesaFS has no extra state to clean up on eviction (unlike OrgFs::owner_inodes). - let _ = self.composite.delegated_forget(ino, nlookups).await; +impl git_fs::fs::async_fs::FileReader for MesaFsReader { + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + let fh = self.fh; + async move { + let mut guard = inner.lock().await; + guard + .read(fh, offset, size) + .await + .map_err(|e| std::io::Error::other(e.to_string())) + } } - async fn statfs(&mut self) -> Result { - Ok(self.composite.delegated_statfs()) + fn close(&self) -> impl Future> + Send { + self.closed.store(true, Ordering::Relaxed); + let inner = Arc::clone(&self.inner); + let fh = self.fh; + async move { + let mut guard = inner.lock().await; + guard + .release(fh) + .await + .map_err(|e| std::io::Error::other(e.to_string())) + } + } +} + +impl Drop for MesaFsReader { + fn drop(&mut self) { + if !self.closed.load(Ordering::Relaxed) { + tracing::warn!(fh = self.fh, "MesaFsReader dropped without close()"); + } + } +} + +/// A [`FsDataProvider`](git_fs::fs::async_fs::FsDataProvider) that wraps +/// `MesaFS` behind a shared mutex. +#[derive(Clone)] +pub struct MesaFsProvider { + inner: Arc>, +} + +impl MesaFsProvider { + /// Create a new provider wrapping the given `MesaFS`. 
+ pub fn new(mesa_fs: MesaFS) -> Self { + Self { + inner: Arc::new(tokio::sync::Mutex::new(mesa_fs)), + } + } +} + +fn lookup_error_to_io(e: LookupError) -> std::io::Error { + match e { + LookupError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), + LookupError::RemoteMesaError(api) => std::io::Error::other(api.to_string()), + } +} + +fn readdir_error_to_io(e: ReadDirError) -> std::io::Error { + match e { + ReadDirError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), + ReadDirError::NotADirectory => std::io::Error::from_raw_os_error(libc::ENOTDIR), + ReadDirError::NotPermitted => std::io::Error::from_raw_os_error(libc::EPERM), + ReadDirError::RemoteMesaError(api) => std::io::Error::other(api.to_string()), + } +} + +fn open_error_to_io(e: OpenError) -> std::io::Error { + match e { + OpenError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), + } +} + +impl git_fs::fs::async_fs::FsDataProvider for MesaFsProvider { + type Reader = MesaFsReader; + + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + let name = name.to_os_string(); + async move { + let mut guard = inner.lock().await; + guard + .lookup(parent.addr, &name) + .await + .map_err(lookup_error_to_io) + } + } + + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send { + let inner = Arc::clone(&self.inner); + async move { + let mut guard = inner.lock().await; + let dir_entries: Vec<(OsString, InodeAddr)> = { + let entries = guard + .readdir(parent.addr) + .await + .map_err(readdir_error_to_io)?; + entries.iter().map(|e| (e.name.clone(), e.ino)).collect() + }; + let mut result = Vec::with_capacity(dir_entries.len()); + for (name, ino) in dir_entries { + if let Ok(inode) = guard.getattr(ino).await { + result.push((name, inode)); + } + } + Ok(result) + } + } + + fn open( + &self, + inode: INode, + flags: OpenFlags, + ) -> impl Future> + Send { + let inner = 
Arc::clone(&self.inner); + async move { + let mut guard = inner.lock().await; + let fh = guard + .open(inode.addr, flags) + .await + .map_err(open_error_to_io)?; + Ok(MesaFsReader { + inner: Arc::clone(&inner), + fh, + closed: AtomicBool::new(false), + }) + } } } diff --git a/src/fs/mescloud/org.rs b/src/fs/mescloud/org.rs index 1f3b8b5f..feefaf8e 100644 --- a/src/fs/mescloud/org.rs +++ b/src/fs/mescloud/org.rs @@ -1,73 +1,19 @@ use std::collections::HashMap; -use std::ffi::OsStr; -use std::future::Future; +use std::ffi::{OsStr, OsString}; use std::time::SystemTime; use bytes::Bytes; use futures::TryStreamExt as _; +use git_fs::fs::{FileHandle, INode, INodeType, InodeAddr, InodePerms, OpenFlags}; use mesa_dev::MesaClient; use secrecy::SecretString; -use tracing::{Instrument as _, instrument, trace, warn}; - -pub use super::common::{ - GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, -}; -use super::common::{InodeControlBlock, MesaApiError}; -use super::composite::{ChildSlot, CompositeFs}; -use super::icache as mescloud_icache; -use super::icache::MescloudICache; +use tracing::{instrument, trace, warn}; + +use super::common::{ChildFs, MesaApiError}; +pub use super::common::{LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; +use super::composite::CompositeFs; use super::repo::RepoFs; use crate::app_config::CacheConfig; -use crate::fs::icache::bridge::HashMapBridge; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, - OpenFlags, -}; - -pub(super) struct OrgResolver { - fs_owner: (u32, u32), - block_size: u32, -} - -impl IcbResolver for OrgResolver { - type Icb = InodeControlBlock; - type Error = LookupError; - - fn resolve( - &self, - ino: Inode, - stub: Option, - _cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { - let fs_owner = self.fs_owner; - let block_size = 
self.block_size; - async move { - let stub = stub.unwrap_or_else(|| InodeControlBlock { - parent: None, - path: "/".into(), - rc: 0, - attr: None, - children: None, - }); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }; - Ok(InodeControlBlock { - attr: Some(attr), - children: Some(vec![]), - ..stub - }) - } - .instrument(tracing::info_span!("OrgResolver::resolve", ino)) - } -} #[derive(Debug, Clone)] pub struct OrgConfig { @@ -81,7 +27,7 @@ enum InodeRole { OrgRoot, /// A virtual owner directory (github only). OwnerDir, - /// An inode owned by some repo. + /// An inode owned by some repo (either a child-root or delegated). RepoOwned, } @@ -92,14 +38,14 @@ enum InodeRole { pub struct OrgFs { name: String, client: MesaClient, - composite: CompositeFs, + composite: CompositeFs, /// Maps org-level owner-dir inodes to owner name (github only). - owner_inodes: HashMap, + owner_inodes: HashMap, cache_config: CacheConfig, } impl OrgFs { - pub(crate) const ROOT_INO: Inode = 1; + pub(crate) const ROOT_INO: InodeAddr = CompositeFs::::ROOT_INO; const BLOCK_SIZE: u32 = 4096; /// The name of the organization. @@ -123,31 +69,14 @@ impl OrgFs { /// Ensure an inode exists for a virtual owner directory (github only). Does NOT bump rc. /// TODO(MES-674): Cleanup "special" casing for github. 
- async fn ensure_owner_inode(&mut self, owner: &str) -> (Inode, FileAttr) { + async fn ensure_owner_inode(&mut self, owner: &str) -> (InodeAddr, INode) { // Check existing let mut stale_ino = None; for (&ino, existing_owner) in &self.owner_inodes { if existing_owner == owner { - if let Some(attr) = self.composite.icache.get_attr(ino).await { - return (ino, attr); - } - if self.composite.icache.contains(ino) { - // ICB exists but attr missing — rebuild and cache - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), - }; - self.composite.icache.cache_attr(ino, attr).await; - return (ino, attr); + if let Ok(inode) = self.composite.delegated_getattr(ino).await { + return (ino, inode); } - // ICB was evicted — mark for cleanup stale_ino = Some(ino); break; } @@ -156,35 +85,22 @@ impl OrgFs { self.owner_inodes.remove(&ino); } - // Allocate new - let ino = self.composite.icache.allocate_inode(); + let ino = self.composite.allocate_inode(); let now = SystemTime::now(); - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: owner.into(), - parent: Some(Self::ROOT_INO), - attr: None, - children: None, - }, - ) - .await; - self.owner_inodes.insert(ino, owner.to_owned()); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), + let inode = INode { + addr: ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(Self::ROOT_INO), + size: 0, + itype: INodeType::Directory, }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) + self.composite.cache_inode_and_init_rc(inode); + 
self.owner_inodes.insert(ino, owner.to_owned()); + (ino, inode) } #[must_use] @@ -194,28 +110,17 @@ impl OrgFs { fs_owner: (u32, u32), cache_config: CacheConfig, ) -> Self { - let resolver = OrgResolver { - fs_owner, - block_size: Self::BLOCK_SIZE, - }; Self { name, client, - composite: CompositeFs { - icache: MescloudICache::new(resolver, Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), - file_table: FileTable::new(), - readdir_buf: Vec::new(), - child_inodes: HashMap::new(), - inode_to_slot: HashMap::new(), - slots: Vec::new(), - }, + composite: CompositeFs::new(fs_owner, Self::BLOCK_SIZE), owner_inodes: HashMap::new(), cache_config, } } /// Classify an inode by its role. - fn inode_role(&self, ino: Inode) -> Option { + fn inode_role(&self, ino: InodeAddr) -> Option { if ino == Self::ROOT_INO { return Some(InodeRole::OrgRoot); } @@ -242,144 +147,92 @@ impl OrgFs { repo_name: &str, display_name: &str, default_branch: &str, - parent_ino: Inode, - ) -> (Inode, FileAttr) { + parent_ino: InodeAddr, + ) -> (InodeAddr, INode) { // Check existing repos. for (&ino, &idx) in &self.composite.child_inodes { if self.composite.slots[idx].inner.repo_name() == repo_name { - if let Some(attr) = self.composite.icache.get_attr(ino).await { - let rc = self - .composite - .icache - .get_icb(ino, |icb| icb.rc) - .await - .unwrap_or(0); - trace!(ino, repo = repo_name, rc, "ensure_repo_inode: reusing"); - return (ino, attr); + if let Ok(inode) = self.composite.delegated_getattr(ino).await { + trace!(ino, repo = repo_name, "ensure_repo_inode: reusing"); + return (ino, inode); } warn!( ino, repo = repo_name, "ensure_repo_inode: attr missing, rebuilding" ); - return self.make_repo_dir_attr(ino).await; + return self.make_repo_dir_inode(ino); } } - // Check for orphaned slot (slot exists but not in child_inodes). 
- if let Some(idx) = self - .composite - .slots - .iter() - .position(|s| s.inner.repo_name() == repo_name) - { - return self.register_repo_slot(idx, display_name, parent_ino).await; - } - - // Allocate truly new slot. - let ino = self.composite.icache.allocate_inode(); - trace!( - ino, - repo = repo_name, - "ensure_repo_inode: allocated new inode" - ); - - self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: display_name.into(), - parent: Some(parent_ino), - attr: None, - children: None, - }, - ) - .await; - + // Create new RepoFs and register as child. let repo = RepoFs::new( self.client.clone(), self.name.clone(), repo_name.to_owned(), default_branch.to_owned(), - self.composite.icache.fs_owner(), - // TODO(markovejnovic): Unnecessary clone. Refactoring for clearer ownership semantics - // would be ideal. + self.composite.fs_owner(), self.cache_config.clone(), ) .await; - let mut bridge = HashMapBridge::new(); - bridge.insert_inode(ino, RepoFs::ROOT_INO); - - let idx = self.composite.slots.len(); - self.composite.slots.push(ChildSlot { - inner: repo, - bridge, - }); - self.composite.child_inodes.insert(ino, idx); - self.composite.inode_to_slot.insert(ino, idx); - - self.make_repo_dir_attr(ino).await - } - - /// Allocate a new inode, register it in an existing (orphaned) slot, and - /// return `(ino, attr)`. - async fn register_repo_slot( - &mut self, - idx: usize, - display_name: &str, - parent_ino: Inode, - ) -> (Inode, FileAttr) { - let ino = self.composite.icache.allocate_inode(); - trace!(ino, idx, "register_repo_slot: reusing orphaned slot"); + let outer_ino = self + .composite + .add_child_with_parent(repo, RepoFs::ROOT_INO, parent_ino); + trace!( + ino = outer_ino, + repo = repo_name, + "ensure_repo_inode: allocated new inode" + ); + // Register in directory cache so readdir sees it. 
self.composite - .icache - .insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: display_name.into(), - parent: Some(parent_ino), - attr: None, - children: None, - }, + .directory_cache + .insert( + git_fs::fs::LoadedAddr(parent_ino), + OsString::from(display_name), + git_fs::fs::LoadedAddr(outer_ino), + true, ) .await; - warn!( - ino, - idx, - "register_repo_slot: resetting bridge for orphaned slot; \ - inner filesystem will not receive forget for stale inode mappings" - ); - self.composite.slots[idx].bridge = HashMapBridge::new(); - self.composite.slots[idx] - .bridge - .insert_inode(ino, RepoFs::ROOT_INO); - self.composite.child_inodes.insert(ino, idx); - self.composite.inode_to_slot.insert(ino, idx); - - self.make_repo_dir_attr(ino).await + let inode = self + .composite + .delegated_getattr(outer_ino) + .await + .unwrap_or_else(|_| { + let now = SystemTime::now(); + INode { + addr: outer_ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: Some(parent_ino), + size: 0, + itype: INodeType::Directory, + } + }); + (outer_ino, inode) } - /// Build and cache a directory attr for `ino`, returning `(ino, attr)`. - async fn make_repo_dir_attr(&self, ino: Inode) -> (Inode, FileAttr) { + /// Build a directory inode for `ino`, returning `(ino, inode)`. 
+ fn make_repo_dir_inode(&self, ino: InodeAddr) -> (InodeAddr, INode) { let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, - 0o755, - now, - now, - self.composite.icache.fs_owner(), - self.composite.icache.block_size(), - ), + let inode = INode { + addr: ino, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.composite.fs_owner().0, + gid: self.composite.fs_owner().1, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, }; - self.composite.icache.cache_attr(ino, attr).await; - (ino, attr) + self.composite.cache_inode(inode); + (ino, inode) } /// Fetch a repo by name via the API. @@ -398,62 +251,36 @@ impl OrgFs { } #[async_trait::async_trait] -impl super::common::InodeCachePeek for OrgFs { - async fn peek_attr(&self, ino: Inode) -> Option { - self.composite.icache.get_attr(ino).await - } -} - -#[async_trait::async_trait] -impl Fs for OrgFs { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; - +impl ChildFs for OrgFs { #[instrument(name = "OrgFs::lookup", skip(self), fields(org = %self.name))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { + async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; match role { InodeRole::OrgRoot => { - // TODO(MES-674): Cleanup "special" casing for github. let name_str = name.to_str().ok_or(LookupError::InodeNotFound)?; if self.is_github() { - // name is an owner like "torvalds" — create lazily, no API validation. 
trace!(owner = name_str, "lookup: resolving github owner dir"); - let (ino, attr) = self.ensure_owner_inode(name_str).await; + let (ino, inode) = self.ensure_owner_inode(name_str).await; self.composite - .icache .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; - Ok(attr) + Ok(inode) } else { - // Children of org root are repos. trace!(repo = name_str, "lookup: resolving repo"); - - // Validate repo exists via API. let repo = self.wait_for_sync(name_str).await?; - - let (ino, attr) = self + let (ino, inode) = self .ensure_repo_inode(name_str, name_str, &repo.default_branch, Self::ROOT_INO) .await; let rc = self .composite - .icache .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; trace!(ino, repo = name_str, rc, "lookup: resolved repo inode"); - Ok(attr) + Ok(inode) } } InodeRole::OwnerDir => { - // TODO(MES-674): Cleanup "special" casing for github. - // Parent is an owner dir, name is a repo like "linux". let owner = self .owner_inodes .get(&parent) @@ -464,49 +291,32 @@ impl Fs for OrgFs { let encoded = Self::encode_github_repo_name(&full_decoded); trace!( - owner = %owner, - repo = repo_name_str, - encoded = %encoded, + owner = %owner, repo = repo_name_str, encoded = %encoded, "lookup: resolving github repo via owner dir" ); - // Validate via API (uses encoded name). 
let repo = self.wait_for_sync(&encoded).await?; - - let (ino, attr) = self + let (ino, inode) = self .ensure_repo_inode(&encoded, repo_name_str, &repo.default_branch, parent) .await; self.composite - .icache .inc_rc(ino) - .await .ok_or(LookupError::InodeNotFound)?; - Ok(attr) + Ok(inode) } InodeRole::RepoOwned => self.composite.delegated_lookup(parent, name).await, } } - #[instrument(name = "OrgFs::getattr", skip(self), fields(org = %self.name))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { - self.composite.delegated_getattr(ino).await - } - #[instrument(name = "OrgFs::readdir", skip(self), fields(org = %self.name))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { + async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError> { let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; match role { InodeRole::OrgRoot => { - // TODO(MES-674): Cleanup "special" casing for github. if self.is_github() { return Err(ReadDirError::NotPermitted); } - // List repos via API. let repos: Vec = self .client .org(&self.name) @@ -528,70 +338,53 @@ impl Fs for OrgFs { let mut entries = Vec::with_capacity(repo_infos.len()); for (repo_name, default_branch) in &repo_infos { - let (repo_ino, _) = self + let (_, inode) = self .ensure_repo_inode(repo_name, repo_name, default_branch, Self::ROOT_INO) .await; - entries.push(DirEntry { - ino: repo_ino, - name: repo_name.clone().into(), - kind: DirEntryType::Directory, - }); + entries.push((OsString::from(repo_name), inode)); } - self.composite.readdir_buf = entries; - Ok(&self.composite.readdir_buf) - } - InodeRole::OwnerDir if self.is_github() => { - // TODO(MES-674): Cleanup "special" casing for github. 
- Err(ReadDirError::NotPermitted) + Ok(entries) } + InodeRole::OwnerDir if self.is_github() => Err(ReadDirError::NotPermitted), InodeRole::OwnerDir => Err(ReadDirError::NotADirectory), - InodeRole::RepoOwned => self.composite.delegated_readdir(ino).await, + InodeRole::RepoOwned => { + let dir_entries: Vec<_> = self + .composite + .delegated_readdir(ino) + .await? + .iter() + .map(|e| (e.name.clone(), e.ino)) + .collect(); + let mut entries = Vec::with_capacity(dir_entries.len()); + for (name, child_ino) in dir_entries { + if let Some(inode) = self.composite.inode_table.get(&child_ino).await { + entries.push((name, inode)); + } + } + Ok(entries) + } } } #[instrument(name = "OrgFs::open", skip(self), fields(org = %self.name))] - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result { + async fn open(&mut self, ino: InodeAddr, flags: OpenFlags) -> Result { self.composite.delegated_open(ino, flags).await } #[instrument(name = "OrgFs::read", skip(self), fields(org = %self.name))] async fn read( &mut self, - ino: Inode, + _ino: InodeAddr, fh: FileHandle, offset: u64, size: u32, - flags: OpenFlags, - lock_owner: Option, ) -> Result { - self.composite - .delegated_read(ino, fh, offset, size, flags, lock_owner) - .await + self.composite.delegated_read(fh, offset, size).await } #[instrument(name = "OrgFs::release", skip(self), fields(org = %self.name))] - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), ReleaseError> { - self.composite - .delegated_release(ino, fh, flags, flush) - .await - } - - #[instrument(name = "OrgFs::forget", skip(self), fields(org = %self.name))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - let evicted = self.composite.delegated_forget(ino, nlookups).await; - if evicted { - self.owner_inodes.remove(&ino); - } - } - - async fn statfs(&mut self) -> Result { - Ok(self.composite.delegated_statfs()) + async fn release(&mut self, _ino: InodeAddr, fh: FileHandle) -> 
Result<(), ReleaseError> { + self.composite.delegated_release(fh).await } } diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index 11b334a7..acff3d04 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -2,197 +2,436 @@ //! //! This module directly accesses the mesa repo through the Rust SDK, on a per-repo basis. +use std::collections::HashMap; +use std::ffi::OsString; use std::future::Future; -use std::{collections::HashMap, ffi::OsStr, path::PathBuf, time::SystemTime}; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::SystemTime; +use std::{ffi::OsStr, path::PathBuf}; use base64::Engine as _; use bytes::Bytes; use mesa_dev::MesaClient; use mesa_dev::low_level::content::{Content, DirEntry as MesaDirEntry}; use num_traits::cast::ToPrimitive as _; -use tracing::{Instrument as _, instrument, trace, warn}; +use tracing::warn; use git_fs::cache::fcache::FileCache; use git_fs::cache::traits::{AsyncReadableCache as _, AsyncWritableCache as _}; +use git_fs::fs::async_fs::{FileReader, FsDataProvider}; +use git_fs::fs::{ + INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags as AsyncOpenFlags, +}; use crate::app_config::CacheConfig; -use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; -use crate::fs::r#trait::{ - DirEntry, DirEntryType, FileAttr, FileHandle, FileOpenOptions, FilesystemStats, Fs, Inode, - LockOwner, OpenFile, OpenFlags, -}; use super::common::MesaApiError; -pub use super::common::{ - GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, -}; -use super::icache as mescloud_icache; -use super::icache::{InodeControlBlock, MescloudICache}; +pub use super::common::{LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; + +fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { + match &e { + MesaApiError::Response { status, .. 
} if *status == 404 => { + std::io::Error::from_raw_os_error(libc::ENOENT) + } + MesaApiError::Reqwest(_) + | MesaApiError::ReqwestMiddleware(_) + | MesaApiError::Serde(_) + | MesaApiError::SerdePath(_) + | MesaApiError::Io(_) + | MesaApiError::Response { .. } => std::io::Error::other(e), + } +} -pub(super) struct RepoResolver { +#[derive(Clone)] +pub(super) struct MesRepoProvider { + inner: Arc, +} + +struct MesRepoProviderInner { client: MesaClient, org_name: String, repo_name: String, ref_: String, fs_owner: (u32, u32), - block_size: u32, + next_addr: AtomicU64, + /// Maps inode addresses to repo-relative paths (e.g., "src/main.rs"). + /// Root directory maps to an empty `PathBuf`. + path_map: scc::HashMap, + file_cache: Option>>, +} + +impl MesRepoProvider { + pub(super) fn new( + client: MesaClient, + org_name: String, + repo_name: String, + ref_: String, + fs_owner: (u32, u32), + file_cache: Option>>, + ) -> Self { + Self { + inner: Arc::new(MesRepoProviderInner { + client, + org_name, + repo_name, + ref_, + fs_owner, + next_addr: AtomicU64::new(2), // 1 is reserved for root + path_map: scc::HashMap::new(), + file_cache, + }), + } + } + + /// Store the path for the root inode address. + pub(super) fn seed_root_path(&self, root_addr: InodeAddr) { + // Root maps to empty PathBuf (no path prefix for API calls) + drop(self.inner.path_map.insert_sync(root_addr, PathBuf::new())); + } + + /// Remove the path entry for an inode. Called during forget/cleanup. + #[expect(dead_code, reason = "will be needed when child forget is implemented")] + pub(super) fn remove_path(&self, addr: InodeAddr) { + self.inner.path_map.remove_sync(&addr); + } + + /// The name of the repository. 
+ pub(super) fn repo_name(&self) -> &str { + &self.inner.repo_name + } } -impl IcbResolver for RepoResolver { - type Icb = InodeControlBlock; - type Error = LookupError; +impl FsDataProvider for MesRepoProvider { + type Reader = MesFileReader; + + fn lookup( + &self, + parent: INode, + name: &OsStr, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + let name = name.to_os_string(); + async move { + let parent_path = inner + .path_map + .get_async(&parent.addr) + .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let child_path = parent_path.join(&name); + let child_path_str = child_path.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })?; + + let content = inner + .client + .org(&inner.org_name) + .repos() + .at(&inner.repo_name) + .content() + .get(Some(inner.ref_.as_str()), Some(child_path_str), Some(1u64)) + .await + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let now = SystemTime::now(); + let (uid, gid) = inner.fs_owner; + + let (itype, size) = match &content { + Content::File(f) => (INodeType::File, f.size.to_u64().unwrap_or(0)), + Content::Symlink(s) => (INodeType::File, s.size.to_u64().unwrap_or(0)), + Content::Dir(_) => (INodeType::Directory, 0), + }; + + let perms = if itype == INodeType::Directory { + InodePerms::from_bits_truncate(0o755) + } else { + InodePerms::from_bits_truncate(0o644) + }; + + let addr = inner.next_addr.fetch_add(1, Ordering::Relaxed); + drop(inner.path_map.insert_async(addr, child_path).await); + + Ok(INode { + addr, + permissions: perms, + uid, + gid, + create_time: now, + last_modified_at: now, + parent: Some(parent.addr), + size, + itype, + }) + } + } + + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send { + let inner = Arc::clone(&self.inner); + async move { + let parent_path = inner + .path_map + .get_async(&parent.addr) 
+ .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let api_path = if parent_path.as_os_str().is_empty() { + None + } else { + Some( + parent_path + .to_str() + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })? + .to_owned(), + ) + }; + + let content = inner + .client + .org(&inner.org_name) + .repos() + .at(&inner.repo_name) + .content() + .get(Some(inner.ref_.as_str()), api_path.as_deref(), Some(1u64)) + .await + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let dir = match content { + Content::Dir(d) => d, + Content::File(_) | Content::Symlink(_) => { + return Err(std::io::Error::from_raw_os_error(libc::ENOTDIR)); + } + }; + + let now = SystemTime::now(); + let (uid, gid) = inner.fs_owner; + let mut entries = Vec::with_capacity(dir.entries.len()); + + for entry in dir.entries { + let (name, itype, size) = match entry { + MesaDirEntry::File(f) => { + let Some(name) = f.name else { continue }; + (name, INodeType::File, f.size.to_u64().unwrap_or(0)) + } + MesaDirEntry::Symlink(s) => { + let Some(name) = s.name else { continue }; + (name, INodeType::File, s.size.to_u64().unwrap_or(0)) + } + MesaDirEntry::Dir(d) => { + let Some(name) = d.name else { continue }; + (name, INodeType::Directory, 0) + } + }; + + let perms = if itype == INodeType::Directory { + InodePerms::from_bits_truncate(0o755) + } else { + InodePerms::from_bits_truncate(0o644) + }; + + let addr = inner.next_addr.fetch_add(1, Ordering::Relaxed); + let child_path = parent_path.join(&name); + drop(inner.path_map.insert_async(addr, child_path).await); + + let inode = INode { + addr, + permissions: perms, + uid, + gid, + create_time: now, + last_modified_at: now, + parent: Some(parent.addr), + size, + itype, + }; + + entries.push((OsString::from(name), inode)); + } + + Ok(entries) + } + } - fn resolve( + fn open( &self, - ino: Inode, - stub: Option, - 
cache: &AsyncICache, - ) -> impl Future> + Send - where - Self: Sized, - { + inode: INode, + _flags: AsyncOpenFlags, + ) -> impl Future> + Send { + let inner = Arc::clone(&self.inner); + async move { + let path = inner + .path_map + .get_async(&inode.addr) + .await + .map(|e| e.get().clone()) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + Ok(MesFileReader { + client: inner.client.clone(), + org_name: inner.org_name.clone(), + repo_name: inner.repo_name.clone(), + ref_: inner.ref_.clone(), + path, + file_cache: inner.file_cache.clone(), + inode_addr: inode.addr, + }) + } + } +} + +pub(super) struct MesFileReader { + client: MesaClient, + org_name: String, + repo_name: String, + ref_: String, + path: PathBuf, + file_cache: Option>>, + inode_addr: InodeAddr, +} + +impl FileReader for MesFileReader { + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send { let client = self.client.clone(); let org_name = self.org_name.clone(); let repo_name = self.repo_name.clone(); let ref_ = self.ref_.clone(); - let fs_owner = self.fs_owner; - let block_size = self.block_size; + let path = self.path.clone(); + let file_cache = self.file_cache.clone(); + let inode_addr = self.inode_addr; async move { - let stub = stub.ok_or(LookupError::InodeNotFound)?; - let file_path = build_repo_path(stub.parent, &stub.path, cache, RepoFs::ROOT_INO).await; - - // Non-root inodes must have a resolvable path. - if stub.parent.is_some() && file_path.is_none() { - return Err(LookupError::InodeNotFound); + // Try the file cache first. + if let Some(cache) = &file_cache + && let Some(data) = cache.get(&inode_addr).await + { + let start = usize::try_from(offset) + .unwrap_or(data.len()) + .min(data.len()); + let end = start.saturating_add(size as usize).min(data.len()); + return Ok(Bytes::copy_from_slice(&data[start..end])); } + // Cache miss -- fetch from the Mesa API. 
+ let path_str = path.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "path contains non-UTF-8 characters", + ) + })?; + + let api_path = if path_str.is_empty() { + None + } else { + Some(path_str) + }; + let content = client .org(&org_name) .repos() .at(&repo_name) .content() - .get(Some(ref_.as_str()), file_path.as_deref(), Some(1u64)) + .get(Some(ref_.as_str()), api_path, None) .await - .map_err(MesaApiError::from)?; - - let now = SystemTime::now(); - let attr = match &content { - Content::File(f) => { - let size = f.size.to_u64().unwrap_or(0); - FileAttr::RegularFile { - common: mescloud_icache::make_common_file_attr( - ino, 0o644, now, now, fs_owner, block_size, - ), - size, - blocks: mescloud_icache::blocks_of_size(block_size, size), - } + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let encoded_content = match content { + Content::File(f) => f.content.unwrap_or_default(), + Content::Symlink(s) => s.content.unwrap_or_default(), + Content::Dir(_) => { + return Err(std::io::Error::from_raw_os_error(libc::EISDIR)); } - Content::Symlink(s) => { - let size = s.size.to_u64().unwrap_or(0); - FileAttr::RegularFile { - common: mescloud_icache::make_common_file_attr( - ino, 0o644, now, now, fs_owner, block_size, - ), - size, - blocks: mescloud_icache::blocks_of_size(block_size, size), - } - } - Content::Dir(_) => FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - ino, 0o755, now, now, fs_owner, block_size, - ), - }, }; - let children = match content { - Content::Dir(d) => Some( - d.entries - .into_iter() - .filter_map(|e| { - let (name, kind) = match e { - MesaDirEntry::File(f) => (f.name?, DirEntryType::RegularFile), - // TODO(MES-712): return DirEntryType::Symlink once readlink is wired up. 
- MesaDirEntry::Symlink(s) => (s.name?, DirEntryType::RegularFile), - MesaDirEntry::Dir(d) => (d.name?, DirEntryType::Directory), - }; - Some((name, kind)) - }) - .collect(), - ), - Content::File(_) | Content::Symlink(_) => None, - }; + let decoded = base64::engine::general_purpose::STANDARD + .decode(&encoded_content) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - Ok(InodeControlBlock { - parent: stub.parent, - path: stub.path, - rc: stub.rc, - attr: Some(attr), - children, - }) + let start = usize::try_from(offset) + .unwrap_or(decoded.len()) + .min(decoded.len()); + let end = start.saturating_add(size as usize).min(decoded.len()); + let result = Bytes::copy_from_slice(&decoded[start..end]); + + // Store the decoded content in the cache for future reads. + if let Some(cache) = &file_cache + && let Err(e) = cache.insert(&inode_addr, decoded).await + { + warn!(error = ?e, inode_addr, "failed to cache file content"); + } + + Ok(result) } - .instrument(tracing::info_span!("RepoResolver::resolve", ino)) } } -/// Walk the parent chain in the cache to build the repo-relative path. -/// Returns `None` for the root inode (maps to `path=None` in the mesa content API). -async fn build_repo_path( - parent: Option, - name: &std::path::Path, - cache: &AsyncICache, - root_ino: Inode, -) -> Option { - /// Maximum parent-chain depth before bailing out. Prevents infinite loops - /// if a bug creates a cycle in the parent pointers. 
- const MAX_DEPTH: usize = 1024; - - let parent = parent?; - if parent == root_ino { - return name.to_str().map(String::from); +mod repo_fs_inner { + #![allow(clippy::future_not_send, clippy::mem_forget)] + use git_fs::cache::async_backed::FutureBackedCache; + use git_fs::fs::async_fs::AsyncFs; + use git_fs::fs::{INode, InodeAddr}; + use ouroboros::self_referencing; + + use super::MesRepoProvider; + + #[self_referencing] + pub struct RepoFsInner { + pub(super) inode_table: FutureBackedCache, + #[borrows(inode_table)] + #[covariant] + pub(super) fs: AsyncFs<'this, MesRepoProvider>, } - let mut components = vec![name.to_path_buf()]; - let mut current = parent; - for _ in 0..MAX_DEPTH { - if current == root_ino { - break; + impl RepoFsInner { + pub fn create( + inode_table: FutureBackedCache, + provider: MesRepoProvider, + ) -> Self { + RepoFsInnerBuilder { + inode_table, + fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), + } + .build() } - let (path, next_parent) = cache - .get_icb(current, |icb| (icb.path.clone(), icb.parent)) - .await?; - components.push(path); - current = next_parent?; } - if current != root_ino { - tracing::warn!("build_repo_path: exceeded MAX_DEPTH={MAX_DEPTH}, possible parent cycle"); - return None; - } - components.reverse(); - let joined: PathBuf = components.iter().collect(); - joined.to_str().map(String::from) } +use repo_fs_inner::RepoFsInner; /// A filesystem rooted at a single mesa repository. /// -/// Implements [`Fs`] for navigating files and directories within one repo. -/// Does not handle organizations or multi-repo hierarchy — that is [`super::MesaFS`]'s job. +/// Wraps [`AsyncFs`] via ouroboros to co-locate the inode table +/// and the filesystem that borrows it. Implements [`Fs`] as a thin adapter. 
pub struct RepoFs { - client: MesaClient, - org_name: String, - repo_name: String, - ref_: String, - - icache: MescloudICache, - file_table: FileTable, - readdir_buf: Vec, - open_files: HashMap, - file_cache: Option>, + inner: RepoFsInner, + /// Reference counts for inodes held by the kernel. + refcounts: rustc_hash::FxHashMap, + /// Open file handles mapped to readers. + open_files: HashMap>, + /// Provider clone for accessing `repo_name` and `path_map` cleanup. + provider: MesRepoProvider, } impl RepoFs { - pub(crate) const ROOT_INO: Inode = 1; - const BLOCK_SIZE: u32 = 4096; + pub(crate) const ROOT_INO: InodeAddr = 1; /// Create a new `RepoFs` for a specific org and repo. pub async fn new( @@ -203,24 +442,15 @@ impl RepoFs { fs_owner: (u32, u32), cache_config: CacheConfig, ) -> Self { - let resolver = RepoResolver { - client: client.clone(), - org_name: org_name.clone(), - repo_name: repo_name.clone(), - ref_: ref_.clone(), - fs_owner, - block_size: Self::BLOCK_SIZE, - }; - let file_cache = match cache_config.max_size { Some(max_size) if max_size.as_u64() > 0 => { let cache_dir = cache_config.path.join(&org_name).join(&repo_name); let max_bytes = max_size.as_u64().try_into().unwrap_or(usize::MAX); match FileCache::new(&cache_dir, max_bytes).await { - Ok(cache) => Some(cache), + Ok(cache) => Some(Arc::new(cache)), Err(e) => { warn!(error = ?e, org = %org_name, repo = %repo_name, - "failed to create file cache, continuing without caching",); + "failed to create file cache, continuing without caching"); None } } @@ -228,317 +458,140 @@ impl RepoFs { _ => None, }; + let provider = + MesRepoProvider::new(client, org_name, repo_name, ref_, fs_owner, file_cache); + provider.seed_root_path(Self::ROOT_INO); + + let root = INode { + addr: Self::ROOT_INO, + permissions: InodePerms::from_bits_truncate(0o755), + uid: fs_owner.0, + gid: fs_owner.1, + create_time: SystemTime::now(), + last_modified_at: SystemTime::now(), + parent: None, + size: 0, + itype: 
INodeType::Directory, + }; + + let inode_table = git_fs::cache::async_backed::FutureBackedCache::default(); + inode_table.insert_sync(root.addr, root); + + let inner = RepoFsInner::create(inode_table, provider.clone()); + + let mut refcounts = rustc_hash::FxHashMap::default(); + refcounts.insert(Self::ROOT_INO, 1); + Self { - client, - org_name, - repo_name, - ref_, - icache: MescloudICache::new(resolver, Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), - file_table: FileTable::new(), - readdir_buf: Vec::new(), + inner, + refcounts, open_files: HashMap::new(), - file_cache, + provider, } } /// The name of the repository this filesystem is rooted at. pub(crate) fn repo_name(&self) -> &str { - &self.repo_name - } - - /// Build the repo-relative path for an inode by walking up the parent chain. - /// - /// Returns `None` for the root inode (the repo top-level maps to `path=None` in the - /// mesa content API). - async fn path_of_inode(&self, ino: Inode) -> Option { - /// Maximum parent-chain depth before bailing out. 
- const MAX_DEPTH: usize = 1024; - - if ino == Self::ROOT_INO { - return None; - } - - let mut components = Vec::new(); - let mut current = ino; - for _ in 0..MAX_DEPTH { - if current == Self::ROOT_INO { - break; - } - let (path, parent) = self - .icache - .get_icb(current, |icb| (icb.path.clone(), icb.parent)) - .await?; - components.push(path); - current = parent?; - } - if current != Self::ROOT_INO { - tracing::warn!( - ino, - "path_of_inode: exceeded MAX_DEPTH={MAX_DEPTH}, possible parent cycle" - ); - return None; - } - components.reverse(); - let joined: PathBuf = components.iter().collect(); - joined.to_str().map(String::from) + self.provider.repo_name() } } -#[async_trait::async_trait] -impl super::common::InodeCachePeek for RepoFs { - async fn peek_attr(&self, ino: Inode) -> Option { - self.icache.get_attr(ino).await - } +#[expect( + clippy::wildcard_enum_match_arm, + reason = "mapping all ErrorKind variants is impractical; EIO is the sensible default" +)] +fn io_error_to_errno(e: &std::io::Error) -> i32 { + e.raw_os_error().unwrap_or_else(|| match e.kind() { + std::io::ErrorKind::NotFound => libc::ENOENT, + std::io::ErrorKind::PermissionDenied => libc::EACCES, + std::io::ErrorKind::AlreadyExists => libc::EEXIST, + _ => libc::EIO, + }) } #[async_trait::async_trait] -impl Fs for RepoFs { - type LookupError = LookupError; - type GetAttrError = GetAttrError; - type OpenError = OpenError; - type ReadError = ReadError; - type ReaddirError = ReadDirError; - type ReleaseError = ReleaseError; - - #[instrument(name = "RepoFs::lookup", skip(self), fields(repo = %self.repo_name))] - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { - debug_assert!( - self.icache.contains(parent), - "lookup: parent inode {parent} not in inode table" - ); - - let ino = self.icache.ensure_child_ino(parent, name).await; - let attr = self - .icache - .get_or_resolve(ino, |icb| icb.attr) - .await? 
- .ok_or(LookupError::InodeNotFound)?; - - let rc = self - .icache - .inc_rc(ino) +impl super::common::ChildFs for RepoFs { + async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { + let tracked = self + .inner + .borrow_fs() + .lookup(LoadedAddr(parent), name) .await - .ok_or(LookupError::InodeNotFound)?; - trace!(ino, ?name, rc, "resolved inode"); - Ok(attr) - } - - #[instrument(name = "RepoFs::getattr", skip(self), fields(repo = %self.repo_name))] - async fn getattr( - &mut self, - ino: Inode, - _fh: Option, - ) -> Result { - self.icache.get_attr(ino).await.ok_or_else(|| { - warn!(ino, "getattr on unknown inode"); - GetAttrError::InodeNotFound - }) + .map_err(|e| { + if io_error_to_errno(&e) == libc::ENOENT { + LookupError::InodeNotFound + } else { + LookupError::RemoteMesaError(MesaApiError::Io(e)) + } + })?; + *self.refcounts.entry(tracked.inode.addr).or_insert(0) += 1; + Ok(tracked.inode) } - #[instrument(name = "RepoFs::readdir", skip(self), fields(repo = %self.repo_name))] - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - debug_assert!( - self.icache.contains(ino), - "readdir: inode {ino} not in inode table" - ); - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::Directory { .. }) | None - ), - "readdir: inode {ino} has non-directory cached attr" - ); - - let children = self - .icache - .get_or_resolve(ino, |icb| icb.children.clone()) - .await? - .ok_or(ReadDirError::NotADirectory)?; - - trace!( - ino, - count = children.len(), - "readdir: resolved directory listing from icache" - ); - - self.icache.evict_zero_rc_children(ino).await; - - let mut entries = Vec::with_capacity(children.len()); - for (name, kind) in &children { - let child_ino = self.icache.ensure_child_ino(ino, OsStr::new(name)).await; - // Only cache directory attrs in readdir. File attrs are left as - // None so that lookup triggers the resolver to fetch the real file - // size. 
Caching placeholder file attrs (size=0) would poison - // needs_resolve(), preventing resolution on subsequent lookups. - if *kind == DirEntryType::Directory { - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: mescloud_icache::make_common_file_attr( - child_ino, - 0o755, - now, - now, - self.icache.fs_owner(), - self.icache.block_size(), - ), - }; - self.icache.cache_attr(child_ino, attr).await; - } - entries.push(DirEntry { - ino: child_ino, - name: name.clone().into(), - kind: *kind, - }); - } - - self.readdir_buf = entries; - Ok(&self.readdir_buf) + async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError> { + let mut entries = Vec::new(); + self.inner + .borrow_fs() + .readdir(LoadedAddr(ino), 0, |de, _offset| { + entries.push((de.name.to_os_string(), de.inode)); + false + }) + .await + .map_err(|e| { + if io_error_to_errno(&e) == libc::ENOTDIR { + ReadDirError::NotADirectory + } else if io_error_to_errno(&e) == libc::ENOENT { + ReadDirError::InodeNotFound + } else { + ReadDirError::RemoteMesaError(MesaApiError::Io(e)) + } + })?; + Ok(entries) } - #[instrument(name = "RepoFs::open", skip(self), fields(repo = %self.repo_name))] - async fn open(&mut self, ino: Inode, _flags: OpenFlags) -> Result { - if !self.icache.contains(ino) { - warn!(ino, "open on unknown inode"); - return Err(OpenError::InodeNotFound); - } - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::RegularFile { .. 
}) | None - ), - "open: inode {ino} has non-file cached attr" - ); - let fh = self.file_table.allocate(); - self.open_files.insert(fh, ino); - trace!(ino, fh, "assigned file handle"); - Ok(OpenFile { - handle: fh, - options: FileOpenOptions::empty(), - }) + async fn open( + &mut self, + ino: InodeAddr, + flags: AsyncOpenFlags, + ) -> Result { + let open_file = self + .inner + .borrow_fs() + .open(LoadedAddr(ino), flags) + .await + .map_err(|_| OpenError::InodeNotFound)?; + self.open_files + .insert(open_file.fh, Arc::clone(&open_file.reader)); + Ok(open_file.fh) } - #[instrument(name = "RepoFs::read", skip(self), fields(repo = %self.repo_name))] async fn read( &mut self, - ino: Inode, - fh: FileHandle, + _ino: InodeAddr, + fh: git_fs::fs::FileHandle, offset: u64, size: u32, - _flags: OpenFlags, - _lock_owner: Option, ) -> Result { - let &file_ino = self.open_files.get(&fh).ok_or_else(|| { - warn!(fh, "read on unknown file handle"); - ReadError::FileNotOpen - })?; - debug_assert!( - file_ino == ino, - "read: file handle {fh} maps to inode {file_ino}, but caller passed inode {ino}" - ); - debug_assert!( - matches!( - self.icache.get_attr(ino).await, - Some(FileAttr::RegularFile { .. }) | None - ), - "read: inode {ino} has non-file cached attr" - ); - - // Try the file cache first. - if let Some(cache) = &self.file_cache - && let Some(data) = cache.get(&ino).await - { - let start = usize::try_from(offset) - .unwrap_or(data.len()) - .min(data.len()); - let end = start.saturating_add(size as usize).min(data.len()); - trace!( - ino, - fh, - cached = true, - decoded_len = data.len(), - start, - end, - "read content" - ); - return Ok(Bytes::copy_from_slice(&data[start..end])); - } - - // Cache miss — fetch from the Mesa API. 
- let file_path = self.path_of_inode(ino).await; - - if ino != Self::ROOT_INO && file_path.is_none() { - warn!(ino, "read: path_of_inode returned None for non-root inode"); - return Err(ReadError::InodeNotFound); - } - - let content = self - .client - .org(&self.org_name) - .repos() - .at(&self.repo_name) - .content() - .get(Some(self.ref_.as_str()), file_path.as_deref(), None) - .await - .map_err(MesaApiError::from)?; - - let encoded_content = match content { - Content::File(f) => f.content.unwrap_or_default(), - // TODO(MES-712): return ReadError::NotAFile once symlinks are surfaced as - // DirEntryType::Symlink, and implement readlink to return the link target. - Content::Symlink(s) => s.content.unwrap_or_default(), - Content::Dir(_) => return Err(ReadError::NotAFile), - }; - - let decoded = base64::engine::general_purpose::STANDARD.decode(&encoded_content)?; - - let start = usize::try_from(offset) - .unwrap_or(decoded.len()) - .min(decoded.len()); - let end = start.saturating_add(size as usize).min(decoded.len()); - let result = Bytes::copy_from_slice(&decoded[start..end]); - trace!(ino, fh, cached = false, path = ?file_path, decoded_len = decoded.len(), start, end, "read content"); - - // Store the decoded content in the cache for future reads. 
- if let Some(cache) = &self.file_cache - && let Err(e) = cache.insert(&ino, decoded).await - { - warn!(error = ?e, ino, "failed to cache file content"); - } - - Ok(result) + let reader = self.open_files.get(&fh).ok_or(ReadError::FileNotOpen)?; + reader.read(offset, size).await.map_err(|e| { + if io_error_to_errno(&e) == libc::EISDIR { + ReadError::NotAFile + } else if io_error_to_errno(&e) == libc::ENOENT { + ReadError::InodeNotFound + } else { + ReadError::RemoteMesaError(MesaApiError::Io(e)) + } + }) } - #[instrument(name = "RepoFs::release", skip(self), fields(repo = %self.repo_name))] async fn release( &mut self, - ino: Inode, - fh: FileHandle, - _flags: OpenFlags, - _flush: bool, + _ino: InodeAddr, + fh: git_fs::fs::FileHandle, ) -> Result<(), ReleaseError> { - let released_ino = self.open_files.remove(&fh).ok_or_else(|| { - warn!(fh, "release on unknown file handle"); - ReleaseError::FileNotOpen - })?; - debug_assert!( - released_ino == ino, - "release: file handle {fh} mapped to inode {released_ino}, but caller passed inode {ino}" - ); - trace!(ino = released_ino, fh, "closed file handle"); + self.open_files + .remove(&fh) + .ok_or(ReleaseError::FileNotOpen)?; Ok(()) } - - #[instrument(name = "RepoFs::forget", skip(self), fields(repo = %self.repo_name))] - async fn forget(&mut self, ino: Inode, nlookups: u64) { - debug_assert!( - self.icache.contains(ino), - "forget: inode {ino} not in inode table" - ); - - self.icache.forget(ino, nlookups).await; - } - - async fn statfs(&mut self) -> Result { - Ok(self.icache.statfs()) - } } diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 003e1b04..a696e56f 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,4 +1 @@ -pub mod fuser; -pub mod icache; pub mod mescloud; -pub mod r#trait; diff --git a/src/fs/trait.rs b/src/fs/trait.rs deleted file mode 100644 index f4d98529..00000000 --- a/src/fs/trait.rs +++ /dev/null @@ -1,375 +0,0 @@ -//! Generic trait for implementing filesystems. -//! -//! 
Note that this is a slightly cleaner interface than directly using fuser. The whole point of -//! this is to abstract away fuser-specific details. -use async_trait::async_trait; -use std::{ - ffi::{OsStr, OsString}, - time::{Duration, SystemTime}, -}; -use tracing::error; - -use bitflags::bitflags; -use bytes::Bytes; - -/// Type representing an inode. -pub type Inode = u64; - -pub type FileHandle = u64; - -/// An opaque lock owner identifier provided by the kernel. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct LockOwner(pub u64); - -bitflags! { - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub struct Permissions: u16 { - // Other - const OTHER_EXECUTE = 1 << 0; - const OTHER_WRITE = 1 << 1; - const OTHER_READ = 1 << 2; - - // Group - const GROUP_EXECUTE = 1 << 3; - const GROUP_WRITE = 1 << 4; - const GROUP_READ = 1 << 5; - - // Owner - const OWNER_EXECUTE = 1 << 6; - const OWNER_WRITE = 1 << 7; - const OWNER_READ = 1 << 8; - - // Special bits - const STICKY = 1 << 9; - const SETGID = 1 << 10; - const SETUID = 1 << 11; - - const OTHER_RWX = Self::OTHER_READ.bits() - | Self::OTHER_WRITE.bits() - | Self::OTHER_EXECUTE.bits(); - const GROUP_RWX = Self::GROUP_READ.bits() - | Self::GROUP_WRITE.bits() - | Self::GROUP_EXECUTE.bits(); - const OWNER_RWX = Self::OWNER_READ.bits() - | Self::OWNER_WRITE.bits() - | Self::OWNER_EXECUTE.bits(); - } -} - -bitflags! 
{ - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub struct OpenFlags: i32 { - // Access modes (mutually exclusive) - const RDONLY = libc::O_RDONLY; - const WRONLY = libc::O_WRONLY; - const RDWR = libc::O_RDWR; - - // Creation/status flags - const APPEND = libc::O_APPEND; - const TRUNC = libc::O_TRUNC; - const CREAT = libc::O_CREAT; - const EXCL = libc::O_EXCL; - - // Behavior flags - const NONBLOCK = libc::O_NONBLOCK; - const SYNC = libc::O_SYNC; - const DSYNC = libc::O_DSYNC; - const NOFOLLOW = libc::O_NOFOLLOW; - const CLOEXEC = libc::O_CLOEXEC; - const DIRECTORY = libc::O_DIRECTORY; - - #[cfg(target_os = "linux")] - const NOATIME = libc::O_NOATIME; - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct CommonFileAttr { - pub ino: Inode, - pub atime: SystemTime, - pub mtime: SystemTime, - pub ctime: SystemTime, - pub crtime: SystemTime, - pub perm: Permissions, - pub nlink: u32, - pub uid: u32, - pub gid: u32, - pub blksize: u32, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum FileAttr { - RegularFile { - common: CommonFileAttr, - size: u64, - blocks: u64, - }, - Directory { - common: CommonFileAttr, - }, - Symlink { - common: CommonFileAttr, - size: u64, - }, - CharDevice { - common: CommonFileAttr, - rdev: u64, - }, - BlockDevice { - common: CommonFileAttr, - rdev: u64, - }, - NamedPipe { - common: CommonFileAttr, - }, - Socket { - common: CommonFileAttr, - }, -} - -impl FileAttr { - pub fn common(&self) -> &CommonFileAttr { - match self { - Self::RegularFile { common, .. } - | Self::Directory { common } - | Self::Symlink { common, .. } - | Self::CharDevice { common, .. } - | Self::BlockDevice { common, .. } - | Self::NamedPipe { common } - | Self::Socket { common } => common, - } - } -} - -bitflags! 
{ - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub (crate) struct FileOpenOptions: u32 { - const DIRECT_IO = 1 << 0; - const KEEP_CACHE = 1 << 1; - const NONSEEKABLE = 1 << 2; - const STREAM = 1 << 4; - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct OpenFile { - pub handle: FileHandle, - pub options: FileOpenOptions, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum DirEntryType { - RegularFile, - Directory, - Symlink, - CharDevice, - BlockDevice, - NamedPipe, - Socket, -} - -impl TryFrom for FileAttr { - type Error = (); - - #[expect( - clippy::cast_possible_truncation, - reason = "metadata mode/nlink/blksize narrowing is intentional" - )] - #[expect( - clippy::cast_sign_loss, - reason = "nsecs from MetadataExt is always in [0, 999_999_999]" - )] - fn try_from(meta: std::fs::Metadata) -> Result { - use std::os::unix::fs::FileTypeExt as _; - use std::os::unix::fs::MetadataExt as _; - - fn to_systime(secs: i64, nsecs: i64) -> SystemTime { - if secs >= 0 { - std::time::UNIX_EPOCH + Duration::new(secs.cast_unsigned(), nsecs as u32) - } else { - // nsecs is always in [0, 999_999_999] from MetadataExt. - // For negative secs, subtract whole seconds then add back nsecs. 
- std::time::UNIX_EPOCH - Duration::from_secs((-secs).cast_unsigned()) - + Duration::from_nanos(nsecs.cast_unsigned()) - } - } - - let common_attr = CommonFileAttr { - ino: meta.ino(), - atime: to_systime(meta.atime(), meta.atime_nsec()), - mtime: to_systime(meta.mtime(), meta.mtime_nsec()), - ctime: to_systime(meta.ctime(), meta.ctime_nsec()), - crtime: to_systime(0, 0), // Not available in std::fs::Metadata - perm: Permissions::from_bits_truncate(meta.mode() as u16), - nlink: meta.nlink() as u32, - uid: meta.uid(), - gid: meta.gid(), - blksize: meta.blksize() as u32, - }; - - let ft = meta.file_type(); - if ft.is_file() { - Ok(Self::RegularFile { - common: common_attr, - size: meta.len(), - blocks: meta.blocks(), - }) - } else if ft.is_dir() { - Ok(Self::Directory { - common: common_attr, - }) - } else if ft.is_symlink() { - Ok(Self::Symlink { - common: common_attr, - size: meta.len(), - }) - } else if ft.is_char_device() { - Ok(Self::CharDevice { - common: common_attr, - rdev: meta.rdev(), - }) - } else if ft.is_block_device() { - Ok(Self::BlockDevice { - common: common_attr, - rdev: meta.rdev(), - }) - } else if ft.is_fifo() { - Ok(Self::NamedPipe { - common: common_attr, - }) - } else if ft.is_socket() { - Ok(Self::Socket { - common: common_attr, - }) - } else { - debug_assert!( - false, - "Unknown file type encountered in FileAttr conversion" - ); - Err(()) - } - } -} - -impl From for DirEntryType { - fn from(attr: FileAttr) -> Self { - match attr { - FileAttr::RegularFile { .. } => Self::RegularFile, - FileAttr::Directory { .. } => Self::Directory, - FileAttr::Symlink { .. } => Self::Symlink, - FileAttr::CharDevice { .. } => Self::CharDevice, - FileAttr::BlockDevice { .. } => Self::BlockDevice, - FileAttr::NamedPipe { .. } => Self::NamedPipe, - FileAttr::Socket { .. 
} => Self::Socket, - } - } -} - -impl TryFrom for DirEntryType { - type Error = (); - - fn try_from(ft: std::fs::FileType) -> Result { - use std::os::unix::fs::FileTypeExt as _; - - if ft.is_file() { - Ok(Self::RegularFile) - } else if ft.is_dir() { - Ok(Self::Directory) - } else if ft.is_symlink() { - Ok(Self::Symlink) - } else if ft.is_char_device() { - Ok(Self::CharDevice) - } else if ft.is_block_device() { - Ok(Self::BlockDevice) - } else if ft.is_fifo() { - Ok(Self::NamedPipe) - } else if ft.is_socket() { - Ok(Self::Socket) - } else { - debug_assert!( - false, - "Unknown file type encountered in DirEntryType conversion" - ); - error!(ft = ?ft, "Unknown file type encountered in DirEntryType conversion"); - Err(()) - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DirEntry { - pub ino: Inode, - // TODO(markovejnovic): This OsString is hella expensive - pub name: OsString, - pub kind: DirEntryType, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct FilesystemStats { - pub block_size: u32, - pub fragment_size: u64, - pub total_blocks: u64, - pub free_blocks: u64, - pub available_blocks: u64, - pub total_inodes: u64, - pub free_inodes: u64, - pub available_inodes: u64, - pub filesystem_id: u64, - pub mount_flags: u32, - pub max_filename_length: u32, -} - -#[async_trait] -pub trait Fs { - type LookupError: std::error::Error; - type GetAttrError: std::error::Error; - type OpenError: std::error::Error; - type ReadError: std::error::Error; - type ReaddirError: std::error::Error; - type ReleaseError: std::error::Error; - - /// For each lookup call made by the kernel, it expects the icache to be updated with the - /// returned `FileAttr`. - async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result; - - /// Can be called in two contexts -- the file is not open (in which case `fh` is `None`), - /// or the file is open (in which case `fh` is `Some`). 
- async fn getattr( - &mut self, - ino: Inode, - fh: Option, - ) -> Result; - - /// Read the contents of a directory. - async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], Self::ReaddirError>; - - /// Open a file for reading. - async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result; - - /// Read data from an open file. - #[expect(clippy::too_many_arguments, reason = "mirrors fuser read API")] - async fn read( - &mut self, - ino: Inode, - fh: FileHandle, - offset: u64, - size: u32, - flags: OpenFlags, - lock_owner: Option, - ) -> Result; - - /// Called when the kernel closes a file handle. - async fn release( - &mut self, - ino: Inode, - fh: FileHandle, - flags: OpenFlags, - flush: bool, - ) -> Result<(), Self::ReleaseError>; - - /// Called when the kernel is done with an inode. - async fn forget(&mut self, ino: Inode, nlookups: u64); - - /// Get filesystem statistics. - async fn statfs(&mut self) -> Result; -} diff --git a/tests/async_fs_correctness.rs b/tests/async_fs_correctness.rs new file mode 100644 index 00000000..5fe27a28 --- /dev/null +++ b/tests/async_fs_correctness.rs @@ -0,0 +1,609 @@ +#![allow(clippy::unwrap_used, clippy::expect_used, missing_docs)] + +mod common; + +use std::ffi::{OsStr, OsString}; + +use git_fs::cache::async_backed::FutureBackedCache; +use git_fs::fs::async_fs::{AsyncFs, InodeLifecycle}; +use git_fs::fs::{INode, INodeType, LoadedAddr, OpenFlags}; + +use common::async_fs_mocks::{MockFsDataProvider, MockFsState, make_inode}; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_inc_returns_count_after_increment() { + let table = FutureBackedCache::default(); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(table); + + assert_eq!(lifecycle.inc(100), 1, "first inc should return 1"); + assert_eq!(lifecycle.inc(100), 2, "second inc should return 2"); + assert_eq!(lifecycle.inc(100), 3, "third inc 
should return 3"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_returns_remaining_count() { + let table = FutureBackedCache::default(); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(table); + lifecycle.inc(100); + lifecycle.inc(100); + + assert_eq!(lifecycle.dec(&100), Some(1), "dec from 2 should give 1"); + assert_eq!(lifecycle.dec(&100), Some(0), "dec from 1 should give 0"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_unknown_addr_returns_none() { + let table: FutureBackedCache = FutureBackedCache::default(); + let mut lifecycle = InodeLifecycle::from_table(table); + + assert_eq!( + lifecycle.dec(&999), + None, + "dec on unknown key should return None" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_to_zero_evicts_from_table() { + let table = FutureBackedCache::default(); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(table); + lifecycle.inc(100); + + assert_eq!(lifecycle.dec(&100), Some(0)); + // The inode should have been evicted from the table. 
+ assert!( + lifecycle.table().get(&100).await.is_none(), + "inode should be evicted after refcount hits zero" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_count_decrements_by_n() { + let table: FutureBackedCache = FutureBackedCache::default(); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(table); + lifecycle.inc(100); + lifecycle.inc(100); + lifecycle.inc(100); // count = 3 + + assert_eq!( + lifecycle.dec_count(&100, 2), + Some(1), + "dec_count(3, 2) should give 1" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_dec_count_to_zero_evicts() { + let table = FutureBackedCache::default(); + let inode = make_inode(100, INodeType::File, 0, Some(1)); + table.insert_sync(100, inode); + + let mut lifecycle = InodeLifecycle::from_table(table); + lifecycle.inc(100); + lifecycle.inc(100); // count = 2 + + assert_eq!(lifecycle.dec_count(&100, 2), Some(0)); + assert!( + lifecycle.table().get(&100).await.is_none(), + "inode should be evicted after dec_count to zero" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lifecycle_table_returns_underlying_cache() { + let table = FutureBackedCache::default(); + let inode = make_inode(42, INodeType::Directory, 0, None); + table.insert_sync(42, inode); + + let lifecycle = InodeLifecycle::from_table(table); + + let fetched = lifecycle.table().get(&42).await; + assert_eq!( + fetched.map(|n| n.addr), + Some(42), + "table() should expose the underlying cache" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn new_seeds_root_inode_into_table() { + let table = FutureBackedCache::default(); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, &table).await; + + assert_eq!(fs.inode_count(), 1, "root 
should be the only inode"); + let fetched = table.get(&1).await; + assert_eq!( + fetched.map(|n| n.addr), + Some(1), + "root inode should be in the table" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn new_preseeded_does_not_insert_root() { + let table: FutureBackedCache = FutureBackedCache::default(); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new_preseeded(dp, &table); + + assert_eq!( + fs.inode_count(), + 0, + "preseeded constructor should not insert anything" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn statfs_reports_inode_count() { + let table = FutureBackedCache::default(); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, &table).await; + let stats = fs.statfs(); + + assert_eq!(stats.block_size, 4096); + assert_eq!(stats.total_inodes, 1, "should reflect the root inode"); + assert_eq!(stats.free_blocks, 0); + assert_eq!(stats.max_filename_length, 255); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn loaded_inode_returns_seeded_inode() { + let table = FutureBackedCache::default(); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, &table).await; + + let inode = fs.loaded_inode(LoadedAddr(1)).await.unwrap(); + assert_eq!(inode.addr, 1); + assert_eq!(inode.itype, INodeType::Directory); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn loaded_inode_returns_enoent_for_missing_addr() { + let table = FutureBackedCache::default(); + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, &table).await; + + let err = fs.loaded_inode(LoadedAddr(999)).await.unwrap_err(); + assert_eq!(err.raw_os_error(), 
Some(libc::ENOENT)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn getattr_delegates_to_loaded_inode() { + let table = FutureBackedCache::default(); + let root = make_inode(1, INodeType::Directory, 4096, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let fs = AsyncFs::new(dp, root, &table).await; + + let inode = fs.getattr(LoadedAddr(1)).await.unwrap(); + assert_eq!(inode.addr, 1); + assert_eq!(inode.size, 4096); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_resolves_child_via_data_provider() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 42, Some(1)); + + let mut state = MockFsState::default(); + state.lookups.insert((1, "readme.md".into()), child); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let tracked = fs + .lookup(LoadedAddr(1), OsStr::new("readme.md")) + .await + .unwrap(); + + assert_eq!(tracked.inode.addr, 10); + assert_eq!(tracked.inode.size, 42); + assert_eq!(tracked.inode.itype, INodeType::File); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_populates_inode_table() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 100, Some(1)); + + let mut state = MockFsState::default(); + state.lookups.insert((1, "file.txt".into()), child); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + fs.lookup(LoadedAddr(1), OsStr::new("file.txt")) + .await + .unwrap(); + + // The child should now be in the inode table. 
+ let cached = table.get(&10).await; + assert_eq!( + cached.map(|n| n.addr), + Some(10), + "child inode should be cached in the table after lookup" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_second_call_uses_cache() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 100, Some(1)); + + let mut state = MockFsState::default(); + state.lookups.insert((1, "cached.txt".into()), child); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let first = fs + .lookup(LoadedAddr(1), OsStr::new("cached.txt")) + .await + .unwrap(); + let second = fs + .lookup(LoadedAddr(1), OsStr::new("cached.txt")) + .await + .unwrap(); + + assert_eq!(first.inode.addr, second.inode.addr); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_propagates_provider_error() { + let root = make_inode(1, INodeType::Directory, 0, None); + // No lookups configured — provider will return ENOENT. 
+ let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let err = fs + .lookup(LoadedAddr(1), OsStr::new("nonexistent")) + .await + .unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::ENOENT)); +} + +// open and OpenFile::read tests + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_returns_file_handle_and_reader() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 5, Some(1)); + + let mut state = MockFsState::default(); + state + .file_contents + .insert(10, bytes::Bytes::from_static(b"hello")); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, &table).await; + + let open_file = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap(); + + assert!(open_file.fh >= 1, "file handle should start at 1"); + let data = open_file.read(0, 5).await.unwrap(); + assert_eq!(&data[..], b"hello"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_returns_eisdir_for_directory() { + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let err = fs.open(LoadedAddr(1), OpenFlags::RDONLY).await.unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::EISDIR)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_returns_enoent_for_missing_inode() { + let root = make_inode(1, INodeType::Directory, 0, None); + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let err = fs + .open(LoadedAddr(999), OpenFlags::RDONLY) + .await + .unwrap_err(); + 
assert_eq!(err.raw_os_error(), Some(libc::ENOENT)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_assigns_unique_file_handles() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 0, Some(1)); + + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = FutureBackedCache::default(); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, &table).await; + + let fh1 = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap().fh; + let fh2 = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap().fh; + + assert_ne!(fh1, fh2, "each open should produce a unique file handle"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn open_file_read_with_offset() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 11, Some(1)); + + let mut state = MockFsState::default(); + state + .file_contents + .insert(10, bytes::Bytes::from_static(b"hello world")); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, &table).await; + + let open_file = fs.open(LoadedAddr(10), OpenFlags::RDONLY).await.unwrap(); + + let data = open_file.read(6, 5).await.unwrap(); + assert_eq!(&data[..], b"world"); +} + +// readdir tests + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_lists_children_sorted_by_name() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_b = make_inode(10, INodeType::File, 10, Some(1)); + let child_a = make_inode(11, INodeType::File, 20, Some(1)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("b.txt"), child_b), + (OsString::from("a.txt"), child_a), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, 
&table).await; + + let mut entries: Vec<(OsString, u64)> = Vec::new(); + fs.readdir(LoadedAddr(1), 0, |entry, _offset| { + entries.push((entry.name.to_os_string(), entry.inode.addr)); + false // don't stop + }) + .await + .unwrap(); + + assert_eq!(entries.len(), 2); + assert_eq!(entries[0].0, "a.txt", "entries should be sorted by name"); + assert_eq!(entries[0].1, 11); + assert_eq!(entries[1].0, "b.txt"); + assert_eq!(entries[1].1, 10); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_respects_offset() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_a = make_inode(10, INodeType::File, 10, Some(1)); + let child_b = make_inode(11, INodeType::File, 20, Some(1)); + let child_c = make_inode(12, INodeType::File, 30, Some(1)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("a"), child_a), + (OsString::from("b"), child_b), + (OsString::from("c"), child_c), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + // First readdir to populate cache + fs.readdir(LoadedAddr(1), 0, |_, _| false).await.unwrap(); + + // Second readdir starting at offset 2 (skip first two) + let mut entries: Vec = Vec::new(); + fs.readdir(LoadedAddr(1), 2, |entry, _| { + entries.push(entry.name.to_os_string()); + false + }) + .await + .unwrap(); + + assert_eq!(entries, vec![OsString::from("c")]); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_stops_when_filler_returns_true() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_a = make_inode(10, INodeType::File, 10, Some(1)); + let child_b = make_inode(11, INodeType::File, 20, Some(1)); + let child_c = make_inode(12, INodeType::File, 30, Some(1)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("a"), child_a), + 
(OsString::from("b"), child_b), + (OsString::from("c"), child_c), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let mut count = 0; + fs.readdir(LoadedAddr(1), 0, |_, _| { + count += 1; + count >= 2 // stop after 2 entries + }) + .await + .unwrap(); + + assert_eq!(count, 2, "filler should have been called exactly twice"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_returns_enotdir_for_file() { + let root = make_inode(1, INodeType::Directory, 0, None); + let file = make_inode(10, INodeType::File, 100, Some(1)); + + let dp = MockFsDataProvider::new(MockFsState::default()); + + let table = FutureBackedCache::default(); + table.insert_sync(10, file); + let fs = AsyncFs::new(dp, root, &table).await; + + let err = fs + .readdir(LoadedAddr(10), 0, |_, _| false) + .await + .unwrap_err(); + assert_eq!(err.raw_os_error(), Some(libc::ENOTDIR)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_populates_inode_table_with_children() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 42, Some(1)); + + let mut state = MockFsState::default(); + state + .directories + .insert(1, vec![(OsString::from("child.txt"), child)]); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + fs.readdir(LoadedAddr(1), 0, |_, _| false).await.unwrap(); + + let cached = table.get(&10).await; + assert_eq!( + cached.map(|n| n.addr), + Some(10), + "readdir should populate children into the inode table" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_empty_directory() { + let root = make_inode(1, INodeType::Directory, 0, None); + + let mut state = MockFsState::default(); + state.directories.insert(1, vec![]); + let dp = 
MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let mut count = 0; + fs.readdir(LoadedAddr(1), 0, |_, _| { + count += 1; + false + }) + .await + .unwrap(); + + assert_eq!(count, 0, "empty directory should yield no entries"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn readdir_provides_correct_next_offsets() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child_a = make_inode(10, INodeType::File, 0, Some(1)); + let child_b = make_inode(11, INodeType::File, 0, Some(1)); + + let mut state = MockFsState::default(); + state.directories.insert( + 1, + vec![ + (OsString::from("a"), child_a), + (OsString::from("b"), child_b), + ], + ); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + let mut offsets: Vec = Vec::new(); + fs.readdir(LoadedAddr(1), 0, |_, next_offset| { + offsets.push(next_offset); + false + }) + .await + .unwrap(); + + assert_eq!( + offsets, + vec![1, 2], + "offsets should be 1-indexed and sequential" + ); +} + +// lookup-after-readdir integration test + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn lookup_after_readdir_uses_directory_cache() { + let root = make_inode(1, INodeType::Directory, 0, None); + let child = make_inode(10, INodeType::File, 42, Some(1)); + + let mut state = MockFsState::default(); + // Only configure readdir — no lookup entry. If the directory cache + // fast path is broken, the lookup will fail with ENOENT. + state + .directories + .insert(1, vec![(OsString::from("file.txt"), child)]); + let dp = MockFsDataProvider::new(state); + + let table = FutureBackedCache::default(); + let fs = AsyncFs::new(dp, root, &table).await; + + // readdir populates the directory cache. + fs.readdir(LoadedAddr(1), 0, |_, _| false).await.unwrap(); + + // lookup should hit the directory cache fast path. 
+    let tracked = fs
+        .lookup(LoadedAddr(1), OsStr::new("file.txt"))
+        .await
+        .unwrap();
+    assert_eq!(tracked.inode.addr, 10);
+}
diff --git a/tests/common/async_fs_mocks.rs b/tests/common/async_fs_mocks.rs
new file mode 100644
index 00000000..5c132eec
--- /dev/null
+++ b/tests/common/async_fs_mocks.rs
@@ -0,0 +1,104 @@
+#![allow(missing_docs, clippy::unwrap_used)]
+
+use std::collections::HashMap;
+use std::ffi::{OsStr, OsString};
+use std::sync::Arc;
+use std::time::SystemTime;
+
+use bytes::Bytes;
+
+use git_fs::fs::async_fs::{FileReader, FsDataProvider};
+use git_fs::fs::{INode, INodeType, InodePerms, OpenFlags};
+
+/// Builds an `INode` with sensible defaults. Only `addr` and `itype` are required.
+pub fn make_inode(addr: u64, itype: INodeType, size: u64, parent: Option<u64>) -> INode {
+    INode {
+        addr,
+        permissions: InodePerms::OWNER_RWX | InodePerms::GROUP_READ | InodePerms::OTHER_READ,
+        uid: 1000,
+        gid: 1000,
+        create_time: SystemTime::UNIX_EPOCH,
+        last_modified_at: SystemTime::UNIX_EPOCH,
+        parent,
+        size,
+        itype,
+    }
+}
+
+/// A mock `FileReader` that returns a fixed byte slice for any read.
+#[derive(Debug, Clone)]
+pub struct MockFileReader {
+    pub data: Bytes,
+}
+
+impl FileReader for MockFileReader {
+    #[expect(
+        clippy::cast_possible_truncation,
+        reason = "test mock — offsets stay small"
+    )]
+    async fn read(&self, offset: u64, size: u32) -> Result<Bytes, std::io::Error> {
+        let start = (offset as usize).min(self.data.len());
+        let end = (start + size as usize).min(self.data.len());
+        Ok(self.data.slice(start..end))
+    }
+}
+
+/// Shared state backing `MockFsDataProvider`.
+#[derive(Debug, Default)]
+pub struct MockFsState {
+    /// `(parent_addr, child_name) -> child_inode`
+    pub lookups: HashMap<(u64, OsString), INode>,
+    /// `parent_addr -> vec of (child_name, child_inode)`
+    pub directories: HashMap<u64, Vec<(OsString, INode)>>,
+    /// `inode_addr -> file content bytes`
+    pub file_contents: HashMap<u64, Bytes>,
+}
+
+/// A clonable mock data provider for `AsyncFs` tests.
+#[derive(Debug, Clone)]
+pub struct MockFsDataProvider {
+    pub state: Arc<MockFsState>,
+}
+
+impl MockFsDataProvider {
+    pub fn new(state: MockFsState) -> Self {
+        Self {
+            state: Arc::new(state),
+        }
+    }
+}
+
+impl FsDataProvider for MockFsDataProvider {
+    type Reader = MockFileReader;
+
+    async fn lookup(&self, parent: INode, name: &OsStr) -> Result<INode, std::io::Error> {
+        let key = (parent.addr, name.to_os_string());
+        self.state
+            .lookups
+            .get(&key)
+            .copied()
+            .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))
+    }
+
+    async fn readdir(&self, parent: INode) -> Result<Vec<(OsString, INode)>, std::io::Error> {
+        self.state
+            .directories
+            .get(&parent.addr)
+            .cloned()
+            .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))
+    }
+
+    async fn open(
+        &self,
+        inode: INode,
+        _flags: OpenFlags,
+    ) -> Result<Self::Reader, std::io::Error> {
+        let data = self
+            .state
+            .file_contents
+            .get(&inode.addr)
+            .cloned()
+            .unwrap_or_default();
+        Ok(MockFileReader { data })
+    }
+}
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
index 101f9295..2729c866 100644
--- a/tests/common/mod.rs
+++ b/tests/common/mod.rs
@@ -1,4 +1,6 @@
-#![allow(missing_docs, clippy::unwrap_used)]
+#![allow(dead_code, missing_docs, clippy::unwrap_used)]
+
+pub mod async_fs_mocks;
 
 use std::sync::{Arc, Mutex};
 use std::time::Duration;

From c80c09c88794589c18d1d062a0c0949a4abc64d9 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Fri, 20 Feb 2026 12:20:18 -0800
Subject: [PATCH 02/24] refactor: remove redundant FUSE error types, use io_to_errno helper

---
 lib/fs/fuser.rs | 135 +++++++++++++-----------------------------------
 1 file changed, 36 insertions(+), 99 deletions(-)

diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs
index 50042a24..61814119 100644
--- a/lib/fs/fuser.rs
+++ b/lib/fs/fuser.rs
@@ -9,60 +9,18 @@ use super::{FileHandle, INode, INodeType, InodeAddr, LoadedAddr, OpenFlags};
 use crate::cache::async_backed::FutureBackedCache;
 use tracing::{debug, error, instrument};
 
-/// Wrapper converting [`std::io::Error`] to errno.
-#[derive(Debug, thiserror::Error)] -#[error("{0}")] -struct FuseIoError(std::io::Error); - +/// Convert an I/O error to the corresponding errno value for FUSE replies. #[expect( clippy::wildcard_enum_match_arm, reason = "ErrorKind is non_exhaustive; EIO is the safe default" )] -impl From for i32 { - fn from(e: FuseIoError) -> Self { - e.0.raw_os_error().unwrap_or_else(|| match e.0.kind() { - std::io::ErrorKind::NotFound => libc::ENOENT, - std::io::ErrorKind::PermissionDenied => libc::EACCES, - std::io::ErrorKind::AlreadyExists => libc::EEXIST, - _ => libc::EIO, - }) - } -} - -/// Error for read operations. -#[derive(Debug, thiserror::Error)] -enum FuseReadError { - /// The file handle was not open. - #[error("file handle not open")] - NotOpen, - /// An I/O error occurred during the read. - #[error("I/O error: {0}")] - Io(#[from] std::io::Error), -} - -impl From for i32 { - fn from(e: FuseReadError) -> Self { - match e { - FuseReadError::NotOpen => libc::EBADF, - FuseReadError::Io(ref io) => io.raw_os_error().unwrap_or(libc::EIO), - } - } -} - -/// Error for release operations. -#[derive(Debug, thiserror::Error)] -enum FuseReleaseError { - /// The file handle was not open. 
- #[error("file handle not open")] - NotOpen, -} - -impl From for i32 { - fn from(e: FuseReleaseError) -> Self { - match e { - FuseReleaseError::NotOpen => libc::EBADF, - } - } +fn io_to_errno(e: &std::io::Error) -> i32 { + e.raw_os_error().unwrap_or_else(|| match e.kind() { + std::io::ErrorKind::NotFound => libc::ENOENT, + std::io::ErrorKind::PermissionDenied => libc::EACCES, + std::io::ErrorKind::AlreadyExists => libc::EEXIST, + _ => libc::EIO, + }) } mod inner { @@ -200,14 +158,9 @@ impl fuser::Filesystem for FuserAdapter { reply: fuser::ReplyEntry, ) { let result = self.runtime.block_on(async { - let tracked = self - .inner - .get_fs() - .lookup(LoadedAddr(parent), name) - .await - .map_err(FuseIoError)?; + let tracked = self.inner.get_fs().lookup(LoadedAddr(parent), name).await?; self.inner.ward_inc(tracked.inode.addr); - Ok::<_, FuseIoError>(tracked.inode) + Ok::<_, std::io::Error>(tracked.inode) }); match result { Ok(inode) => { @@ -217,7 +170,7 @@ impl fuser::Filesystem for FuserAdapter { } Err(e) => { debug!(error = %e, "replying error"); - reply.error(e.into()); + reply.error(io_to_errno(&e)); } } } @@ -230,13 +183,9 @@ impl fuser::Filesystem for FuserAdapter { _fh: Option, reply: fuser::ReplyAttr, ) { - let result = self.runtime.block_on(async { - self.inner - .get_fs() - .getattr(LoadedAddr(ino)) - .await - .map_err(FuseIoError) - }); + let result = self + .runtime + .block_on(async { self.inner.get_fs().getattr(LoadedAddr(ino)).await }); match result { Ok(inode) => { let attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); @@ -245,7 +194,7 @@ impl fuser::Filesystem for FuserAdapter { } Err(e) => { debug!(error = %e, "replying error"); - reply.error(e.into()); + reply.error(io_to_errno(&e)); } } } @@ -268,16 +217,15 @@ impl fuser::Filesystem for FuserAdapter { entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype)); false }) - .await - .map_err(FuseIoError)?; - Ok::<_, FuseIoError>(entries) + .await?; + Ok::<_, std::io::Error>(entries) }); let 
entries = match result { Ok(entries) => entries, Err(e) => { debug!(error = %e, "replying error"); - reply.error(e.into()); + reply.error(io_to_errno(&e)); return; } }; @@ -310,15 +258,10 @@ impl fuser::Filesystem for FuserAdapter { fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { let flags = OpenFlags::from_bits_truncate(flags); let result = self.runtime.block_on(async { - let open_file = self - .inner - .get_fs() - .open(LoadedAddr(ino), flags) - .await - .map_err(FuseIoError)?; + let open_file = self.inner.get_fs().open(LoadedAddr(ino), flags).await?; let fh = open_file.fh; self.open_files.insert(fh, Arc::clone(&open_file.reader)); - Ok::<_, FuseIoError>(fh) + Ok::<_, std::io::Error>(fh) }); match result { Ok(fh) => { @@ -327,7 +270,7 @@ impl fuser::Filesystem for FuserAdapter { } Err(e) => { debug!(error = %e, "replying error"); - reply.error(e.into()); + reply.error(io_to_errno(&e)); } } } @@ -347,9 +290,12 @@ impl fuser::Filesystem for FuserAdapter { _lock_owner: Option, reply: fuser::ReplyData, ) { - let result: Result<_, FuseReadError> = self.runtime.block_on(async { - let reader = self.open_files.get(&fh).ok_or(FuseReadError::NotOpen)?; - Ok(reader.read(offset.cast_unsigned(), size).await?) 
+ let result = self.runtime.block_on(async { + let reader = self + .open_files + .get(&fh) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::EBADF))?; + reader.read(offset.cast_unsigned(), size).await }); match result { Ok(data) => { @@ -358,7 +304,7 @@ impl fuser::Filesystem for FuserAdapter { } Err(e) => { debug!(error = %e, "replying error"); - reply.error(e.into()); + reply.error(io_to_errno(&e)); } } } @@ -377,24 +323,15 @@ impl fuser::Filesystem for FuserAdapter { _flush: bool, reply: fuser::ReplyEmpty, ) { - let result: Result<_, FuseReleaseError> = match self.open_files.remove(&fh) { - Some(reader) => { - if let Err(e) = self.runtime.block_on(reader.close()) { - debug!(error = %e, "reader close reported error"); - } - Ok(()) - } - None => Err(FuseReleaseError::NotOpen), - }; - match result { - Ok(()) => { - debug!("replying ok"); - reply.ok(); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(e.into()); + if let Some(reader) = self.open_files.remove(&fh) { + if let Err(e) = self.runtime.block_on(reader.close()) { + debug!(error = %e, "reader close reported error"); } + debug!("replying ok"); + reply.ok(); + } else { + debug!("file handle not open, replying error"); + reply.error(libc::EBADF); } } From 7a63d496779e22ec75d86269a661f5853213b93f Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:30:13 -0800 Subject: [PATCH 03/24] feat: add FuseReply trait and FuseResultExt for centralized FUSE error handling --- lib/fs/fuser.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 61814119..a24397de 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -23,6 +23,56 @@ fn io_to_errno(e: &std::io::Error) -> i32 { }) } +/// Trait abstracting the `.error(errno)` method common to all fuser reply types. +trait FuseReply { + fn error(self, errno: i32); +} + +macro_rules! impl_fuse_reply { + ($($ty:ty),* $(,)?) 
=> {
+        $(impl FuseReply for $ty {
+            fn error(self, errno: i32) {
+                // Calls the inherent fuser method (not this trait method).
+                self.error(errno);
+            }
+        })*
+    };
+}
+
+// ReplyEmpty and ReplyStatfs are excluded: release and statfs
+// do not follow the block_on -> fuse_reply pattern.
+impl_fuse_reply!(
+    fuser::ReplyEntry,
+    fuser::ReplyAttr,
+    fuser::ReplyDirectory,
+    fuser::ReplyOpen,
+    fuser::ReplyData,
+);
+
+/// Extension trait on `Result` for FUSE reply handling.
+///
+/// Centralizes the error-logging + errno-reply path so each FUSE callback
+/// only has to express its success path.
+#[expect(
+    dead_code,
+    reason = "will be used by FUSE callbacks in upcoming commits"
+)]
+trait FuseResultExt<T> {
+    fn fuse_reply<R: FuseReply>(self, reply: R, on_ok: impl FnOnce(T, R));
+}
+
+impl<T> FuseResultExt<T> for Result<T, std::io::Error> {
+    fn fuse_reply<R: FuseReply>(self, reply: R, on_ok: impl FnOnce(T, R)) {
+        match self {
+            Ok(val) => on_ok(val, reply),
+            Err(e) => {
+                debug!(error = %e, "replying error");
+                reply.error(io_to_errno(&e));
+            }
+        }
+    }
+}
+
 mod inner {
     #![allow(clippy::future_not_send, clippy::mem_forget)]

From 98e906f9f5ea3ce684214de8164e937c13aeba56 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Fri, 20 Feb 2026 12:37:32 -0800
Subject: [PATCH 04/24] refactor: use fuse_reply in getattr

---
 lib/fs/fuser.rs | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs
index a24397de..41ff2140 100644
--- a/lib/fs/fuser.rs
+++ b/lib/fs/fuser.rs
@@ -53,10 +53,6 @@ impl_fuse_reply!(
 ///
 /// Centralizes the error-logging + errno-reply path so each FUSE callback
 /// only has to express its success path.
-#[expect( - dead_code, - reason = "will be used by FUSE callbacks in upcoming commits" -)] trait FuseResultExt { fn fuse_reply(self, reply: R, on_ok: impl FnOnce(T, R)); } @@ -233,20 +229,13 @@ impl fuser::Filesystem for FuserAdapter { _fh: Option, reply: fuser::ReplyAttr, ) { - let result = self - .runtime - .block_on(async { self.inner.get_fs().getattr(LoadedAddr(ino)).await }); - match result { - Ok(inode) => { + self.runtime + .block_on(async { self.inner.get_fs().getattr(LoadedAddr(ino)).await }) + .fuse_reply(reply, |inode, reply| { let attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); debug!(?attr, "replying..."); reply.attr(&Self::SHAMEFUL_TTL, &attr); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(io_to_errno(&e)); - } - } + }); } #[instrument(name = "FuserAdapter::readdir", skip(self, _req, _fh, offset, reply))] From 3d26de286e5c41523718810c553d5dea1b23ec76 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:37:58 -0800 Subject: [PATCH 05/24] refactor: use fuse_reply in lookup --- lib/fs/fuser.rs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 41ff2140..704cddaf 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -203,22 +203,17 @@ impl fuser::Filesystem for FuserAdapter { name: &OsStr, reply: fuser::ReplyEntry, ) { - let result = self.runtime.block_on(async { - let tracked = self.inner.get_fs().lookup(LoadedAddr(parent), name).await?; - self.inner.ward_inc(tracked.inode.addr); - Ok::<_, std::io::Error>(tracked.inode) - }); - match result { - Ok(inode) => { + self.runtime + .block_on(async { + let tracked = self.inner.get_fs().lookup(LoadedAddr(parent), name).await?; + self.inner.ward_inc(tracked.inode.addr); + Ok::<_, std::io::Error>(tracked.inode) + }) + .fuse_reply(reply, |inode, reply| { let f_attr = inode_to_fuser_attr(&inode, BLOCK_SIZE); debug!(?f_attr, "replying..."); reply.entry(&Self::SHAMEFUL_TTL, &f_attr, 0); - } - 
Err(e) => { - debug!(error = %e, "replying error"); - reply.error(io_to_errno(&e)); - } - } + }); } #[instrument(name = "FuserAdapter::getattr", skip(self, _req, _fh, reply))] From 44ffc1fac2be9d6fa593d6fe5c251173c4e3ddc3 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:38:24 -0800 Subject: [PATCH 06/24] refactor: use fuse_reply in open --- lib/fs/fuser.rs | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 704cddaf..7d648598 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -291,22 +291,17 @@ impl fuser::Filesystem for FuserAdapter { #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { let flags = OpenFlags::from_bits_truncate(flags); - let result = self.runtime.block_on(async { - let open_file = self.inner.get_fs().open(LoadedAddr(ino), flags).await?; - let fh = open_file.fh; - self.open_files.insert(fh, Arc::clone(&open_file.reader)); - Ok::<_, std::io::Error>(fh) - }); - match result { - Ok(fh) => { + self.runtime + .block_on(async { + let open_file = self.inner.get_fs().open(LoadedAddr(ino), flags).await?; + let fh = open_file.fh; + self.open_files.insert(fh, Arc::clone(&open_file.reader)); + Ok::<_, std::io::Error>(fh) + }) + .fuse_reply(reply, |fh, reply| { debug!(handle = fh, "replying..."); reply.opened(fh, 0); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(io_to_errno(&e)); - } - } + }); } #[instrument( From 74904cd4e9db84068c781be50c089cacf4f07e19 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:38:51 -0800 Subject: [PATCH 07/24] refactor: use fuse_reply in read --- lib/fs/fuser.rs | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 7d648598..824fafa2 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ 
-319,23 +319,18 @@ impl fuser::Filesystem for FuserAdapter { _lock_owner: Option, reply: fuser::ReplyData, ) { - let result = self.runtime.block_on(async { - let reader = self - .open_files - .get(&fh) - .ok_or_else(|| std::io::Error::from_raw_os_error(libc::EBADF))?; - reader.read(offset.cast_unsigned(), size).await - }); - match result { - Ok(data) => { + self.runtime + .block_on(async { + let reader = self + .open_files + .get(&fh) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::EBADF))?; + reader.read(offset.cast_unsigned(), size).await + }) + .fuse_reply(reply, |data, reply| { debug!(read_bytes = data.len(), "replying..."); reply.data(&data); - } - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(io_to_errno(&e)); - } - } + }); } #[instrument( From cc820ca8c8a5b666bf8110f649630a0cc7d3e3e0 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 12:39:43 -0800 Subject: [PATCH 08/24] refactor: use fuse_reply in readdir --- lib/fs/fuser.rs | 81 ++++++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 44 deletions(-) diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 824fafa2..886a5f6f 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -240,52 +240,45 @@ impl fuser::Filesystem for FuserAdapter { ino: u64, _fh: u64, offset: i64, - mut reply: fuser::ReplyDirectory, + reply: fuser::ReplyDirectory, ) { let offset_u64 = offset.cast_unsigned(); - let result = self.runtime.block_on(async { - let mut entries = Vec::new(); - self.inner - .get_fs() - .readdir(LoadedAddr(ino), offset_u64, |de, _next_offset| { - entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype)); - false - }) - .await?; - Ok::<_, std::io::Error>(entries) - }); - - let entries = match result { - Ok(entries) => entries, - Err(e) => { - debug!(error = %e, "replying error"); - reply.error(io_to_errno(&e)); - return; - } - }; - - #[expect( - clippy::cast_possible_truncation, - reason = "offset fits in usize on supported 64-bit 
platforms" - )] - for (i, (entry_ino, entry_name, entry_itype)) in entries.iter().enumerate() { - let kind = inode_type_to_fuser(*entry_itype); - let abs_idx = offset_u64 as usize + i + 1; - let Ok(idx): Result = abs_idx.try_into() else { - error!("Directory entry index {} too large for fuser", abs_idx); - reply.error(libc::EIO); - return; - }; - - debug!(?entry_name, ino = entry_ino, "adding entry to reply..."); - if reply.add(*entry_ino, idx, kind, entry_name) { - debug!("buffer full for now, stopping readdir"); - break; - } - } - - debug!("finalizing reply..."); - reply.ok(); + self.runtime + .block_on(async { + let mut entries = Vec::new(); + self.inner + .get_fs() + .readdir(LoadedAddr(ino), offset_u64, |de, _next_offset| { + entries.push((de.inode.addr, de.name.to_os_string(), de.inode.itype)); + false + }) + .await?; + Ok::<_, std::io::Error>(entries) + }) + .fuse_reply(reply, |entries, mut reply| { + for (i, (entry_ino, entry_name, entry_itype)) in entries.iter().enumerate() { + let kind = inode_type_to_fuser(*entry_itype); + #[expect( + clippy::cast_possible_truncation, + reason = "offset fits in usize on supported 64-bit platforms" + )] + let abs_idx = offset_u64 as usize + i + 1; + let Ok(idx): Result = abs_idx.try_into() else { + error!("Directory entry index {} too large for fuser", abs_idx); + reply.error(libc::EIO); + return; + }; + + debug!(?entry_name, ino = entry_ino, "adding entry to reply..."); + if reply.add(*entry_ino, idx, kind, entry_name) { + debug!("buffer full for now, stopping readdir"); + break; + } + } + + debug!("finalizing reply..."); + reply.ok(); + }); } #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] From e7d59095b520ace0e7a35e67ca7cbff2dc40999f Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 13:26:29 -0800 Subject: [PATCH 09/24] DCache with per-parent info --- lib/fs/dcache.rs | 184 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 167 insertions(+), 17 deletions(-) 
diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs index 5138e802..fab36c7b 100644 --- a/lib/fs/dcache.rs +++ b/lib/fs/dcache.rs @@ -1,4 +1,6 @@ use std::ffi::{OsStr, OsString}; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; use crate::fs::LoadedAddr; @@ -11,29 +13,69 @@ pub struct DValue { pub is_dir: bool, } -/// In-memory directory entry cache mapping `(parent, name)` to child metadata. +/// Per-parent directory state holding child entries and a population flag. +struct DirState { + children: scc::HashMap, + populated: AtomicBool, +} + +impl DirState { + fn new() -> Self { + Self { + children: scc::HashMap::new(), + populated: AtomicBool::new(false), + } + } +} + +/// In-memory directory entry cache with per-parent child maps. /// -/// Backed by [`scc::HashMap`] for atomic upsert on insert. The `readdir` -/// implementation scans the entire map and filters by parent — this is O(n) -/// over the cache size rather than O(log n + k) with an ordered index, but -/// guarantees that `insert` never creates a window where an entry is absent. -#[derive(Default)] +/// Each parent directory gets its own [`DirState`] containing a +/// [`scc::HashMap`] of child entries and an [`AtomicBool`] population flag. +/// This makes `readdir` O(k) in the number of children rather than O(n) +/// over the entire cache. pub struct DCache { - cache: scc::HashMap<(LoadedAddr, OsString), DValue>, + dirs: scc::HashMap>, +} + +impl Default for DCache { + fn default() -> Self { + Self::new() + } } impl DCache { /// Creates an empty directory cache. #[must_use] pub fn new() -> Self { - Self::default() + Self { + dirs: scc::HashMap::new(), + } + } + + /// Returns the [`DirState`] for `parent_ino`, creating one if absent. 
+ fn dir_state(&self, parent_ino: LoadedAddr) -> Arc { + if let Some(entry) = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v)) { + return entry; + } + let state = Arc::new(DirState::new()); + match self.dirs.entry_sync(parent_ino) { + scc::hash_map::Entry::Occupied(occ) => Arc::clone(occ.get()), + scc::hash_map::Entry::Vacant(vac) => { + let cloned = Arc::clone(&state); + vac.insert_entry(state); + cloned + } + } } /// Looks up a single child entry by parent inode and name. #[must_use] pub fn lookup(&self, parent_ino: LoadedAddr, name: &OsStr) -> Option { - let key = (parent_ino, name.to_os_string()); - self.cache.read_sync(&key, |_, v| v.clone()) + let state = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v))?; + state + .children + .read_sync(&name.to_os_string(), |_, v| v.clone()) } /// Atomically inserts or overwrites a child entry in the cache. @@ -44,22 +86,130 @@ impl DCache { ino: LoadedAddr, is_dir: bool, ) { - let key = (parent_ino, name); + let state = self.dir_state(parent_ino); let value = DValue { ino, is_dir }; - self.cache.upsert_async(key, value).await; + state.children.upsert_async(name, value).await; } /// Returns all cached children of `parent_ino` as `(name, value)` pairs. pub async fn readdir(&self, parent_ino: LoadedAddr) -> Vec<(OsString, DValue)> { + let Some(state) = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v)) else { + return Vec::new(); + }; let mut entries = Vec::new(); - self.cache - .iter_async(|key, value| { - if key.0 == parent_ino { - entries.push((key.1.clone(), value.clone())); - } + state + .children + .iter_async(|k, v| { + entries.push((k.clone(), v.clone())); true }) .await; entries } + + /// Returns `true` if the directory at `parent_ino` has been fully populated. 
+ #[must_use] + pub fn is_populated(&self, parent_ino: LoadedAddr) -> bool { + self.dirs + .read_sync(&parent_ino, |_, v| v.populated.load(Ordering::Acquire)) + .unwrap_or(false) + } + + /// Marks the directory at `parent_ino` as fully populated. + pub fn mark_populated(&self, parent_ino: LoadedAddr) { + let state = self.dir_state(parent_ino); + state.populated.store(true, Ordering::Release); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::OsString; + + #[tokio::test] + async fn lookup_returns_none_for_missing_entry() { + let cache = DCache::new(); + assert!(cache.lookup(LoadedAddr(1), OsStr::new("foo")).is_none()); + } + + #[tokio::test] + async fn insert_then_lookup() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); + assert!(dv.is_some(), "entry should be present after insert"); + let dv = dv.expect("checked above"); + assert_eq!(dv.ino, LoadedAddr(10)); + assert!(!dv.is_dir); + } + + #[tokio::test] + async fn readdir_returns_only_children_of_parent() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("a"), LoadedAddr(10), false) + .await; + cache + .insert(LoadedAddr(1), OsString::from("b"), LoadedAddr(11), true) + .await; + cache + .insert(LoadedAddr(2), OsString::from("c"), LoadedAddr(12), false) + .await; + let children = cache.readdir(LoadedAddr(1)).await; + assert_eq!(children.len(), 2); + let names: Vec<_> = children.iter().map(|(n, _)| n.clone()).collect(); + assert!(names.contains(&OsString::from("a"))); + assert!(names.contains(&OsString::from("b"))); + } + + #[tokio::test] + async fn readdir_empty_parent_returns_empty() { + let cache = DCache::new(); + let children = cache.readdir(LoadedAddr(1)).await; + assert!(children.is_empty()); + } + + #[tokio::test] + async fn is_populated_false_by_default() { + let cache = DCache::new(); + assert!(!cache.is_populated(LoadedAddr(1))); + 
} + + #[tokio::test] + async fn mark_populated_then_check() { + let cache = DCache::new(); + cache.mark_populated(LoadedAddr(1)); + assert!(cache.is_populated(LoadedAddr(1))); + } + + #[tokio::test] + async fn insert_does_not_mark_populated() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + assert!( + !cache.is_populated(LoadedAddr(1)), + "insert alone should not mark a directory as populated" + ); + } + + #[tokio::test] + async fn upsert_overwrites_existing_entry() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(20), true) + .await; + let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); + assert!(dv.is_some(), "entry should still be present after upsert"); + let dv = dv.expect("checked above"); + assert_eq!(dv.ino, LoadedAddr(20)); + assert!(dv.is_dir); + } } From 44d5f0751e56a22fbe7f1d678c0e8502135f8842 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 13:28:37 -0800 Subject: [PATCH 10/24] refactor: use DCache population tracking, remove readdir_populated from AsyncFs --- lib/fs/async_fs.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 7626578f..3bf3b0f3 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -193,9 +193,6 @@ pub struct AsyncFs<'tbl, DP: FsDataProvider> { /// Monotonically increasing file handle counter. Starts at 1 (0 is reserved). next_fh: AtomicU64, - - /// Tracks which directories have had their children fetched via `dp.readdir`. 
- readdir_populated: FutureBackedCache, } impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { @@ -215,7 +212,6 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { directory_cache: DCache::new(), data_provider, next_fh: AtomicU64::new(1), - readdir_populated: FutureBackedCache::default(), } } @@ -235,7 +231,6 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { directory_cache: DCache::new(), data_provider, next_fh: AtomicU64::new(1), - readdir_populated: FutureBackedCache::default(), } } @@ -392,7 +387,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { } // Populate the directory cache on first readdir for this parent. - if self.readdir_populated.get(&parent).await.is_none() { + if !self.directory_cache.is_populated(parent) { let children = self.data_provider.readdir(parent_inode).await?; for (name, child_inode) in children { self.inode_table @@ -407,9 +402,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { ) .await; } - self.readdir_populated - .get_or_init(parent, || async {}) - .await; + self.directory_cache.mark_populated(parent); } let mut children = self.directory_cache.readdir(parent).await; From f07db8be693508b4d51fa1b96f154317db448cf0 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 13:30:48 -0800 Subject: [PATCH 11/24] refactor: use DCache population tracking in CompositeFs --- src/fs/mescloud/composite.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/fs/mescloud/composite.rs b/src/fs/mescloud/composite.rs index 3356b7b5..91c35806 100644 --- a/src/fs/mescloud/composite.rs +++ b/src/fs/mescloud/composite.rs @@ -83,7 +83,6 @@ struct OpenFileEntry { pub(super) struct CompositeFs { pub(super) inode_table: FutureBackedCache, pub(super) directory_cache: DCache, - readdir_populated: FutureBackedCache, next_ino: AtomicU64, next_fh: AtomicU64, refcounts: FxHashMap, @@ -121,7 +120,6 @@ impl CompositeFs { Self { inode_table, directory_cache: DCache::new(), - readdir_populated: 
FutureBackedCache::default(), next_ino: AtomicU64::new(Self::ROOT_INO + 1), next_fh: AtomicU64::new(1), refcounts, @@ -286,7 +284,7 @@ impl CompositeFs { .copied() .ok_or(ReadDirError::InodeNotFound)?; - if self.readdir_populated.get(&LoadedAddr(ino)).await.is_none() { + if !self.directory_cache.is_populated(LoadedAddr(ino)) { let inner_ino = self.slots[idx] .bridge .forward(ino) @@ -321,9 +319,7 @@ impl CompositeFs { .await; } - self.readdir_populated - .get_or_init(LoadedAddr(ino), || async {}) - .await; + self.directory_cache.mark_populated(LoadedAddr(ino)); } let mut children = self.directory_cache.readdir(LoadedAddr(ino)).await; From 7558e8624380775aac8b7c175d9e7fcfeafe3ca0 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 13:31:36 -0800 Subject: [PATCH 12/24] fix: update stale readdir_populated comment in async_fs.rs --- lib/fs/async_fs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 3bf3b0f3..761149d2 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -368,7 +368,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { /// /// # Concurrency /// - /// The `readdir_populated` check-then-populate is **not** atomic. If two + /// The `is_populated` check-then-populate is **not** atomic. If two /// concurrent callers invoke `readdir` for the same parent, both may call /// `dp.readdir()` and insert duplicate children. This is safe when the /// caller serializes access (e.g. via `&mut self` on the `Fs` trait). 
From bcf2f1eb46b5b2f90d1b335a9cfb76463ddc66f9 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:08:22 -0800 Subject: [PATCH 13/24] feat: add ConcurrentBridge for lock-free inode address translation --- lib/fs/bridge.rs | 76 +++++++++++++++++++++++++++++++++++++++++++ lib/fs/mod.rs | 2 ++ tests/bridge_tests.rs | 49 ++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 lib/fs/bridge.rs create mode 100644 tests/bridge_tests.rs diff --git a/lib/fs/bridge.rs b/lib/fs/bridge.rs new file mode 100644 index 00000000..5bb1b028 --- /dev/null +++ b/lib/fs/bridge.rs @@ -0,0 +1,76 @@ +//! Lock-free bidirectional inode address mapping. +//! +//! [`ConcurrentBridge`] maps between "outer" (composite) and "inner" (child) +//! inode address spaces using two [`scc::HashMap`]s. + +use crate::fs::InodeAddr; + +/// Bidirectional inode mapping between outer (composite) and inner (child) address spaces. +/// +/// Uses two lock-free `scc::HashMap`s. Insertion order: forward map first, +/// then backward map, so any observer that discovers an outer addr via +/// `backward` can immediately resolve it via `forward`. +pub struct ConcurrentBridge { + /// outer -> inner + fwd: scc::HashMap, + /// inner -> outer + bwd: scc::HashMap, +} + +impl ConcurrentBridge { + /// Creates an empty bridge. + #[must_use] + pub fn new() -> Self { + Self { + fwd: scc::HashMap::new(), + bwd: scc::HashMap::new(), + } + } + + /// Insert a mapping from outer to inner. + /// + /// Inserts into the forward map first (see module docs for ordering rationale). + pub fn insert(&self, outer: InodeAddr, inner: InodeAddr) { + let _ = self.fwd.insert_sync(outer, inner); + let _ = self.bwd.insert_sync(inner, outer); + } + + /// Resolve outer -> inner. + #[must_use] + pub fn forward(&self, outer: InodeAddr) -> Option { + self.fwd.read_sync(&outer, |_, &v| v) + } + + /// Resolve inner -> outer. 
+ #[must_use] + pub fn backward(&self, inner: InodeAddr) -> Option { + self.bwd.read_sync(&inner, |_, &v| v) + } + + /// Look up inner -> outer, or allocate a new outer address if unmapped. + pub fn backward_or_insert( + &self, + inner: InodeAddr, + allocate: impl FnOnce() -> InodeAddr, + ) -> InodeAddr { + if let Some(outer) = self.backward(inner) { + return outer; + } + let outer = allocate(); + self.insert(outer, inner); + outer + } + + /// Remove the mapping for the given outer address. + pub fn remove_by_outer(&self, outer: InodeAddr) { + if let Some((_, inner)) = self.fwd.remove_sync(&outer) { + self.bwd.remove_sync(&inner); + } + } +} + +impl Default for ConcurrentBridge { + fn default() -> Self { + Self::new() + } +} diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs index e8f971b4..f5d42961 100644 --- a/lib/fs/mod.rs +++ b/lib/fs/mod.rs @@ -1,6 +1,8 @@ //! Useful filesystem generalizations. /// Async filesystem cache with concurrent inode management. pub mod async_fs; +/// Lock-free bidirectional inode address mapping. +pub mod bridge; /// Directory entry cache for fast parent-child lookups. pub mod dcache; /// FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`async_fs::AsyncFs`]. 
diff --git a/tests/bridge_tests.rs b/tests/bridge_tests.rs new file mode 100644 index 00000000..b0598e4d --- /dev/null +++ b/tests/bridge_tests.rs @@ -0,0 +1,49 @@ +#![allow(clippy::unwrap_used, missing_docs)] + +use git_fs::fs::bridge::ConcurrentBridge; + +#[test] +fn insert_then_forward_returns_inner() { + let bridge = ConcurrentBridge::new(); + bridge.insert(10, 100); + assert_eq!(bridge.forward(10), Some(100)); +} + +#[test] +fn insert_then_backward_returns_outer() { + let bridge = ConcurrentBridge::new(); + bridge.insert(10, 100); + assert_eq!(bridge.backward(100), Some(10)); +} + +#[test] +fn forward_missing_returns_none() { + let bridge = ConcurrentBridge::new(); + assert_eq!(bridge.forward(42), None); +} + +#[test] +fn backward_or_insert_existing_returns_cached() { + let bridge = ConcurrentBridge::new(); + bridge.insert(10, 100); + let outer = bridge.backward_or_insert(100, || 999); + assert_eq!(outer, 10, "should return existing outer addr"); +} + +#[test] +fn backward_or_insert_new_allocates() { + let bridge = ConcurrentBridge::new(); + let outer = bridge.backward_or_insert(200, || 50); + assert_eq!(outer, 50, "should use allocator"); + assert_eq!(bridge.forward(50), Some(200)); + assert_eq!(bridge.backward(200), Some(50)); +} + +#[test] +fn remove_by_outer_clears_both_directions() { + let bridge = ConcurrentBridge::new(); + bridge.insert(10, 100); + bridge.remove_by_outer(10); + assert_eq!(bridge.forward(10), None); + assert_eq!(bridge.backward(100), None); +} From a19e91d1d7e395d11501f4a422213c4800518bac Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:12:17 -0800 Subject: [PATCH 14/24] fix: eliminate TOCTOU race in ConcurrentBridge::backward_or_insert Use `scc::HashMap::entry_sync` for atomic check-and-insert instead of separate backward() + insert() calls that allowed two concurrent callers to both allocate for the same inner address. Also add #[must_use]. 
--- lib/fs/bridge.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/lib/fs/bridge.rs b/lib/fs/bridge.rs index 5bb1b028..350d8750 100644 --- a/lib/fs/bridge.rs +++ b/lib/fs/bridge.rs @@ -48,17 +48,22 @@ impl ConcurrentBridge { } /// Look up inner -> outer, or allocate a new outer address if unmapped. + #[must_use] pub fn backward_or_insert( &self, inner: InodeAddr, allocate: impl FnOnce() -> InodeAddr, ) -> InodeAddr { - if let Some(outer) = self.backward(inner) { - return outer; + match self.bwd.entry_sync(inner) { + scc::hash_map::Entry::Occupied(occ) => *occ.get(), + scc::hash_map::Entry::Vacant(vac) => { + let outer = allocate(); + vac.insert_entry(outer); + // Populate forward map after backward is committed. + let _ = self.fwd.insert_sync(outer, inner); + outer + } } - let outer = allocate(); - self.insert(outer, inner); - outer } /// Remove the mapping for the given outer address. From d9fdc04b8c75e6016502889c5a67e7e4b0584fc8 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:15:20 -0800 Subject: [PATCH 15/24] feat: add CompositeRoot trait, ChildInner, and CompositeReader --- lib/fs/composite.rs | 125 ++++++++++++++++++++++++++++++++++++++++++++ lib/fs/mod.rs | 2 + 2 files changed, 127 insertions(+) create mode 100644 lib/fs/composite.rs diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs new file mode 100644 index 00000000..36969c67 --- /dev/null +++ b/lib/fs/composite.rs @@ -0,0 +1,125 @@ +//! Generic composite filesystem types. +//! +//! A composite filesystem presents multiple child filesystems under a single +//! virtual root directory. The [`CompositeRoot`] trait describes how children +//! are discovered, [`ChildInner`] co-locates an inode table with an +//! [`AsyncFs`](super::async_fs::AsyncFs), and [`CompositeReader`] wraps a +//! child reader so the composite layer can expose it through [`FileReader`]. 
+ +use std::ffi::{OsStr, OsString}; +use std::future::Future; +use std::sync::Arc; + +use bytes::Bytes; + +use crate::fs::INode; +use crate::fs::async_fs::{FileReader, FsDataProvider}; + +/// Descriptor for a child filesystem returned by [`CompositeRoot`]. +pub struct ChildDescriptor { + /// The name this child is listed as in the composite root directory. + pub name: OsString, + /// The data provider for this child. + pub provider: DP, + /// The root inode of the child filesystem. + pub root_ino: INode, +} + +/// Describes the children that a composite filesystem exposes at its root. +/// +/// Implementors define domain-specific child resolution: what children exist, +/// and what [`FsDataProvider`] backs each child. +pub trait CompositeRoot: Send + Sync + 'static { + /// The data provider type for child filesystems. + type ChildDP: FsDataProvider; + + /// Resolve a child by name, returning its data provider and root inode. + /// + /// Called on lookup at the composite root. Returns `None` if the name + /// does not correspond to a known child. + fn resolve_child( + &self, + name: &OsStr, + ) -> impl Future>, std::io::Error>> + Send; + + /// List all children at the composite root. + /// + /// Called on readdir at the composite root. + fn list_children( + &self, + ) -> impl Future>, std::io::Error>> + Send; +} + +mod child_inner_impl { + #![allow(clippy::future_not_send, clippy::mem_forget)] + + use ouroboros::self_referencing; + + use crate::cache::async_backed::FutureBackedCache; + use crate::fs::async_fs::{AsyncFs, FsDataProvider}; + use crate::fs::{INode, InodeAddr}; + + /// Self-referential struct co-locating an inode table and [`AsyncFs`]. + /// + /// The `AsyncFs` borrows from the table directly, avoiding an extra + /// indirection. This mirrors the [`FuseBridgeInner`](super::super::fuser) + /// pattern. 
+ #[self_referencing] + pub struct ChildInner { + pub(super) table: FutureBackedCache, + #[borrows(table)] + #[covariant] + pub(super) fs: AsyncFs<'this, DP>, + } + + impl ChildInner { + #[expect(dead_code, reason = "used by CompositeFs in a follow-up commit")] + pub(super) fn create(table: FutureBackedCache, provider: DP) -> Self { + ChildInnerBuilder { + table, + fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), + } + .build() + } + + #[expect(dead_code, reason = "used by CompositeFs in a follow-up commit")] + pub(super) fn get_fs(&self) -> &AsyncFs<'_, DP> { + self.borrow_fs() + } + } +} + +pub use child_inner_impl::ChildInner; + +/// Wraps a child's reader so that the composite layer can expose it as its own +/// [`FileReader`]. +pub struct CompositeReader { + inner: Arc, +} + +impl CompositeReader { + /// Create a new `CompositeReader` wrapping the given reader. + pub fn new(inner: Arc) -> Self { + Self { inner } + } +} + +impl std::fmt::Debug for CompositeReader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CompositeReader").finish_non_exhaustive() + } +} + +impl FileReader for CompositeReader { + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send { + self.inner.read(offset, size) + } + + fn close(&self) -> impl Future> + Send { + self.inner.close() + } +} diff --git a/lib/fs/mod.rs b/lib/fs/mod.rs index f5d42961..ed93bd25 100644 --- a/lib/fs/mod.rs +++ b/lib/fs/mod.rs @@ -3,6 +3,8 @@ pub mod async_fs; /// Lock-free bidirectional inode address mapping. pub mod bridge; +/// Generic composite filesystem types. +pub mod composite; /// Directory entry cache for fast parent-child lookups. pub mod dcache; /// FUSE adapter: maps [`fuser::Filesystem`] callbacks to [`async_fs::AsyncFs`]. 
From e2f8215b48bbd62a978d21f78815eee9e293dd4c Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:18:21 -0800 Subject: [PATCH 16/24] test: extract async_backed inline tests to tests/async_backed_correctness.rs --- lib/cache/async_backed.rs | 101 ------------------------------ tests/async_backed_correctness.rs | 99 +++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 101 deletions(-) create mode 100644 tests/async_backed_correctness.rs diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs index c3fddd05..8f15803b 100644 --- a/lib/cache/async_backed.rs +++ b/lib/cache/async_backed.rs @@ -288,104 +288,3 @@ where } } } - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn try_init_ok_caches_value() { - let cache = FutureBackedCache::::default(); - let result: Result = cache - .get_or_try_init(1, || async { Ok("hello".to_owned()) }) - .await; - assert_eq!(result.unwrap(), "hello", "should return Ok value"); - - // Value should now be cached (get returns it without factory) - let cached = cache.get(&1).await; - assert_eq!(cached.unwrap(), "hello", "value should be in cache"); - } - - #[tokio::test] - async fn try_init_err_does_not_cache() { - let cache = FutureBackedCache::::default(); - let result: Result = cache.get_or_try_init(1, || async { Err("boom") }).await; - assert_eq!(result.unwrap_err(), "boom", "should return the error"); - - // Cache should be empty — error was not stored - assert!(cache.is_empty(), "cache should have no entries after error"); - assert!(cache.get(&1).await.is_none(), "key should not exist"); - } - - #[tokio::test] - async fn try_init_err_then_retry_ok() { - let cache = FutureBackedCache::::default(); - - // First call: factory fails - let r1: Result = cache.get_or_try_init(1, || async { Err("fail") }).await; - assert!(r1.is_err(), "first call should fail"); - - // Second call: factory succeeds - let r2: Result = cache - .get_or_try_init(1, || async { Ok("recovered".to_owned()) 
}) - .await; - assert_eq!(r2.unwrap(), "recovered", "retry should succeed"); - - // Value should now be cached - let cached = cache.get(&1).await; - assert_eq!(cached.unwrap(), "recovered"); - } - - #[tokio::test] - async fn try_init_returns_value_cached_by_init() { - let cache = FutureBackedCache::::default(); - - // Populate via infallible get_or_init - cache - .get_or_init(1, || async { "from_init".to_owned() }) - .await; - - // get_or_try_init should return the cached value without running factory - let result: Result = cache - .get_or_try_init(1, || async { panic!("factory should not run") }) - .await; - assert_eq!(result.unwrap(), "from_init"); - } - - #[tokio::test] - async fn panic_in_factory_is_recovered() { - use std::sync::Arc; - use std::sync::atomic::{AtomicUsize, Ordering}; - - let cache = Arc::new(FutureBackedCache::::default()); - let call_count = Arc::new(AtomicUsize::new(0)); - - // Spawn a task whose factory panics. tokio::spawn catches the panic. - let cache2 = Arc::clone(&cache); - let call_count2 = Arc::clone(&call_count); - let handle = tokio::spawn(async move { - cache2 - .get_or_init(1, || { - call_count2.fetch_add(1, Ordering::Relaxed); - async { panic!("boom") } - }) - .await - }); - // The spawned task panics internally; JoinHandle returns Err. - assert!(handle.await.is_err(), "task should have panicked"); - - // The key should NOT be permanently bricked. A new caller should succeed. 
- let v = cache - .get_or_init(1, || { - call_count.fetch_add(1, Ordering::Relaxed); - async { "recovered".to_owned() } - }) - .await; - assert_eq!(v, "recovered", "should recover after panic"); - assert_eq!( - call_count.load(Ordering::Relaxed), - 2, - "factory called twice" - ); - } -} diff --git a/tests/async_backed_correctness.rs b/tests/async_backed_correctness.rs new file mode 100644 index 00000000..457ba948 --- /dev/null +++ b/tests/async_backed_correctness.rs @@ -0,0 +1,99 @@ +#![allow(clippy::unwrap_used, missing_docs)] + +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; + +use git_fs::cache::async_backed::FutureBackedCache; + +#[tokio::test] +async fn try_init_ok_caches_value() { + let cache = FutureBackedCache::::default(); + let result: Result = cache + .get_or_try_init(1, || async { Ok("hello".to_owned()) }) + .await; + assert_eq!(result.unwrap(), "hello", "should return Ok value"); + + // Value should now be cached (get returns it without factory) + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "hello", "value should be in cache"); +} + +#[tokio::test] +async fn try_init_err_does_not_cache() { + let cache = FutureBackedCache::::default(); + let result: Result = cache.get_or_try_init(1, || async { Err("boom") }).await; + assert_eq!(result.unwrap_err(), "boom", "should return the error"); + + // Cache should be empty — error was not stored + assert!(cache.is_empty(), "cache should have no entries after error"); + assert!(cache.get(&1).await.is_none(), "key should not exist"); +} + +#[tokio::test] +async fn try_init_err_then_retry_ok() { + let cache = FutureBackedCache::::default(); + + // First call: factory fails + let r1: Result = cache.get_or_try_init(1, || async { Err("fail") }).await; + assert!(r1.is_err(), "first call should fail"); + + // Second call: factory succeeds + let r2: Result = cache + .get_or_try_init(1, || async { Ok("recovered".to_owned()) }) + .await; + assert_eq!(r2.unwrap(), "recovered", "retry 
should succeed"); + + // Value should now be cached + let cached = cache.get(&1).await; + assert_eq!(cached.unwrap(), "recovered"); +} + +#[tokio::test] +async fn try_init_returns_value_cached_by_init() { + let cache = FutureBackedCache::::default(); + + // Populate via infallible get_or_init + cache + .get_or_init(1, || async { "from_init".to_owned() }) + .await; + + // get_or_try_init should return the cached value without running factory + let result: Result = cache + .get_or_try_init(1, || async { panic!("factory should not run") }) + .await; + assert_eq!(result.unwrap(), "from_init"); +} + +#[tokio::test] +async fn panic_in_factory_is_recovered() { + let cache = Arc::new(FutureBackedCache::::default()); + let call_count = Arc::new(AtomicUsize::new(0)); + + // Spawn a task whose factory panics. tokio::spawn catches the panic. + let cache2 = Arc::clone(&cache); + let call_count2 = Arc::clone(&call_count); + let handle = tokio::spawn(async move { + cache2 + .get_or_init(1, || { + call_count2.fetch_add(1, Ordering::Relaxed); + async { panic!("boom") } + }) + .await + }); + // The spawned task panics internally; JoinHandle returns Err. + assert!(handle.await.is_err(), "task should have panicked"); + + // The key should NOT be permanently bricked. A new caller should succeed. 
+ let v = cache + .get_or_init(1, || { + call_count.fetch_add(1, Ordering::Relaxed); + async { "recovered".to_owned() } + }) + .await; + assert_eq!(v, "recovered", "should recover after panic"); + assert_eq!( + call_count.load(Ordering::Relaxed), + 2, + "factory called twice" + ); +} From 6fe9dd52afe722fe6e6898db49dd9c3334138e2e Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:18:51 -0800 Subject: [PATCH 17/24] fix: add #[must_use] to CompositeReader::new --- lib/fs/composite.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index 36969c67..d8237dcb 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -99,6 +99,7 @@ pub struct CompositeReader { impl CompositeReader { /// Create a new `CompositeReader` wrapping the given reader. + #[must_use] pub fn new(inner: Arc) -> Self { Self { inner } } From 781d7bb28376cbfaffacfc7a8f17ef43e62fdc4b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:21:15 -0800 Subject: [PATCH 18/24] test: extract dcache inline tests to tests/dcache_correctness.rs --- lib/fs/dcache.rs | 92 ------------------------------------- tests/dcache_correctness.rs | 92 +++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 92 deletions(-) create mode 100644 tests/dcache_correctness.rs diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs index fab36c7b..4870a401 100644 --- a/lib/fs/dcache.rs +++ b/lib/fs/dcache.rs @@ -121,95 +121,3 @@ impl DCache { state.populated.store(true, Ordering::Release); } } - -#[cfg(test)] -mod tests { - use super::*; - use std::ffi::OsString; - - #[tokio::test] - async fn lookup_returns_none_for_missing_entry() { - let cache = DCache::new(); - assert!(cache.lookup(LoadedAddr(1), OsStr::new("foo")).is_none()); - } - - #[tokio::test] - async fn insert_then_lookup() { - let cache = DCache::new(); - cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) - .await; - let dv = cache.lookup(LoadedAddr(1), 
OsStr::new("foo")); - assert!(dv.is_some(), "entry should be present after insert"); - let dv = dv.expect("checked above"); - assert_eq!(dv.ino, LoadedAddr(10)); - assert!(!dv.is_dir); - } - - #[tokio::test] - async fn readdir_returns_only_children_of_parent() { - let cache = DCache::new(); - cache - .insert(LoadedAddr(1), OsString::from("a"), LoadedAddr(10), false) - .await; - cache - .insert(LoadedAddr(1), OsString::from("b"), LoadedAddr(11), true) - .await; - cache - .insert(LoadedAddr(2), OsString::from("c"), LoadedAddr(12), false) - .await; - let children = cache.readdir(LoadedAddr(1)).await; - assert_eq!(children.len(), 2); - let names: Vec<_> = children.iter().map(|(n, _)| n.clone()).collect(); - assert!(names.contains(&OsString::from("a"))); - assert!(names.contains(&OsString::from("b"))); - } - - #[tokio::test] - async fn readdir_empty_parent_returns_empty() { - let cache = DCache::new(); - let children = cache.readdir(LoadedAddr(1)).await; - assert!(children.is_empty()); - } - - #[tokio::test] - async fn is_populated_false_by_default() { - let cache = DCache::new(); - assert!(!cache.is_populated(LoadedAddr(1))); - } - - #[tokio::test] - async fn mark_populated_then_check() { - let cache = DCache::new(); - cache.mark_populated(LoadedAddr(1)); - assert!(cache.is_populated(LoadedAddr(1))); - } - - #[tokio::test] - async fn insert_does_not_mark_populated() { - let cache = DCache::new(); - cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) - .await; - assert!( - !cache.is_populated(LoadedAddr(1)), - "insert alone should not mark a directory as populated" - ); - } - - #[tokio::test] - async fn upsert_overwrites_existing_entry() { - let cache = DCache::new(); - cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) - .await; - cache - .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(20), true) - .await; - let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); - assert!(dv.is_some(), "entry should still be 
present after upsert"); - let dv = dv.expect("checked above"); - assert_eq!(dv.ino, LoadedAddr(20)); - assert!(dv.is_dir); - } -} diff --git a/tests/dcache_correctness.rs b/tests/dcache_correctness.rs new file mode 100644 index 00000000..59731d28 --- /dev/null +++ b/tests/dcache_correctness.rs @@ -0,0 +1,92 @@ +#![allow(clippy::unwrap_used, missing_docs)] + +use std::ffi::{OsStr, OsString}; + +use git_fs::fs::LoadedAddr; +use git_fs::fs::dcache::DCache; + +#[tokio::test] +async fn lookup_returns_none_for_missing_entry() { + let cache = DCache::new(); + assert!(cache.lookup(LoadedAddr(1), OsStr::new("foo")).is_none()); +} + +#[tokio::test] +async fn insert_then_lookup() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); + assert!(dv.is_some(), "entry should be present after insert"); + let dv = dv.expect("checked above"); + assert_eq!(dv.ino, LoadedAddr(10)); + assert!(!dv.is_dir); +} + +#[tokio::test] +async fn readdir_returns_only_children_of_parent() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("a"), LoadedAddr(10), false) + .await; + cache + .insert(LoadedAddr(1), OsString::from("b"), LoadedAddr(11), true) + .await; + cache + .insert(LoadedAddr(2), OsString::from("c"), LoadedAddr(12), false) + .await; + let children = cache.readdir(LoadedAddr(1)).await; + assert_eq!(children.len(), 2); + let names: Vec<_> = children.iter().map(|(n, _)| n.clone()).collect(); + assert!(names.contains(&OsString::from("a"))); + assert!(names.contains(&OsString::from("b"))); +} + +#[tokio::test] +async fn readdir_empty_parent_returns_empty() { + let cache = DCache::new(); + let children = cache.readdir(LoadedAddr(1)).await; + assert!(children.is_empty()); +} + +#[tokio::test] +async fn is_populated_false_by_default() { + let cache = DCache::new(); + assert!(!cache.is_populated(LoadedAddr(1))); +} + +#[tokio::test] +async fn 
mark_populated_then_check() { + let cache = DCache::new(); + cache.mark_populated(LoadedAddr(1)); + assert!(cache.is_populated(LoadedAddr(1))); +} + +#[tokio::test] +async fn insert_does_not_mark_populated() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + assert!( + !cache.is_populated(LoadedAddr(1)), + "insert alone should not mark a directory as populated" + ); +} + +#[tokio::test] +async fn upsert_overwrites_existing_entry() { + let cache = DCache::new(); + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) + .await; + cache + .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(20), true) + .await; + let dv = cache.lookup(LoadedAddr(1), OsStr::new("foo")); + assert!(dv.is_some(), "entry should still be present after upsert"); + let dv = dv.expect("checked above"); + assert_eq!(dv.ino, LoadedAddr(20)); + assert!(dv.is_dir); +} From 903392f23572fa882a9c1f415fcd4c77cd0c8981 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:24:26 -0800 Subject: [PATCH 19/24] feat: add CompositeFs struct with FsDataProvider impl --- lib/fs/composite.rs | 350 +++++++++++++++++++++++++++++++++++++- src/fs/mescloud/common.rs | 3 + 2 files changed, 349 insertions(+), 4 deletions(-) diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index d8237dcb..ceb29308 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -9,11 +9,14 @@ use std::ffi::{OsStr, OsString}; use std::future::Future; use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; use bytes::Bytes; -use crate::fs::INode; -use crate::fs::async_fs::{FileReader, FsDataProvider}; +use crate::cache::async_backed::FutureBackedCache; +use crate::fs::async_fs::{FileReader, FsDataProvider, OpenFile}; +use crate::fs::bridge::ConcurrentBridge; +use crate::fs::{INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags}; /// Descriptor for a child filesystem returned by [`CompositeRoot`]. 
pub struct ChildDescriptor { @@ -73,7 +76,6 @@ mod child_inner_impl { } impl ChildInner { - #[expect(dead_code, reason = "used by CompositeFs in a follow-up commit")] pub(super) fn create(table: FutureBackedCache, provider: DP) -> Self { ChildInnerBuilder { table, @@ -82,7 +84,6 @@ mod child_inner_impl { .build() } - #[expect(dead_code, reason = "used by CompositeFs in a follow-up commit")] pub(super) fn get_fs(&self) -> &AsyncFs<'_, DP> { self.borrow_fs() } @@ -124,3 +125,344 @@ impl FileReader for CompositeReader { self.inner.close() } } + +struct ChildSlot { + inner: Arc>, + bridge: ConcurrentBridge, +} + +struct CompositeFsInner { + root: R, + /// Child slots, indexed by slot number. + slots: scc::HashMap>, + /// Maps a composite-level outer inode to its child slot index. + addr_to_slot: scc::HashMap, + /// Maps child name to slot index (for dedup on concurrent resolve). + name_to_slot: scc::HashMap, + /// Monotonically increasing slot counter. + next_slot: AtomicU64, + /// Monotonically increasing inode counter. Starts at 2 (1 = root). + next_ino: AtomicU64, + /// The filesystem owner uid/gid. + fs_owner: (u32, u32), +} + +/// A generic composite filesystem that routes to child `AsyncFs` instances. +/// +/// Implements [`FsDataProvider`] so it can be used inside another `AsyncFs`. +/// Clone is cheap (shared `Arc`). +pub struct CompositeFs { + inner: Arc>, +} + +impl Clone for CompositeFs { + fn clone(&self) -> Self { + Self { + inner: Arc::clone(&self.inner), + } + } +} + +impl CompositeFs { + /// Root inode address for this composite level. + pub const ROOT_INO: InodeAddr = 1; + + /// Create a new composite filesystem. 
+ #[must_use] + pub fn new(root: R, fs_owner: (u32, u32)) -> Self { + Self { + inner: Arc::new(CompositeFsInner { + root, + slots: scc::HashMap::new(), + addr_to_slot: scc::HashMap::new(), + name_to_slot: scc::HashMap::new(), + next_slot: AtomicU64::new(0), + next_ino: AtomicU64::new(2), // 1 = root + fs_owner, + }), + } + } + + /// Build the root inode for this composite filesystem. + #[must_use] + pub fn make_root_inode(&self) -> INode { + let now = std::time::SystemTime::now(); + INode { + addr: Self::ROOT_INO, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.inner.fs_owner.0, + gid: self.inner.fs_owner.1, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, + } + } + + fn allocate_ino(&self) -> InodeAddr { + self.inner.next_ino.fetch_add(1, Ordering::Relaxed) + } + + fn make_child_dir_inode(&self, addr: InodeAddr) -> INode { + let now = std::time::SystemTime::now(); + INode { + addr, + permissions: InodePerms::from_bits_truncate(0o755), + uid: self.inner.fs_owner.0, + gid: self.inner.fs_owner.1, + create_time: now, + last_modified_at: now, + parent: Some(Self::ROOT_INO), + size: 0, + itype: INodeType::Directory, + } + } + + /// Register a child, returning the composite-level outer inode address. + /// + /// If the child is already registered by name, the existing outer address + /// is returned. Otherwise a new slot is created with a fresh inode table + /// and bridge mapping. + fn register_child(&self, desc: &ChildDescriptor) -> InodeAddr + where + R::ChildDP: Clone, + { + // Fast path: already registered by name. + match self.inner.name_to_slot.entry_sync(desc.name.clone()) { + scc::hash_map::Entry::Occupied(occ) => { + let slot_idx = *occ.get(); + // Return existing outer address for this child's root inode. 
+ if let Some(outer) = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + slot.bridge.backward(desc.root_ino.addr) + }) + .flatten() + { + return outer; + } + // Slot exists but bridge has no mapping — should not happen, + // but fall through to create a fresh slot below. + // (Remove stale name entry so the vacant path can re-insert.) + drop(occ); + self.inner.name_to_slot.remove_sync(&desc.name); + } + scc::hash_map::Entry::Vacant(vac) => { + // Claim the name slot atomically. + let outer_ino = self.allocate_ino(); + #[expect( + clippy::cast_possible_truncation, + reason = "slot index fits in usize on 64-bit" + )] + let slot_idx = self.inner.next_slot.fetch_add(1, Ordering::Relaxed) as usize; + + let table = FutureBackedCache::default(); + table.insert_sync(desc.root_ino.addr, desc.root_ino); + let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); + + let bridge = ConcurrentBridge::new(); + bridge.insert(outer_ino, desc.root_ino.addr); + + drop(self.inner.slots.insert_sync( + slot_idx, + ChildSlot { + inner: child_inner, + bridge, + }, + )); + let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + vac.insert_entry(slot_idx); + + return outer_ino; + } + } + + // Fallback: name was stale, create fresh. This path is rare. 
+ let outer_ino = self.allocate_ino(); + #[expect( + clippy::cast_possible_truncation, + reason = "slot index fits in usize on 64-bit" + )] + let slot_idx = self.inner.next_slot.fetch_add(1, Ordering::Relaxed) as usize; + + let table = FutureBackedCache::default(); + table.insert_sync(desc.root_ino.addr, desc.root_ino); + let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); + + let bridge = ConcurrentBridge::new(); + bridge.insert(outer_ino, desc.root_ino.addr); + + drop(self.inner.slots.insert_sync( + slot_idx, + ChildSlot { + inner: child_inner, + bridge, + }, + )); + let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + drop( + self.inner + .name_to_slot + .insert_sync(desc.name.clone(), slot_idx), + ); + + outer_ino + } +} + +impl FsDataProvider for CompositeFs +where + R::ChildDP: Clone, + <::ChildDP as FsDataProvider>::Reader: 'static, +{ + type Reader = CompositeReader<<::ChildDP as FsDataProvider>::Reader>; + + async fn lookup(&self, parent: INode, name: &OsStr) -> Result { + if parent.addr == Self::ROOT_INO { + let desc = self + .inner + .root + .resolve_child(name) + .await? + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let outer_ino = self.register_child(&desc); + Ok(self.make_child_dir_inode(outer_ino)) + } else { + let slot_idx = self + .inner + .addr_to_slot + .read_sync(&parent.addr, |_, &v| v) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + // Extract Arc and inner parent address under the guard. + let (child, inner_parent) = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + (Arc::clone(&slot.inner), slot.bridge.forward(parent.addr)) + }) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let inner_parent = + inner_parent.ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + // Await the lookup outside any scc guard. 
+ let tracked = child + .get_fs() + .lookup(LoadedAddr(inner_parent), name) + .await?; + let child_inode = tracked.inode; + + // Translate inner address back to composite-level address. + let outer_ino = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + let next_ino = &self.inner.next_ino; + slot.bridge.backward_or_insert(child_inode.addr, || { + next_ino.fetch_add(1, Ordering::Relaxed) + }) + }) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + + Ok(INode { + addr: outer_ino, + ..child_inode + }) + } + } + + async fn readdir(&self, parent: INode) -> Result, std::io::Error> { + if parent.addr == Self::ROOT_INO { + let children = self.inner.root.list_children().await?; + let mut entries = Vec::with_capacity(children.len()); + for desc in &children { + let outer_ino = self.register_child(desc); + entries.push((desc.name.clone(), self.make_child_dir_inode(outer_ino))); + } + Ok(entries) + } else { + let slot_idx = self + .inner + .addr_to_slot + .read_sync(&parent.addr, |_, &v| v) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let (child, inner_parent) = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + (Arc::clone(&slot.inner), slot.bridge.forward(parent.addr)) + }) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let inner_parent = + inner_parent.ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + // Collect child entries outside the guard. + let mut child_entries = Vec::new(); + child + .get_fs() + .readdir(LoadedAddr(inner_parent), 0, |de, _offset| { + child_entries.push((de.name.to_os_string(), de.inode)); + false + }) + .await?; + + // Translate all inner addresses to composite-level addresses. 
+ let mut entries = Vec::with_capacity(child_entries.len()); + for (name, child_inode) in child_entries { + let outer_ino = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + let next_ino = &self.inner.next_ino; + slot.bridge.backward_or_insert(child_inode.addr, || { + next_ino.fetch_add(1, Ordering::Relaxed) + }) + }) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + entries.push(( + name, + INode { + addr: outer_ino, + ..child_inode + }, + )); + } + Ok(entries) + } + } + + async fn open(&self, inode: INode, flags: OpenFlags) -> Result { + let slot_idx = self + .inner + .addr_to_slot + .read_sync(&inode.addr, |_, &v| v) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let (child, inner_ino) = self + .inner + .slots + .read_sync(&slot_idx, |_, slot| { + (Arc::clone(&slot.inner), slot.bridge.forward(inode.addr)) + }) + .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let inner_ino = inner_ino.ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + + let open_file: OpenFile<<::ChildDP as FsDataProvider>::Reader> = + child.get_fs().open(LoadedAddr(inner_ino), flags).await?; + + Ok(CompositeReader { + inner: open_file.reader, + }) + } +} diff --git a/src/fs/mescloud/common.rs b/src/fs/mescloud/common.rs index 6e9c8bf8..473b5e54 100644 --- a/src/fs/mescloud/common.rs +++ b/src/fs/mescloud/common.rs @@ -149,6 +149,9 @@ pub(super) trait ChildFs: Send + Sync { async fn release(&mut self, ino: InodeAddr, fh: FileHandle) -> Result<(), ReleaseError>; } +// Tests kept inline: these types live in the binary crate and are not +// re-exported through the `git_fs` lib, so integration tests in `tests/` +// cannot access them. 
#[cfg(test)] mod tests { use super::*; From 4c55565e46733798ce61689a4c1a718b1a6e17d2 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:29:23 -0800 Subject: [PATCH 20/24] refactor: extract slot creation helper in register_child --- lib/fs/composite.rs | 89 ++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 46 deletions(-) diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index ceb29308..bf063307 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -219,6 +219,39 @@ impl CompositeFs { } } + /// Allocate a new child slot with a fresh inode table and bridge mapping. + /// + /// Returns `(outer_ino, slot_idx)` for the newly created slot. + fn create_child_slot(&self, desc: &ChildDescriptor) -> (InodeAddr, usize) + where + R::ChildDP: Clone, + { + let outer_ino = self.allocate_ino(); + #[expect( + clippy::cast_possible_truncation, + reason = "slot index fits in usize on 64-bit" + )] + let slot_idx = self.inner.next_slot.fetch_add(1, Ordering::Relaxed) as usize; + + let table = FutureBackedCache::default(); + table.insert_sync(desc.root_ino.addr, desc.root_ino); + let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); + + let bridge = ConcurrentBridge::new(); + bridge.insert(outer_ino, desc.root_ino.addr); + + drop(self.inner.slots.insert_sync( + slot_idx, + ChildSlot { + inner: child_inner, + bridge, + }, + )); + let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + + (outer_ino, slot_idx) + } + /// Register a child, returning the composite-level outer inode address. /// /// If the child is already registered by name, the existing outer address @@ -246,62 +279,26 @@ impl CompositeFs { // Slot exists but bridge has no mapping — should not happen, // but fall through to create a fresh slot below. // (Remove stale name entry so the vacant path can re-insert.) 
+ // + // Race window: between `drop(occ)` and the `remove_sync` below, + // another thread could read the stale entry and resolve to a + // broken slot. In the worst case two threads create separate + // slots for the same child — the last writer to `name_to_slot` + // wins and the other slot becomes orphaned. This is functionally + // harmless: the orphaned slot is never reached via name lookup + // and will not serve any future requests. drop(occ); self.inner.name_to_slot.remove_sync(&desc.name); } scc::hash_map::Entry::Vacant(vac) => { - // Claim the name slot atomically. - let outer_ino = self.allocate_ino(); - #[expect( - clippy::cast_possible_truncation, - reason = "slot index fits in usize on 64-bit" - )] - let slot_idx = self.inner.next_slot.fetch_add(1, Ordering::Relaxed) as usize; - - let table = FutureBackedCache::default(); - table.insert_sync(desc.root_ino.addr, desc.root_ino); - let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); - - let bridge = ConcurrentBridge::new(); - bridge.insert(outer_ino, desc.root_ino.addr); - - drop(self.inner.slots.insert_sync( - slot_idx, - ChildSlot { - inner: child_inner, - bridge, - }, - )); - let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + let (outer_ino, slot_idx) = self.create_child_slot(desc); vac.insert_entry(slot_idx); - return outer_ino; } } // Fallback: name was stale, create fresh. This path is rare. 
- let outer_ino = self.allocate_ino(); - #[expect( - clippy::cast_possible_truncation, - reason = "slot index fits in usize on 64-bit" - )] - let slot_idx = self.inner.next_slot.fetch_add(1, Ordering::Relaxed) as usize; - - let table = FutureBackedCache::default(); - table.insert_sync(desc.root_ino.addr, desc.root_ino); - let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); - - let bridge = ConcurrentBridge::new(); - bridge.insert(outer_ino, desc.root_ino.addr); - - drop(self.inner.slots.insert_sync( - slot_idx, - ChildSlot { - inner: child_inner, - bridge, - }, - )); - let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); + let (outer_ino, slot_idx) = self.create_child_slot(desc); drop( self.inner .name_to_slot From 5e31225a2e11801bcfffba031d55441e4ad0461f Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 14:32:54 -0800 Subject: [PATCH 21/24] test: add integration tests for generic CompositeFs --- tests/common/composite_mocks.rs | 55 ++++++ tests/common/mod.rs | 1 + tests/composite_fs_tests.rs | 285 ++++++++++++++++++++++++++++++++ 3 files changed, 341 insertions(+) create mode 100644 tests/common/composite_mocks.rs create mode 100644 tests/composite_fs_tests.rs diff --git a/tests/common/composite_mocks.rs b/tests/common/composite_mocks.rs new file mode 100644 index 00000000..413621d3 --- /dev/null +++ b/tests/common/composite_mocks.rs @@ -0,0 +1,55 @@ +#![allow(missing_docs, clippy::unwrap_used)] + +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; +use std::sync::Arc; + +use git_fs::fs::INode; +use git_fs::fs::composite::{ChildDescriptor, CompositeRoot}; + +use super::async_fs_mocks::MockFsDataProvider; + +/// A mock `CompositeRoot` that resolves children from a fixed map. 
+pub struct MockRoot { + pub children: Arc>, +} + +impl MockRoot { + pub fn new(children: HashMap) -> Self { + Self { + children: Arc::new(children), + } + } +} + +impl CompositeRoot for MockRoot { + type ChildDP = MockFsDataProvider; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + Ok(self + .children + .get(name) + .map(|(provider, root_ino)| ChildDescriptor { + name: name.to_os_string(), + provider: provider.clone(), + root_ino: *root_ino, + })) + } + + async fn list_children( + &self, + ) -> Result>, std::io::Error> { + Ok(self + .children + .iter() + .map(|(name, (provider, root_ino))| ChildDescriptor { + name: name.clone(), + provider: provider.clone(), + root_ino: *root_ino, + }) + .collect()) + } +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 2729c866..96aedec1 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1,6 +1,7 @@ #![allow(dead_code, missing_docs, clippy::unwrap_used)] pub mod async_fs_mocks; +pub mod composite_mocks; use std::sync::{Arc, Mutex}; use std::time::Duration; diff --git a/tests/composite_fs_tests.rs b/tests/composite_fs_tests.rs new file mode 100644 index 00000000..d6470a6a --- /dev/null +++ b/tests/composite_fs_tests.rs @@ -0,0 +1,285 @@ +#![allow(clippy::unwrap_used, clippy::expect_used, missing_docs)] + +mod common; + +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; + +use bytes::Bytes; + +use git_fs::cache::async_backed::FutureBackedCache; +use git_fs::fs::async_fs::AsyncFs; +use git_fs::fs::composite::CompositeFs; +use git_fs::fs::{INode, INodeType, LoadedAddr, OpenFlags}; + +use common::async_fs_mocks::{MockFsDataProvider, MockFsState, make_inode}; +use common::composite_mocks::MockRoot; + +/// Build a child data provider with a root directory and a set of children. +/// +/// Each child is `(name, addr, itype, size)`. Files get auto-generated content +/// of the form `"content of {name}"`. 
+fn make_child_provider( + root_addr: u64, + children: &[(&str, u64, INodeType, u64)], +) -> (MockFsDataProvider, INode) { + let root = make_inode(root_addr, INodeType::Directory, 0, None); + let mut state = MockFsState::default(); + let mut dir_entries = Vec::new(); + for (name, addr, itype, size) in children { + let child = make_inode(*addr, *itype, *size, Some(root_addr)); + state + .lookups + .insert((root_addr, OsString::from(name)), child); + dir_entries.push((OsString::from(name), child)); + if *itype == INodeType::File { + state + .file_contents + .insert(*addr, Bytes::from(format!("content of {name}"))); + } + } + state.directories.insert(root_addr, dir_entries); + (MockFsDataProvider::new(state), root) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_root_lookup_resolves_child() { + let (provider, root_ino) = make_child_provider(100, &[("file.txt", 101, INodeType::File, 42)]); + + let mut children = HashMap::new(); + children.insert(OsString::from("repo-a"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + let tracked = afs + .lookup(LoadedAddr(1), OsStr::new("repo-a")) + .await + .unwrap(); + + assert_eq!( + tracked.inode.itype, + INodeType::Directory, + "child should appear as a directory at composite level" + ); + assert_ne!( + tracked.inode.addr, 1, + "child should have a composite-level address different from root" + ); + assert_eq!( + tracked.inode.parent, + Some(1), + "child directory should have the composite root as parent" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_root_readdir_lists_children() { + let (prov_a, root_a) = make_child_provider(100, &[]); + let (prov_b, root_b) = 
make_child_provider(200, &[]); + + let mut children = HashMap::new(); + children.insert(OsString::from("alpha"), (prov_a, root_a)); + children.insert(OsString::from("beta"), (prov_b, root_b)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + let mut entries = Vec::new(); + afs.readdir(LoadedAddr(1), 0, |de, _offset| { + entries.push(de.name.to_os_string()); + false + }) + .await + .unwrap(); + + entries.sort(); + assert_eq!(entries.len(), 2, "should list both children"); + assert_eq!(entries[0], "alpha"); + assert_eq!(entries[1], "beta"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_delegated_lookup_reaches_child() { + let (provider, root_ino) = make_child_provider( + 100, + &[ + ("readme.md", 101, INodeType::File, 256), + ("src", 102, INodeType::Directory, 0), + ], + ); + + let mut children = HashMap::new(); + children.insert(OsString::from("my-repo"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + // First, lookup the child at root level. + let child_dir = afs + .lookup(LoadedAddr(1), OsStr::new("my-repo")) + .await + .unwrap(); + let child_addr = child_dir.inode.addr; + + // Then, lookup a file inside the child. + let file = afs + .lookup(LoadedAddr(child_addr), OsStr::new("readme.md")) + .await + .unwrap(); + + assert_eq!(file.inode.itype, INodeType::File); + assert_eq!(file.inode.size, 256); + + // Also lookup a subdirectory inside the child. 
+ let subdir = afs + .lookup(LoadedAddr(child_addr), OsStr::new("src")) + .await + .unwrap(); + + assert_eq!(subdir.inode.itype, INodeType::Directory); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_open_and_read_through_child() { + let (provider, root_ino) = make_child_provider(100, &[("hello.txt", 101, INodeType::File, 20)]); + + let mut children = HashMap::new(); + children.insert(OsString::from("repo"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + // Navigate to the file. + let child_dir = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); + let file_tracked = afs + .lookup(LoadedAddr(child_dir.inode.addr), OsStr::new("hello.txt")) + .await + .unwrap(); + let file_addr = file_tracked.inode.addr; + + // Open and read. 
+ let open_file = afs + .open(LoadedAddr(file_addr), OpenFlags::empty()) + .await + .unwrap(); + let data = open_file.read(0, 1024).await.unwrap(); + + assert_eq!( + data, + Bytes::from("content of hello.txt"), + "should read the file content through the composite layer" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_lookup_unknown_child_returns_enoent() { + let (provider, root_ino) = make_child_provider(100, &[]); + + let mut children = HashMap::new(); + children.insert(OsString::from("existing"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + let err = afs + .lookup(LoadedAddr(1), OsStr::new("nonexistent")) + .await + .unwrap_err(); + + assert_eq!( + err.raw_os_error(), + Some(libc::ENOENT), + "looking up a nonexistent child at root should return ENOENT" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_readdir_delegated_lists_child_contents() { + let (provider, root_ino) = make_child_provider( + 100, + &[ + ("a.rs", 101, INodeType::File, 10), + ("b.rs", 102, INodeType::File, 20), + ("lib", 103, INodeType::Directory, 0), + ], + ); + + let mut children = HashMap::new(); + children.insert(OsString::from("repo"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + // Navigate into the child. + let child_dir = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); + + // Readdir inside the child. 
+ let mut entries = Vec::new(); + afs.readdir(LoadedAddr(child_dir.inode.addr), 0, |de, _offset| { + entries.push((de.name.to_os_string(), de.inode.itype)); + false + }) + .await + .unwrap(); + + entries.sort_by(|(a, _), (b, _)| a.cmp(b)); + assert_eq!(entries.len(), 3); + assert_eq!(entries[0], (OsString::from("a.rs"), INodeType::File)); + assert_eq!(entries[1], (OsString::from("b.rs"), INodeType::File)); + assert_eq!(entries[2], (OsString::from("lib"), INodeType::Directory)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn composite_repeated_lookup_returns_same_addr() { + let (provider, root_ino) = make_child_provider(100, &[]); + + let mut children = HashMap::new(); + children.insert(OsString::from("repo"), (provider, root_ino)); + + let mock_root = MockRoot::new(children); + let composite = CompositeFs::new(mock_root, (1000, 1000)); + let root_inode = composite.make_root_inode(); + + let table = FutureBackedCache::default(); + table.insert_sync(1, root_inode); + let afs = AsyncFs::new_preseeded(composite, &table); + + let first = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); + let second = afs.lookup(LoadedAddr(1), OsStr::new("repo")).await.unwrap(); + + assert_eq!( + first.inode.addr, second.inode.addr, + "repeated lookups for the same child should return the same composite address" + ); +} From aa989f7118c8159bce364af0b9d8f535022c6955 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 15:03:06 -0800 Subject: [PATCH 22/24] feat: add domain roots (MesaRoot, StandardOrgRoot, GithubOrgRoot) and OrgChildDP enum --- src/fs/mescloud/mod.rs | 1 + src/fs/mescloud/roots.rs | 483 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 484 insertions(+) create mode 100644 src/fs/mescloud/roots.rs diff --git a/src/fs/mescloud/mod.rs b/src/fs/mescloud/mod.rs index 15a70725..a9e5e155 100644 --- a/src/fs/mescloud/mod.rs +++ b/src/fs/mescloud/mod.rs @@ -32,6 +32,7 @@ pub use org::OrgConfig; use org::OrgFs; pub mod 
repo; +mod roots; struct HeaderInjector<'a>(&'a mut reqwest::header::HeaderMap); diff --git a/src/fs/mescloud/roots.rs b/src/fs/mescloud/roots.rs new file mode 100644 index 00000000..aafe0c4c --- /dev/null +++ b/src/fs/mescloud/roots.rs @@ -0,0 +1,483 @@ +//! Domain-specific [`CompositeRoot`] implementations and the [`OrgChildDP`] enum. +//! +//! Bridges the generic `CompositeFs` from `lib/fs/composite.rs` with +//! Mesa/GitHub-specific org and repo resolution logic. +//! +//! These types are not yet wired into the daemon entry point; they will be +//! connected in a follow-up change that replaces the old `MesaFS` + `OrgFs` +//! pipeline. +#![expect(dead_code, reason = "wired in the follow-up daemon change")] + +use std::ffi::{OsStr, OsString}; +use std::future::Future; +use std::sync::Arc; +use std::time::SystemTime; + +use base64::Engine as _; +use futures::TryStreamExt as _; +use mesa_dev::MesaClient; +use tracing::warn; + +use git_fs::cache::fcache::FileCache; +use git_fs::fs::async_fs::{FileReader, FsDataProvider}; +use git_fs::fs::composite::{ChildDescriptor, CompositeFs, CompositeReader, CompositeRoot}; +use git_fs::fs::{INode, INodeType, InodeAddr, InodePerms, OpenFlags}; + +use super::common::MesaApiError; +use super::repo::{MesFileReader, MesRepoProvider}; +use crate::app_config::CacheConfig; + +const CHILD_ROOT_ADDR: InodeAddr = 1; + +fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { + match &e { + MesaApiError::Response { status, .. } if *status == 404 => { + std::io::Error::from_raw_os_error(libc::ENOENT) + } + MesaApiError::Reqwest(_) + | MesaApiError::ReqwestMiddleware(_) + | MesaApiError::Serde(_) + | MesaApiError::SerdePath(_) + | MesaApiError::Io(_) + | MesaApiError::Response { .. } => std::io::Error::other(e), + } +} + +/// Create a [`MesRepoProvider`] and its root [`INode`] for a given repo. 
+async fn create_repo_provider( + client: &MesaClient, + org_name: &str, + repo_name: &str, + ref_: &str, + fs_owner: (u32, u32), + cache_config: &CacheConfig, +) -> (MesRepoProvider, INode) { + let file_cache = match cache_config.max_size { + Some(max_size) if max_size.as_u64() > 0 => { + let cache_dir = cache_config.path.join(org_name).join(repo_name); + let max_bytes = max_size.as_u64().try_into().unwrap_or(usize::MAX); + match FileCache::new(&cache_dir, max_bytes).await { + Ok(cache) => Some(Arc::new(cache)), + Err(e) => { + warn!(error = ?e, org = %org_name, repo = %repo_name, + "failed to create file cache, continuing without caching"); + None + } + } + } + _ => None, + }; + + let provider = MesRepoProvider::new( + client.clone(), + org_name.to_owned(), + repo_name.to_owned(), + ref_.to_owned(), + fs_owner, + file_cache, + ); + + provider.seed_root_path(CHILD_ROOT_ADDR); + + let now = SystemTime::now(); + let root_ino = INode { + addr: CHILD_ROOT_ADDR, + permissions: InodePerms::from_bits_truncate(0o755), + uid: fs_owner.0, + gid: fs_owner.1, + create_time: now, + last_modified_at: now, + parent: None, + size: 0, + itype: INodeType::Directory, + }; + + (provider, root_ino) +} + +/// Returns `Ok(())` if the error is a 404; otherwise returns the IO error. +/// +/// Callers use this to treat 404 as "not found" (return `Ok(None)`) while +/// propagating all other API errors. +fn check_not_found(e: MesaApiError) -> Result<(), std::io::Error> { + match &e { + MesaApiError::Response { status, .. } if *status == 404 => Ok(()), + MesaApiError::Reqwest(_) + | MesaApiError::ReqwestMiddleware(_) + | MesaApiError::Serde(_) + | MesaApiError::SerdePath(_) + | MesaApiError::Io(_) + | MesaApiError::Response { .. 
} => Err(mesa_api_error_to_io(e)), + } +} + +pub(super) struct StandardOrgRoot { + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), +} + +impl StandardOrgRoot { + pub(super) fn new( + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), + ) -> Self { + Self { + client, + org_name, + cache_config, + fs_owner, + } + } +} + +impl CompositeRoot for StandardOrgRoot { + type ChildDP = MesRepoProvider; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + let name_str = name.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "repo name contains non-UTF-8 characters", + ) + })?; + + let repo = match self + .client + .org(&self.org_name) + .repos() + .at(name_str) + .get() + .await + .map_err(MesaApiError::from) + { + Ok(repo) => repo, + Err(e) => { + check_not_found(e)?; + return Ok(None); + } + }; + + // Single-repo GET returns `default_branch: String` (non-optional), + // unlike the list endpoint which returns `Option`. 
+ let (provider, root_ino) = create_repo_provider( + &self.client, + &self.org_name, + name_str, + &repo.default_branch, + self.fs_owner, + &self.cache_config, + ) + .await; + + Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider, + root_ino, + })) + } + + async fn list_children(&self) -> Result>, std::io::Error> { + let repos: Vec = self + .client + .org(&self.org_name) + .repos() + .list(None) + .try_collect() + .await + .map_err(MesaApiError::from) + .map_err(mesa_api_error_to_io)?; + + let mut children = Vec::with_capacity(repos.len()); + for repo in repos { + let Some(repo_name) = repo.name else { + continue; + }; + let default_branch = repo.default_branch.unwrap_or_else(|| "main".to_owned()); + + let (provider, root_ino) = create_repo_provider( + &self.client, + &self.org_name, + &repo_name, + &default_branch, + self.fs_owner, + &self.cache_config, + ) + .await; + + children.push(ChildDescriptor { + name: OsString::from(repo_name), + provider, + root_ino, + }); + } + + Ok(children) + } +} + +pub(super) struct GithubRepoRoot { + client: MesaClient, + org_name: String, + owner: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), +} + +impl CompositeRoot for GithubRepoRoot { + type ChildDP = MesRepoProvider; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + let repo_name = name.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "repo name contains non-UTF-8 characters", + ) + })?; + + let full_decoded = format!("{}/{}", self.owner, repo_name); + let encoded = base64::engine::general_purpose::STANDARD.encode(&full_decoded); + + let repo = match self + .client + .org(&self.org_name) + .repos() + .at(&encoded) + .get() + .await + .map_err(MesaApiError::from) + { + Ok(repo) => repo, + Err(e) => { + check_not_found(e)?; + return Ok(None); + } + }; + + // Single-repo GET returns `default_branch: String` (non-optional). 
+ let (provider, root_ino) = create_repo_provider( + &self.client, + &self.org_name, + &encoded, + &repo.default_branch, + self.fs_owner, + &self.cache_config, + ) + .await; + + Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider, + root_ino, + })) + } + + async fn list_children(&self) -> Result>, std::io::Error> { + Err(std::io::Error::from_raw_os_error(libc::EPERM)) + } +} + +pub(super) struct GithubOrgRoot { + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), +} + +impl GithubOrgRoot { + pub(super) fn new( + client: MesaClient, + org_name: String, + cache_config: CacheConfig, + fs_owner: (u32, u32), + ) -> Self { + Self { + client, + org_name, + cache_config, + fs_owner, + } + } +} + +impl CompositeRoot for GithubOrgRoot { + type ChildDP = CompositeFs; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + let owner = name.to_str().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "owner name contains non-UTF-8 characters", + ) + })?; + + let repo_root = GithubRepoRoot { + client: self.client.clone(), + org_name: self.org_name.clone(), + owner: owner.to_owned(), + cache_config: self.cache_config.clone(), + fs_owner: self.fs_owner, + }; + + let composite = CompositeFs::new(repo_root, self.fs_owner); + let root_ino = composite.make_root_inode(); + + Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider: composite, + root_ino, + })) + } + + async fn list_children(&self) -> Result>, std::io::Error> { + Err(std::io::Error::from_raw_os_error(libc::EPERM)) + } +} + +#[derive(Clone)] +pub(super) enum OrgChildDP { + Standard(CompositeFs), + Github(CompositeFs), +} + +impl OrgChildDP { + fn make_root_inode(&self) -> INode { + match self { + Self::Standard(c) => c.make_root_inode(), + Self::Github(c) => c.make_root_inode(), + } + } +} + +impl FsDataProvider for OrgChildDP { + type Reader = OrgChildReader; + + fn lookup( + &self, + parent: INode, 
+ name: &OsStr, + ) -> impl Future> + Send { + let this = self.clone(); + let name = name.to_os_string(); + async move { + match this { + Self::Standard(c) => c.lookup(parent, &name).await, + Self::Github(c) => c.lookup(parent, &name).await, + } + } + } + + fn readdir( + &self, + parent: INode, + ) -> impl Future, std::io::Error>> + Send { + let this = self.clone(); + async move { + match this { + Self::Standard(c) => c.readdir(parent).await, + Self::Github(c) => c.readdir(parent).await, + } + } + } + + fn open( + &self, + inode: INode, + flags: OpenFlags, + ) -> impl Future> + Send { + let this = self.clone(); + async move { + match this { + Self::Standard(c) => c.open(inode, flags).await.map(OrgChildReader::Standard), + Self::Github(c) => c.open(inode, flags).await.map(OrgChildReader::Github), + } + } + } +} + +pub(super) enum OrgChildReader { + Standard(CompositeReader), + Github(CompositeReader>), +} + +impl std::fmt::Debug for OrgChildReader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Standard(_) => f.debug_tuple("Standard").finish(), + Self::Github(_) => f.debug_tuple("Github").finish(), + } + } +} + +impl FileReader for OrgChildReader { + fn read( + &self, + offset: u64, + size: u32, + ) -> impl Future> + Send { + match self { + Self::Standard(r) => futures::future::Either::Left(r.read(offset, size)), + Self::Github(r) => futures::future::Either::Right(r.read(offset, size)), + } + } + + fn close(&self) -> impl Future> + Send { + match self { + Self::Standard(r) => futures::future::Either::Left(r.close()), + Self::Github(r) => futures::future::Either::Right(r.close()), + } + } +} + +pub(super) struct MesaRoot { + orgs: Vec<(OsString, OrgChildDP)>, +} + +impl MesaRoot { + pub(super) fn new(orgs: Vec<(OsString, OrgChildDP)>) -> Self { + Self { orgs } + } +} + +impl CompositeRoot for MesaRoot { + type ChildDP = OrgChildDP; + + async fn resolve_child( + &self, + name: &OsStr, + ) -> Result>, std::io::Error> { + 
let found = self.orgs.iter().find(|(n, _)| n == name); + match found { + Some((_, dp)) => Ok(Some(ChildDescriptor { + name: name.to_os_string(), + provider: dp.clone(), + root_ino: dp.make_root_inode(), + })), + None => Ok(None), + } + } + + async fn list_children(&self) -> Result>, std::io::Error> { + Ok(self + .orgs + .iter() + .map(|(name, dp)| ChildDescriptor { + name: name.clone(), + provider: dp.clone(), + root_ino: dp.make_root_inode(), + }) + .collect()) + } +} From 9885de0dc3d56f41a9c055549b35b5d47faf6154 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 15:23:43 -0800 Subject: [PATCH 23/24] refactor: wire CompositeFs into daemon, delete old composite.rs and ChildFs --- lib/fs/async_fs.rs | 3 +- src/daemon.rs | 66 ++--- src/fs/mescloud/common.rs | 139 +---------- src/fs/mescloud/composite.rs | 456 ----------------------------------- src/fs/mescloud/mod.rs | 378 +---------------------------- src/fs/mescloud/org.rs | 390 ------------------------------ src/fs/mescloud/repo.rs | 248 +------------------ src/fs/mescloud/roots.rs | 40 +-- 8 files changed, 73 insertions(+), 1647 deletions(-) delete mode 100644 src/fs/mescloud/composite.rs delete mode 100644 src/fs/mescloud/org.rs diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 761149d2..1f81a87e 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -370,8 +370,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { /// /// The `is_populated` check-then-populate is **not** atomic. If two /// concurrent callers invoke `readdir` for the same parent, both may call - /// `dp.readdir()` and insert duplicate children. This is safe when the - /// caller serializes access (e.g. via `&mut self` on the `Fs` trait). + /// `dp.readdir()` and insert duplicate children. /// /// TODO(MES-746): Implement `opendir` and `releasedir` to snapshot directory contents and /// avoid racing with `lookup`/`createfile`. 
diff --git a/src/daemon.rs b/src/daemon.rs index 0a7a9f31..102e476b 100644 --- a/src/daemon.rs +++ b/src/daemon.rs @@ -1,7 +1,6 @@ use tokio::select; use crate::app_config; -use crate::fs::mescloud::{MesaFS, OrgConfig}; use tracing::{debug, error, info}; mod managed_fuse { @@ -15,12 +14,11 @@ mod managed_fuse { use nix::errno::Errno; use git_fs::cache::async_backed::FutureBackedCache; - use git_fs::fs::{INode, INodeType, InodePerms}; - use super::{MesaFS, OrgConfig, app_config, debug, error}; - use crate::fs::mescloud::MesaFsProvider; + use super::{app_config, debug, error}; use fuser::BackgroundSession; use git_fs::fs::fuser::FuserAdapter; + use secrecy::ExposeSecret as _; pub struct FuseCoreScope { _session: BackgroundSession, @@ -40,32 +38,44 @@ mod managed_fuse { config: app_config::Config, handle: tokio::runtime::Handle, ) -> Result { - let orgs = config - .organizations - .iter() - .map(|(org_name, org)| OrgConfig { - name: org_name.clone(), - api_key: org.api_key.clone(), - }); - let mesa_fs = MesaFS::new(orgs, (config.uid, config.gid), &config.cache); + let fs_owner = (config.uid, config.gid); + + let mut org_children = Vec::new(); + for (org_name, org_conf) in &config.organizations { + let client = + crate::fs::mescloud::build_mesa_client(org_conf.api_key.expose_secret()); + let dp = if org_name == "github" { + let github_org_root = crate::fs::mescloud::roots::GithubOrgRoot::new( + client, + org_name.clone(), + config.cache.clone(), + fs_owner, + ); + crate::fs::mescloud::roots::OrgChildDP::Github( + git_fs::fs::composite::CompositeFs::new(github_org_root, fs_owner), + ) + } else { + let standard_org_root = crate::fs::mescloud::roots::StandardOrgRoot::new( + client, + org_name.clone(), + config.cache.clone(), + fs_owner, + ); + crate::fs::mescloud::roots::OrgChildDP::Standard( + git_fs::fs::composite::CompositeFs::new(standard_org_root, fs_owner), + ) + }; + org_children.push((std::ffi::OsString::from(org_name), dp)); + } + + let mesa_root = 
crate::fs::mescloud::roots::MesaRoot::new(org_children); + let composite = git_fs::fs::composite::CompositeFs::new(mesa_root, fs_owner); let table = FutureBackedCache::default(); - let now = std::time::SystemTime::now(); - let root = INode { - addr: 1, - permissions: InodePerms::from_bits_truncate(0o755), - uid: config.uid, - gid: config.gid, - create_time: now, - last_modified_at: now, - parent: None, - size: 0, - itype: INodeType::Directory, - }; - table.insert_sync(1, root); - - let provider = MesaFsProvider::new(mesa_fs); - let fuse_adapter = FuserAdapter::new(table, provider, handle); + let root_inode = composite.make_root_inode(); + table.insert_sync(1, root_inode); + + let fuse_adapter = FuserAdapter::new(table, composite, handle); let mount_opts = [ fuser::MountOption::FSName("git-fs".to_owned()), fuser::MountOption::RO, diff --git a/src/fs/mescloud/common.rs b/src/fs/mescloud/common.rs index 473b5e54..cf57e392 100644 --- a/src/fs/mescloud/common.rs +++ b/src/fs/mescloud/common.rs @@ -1,9 +1,3 @@ -//! Shared types and helpers used by both `MesaFS` and `RepoFs`. 
- -use std::ffi::{OsStr, OsString}; - -use bytes::Bytes; -use git_fs::fs::{FileHandle, INode, InodeAddr, OpenFlags as LibOpenFlags}; use mesa_dev::low_level::apis; use thiserror::Error; @@ -46,129 +40,16 @@ impl From> for MesaAp } } -#[derive(Debug, Error)] -pub enum LookupError { - #[error("inode not found")] - InodeNotFound, - - #[error("remote mesa error")] - RemoteMesaError(#[from] MesaApiError), -} - -#[derive(Debug, Error)] -pub enum GetAttrError { - #[error("inode not found")] - InodeNotFound, -} - -#[derive(Debug, Clone, Copy, Error)] -pub enum OpenError { - #[error("inode not found")] - InodeNotFound, -} - -#[derive(Debug, Error)] -pub enum ReadError { - #[error("file not open")] - FileNotOpen, - - #[error("inode not found")] - InodeNotFound, - - #[error("remote mesa error")] - RemoteMesaError(#[from] MesaApiError), - - #[error("content is not a file")] - NotAFile, - - #[error("base64 decode error: {0}")] - Base64Decode(#[from] base64::DecodeError), -} - -#[derive(Debug, Error)] -pub enum ReadDirError { - #[error("inode not found")] - InodeNotFound, - - #[error("remote mesa error")] - RemoteMesaError(#[from] MesaApiError), - - #[error("inode is not a directory")] - NotADirectory, - - #[error("operation not permitted")] - NotPermitted, -} - -impl From for ReadDirError { - fn from(e: LookupError) -> Self { - match e { - LookupError::RemoteMesaError(api) => Self::RemoteMesaError(api), - LookupError::InodeNotFound => Self::InodeNotFound, +pub(super) fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { + match &e { + MesaApiError::Response { status, .. } if *status == 404 => { + std::io::Error::from_raw_os_error(libc::ENOENT) } - } -} - -#[derive(Debug, Error)] -pub enum ReleaseError { - #[error("file not open")] - FileNotOpen, -} - -/// A directory entry for readdir results, using lib types. 
-pub struct FsDirEntry { - pub ino: InodeAddr, - pub name: OsString, -} - -/// Trait for child filesystems composed by [`CompositeFs`](super::composite::CompositeFs). -/// -/// Uses lib types (`INode`, `InodeAddr`) directly — no conversion to/from `FileAttr`. -/// Replaces the old `Fs + InodeCachePeek` bound. -#[async_trait::async_trait] -pub(super) trait ChildFs: Send + Sync { - /// Look up a child by name within the given parent directory. - async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result; - - /// List all children of a directory, returning full `INode` data for each. - async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError>; - - /// Open a file for reading. - async fn open(&mut self, ino: InodeAddr, flags: LibOpenFlags) -> Result; - - /// Read data from an open file. - async fn read( - &mut self, - ino: InodeAddr, - fh: FileHandle, - offset: u64, - size: u32, - ) -> Result; - - /// Release (close) a file handle. - async fn release(&mut self, ino: InodeAddr, fh: FileHandle) -> Result<(), ReleaseError>; -} - -// Tests kept inline: these types live in the binary crate and are not -// re-exported through the `git_fs` lib, so integration tests in `tests/` -// cannot access them. -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn lookup_inode_not_found_converts_to_readdir_inode_not_found() { - let err: ReadDirError = LookupError::InodeNotFound.into(); - assert!(matches!(err, ReadDirError::InodeNotFound)); - } - - #[test] - fn lookup_remote_error_converts_to_readdir_remote_error() { - let api_err = MesaApiError::Response { - status: 500, - body: "test".to_owned(), - }; - let err: ReadDirError = LookupError::RemoteMesaError(api_err).into(); - assert!(matches!(err, ReadDirError::RemoteMesaError(_))); + MesaApiError::Reqwest(_) + | MesaApiError::ReqwestMiddleware(_) + | MesaApiError::Serde(_) + | MesaApiError::SerdePath(_) + | MesaApiError::Io(_) + | MesaApiError::Response { .. 
} => std::io::Error::other(e), } } diff --git a/src/fs/mescloud/composite.rs b/src/fs/mescloud/composite.rs deleted file mode 100644 index 91c35806..00000000 --- a/src/fs/mescloud/composite.rs +++ /dev/null @@ -1,456 +0,0 @@ -use std::collections::HashMap; -use std::ffi::OsStr; -use std::sync::atomic::{AtomicU64, Ordering}; - -use bytes::Bytes; -use git_fs::cache::async_backed::FutureBackedCache; -use git_fs::fs::dcache::DCache; -use git_fs::fs::{ - AsyncFsStats, FileHandle, INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags, -}; -use rustc_hash::FxHashMap; -use tracing::{instrument, trace}; - -use super::common::{ - ChildFs, FsDirEntry, GetAttrError, LookupError, OpenError, ReadDirError, ReadError, - ReleaseError, -}; - -/// Bidirectional inode mapping between outer (composite) and inner (child) address spaces. -/// -/// Convention: **outer = left, inner = right**. -pub(super) struct InodeBridge { - map: bimap::BiMap, -} - -impl InodeBridge { - pub fn new() -> Self { - Self { - map: bimap::BiMap::new(), - } - } - - pub fn insert(&mut self, outer: InodeAddr, inner: InodeAddr) { - self.map.insert(outer, inner); - } - - pub fn forward(&self, outer: InodeAddr) -> Option { - self.map.get_by_left(&outer).copied() - } - - #[expect(dead_code, reason = "will be needed by future callers")] - pub fn backward(&self, inner: InodeAddr) -> Option { - self.map.get_by_right(&inner).copied() - } - - /// Look up inner->outer, or allocate a new outer address if unmapped. 
- pub fn backward_or_insert( - &mut self, - inner: InodeAddr, - allocate: impl FnOnce() -> InodeAddr, - ) -> InodeAddr { - if let Some(&outer) = self.map.get_by_right(&inner) { - outer - } else { - let outer = allocate(); - self.map.insert(outer, inner); - outer - } - } - - pub fn remove_by_outer(&mut self, outer: InodeAddr) { - self.map.remove_by_left(&outer); - } - - #[expect(dead_code, reason = "will be needed by future callers")] - pub fn get_inner(&self, outer: InodeAddr) -> Option<&InodeAddr> { - self.map.get_by_left(&outer) - } -} - -pub(super) struct ChildSlot { - pub inner: Inner, - pub bridge: InodeBridge, -} - -/// Tracks an open file: which child slot owns it and the inner fh. -struct OpenFileEntry { - slot_idx: usize, - inner_ino: InodeAddr, - inner_fh: FileHandle, -} - -pub(super) struct CompositeFs { - pub(super) inode_table: FutureBackedCache, - pub(super) directory_cache: DCache, - next_ino: AtomicU64, - next_fh: AtomicU64, - refcounts: FxHashMap, - pub(super) readdir_buf: Vec, - open_files: HashMap, - pub(super) child_inodes: HashMap, - pub(super) inode_to_slot: HashMap, - pub(super) slots: Vec>, - fs_owner: (u32, u32), - block_size: u32, -} - -impl CompositeFs { - pub const ROOT_INO: InodeAddr = 1; - - pub fn new(fs_owner: (u32, u32), block_size: u32) -> Self { - let inode_table = FutureBackedCache::default(); - let now = std::time::SystemTime::now(); - let root = INode { - addr: Self::ROOT_INO, - permissions: InodePerms::from_bits_truncate(0o755), - uid: fs_owner.0, - gid: fs_owner.1, - create_time: now, - last_modified_at: now, - parent: None, - size: 0, - itype: INodeType::Directory, - }; - inode_table.insert_sync(Self::ROOT_INO, root); - - let mut refcounts = FxHashMap::default(); - refcounts.insert(Self::ROOT_INO, 1); - - Self { - inode_table, - directory_cache: DCache::new(), - next_ino: AtomicU64::new(Self::ROOT_INO + 1), - next_fh: AtomicU64::new(1), - refcounts, - readdir_buf: Vec::new(), - open_files: HashMap::new(), - child_inodes: 
HashMap::new(), - inode_to_slot: HashMap::new(), - slots: Vec::new(), - fs_owner, - block_size, - } - } - - pub fn allocate_inode(&self) -> InodeAddr { - self.next_ino.fetch_add(1, Ordering::Relaxed) - } - - pub fn fs_owner(&self) -> (u32, u32) { - self.fs_owner - } - - #[expect(dead_code, reason = "available for future use")] - pub fn block_size(&self) -> u32 { - self.block_size - } - - pub fn add_child(&mut self, inner: Inner, child_root_ino: InodeAddr) -> InodeAddr { - self.add_child_with_parent(inner, child_root_ino, Self::ROOT_INO) - } - - pub fn cache_inode(&self, inode: INode) { - self.inode_table.insert_sync(inode.addr, inode); - } - - /// Insert the inode into the table and initialise its refcount to zero. - /// - /// The caller is responsible for bumping the refcount via [`inc_rc`](Self::inc_rc). - pub fn cache_inode_and_init_rc(&mut self, inode: INode) { - let addr = inode.addr; - self.inode_table.insert_sync(addr, inode); - self.refcounts.entry(addr).or_insert(0); - } - - pub fn inc_rc(&mut self, addr: InodeAddr) -> Option { - let rc = self.refcounts.get_mut(&addr)?; - *rc += 1; - Some(*rc) - } - - pub fn slot_for_inode(&self, ino: InodeAddr) -> Option { - self.inode_to_slot.get(&ino).copied() - } - - /// Like [`add_child`](Self::add_child) but sets a custom parent inode - /// instead of always using `ROOT_INO`. 
- pub fn add_child_with_parent( - &mut self, - inner: Inner, - child_root_ino: InodeAddr, - parent_ino: InodeAddr, - ) -> InodeAddr { - let outer_ino = self.allocate_inode(); - let now = std::time::SystemTime::now(); - let inode = INode { - addr: outer_ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.fs_owner.0, - gid: self.fs_owner.1, - create_time: now, - last_modified_at: now, - parent: Some(parent_ino), - size: 0, - itype: INodeType::Directory, - }; - self.inode_table.insert_sync(outer_ino, inode); - - let mut bridge = InodeBridge::new(); - bridge.insert(outer_ino, child_root_ino); - - let idx = self.slots.len(); - self.slots.push(ChildSlot { inner, bridge }); - self.child_inodes.insert(outer_ino, idx); - self.inode_to_slot.insert(outer_ino, idx); - - outer_ino - } -} - -impl CompositeFs { - #[instrument(name = "CompositeFs::delegated_lookup", skip(self, name))] - pub async fn delegated_lookup( - &mut self, - parent: InodeAddr, - name: &OsStr, - ) -> Result { - // Fast path: DCache hit + inode still in table - if let Some(dentry) = self.directory_cache.lookup(LoadedAddr(parent), name) - && let Some(inode) = self.inode_table.get(&dentry.ino.0).await - { - *self.refcounts.entry(inode.addr).or_insert(0) += 1; - return Ok(inode); - } - - // Slow path: delegate to child - let idx = self - .inode_to_slot - .get(&parent) - .copied() - .ok_or(LookupError::InodeNotFound)?; - let inner_parent = self.slots[idx] - .bridge - .forward(parent) - .ok_or(LookupError::InodeNotFound)?; - let inner_inode = self.slots[idx].inner.lookup(inner_parent, name).await?; - - let next_ino = &self.next_ino; - let outer_ino = self.slots[idx] - .bridge - .backward_or_insert(inner_inode.addr, || { - next_ino.fetch_add(1, Ordering::Relaxed) - }); - self.inode_to_slot.insert(outer_ino, idx); - - let remapped = INode { - addr: outer_ino, - ..inner_inode - }; - self.inode_table - .get_or_init(outer_ino, || async move { remapped }) - .await; - - let is_dir = 
matches!(inner_inode.itype, INodeType::Directory); - self.directory_cache - .insert( - LoadedAddr(parent), - name.to_os_string(), - LoadedAddr(outer_ino), - is_dir, - ) - .await; - - *self.refcounts.entry(outer_ino).or_insert(0) += 1; - let rc = self.refcounts[&outer_ino]; - trace!( - outer_ino, - inner_ino = inner_inode.addr, - rc, - "lookup: resolved via delegation" - ); - - Ok(remapped) - } - - #[instrument(name = "CompositeFs::delegated_readdir", skip(self))] - pub async fn delegated_readdir( - &mut self, - ino: InodeAddr, - ) -> Result<&[FsDirEntry], ReadDirError> { - let idx = self - .inode_to_slot - .get(&ino) - .copied() - .ok_or(ReadDirError::InodeNotFound)?; - - if !self.directory_cache.is_populated(LoadedAddr(ino)) { - let inner_ino = self.slots[idx] - .bridge - .forward(ino) - .ok_or(ReadDirError::InodeNotFound)?; - let inner_entries = self.slots[idx].inner.readdir(inner_ino).await?; - - for (name, child_inode) in &inner_entries { - let next_ino = &self.next_ino; - let outer_child = self.slots[idx] - .bridge - .backward_or_insert(child_inode.addr, || { - next_ino.fetch_add(1, Ordering::Relaxed) - }); - self.inode_to_slot.insert(outer_child, idx); - - let remapped = INode { - addr: outer_child, - ..*child_inode - }; - self.inode_table - .get_or_init(outer_child, || async move { remapped }) - .await; - - let is_dir = matches!(child_inode.itype, INodeType::Directory); - self.directory_cache - .insert( - LoadedAddr(ino), - name.clone(), - LoadedAddr(outer_child), - is_dir, - ) - .await; - } - - self.directory_cache.mark_populated(LoadedAddr(ino)); - } - - let mut children = self.directory_cache.readdir(LoadedAddr(ino)).await; - children.sort_unstable_by(|(a, _), (b, _)| a.cmp(b)); - - let mut entries = Vec::with_capacity(children.len()); - for (name, dvalue) in &children { - if let Some(inode) = self.inode_table.get(&dvalue.ino.0).await { - entries.push(FsDirEntry { - ino: inode.addr, - name: name.clone(), - }); - } - } - - self.readdir_buf = entries; - 
Ok(&self.readdir_buf) - } - - #[instrument(name = "CompositeFs::delegated_getattr", skip(self))] - pub async fn delegated_getattr(&self, ino: InodeAddr) -> Result { - self.inode_table - .get(&ino) - .await - .ok_or(GetAttrError::InodeNotFound) - } - - #[expect(dead_code, reason = "will be needed by future callers")] - #[must_use] - pub fn delegated_statfs(&self) -> AsyncFsStats { - AsyncFsStats { - block_size: self.block_size, - total_blocks: 0, - free_blocks: 0, - available_blocks: 0, - total_inodes: self.inode_table.len() as u64, - free_inodes: 0, - max_filename_length: 255, - } - } - - #[instrument(name = "CompositeFs::delegated_open", skip(self))] - pub async fn delegated_open( - &mut self, - ino: InodeAddr, - flags: OpenFlags, - ) -> Result { - let idx = self - .inode_to_slot - .get(&ino) - .copied() - .ok_or(OpenError::InodeNotFound)?; - let inner_ino = self.slots[idx] - .bridge - .forward(ino) - .ok_or(OpenError::InodeNotFound)?; - let inner_fh = self.slots[idx].inner.open(inner_ino, flags).await?; - - let outer_fh = self.next_fh.fetch_add(1, Ordering::Relaxed); - self.open_files.insert( - outer_fh, - OpenFileEntry { - slot_idx: idx, - inner_ino, - inner_fh, - }, - ); - - trace!(ino, outer_fh, inner_fh, "open: assigned fh"); - Ok(outer_fh) - } - - #[instrument(name = "CompositeFs::delegated_read", skip(self))] - pub async fn delegated_read( - &mut self, - fh: FileHandle, - offset: u64, - size: u32, - ) -> Result { - let entry = self.open_files.get(&fh).ok_or(ReadError::FileNotOpen)?; - let slot_idx = entry.slot_idx; - let inner_ino = entry.inner_ino; - let inner_fh = entry.inner_fh; - self.slots[slot_idx] - .inner - .read(inner_ino, inner_fh, offset, size) - .await - } - - #[instrument(name = "CompositeFs::delegated_release", skip(self))] - pub async fn delegated_release(&mut self, fh: FileHandle) -> Result<(), ReleaseError> { - let entry = self - .open_files - .remove(&fh) - .ok_or(ReleaseError::FileNotOpen)?; - let result = self.slots[entry.slot_idx] - 
.inner - .release(entry.inner_ino, entry.inner_fh) - .await; - trace!(fh, "release: cleaned up fh mapping"); - result - } - - /// Returns `true` if the inode was evicted. - /// - /// The composite only manages its own refcounts and inode table. - /// Inner filesystem inodes are managed by the inner FS itself through - /// its own lifecycle; the composite does not propagate forget to children. - #[expect(dead_code, reason = "will be needed by future callers")] - #[must_use] - #[instrument(name = "CompositeFs::delegated_forget", skip(self))] - pub fn delegated_forget(&mut self, ino: InodeAddr, nlookups: u64) -> bool { - let slot_idx = self.inode_to_slot.get(&ino).copied(); - - if let Some(rc) = self.refcounts.get_mut(&ino) { - *rc = rc.saturating_sub(nlookups); - if *rc > 0 { - return false; - } - self.refcounts.remove(&ino); - } else { - return false; - } - - self.inode_table.remove_sync(&ino); - self.child_inodes.remove(&ino); - self.inode_to_slot.remove(&ino); - if let Some(idx) = slot_idx { - self.slots[idx].bridge.remove_by_outer(ino); - } - - true - } -} diff --git a/src/fs/mescloud/mod.rs b/src/fs/mescloud/mod.rs index a9e5e155..ab3745db 100644 --- a/src/fs/mescloud/mod.rs +++ b/src/fs/mescloud/mod.rs @@ -1,38 +1,15 @@ -use std::ffi::{OsStr, OsString}; -use std::future::Future; -use std::sync::Arc; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::time::SystemTime; - -use bytes::Bytes; -use git_fs::fs::{FileHandle, INode, INodeType, InodeAddr, InodePerms, OpenFlags}; use mesa_dev::MesaClient; use opentelemetry::propagation::Injector; -use secrecy::ExposeSecret as _; -use tracing::{instrument, trace, warn}; use tracing_opentelemetry::OpenTelemetrySpanExt as _; -use crate::app_config::CacheConfig; - -pub use common::FsDirEntry; -use composite::CompositeFs; - -pub use common::{GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; - #[cfg(feature = "staging")] const MESA_API_BASE_URL: &str = "https://staging.depot.mesa.dev/api/v1"; 
#[cfg(not(feature = "staging"))] const MESA_API_BASE_URL: &str = "https://depot.mesa.dev/api/v1"; mod common; -mod composite; - -mod org; -pub use org::OrgConfig; -use org::OrgFs; - pub mod repo; -mod roots; +pub mod roots; struct HeaderInjector<'a>(&'a mut reqwest::header::HeaderMap); @@ -72,7 +49,7 @@ impl reqwest_middleware::Middleware for OtelPropagationMiddleware { } } -fn build_mesa_client(api_key: &str) -> MesaClient { +pub fn build_mesa_client(api_key: &str) -> MesaClient { let client = reqwest_middleware::ClientBuilder::new(reqwest::Client::new()) .with(OtelPropagationMiddleware) .build(); @@ -82,354 +59,3 @@ fn build_mesa_client(api_key: &str) -> MesaClient { .with_client(client) .build() } - -/// Classifies an inode by its role in the mesa hierarchy. -enum InodeRole { - /// The filesystem root (ino == 1). - Root, - /// An inode owned by some org. - OrgOwned, -} - -/// The top-level `MesaFS` filesystem. -/// -/// Composes multiple [`OrgFs`] instances, each with its own inode namespace, -/// delegating to [`CompositeFs`] for inode/fh translation at each boundary. -pub struct MesaFS { - composite: CompositeFs, -} - -impl MesaFS { - const ROOT_NODE_INO: InodeAddr = CompositeFs::::ROOT_INO; - const BLOCK_SIZE: u32 = 4096; - - /// Create a new `MesaFS` instance. - #[must_use] - pub fn new( - orgs: impl Iterator, - fs_owner: (u32, u32), - cache: &CacheConfig, - ) -> Self { - let mut composite = CompositeFs::new(fs_owner, Self::BLOCK_SIZE); - for org_conf in orgs { - let client = build_mesa_client(org_conf.api_key.expose_secret()); - let org = OrgFs::new(org_conf.name, client, fs_owner, cache.clone()); - composite.add_child(org, OrgFs::ROOT_INO); - } - Self { composite } - } - - /// Classify an inode by its role. 
- fn inode_role(&self, ino: InodeAddr) -> Option { - if ino == Self::ROOT_NODE_INO { - return Some(InodeRole::Root); - } - if self.composite.child_inodes.contains_key(&ino) { - return Some(InodeRole::OrgOwned); - } - if self.composite.slot_for_inode(ino).is_some() { - return Some(InodeRole::OrgOwned); - } - None - } - - /// Ensure a mesa-level inode exists for the org at `org_idx`. - /// Does NOT bump rc. - async fn ensure_org_inode(&mut self, org_idx: usize) -> (InodeAddr, INode) { - let existing_ino = self - .composite - .child_inodes - .iter() - .find(|&(_, &idx)| idx == org_idx) - .map(|(&ino, _)| ino); - - if let Some(existing_ino) = existing_ino { - if let Ok(inode) = self.composite.delegated_getattr(existing_ino).await { - trace!( - ino = existing_ino, - org_idx, "ensure_org_inode: reusing existing inode" - ); - return (existing_ino, inode); - } - warn!( - ino = existing_ino, - org_idx, "ensure_org_inode: evicted, rebuilding" - ); - let now = SystemTime::now(); - let inode = INode { - addr: existing_ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.composite.fs_owner().0, - gid: self.composite.fs_owner().1, - create_time: now, - last_modified_at: now, - parent: Some(Self::ROOT_NODE_INO), - size: 0, - itype: INodeType::Directory, - }; - self.composite.cache_inode(inode); - self.composite.inode_to_slot.insert(existing_ino, org_idx); - self.composite.child_inodes.insert(existing_ino, org_idx); - return (existing_ino, inode); - } - - warn!( - org_idx, - "ensure_org_inode: no child_inodes entry for org slot" - ); - let org_name = self.composite.slots[org_idx].inner.name().to_owned(); - let ino = self.composite.allocate_inode(); - let now = SystemTime::now(); - let inode = INode { - addr: ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.composite.fs_owner().0, - gid: self.composite.fs_owner().1, - create_time: now, - last_modified_at: now, - parent: Some(Self::ROOT_NODE_INO), - size: 0, - itype: INodeType::Directory, - }; 
- self.composite.cache_inode(inode); - self.composite.child_inodes.insert(ino, org_idx); - self.composite.inode_to_slot.insert(ino, org_idx); - trace!(ino, org_idx, org = %org_name, "ensure_org_inode: allocated new inode"); - (ino, inode) - } - - #[instrument(name = "MesaFS::lookup", skip(self))] - pub async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { - let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; - match role { - InodeRole::Root => { - let org_name = name.to_str().ok_or(LookupError::InodeNotFound)?; - let org_idx = self - .composite - .slots - .iter() - .position(|s| s.inner.name() == org_name) - .ok_or(LookupError::InodeNotFound)?; - - trace!(org = org_name, "lookup: matched org"); - let (ino, inode) = self.ensure_org_inode(org_idx).await; - self.composite - .inc_rc(ino) - .ok_or(LookupError::InodeNotFound)?; - Ok(inode) - } - InodeRole::OrgOwned => self.composite.delegated_lookup(parent, name).await, - } - } - - #[instrument(name = "MesaFS::getattr", skip(self))] - pub async fn getattr(&self, ino: InodeAddr) -> Result { - self.composite.delegated_getattr(ino).await - } - - #[instrument(name = "MesaFS::readdir", skip(self))] - pub async fn readdir(&mut self, ino: InodeAddr) -> Result<&[FsDirEntry], ReadDirError> { - let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; - match role { - InodeRole::Root => { - let org_info: Vec<(usize, String)> = self - .composite - .slots - .iter() - .enumerate() - .map(|(idx, s)| (idx, s.inner.name().to_owned())) - .collect(); - - let mut entries = Vec::with_capacity(org_info.len()); - for (org_idx, name) in &org_info { - let (entry_ino, _) = self.ensure_org_inode(*org_idx).await; - entries.push(FsDirEntry { - ino: entry_ino, - name: name.clone().into(), - }); - } - - trace!(entry_count = entries.len(), "readdir: listing orgs"); - self.composite.readdir_buf = entries; - Ok(&self.composite.readdir_buf) - } - InodeRole::OrgOwned => 
self.composite.delegated_readdir(ino).await, - } - } - - #[instrument(name = "MesaFS::open", skip(self))] - pub async fn open( - &mut self, - ino: InodeAddr, - flags: OpenFlags, - ) -> Result { - self.composite.delegated_open(ino, flags).await - } - - #[instrument(name = "MesaFS::read", skip(self))] - pub async fn read( - &mut self, - fh: FileHandle, - offset: u64, - size: u32, - ) -> Result { - self.composite.delegated_read(fh, offset, size).await - } - - #[instrument(name = "MesaFS::release", skip(self))] - pub async fn release(&mut self, fh: FileHandle) -> Result<(), ReleaseError> { - self.composite.delegated_release(fh).await - } -} - -/// A file reader that delegates reads to `MesaFS` through a shared mutex. -/// -/// Resources are released via [`FileReader::close`](git_fs::fs::async_fs::FileReader::close), -/// which is called by the FUSE adapter during `release`. Dropping without -/// calling `close()` emits a diagnostic warning. -pub struct MesaFsReader { - inner: Arc>, - fh: FileHandle, - closed: AtomicBool, -} - -impl git_fs::fs::async_fs::FileReader for MesaFsReader { - fn read( - &self, - offset: u64, - size: u32, - ) -> impl Future> + Send { - let inner = Arc::clone(&self.inner); - let fh = self.fh; - async move { - let mut guard = inner.lock().await; - guard - .read(fh, offset, size) - .await - .map_err(|e| std::io::Error::other(e.to_string())) - } - } - - fn close(&self) -> impl Future> + Send { - self.closed.store(true, Ordering::Relaxed); - let inner = Arc::clone(&self.inner); - let fh = self.fh; - async move { - let mut guard = inner.lock().await; - guard - .release(fh) - .await - .map_err(|e| std::io::Error::other(e.to_string())) - } - } -} - -impl Drop for MesaFsReader { - fn drop(&mut self) { - if !self.closed.load(Ordering::Relaxed) { - tracing::warn!(fh = self.fh, "MesaFsReader dropped without close()"); - } - } -} - -/// A [`FsDataProvider`](git_fs::fs::async_fs::FsDataProvider) that wraps -/// `MesaFS` behind a shared mutex. 
-#[derive(Clone)] -pub struct MesaFsProvider { - inner: Arc>, -} - -impl MesaFsProvider { - /// Create a new provider wrapping the given `MesaFS`. - pub fn new(mesa_fs: MesaFS) -> Self { - Self { - inner: Arc::new(tokio::sync::Mutex::new(mesa_fs)), - } - } -} - -fn lookup_error_to_io(e: LookupError) -> std::io::Error { - match e { - LookupError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), - LookupError::RemoteMesaError(api) => std::io::Error::other(api.to_string()), - } -} - -fn readdir_error_to_io(e: ReadDirError) -> std::io::Error { - match e { - ReadDirError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), - ReadDirError::NotADirectory => std::io::Error::from_raw_os_error(libc::ENOTDIR), - ReadDirError::NotPermitted => std::io::Error::from_raw_os_error(libc::EPERM), - ReadDirError::RemoteMesaError(api) => std::io::Error::other(api.to_string()), - } -} - -fn open_error_to_io(e: OpenError) -> std::io::Error { - match e { - OpenError::InodeNotFound => std::io::Error::from_raw_os_error(libc::ENOENT), - } -} - -impl git_fs::fs::async_fs::FsDataProvider for MesaFsProvider { - type Reader = MesaFsReader; - - fn lookup( - &self, - parent: INode, - name: &OsStr, - ) -> impl Future> + Send { - let inner = Arc::clone(&self.inner); - let name = name.to_os_string(); - async move { - let mut guard = inner.lock().await; - guard - .lookup(parent.addr, &name) - .await - .map_err(lookup_error_to_io) - } - } - - fn readdir( - &self, - parent: INode, - ) -> impl Future, std::io::Error>> + Send { - let inner = Arc::clone(&self.inner); - async move { - let mut guard = inner.lock().await; - let dir_entries: Vec<(OsString, InodeAddr)> = { - let entries = guard - .readdir(parent.addr) - .await - .map_err(readdir_error_to_io)?; - entries.iter().map(|e| (e.name.clone(), e.ino)).collect() - }; - let mut result = Vec::with_capacity(dir_entries.len()); - for (name, ino) in dir_entries { - if let Ok(inode) = guard.getattr(ino).await { - 
result.push((name, inode)); - } - } - Ok(result) - } - } - - fn open( - &self, - inode: INode, - flags: OpenFlags, - ) -> impl Future> + Send { - let inner = Arc::clone(&self.inner); - async move { - let mut guard = inner.lock().await; - let fh = guard - .open(inode.addr, flags) - .await - .map_err(open_error_to_io)?; - Ok(MesaFsReader { - inner: Arc::clone(&inner), - fh, - closed: AtomicBool::new(false), - }) - } - } -} diff --git a/src/fs/mescloud/org.rs b/src/fs/mescloud/org.rs deleted file mode 100644 index feefaf8e..00000000 --- a/src/fs/mescloud/org.rs +++ /dev/null @@ -1,390 +0,0 @@ -use std::collections::HashMap; -use std::ffi::{OsStr, OsString}; -use std::time::SystemTime; - -use bytes::Bytes; -use futures::TryStreamExt as _; -use git_fs::fs::{FileHandle, INode, INodeType, InodeAddr, InodePerms, OpenFlags}; -use mesa_dev::MesaClient; -use secrecy::SecretString; -use tracing::{instrument, trace, warn}; - -use super::common::{ChildFs, MesaApiError}; -pub use super::common::{LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; -use super::composite::CompositeFs; -use super::repo::RepoFs; -use crate::app_config::CacheConfig; - -#[derive(Debug, Clone)] -pub struct OrgConfig { - pub name: String, - pub api_key: SecretString, -} - -/// Classifies an inode by its role in the org hierarchy. -enum InodeRole { - /// The org root directory. - OrgRoot, - /// A virtual owner directory (github only). - OwnerDir, - /// An inode owned by some repo (either a child-root or delegated). - RepoOwned, -} - -/// A filesystem rooted at a single organization. -/// -/// Composes multiple [`RepoFs`] instances, each with its own inode namespace, -/// delegating to [`CompositeFs`] for inode/fh translation at each boundary. -pub struct OrgFs { - name: String, - client: MesaClient, - composite: CompositeFs, - /// Maps org-level owner-dir inodes to owner name (github only). 
- owner_inodes: HashMap, - cache_config: CacheConfig, -} - -impl OrgFs { - pub(crate) const ROOT_INO: InodeAddr = CompositeFs::::ROOT_INO; - const BLOCK_SIZE: u32 = 4096; - - /// The name of the organization. - #[must_use] - pub(crate) fn name(&self) -> &str { - &self.name - } - - /// Whether this org uses the github two-level owner/repo hierarchy. - /// TODO(MES-674): Cleanup "special" casing for github. - fn is_github(&self) -> bool { - self.name == "github" - } - - /// Encode "owner/repo" to base64 for API calls. - /// TODO(MES-674): Cleanup "special" casing for github. - fn encode_github_repo_name(decoded: &str) -> String { - use base64::Engine as _; - base64::engine::general_purpose::STANDARD.encode(decoded) - } - - /// Ensure an inode exists for a virtual owner directory (github only). Does NOT bump rc. - /// TODO(MES-674): Cleanup "special" casing for github. - async fn ensure_owner_inode(&mut self, owner: &str) -> (InodeAddr, INode) { - // Check existing - let mut stale_ino = None; - for (&ino, existing_owner) in &self.owner_inodes { - if existing_owner == owner { - if let Ok(inode) = self.composite.delegated_getattr(ino).await { - return (ino, inode); - } - stale_ino = Some(ino); - break; - } - } - if let Some(ino) = stale_ino { - self.owner_inodes.remove(&ino); - } - - let ino = self.composite.allocate_inode(); - let now = SystemTime::now(); - let inode = INode { - addr: ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.composite.fs_owner().0, - gid: self.composite.fs_owner().1, - create_time: now, - last_modified_at: now, - parent: Some(Self::ROOT_INO), - size: 0, - itype: INodeType::Directory, - }; - self.composite.cache_inode_and_init_rc(inode); - self.owner_inodes.insert(ino, owner.to_owned()); - (ino, inode) - } - - #[must_use] - pub fn new( - name: String, - client: MesaClient, - fs_owner: (u32, u32), - cache_config: CacheConfig, - ) -> Self { - Self { - name, - client, - composite: CompositeFs::new(fs_owner, Self::BLOCK_SIZE), 
- owner_inodes: HashMap::new(), - cache_config, - } - } - - /// Classify an inode by its role. - fn inode_role(&self, ino: InodeAddr) -> Option { - if ino == Self::ROOT_INO { - return Some(InodeRole::OrgRoot); - } - if self.owner_inodes.contains_key(&ino) { - return Some(InodeRole::OwnerDir); - } - if self.composite.child_inodes.contains_key(&ino) { - return Some(InodeRole::RepoOwned); - } - if self.composite.slot_for_inode(ino).is_some() { - return Some(InodeRole::RepoOwned); - } - None - } - - /// Ensure an inode + `RepoFs` exists for the given repo name. - /// Does NOT bump rc. - /// - /// - `repo_name`: name used for API calls / `RepoFs` (base64-encoded for github) - /// - `display_name`: name shown in filesystem ("linux" for github, same as `repo_name` otherwise) - /// - `parent_ino`: owner-dir inode for github, `ROOT_INO` otherwise - async fn ensure_repo_inode( - &mut self, - repo_name: &str, - display_name: &str, - default_branch: &str, - parent_ino: InodeAddr, - ) -> (InodeAddr, INode) { - // Check existing repos. - for (&ino, &idx) in &self.composite.child_inodes { - if self.composite.slots[idx].inner.repo_name() == repo_name { - if let Ok(inode) = self.composite.delegated_getattr(ino).await { - trace!(ino, repo = repo_name, "ensure_repo_inode: reusing"); - return (ino, inode); - } - warn!( - ino, - repo = repo_name, - "ensure_repo_inode: attr missing, rebuilding" - ); - return self.make_repo_dir_inode(ino); - } - } - - // Create new RepoFs and register as child. - let repo = RepoFs::new( - self.client.clone(), - self.name.clone(), - repo_name.to_owned(), - default_branch.to_owned(), - self.composite.fs_owner(), - self.cache_config.clone(), - ) - .await; - - let outer_ino = self - .composite - .add_child_with_parent(repo, RepoFs::ROOT_INO, parent_ino); - trace!( - ino = outer_ino, - repo = repo_name, - "ensure_repo_inode: allocated new inode" - ); - - // Register in directory cache so readdir sees it. 
- self.composite - .directory_cache - .insert( - git_fs::fs::LoadedAddr(parent_ino), - OsString::from(display_name), - git_fs::fs::LoadedAddr(outer_ino), - true, - ) - .await; - - let inode = self - .composite - .delegated_getattr(outer_ino) - .await - .unwrap_or_else(|_| { - let now = SystemTime::now(); - INode { - addr: outer_ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.composite.fs_owner().0, - gid: self.composite.fs_owner().1, - create_time: now, - last_modified_at: now, - parent: Some(parent_ino), - size: 0, - itype: INodeType::Directory, - } - }); - (outer_ino, inode) - } - - /// Build a directory inode for `ino`, returning `(ino, inode)`. - fn make_repo_dir_inode(&self, ino: InodeAddr) -> (InodeAddr, INode) { - let now = SystemTime::now(); - let inode = INode { - addr: ino, - permissions: InodePerms::from_bits_truncate(0o755), - uid: self.composite.fs_owner().0, - gid: self.composite.fs_owner().1, - create_time: now, - last_modified_at: now, - parent: None, - size: 0, - itype: INodeType::Directory, - }; - self.composite.cache_inode(inode); - (ino, inode) - } - - /// Fetch a repo by name via the API. 
- async fn wait_for_sync( - &self, - repo_name: &str, - ) -> Result { - self.client - .org(&self.name) - .repos() - .at(repo_name) - .get() - .await - .map_err(MesaApiError::from) - } -} - -#[async_trait::async_trait] -impl ChildFs for OrgFs { - #[instrument(name = "OrgFs::lookup", skip(self), fields(org = %self.name))] - async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { - let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; - match role { - InodeRole::OrgRoot => { - let name_str = name.to_str().ok_or(LookupError::InodeNotFound)?; - - if self.is_github() { - trace!(owner = name_str, "lookup: resolving github owner dir"); - let (ino, inode) = self.ensure_owner_inode(name_str).await; - self.composite - .inc_rc(ino) - .ok_or(LookupError::InodeNotFound)?; - Ok(inode) - } else { - trace!(repo = name_str, "lookup: resolving repo"); - let repo = self.wait_for_sync(name_str).await?; - let (ino, inode) = self - .ensure_repo_inode(name_str, name_str, &repo.default_branch, Self::ROOT_INO) - .await; - let rc = self - .composite - .inc_rc(ino) - .ok_or(LookupError::InodeNotFound)?; - trace!(ino, repo = name_str, rc, "lookup: resolved repo inode"); - Ok(inode) - } - } - InodeRole::OwnerDir => { - let owner = self - .owner_inodes - .get(&parent) - .ok_or(LookupError::InodeNotFound)? 
- .clone(); - let repo_name_str = name.to_str().ok_or(LookupError::InodeNotFound)?; - let full_decoded = format!("{owner}/{repo_name_str}"); - let encoded = Self::encode_github_repo_name(&full_decoded); - - trace!( - owner = %owner, repo = repo_name_str, encoded = %encoded, - "lookup: resolving github repo via owner dir" - ); - - let repo = self.wait_for_sync(&encoded).await?; - let (ino, inode) = self - .ensure_repo_inode(&encoded, repo_name_str, &repo.default_branch, parent) - .await; - self.composite - .inc_rc(ino) - .ok_or(LookupError::InodeNotFound)?; - Ok(inode) - } - InodeRole::RepoOwned => self.composite.delegated_lookup(parent, name).await, - } - } - - #[instrument(name = "OrgFs::readdir", skip(self), fields(org = %self.name))] - async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError> { - let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; - match role { - InodeRole::OrgRoot => { - if self.is_github() { - return Err(ReadDirError::NotPermitted); - } - - let repos: Vec = self - .client - .org(&self.name) - .repos() - .list(None) - .try_collect() - .await - .map_err(MesaApiError::from)?; - - let repo_infos: Vec<(String, String)> = repos - .into_iter() - .filter_map(|r| { - let name = r.name?; - let branch = r.default_branch.unwrap_or_else(|| "main".to_owned()); - Some((name, branch)) - }) - .collect(); - trace!(count = repo_infos.len(), "readdir: fetched repo list"); - - let mut entries = Vec::with_capacity(repo_infos.len()); - for (repo_name, default_branch) in &repo_infos { - let (_, inode) = self - .ensure_repo_inode(repo_name, repo_name, default_branch, Self::ROOT_INO) - .await; - entries.push((OsString::from(repo_name), inode)); - } - - Ok(entries) - } - InodeRole::OwnerDir if self.is_github() => Err(ReadDirError::NotPermitted), - InodeRole::OwnerDir => Err(ReadDirError::NotADirectory), - InodeRole::RepoOwned => { - let dir_entries: Vec<_> = self - .composite - .delegated_readdir(ino) - .await? 
- .iter() - .map(|e| (e.name.clone(), e.ino)) - .collect(); - let mut entries = Vec::with_capacity(dir_entries.len()); - for (name, child_ino) in dir_entries { - if let Some(inode) = self.composite.inode_table.get(&child_ino).await { - entries.push((name, inode)); - } - } - Ok(entries) - } - } - } - - #[instrument(name = "OrgFs::open", skip(self), fields(org = %self.name))] - async fn open(&mut self, ino: InodeAddr, flags: OpenFlags) -> Result { - self.composite.delegated_open(ino, flags).await - } - - #[instrument(name = "OrgFs::read", skip(self), fields(org = %self.name))] - async fn read( - &mut self, - _ino: InodeAddr, - fh: FileHandle, - offset: u64, - size: u32, - ) -> Result { - self.composite.delegated_read(fh, offset, size).await - } - - #[instrument(name = "OrgFs::release", skip(self), fields(org = %self.name))] - async fn release(&mut self, _ino: InodeAddr, fh: FileHandle) -> Result<(), ReleaseError> { - self.composite.delegated_release(fh).await - } -} diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index acff3d04..f13ead88 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -2,13 +2,12 @@ //! //! This module directly accesses the mesa repo through the Rust SDK, on a per-repo basis. 
-use std::collections::HashMap; -use std::ffi::OsString; +use std::ffi::{OsStr, OsString}; use std::future::Future; +use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::SystemTime; -use std::{ffi::OsStr, path::PathBuf}; use base64::Engine as _; use bytes::Bytes; @@ -20,31 +19,12 @@ use tracing::warn; use git_fs::cache::fcache::FileCache; use git_fs::cache::traits::{AsyncReadableCache as _, AsyncWritableCache as _}; use git_fs::fs::async_fs::{FileReader, FsDataProvider}; -use git_fs::fs::{ - INode, INodeType, InodeAddr, InodePerms, LoadedAddr, OpenFlags as AsyncOpenFlags, -}; +use git_fs::fs::{INode, INodeType, InodeAddr, InodePerms, OpenFlags as AsyncOpenFlags}; -use crate::app_config::CacheConfig; - -use super::common::MesaApiError; -pub use super::common::{LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; - -fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { - match &e { - MesaApiError::Response { status, .. } if *status == 404 => { - std::io::Error::from_raw_os_error(libc::ENOENT) - } - MesaApiError::Reqwest(_) - | MesaApiError::ReqwestMiddleware(_) - | MesaApiError::Serde(_) - | MesaApiError::SerdePath(_) - | MesaApiError::Io(_) - | MesaApiError::Response { .. } => std::io::Error::other(e), - } -} +use super::common::{MesaApiError, mesa_api_error_to_io}; #[derive(Clone)] -pub(super) struct MesRepoProvider { +pub struct MesRepoProvider { inner: Arc, } @@ -97,6 +77,10 @@ impl MesRepoProvider { } /// The name of the repository. 
+ #[expect( + dead_code, + reason = "useful diagnostic accessor retained for future use" + )] pub(super) fn repo_name(&self) -> &str { &self.inner.repo_name } @@ -294,7 +278,7 @@ impl FsDataProvider for MesRepoProvider { } } -pub(super) struct MesFileReader { +pub struct MesFileReader { client: MesaClient, org_name: String, repo_name: String, @@ -383,215 +367,3 @@ impl FileReader for MesFileReader { } } } - -mod repo_fs_inner { - #![allow(clippy::future_not_send, clippy::mem_forget)] - use git_fs::cache::async_backed::FutureBackedCache; - use git_fs::fs::async_fs::AsyncFs; - use git_fs::fs::{INode, InodeAddr}; - use ouroboros::self_referencing; - - use super::MesRepoProvider; - - #[self_referencing] - pub struct RepoFsInner { - pub(super) inode_table: FutureBackedCache, - #[borrows(inode_table)] - #[covariant] - pub(super) fs: AsyncFs<'this, MesRepoProvider>, - } - - impl RepoFsInner { - pub fn create( - inode_table: FutureBackedCache, - provider: MesRepoProvider, - ) -> Self { - RepoFsInnerBuilder { - inode_table, - fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), - } - .build() - } - } -} -use repo_fs_inner::RepoFsInner; - -/// A filesystem rooted at a single mesa repository. -/// -/// Wraps [`AsyncFs`] via ouroboros to co-locate the inode table -/// and the filesystem that borrows it. Implements [`Fs`] as a thin adapter. -pub struct RepoFs { - inner: RepoFsInner, - /// Reference counts for inodes held by the kernel. - refcounts: rustc_hash::FxHashMap, - /// Open file handles mapped to readers. - open_files: HashMap>, - /// Provider clone for accessing `repo_name` and `path_map` cleanup. - provider: MesRepoProvider, -} - -impl RepoFs { - pub(crate) const ROOT_INO: InodeAddr = 1; - - /// Create a new `RepoFs` for a specific org and repo. 
- pub async fn new( - client: MesaClient, - org_name: String, - repo_name: String, - ref_: String, - fs_owner: (u32, u32), - cache_config: CacheConfig, - ) -> Self { - let file_cache = match cache_config.max_size { - Some(max_size) if max_size.as_u64() > 0 => { - let cache_dir = cache_config.path.join(&org_name).join(&repo_name); - let max_bytes = max_size.as_u64().try_into().unwrap_or(usize::MAX); - match FileCache::new(&cache_dir, max_bytes).await { - Ok(cache) => Some(Arc::new(cache)), - Err(e) => { - warn!(error = ?e, org = %org_name, repo = %repo_name, - "failed to create file cache, continuing without caching"); - None - } - } - } - _ => None, - }; - - let provider = - MesRepoProvider::new(client, org_name, repo_name, ref_, fs_owner, file_cache); - provider.seed_root_path(Self::ROOT_INO); - - let root = INode { - addr: Self::ROOT_INO, - permissions: InodePerms::from_bits_truncate(0o755), - uid: fs_owner.0, - gid: fs_owner.1, - create_time: SystemTime::now(), - last_modified_at: SystemTime::now(), - parent: None, - size: 0, - itype: INodeType::Directory, - }; - - let inode_table = git_fs::cache::async_backed::FutureBackedCache::default(); - inode_table.insert_sync(root.addr, root); - - let inner = RepoFsInner::create(inode_table, provider.clone()); - - let mut refcounts = rustc_hash::FxHashMap::default(); - refcounts.insert(Self::ROOT_INO, 1); - - Self { - inner, - refcounts, - open_files: HashMap::new(), - provider, - } - } - - /// The name of the repository this filesystem is rooted at. 
- pub(crate) fn repo_name(&self) -> &str { - self.provider.repo_name() - } -} - -#[expect( - clippy::wildcard_enum_match_arm, - reason = "mapping all ErrorKind variants is impractical; EIO is the sensible default" -)] -fn io_error_to_errno(e: &std::io::Error) -> i32 { - e.raw_os_error().unwrap_or_else(|| match e.kind() { - std::io::ErrorKind::NotFound => libc::ENOENT, - std::io::ErrorKind::PermissionDenied => libc::EACCES, - std::io::ErrorKind::AlreadyExists => libc::EEXIST, - _ => libc::EIO, - }) -} - -#[async_trait::async_trait] -impl super::common::ChildFs for RepoFs { - async fn lookup(&mut self, parent: InodeAddr, name: &OsStr) -> Result { - let tracked = self - .inner - .borrow_fs() - .lookup(LoadedAddr(parent), name) - .await - .map_err(|e| { - if io_error_to_errno(&e) == libc::ENOENT { - LookupError::InodeNotFound - } else { - LookupError::RemoteMesaError(MesaApiError::Io(e)) - } - })?; - *self.refcounts.entry(tracked.inode.addr).or_insert(0) += 1; - Ok(tracked.inode) - } - - async fn readdir(&mut self, ino: InodeAddr) -> Result, ReadDirError> { - let mut entries = Vec::new(); - self.inner - .borrow_fs() - .readdir(LoadedAddr(ino), 0, |de, _offset| { - entries.push((de.name.to_os_string(), de.inode)); - false - }) - .await - .map_err(|e| { - if io_error_to_errno(&e) == libc::ENOTDIR { - ReadDirError::NotADirectory - } else if io_error_to_errno(&e) == libc::ENOENT { - ReadDirError::InodeNotFound - } else { - ReadDirError::RemoteMesaError(MesaApiError::Io(e)) - } - })?; - Ok(entries) - } - - async fn open( - &mut self, - ino: InodeAddr, - flags: AsyncOpenFlags, - ) -> Result { - let open_file = self - .inner - .borrow_fs() - .open(LoadedAddr(ino), flags) - .await - .map_err(|_| OpenError::InodeNotFound)?; - self.open_files - .insert(open_file.fh, Arc::clone(&open_file.reader)); - Ok(open_file.fh) - } - - async fn read( - &mut self, - _ino: InodeAddr, - fh: git_fs::fs::FileHandle, - offset: u64, - size: u32, - ) -> Result { - let reader = 
self.open_files.get(&fh).ok_or(ReadError::FileNotOpen)?; - reader.read(offset, size).await.map_err(|e| { - if io_error_to_errno(&e) == libc::EISDIR { - ReadError::NotAFile - } else if io_error_to_errno(&e) == libc::ENOENT { - ReadError::InodeNotFound - } else { - ReadError::RemoteMesaError(MesaApiError::Io(e)) - } - }) - } - - async fn release( - &mut self, - _ino: InodeAddr, - fh: git_fs::fs::FileHandle, - ) -> Result<(), ReleaseError> { - self.open_files - .remove(&fh) - .ok_or(ReleaseError::FileNotOpen)?; - Ok(()) - } -} diff --git a/src/fs/mescloud/roots.rs b/src/fs/mescloud/roots.rs index aafe0c4c..8893d379 100644 --- a/src/fs/mescloud/roots.rs +++ b/src/fs/mescloud/roots.rs @@ -3,10 +3,8 @@ //! Bridges the generic `CompositeFs` from `lib/fs/composite.rs` with //! Mesa/GitHub-specific org and repo resolution logic. //! -//! These types are not yet wired into the daemon entry point; they will be -//! connected in a follow-up change that replaces the old `MesaFS` + `OrgFs` -//! pipeline. -#![expect(dead_code, reason = "wired in the follow-up daemon change")] +//! These types are wired into the daemon entry point, replacing the old +//! `MesaFS` + `OrgFs` pipeline. use std::ffi::{OsStr, OsString}; use std::future::Future; @@ -23,26 +21,12 @@ use git_fs::fs::async_fs::{FileReader, FsDataProvider}; use git_fs::fs::composite::{ChildDescriptor, CompositeFs, CompositeReader, CompositeRoot}; use git_fs::fs::{INode, INodeType, InodeAddr, InodePerms, OpenFlags}; -use super::common::MesaApiError; +use super::common::{MesaApiError, mesa_api_error_to_io}; use super::repo::{MesFileReader, MesRepoProvider}; use crate::app_config::CacheConfig; const CHILD_ROOT_ADDR: InodeAddr = 1; -fn mesa_api_error_to_io(e: MesaApiError) -> std::io::Error { - match &e { - MesaApiError::Response { status, .. 
} if *status == 404 => { - std::io::Error::from_raw_os_error(libc::ENOENT) - } - MesaApiError::Reqwest(_) - | MesaApiError::ReqwestMiddleware(_) - | MesaApiError::Serde(_) - | MesaApiError::SerdePath(_) - | MesaApiError::Io(_) - | MesaApiError::Response { .. } => std::io::Error::other(e), - } -} - /// Create a [`MesRepoProvider`] and its root [`INode`] for a given repo. async fn create_repo_provider( client: &MesaClient, @@ -111,7 +95,7 @@ fn check_not_found(e: MesaApiError) -> Result<(), std::io::Error> { } } -pub(super) struct StandardOrgRoot { +pub struct StandardOrgRoot { client: MesaClient, org_name: String, cache_config: CacheConfig, @@ -119,7 +103,7 @@ pub(super) struct StandardOrgRoot { } impl StandardOrgRoot { - pub(super) fn new( + pub fn new( client: MesaClient, org_name: String, cache_config: CacheConfig, @@ -222,7 +206,7 @@ impl CompositeRoot for StandardOrgRoot { } } -pub(super) struct GithubRepoRoot { +pub struct GithubRepoRoot { client: MesaClient, org_name: String, owner: String, @@ -286,7 +270,7 @@ impl CompositeRoot for GithubRepoRoot { } } -pub(super) struct GithubOrgRoot { +pub struct GithubOrgRoot { client: MesaClient, org_name: String, cache_config: CacheConfig, @@ -294,7 +278,7 @@ pub(super) struct GithubOrgRoot { } impl GithubOrgRoot { - pub(super) fn new( + pub fn new( client: MesaClient, org_name: String, cache_config: CacheConfig, @@ -347,7 +331,7 @@ impl CompositeRoot for GithubOrgRoot { } #[derive(Clone)] -pub(super) enum OrgChildDP { +pub enum OrgChildDP { Standard(CompositeFs), Github(CompositeFs), } @@ -407,7 +391,7 @@ impl FsDataProvider for OrgChildDP { } } -pub(super) enum OrgChildReader { +pub enum OrgChildReader { Standard(CompositeReader), Github(CompositeReader>), } @@ -441,12 +425,12 @@ impl FileReader for OrgChildReader { } } -pub(super) struct MesaRoot { +pub struct MesaRoot { orgs: Vec<(OsString, OrgChildDP)>, } impl MesaRoot { - pub(super) fn new(orgs: Vec<(OsString, OrgChildDP)>) -> Self { + pub fn new(orgs: 
Vec<(OsString, OrgChildDP)>) -> Self { Self { orgs } } } From 36d9fea53f29b38215e249c52f8bf0b01cf98f3c Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 20 Feb 2026 16:20:10 -0800 Subject: [PATCH 24/24] bug fixes --- lib/cache/async_backed.rs | 72 ++++++++++++++---------- lib/fs/async_fs.rs | 79 +++++++++++++++++--------- lib/fs/composite.rs | 107 +++++++++++++++--------------------- lib/fs/dcache.rs | 76 ++++++++++++++++++++----- lib/fs/fuser.rs | 9 ++- src/fs/mescloud/repo.rs | 7 ++- src/fs/mescloud/roots.rs | 7 +++ tests/dcache_correctness.rs | 54 +++++++++++++++--- 8 files changed, 270 insertions(+), 141 deletions(-) diff --git a/lib/cache/async_backed.rs b/lib/cache/async_backed.rs index 8f15803b..273bcd39 100644 --- a/lib/cache/async_backed.rs +++ b/lib/cache/async_backed.rs @@ -7,6 +7,7 @@ //! Note that this cache does not support automatic eviction. use std::panic::AssertUnwindSafe; +use std::sync::atomic::{AtomicU64, Ordering}; use std::{fmt::Debug, future::Future, hash::Hash, pin::Pin}; use futures::FutureExt as _; @@ -17,10 +18,12 @@ type SharedFut = Shared> + Send>>>; /// Two-state slot: `InFlight` while a factory future is running, then promoted to `Ready` once /// the future completes. /// -/// The `InFlight` variant holds a `Shared<..., Output = Option>` where `None` signals that the -/// factory panicked (caught by `catch_unwind`). On `None`, callers remove the entry and retry. +/// The `InFlight` variant holds a generation counter and a `Shared<..., Output = Option>` +/// where `None` signals that the factory panicked (caught by `catch_unwind`). On `None`, callers +/// remove the entry only if the generation matches, avoiding destruction of a valid re-inserted +/// entry. enum Slot { - InFlight(SharedFut), + InFlight(u64, SharedFut), Ready(V), } @@ -30,6 +33,7 @@ enum Slot { /// invocation of the factory runs. All callers receive a clone of the result. 
pub struct FutureBackedCache { map: scc::HashMap>, + next_gen: AtomicU64, } impl Default for FutureBackedCache @@ -40,6 +44,7 @@ where fn default() -> Self { Self { map: scc::HashMap::default(), + next_gen: AtomicU64::new(0), } } } @@ -69,14 +74,14 @@ where .map .read_async(&key, |_, slot| match slot { Slot::Ready(v) => Ok(v.clone()), - Slot::InFlight(shared) => Err(shared.clone()), + Slot::InFlight(generation, shared) => Err((*generation, shared.clone())), }) .await; match existing { Some(Ok(v)) => return v, - Some(Err(shared)) => { - if let Some(v) = self.await_shared(&key, shared).await { + Some(Err((generation, shared))) => { + if let Some(v) = self.await_shared(&key, generation, shared).await { return v; } // Factory panicked; entry removed. Fall through to re-insert below. @@ -85,20 +90,21 @@ where } // Slow path: use entry_async for atomic check-and-insert. - let shared = match self.map.entry_async(key.clone()).await { + let (generation, shared) = match self.map.entry_async(key.clone()).await { scc::hash_map::Entry::Occupied(occ) => match occ.get() { Slot::Ready(v) => return v.clone(), - Slot::InFlight(shared) => shared.clone(), + Slot::InFlight(g, shared) => (*g, shared.clone()), }, scc::hash_map::Entry::Vacant(vac) => { + let generation = self.next_gen.fetch_add(1, Ordering::Relaxed); let shared = Self::make_shared(factory); let ret = shared.clone(); - vac.insert_entry(Slot::InFlight(shared)); - ret + vac.insert_entry(Slot::InFlight(generation, shared)); + (generation, ret) } }; - if let Some(v) = self.await_shared(&key, shared).await { + if let Some(v) = self.await_shared(&key, generation, shared).await { return v; } @@ -124,14 +130,14 @@ where .map .read_async(&key, |_, slot| match slot { Slot::Ready(v) => Ok(v.clone()), - Slot::InFlight(shared) => Err(shared.clone()), + Slot::InFlight(generation, shared) => Err((*generation, shared.clone())), }) .await; match existing { Some(Ok(v)) => return Ok(v), - Some(Err(shared)) => { - if let Some(v) = 
self.await_shared(&key, shared).await { + Some(Err((generation, shared))) => { + if let Some(v) = self.await_shared(&key, generation, shared).await { return Ok(v); } // Factory panicked; entry was removed. Fall through to run our own factory. @@ -147,10 +153,13 @@ where match self.map.entry_async(key).await { scc::hash_map::Entry::Occupied(occ) => match occ.get() { Slot::Ready(v) => Ok(v.clone()), - Slot::InFlight(shared) => Ok(self - .await_shared(occ.key(), shared.clone()) - .await - .unwrap_or(val)), + Slot::InFlight(g, shared) => { + let generation = *g; + Ok(self + .await_shared(occ.key(), generation, shared.clone()) + .await + .unwrap_or(val)) + } }, scc::hash_map::Entry::Vacant(vac) => { vac.insert_entry(Slot::Ready(val.clone())); @@ -170,25 +179,30 @@ where .map .read_async(key, |_, slot| match slot { Slot::Ready(v) => Ok(v.clone()), - Slot::InFlight(shared) => Err(shared.clone()), + Slot::InFlight(generation, shared) => Err((*generation, shared.clone())), }) .await; match existing { Some(Ok(v)) => Some(v), - Some(Err(shared)) => self.await_shared(key, shared).await, + Some(Err((generation, shared))) => self.await_shared(key, generation, shared).await, None => None, } } /// Await a `Shared` future, handle promotion to `Ready`, and handle panic recovery. /// + /// The `observed_gen` parameter is the generation of the `InFlight` slot that was read. + /// On panic recovery, only the entry with this exact generation is removed, preventing + /// destruction of a valid entry re-inserted by a recovered thread. + /// /// Returns `Some(v)` on success. Returns `None` if the factory panicked, after removing /// the poisoned entry from the map. 
- async fn await_shared(&self, key: &K, shared: SharedFut) -> Option { + async fn await_shared(&self, key: &K, observed_gen: u64, shared: SharedFut) -> Option { let mut guard = PromoteGuard { map: &self.map, key, + observed_gen, value: None, }; @@ -199,7 +213,7 @@ where self.map .update_async(key, |_, slot| { - if matches!(slot, Slot::InFlight(_)) { + if matches!(slot, Slot::InFlight(g, _) if *g == observed_gen) { *slot = Slot::Ready(v.clone()); } }) @@ -209,11 +223,11 @@ where Some(v) } else { // Factory panicked. Remove the poisoned InFlight entry so the next caller - // can retry. - drop( - self.map - .remove_if_sync(key, |slot| matches!(slot, Slot::InFlight(_))), - ); + // can retry — but only if the generation matches our observation. + drop(self.map.remove_if_sync( + key, + |slot| matches!(slot, Slot::InFlight(g, _) if *g == observed_gen), + )); None } } @@ -270,6 +284,7 @@ where { map: &'a scc::HashMap>, key: &'a K, + observed_gen: u64, value: Option, } @@ -280,8 +295,9 @@ where { fn drop(&mut self) { if let Some(v) = self.value.take() { + let generation = self.observed_gen; self.map.update_sync(self.key, |_, slot| { - if matches!(slot, Slot::InFlight(_)) { + if matches!(slot, Slot::InFlight(g, _) if *g == generation) { *slot = Slot::Ready(v); } }); diff --git a/lib/fs/async_fs.rs b/lib/fs/async_fs.rs index 1f81a87e..a13a6617 100644 --- a/lib/fs/async_fs.rs +++ b/lib/fs/async_fs.rs @@ -64,6 +64,12 @@ pub trait FsDataProvider: Clone + Send + Sync + 'static { inode: INode, flags: OpenFlags, ) -> impl Future> + Send; + + /// Called when the kernel forgets an inode (refcount reaches zero). + /// + /// Implementations should clean up any internal mappings for the given + /// address (e.g. bridge maps, path maps). The default is a no-op. 
+ fn forget(&self, _addr: InodeAddr) {} } /// Zero-sized tag whose [`StatelessDrop`] implementation automatically evicts @@ -76,6 +82,15 @@ impl<'a> StatelessDrop<&'a FutureBackedCache, InodeAddr> for I } } +impl<'a, DP: FsDataProvider> StatelessDrop<(&'a FutureBackedCache, DP), InodeAddr> + for InodeForget +{ + fn delete(ctx: &(&'a FutureBackedCache, DP), key: &InodeAddr) { + ctx.0.remove_sync(key); + ctx.1.forget(*key); + } +} + /// A looked-up inode whose lifetime must be managed by the caller. /// /// Each `TrackedINode` returned by [`AsyncFs::lookup`] represents one @@ -283,14 +298,14 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { // Inode was evicted from the table — fall through to the slow path. let name_owned = name.to_os_string(); - let name_for_cache = name_owned.clone(); let lookup_key = (parent.0, name_owned.clone()); let dp = self.data_provider.clone(); let child = self .lookup_cache - .get_or_try_init(lookup_key, || async move { - dp.lookup(parent_ino, &name_owned).await + .get_or_try_init(lookup_key, || { + let name_for_dp = name_owned.clone(); + async move { dp.lookup(parent_ino, &name_for_dp).await } }) .await?; @@ -301,7 +316,7 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { self.directory_cache .insert( parent, - name_for_cache, + name_owned, LoadedAddr(child.addr), matches!(child.itype, INodeType::Directory), ) @@ -366,12 +381,6 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { /// returns `true` (indicating the caller's buffer is full), iteration /// stops early. /// - /// # Concurrency - /// - /// The `is_populated` check-then-populate is **not** atomic. If two - /// concurrent callers invoke `readdir` for the same parent, both may call - /// `dp.readdir()` and insert duplicate children. - /// /// TODO(MES-746): Implement `opendir` and `releasedir` to snapshot directory contents and /// avoid racing with `lookup`/`createfile`. 
pub async fn readdir( @@ -380,28 +389,48 @@ impl<'tbl, DP: FsDataProvider> AsyncFs<'tbl, DP> { offset: u64, mut filler: impl FnMut(DirEntry<'_>, u64) -> bool, ) -> Result<(), std::io::Error> { + use crate::fs::dcache::PopulateStatus; + let parent_inode = self.loaded_inode(parent).await?; if parent_inode.itype != INodeType::Directory { return Err(std::io::Error::from_raw_os_error(libc::ENOTDIR)); } // Populate the directory cache on first readdir for this parent. - if !self.directory_cache.is_populated(parent) { - let children = self.data_provider.readdir(parent_inode).await?; - for (name, child_inode) in children { - self.inode_table - .get_or_init(child_inode.addr, || async move { child_inode }) - .await; - self.directory_cache - .insert( - parent, - name, - LoadedAddr(child_inode.addr), - child_inode.itype == INodeType::Directory, - ) - .await; + // Uses a three-state CAS gate to prevent duplicate dp.readdir() calls. + loop { + match self.directory_cache.try_claim_populate(parent) { + PopulateStatus::Claimed => { + match self.data_provider.readdir(parent_inode).await { + Ok(children) => { + for (name, child_inode) in children { + self.inode_table + .get_or_init(child_inode.addr, || async move { child_inode }) + .await; + self.directory_cache + .insert( + parent, + name, + LoadedAddr(child_inode.addr), + child_inode.itype == INodeType::Directory, + ) + .await; + } + self.directory_cache.finish_populate(parent); + } + Err(e) => { + self.directory_cache.abort_populate(parent); + return Err(e); + } + } + break; + } + PopulateStatus::InProgress => { + self.directory_cache.wait_populated(parent).await; + // Re-check: the populator may have aborted. 
+ } + PopulateStatus::Done => break, } - self.directory_cache.mark_populated(parent); } let mut children = self.directory_cache.readdir(parent).await; diff --git a/lib/fs/composite.rs b/lib/fs/composite.rs index bf063307..7d9748c6 100644 --- a/lib/fs/composite.rs +++ b/lib/fs/composite.rs @@ -128,7 +128,7 @@ impl FileReader for CompositeReader { struct ChildSlot { inner: Arc>, - bridge: ConcurrentBridge, + bridge: Arc, } struct CompositeFsInner { @@ -237,7 +237,7 @@ impl CompositeFs { table.insert_sync(desc.root_ino.addr, desc.root_ino); let child_inner = Arc::new(ChildInner::create(table, desc.provider.clone())); - let bridge = ConcurrentBridge::new(); + let bridge = Arc::new(ConcurrentBridge::new()); bridge.insert(outer_ino, desc.root_ino.addr); drop(self.inner.slots.insert_sync( @@ -261,51 +261,29 @@ impl CompositeFs { where R::ChildDP: Clone, { - // Fast path: already registered by name. match self.inner.name_to_slot.entry_sync(desc.name.clone()) { - scc::hash_map::Entry::Occupied(occ) => { + scc::hash_map::Entry::Occupied(mut occ) => { let slot_idx = *occ.get(); - // Return existing outer address for this child's root inode. - if let Some(outer) = self + // Extract bridge Arc from the slot guard, then query outside. + let bridge = self .inner .slots - .read_sync(&slot_idx, |_, slot| { - slot.bridge.backward(desc.root_ino.addr) - }) - .flatten() - { + .read_sync(&slot_idx, |_, slot| Arc::clone(&slot.bridge)); + if let Some(outer) = bridge.and_then(|b| b.backward(desc.root_ino.addr)) { return outer; } - // Slot exists but bridge has no mapping — should not happen, - // but fall through to create a fresh slot below. - // (Remove stale name entry so the vacant path can re-insert.) - // - // Race window: between `drop(occ)` and the `remove_sync` below, - // another thread could read the stale entry and resolve to a - // broken slot. 
In the worst case two threads create separate - // slots for the same child — the last writer to `name_to_slot` - // wins and the other slot becomes orphaned. This is functionally - // harmless: the orphaned slot is never reached via name lookup - // and will not serve any future requests. - drop(occ); - self.inner.name_to_slot.remove_sync(&desc.name); + // Slot exists but bridge has no mapping — replace in-place + // while still holding the entry guard to prevent races. + let (outer_ino, new_slot_idx) = self.create_child_slot(desc); + *occ.get_mut() = new_slot_idx; + outer_ino } scc::hash_map::Entry::Vacant(vac) => { let (outer_ino, slot_idx) = self.create_child_slot(desc); vac.insert_entry(slot_idx); - return outer_ino; + outer_ino } } - - // Fallback: name was stale, create fresh. This path is rare. - let (outer_ino, slot_idx) = self.create_child_slot(desc); - drop( - self.inner - .name_to_slot - .insert_sync(desc.name.clone(), slot_idx), - ); - - outer_ino } } @@ -334,12 +312,16 @@ where .read_sync(&parent.addr, |_, &v| v) .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; - // Extract Arc and inner parent address under the guard. - let (child, inner_parent) = self + // Extract Arc, bridge, and inner parent address under the guard. + let (child, bridge, inner_parent) = self .inner .slots .read_sync(&slot_idx, |_, slot| { - (Arc::clone(&slot.inner), slot.bridge.forward(parent.addr)) + ( + Arc::clone(&slot.inner), + Arc::clone(&slot.bridge), + slot.bridge.forward(parent.addr), + ) }) .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; @@ -353,17 +335,10 @@ where .await?; let child_inode = tracked.inode; - // Translate inner address back to composite-level address. 
- let outer_ino = self - .inner - .slots - .read_sync(&slot_idx, |_, slot| { - let next_ino = &self.inner.next_ino; - slot.bridge.backward_or_insert(child_inode.addr, || { - next_ino.fetch_add(1, Ordering::Relaxed) - }) - }) - .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + // Translate inner address back to composite-level address (outside scc guard). + let outer_ino = bridge.backward_or_insert(child_inode.addr, || { + self.inner.next_ino.fetch_add(1, Ordering::Relaxed) + }); let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); @@ -390,11 +365,15 @@ where .read_sync(&parent.addr, |_, &v| v) .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; - let (child, inner_parent) = self + let (child, bridge, inner_parent) = self .inner .slots .read_sync(&slot_idx, |_, slot| { - (Arc::clone(&slot.inner), slot.bridge.forward(parent.addr)) + ( + Arc::clone(&slot.inner), + Arc::clone(&slot.bridge), + slot.bridge.forward(parent.addr), + ) }) .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; @@ -411,19 +390,12 @@ where }) .await?; - // Translate all inner addresses to composite-level addresses. + // Translate all inner addresses to composite-level addresses (outside scc guard). 
let mut entries = Vec::with_capacity(child_entries.len()); for (name, child_inode) in child_entries { - let outer_ino = self - .inner - .slots - .read_sync(&slot_idx, |_, slot| { - let next_ino = &self.inner.next_ino; - slot.bridge.backward_or_insert(child_inode.addr, || { - next_ino.fetch_add(1, Ordering::Relaxed) - }) - }) - .ok_or_else(|| std::io::Error::from_raw_os_error(libc::ENOENT))?; + let outer_ino = bridge.backward_or_insert(child_inode.addr, || { + self.inner.next_ino.fetch_add(1, Ordering::Relaxed) + }); let _ = self.inner.addr_to_slot.insert_sync(outer_ino, slot_idx); entries.push(( @@ -462,4 +434,15 @@ where inner: open_file.reader, }) } + + fn forget(&self, addr: InodeAddr) { + if addr == Self::ROOT_INO { + return; + } + if let Some((_, slot_idx)) = self.inner.addr_to_slot.remove_sync(&addr) { + self.inner + .slots + .read_sync(&slot_idx, |_, slot| slot.bridge.remove_by_outer(addr)); + } + } } diff --git a/lib/fs/dcache.rs b/lib/fs/dcache.rs index 4870a401..aea5bb2c 100644 --- a/lib/fs/dcache.rs +++ b/lib/fs/dcache.rs @@ -1,6 +1,6 @@ use std::ffi::{OsStr, OsString}; use std::sync::Arc; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicU8, Ordering}; use crate::fs::LoadedAddr; @@ -13,17 +13,32 @@ pub struct DValue { pub is_dir: bool, } +/// Population states for a directory. +const POPULATE_UNCLAIMED: u8 = 0; +const POPULATE_IN_PROGRESS: u8 = 1; +const POPULATE_DONE: u8 = 2; + +/// Result of attempting to claim a directory for population. +pub enum PopulateStatus { + /// This caller won the race and should populate the directory. + Claimed, + /// Another caller is currently populating; wait and re-check. + InProgress, + /// The directory is already fully populated. + Done, +} + /// Per-parent directory state holding child entries and a population flag. 
 struct DirState {
     children: scc::HashMap<OsString, DValue>,
-    populated: AtomicBool,
+    populated: AtomicU8,
 }
 
 impl DirState {
     fn new() -> Self {
         Self {
             children: scc::HashMap::new(),
-            populated: AtomicBool::new(false),
+            populated: AtomicU8::new(POPULATE_UNCLAIMED),
         }
     }
 }
@@ -73,9 +88,7 @@ impl DCache {
     #[must_use]
     pub fn lookup(&self, parent_ino: LoadedAddr, name: &OsStr) -> Option<DValue> {
         let state = self.dirs.read_sync(&parent_ino, |_, v| Arc::clone(v))?;
-        state
-            .children
-            .read_sync(&name.to_os_string(), |_, v| v.clone())
+        state.children.read_sync(name, |_, v| v.clone())
     }
 
     /// Atomically inserts or overwrites a child entry in the cache.
@@ -107,17 +120,50 @@ impl DCache {
         entries
     }
 
-    /// Returns `true` if the directory at `parent_ino` has been fully populated.
-    #[must_use]
-    pub fn is_populated(&self, parent_ino: LoadedAddr) -> bool {
-        self.dirs
-            .read_sync(&parent_ino, |_, v| v.populated.load(Ordering::Acquire))
-            .unwrap_or(false)
+    /// Atomically try to claim a directory for population.
+    ///
+    /// Uses `compare_exchange` on the three-state flag:
+    /// - `UNCLAIMED → IN_PROGRESS`: returns `Claimed` (caller should populate)
+    /// - Already `IN_PROGRESS`: returns `InProgress` (caller should wait)
+    /// - Already `DONE`: returns `Done` (nothing to do)
+    pub fn try_claim_populate(&self, parent_ino: LoadedAddr) -> PopulateStatus {
+        let state = self.dir_state(parent_ino);
+        match state.populated.compare_exchange(
+            POPULATE_UNCLAIMED,
+            POPULATE_IN_PROGRESS,
+            Ordering::AcqRel,
+            Ordering::Acquire,
+        ) {
+            Ok(_) => PopulateStatus::Claimed,
+            Err(POPULATE_IN_PROGRESS) => PopulateStatus::InProgress,
+            Err(_) => PopulateStatus::Done,
+        }
     }
 
-    /// Marks the directory at `parent_ino` as fully populated.
-    pub fn mark_populated(&self, parent_ino: LoadedAddr) {
+    /// Mark a directory as fully populated after successful population. 
+ pub fn finish_populate(&self, parent_ino: LoadedAddr) { let state = self.dir_state(parent_ino); - state.populated.store(true, Ordering::Release); + state.populated.store(POPULATE_DONE, Ordering::Release); + } + + /// Abort a population attempt, resetting back to unclaimed so another + /// caller can retry. + pub fn abort_populate(&self, parent_ino: LoadedAddr) { + let state = self.dir_state(parent_ino); + state.populated.store(POPULATE_UNCLAIMED, Ordering::Release); + } + + /// Wait until a directory is no longer in the `InProgress` state. + pub async fn wait_populated(&self, parent_ino: LoadedAddr) { + loop { + let current = self + .dirs + .read_sync(&parent_ino, |_, v| v.populated.load(Ordering::Acquire)) + .unwrap_or(POPULATE_UNCLAIMED); + if current != POPULATE_IN_PROGRESS { + return; + } + tokio::task::yield_now().await; + } } } diff --git a/lib/fs/fuser.rs b/lib/fs/fuser.rs index 886a5f6f..7a9bed24 100644 --- a/lib/fs/fuser.rs +++ b/lib/fs/fuser.rs @@ -83,12 +83,16 @@ mod inner { /// /// Both `ward` and `fs` borrow from `table`. The ward manages inode /// refcounts; the fs serves lookup/readdir/open/read operations. + /// + /// The ward context is `(&table, DP)` so that [`InodeForget`] can both + /// remove the inode from the table and call `dp.forget()` to clean up + /// provider-internal maps (bridge mappings, path maps, etc.). 
#[self_referencing] pub(super) struct FuseBridgeInner { table: FutureBackedCache, #[borrows(table)] #[not_covariant] - ward: DropWard<&'this FutureBackedCache, InodeAddr, InodeForget>, + ward: DropWard<(&'this FutureBackedCache, DP), InodeAddr, InodeForget>, #[borrows(table)] #[covariant] fs: AsyncFs<'this, DP>, @@ -96,9 +100,10 @@ mod inner { impl FuseBridgeInner { pub(super) fn create(table: FutureBackedCache, provider: DP) -> Self { + let ward_provider = provider.clone(); FuseBridgeInnerBuilder { table, - ward_builder: |tbl| DropWard::new(tbl), + ward_builder: |tbl| DropWard::new((tbl, ward_provider)), fs_builder: |tbl| AsyncFs::new_preseeded(provider, tbl), } .build() diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index f13ead88..f2041d10 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -71,8 +71,7 @@ impl MesRepoProvider { } /// Remove the path entry for an inode. Called during forget/cleanup. - #[expect(dead_code, reason = "will be needed when child forget is implemented")] - pub(super) fn remove_path(&self, addr: InodeAddr) { + fn remove_path(&self, addr: InodeAddr) { self.inner.path_map.remove_sync(&addr); } @@ -276,6 +275,10 @@ impl FsDataProvider for MesRepoProvider { }) } } + + fn forget(&self, addr: InodeAddr) { + self.remove_path(addr); + } } pub struct MesFileReader { diff --git a/src/fs/mescloud/roots.rs b/src/fs/mescloud/roots.rs index 8893d379..7c8701db 100644 --- a/src/fs/mescloud/roots.rs +++ b/src/fs/mescloud/roots.rs @@ -389,6 +389,13 @@ impl FsDataProvider for OrgChildDP { } } } + + fn forget(&self, addr: InodeAddr) { + match self { + Self::Standard(c) => c.forget(addr), + Self::Github(c) => c.forget(addr), + } + } } pub enum OrgChildReader { diff --git a/tests/dcache_correctness.rs b/tests/dcache_correctness.rs index 59731d28..34dcf088 100644 --- a/tests/dcache_correctness.rs +++ b/tests/dcache_correctness.rs @@ -3,7 +3,7 @@ use std::ffi::{OsStr, OsString}; use git_fs::fs::LoadedAddr; -use 
git_fs::fs::dcache::DCache; +use git_fs::fs::dcache::{DCache, PopulateStatus}; #[tokio::test] async fn lookup_returns_none_for_missing_entry() { @@ -51,16 +51,53 @@ async fn readdir_empty_parent_returns_empty() { } #[tokio::test] -async fn is_populated_false_by_default() { +async fn try_claim_populate_unclaimed_returns_claimed() { let cache = DCache::new(); - assert!(!cache.is_populated(LoadedAddr(1))); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + )); } #[tokio::test] -async fn mark_populated_then_check() { +async fn finish_populate_then_claim_returns_done() { let cache = DCache::new(); - cache.mark_populated(LoadedAddr(1)); - assert!(cache.is_populated(LoadedAddr(1))); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + )); + cache.finish_populate(LoadedAddr(1)); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Done + )); +} + +#[tokio::test] +async fn double_claim_returns_in_progress() { + let cache = DCache::new(); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + )); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::InProgress + )); +} + +#[tokio::test] +async fn abort_populate_allows_reclaim() { + let cache = DCache::new(); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + )); + cache.abort_populate(LoadedAddr(1)); + assert!(matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + )); } #[tokio::test] @@ -70,7 +107,10 @@ async fn insert_does_not_mark_populated() { .insert(LoadedAddr(1), OsString::from("foo"), LoadedAddr(10), false) .await; assert!( - !cache.is_populated(LoadedAddr(1)), + matches!( + cache.try_claim_populate(LoadedAddr(1)), + PopulateStatus::Claimed + ), "insert alone should not mark a directory as populated" ); }