From 0c543860c5ea032dddb4b6308b50778b0ba34d5e Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 1 Dec 2025 12:22:29 -0500 Subject: [PATCH 01/95] Move TransactionManager to db4-storage --- db4-graph/src/lib.rs | 43 ++----------------- db4-storage/src/lib.rs | 3 ++ db4-storage/src/transaction/mod.rs | 40 +++++++++++++++++ raphtory-storage/src/mutation/addition_ops.rs | 4 +- .../src/mutation/addition_ops_ext.rs | 4 +- raphtory/src/db/api/storage/storage.rs | 4 +- 6 files changed, 52 insertions(+), 46 deletions(-) create mode 100644 db4-storage/src/transaction/mod.rs diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 9250e8f485..40a97ea1b0 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -1,10 +1,7 @@ use std::{ io, path::{Path, PathBuf}, - sync::{ - atomic::{self, AtomicU64, AtomicUsize}, - Arc, - }, + sync::{atomic::AtomicUsize, Arc}, }; use raphtory_api::core::{ @@ -26,8 +23,8 @@ use storage::{ }, persist::strategy::{Config, PersistentStrategy}, resolver::GIDResolverOps, - wal::{GraphWal, TransactionID, Wal}, - Extension, GIDResolver, Layer, ReadLockedLayer, WalImpl, ES, NS, + wal::Wal, + Extension, GIDResolver, Layer, ReadLockedLayer, TransactionManager, WalImpl, ES, NS, }; use tempfile::TempDir; @@ -87,40 +84,6 @@ impl<'a> From<&'a Path> for GraphDir { } } -#[derive(Debug)] -pub struct TransactionManager { - last_transaction_id: AtomicU64, - wal: Arc, -} - -impl TransactionManager { - const STARTING_TRANSACTION_ID: TransactionID = 1; - - pub fn new(wal: Arc) -> Self { - Self { - last_transaction_id: AtomicU64::new(Self::STARTING_TRANSACTION_ID), - wal, - } - } - - pub fn load(self, last_transaction_id: TransactionID) { - self.last_transaction_id - .store(last_transaction_id, atomic::Ordering::SeqCst) - } - - pub fn begin_transaction(&self) -> TransactionID { - let transaction_id = self - .last_transaction_id - .fetch_add(1, atomic::Ordering::SeqCst); - self.wal.log_begin_transaction(transaction_id).unwrap(); - transaction_id - } - - pub fn end_transaction(&self, transaction_id: TransactionID) { - self.wal.log_end_transaction(transaction_id).unwrap(); - } -} - impl Default for TemporalGraph { fn default() -> Self { Self::new(Extension::default()).unwrap() diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index 28584817d7..e1c3b8559e 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -23,6 +23,7 @@ use crate::{ node_entry::{MemNodeEntry, MemNodeRef}, }, wal::no_wal::NoWal, + transaction::TransactionManager as GenericTransactionManager, }; use parking_lot::RwLock; use raphtory_api::core::entities::{EID, VID}; @@ -36,6 +37,7 @@ pub mod persist; pub mod properties; pub mod resolver; pub mod segments; +pub mod transaction; pub mod utils; pub mod wal; @@ -46,6 +48,7 @@ pub type Layer

= GraphStore, ES

, P>; pub type WalImpl = NoWal; pub type GIDResolver = MappingResolver; +pub type TransactionManager = GenericTransactionManager; pub type ReadLockedLayer

= ReadLockedGraphStore, ES

, P>; pub type ReadLockedNodes

= ReadLockedNodeStorage, P>; diff --git a/db4-storage/src/transaction/mod.rs b/db4-storage/src/transaction/mod.rs new file mode 100644 index 0000000000..a7b175af09 --- /dev/null +++ b/db4-storage/src/transaction/mod.rs @@ -0,0 +1,40 @@ +use std::sync::{ + Arc, + atomic::{self, AtomicU64}, +}; + +use crate::wal::{GraphWal, TransactionID}; + +#[derive(Debug)] +pub struct TransactionManager { + last_transaction_id: AtomicU64, + wal: Arc, +} + +impl TransactionManager { + const STARTING_TRANSACTION_ID: TransactionID = 1; + + pub fn new(wal: Arc) -> Self { + Self { + last_transaction_id: AtomicU64::new(Self::STARTING_TRANSACTION_ID), + wal, + } + } + + pub fn load(self, last_transaction_id: TransactionID) { + self.last_transaction_id + .store(last_transaction_id, atomic::Ordering::SeqCst) + } + + pub fn begin_transaction(&self) -> TransactionID { + let transaction_id = self + .last_transaction_id + .fetch_add(1, atomic::Ordering::SeqCst); + self.wal.log_begin_transaction(transaction_id).unwrap(); + transaction_id + } + + pub fn end_transaction(&self, transaction_id: TransactionID) { + self.wal.log_end_transaction(transaction_id).unwrap(); + } +} diff --git a/raphtory-storage/src/mutation/addition_ops.rs b/raphtory-storage/src/mutation/addition_ops.rs index 162eba66f9..a6beb2c03c 100644 --- a/raphtory-storage/src/mutation/addition_ops.rs +++ b/raphtory-storage/src/mutation/addition_ops.rs @@ -5,7 +5,7 @@ use crate::{ MutationError, }, }; -use db4_graph::{TransactionManager, WriteLockedGraph}; +use db4_graph::WriteLockedGraph; use raphtory_api::{ core::{ entities::{ @@ -20,7 +20,7 @@ use raphtory_api::{ inherit::Base, }; use raphtory_core::entities::{nodes::node_ref::NodeRef, ELID}; -use storage::{Extension, WalImpl}; +use storage::{Extension, TransactionManager, WalImpl}; pub trait InternalAdditionOps { type Error: From; diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index e770f8a537..e7eb5ee13b 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -2,7 +2,7 @@ use crate::mutation::{ addition_ops::{EdgeWriteLock, InternalAdditionOps, SessionAdditionOps}, MutationError, }; -use db4_graph::{TemporalGraph, TransactionManager, WriteLockedGraph}; +use db4_graph::{TemporalGraph, WriteLockedGraph}; use raphtory_api::core::{ entities::properties::{ meta::{Meta, NODE_ID_IDX, NODE_TYPE_IDX}, @@ -23,7 +23,7 @@ use storage::{ persist::strategy::PersistentStrategy, properties::props_meta_writer::PropsMetaWriter, resolver::GIDResolverOps, - Extension, WalImpl, ES, NS, + Extension, TransactionManager, WalImpl, ES, NS, }; pub struct WriteS<'a, EXT: PersistentStrategy, ES = ES>> { diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index adf3d02c24..efaf78f58c 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -6,7 +6,7 @@ use crate::{ }, errors::GraphError, }; -use db4_graph::{TemporalGraph, TransactionManager, WriteLockedGraph}; +use db4_graph::{TemporalGraph, WriteLockedGraph}; use raphtory_api::core::{ entities::{ properties::{ @@ -35,7 +35,7 @@ use std::{ path::Path, sync::Arc, }; -use storage::{Extension, WalImpl}; +use storage::{Extension, TransactionManager, WalImpl}; #[cfg(feature = "search")] use { From 92f279353261bf5bf466020b420c58a0e0f441c6 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 1 Dec 2025 15:54:11 -0500 Subject: [PATCH 02/95] Create DurabilityOps --- raphtory-storage/src/mutation/addition_ops.rs | 26 +------------ .../src/mutation/addition_ops_ext.rs | 3 ++ .../src/mutation/durability_ops.rs | 37 +++++++++++++++++++ raphtory-storage/src/mutation/mod.rs | 3 ++ raphtory/src/db/api/mutation/addition_ops.rs | 7 ++-- raphtory/src/db/api/storage/storage.rs | 19 ++++++---- raphtory/src/db/graph/edge.rs | 4 +- 7 files changed, 62 insertions(+), 37 deletions(-) create mode 100644 raphtory-storage/src/mutation/durability_ops.rs diff --git a/raphtory-storage/src/mutation/addition_ops.rs b/raphtory-storage/src/mutation/addition_ops.rs index a6beb2c03c..3ac593e091 100644 --- a/raphtory-storage/src/mutation/addition_ops.rs +++ b/raphtory-storage/src/mutation/addition_ops.rs @@ -20,7 +20,7 @@ use raphtory_api::{ inherit::Base, }; use raphtory_core::entities::{nodes::node_ref::NodeRef, ELID}; -use storage::{Extension, TransactionManager, WalImpl}; +use storage::{Extension}; pub trait InternalAdditionOps { type Error: From; @@ -91,12 +91,6 @@ pub trait InternalAdditionOps { meta: &Meta, props: impl Iterator, ) -> Result>, Self::Error>; - - /// TODO: Not sure the below methods belong here... - - fn transaction_manager(&self) -> &TransactionManager; - - fn wal(&self) -> &WalImpl; } pub trait EdgeWriteLock: Send + Sync { @@ -294,14 +288,6 @@ impl InternalAdditionOps for GraphStorage { Ok(self.mutable()?.validate_gids(gids)?) } - fn transaction_manager(&self) -> &TransactionManager { - self.mutable().unwrap().transaction_manager.as_ref() - } - - fn wal(&self) -> &WalImpl { - self.mutable().unwrap().wal.as_ref() - } - fn resolve_node_and_type( &self, id: NodeRef, @@ -411,16 +397,6 @@ where self.base().validate_gids(gids) } - #[inline] - fn transaction_manager(&self) -> &TransactionManager { - self.base().transaction_manager() - } - - #[inline] - fn wal(&self) -> &WalImpl { - self.base().wal() - } - fn resolve_node_and_type( &self, id: NodeRef, diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index e7eb5ee13b..5b1cdcf5a1 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -1,5 +1,6 @@ use crate::mutation::{ addition_ops::{EdgeWriteLock, InternalAdditionOps, SessionAdditionOps}, + durability_ops::DurabilityOps, MutationError, }; use db4_graph::{TemporalGraph, WriteLockedGraph}; @@ -376,7 +377,9 @@ impl InternalAdditionOps for TemporalGraph { Ok(prop_ids) } } +} +impl DurabilityOps for TemporalGraph { fn transaction_manager(&self) -> &TransactionManager { &self.transaction_manager } diff --git a/raphtory-storage/src/mutation/durability_ops.rs b/raphtory-storage/src/mutation/durability_ops.rs new file mode 100644 index 0000000000..be6288a957 --- /dev/null +++ b/raphtory-storage/src/mutation/durability_ops.rs @@ -0,0 +1,37 @@ +use storage::{TransactionManager, WalImpl}; +use crate::graph::graph::GraphStorage; +use raphtory_api::inherit::Base; + +/// Accessor methods for transactions and write-ahead logging. +pub trait DurabilityOps { + fn transaction_manager(&self) -> &TransactionManager; + + fn wal(&self) -> &WalImpl; +} + +impl DurabilityOps for GraphStorage { + fn transaction_manager(&self) -> &TransactionManager { + self.mutable().unwrap().transaction_manager.as_ref() + } + + fn wal(&self) -> &WalImpl { + self.mutable().unwrap().wal.as_ref() + } +} + +pub trait InheritDurabilityOps: Base {} + +impl DurabilityOps for G +where + G::Base: DurabilityOps, +{ + #[inline] + fn transaction_manager(&self) -> &TransactionManager { + self.base().transaction_manager() + } + + #[inline] + fn wal(&self) -> &WalImpl { + self.base().wal() + } +} diff --git a/raphtory-storage/src/mutation/mod.rs b/raphtory-storage/src/mutation/mod.rs index 8dd4cf8157..835f84afca 100644 --- a/raphtory-storage/src/mutation/mod.rs +++ b/raphtory-storage/src/mutation/mod.rs @@ -4,6 +4,7 @@ use crate::{ mutation::{ addition_ops::InheritAdditionOps, deletion_ops::InheritDeletionOps, property_addition_ops::InheritPropertyAdditionOps, + durability_ops::InheritDurabilityOps, }, }; use parking_lot::RwLockWriteGuard; @@ -31,6 +32,7 @@ pub mod addition_ops; pub mod addition_ops_ext; pub mod deletion_ops; pub mod property_addition_ops; +pub mod durability_ops; pub type NodeWriterT<'a> = NodeWriter<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>; pub type EdgeWriterT<'a> = EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES>; @@ -70,5 +72,6 @@ pub trait InheritMutationOps: Base {} impl InheritAdditionOps for G {} impl InheritPropertyAdditionOps for G {} impl InheritDeletionOps for G {} +impl InheritDurabilityOps for G {} impl InheritMutationOps for Arc {} diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index ed319d3d97..351627bfbb 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -15,9 +15,10 @@ use crate::{ }; use raphtory_api::core::entities::properties::prop::Prop; use raphtory_storage::mutation::addition_ops::{EdgeWriteLock, InternalAdditionOps}; +use raphtory_storage::mutation::durability_ops::DurabilityOps; use storage::wal::{GraphWal, Wal}; -pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> { +pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> + DurabilityOps { // TODO: Probably add vector reference here like add /// Add a node to the graph /// @@ -143,7 +144,7 @@ pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> + StaticGraphViewOps> AdditionOps for G { +impl> + StaticGraphViewOps + DurabilityOps> AdditionOps for G { fn add_node< V: AsNodeRef, T: TryIntoInputTime, @@ -355,7 +356,7 @@ impl> + StaticGraphViewOps> Addit add_edge_op.store_src_node_info(src_id, src.as_node_ref().as_gid_ref().left()); add_edge_op.store_dst_node_info(dst_id, dst.as_node_ref().as_gid_ref().left()); - // Log transaction end + // Log transaction end. self.transaction_manager().end_transaction(transaction_id); // Flush all wal entries to disk. diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index efaf78f58c..3b1472d9e1 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -24,6 +24,7 @@ use raphtory_storage::{ layer_ops::InheritLayerOps, mutation::{ addition_ops::{EdgeWriteLock, InternalAdditionOps, SessionAdditionOps}, + durability_ops::DurabilityOps, addition_ops_ext::{UnlockedSession, WriteS}, deletion_ops::InternalDeletionOps, property_addition_ops::InternalPropertyAdditionOps, @@ -575,14 +576,6 @@ impl InternalAdditionOps for Storage { Ok(self.graph.validate_gids(gids)?) } - fn transaction_manager(&self) -> &TransactionManager { - self.graph.mutable().unwrap().transaction_manager.as_ref() - } - - fn wal(&self) -> &WalImpl { - self.graph.mutable().unwrap().wal.as_ref() - } - fn resolve_node_and_type( &self, id: NodeRef, @@ -592,6 +585,16 @@ impl InternalAdditionOps for Storage { } } +impl DurabilityOps for Storage { + fn transaction_manager(&self) -> &TransactionManager { + self.graph.mutable().unwrap().transaction_manager.as_ref() + } + + fn wal(&self) -> &WalImpl { + self.graph.mutable().unwrap().wal.as_ref() + } +} + impl InternalPropertyAdditionOps for Storage { type Error = GraphError; fn internal_add_properties( diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index 6e4add574f..c11781c251 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -40,6 +40,7 @@ use raphtory_storage::{ graph::edges::edge_storage_ops::EdgeStorageOps, mutation::{ addition_ops::{EdgeWriteLock, InternalAdditionOps}, + durability_ops::DurabilityOps, deletion_ops::InternalDeletionOps, property_addition_ops::InternalPropertyAdditionOps, }, @@ -176,7 +177,8 @@ impl< G: StaticGraphViewOps + InternalAdditionOps + InternalPropertyAdditionOps - + InternalDeletionOps, + + InternalDeletionOps + + DurabilityOps, > EdgeView { pub fn delete(&self, t: T, layer: Option<&str>) -> Result<(), GraphError> { From cff7a1e5634b420786098103b6670701ac4fdd87 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 1 Dec 2025 19:27:24 -0500 Subject: [PATCH 03/95] Remove wal from transaction manager --- db4-graph/src/lib.rs | 6 ++-- db4-storage/src/lib.rs | 2 -- db4-storage/src/transaction/mod.rs | 36 +++++++++---------- .../src/mutation/addition_ops_ext.rs | 2 +- .../src/mutation/durability_ops.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 2 +- raphtory/src/db/replay/mod.rs | 8 +++-- 7 files changed, 29 insertions(+), 29 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 40a97ea1b0..a6656d5d9d 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -24,7 +24,7 @@ use storage::{ persist::strategy::{Config, PersistentStrategy}, resolver::GIDResolverOps, wal::Wal, - Extension, GIDResolver, Layer, ReadLockedLayer, TransactionManager, WalImpl, ES, NS, + Extension, GIDResolver, Layer, ReadLockedLayer, transaction::TransactionManager, WalImpl, ES, NS, }; use tempfile::TempDir; @@ -119,7 +119,7 @@ impl, ES = ES>> TemporalGraph { node_count, storage: Arc::new(storage), graph_meta: Arc::new(GraphMeta::default()), - transaction_manager: Arc::new(TransactionManager::new(wal.clone())), + transaction_manager: Arc::new(TransactionManager::new()), wal, }) } @@ -164,7 +164,7 @@ impl, ES = ES>> TemporalGraph { node_count: AtomicUsize::new(0), storage: Arc::new(storage), graph_meta: Arc::new(GraphMeta::default()), - transaction_manager: Arc::new(TransactionManager::new(wal.clone())), + transaction_manager: Arc::new(TransactionManager::new()), wal, }) } diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index e1c3b8559e..9aa00c24ae 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -23,7 +23,6 @@ use crate::{ node_entry::{MemNodeEntry, MemNodeRef}, }, wal::no_wal::NoWal, - transaction::TransactionManager as GenericTransactionManager, }; use parking_lot::RwLock; use raphtory_api::core::entities::{EID, VID}; @@ -48,7 +47,6 @@ pub type Layer

= GraphStore, ES

, P>; pub type WalImpl = NoWal; pub type GIDResolver = MappingResolver; -pub type TransactionManager = GenericTransactionManager; pub type ReadLockedLayer

= ReadLockedGraphStore, ES

, P>; pub type ReadLockedNodes

= ReadLockedNodeStorage, P>; diff --git a/db4-storage/src/transaction/mod.rs b/db4-storage/src/transaction/mod.rs index a7b175af09..439e5b00de 100644 --- a/db4-storage/src/transaction/mod.rs +++ b/db4-storage/src/transaction/mod.rs @@ -1,40 +1,40 @@ -use std::sync::{ - Arc, - atomic::{self, AtomicU64}, -}; +use std::sync::atomic::{self, AtomicU64}; -use crate::wal::{GraphWal, TransactionID}; +use crate::wal::TransactionID; #[derive(Debug)] -pub struct TransactionManager { +pub struct TransactionManager { last_transaction_id: AtomicU64, - wal: Arc, } -impl TransactionManager { +impl TransactionManager { const STARTING_TRANSACTION_ID: TransactionID = 1; - pub fn new(wal: Arc) -> Self { + pub fn new() -> Self { Self { last_transaction_id: AtomicU64::new(Self::STARTING_TRANSACTION_ID), - wal, } } - pub fn load(self, last_transaction_id: TransactionID) { + /// Restores the last used transaction ID to the specified value. + /// Intended for using during recovery. + pub fn restore_transaction_id(&self, last_transaction_id: TransactionID) { self.last_transaction_id .store(last_transaction_id, atomic::Ordering::SeqCst) } pub fn begin_transaction(&self) -> TransactionID { - let transaction_id = self - .last_transaction_id - .fetch_add(1, atomic::Ordering::SeqCst); - self.wal.log_begin_transaction(transaction_id).unwrap(); - transaction_id + self.last_transaction_id + .fetch_add(1, atomic::Ordering::SeqCst) + } + + pub fn end_transaction(&self, _transaction_id: TransactionID) { + // No-op for now. } +} - pub fn end_transaction(&self, transaction_id: TransactionID) { - self.wal.log_end_transaction(transaction_id).unwrap(); +impl Default for TransactionManager { + fn default() -> Self { + Self::new() } } diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 5b1cdcf5a1..875027319a 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -24,7 +24,7 @@ use storage::{ persist::strategy::PersistentStrategy, properties::props_meta_writer::PropsMetaWriter, resolver::GIDResolverOps, - Extension, TransactionManager, WalImpl, ES, NS, + Extension, transaction::TransactionManager, WalImpl, ES, NS, }; pub struct WriteS<'a, EXT: PersistentStrategy, ES = ES>> { diff --git a/raphtory-storage/src/mutation/durability_ops.rs b/raphtory-storage/src/mutation/durability_ops.rs index be6288a957..34713df7aa 100644 --- a/raphtory-storage/src/mutation/durability_ops.rs +++ b/raphtory-storage/src/mutation/durability_ops.rs @@ -1,4 +1,4 @@ -use storage::{TransactionManager, WalImpl}; +use storage::{transaction::TransactionManager, WalImpl}; use crate::graph::graph::GraphStorage; use raphtory_api::inherit::Base; diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 3b1472d9e1..91702aa4a2 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -36,7 +36,7 @@ use std::{ path::Path, sync::Arc, }; -use storage::{Extension, TransactionManager, WalImpl}; +use storage::{Extension, transaction::TransactionManager, WalImpl}; #[cfg(feature = "search")] use { diff --git a/raphtory/src/db/replay/mod.rs b/raphtory/src/db/replay/mod.rs index 2c356faa3a..ab6a7e32ca 100644 --- a/raphtory/src/db/replay/mod.rs +++ b/raphtory/src/db/replay/mod.rs @@ -10,9 +10,9 @@ use storage::{ Extension, }; -/// Wrapper struct for implementing GraphReplayer for a TemporalGraph. -/// This is needed to workaround Rust's orphan rule since both ReplayGraph and TemporalGraph -/// are foreign to this crate. +/// Wrapper struct for implementing `GraphReplayer` for a `TemporalGraph`. +/// This is needed to workaround Rust's orphan rule since both `GraphReplayer` +/// and `TemporalGraph` are foreign to this crate. #[derive(Debug)] pub struct ReplayGraph { graph: TemporalGraph, @@ -72,6 +72,8 @@ impl GraphReplayer for ReplayGraph { _ => {} } + // TODO: Check max lsn on disk to see if replay is needed. + Ok(()) } From 6f24855296c118297de6b49e1803cc953aa3e6f2 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 2 Dec 2025 15:12:12 -0500 Subject: [PATCH 04/95] Simplify wal log and replay methods --- db4-storage/src/wal/entry.rs | 77 ++------- db4-storage/src/wal/mod.rs | 147 ++---------------- raphtory-storage/src/mutation/addition_ops.rs | 1 + raphtory/src/db/replay/mod.rs | 76 +-------- 4 files changed, 30 insertions(+), 271 deletions(-) diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index 71ba54ce4a..d6cd68ebb4 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -1,6 +1,9 @@ use std::path::Path; -use raphtory_api::core::{entities::properties::prop::Prop, storage::dict_mapper::MaybeNew}; +use raphtory_api::core::{ + entities::properties::prop::Prop, + storage::dict_mapper::MaybeNew, +}; use raphtory_core::{ entities::{EID, GID, VID}, storage::timeindex::TimeIndexEntry, @@ -14,82 +17,22 @@ use crate::{ impl GraphWal for NoWal { type ReplayEntry = (); - fn log_begin_transaction(&self, _transaction_id: TransactionID) -> Result { - Ok(0) - } - - fn log_end_transaction(&self, _transaction_id: TransactionID) -> Result { - Ok(0) - } - - fn log_add_static_edge( - &self, - _transaction_id: TransactionID, - _t: TimeIndexEntry, - _src: VID, - _dst: VID, - ) -> Result { - Ok(0) - } - - fn log_add_edge( + fn log_add_edge>( &self, _transaction_id: TransactionID, _t: TimeIndexEntry, - _src: VID, - _dst: VID, + _src_name: GID, + _src_id: VID, + _dst_name: GID, + _dst_id: VID, _eid: EID, + _layer_name: Option<&str>, _layer_id: usize, - _props: &[(usize, Prop)], - ) -> Result { - Ok(0) - } - - fn log_node_id( - &self, - _transaction_id: TransactionID, - _gid: GID, - _vid: VID, - ) -> Result { - Ok(0) - } - - fn log_edge_id( - &self, - _transaction_id: TransactionID, - _src: VID, - _dst: VID, - _eid: EID, - _layer_id: usize, - ) -> Result { - Ok(0) - } - - fn log_const_prop_ids>( - &self, - _transaction_id: TransactionID, _props: &[MaybeNew<(PN, usize, Prop)>], ) -> Result { Ok(0) } - fn log_temporal_prop_ids>( - &self, - _transaction_id: TransactionID, - _props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result { - Ok(0) - } - - fn log_layer_id( - &self, - _transaction_id: TransactionID, - _name: &str, - _id: usize, - ) -> Result { - Ok(0) - } - fn log_checkpoint(&self, _lsn: LSN) -> Result { Ok(0) } diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 7538781b16..912e7b2647 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -46,95 +46,20 @@ pub trait GraphWal { /// ReplayEntry represents the type of the wal entry returned during replay. type ReplayEntry; - fn log_begin_transaction(&self, transaction_id: TransactionID) -> Result; - - fn log_end_transaction(&self, transaction_id: TransactionID) -> Result; - - /// Log a static edge addition. - /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `t` - The timestamp of the edge addition - /// * `src` - The source vertex ID - /// * `dst` - The destination vertex ID - fn log_add_static_edge( + fn log_add_edge>( &self, transaction_id: TransactionID, t: TimeIndexEntry, - src: VID, - dst: VID, - ) -> Result; - - /// Log an edge addition to a layer with temporal props. - /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `t` - The timestamp of the edge addition - /// * `src` - The source vertex ID - /// * `dst` - The destination vertex ID - /// * `eid` - The edge ID - /// * `layer_id` - The layer ID - /// * `props` - The temporal properties of the edge - fn log_add_edge( - &self, - transaction_id: TransactionID, - t: TimeIndexEntry, - src: VID, - dst: VID, - eid: EID, - layer_id: usize, - props: &[(usize, Prop)], - ) -> Result; - - fn log_node_id( - &self, - transaction_id: TransactionID, - gid: GID, - vid: VID, - ) -> Result; - - fn log_edge_id( - &self, - transaction_id: TransactionID, - src: VID, - dst: VID, + src_name: GID, + src_id: VID, + dst_name: GID, + dst_id: VID, eid: EID, + layer_name: Option<&str>, layer_id: usize, - ) -> Result; - - /// Log constant prop name -> prop id mappings. - /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `props` - A slice containing new or existing tuples of (prop name, id, value) - fn log_const_prop_ids>( - &self, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result; - - /// Log temporal prop name -> prop id mappings. - /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `props` - A slice containing new or existing tuples of (prop name, id, value). - fn log_temporal_prop_ids>( - &self, - transaction_id: TransactionID, props: &[MaybeNew<(PN, usize, Prop)>], ) -> Result; - fn log_layer_id( - &self, - transaction_id: TransactionID, - name: &str, - id: usize, - ) -> Result; - /// Logs a checkpoint record, indicating that all Wal operations upto and including /// `lsn` has been persisted to disk. fn log_checkpoint(&self, lsn: LSN) -> Result; @@ -153,66 +78,18 @@ pub trait GraphWal { /// Trait for defining callbacks for replaying from wal pub trait GraphReplayer { - fn replay_begin_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError>; - - fn replay_end_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError>; - - fn replay_add_static_edge( + fn replay_add_edge>( &self, lsn: LSN, transaction_id: TransactionID, t: TimeIndexEntry, - src: VID, - dst: VID, - ) -> Result<(), StorageError>; - - fn replay_add_edge( - &self, - lsn: LSN, - transaction_id: TransactionID, - t: TimeIndexEntry, - src: VID, - dst: VID, + src_name: GID, + src_id: VID, + dst_name: GID, + dst_id: VID, eid: EID, + layer_name: Option<&str>, layer_id: usize, - props: &[(usize, Prop)], - ) -> Result<(), StorageError>; - - fn replay_node_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - gid: GID, - vid: VID, - ) -> Result<(), StorageError>; - - fn replay_const_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result<(), StorageError>; - - fn replay_temporal_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, props: &[MaybeNew<(PN, usize, Prop)>], ) -> Result<(), StorageError>; - - fn replay_layer_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - name: &str, - id: usize, - ) -> Result<(), StorageError>; } diff --git a/raphtory-storage/src/mutation/addition_ops.rs b/raphtory-storage/src/mutation/addition_ops.rs index 3ac593e091..9fca8f2570 100644 --- a/raphtory-storage/src/mutation/addition_ops.rs +++ b/raphtory-storage/src/mutation/addition_ops.rs @@ -36,6 +36,7 @@ pub trait InternalAdditionOps { /// map layer name to id and allocate a new layer if needed fn resolve_layer(&self, layer: Option<&str>) -> Result, Self::Error>; + /// map external node id to internal id, allocating a new empty node if needed fn resolve_node(&self, id: NodeRef) -> Result, Self::Error>; diff --git a/raphtory/src/db/replay/mod.rs b/raphtory/src/db/replay/mod.rs index ab6a7e32ca..b733c1b46a 100644 --- a/raphtory/src/db/replay/mod.rs +++ b/raphtory/src/db/replay/mod.rs @@ -25,43 +25,19 @@ impl ReplayGraph { } impl GraphReplayer for ReplayGraph { - fn replay_begin_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_end_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_add_static_edge( + fn replay_add_edge>( &self, lsn: LSN, transaction_id: TransactionID, t: TimeIndexEntry, - src: VID, - dst: VID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_add_edge( - &self, - lsn: LSN, - transaction_id: TransactionID, - t: TimeIndexEntry, - src: VID, - dst: VID, + src_name: GID, + src_id: VID, + dst_name: GID, + dst_id: VID, eid: EID, + layer_name: Option<&str>, layer_id: usize, - props: &[(usize, Prop)], + props: &[MaybeNew<(PN, usize, Prop)>], ) -> Result<(), StorageError> { let edge_segment = self.graph.storage().edges().get_edge_segment(eid); @@ -76,42 +52,4 @@ impl GraphReplayer for ReplayGraph { Ok(()) } - - fn replay_node_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - gid: GID, - vid: VID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_const_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_temporal_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_layer_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - name: &str, - id: usize, - ) -> Result<(), StorageError> { - Ok(()) - } } From c192d648a22d68559f08ad3d99f6c803e72d3002 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 2 Dec 2025 15:12:54 -0500 Subject: [PATCH 05/95] Add sketch of correct logging to add_edge --- db4-storage/src/wal/mod.rs | 2 +- raphtory/src/db/api/mutation/addition_ops.rs | 100 ++++++++----------- 2 files changed, 42 insertions(+), 60 deletions(-) diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 912e7b2647..36772e704c 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -76,7 +76,7 @@ pub trait GraphWal { ) -> Result<(), StorageError>; } -/// Trait for defining callbacks for replaying from wal +/// Trait for defining callbacks for replaying from wal. pub trait GraphReplayer { fn replay_add_edge>( &self, diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index 351627bfbb..5d8024f2b0 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -14,6 +14,7 @@ use crate::{ prelude::{GraphViewOps, NodeViewOps}, }; use raphtory_api::core::entities::properties::prop::Prop; +use raphtory_core::entities::GID; use raphtory_storage::mutation::addition_ops::{EdgeWriteLock, InternalAdditionOps}; use raphtory_storage::mutation::durability_ops::DurabilityOps; use storage::wal::{GraphWal, Wal}; @@ -249,7 +250,6 @@ impl> + StaticGraphViewOps + Dura props: PII, layer: Option<&str>, ) -> Result, GraphError> { - // Log transaction start let transaction_id = self.transaction_manager().begin_transaction(); let session = self.write_session().map_err(|err| err.into())?; @@ -268,19 +268,6 @@ impl> + StaticGraphViewOps + Dura ) .map_err(into_graph_err)?; - // Log prop name -> prop id mappings - self.wal() - .log_temporal_prop_ids(transaction_id, &props_with_status) - .unwrap(); - - let props = props_with_status - .into_iter() - .map(|maybe_new| { - let (_, prop_id, prop) = maybe_new.inner(); - (prop_id, prop) - }) - .collect::>(); - let ti = time_from_input_session(&session, t)?; let src_id = self .resolve_node(src.as_node_ref()) @@ -290,76 +277,71 @@ impl> + StaticGraphViewOps + Dura .map_err(into_graph_err)?; let layer_id = self.resolve_layer(layer).map_err(into_graph_err)?; - // Log node -> node id mappings // FIXME: We are logging node -> node id mappings AFTER they are inserted into the // resolver. Make sure resolver mapping CANNOT get to disk before Wal. - if let Some(gid) = src.as_node_ref().as_gid_ref().left() { - self.wal() - .log_node_id(transaction_id, gid.into(), src_id.inner()) - .unwrap(); - } - - if let Some(gid) = dst.as_node_ref().as_gid_ref().left() { - self.wal() - .log_node_id(transaction_id, gid.into(), dst_id.inner()) - .unwrap(); - } + let src_gid = src.as_node_ref().as_gid_ref().left().map(|gid_ref| GID::from(gid_ref)).unwrap(); + let dst_gid = dst.as_node_ref().as_gid_ref().left().map(|gid_ref| GID::from(gid_ref)).unwrap(); let src_id = src_id.inner(); let dst_id = dst_id.inner(); - // Log layer -> layer id mappings - if let Some(layer) = layer { - self.wal() - .log_layer_id(transaction_id, layer, layer_id.inner()) - .unwrap(); - } - let layer_id = layer_id.inner(); - // Holds all locks for nodes and edge until add_edge_op goes out of scope + // Hold all locks for src node, dst node and edge until add_edge_op goes out of scope. let mut add_edge_op = self .atomic_add_edge(src_id, dst_id, None, layer_id) .map_err(into_graph_err)?; - // Log edge addition - let add_static_edge_lsn = self - .wal() - .log_add_static_edge(transaction_id, ti, src_id, dst_id) - .unwrap(); - let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id, add_static_edge_lsn); - - // Log edge -> edge id mappings - // NOTE: We log edge id mappings after they are inserted into edge segments. - // This is fine as long as we hold onto segment locks for the entire operation. - let add_edge_lsn = self - .wal() - .log_add_edge( - transaction_id, - ti, - src_id, - dst_id, - edge_id.inner(), - layer_id, - &props, - ) - .unwrap(); + // NOTE: We log edge id after it is inserted into the edge segment. + // This is fine as long as we hold onto the edge segment lock through add_edge_op + // for the entire operation. + let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id, 0); + + // All names, ids and values have been generated for this operation. + // Create a wal entry to mark it as durable. + let lsn = self.wal().log_add_edge( + transaction_id, + ti, + src_gid, + src_id, + dst_gid, + dst_id, + edge_id.inner(), + layer, + layer_id, + &props_with_status, + ).unwrap(); + + let props = props_with_status + .into_iter() + .map(|maybe_new| { + let (_, prop_id, prop) = maybe_new.inner(); + (prop_id, prop) + }) + .collect::>(); + let edge_id = add_edge_op.internal_add_edge( ti, src_id, dst_id, edge_id.map(|eid| eid.with_layer(layer_id)), - add_edge_lsn, + 0, props, ); add_edge_op.store_src_node_info(src_id, src.as_node_ref().as_gid_ref().left()); add_edge_op.store_dst_node_info(dst_id, dst.as_node_ref().as_gid_ref().left()); - // Log transaction end. + // Update the src, dst and edge segments with the lsn of the wal entry. + // add_edge_op.update_lsn(lsn); + self.transaction_manager().end_transaction(transaction_id); - // Flush all wal entries to disk. + // Drop to release all the segment locks. + // FIXME: Make sure segments cannot get to disk before wal entry is flushed. + // drop(add_edge_op); + + // Flush the wal entry to disk. self.wal().sync().unwrap(); Ok(EdgeView::new( From fd49e14571effb263e25608e16ecb419523e60f4 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 2 Dec 2025 15:48:17 -0500 Subject: [PATCH 06/95] Add lsn to MemNodeSegment/MemEdgeSegment --- db4-storage/src/segments/edge.rs | 14 ++++++++++++-- db4-storage/src/segments/node.rs | 9 ++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/db4-storage/src/segments/edge.rs b/db4-storage/src/segments/edge.rs index 19a9045945..515f7cd829 100644 --- a/db4-storage/src/segments/edge.rs +++ b/db4-storage/src/segments/edge.rs @@ -51,6 +51,7 @@ impl HasRow for MemPageEntry { pub struct MemEdgeSegment { layers: Vec>, est_size: usize, + lsn: u64, } impl>> From for MemEdgeSegment { @@ -61,7 +62,11 @@ impl>> From for MemEdge !layers.is_empty(), "MemEdgeSegment must have at least one layer" ); - Self { layers, est_size } + Self { + layers, + est_size, + lsn: 0, + } } } @@ -82,6 +87,7 @@ impl MemEdgeSegment { Self { layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], est_size: 0, + lsn: 0, } } @@ -128,7 +134,11 @@ impl MemEdgeSegment { } pub fn lsn(&self) -> u64 { - self.layers.iter().map(|seg| seg.lsn()).min().unwrap_or(0) + self.lsn + } + + pub fn set_lsn(&mut self, lsn: u64) { + self.lsn = lsn; } pub fn max_page_len(&self) -> u32 { diff --git a/db4-storage/src/segments/node.rs b/db4-storage/src/segments/node.rs index 63b1d9f05d..e548d9e108 100644 --- a/db4-storage/src/segments/node.rs +++ b/db4-storage/src/segments/node.rs @@ -34,6 +34,7 @@ pub struct MemNodeSegment { segment_id: usize, max_page_len: u32, layers: Vec>, + lsn: u64, } impl>> From for MemNodeSegment { @@ -49,6 +50,7 @@ impl>> From for MemNodeSegm segment_id, max_page_len, layers, + lsn: 0, } } } @@ -140,7 +142,11 @@ impl MemNodeSegment { } pub fn lsn(&self) -> u64 { - self.layers.iter().map(|seg| seg.lsn()).min().unwrap_or(0) + self.lsn + } + + pub fn set_lsn(&mut self, lsn: u64) { + self.lsn = lsn; } pub fn to_vid(&self, pos: LocalPOS) -> VID { @@ -188,6 +194,7 @@ impl MemNodeSegment { segment_id, max_page_len, layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], + lsn: 0, } } From aa9f84b04805545dbb23ddf645c778b706e2b429 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 2 Dec 2025 15:51:29 -0500 Subject: [PATCH 07/95] Add set_lsn method for AtomicAddEdge --- db4-storage/src/pages/session.rs | 19 +++++++++++++++++++ db4-storage/src/segments/node.rs | 4 +++- raphtory-storage/src/mutation/addition_ops.rs | 4 +++- .../src/mutation/addition_ops_ext.rs | 7 ++++++- raphtory/src/db/api/mutation/addition_ops.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 6 +++++- 6 files changed, 37 insertions(+), 5 deletions(-) diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index 9d6c48eccb..2619f21a46 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -6,6 +6,7 @@ use crate::{ api::{edges::EdgeSegmentOps, nodes::NodeSegmentOps}, persist::strategy::{Config, PersistentStrategy}, segments::{edge::MemEdgeSegment, node::MemNodeSegment}, + wal::LSN, }; use parking_lot::RwLockWriteGuard; use raphtory_api::core::{entities::properties::prop::Prop, storage::dict_mapper::MaybeNew}; @@ -220,4 +221,22 @@ impl< ) -> &mut WriterPair<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS> { &mut self.node_writers } + + pub fn set_lsn(&mut self, lsn: LSN) { + match &mut self.node_writers { + WriterPair::Same { writer } => { + writer.mut_segment.set_lsn(lsn); + } + WriterPair::Different { + src_writer, + dst_writer, + } => { + src_writer.mut_segment.set_lsn(lsn); + dst_writer.mut_segment.set_lsn(lsn); + } + } + if let Some(edge_writer) = &mut self.edge_writer { + edge_writer.writer.set_lsn(lsn); + } + } } diff --git a/db4-storage/src/segments/node.rs b/db4-storage/src/segments/node.rs index e548d9e108..2f4d52595e 100644 --- a/db4-storage/src/segments/node.rs +++ b/db4-storage/src/segments/node.rs @@ -146,7 +146,9 @@ impl MemNodeSegment { } pub fn set_lsn(&mut self, lsn: u64) { - self.lsn = lsn; + if lsn > self.lsn { + self.lsn = lsn; + } } pub fn to_vid(&self, pos: LocalPOS) -> VID { diff --git a/raphtory-storage/src/mutation/addition_ops.rs b/raphtory-storage/src/mutation/addition_ops.rs index 9fca8f2570..808f9ce04d 100644 --- a/raphtory-storage/src/mutation/addition_ops.rs +++ b/raphtory-storage/src/mutation/addition_ops.rs @@ -20,7 +20,7 @@ use raphtory_api::{ inherit::Base, }; use raphtory_core::entities::{nodes::node_ref::NodeRef, ELID}; -use storage::{Extension}; +use storage::{Extension, wal::LSN}; pub trait InternalAdditionOps { type Error: From; @@ -124,6 +124,8 @@ pub trait EdgeWriteLock: Send + Sync { fn store_src_node_info(&mut self, id: impl Into, node_id: Option); fn store_dst_node_info(&mut self, id: impl Into, node_id: Option); + + fn set_lsn(&mut self, lsn: LSN); } pub trait AtomicNodeAddition: Send + Sync { diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 875027319a..0141fc6a53 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -17,7 +17,7 @@ use raphtory_core::{ nodes::node_ref::{AsNodeRef, NodeRef}, GidRef, EID, ELID, MAX_LAYER, VID, }, - storage::timeindex::TimeIndexEntry, + storage::{timeindex::TimeIndexEntry}, }; use storage::{ pages::{node_page::writer::node_info_as_props, session::WriteSession}, @@ -25,6 +25,7 @@ use storage::{ properties::props_meta_writer::PropsMetaWriter, resolver::GIDResolverOps, Extension, transaction::TransactionManager, WalImpl, ES, NS, + wal::LSN, }; pub struct WriteS<'a, EXT: PersistentStrategy, ES = ES>> { @@ -103,6 +104,10 @@ impl<'a, EXT: PersistentStrategy, ES = ES>> EdgeWriteLock for .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())], 0); }; } + + fn set_lsn(&mut self, lsn: LSN) { + self.static_session.set_lsn(lsn); + } } impl<'a> SessionAdditionOps for UnlockedSession<'a> { diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index 5d8024f2b0..b10b0435f1 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -333,7 +333,7 @@ impl> + StaticGraphViewOps + Dura add_edge_op.store_dst_node_info(dst_id, dst.as_node_ref().as_gid_ref().left()); // Update the src, dst and edge segments with the lsn of the wal entry. - // add_edge_op.update_lsn(lsn); + add_edge_op.set_lsn(lsn); self.transaction_manager().end_transaction(transaction_id); diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 91702aa4a2..d58db03725 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -36,7 +36,7 @@ use std::{ path::Path, sync::Arc, }; -use storage::{Extension, transaction::TransactionManager, WalImpl}; +use storage::{Extension, transaction::TransactionManager, WalImpl, wal::LSN}; #[cfg(feature = "search")] use { @@ -338,6 +338,10 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { fn store_dst_node_info(&mut self, id: impl Into, node_id: Option) { self.session.store_dst_node_info(id, node_id); } + + fn set_lsn(&mut self, lsn: LSN) { + self.session.set_lsn(lsn); + } } impl<'a> SessionAdditionOps for StorageWriteSession<'a> { From c2c77fab54d0c7e599cb3e6b74566ba688f706d1 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 2 Dec 2025 16:27:57 -0500 Subject: [PATCH 08/95] Simplify WriterPair to NodeWriters --- db4-storage/src/pages/mod.rs | 34 ++++++++++------------- db4-storage/src/pages/node_page/writer.rs | 32 ++++++--------------- db4-storage/src/pages/session.rs | 24 ++++++---------- 3 files changed, 33 insertions(+), 57 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 58d7623f00..0420109d5c 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -9,7 +9,7 @@ use crate::{ }; use edge_page::writer::EdgeWriter; use edge_store::EdgeStorageInner; -use node_page::writer::{NodeWriter, WriterPair}; +use node_page::writer::{NodeWriter, NodeWriters}; use node_store::NodeStorageInner; use parking_lot::RwLockWriteGuard; use raphtory_api::core::{ @@ -315,23 +315,18 @@ impl< let (src_chunk, _) = self.nodes.resolve_pos(src); let (dst_chunk, _) = self.nodes.resolve_pos(dst); + // Acquire locks in consistent order (lower chunk ID first) to prevent deadlocks. let node_writers = if src_chunk < dst_chunk { - let src_writer = self.node_writer(src_chunk); - let dst_writer = self.node_writer(dst_chunk); - WriterPair::Different { - src_writer, - dst_writer, - } + let src = self.node_writer(src_chunk); + let dst = self.node_writer(dst_chunk); + NodeWriters { src, dst: Some(dst) } } else if src_chunk > dst_chunk { - let dst_writer = self.node_writer(dst_chunk); - let src_writer = self.node_writer(src_chunk); - WriterPair::Different { - src_writer, - dst_writer, - } + let dst = self.node_writer(dst_chunk); + let src = self.node_writer(src_chunk); + NodeWriters { src, dst: Some(dst) } } else { - let writer = self.node_writer(src_chunk); - WriterPair::Same { writer } + let src = self.node_writer(src_chunk); + NodeWriters { src, dst: None } }; let edge_writer = e_id.map(|e_id| self.edge_writer(e_id)); @@ -352,19 +347,20 @@ impl< self.nodes().get_or_create_segment(src_chunk); self.nodes().get_or_create_segment(dst_chunk); + // FIXME: This can livelock due to inconsistent lock acquisition order. loop { if let Some(src_writer) = self.nodes().try_writer(src_chunk) { if let Some(dst_writer) = self.nodes().try_writer(dst_chunk) { - break WriterPair::Different { - src_writer, - dst_writer, + break NodeWriters { + src: src_writer, + dst: Some(dst_writer), }; } } } } else { let writer = self.node_writer(src_chunk); - WriterPair::Same { writer } + NodeWriters { src: writer, dst: None } }; let edge_writer = e_id.map(|e_id| self.edge_writer(e_id)); diff --git a/db4-storage/src/pages/node_page/writer.rs b/db4-storage/src/pages/node_page/writer.rs index f11e6ce498..4973943281 100644 --- a/db4-storage/src/pages/node_page/writer.rs +++ b/db4-storage/src/pages/node_page/writer.rs @@ -231,34 +231,20 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> Drop } } -pub enum WriterPair<'a, MP: DerefMut, NS: NodeSegmentOps> { - Same { - writer: NodeWriter<'a, MP, NS>, - }, - Different { - src_writer: NodeWriter<'a, MP, NS>, - dst_writer: NodeWriter<'a, MP, NS>, - }, + +/// Holds writers for src and dst node segments when adding an edge. +/// If both nodes are in the same segment, `dst` is `None` and `src` is used for both. +pub struct NodeWriters<'a, MP: DerefMut, NS: NodeSegmentOps> { + pub src: NodeWriter<'a, MP, NS>, + pub dst: Option>, } -impl<'a, MP: DerefMut, NS: NodeSegmentOps> WriterPair<'a, MP, NS> { +impl<'a, MP: DerefMut, NS: NodeSegmentOps> NodeWriters<'a, MP, NS> { pub fn get_mut_src(&mut self) -> &mut NodeWriter<'a, MP, NS> { - match self { - WriterPair::Same { writer, .. } => writer, - WriterPair::Different { - src_writer: writer_i, - .. - } => writer_i, - } + &mut self.src } pub fn get_mut_dst(&mut self) -> &mut NodeWriter<'a, MP, NS> { - match self { - WriterPair::Same { writer, .. } => writer, - WriterPair::Different { - dst_writer: writer_j, - .. - } => writer_j, - } + self.dst.as_mut().unwrap_or(&mut self.src) } } diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index 2619f21a46..e2093f627b 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -1,5 +1,5 @@ use super::{ - GraphStore, edge_page::writer::EdgeWriter, node_page::writer::WriterPair, resolve_pos, + GraphStore, edge_page::writer::EdgeWriter, node_page::writer::NodeWriters, resolve_pos, }; use crate::{ LocalPOS, @@ -16,7 +16,7 @@ use raphtory_core::{ }; pub struct WriteSession<'a, NS: NodeSegmentOps, ES: EdgeSegmentOps, EXT: Config> { - node_writers: WriterPair<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, + node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, edge_writer: Option, ES>>, graph: &'a GraphStore, } @@ -29,7 +29,7 @@ impl< > WriteSession<'a, NS, ES, EXT> { pub fn new( - node_writers: WriterPair<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, + node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, edge_writer: Option, ES>>, graph: &'a GraphStore, ) -> Self { @@ -218,23 +218,17 @@ impl< pub fn node_writers( &mut self, - ) -> &mut WriterPair<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS> { + ) -> &mut NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS> { &mut self.node_writers } pub fn set_lsn(&mut self, lsn: LSN) { - match &mut self.node_writers { - WriterPair::Same { writer } => { - writer.mut_segment.set_lsn(lsn); - } - WriterPair::Different { - src_writer, - dst_writer, - } => { - src_writer.mut_segment.set_lsn(lsn); - dst_writer.mut_segment.set_lsn(lsn); - } + self.node_writers.src.mut_segment.set_lsn(lsn); + + if let Some(dst) = &mut self.node_writers.dst { + dst.mut_segment.set_lsn(lsn); } + if let Some(edge_writer) = &mut self.edge_writer { edge_writer.writer.set_lsn(lsn); } From f263616ad88f77e9aa9274e6558103fd7e6472a9 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 3 Dec 2025 15:01:34 -0500 Subject: [PATCH 09/95] Remove lsn args --- db4-storage/src/pages/edge_page/writer.rs | 9 ++--- db4-storage/src/pages/mod.rs | 9 +++-- db4-storage/src/pages/node_page/writer.rs | 37 +++++++------------ db4-storage/src/pages/session.rs | 33 +++++++---------- db4-storage/src/pages/test_utils/checkers.rs | 10 +++-- db4-storage/src/segments/edge.rs | 20 +--------- db4-storage/src/segments/mod.rs | 12 ------ db4-storage/src/segments/node.rs | 20 ++++------ raphtory-storage/src/mutation/addition_ops.rs | 3 -- .../src/mutation/addition_ops_ext.rs | 19 ++++------ raphtory-storage/src/mutation/deletion_ops.rs | 7 ++-- .../src/mutation/property_addition_ops.rs | 4 +- raphtory/src/db/api/mutation/addition_ops.rs | 3 +- raphtory/src/db/api/mutation/deletion_ops.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 9 ++--- raphtory/src/db/api/view/graph.rs | 28 ++++++-------- raphtory/src/db/graph/edge.rs | 1 - raphtory/src/io/arrow/df_loaders.rs | 30 +++++++-------- 18 files changed, 94 insertions(+), 162 deletions(-) diff --git a/db4-storage/src/pages/edge_page/writer.rs b/db4-storage/src/pages/edge_page/writer.rs index 3348ba2510..9eb43eab37 100644 --- a/db4-storage/src/pages/edge_page/writer.rs +++ b/db4-storage/src/pages/edge_page/writer.rs @@ -42,7 +42,6 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen dst: VID, props: impl IntoIterator, layer_id: usize, - lsn: u64, ) -> LocalPOS { let existing_edge = self .page @@ -52,7 +51,7 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen } self.graph_stats.update_time(t.t()); self.writer - .insert_edge_internal(t, edge_pos, src, dst, layer_id, props, lsn); + .insert_edge_internal(t, edge_pos, src, dst, layer_id, props); edge_pos } @@ -88,7 +87,6 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen src: VID, dst: VID, layer_id: usize, - lsn: u64, ) { let existing_edge = self .page @@ -98,7 +96,7 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen } self.graph_stats.update_time(t.t()); self.writer - .delete_edge_internal(t, edge_pos, src, dst, layer_id, lsn); + .delete_edge_internal(t, edge_pos, src, dst, layer_id); } pub fn add_static_edge( @@ -106,7 +104,6 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen edge_pos: Option, src: impl Into, dst: impl Into, - lsn: u64, exists_hint: Option, // used when edge_pos is Some but the is not counted, this is used in the bulk loader ) -> LocalPOS { let layer_id = 0; // assuming layer_id 0 for static edges, adjust as needed @@ -117,7 +114,7 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen let edge_pos = edge_pos.unwrap_or_else(|| self.new_local_pos(layer_id)); self.writer - .insert_static_edge_internal(edge_pos, src, dst, layer_id, lsn); + .insert_static_edge_internal(edge_pos, src, dst, layer_id); edge_pos } diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 0420109d5c..c029abf75f 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -213,10 +213,11 @@ impl< let src = src.into(); let dst = dst.into(); let mut session = self.write_session(src, dst, None); + session.set_lsn(lsn); let elid = session - .add_static_edge(src, dst, lsn) + .add_static_edge(src, dst) .map(|eid| eid.with_layer(0)); - session.add_edge_into_layer(t, src, dst, elid, lsn, props); + session.add_edge_into_layer(t, src, dst, elid, props); Ok(elid) } @@ -284,7 +285,7 @@ impl< let (segment, node_pos) = self.nodes.resolve_pos(node); let mut node_writer = self.nodes.writer(segment); let prop_writer = PropsMetaWriter::constant(self.node_meta(), props.into_iter())?; - node_writer.update_c_props(node_pos, layer_id, prop_writer.into_props_const()?, 0); // TODO: LSN + node_writer.update_c_props(node_pos, layer_id, prop_writer.into_props_const()?); Ok(()) } @@ -302,7 +303,7 @@ impl< let mut node_writer = self.nodes.writer(segment); let prop_writer = PropsMetaWriter::temporal(self.node_meta(), props.into_iter())?; - node_writer.add_props(t, node_pos, layer_id, prop_writer.into_props_temporal()?, 0); // TODO: LSN + node_writer.add_props(t, node_pos, layer_id, prop_writer.into_props_temporal()?); Ok(()) } diff --git a/db4-storage/src/pages/node_page/writer.rs b/db4-storage/src/pages/node_page/writer.rs index 4973943281..82a3eaf0ca 100644 --- a/db4-storage/src/pages/node_page/writer.rs +++ b/db4-storage/src/pages/node_page/writer.rs @@ -37,9 +37,8 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri src_pos: impl Into, dst: impl Into, e_id: impl Into, - lsn: u64, ) { - self.add_outbound_edge_inner(t, src_pos, dst, e_id, lsn); + self.add_outbound_edge_inner(t, src_pos, dst, e_id); } pub fn add_static_outbound_edge( @@ -47,10 +46,9 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri src_pos: LocalPOS, dst: impl Into, e_id: impl Into, - lsn: u64, ) { let e_id = e_id.into(); - self.add_outbound_edge_inner::(None, src_pos, dst, e_id.with_layer(0), lsn); + self.add_outbound_edge_inner::(None, src_pos, dst, e_id.with_layer(0)); } fn add_outbound_edge_inner( @@ -59,7 +57,6 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri src_pos: impl Into, dst: impl Into, e_id: impl Into, - lsn: u64, ) { let src_pos = src_pos.into(); let dst = dst.into(); @@ -71,7 +68,7 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri let layer_id = e_id.layer(); let (is_new_node, add) = self .mut_segment - .add_outbound_edge(t, src_pos, dst, e_id, lsn); + .add_outbound_edge(t, src_pos, dst, e_id); self.page.increment_est_size(add); if is_new_node && !self.page.check_node(src_pos, layer_id) { @@ -85,9 +82,8 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri dst_pos: impl Into, src: impl Into, e_id: impl Into, - lsn: u64, ) { - self.add_inbound_edge_inner(t, dst_pos, src, e_id, lsn); + self.add_inbound_edge_inner(t, dst_pos, src, e_id); } pub fn add_static_inbound_edge( @@ -95,10 +91,9 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri dst_pos: LocalPOS, src: impl Into, e_id: impl Into, - lsn: u64, ) { let e_id = e_id.into(); - self.add_inbound_edge_inner::(None, dst_pos, src, e_id.with_layer(0), lsn); + self.add_inbound_edge_inner::(None, dst_pos, src, e_id.with_layer(0)); } fn add_inbound_edge_inner( @@ -107,7 +102,6 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri dst_pos: impl Into, src: impl Into, e_id: impl Into, - lsn: u64, ) { let e_id = e_id.into(); let src = src.into(); @@ -118,7 +112,7 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri let dst_pos = dst_pos.into(); let (is_new_node, add) = self .mut_segment - .add_inbound_edge(t, dst_pos, src, e_id, lsn); + .add_inbound_edge(t, dst_pos, src, e_id); self.page.increment_est_size(add); @@ -133,11 +127,9 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri pos: LocalPOS, layer_id: usize, props: impl IntoIterator, - lsn: u64, ) { self.l_counter.update_time(t.t()); let (is_new_node, add) = self.mut_segment.add_props(t, pos, layer_id, props); - self.mut_segment.as_mut()[layer_id].set_lsn(lsn); self.page.increment_est_size(add); if is_new_node && !self.page.check_node(pos, layer_id) { self.l_counter.increment(layer_id); @@ -158,10 +150,8 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri pos: LocalPOS, layer_id: usize, props: impl IntoIterator, - lsn: u64, ) { let (is_new_node, add) = self.mut_segment.update_c_props(pos, layer_id, props); - self.mut_segment.as_mut()[layer_id].set_lsn(lsn); self.page.increment_est_size(add); if is_new_node && !self.page.check_node(pos, layer_id) { self.l_counter.increment(layer_id); @@ -172,9 +162,9 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri self.mut_segment.get_metadata(pos, layer_id, prop_id) } - pub fn update_timestamp(&mut self, t: T, pos: LocalPOS, e_id: ELID, lsn: u64) { + pub fn update_timestamp(&mut self, t: T, pos: LocalPOS, e_id: ELID) { self.l_counter.update_time(t.t()); - let add = self.mut_segment.update_timestamp(t, pos, e_id, lsn); + let add = self.mut_segment.update_timestamp(t, pos, e_id); self.page.increment_est_size(add); } @@ -194,18 +184,17 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri layer_id: usize, gid: GidRef<'_>, node_type: usize, - lsn: u64, ) { let node_type = (node_type != 0).then_some(node_type); - self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), node_type), lsn); + self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), node_type)); } - pub fn store_node_id(&mut self, pos: LocalPOS, layer_id: usize, gid: GidRef<'_>, lsn: u64) { - self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), None), lsn); + pub fn store_node_id(&mut self, pos: LocalPOS, layer_id: usize, gid: GidRef<'_>) { + self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), None)); } - pub fn update_deletion_time(&mut self, t: T, node: LocalPOS, e_id: ELID, lsn: u64) { - self.update_timestamp(t, node, e_id, lsn); + pub fn update_deletion_time(&mut self, t: T, node: LocalPOS, e_id: ELID) { + self.update_timestamp(t, node, e_id); } } diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index e2093f627b..a997343859 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -50,7 +50,6 @@ impl< src: impl Into, dst: impl Into, edge: MaybeNew, - lsn: u64, props: impl IntoIterator, ) { let src = src.into(); @@ -67,13 +66,13 @@ impl< let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - writer.add_edge(t, edge_pos, src, dst, props, layer, lsn); + writer.add_edge(t, edge_pos, src, dst, props, layer); } else { let mut writer = self.graph.edge_writer(e_id.edge); let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - writer.add_edge(t, edge_pos, src, dst, props, layer, lsn); + writer.add_edge(t, edge_pos, src, dst, props, layer); self.edge_writer = Some(writer); // Attach edge_writer to hold onto locks } @@ -88,18 +87,18 @@ impl< { self.node_writers .get_mut_src() - .add_outbound_edge(Some(t), src_pos, dst, edge_id, lsn); + .add_outbound_edge(Some(t), src_pos, dst, edge_id); self.node_writers .get_mut_dst() - .add_inbound_edge(Some(t), dst_pos, src, edge_id, lsn); + .add_inbound_edge(Some(t), dst_pos, src, edge_id); } self.node_writers .get_mut_src() - .update_timestamp(t, src_pos, e_id, lsn); + .update_timestamp(t, src_pos, e_id); self.node_writers .get_mut_dst() - .update_timestamp(t, dst_pos, e_id, lsn); + .update_timestamp(t, dst_pos, e_id); } pub fn delete_edge_from_layer( @@ -108,7 +107,6 @@ impl< src: impl Into, dst: impl Into, edge: MaybeNew, - lsn: u64, ) { let src = src.into(); let dst = dst.into(); @@ -124,13 +122,13 @@ impl< let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - writer.delete_edge(t, edge_pos, src, dst, layer, lsn); + writer.delete_edge(t, edge_pos, src, dst, layer); } else { let mut writer = self.graph.edge_writer(e_id.edge); let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - writer.delete_edge(t, edge_pos, src, dst, layer, lsn); + writer.delete_edge(t, edge_pos, src, dst, layer); self.edge_writer = Some(writer); // Attach edge_writer to hold onto locks } @@ -149,23 +147,21 @@ impl< src_pos, dst, edge_id, - lsn, ); self.node_writers.get_mut_dst().add_inbound_edge( Some(t), dst_pos, src, edge_id, - lsn, ); } self.node_writers .get_mut_src() - .update_deletion_time(t, src_pos, e_id, lsn); + .update_deletion_time(t, src_pos, e_id); self.node_writers .get_mut_dst() - .update_deletion_time(t, dst_pos, e_id, lsn); + .update_deletion_time(t, dst_pos, e_id); } } @@ -173,7 +169,6 @@ impl< &mut self, src: impl Into, dst: impl Into, - lsn: u64, ) -> MaybeNew { let src = src.into(); let dst = dst.into(); @@ -194,12 +189,12 @@ impl< let edge_writer = self.edge_writer.as_mut().unwrap(); let (_, edge_pos) = self.graph.edges().resolve_pos(e_id); - edge_writer.add_static_edge(Some(edge_pos), src, dst, lsn, Some(true)); + edge_writer.add_static_edge(Some(edge_pos), src, dst, Some(true)); MaybeNew::Existing(e_id) } else { let mut edge_writer = self.graph.get_free_writer(); - let edge_id = edge_writer.add_static_edge(None, src, dst, lsn, Some(false)); + let edge_id = edge_writer.add_static_edge(None, src, dst, Some(false)); let edge_id = edge_id.as_eid(edge_writer.segment_id(), self.graph.edges().max_page_len()); @@ -207,10 +202,10 @@ impl< self.node_writers .get_mut_src() - .add_static_outbound_edge(src_pos, dst, edge_id, lsn); + .add_static_outbound_edge(src_pos, dst, edge_id); self.node_writers .get_mut_dst() - .add_static_inbound_edge(dst_pos, src, edge_id, lsn); + .add_static_inbound_edge(dst_pos, src, edge_id); MaybeNew::New(edge_id) } diff --git a/db4-storage/src/pages/test_utils/checkers.rs b/db4-storage/src/pages/test_utils/checkers.rs index 0f701c88c3..ec3f9e521e 100644 --- a/db4-storage/src/pages/test_utils/checkers.rs +++ b/db4-storage/src/pages/test_utils/checkers.rs @@ -56,9 +56,10 @@ pub fn make_graph_from_edges< let layer_id = layer_id.unwrap_or(0); let mut session = graph.write_session(*src, *dst, None); - let eid = session.add_static_edge(*src, *dst, lsn); + session.set_lsn(lsn); + let eid = session.add_static_edge(*src, *dst); let elid = eid.map(|eid| eid.with_layer(layer_id)); - session.add_edge_into_layer(timestamp, *src, *dst, elid, lsn, []); + session.add_edge_into_layer(timestamp, *src, *dst, elid, []); Ok::<_, StorageError>(()) }) @@ -73,9 +74,10 @@ pub fn make_graph_from_edges< let layer_id = layer_id.unwrap_or(0); let mut session = graph.write_session(*src, *dst, None); - let eid = session.add_static_edge(*src, *dst, lsn); + session.set_lsn(lsn); + let eid = session.add_static_edge(*src, *dst); let elid = eid.map(|e| e.with_layer(layer_id)); - session.add_edge_into_layer(timestamp, *src, *dst, elid, lsn, []); + session.add_edge_into_layer(timestamp, *src, *dst, elid, []); Ok::<_, StorageError>(()) }) diff --git a/db4-storage/src/segments/edge.rs b/db4-storage/src/segments/edge.rs index 515f7cd829..aa32b0cbb3 100644 --- a/db4-storage/src/segments/edge.rs +++ b/db4-storage/src/segments/edge.rs @@ -215,12 +215,10 @@ impl MemEdgeSegment { dst: VID, layer_id: usize, props: impl IntoIterator, - lsn: u64, ) { // Ensure we have enough layers self.ensure_layer(layer_id); let est_size = self.layers[layer_id].est_size(); - self.layers[layer_id].set_lsn(lsn); let local_row = self.reserve_local_row(edge_pos, src, dst, layer_id); @@ -240,14 +238,12 @@ impl MemEdgeSegment { src: VID, dst: VID, layer_id: usize, - lsn: u64, ) { let t = TimeIndexEntry::new(t.t(), t.i()); // Ensure we have enough layers self.ensure_layer(layer_id); let est_size = self.layers[layer_id].est_size(); - self.layers[layer_id].set_lsn(lsn); let local_row = self.reserve_local_row(edge_pos, src, dst, layer_id); let props = self.layers[layer_id].properties_mut(); @@ -262,14 +258,12 @@ impl MemEdgeSegment { src: impl Into, dst: impl Into, layer_id: usize, - lsn: u64, ) { let src = src.into(); let dst = dst.into(); // Ensure we have enough layers self.ensure_layer(layer_id); - self.layers[layer_id].set_lsn(lsn); let est_size = self.layers[layer_id].est_size(); self.reserve_local_row(edge_pos, src, dst, layer_id); @@ -613,7 +607,6 @@ mod test { VID(2), 0, vec![(0, Prop::from("test1"))], - 1, ); segment.insert_edge_internal( @@ -623,7 +616,6 @@ mod test { VID(4), 0, vec![(0, Prop::from("test2"))], - 2, ); segment.insert_edge_internal( @@ -633,7 +625,6 @@ mod test { VID(6), 0, vec![(0, Prop::from("test3"))], - 3, ); // Verify edges exist @@ -763,7 +754,6 @@ mod test { VID(2), 0, vec![(0, Prop::from("test1"))], - 1, ); segment1.insert_edge_internal( TimeIndexEntry::new(2, 1), @@ -772,7 +762,6 @@ mod test { VID(4), 0, vec![(0, Prop::from("test2"))], - 1, ); segment1.insert_edge_internal( TimeIndexEntry::new(3, 2), @@ -781,7 +770,6 @@ mod test { VID(6), 0, vec![(0, Prop::from("test3"))], - 1, ); // Equivalent bulk insertion @@ -831,7 +819,6 @@ mod test { VID(2), 0, vec![(0, Prop::from("individual1"))], - 1, ); // Bulk insert some edges @@ -863,7 +850,6 @@ mod test { VID(8), 0, vec![(0, Prop::from("individual2"))], - 1, ); // Another bulk insert @@ -983,14 +969,13 @@ mod test { VID(2), 0, vec![(0, Prop::from("test"))], - 1, ); let est_size1 = segment.est_size(); assert!(est_size1 > 0); - segment.delete_edge_internal(TimeIndexEntry::new(2, 3), LocalPOS(0), VID(5), VID(3), 0, 0); + segment.delete_edge_internal(TimeIndexEntry::new(2, 3), LocalPOS(0), VID(5), VID(3), 0); let est_size2 = segment.est_size(); @@ -1007,7 +992,6 @@ mod test { VID(6), 0, vec![(0, Prop::from("test2"))], - 1, ); let est_size3 = segment.est_size(); @@ -1018,7 +1002,7 @@ mod test { // Insert a static edge - segment.insert_static_edge_internal(LocalPOS(1), 4, 6, 0, 1); + segment.insert_static_edge_internal(LocalPOS(1), 4, 6, 0); let est_size4 = segment.est_size(); assert_eq!( diff --git a/db4-storage/src/segments/mod.rs b/db4-storage/src/segments/mod.rs index 7f9b8688ef..70eed8cbc0 100644 --- a/db4-storage/src/segments/mod.rs +++ b/db4-storage/src/segments/mod.rs @@ -158,7 +158,6 @@ pub struct SegmentContainer { max_page_len: u32, properties: Properties, meta: Arc, - lsn: u64, } pub trait HasRow: Default + Send + Sync + Sized { @@ -175,7 +174,6 @@ impl SegmentContainer { max_page_len, properties: Default::default(), meta, - lsn: 0, } } @@ -275,16 +273,6 @@ impl SegmentContainer { self.segment_id } - #[inline(always)] - pub fn lsn(&self) -> u64 { - self.lsn - } - - #[inline(always)] - pub fn set_lsn(&mut self, lsn: u64) { - self.lsn = lsn; - } - pub fn len(&self) -> u32 { self.data.data.len() as u32 } diff --git a/db4-storage/src/segments/node.rs b/db4-storage/src/segments/node.rs index 2f4d52595e..1385bff972 100644 --- a/db4-storage/src/segments/node.rs +++ b/db4-storage/src/segments/node.rs @@ -206,14 +206,12 @@ impl MemNodeSegment { src_pos: LocalPOS, dst: impl Into, e_id: impl Into, - lsn: u64, ) -> (bool, usize) { let dst = dst.into(); let e_id = e_id.into(); let layer_id = e_id.layer(); let layer = self.get_or_create_layer(layer_id); let est_size = layer.est_size(); - layer.set_lsn(lsn); let add_out = layer.reserve_local_row(src_pos); let new_entry = add_out.is_new(); @@ -235,7 +233,6 @@ impl MemNodeSegment { dst_pos: impl Into, src: impl Into, e_id: impl Into, - lsn: u64, ) -> (bool, usize) { let src = src.into(); let e_id = e_id.into(); @@ -244,7 +241,6 @@ impl MemNodeSegment { let layer = self.get_or_create_layer(layer_id); let est_size = layer.est_size(); - layer.set_lsn(lsn); let add_in = layer.reserve_local_row(dst_pos); let new_entry = add_in.is_new(); @@ -275,12 +271,10 @@ impl MemNodeSegment { t: T, node_pos: LocalPOS, e_id: ELID, - lsn: u64, ) -> usize { let layer_id = e_id.layer(); let (est_size, row) = { let segment_container = self.get_or_create_layer(layer_id); //&mut self.layers[e_id.layer()]; - segment_container.set_lsn(lsn); let est_size = segment_container.est_size(); let row = segment_container.reserve_local_row(node_pos).inner().row(); (est_size, row) @@ -592,7 +586,7 @@ mod test { let est_size1 = segment.est_size(); assert_eq!(est_size1, 0); - writer.add_outbound_edge(Some(1), LocalPOS(1), VID(3), EID(7).with_layer(0), 0); + writer.add_outbound_edge(Some(1), LocalPOS(1), VID(3), EID(7).with_layer(0)); let est_size2 = segment.est_size(); assert!( @@ -600,7 +594,7 @@ mod test { "Estimated size should be greater than 0 after adding an edge" ); - writer.add_inbound_edge(Some(1), LocalPOS(2), VID(4), EID(8).with_layer(0), 0); + writer.add_inbound_edge(Some(1), LocalPOS(2), VID(4), EID(8).with_layer(0)); let est_size3 = segment.est_size(); assert!( @@ -610,7 +604,7 @@ mod test { // no change when adding the same edge again - writer.add_outbound_edge::(None, LocalPOS(1), VID(3), EID(7).with_layer(0), 0); + writer.add_outbound_edge::(None, LocalPOS(1), VID(3), EID(7).with_layer(0)); let est_size4 = segment.est_size(); assert_eq!( est_size4, est_size3, @@ -625,7 +619,7 @@ mod test { .unwrap() .inner(); - writer.update_c_props(LocalPOS(1), 0, [(prop_id, Prop::U64(73))], 0); + writer.update_c_props(LocalPOS(1), 0, [(prop_id, Prop::U64(73))]); let est_size5 = segment.est_size(); assert!( @@ -633,7 +627,7 @@ mod test { "Estimated size should increase after adding constant properties" ); - writer.update_timestamp(17, LocalPOS(1), ELID::new(EID(0), 0), 0); + writer.update_timestamp(17, LocalPOS(1), ELID::new(EID(0), 0)); let est_size6 = segment.est_size(); assert!( @@ -648,7 +642,7 @@ mod test { .unwrap() .inner(); - writer.add_props(42, LocalPOS(1), 0, [(prop_id, Prop::F64(4.13))], 0); + writer.add_props(42, LocalPOS(1), 0, [(prop_id, Prop::F64(4.13))]); let est_size7 = segment.est_size(); assert!( @@ -656,7 +650,7 @@ mod test { "Estimated size should increase after adding temporal properties" ); - writer.add_props(72, LocalPOS(1), 0, [(prop_id, Prop::F64(5.41))], 0); + writer.add_props(72, LocalPOS(1), 0, [(prop_id, Prop::F64(5.41))]); let est_size8 = segment.est_size(); assert!( est_size8 > est_size7, diff --git a/raphtory-storage/src/mutation/addition_ops.rs b/raphtory-storage/src/mutation/addition_ops.rs index 808f9ce04d..3264ed18e4 100644 --- a/raphtory-storage/src/mutation/addition_ops.rs +++ b/raphtory-storage/src/mutation/addition_ops.rs @@ -99,7 +99,6 @@ pub trait EdgeWriteLock: Send + Sync { &mut self, src: impl Into, dst: impl Into, - lsn: u64, ) -> MaybeNew; /// add edge update @@ -109,7 +108,6 @@ pub trait EdgeWriteLock: Send + Sync { src: impl Into, dst: impl Into, eid: MaybeNew, - lsn: u64, props: impl IntoIterator, ) -> MaybeNew; @@ -118,7 +116,6 @@ pub trait EdgeWriteLock: Send + Sync { t: TimeIndexEntry, src: impl Into, dst: impl Into, - lsn: u64, layer: usize, ) -> MaybeNew; diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 0141fc6a53..c04ee79a31 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -42,9 +42,8 @@ impl<'a, EXT: PersistentStrategy, ES = ES>> EdgeWriteLock for &mut self, src: impl Into, dst: impl Into, - lsn: u64, ) -> MaybeNew { - self.static_session.add_static_edge(src, dst, lsn) + self.static_session.add_static_edge(src, dst) } fn internal_add_edge( @@ -53,11 +52,10 @@ impl<'a, EXT: PersistentStrategy, ES = ES>> EdgeWriteLock for src: impl Into, dst: impl Into, eid: MaybeNew, - lsn: u64, props: impl IntoIterator, ) -> MaybeNew { self.static_session - .add_edge_into_layer(t, src, dst, eid, lsn, props); + .add_edge_into_layer(t, src, dst, eid, props); eid } @@ -67,18 +65,17 @@ impl<'a, EXT: PersistentStrategy, ES = ES>> EdgeWriteLock for t: TimeIndexEntry, src: impl Into, dst: impl Into, - lsn: u64, layer: usize, ) -> MaybeNew { let src = src.into(); let dst = dst.into(); let eid = self .static_session - .add_static_edge(src, dst, lsn) + .add_static_edge(src, dst) .map(|eid| eid.with_layer_deletion(layer)); self.static_session - .delete_edge_from_layer(t, src, dst, eid, lsn); + .delete_edge_from_layer(t, src, dst, eid); eid } @@ -90,7 +87,7 @@ impl<'a, EXT: PersistentStrategy, ES = ES>> EdgeWriteLock for self.static_session .node_writers() .get_mut_src() - .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())], 0); + .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())]); }; } @@ -101,7 +98,7 @@ impl<'a, EXT: PersistentStrategy, ES = ES>> EdgeWriteLock for self.static_session .node_writers() .get_mut_dst() - .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())], 0); + .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())]); }; } @@ -261,7 +258,6 @@ impl InternalAdditionOps for TemporalGraph { local_pos, 0, node_info_as_props(id.as_gid_ref().left(), None), - 0, ); MaybeNew::Existing(0) } @@ -277,7 +273,6 @@ impl InternalAdditionOps for TemporalGraph { id.as_gid_ref().left(), Some(node_type_id.inner()).filter(|&id| id != 0), ), - 0, ); node_type_id } @@ -341,7 +336,7 @@ impl InternalAdditionOps for TemporalGraph { ) -> Result<(), Self::Error> { let (segment, node_pos) = self.storage().nodes().resolve_pos(v); let mut node_writer = self.storage().node_writer(segment); - node_writer.add_props(t, node_pos, 0, props, 0); + node_writer.add_props(t, node_pos, 0, props); Ok(()) } diff --git a/raphtory-storage/src/mutation/deletion_ops.rs b/raphtory-storage/src/mutation/deletion_ops.rs index 06b934cc3c..0a7b0a4b12 100644 --- a/raphtory-storage/src/mutation/deletion_ops.rs +++ b/raphtory-storage/src/mutation/deletion_ops.rs @@ -36,8 +36,9 @@ impl InternalDeletionOps for db4_graph::TemporalGraph { layer: usize, ) -> Result, Self::Error> { let mut session = self.storage().write_session(src, dst, None); - let edge = session.add_static_edge(src, dst, 0); - session.delete_edge_from_layer(t, src, dst, edge.map(|eid| eid.with_layer(layer)), 0); + session.set_lsn(0); + let edge = session.add_static_edge(src, dst); + session.delete_edge_from_layer(t, src, dst, edge.map(|eid| eid.with_layer(layer))); Ok(edge) } @@ -52,7 +53,7 @@ impl InternalDeletionOps for db4_graph::TemporalGraph { let (src, dst) = writer.get_edge(0, edge_pos).unwrap_or_else(|| { panic!("Internal Error: Edge {eid:?} not found in storage"); }); - writer.delete_edge(t, edge_pos, src, dst, layer, 0); + writer.delete_edge(t, edge_pos, src, dst, layer); Ok(()) } } diff --git a/raphtory-storage/src/mutation/property_addition_ops.rs b/raphtory-storage/src/mutation/property_addition_ops.rs index a10b04eac8..5c16f51bc0 100644 --- a/raphtory-storage/src/mutation/property_addition_ops.rs +++ b/raphtory-storage/src/mutation/property_addition_ops.rs @@ -83,7 +83,7 @@ impl InternalPropertyAdditionOps for db4_graph::TemporalGraph { let (segment_id, node_pos) = self.storage().nodes().resolve_pos(vid); let mut writer = self.storage().nodes().writer(segment_id); writer.check_metadata(node_pos, 0, &props)?; - writer.update_c_props(node_pos, 0, props, 0); + writer.update_c_props(node_pos, 0, props); Ok(writer) } @@ -94,7 +94,7 @@ impl InternalPropertyAdditionOps for db4_graph::TemporalGraph { ) -> Result, Self::Error> { let (segment_id, node_pos) = self.storage().nodes().resolve_pos(vid); let mut writer = self.storage().nodes().writer(segment_id); - writer.update_c_props(node_pos, 0, props, 0); + writer.update_c_props(node_pos, 0, props); Ok(writer) } diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index b10b0435f1..2dce0acd74 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -295,7 +295,7 @@ impl> + StaticGraphViewOps + Dura // NOTE: We log edge id after it is inserted into the edge segment. // This is fine as long as we hold onto the edge segment lock through add_edge_op // for the entire operation. - let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id, 0); + let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id); // All names, ids and values have been generated for this operation. // Create a wal entry to mark it as durable. @@ -325,7 +325,6 @@ impl> + StaticGraphViewOps + Dura src_id, dst_id, edge_id.map(|eid| eid.with_layer(layer_id)), - 0, props, ); diff --git a/raphtory/src/db/api/mutation/deletion_ops.rs b/raphtory/src/db/api/mutation/deletion_ops.rs index e25b1ca190..8157040213 100644 --- a/raphtory/src/db/api/mutation/deletion_ops.rs +++ b/raphtory/src/db/api/mutation/deletion_ops.rs @@ -51,7 +51,7 @@ pub trait DeletionOps: .atomic_add_edge(src_id, dst_id, None, layer_id) .map_err(into_graph_err)?; - let edge_id = add_edge_op.internal_delete_edge(ti, src_id, dst_id, 0, layer_id); + let edge_id = add_edge_op.internal_delete_edge(ti, src_id, dst_id, layer_id); add_edge_op.store_src_node_info(src_id, src.as_node_ref().as_gid_ref().left()); add_edge_op.store_dst_node_info(dst_id, dst.as_node_ref().as_gid_ref().left()); diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index d58db03725..72c31d5d80 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -302,9 +302,8 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { &mut self, src: impl Into, dst: impl Into, - lsn: u64, ) -> MaybeNew { - self.session.internal_add_static_edge(src, dst, lsn) + self.session.internal_add_static_edge(src, dst) } fn internal_add_edge( @@ -313,11 +312,10 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { src: impl Into, dst: impl Into, e_id: MaybeNew, - lsn: u64, props: impl IntoIterator, ) -> MaybeNew { self.session - .internal_add_edge(t, src, dst, e_id, lsn, props) + .internal_add_edge(t, src, dst, e_id, props) } fn internal_delete_edge( @@ -325,10 +323,9 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { t: TimeIndexEntry, src: impl Into, dst: impl Into, - lsn: u64, layer: usize, ) -> MaybeNew { - self.session.internal_delete_edge(t, src, dst, lsn, layer) + self.session.internal_delete_edge(t, src, dst, layer) } fn store_src_node_info(&mut self, id: impl Into, node_id: Option) { diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 3d519ad20f..54a4ce452a 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -351,17 +351,16 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { 0, gid.as_ref(), new_type_id, - 0, ); } else { - writer.store_node_id(node_pos, 0, gid.as_ref(), 0); + writer.store_node_id(node_pos, 0, gid.as_ref()); } graph_storage .write_session()? .set_node(gid.as_ref(), new_id)?; for (t, row) in node.rows() { - writer.add_props(t, node_pos, 0, row, 0); + writer.add_props(t, node_pos, 0, row); } writer.update_c_props( @@ -369,7 +368,6 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { 0, node.metadata_ids() .filter_map(|id| node.get_metadata(id).map(|prop| (id, prop))), - 0, ); } } @@ -390,13 +388,13 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { if let Some(edge_pos) = shard.resolve_pos(eid) { let mut writer = shard.writer(); // make the edge for the first time - writer.add_static_edge(Some(edge_pos), src, dst, 0, Some(false)); + writer.add_static_edge(Some(edge_pos), src, dst, Some(false)); for edge in edge.explode_layers() { let layer = layer_map[edge.edge.layer().unwrap()]; for edge in edge.explode() { let t = edge.edge.time().unwrap(); - writer.add_edge(t, edge_pos, src, dst, [], layer, 0); + writer.add_edge(t, edge_pos, src, dst, [], layer); } //TODO: move this in edge.row() for (t, t_props) in edge @@ -416,7 +414,7 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { let props = t_props .map(|(_, prop_id, prop)| (prop_id, prop)) .collect::>(); - writer.add_edge(t, edge_pos, src, dst, props, layer, 0); + writer.add_edge(t, edge_pos, src, dst, props, layer); } writer.update_c_props( edge_pos, @@ -437,7 +435,7 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { self.layer_ids(), ) { let layer = layer_map[layer]; - writer.delete_edge(t, edge_pos, src, dst, layer, 0); + writer.delete_edge(t, edge_pos, src, dst, layer); } } } @@ -454,12 +452,12 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { if let Some(node_pos) = maybe_src_pos { let mut writer = shard.writer(); - writer.add_static_outbound_edge(node_pos, dst_id, eid, 0); + writer.add_static_outbound_edge(node_pos, dst_id, eid); } if let Some(node_pos) = maybe_dst_pos { let mut writer = shard.writer(); - writer.add_static_inbound_edge(node_pos, src_id, eid, 0); + writer.add_static_inbound_edge(node_pos, src_id, eid); } for e in edge.explode_layers() { @@ -471,7 +469,6 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { node_pos, dst_id, eid.with_layer(layer), - 0, ); } if let Some(node_pos) = maybe_dst_pos { @@ -481,7 +478,6 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { node_pos, src_id, eid.with_layer(layer), - 0, ); } } @@ -492,14 +488,14 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { let t = e.time_and_index().expect("exploded edge should have time"); let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); + writer.update_timestamp(t, node_pos, eid.with_layer(l)); } if let Some(node_pos) = maybe_dst_pos { let mut writer = shard.writer(); let t = e.time_and_index().expect("exploded edge should have time"); let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); + writer.update_timestamp(t, node_pos, eid.with_layer(l)); } } @@ -513,11 +509,11 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { let layer = layer_map[layer]; if let Some(node_pos) = maybe_src_pos { let mut writer = shard.writer(); - writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); + writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer)); } if let Some(node_pos) = maybe_dst_pos { let mut writer = shard.writer(); - writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); + writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer)); } } } diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index c11781c251..1fa9de0f73 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -446,7 +446,6 @@ impl EdgeView { src, dst, MaybeNew::New(e_id.with_layer(layer_id)), - 0, props, ); diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 2f8e5e0594..bcdb01334a 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -204,19 +204,18 @@ pub fn load_nodes_from_df< let mut writer = shard.writer(); let t = TimeIndexEntry(time, secondary_index); let layer_id = STATIC_GRAPH_LAYER_ID; - let lsn = 0; update_time(t); writer - .store_node_id_and_node_type(mut_node, layer_id, gid, *node_type, lsn); + .store_node_id_and_node_type(mut_node, layer_id, gid, *node_type); let t_props = prop_cols.iter_row(row); let c_props = metadata_cols .iter_row(row) .chain(shared_metadata.iter().cloned()); - writer.add_props(t, mut_node, layer_id, t_props, lsn); - writer.update_c_props(mut_node, layer_id, c_props, lsn); + writer.add_props(t, mut_node, layer_id, t_props); + writer.update_c_props(mut_node, layer_id, c_props); }; } @@ -469,7 +468,7 @@ pub fn load_edges_from_df Date: Thu, 4 Dec 2025 18:19:10 -0500 Subject: [PATCH 10/95] Implement basic add_edge replay --- db4-storage/src/wal/mod.rs | 2 +- .../src/mutation/property_addition_ops.rs | 7 +++ raphtory/src/db/replay/mod.rs | 63 ++++++++++++++----- 3 files changed, 56 insertions(+), 16 deletions(-) diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 36772e704c..d84e506972 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -90,6 +90,6 @@ pub trait GraphReplayer { eid: EID, layer_name: Option<&str>, layer_id: usize, - props: &[MaybeNew<(PN, usize, Prop)>], + props: Vec>, ) -> Result<(), StorageError>; } diff --git a/raphtory-storage/src/mutation/property_addition_ops.rs b/raphtory-storage/src/mutation/property_addition_ops.rs index 5c16f51bc0..741be2e6b1 100644 --- a/raphtory-storage/src/mutation/property_addition_ops.rs +++ b/raphtory-storage/src/mutation/property_addition_ops.rs @@ -13,29 +13,36 @@ use storage::Extension; pub trait InternalPropertyAdditionOps { type Error: From; + fn internal_add_properties( &self, t: TimeIndexEntry, props: &[(usize, Prop)], ) -> Result<(), Self::Error>; + fn internal_add_metadata(&self, props: &[(usize, Prop)]) -> Result<(), Self::Error>; + fn internal_update_metadata(&self, props: &[(usize, Prop)]) -> Result<(), Self::Error>; + fn internal_add_node_metadata( &self, vid: VID, props: Vec<(usize, Prop)>, ) -> Result, Self::Error>; + fn internal_update_node_metadata( &self, vid: VID, props: Vec<(usize, Prop)>, ) -> Result, Self::Error>; + fn internal_add_edge_metadata( &self, eid: EID, layer: usize, props: Vec<(usize, Prop)>, ) -> Result, Self::Error>; + fn internal_update_edge_metadata( &self, eid: EID, diff --git a/raphtory/src/db/replay/mod.rs b/raphtory/src/db/replay/mod.rs index b733c1b46a..7e9f501c5d 100644 --- a/raphtory/src/db/replay/mod.rs +++ b/raphtory/src/db/replay/mod.rs @@ -1,26 +1,31 @@ -use db4_graph::TemporalGraph; +use crate::db::api::{ + storage::{graph, storage::Storage}, + view::internal::{Base, InternalStorageOps}, + }; use raphtory_api::core::{ entities::{properties::prop::Prop, EID, GID, VID}, storage::{dict_mapper::MaybeNew, timeindex::TimeIndexEntry}, }; +use raphtory_core::entities::GidRef; +use raphtory_storage::{core_ops::CoreGraphOps, mutation::addition_ops::{EdgeWriteLock, InternalAdditionOps}}; use storage::{ api::edges::EdgeSegmentOps, error::StorageError, wal::{GraphReplayer, TransactionID, LSN}, - Extension, }; +use storage::resolver::GIDResolverOps; -/// Wrapper struct for implementing `GraphReplayer` for a `TemporalGraph`. +/// Wrapper struct for implementing `GraphReplayer` for a `Storage`. /// This is needed to workaround Rust's orphan rule since both `GraphReplayer` -/// and `TemporalGraph` are foreign to this crate. +/// and `Storage` are foreign to this crate. #[derive(Debug)] pub struct ReplayGraph { - graph: TemporalGraph, + storage: Storage, } impl ReplayGraph { - pub fn new(graph: TemporalGraph) -> Self { - Self { graph } + pub fn new(graph: Storage) -> Self { + Self { storage: graph } } } @@ -37,18 +42,46 @@ impl GraphReplayer for ReplayGraph { eid: EID, layer_name: Option<&str>, layer_id: usize, - props: &[MaybeNew<(PN, usize, Prop)>], + props_with_status: Vec>, ) -> Result<(), StorageError> { - let edge_segment = self.graph.storage().edges().get_edge_segment(eid); + // TODO: Check max lsn on disk to see if this record should be replayed. - match edge_segment { - Some(edge_segment) => { - edge_segment.head().lsn(); - } - _ => {} + let storage = self.storage.get_storage() + .ok_or_else(|| StorageError::GenericFailure("Storage not available during replay".to_string()))?; + + let temporal_graph = storage.core_graph().mutable().unwrap(); + + // 1. Insert prop ids into edge meta. + // No need to validate props again since they are already validated before + // being logged to the WAL. + let edge_meta = temporal_graph.edge_meta(); + let mut prop_ids = Vec::new(); + + for prop in props_with_status.into_iter() { + let (prop_name, prop_id, prop_value) = prop.inner(); + let prop_mapper = edge_meta.temporal_prop_mapper(); + + prop_mapper.set_id_and_dtype(prop_name.as_ref(), prop_id, prop_value.dtype()); + prop_ids.push((prop_id, prop_value)); } - // TODO: Check max lsn on disk to see if replay is needed. + // 2. Insert node ids into resolver. + temporal_graph.logical_to_physical.set(GidRef::from(&src_name), src_id)?; + temporal_graph.logical_to_physical.set(GidRef::from(&dst_name), dst_id)?; + + // 3. Insert layer id into the layer meta of both edge and node. + let node_meta = temporal_graph.node_meta(); + + edge_meta.layer_meta().set_id(layer_name.unwrap_or("_default"), layer_id); + node_meta.layer_meta().set_id(layer_name.unwrap_or("_default"), layer_id); + + // 4. Grab src, dst and edge segment locks and add the edge. + let mut add_edge_op = temporal_graph.atomic_add_edge(src_id, dst_id, Some(eid), layer_id).unwrap(); + + let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id); + let edge_id_with_layer = edge_id.map(|eid| eid.with_layer(layer_id)); + + add_edge_op.internal_add_edge(t, src_id, dst_id, edge_id_with_layer, prop_ids); Ok(()) } From fa11c7ad55de1d7c3a63759ce65f9d556c3cbe69 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 5 Dec 2025 18:10:58 -0500 Subject: [PATCH 11/95] Simplify GraphWal --- db4-storage/src/wal/entry.rs | 9 +++------ db4-storage/src/wal/mod.rs | 12 ++++++------ raphtory/src/db/api/mutation/addition_ops.rs | 11 ++++++++++- raphtory/src/db/replay/mod.rs | 17 ++++++++--------- 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index d6cd68ebb4..def51d8bf4 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -1,9 +1,6 @@ use std::path::Path; -use raphtory_api::core::{ - entities::properties::prop::Prop, - storage::dict_mapper::MaybeNew, -}; +use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{ entities::{EID, GID, VID}, storage::timeindex::TimeIndexEntry, @@ -17,7 +14,7 @@ use crate::{ impl GraphWal for NoWal { type ReplayEntry = (); - fn log_add_edge>( + fn log_add_edge( &self, _transaction_id: TransactionID, _t: TimeIndexEntry, @@ -28,7 +25,7 @@ impl GraphWal for NoWal { _eid: EID, _layer_name: Option<&str>, _layer_id: usize, - _props: &[MaybeNew<(PN, usize, Prop)>], + _props: Vec<(&str, usize, Prop)>, ) -> Result { Ok(0) } diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index d84e506972..37c86f2425 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -1,5 +1,5 @@ use crate::error::StorageError; -use raphtory_api::core::{entities::properties::prop::Prop, storage::dict_mapper::MaybeNew}; +use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{ entities::{EID, GID, VID}, storage::timeindex::TimeIndexEntry, @@ -46,7 +46,7 @@ pub trait GraphWal { /// ReplayEntry represents the type of the wal entry returned during replay. type ReplayEntry; - fn log_add_edge>( + fn log_add_edge( &self, transaction_id: TransactionID, t: TimeIndexEntry, @@ -57,7 +57,7 @@ pub trait GraphWal { eid: EID, layer_name: Option<&str>, layer_id: usize, - props: &[MaybeNew<(PN, usize, Prop)>], + props: Vec<(&str, usize, Prop)>, ) -> Result; /// Logs a checkpoint record, indicating that all Wal operations upto and including @@ -78,7 +78,7 @@ pub trait GraphWal { /// Trait for defining callbacks for replaying from wal. pub trait GraphReplayer { - fn replay_add_edge>( + fn replay_add_edge( &self, lsn: LSN, transaction_id: TransactionID, @@ -88,8 +88,8 @@ pub trait GraphReplayer { dst_name: GID, dst_id: VID, eid: EID, - layer_name: Option<&str>, + layer_name: Option, layer_id: usize, - props: Vec>, + props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError>; } diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index 2dce0acd74..769eb14e32 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -299,6 +299,15 @@ impl> + StaticGraphViewOps + Dura // All names, ids and values have been generated for this operation. // Create a wal entry to mark it as durable. + + let props_for_wal = props_with_status + .iter() + .map(|maybe_new| { + let (prop_name, prop_id, prop) = maybe_new.as_ref().inner(); + (prop_name.as_ref(), *prop_id, prop.clone()) + }) + .collect::>(); + let lsn = self.wal().log_add_edge( transaction_id, ti, @@ -309,7 +318,7 @@ impl> + StaticGraphViewOps + Dura edge_id.inner(), layer, layer_id, - &props_with_status, + props_for_wal, ).unwrap(); let props = props_with_status diff --git a/raphtory/src/db/replay/mod.rs b/raphtory/src/db/replay/mod.rs index 7e9f501c5d..4d3d8a6601 100644 --- a/raphtory/src/db/replay/mod.rs +++ b/raphtory/src/db/replay/mod.rs @@ -4,7 +4,7 @@ use crate::db::api::{ }; use raphtory_api::core::{ entities::{properties::prop::Prop, EID, GID, VID}, - storage::{dict_mapper::MaybeNew, timeindex::TimeIndexEntry}, + storage::timeindex::TimeIndexEntry, }; use raphtory_core::entities::GidRef; use raphtory_storage::{core_ops::CoreGraphOps, mutation::addition_ops::{EdgeWriteLock, InternalAdditionOps}}; @@ -30,7 +30,7 @@ impl ReplayGraph { } impl GraphReplayer for ReplayGraph { - fn replay_add_edge>( + fn replay_add_edge( &self, lsn: LSN, transaction_id: TransactionID, @@ -40,9 +40,9 @@ impl GraphReplayer for ReplayGraph { dst_name: GID, dst_id: VID, eid: EID, - layer_name: Option<&str>, + layer_name: Option, layer_id: usize, - props_with_status: Vec>, + props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError> { // TODO: Check max lsn on disk to see if this record should be replayed. @@ -57,11 +57,10 @@ impl GraphReplayer for ReplayGraph { let edge_meta = temporal_graph.edge_meta(); let mut prop_ids = Vec::new(); - for prop in props_with_status.into_iter() { - let (prop_name, prop_id, prop_value) = prop.inner(); + for (prop_name, prop_id, prop_value) in props.into_iter() { let prop_mapper = edge_meta.temporal_prop_mapper(); - prop_mapper.set_id_and_dtype(prop_name.as_ref(), prop_id, prop_value.dtype()); + prop_mapper.set_id_and_dtype(prop_name, prop_id, prop_value.dtype()); prop_ids.push((prop_id, prop_value)); } @@ -72,8 +71,8 @@ impl GraphReplayer for ReplayGraph { // 3. Insert layer id into the layer meta of both edge and node. let node_meta = temporal_graph.node_meta(); - edge_meta.layer_meta().set_id(layer_name.unwrap_or("_default"), layer_id); - node_meta.layer_meta().set_id(layer_name.unwrap_or("_default"), layer_id); + edge_meta.layer_meta().set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); + node_meta.layer_meta().set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); // 4. Grab src, dst and edge segment locks and add the edge. let mut add_edge_op = temporal_graph.atomic_add_edge(src_id, dst_id, Some(eid), layer_id).unwrap(); From aa73139a7479d19e4d7a44762290cac5ac4dc827 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 10 Dec 2025 13:51:46 +0400 Subject: [PATCH 12/95] Remove wrapper for graph replay --- db4-storage/src/wal/entry.rs | 4 ++-- db4-storage/src/wal/mod.rs | 4 ++-- raphtory/src/db/replay/mod.rs | 20 +++----------------- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index def51d8bf4..7b0b0e6745 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -8,7 +8,7 @@ use raphtory_core::{ use crate::{ error::StorageError, - wal::{GraphReplayer, GraphWal, LSN, TransactionID, no_wal::NoWal}, + wal::{GraphReplay, GraphWal, LSN, TransactionID, no_wal::NoWal}, }; impl GraphWal for NoWal { @@ -40,7 +40,7 @@ impl GraphWal for NoWal { std::iter::once(Ok((0, ()))) } - fn replay_to_graph( + fn replay_to_graph( _dir: impl AsRef, _graph: &mut G, ) -> Result<(), StorageError> { diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 37c86f2425..5677961089 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -70,14 +70,14 @@ pub trait GraphWal { ) -> impl Iterator>; /// Replays and applies all the wal entries in the given directory to the given graph. - fn replay_to_graph( + fn replay_to_graph( dir: impl AsRef, graph: &mut G, ) -> Result<(), StorageError>; } /// Trait for defining callbacks for replaying from wal. -pub trait GraphReplayer { +pub trait GraphReplay { fn replay_add_edge( &self, lsn: LSN, diff --git a/raphtory/src/db/replay/mod.rs b/raphtory/src/db/replay/mod.rs index 4d3d8a6601..1ad99a8ca7 100644 --- a/raphtory/src/db/replay/mod.rs +++ b/raphtory/src/db/replay/mod.rs @@ -11,25 +11,11 @@ use raphtory_storage::{core_ops::CoreGraphOps, mutation::addition_ops::{EdgeWrit use storage::{ api::edges::EdgeSegmentOps, error::StorageError, - wal::{GraphReplayer, TransactionID, LSN}, + wal::{GraphReplay, TransactionID, LSN}, }; use storage::resolver::GIDResolverOps; -/// Wrapper struct for implementing `GraphReplayer` for a `Storage`. -/// This is needed to workaround Rust's orphan rule since both `GraphReplayer` -/// and `Storage` are foreign to this crate. -#[derive(Debug)] -pub struct ReplayGraph { - storage: Storage, -} - -impl ReplayGraph { - pub fn new(graph: Storage) -> Self { - Self { storage: graph } - } -} - -impl GraphReplayer for ReplayGraph { +impl GraphReplay for Storage { fn replay_add_edge( &self, lsn: LSN, @@ -46,7 +32,7 @@ impl GraphReplayer for ReplayGraph { ) -> Result<(), StorageError> { // TODO: Check max lsn on disk to see if this record should be replayed. - let storage = self.storage.get_storage() + let storage = self.get_storage() .ok_or_else(|| StorageError::GenericFailure("Storage not available during replay".to_string()))?; let temporal_graph = storage.core_graph().mutable().unwrap(); From 6944a6fc0c357b061181df46bc6dc3e1770854f3 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 10 Dec 2025 14:17:34 +0400 Subject: [PATCH 13/95] Fix leftover merge issues --- db4-graph/src/lib.rs | 3 ++- db4-storage/src/segments/edge/segment.rs | 3 ++- db4-storage/src/segments/graph_prop/segment.rs | 16 ++++++++++++---- db4-storage/src/segments/node/segment.rs | 18 ++++++------------ .../src/mutation/addition_ops_ext.rs | 3 ++- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 4dd2352b4e..babf9343a1 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -23,7 +23,8 @@ use storage::{ }, persist::strategy::{Config, PersistentStrategy}, resolver::GIDResolverOps, - Extension, GIDResolver, Layer, ReadLockedLayer, transaction::TransactionManager, WalImpl, ES, NS, + Extension, GIDResolver, Layer, ReadLockedLayer, transaction::TransactionManager, + WalImpl, ES, NS, GS, wal::Wal, }; use tempfile::TempDir; diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index 9ab1814c35..f3dbf64a4d 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -10,6 +10,7 @@ use crate::{ edge::entry::{MemEdgeEntry, MemEdgeRef}, }, utils::Iter4, + wal::LSN, }; use arrow_array::{ArrayRef, BooleanArray}; use parking_lot::lock_api::ArcRwLockReadGuard; @@ -53,7 +54,7 @@ impl HasRow for EdgeEntry { pub struct MemEdgeSegment { layers: Vec>, est_size: usize, - lsn: u64, + lsn: LSN, } impl>> From for MemEdgeSegment { diff --git a/db4-storage/src/segments/graph_prop/segment.rs b/db4-storage/src/segments/graph_prop/segment.rs index 3c17fa7fa9..2636ce8d9a 100644 --- a/db4-storage/src/segments/graph_prop/segment.rs +++ b/db4-storage/src/segments/graph_prop/segment.rs @@ -1,7 +1,5 @@ use crate::{ - LocalPOS, - error::StorageError, - segments::{HasRow, SegmentContainer}, + error::StorageError, segments::{HasRow, SegmentContainer}, wal::LSN, LocalPOS }; use raphtory_api::core::entities::properties::{meta::Meta, prop::Prop}; use raphtory_core::{ @@ -15,6 +13,7 @@ use std::sync::Arc; pub struct MemGraphPropSegment { /// Layers containing graph properties and metadata. layers: Vec>, + lsn: LSN, } /// A unit-like struct for use with `SegmentContainer`. @@ -49,6 +48,7 @@ impl MemGraphPropSegment { Self { layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], + lsn: 0, } } @@ -83,7 +83,15 @@ impl MemGraphPropSegment { pub fn take(&mut self) -> Self { let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); - Self { layers } + Self { layers, lsn: self.lsn } + } + + pub fn lsn(&self) -> LSN { + self.lsn + } + + pub fn set_lsn(&mut self, lsn: LSN) { + self.lsn = lsn; } pub fn add_properties( diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index b2836ccb61..c6dabc24f4 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -1,13 +1,7 @@ use crate::{ - LocalPOS, - api::nodes::{LockedNSSegment, NodeSegmentOps}, - error::StorageError, - loop_lock_write, - persist::strategy::PersistentStrategy, - segments::{ - HasRow, SegmentContainer, - node::entry::{MemNodeEntry, MemNodeRef}, - }, + api::nodes::{LockedNSSegment, NodeSegmentOps}, error::StorageError, loop_lock_write, persist::strategy::PersistentStrategy, segments::{ + node::entry::{MemNodeEntry, MemNodeRef}, HasRow, SegmentContainer + }, wal::LSN, LocalPOS }; use either::Either; use parking_lot::lock_api::ArcRwLockReadGuard; @@ -36,7 +30,7 @@ pub struct MemNodeSegment { segment_id: usize, max_page_len: u32, layers: Vec>, - lsn: u64, + lsn: LSN, } impl>> From for MemNodeSegment { @@ -143,11 +137,11 @@ impl MemNodeSegment { self.get_adj(n, layer_id).map_or(0, |adj| adj.degree(dir)) } - pub fn lsn(&self) -> u64 { + pub fn lsn(&self) -> LSN { self.lsn } - pub fn set_lsn(&mut self, lsn: u64) { + pub fn set_lsn(&mut self, lsn: LSN) { if lsn > self.lsn { self.lsn = lsn; } diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 13ca4f7e57..f9d1cee68b 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -24,7 +24,8 @@ use storage::{ persist::strategy::PersistentStrategy, properties::props_meta_writer::PropsMetaWriter, resolver::GIDResolverOps, - Extension, transaction::TransactionManager, WalImpl, ES, NS, + Extension, transaction::TransactionManager, WalImpl, ES, NS, GS, + wal::LSN, }; pub struct WriteS<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> { From 65be87aca1a79dc002b2d08b8e8d8554f31cf1e4 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 10 Dec 2025 17:31:36 +0400 Subject: [PATCH 14/95] Change mark_dirty to set_dirty --- db4-storage/src/api/edges.rs | 3 +-- db4-storage/src/api/graph_props.rs | 2 +- db4-storage/src/api/nodes.rs | 2 +- db4-storage/src/pages/edge_store.rs | 2 +- db4-storage/src/pages/graph_prop_page/writer.rs | 4 ++-- db4-storage/src/pages/locked/graph_props.rs | 4 ++-- db4-storage/src/pages/node_store.rs | 2 +- db4-storage/src/segments/edge/segment.rs | 2 +- db4-storage/src/segments/graph_prop/mod.rs | 4 ++-- db4-storage/src/segments/node/segment.rs | 2 +- 10 files changed, 13 insertions(+), 14 deletions(-) diff --git a/db4-storage/src/api/edges.rs b/db4-storage/src/api/edges.rs index 61136444cd..905f6ed64b 100644 --- a/db4-storage/src/api/edges.rs +++ b/db4-storage/src/api/edges.rs @@ -58,8 +58,7 @@ pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { fn try_head_mut(&self) -> Option>; - /// mark segment as dirty without triggering a write - fn mark_dirty(&self); + fn set_dirty(&self, dirty: bool); /// notify that an edge was added (might need to write to disk) fn notify_write( diff --git a/db4-storage/src/api/graph_props.rs b/db4-storage/src/api/graph_props.rs index 768aa8b123..a06ab76acc 100644 --- a/db4-storage/src/api/graph_props.rs +++ b/db4-storage/src/api/graph_props.rs @@ -29,7 +29,7 @@ where fn est_size(&self) -> usize; - fn mark_dirty(&self); + fn set_dirty(&self, dirty: bool); fn notify_write( &self, diff --git a/db4-storage/src/api/nodes.rs b/db4-storage/src/api/nodes.rs index ebea776c8a..9f9b2c2283 100644 --- a/db4-storage/src/api/nodes.rs +++ b/db4-storage/src/api/nodes.rs @@ -94,7 +94,7 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { head_lock: impl DerefMut, ) -> Result<(), StorageError>; - fn mark_dirty(&self); + fn set_dirty(&self, dirty: bool); fn check_node(&self, pos: LocalPOS, layer_id: usize) -> bool; diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 71ed0d1be3..eaff61c5d2 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -144,7 +144,7 @@ impl, EXT: Config> EdgeStorageInner .properties_mut() .set_has_properties() } - segment.mark_dirty(); + segment.set_dirty(true); } empty } diff --git a/db4-storage/src/pages/graph_prop_page/writer.rs b/db4-storage/src/pages/graph_prop_page/writer.rs index f2c89064b3..612a1be9cc 100644 --- a/db4-storage/src/pages/graph_prop_page/writer.rs +++ b/db4-storage/src/pages/graph_prop_page/writer.rs @@ -33,7 +33,7 @@ impl<'a, GS: GraphPropSegmentOps> GraphPropWriter<'a, GS> { let add = self.mem_segment.add_properties(t, props); self.graph_props.increment_est_size(add); - self.graph_props.mark_dirty(); + self.graph_props.set_dirty(true); } pub fn check_metadata(&self, props: &[(usize, Prop)]) -> Result<(), StorageError> { @@ -44,7 +44,7 @@ impl<'a, GS: GraphPropSegmentOps> GraphPropWriter<'a, GS> { let add = self.mem_segment.update_metadata(props); self.graph_props.increment_est_size(add); - self.graph_props.mark_dirty(); + self.graph_props.set_dirty(true); } } diff --git a/db4-storage/src/pages/locked/graph_props.rs b/db4-storage/src/pages/locked/graph_props.rs index 9aa19dbd91..87d41dc222 100644 --- a/db4-storage/src/pages/locked/graph_props.rs +++ b/db4-storage/src/pages/locked/graph_props.rs @@ -29,7 +29,7 @@ impl<'a, GS: GraphPropSegmentOps> LockedGraphPropPage<'a, GS> { let add = self.lock.add_properties(t, props); self.page.increment_est_size(add); - self.page.mark_dirty(); + self.page.set_dirty(true); } /// Add metadata (constant properties) to the graph @@ -42,7 +42,7 @@ impl<'a, GS: GraphPropSegmentOps> LockedGraphPropPage<'a, GS> { let add = self.lock.update_metadata(props); self.page.increment_est_size(add); - self.page.mark_dirty(); + self.page.set_dirty(true); } } diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 113112a77a..44fccfbfd2 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -159,7 +159,7 @@ impl, EXT: Config> NodeStorageInner .properties_mut() .set_has_properties() } - segment.mark_dirty(); + segment.set_dirty(true); } empty } diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index f3dbf64a4d..eb946ef868 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -585,7 +585,7 @@ impl>> EdgeSegmentOps for EdgeSegm .map_or(0, |layer| layer.len()) } - fn mark_dirty(&self) {} + fn set_dirty(&self, _dirty: bool) {} } #[cfg(test)] diff --git a/db4-storage/src/segments/graph_prop/mod.rs b/db4-storage/src/segments/graph_prop/mod.rs index 7d20c0624d..d6f98c9038 100644 --- a/db4-storage/src/segments/graph_prop/mod.rs +++ b/db4-storage/src/segments/graph_prop/mod.rs @@ -79,8 +79,8 @@ impl GraphPropSegmentOps for GraphPropSegmentView

{ self.est_size.load(Ordering::Relaxed) } - fn mark_dirty(&self) { - self.is_dirty.store(true, Ordering::Relaxed); + fn set_dirty(&self, dirty: bool) { + self.is_dirty.store(dirty, Ordering::Release); } fn notify_write( diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index c6dabc24f4..832828b990 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -480,7 +480,7 @@ impl>> NodeSegmentOps for NodeSegm Ok(()) } - fn mark_dirty(&self) {} + fn set_dirty(&self, _dirty: bool) {} fn check_node(&self, _pos: LocalPOS, _layer_id: usize) -> bool { false From 9a80df792f692a8b346f551464b4d7e5acc53efb Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 11 Dec 2025 11:13:24 +0400 Subject: [PATCH 15/95] Add replay tests --- db4-storage/src/pages/edge_page/writer.rs | 1 + db4-storage/src/pages/edge_store.rs | 4 ++++ db4-storage/src/pages/graph_prop_store.rs | 4 ++++ db4-storage/src/pages/session.rs | 1 + db4-storage/src/wal/entry.rs | 2 +- db4-storage/src/wal/mod.rs | 2 +- raphtory-storage/src/mutation/addition_ops_ext.rs | 3 +-- raphtory/src/db/replay/mod.rs | 11 +++++++---- 8 files changed, 20 insertions(+), 8 deletions(-) diff --git a/db4-storage/src/pages/edge_page/writer.rs b/db4-storage/src/pages/edge_page/writer.rs index 9d7ff4fdd0..55babb1405 100644 --- a/db4-storage/src/pages/edge_page/writer.rs +++ b/db4-storage/src/pages/edge_page/writer.rs @@ -119,6 +119,7 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen let edge_pos = edge_pos.unwrap_or_else(|| self.new_local_pos(layer_id)); self.writer .insert_static_edge_internal(edge_pos, src, dst, layer_id); + edge_pos } diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index eaff61c5d2..d50f87cc1d 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -117,6 +117,10 @@ impl, EXT: Config> EdgeStorageInner &self.layer_counter } + pub fn segments(&self) -> &boxcar::Vec> { + &self.segments + } + pub fn new_with_meta(edges_path: Option, edge_meta: Arc, ext: EXT) -> Self { let free_pages = (0..N).map(RwLock::new).collect::>(); let empty = Self { diff --git a/db4-storage/src/pages/graph_prop_store.rs b/db4-storage/src/pages/graph_prop_store.rs index 6e958182c3..0f1aaff698 100644 --- a/db4-storage/src/pages/graph_prop_store.rs +++ b/db4-storage/src/pages/graph_prop_store.rs @@ -66,6 +66,10 @@ impl, EXT: Config> GraphPropStorageInne self.page.entry() } + pub fn segment(&self) -> &Arc { + &self.page + } + pub fn writer(&self) -> GraphPropWriter<'_, GS> { let head = self.page.head_mut(); let graph_props = &self.page; diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index e43730142e..e99a496455 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -193,6 +193,7 @@ impl< if self.edge_writer.is_none() { self.edge_writer = Some(self.graph.edge_writer(e_id)); } + let edge_writer = self.edge_writer.as_mut().unwrap(); let (_, edge_pos) = self.graph.edges().resolve_pos(e_id); diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index 7b0b0e6745..dc2680580f 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -42,7 +42,7 @@ impl GraphWal for NoWal { fn replay_to_graph( _dir: impl AsRef, - _graph: &mut G, + _graph: &G, ) -> Result<(), StorageError> { todo!() } diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 5677961089..72b26c0d41 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -72,7 +72,7 @@ pub trait GraphWal { /// Replays and applies all the wal entries in the given directory to the given graph. fn replay_to_graph( dir: impl AsRef, - graph: &mut G, + graph: &G, ) -> Result<(), StorageError>; } diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index f9d1cee68b..612caef67d 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -56,8 +56,7 @@ impl<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> Edge eid: MaybeNew, props: impl IntoIterator, ) -> MaybeNew { - self.static_session - .add_edge_into_layer(t, src, dst, eid, props); + self.static_session.add_edge_into_layer(t, src, dst, eid, props); eid } diff --git a/raphtory/src/db/replay/mod.rs b/raphtory/src/db/replay/mod.rs index 1ad99a8ca7..e332a0bfbf 100644 --- a/raphtory/src/db/replay/mod.rs +++ b/raphtory/src/db/replay/mod.rs @@ -32,10 +32,7 @@ impl GraphReplay for Storage { ) -> Result<(), StorageError> { // TODO: Check max lsn on disk to see if this record should be replayed. - let storage = self.get_storage() - .ok_or_else(|| StorageError::GenericFailure("Storage not available during replay".to_string()))?; - - let temporal_graph = storage.core_graph().mutable().unwrap(); + let temporal_graph = self.core_graph().mutable().unwrap(); // 1. Insert prop ids into edge meta. // No need to validate props again since they are already validated before @@ -61,13 +58,19 @@ impl GraphReplay for Storage { node_meta.layer_meta().set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); // 4. Grab src, dst and edge segment locks and add the edge. + println!("Grabbing add_edge_op lock"); let mut add_edge_op = temporal_graph.atomic_add_edge(src_id, dst_id, Some(eid), layer_id).unwrap(); + println!("Added edge to atomic_add_edge"); + let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id); let edge_id_with_layer = edge_id.map(|eid| eid.with_layer(layer_id)); + println!("Adding edge to internal_add_edge"); add_edge_op.internal_add_edge(t, src_id, dst_id, edge_id_with_layer, prop_ids); + println!("Added edge to internal_add_edge"); + Ok(()) } } From c3f3352e3ef98e8a5813afb63d590701657b3377 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 11 Dec 2025 12:57:16 +0400 Subject: [PATCH 16/95] Always set edge_writer in init in WriteSession --- db4-storage/src/pages/mod.rs | 19 ++++++++- db4-storage/src/pages/session.rs | 66 ++++++++++++-------------------- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 9c90934e94..d0b6da23e1 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -354,17 +354,26 @@ impl< let node_writers = if src_chunk < dst_chunk { let src = self.node_writer(src_chunk); let dst = self.node_writer(dst_chunk); + NodeWriters { src, dst: Some(dst) } } else if src_chunk > dst_chunk { let dst = self.node_writer(dst_chunk); let src = self.node_writer(src_chunk); + NodeWriters { src, dst: Some(dst) } } else { let src = self.node_writer(src_chunk); + NodeWriters { src, dst: None } }; - let edge_writer = e_id.map(|e_id| self.edge_writer(e_id)); + let (_, src_pos) = self.nodes.resolve_pos(src); + let existing_eid = node_writers.src.get_out_edge(src_pos, dst, 0); + + let edge_writer = match e_id.or(existing_eid) { + Some(e_id) => self.edge_writer(e_id), + None => self.get_free_writer(), + }; WriteSession::new(node_writers, edge_writer, self) } @@ -398,7 +407,13 @@ impl< NodeWriters { src: writer, dst: None } }; - let edge_writer = e_id.map(|e_id| self.edge_writer(e_id)); + let (_, src_pos) = self.nodes.resolve_pos(src); + let existing_eid = node_writers.src.get_out_edge(src_pos, dst, 0); + + let edge_writer = match e_id.or(existing_eid) { + Some(e_id) => self.edge_writer(e_id), + None => self.get_free_writer(), + }; WriteSession::new(node_writers, edge_writer, self) } diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index e99a496455..df45afc6a1 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -23,7 +23,7 @@ pub struct WriteSession< EXT: Config, > { node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, - edge_writer: Option, ES>>, + edge_writer: EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES>, graph: &'a GraphStore, } @@ -37,7 +37,7 @@ impl< { pub fn new( node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, - edge_writer: Option, ES>>, + edge_writer: EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES>, graph: &'a GraphStore, ) -> Self { Self { @@ -69,19 +69,15 @@ impl< let (_, src_pos) = self.graph.nodes().resolve_pos(src); let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); - if let Some(writer) = self.edge_writer.as_mut() { - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); + let edge_max_page_len = self + .edge_writer + .writer + .get_or_create_layer(layer) + .max_page_len(); + let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - writer.add_edge(t, edge_pos, src, dst, props, layer); - } else { - let mut writer = self.graph.edge_writer(e_id.edge); - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - - writer.add_edge(t, edge_pos, src, dst, props, layer); - self.edge_writer = Some(writer); // Attach edge_writer to hold onto locks - } + self.edge_writer + .add_edge(t, edge_pos, src, dst, props, layer); let edge_id = edge.inner(); @@ -125,19 +121,15 @@ impl< let (_, src_pos) = self.graph.nodes().resolve_pos(src); let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); - if let Some(writer) = self.edge_writer.as_mut() { - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - - writer.delete_edge(t, edge_pos, src, dst, layer); - } else { - let mut writer = self.graph.edge_writer(e_id.edge); - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); + let edge_max_page_len = self + .edge_writer + .writer + .get_or_create_layer(layer) + .max_page_len(); + let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - writer.delete_edge(t, edge_pos, src, dst, layer); - self.edge_writer = Some(writer); // Attach edge_writer to hold onto locks - } + self.edge_writer + .delete_edge(t, edge_pos, src, dst, layer); let edge_id = edge.inner(); @@ -189,24 +181,18 @@ impl< .get_mut_src() .get_out_edge(src_pos, dst, layer_id) { - // If edge_writer is not set, we need to create a new one - if self.edge_writer.is_none() { - self.edge_writer = Some(self.graph.edge_writer(e_id)); - } - - let edge_writer = self.edge_writer.as_mut().unwrap(); let (_, edge_pos) = self.graph.edges().resolve_pos(e_id); - edge_writer.add_static_edge(Some(edge_pos), src, dst, true); + self.edge_writer + .add_static_edge(Some(edge_pos), src, dst, true); MaybeNew::Existing(e_id) } else { - let mut edge_writer = self.graph.get_free_writer(); - let edge_id = edge_writer.add_static_edge(None, src, dst, false); + let edge_id = self + .edge_writer + .add_static_edge(None, src, dst, false); let edge_id = - edge_id.as_eid(edge_writer.segment_id(), self.graph.edges().max_page_len()); - - self.edge_writer = Some(edge_writer); // Attach edge_writer to hold onto locks + edge_id.as_eid(self.edge_writer.segment_id(), self.graph.edges().max_page_len()); self.node_writers .get_mut_src() @@ -232,8 +218,6 @@ impl< dst.mut_segment.set_lsn(lsn); } - if let Some(edge_writer) = &mut self.edge_writer { - edge_writer.writer.set_lsn(lsn); - } + self.edge_writer.writer.set_lsn(lsn); } } From bc5410383534e71bfd1145eaa00a585c1b34cea7 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 12 Dec 2025 11:41:12 +0400 Subject: [PATCH 17/95] Return early from add_static_edge if edge exists --- db4-storage/src/pages/session.rs | 40 +++++++++++++++----------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index df45afc6a1..6df54943af 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -176,33 +176,31 @@ impl< let (_, src_pos) = self.graph.nodes().resolve_pos(src); let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); - if let Some(e_id) = self + let existing_eid = self .node_writers .get_mut_src() - .get_out_edge(src_pos, dst, layer_id) - { - let (_, edge_pos) = self.graph.edges().resolve_pos(e_id); + .get_out_edge(src_pos, dst, layer_id); - self.edge_writer - .add_static_edge(Some(edge_pos), src, dst, true); + // Edge already exists, so no need to add it again. + if let Some(eid) = existing_eid { + return MaybeNew::Existing(eid) + } - MaybeNew::Existing(e_id) - } else { - let edge_id = self - .edge_writer - .add_static_edge(None, src, dst, false); - let edge_id = - edge_id.as_eid(self.edge_writer.segment_id(), self.graph.edges().max_page_len()); + let edge_pos = None; + let edge_exists_hint = false; + let edge_pos = + self.edge_writer.add_static_edge(edge_pos, src, dst, edge_exists_hint); + let edge_id = + edge_pos.as_eid(self.edge_writer.segment_id(), self.graph.edges().max_page_len()); - self.node_writers - .get_mut_src() - .add_static_outbound_edge(src_pos, dst, edge_id); - self.node_writers - .get_mut_dst() - .add_static_inbound_edge(dst_pos, src, edge_id); + self.node_writers + .get_mut_src() + .add_static_outbound_edge(src_pos, dst, edge_id); + self.node_writers + .get_mut_dst() + .add_static_inbound_edge(dst_pos, src, edge_id); - MaybeNew::New(edge_id) - } + MaybeNew::New(edge_id) } pub fn node_writers( From f7ac76a5c3db28fd444348bbc0231be04dbdeeaf Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 12 Dec 2025 14:34:51 +0400 Subject: [PATCH 18/95] Move GraphReplay to WriteLockedGraph --- db4-graph/src/lib.rs | 3 ++ .../replay/mod.rs => db4-graph/src/replay.rs | 36 +++++++++---------- db4-storage/src/pages/locked/edges.rs | 5 +++ db4-storage/src/pages/locked/nodes.rs | 29 +++++++++------ raphtory/src/io/arrow/df_loaders.rs | 2 +- 5 files changed, 44 insertions(+), 31 deletions(-) rename raphtory/src/db/replay/mod.rs => db4-graph/src/replay.rs (65%) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index babf9343a1..2b21a7d232 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -28,6 +28,8 @@ use storage::{ }; use tempfile::TempDir; +mod replay; + #[derive(Debug)] pub struct TemporalGraph { // mapping between logical and physical ids @@ -334,6 +336,7 @@ impl, ES = ES, GS = GS>> Temporal } } +/// Holds write locks across all segments in the graph for fast bulk ingestion. pub struct WriteLockedGraph<'a, EXT> where EXT: PersistentStrategy, ES = ES, GS = GS>, diff --git a/raphtory/src/db/replay/mod.rs b/db4-graph/src/replay.rs similarity index 65% rename from raphtory/src/db/replay/mod.rs rename to db4-graph/src/replay.rs index e332a0bfbf..203829a862 100644 --- a/raphtory/src/db/replay/mod.rs +++ b/db4-graph/src/replay.rs @@ -1,21 +1,28 @@ +//! Implement WAL replay for a `WriteLockedGraph`. +//! + use crate::db::api::{ - storage::{graph, storage::Storage}, - view::internal::{Base, InternalStorageOps}, - }; + storage::{graph, storage::Storage}, + view::internal::{Base, InternalStorageOps}, +}; +use crate::WriteLockedGraph; use raphtory_api::core::{ entities::{properties::prop::Prop, EID, GID, VID}, storage::timeindex::TimeIndexEntry, }; use raphtory_core::entities::GidRef; -use raphtory_storage::{core_ops::CoreGraphOps, mutation::addition_ops::{EdgeWriteLock, InternalAdditionOps}}; use storage::{ - api::edges::EdgeSegmentOps, + persist::strategy::PersistentStrategy, + NS, ES, GS, error::StorageError, wal::{GraphReplay, TransactionID, LSN}, }; use storage::resolver::GIDResolverOps; -impl GraphReplay for Storage { +impl GraphReplay for WriteLockedGraph<'_, EXT> +where + EXT: PersistentStrategy, ES = ES, GS = GS>, +{ fn replay_add_edge( &self, lsn: LSN, @@ -31,8 +38,7 @@ impl GraphReplay for Storage { props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError> { // TODO: Check max lsn on disk to see if this record should be replayed. - - let temporal_graph = self.core_graph().mutable().unwrap(); + let temporal_graph = self.graph(); // 1. Insert prop ids into edge meta. // No need to validate props again since they are already validated before @@ -57,19 +63,9 @@ impl GraphReplay for Storage { edge_meta.layer_meta().set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); node_meta.layer_meta().set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); - // 4. Grab src, dst and edge segment locks and add the edge. - println!("Grabbing add_edge_op lock"); - let mut add_edge_op = temporal_graph.atomic_add_edge(src_id, dst_id, Some(eid), layer_id).unwrap(); - - println!("Added edge to atomic_add_edge"); - - let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id); - let edge_id_with_layer = edge_id.map(|eid| eid.with_layer(layer_id)); - - println!("Adding edge to internal_add_edge"); - add_edge_op.internal_add_edge(t, src_id, dst_id, edge_id_with_layer, prop_ids); + // 4. Grab src, dst and edge segment writers and add the edge. + let src_writer = self.nodes().get_writer(src_id); - println!("Added edge to internal_add_edge"); Ok(()) } diff --git a/db4-storage/src/pages/locked/edges.rs b/db4-storage/src/pages/locked/edges.rs index a07f03147b..cee46e8981 100644 --- a/db4-storage/src/pages/locked/edges.rs +++ b/db4-storage/src/pages/locked/edges.rs @@ -79,6 +79,11 @@ impl<'a, ES: EdgeSegmentOps> WriteLockedEdgePages<'a, ES> { Self { writers } } + #[inline] + pub fn get(&mut self, segment_id: usize) -> Option<&mut LockedEdgePage<'a, ES>> { + self.writers.get_mut(segment_id) + } + pub fn par_iter_mut(&mut self) -> rayon::slice::IterMut<'_, LockedEdgePage<'a, ES>> { self.writers.par_iter_mut() } diff --git a/db4-storage/src/pages/locked/nodes.rs b/db4-storage/src/pages/locked/nodes.rs index 48b4fd7f10..d715cd7af2 100644 --- a/db4-storage/src/pages/locked/nodes.rs +++ b/db4-storage/src/pages/locked/nodes.rs @@ -11,7 +11,7 @@ use rayon::prelude::*; use std::ops::DerefMut; pub struct LockedNodePage<'a, NS> { - page_id: usize, + segment_id: usize, max_page_len: u32, layer_counter: &'a GraphStats, page: &'a NS, @@ -20,14 +20,14 @@ pub struct LockedNodePage<'a, NS> { impl<'a, NS: NodeSegmentOps> LockedNodePage<'a, NS> { pub fn new( - page_id: usize, + segment_id: usize, layer_counter: &'a GraphStats, max_page_len: u32, page: &'a NS, lock: RwLockWriteGuard<'a, MemNodeSegment>, ) -> Self { Self { - page_id, + segment_id, layer_counter, max_page_len, page, @@ -49,14 +49,15 @@ impl<'a, NS: NodeSegmentOps> LockedNodePage<'a, NS> { } #[inline(always)] - pub fn page_id(&self) -> usize { - self.page_id + pub fn segment_id(&self) -> usize { + self.segment_id } #[inline(always)] pub fn resolve_pos(&self, node_id: VID) -> Option { let (page, pos) = resolve_pos(node_id, self.max_page_len); - if page == self.page_id { + + if page == self.segment_id { Some(pos) } else { None @@ -86,6 +87,18 @@ impl<'a, NS: NodeSegmentOps> WriteLockedNodePages<'a, NS> { Self { writers } } + #[inline] + pub fn get( + &mut self, + segment_id: usize, + ) -> Option<&mut LockedNodePage<'a, NS>> { + self.writers.get_mut(segment_id) + } + + pub fn len(&self) -> usize { + self.writers.len() + } + pub fn par_iter_mut(&mut self) -> rayon::slice::IterMut<'_, LockedNodePage<'a, NS>> { self.writers.par_iter_mut() } @@ -104,10 +117,6 @@ impl<'a, NS: NodeSegmentOps> WriteLockedNodePages<'a, NS> { } } - pub fn len(&self) -> usize { - self.writers.len() - } - pub fn vacuum(&mut self) -> Result<(), StorageError> { for LockedNodePage { page, lock, .. } in &mut self.writers { page.vacuum(lock.deref_mut())?; diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 6d9e2c1beb..5cfba8776d 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -415,9 +415,9 @@ pub fn load_edges_from_df Date: Tue, 16 Dec 2025 13:01:52 +0400 Subject: [PATCH 19/95] Implement add_edge replay for WriteLockedGraph --- db4-graph/src/replay.rs | 83 +++++++++++++++++--- db4-storage/src/pages/edge_page/writer.rs | 40 ++++++---- db4-storage/src/pages/edge_store.rs | 16 ++-- db4-storage/src/pages/locked/edges.rs | 2 +- db4-storage/src/pages/locked/nodes.rs | 13 ++- db4-storage/src/pages/node_store.rs | 10 ++- db4-storage/src/pages/session.rs | 9 +-- db4-storage/src/segments/edge/segment.rs | 4 +- db4-storage/src/wal/entry.rs | 2 +- db4-storage/src/wal/mod.rs | 4 +- raphtory-api/src/core/entities/mod.rs | 4 + raphtory/src/db/api/mutation/addition_ops.rs | 1 - raphtory/src/db/mod.rs | 1 - raphtory/src/io/arrow/df_loaders.rs | 12 +-- 14 files changed, 138 insertions(+), 63 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 203829a862..b620c4fe83 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -1,14 +1,13 @@ -//! Implement WAL replay for a `WriteLockedGraph`. -//! +//! Implements WAL replay for a `WriteLockedGraph`. +//! Allows for fast replay by making use of one-time lock acquisition for +//! all the segments in the graph. -use crate::db::api::{ - storage::{graph, storage::Storage}, - view::internal::{Base, InternalStorageOps}, -}; -use crate::WriteLockedGraph; +use storage::pages::resolve_pos; +use crate::{WriteLockedGraph}; use raphtory_api::core::{ entities::{properties::prop::Prop, EID, GID, VID}, storage::timeindex::TimeIndexEntry, + entities::properties::meta::STATIC_GRAPH_LAYER_ID, }; use raphtory_core::entities::GidRef; use storage::{ @@ -24,7 +23,7 @@ where EXT: PersistentStrategy, ES = ES, GS = GS>, { fn replay_add_edge( - &self, + &mut self, lsn: LSN, transaction_id: TransactionID, t: TimeIndexEntry, @@ -38,19 +37,22 @@ where props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError> { // TODO: Check max lsn on disk to see if this record should be replayed. + let temporal_graph = self.graph(); + let node_max_page_len = temporal_graph.storage().nodes().max_page_len(); + let edge_max_page_len = temporal_graph.storage().edges().max_page_len(); // 1. Insert prop ids into edge meta. // No need to validate props again since they are already validated before // being logged to the WAL. let edge_meta = temporal_graph.edge_meta(); - let mut prop_ids = Vec::new(); + let mut prop_ids_and_values = Vec::new(); for (prop_name, prop_id, prop_value) in props.into_iter() { let prop_mapper = edge_meta.temporal_prop_mapper(); prop_mapper.set_id_and_dtype(prop_name, prop_id, prop_value.dtype()); - prop_ids.push((prop_id, prop_value)); + prop_ids_and_values.push((prop_id, prop_value)); } // 2. Insert node ids into resolver. @@ -63,9 +65,66 @@ where edge_meta.layer_meta().set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); node_meta.layer_meta().set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); - // 4. Grab src, dst and edge segment writers and add the edge. - let src_writer = self.nodes().get_writer(src_id); + // 4. Grab src writer and add edge data. + let (src_segment_id, src_pos) = resolve_pos(src_id, node_max_page_len); + let num_nodes = src_id.index() + 1; + self.resize_chunks_to_num_nodes(num_nodes); // Create enough segments. + + let mut src_writer = self.nodes.get_mut(src_segment_id).unwrap().writer(); + src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&src_name)); + + let is_new_edge_static = src_writer.get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID).is_none(); + let is_new_edge_layer = src_writer.get_out_edge(src_pos, dst_id, layer_id).is_none(); + + // Add the edge to the static graph if it doesn't already exist. + if is_new_edge_static { + src_writer.add_static_outbound_edge(src_pos, dst_id, eid); + } + + // Add the edge to the layer if it doesn't already exist, else just record the timestamp. + if is_new_edge_layer { + src_writer.add_outbound_edge(Some(t), src_pos, dst_id, eid.with_layer(layer_id)); + } else { + src_writer.update_timestamp(t, src_pos, eid.with_layer(layer_id)); + } + + // Release the writer for mutable access to dst_writer. + drop(src_writer); + + // 5. Grab dst writer and add edge data. + let (dst_segment_id, dst_pos) = resolve_pos(dst_id, node_max_page_len); + let num_nodes = dst_id.index() + 1; + self.resize_chunks_to_num_nodes(num_nodes); + + let mut dst_writer = self.nodes.get_mut(dst_segment_id).unwrap().writer(); + dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&dst_name)); + + if is_new_edge_static { + dst_writer.add_static_inbound_edge(dst_pos, src_id, eid); + } + + if is_new_edge_layer { + dst_writer.add_inbound_edge(Some(t), dst_pos, src_id, eid.with_layer(layer_id)); + } else { + dst_writer.update_timestamp(t, dst_pos, eid.with_layer(layer_id)); + } + + drop(dst_writer); + + // 6. Grab edge writer and add temporal props & metadata. + let (edge_segment_id, edge_pos) = resolve_pos(eid, edge_max_page_len); + let num_edges = eid.index() + 1; + self.resize_chunks_to_num_edges(num_edges); + let mut edge_writer = self.edges.get_mut(edge_segment_id).unwrap().writer(); + + // Add edge into the static graph if it doesn't already exist. + if is_new_edge_static { + let already_counted = false; + edge_writer.add_static_edge(Some(edge_pos), src_id, dst_id, already_counted); + } + // Add edge into the specified layer with timestamp and props. + edge_writer.add_edge(t, edge_pos, src_id, dst_id, prop_ids_and_values, layer_id); Ok(()) } diff --git a/db4-storage/src/pages/edge_page/writer.rs b/db4-storage/src/pages/edge_page/writer.rs index 55babb1405..c8ad00db36 100644 --- a/db4-storage/src/pages/edge_page/writer.rs +++ b/db4-storage/src/pages/edge_page/writer.rs @@ -3,7 +3,7 @@ use crate::{ segments::edge::segment::MemEdgeSegment, }; use arrow_array::{ArrayRef, BooleanArray}; -use raphtory_api::core::entities::{VID, properties::prop::Prop}; +use raphtory_api::core::entities::{properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, VID}; use raphtory_core::{ entities::EID, storage::timeindex::{AsTime, TimeIndexEntry}, @@ -46,15 +46,19 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen props: impl IntoIterator, layer_id: usize, ) -> LocalPOS { - let existing_edge = self + let is_new_edge = !self .page .contains_edge(edge_pos, layer_id, self.writer.deref()); - if !existing_edge { + + if is_new_edge { self.increment_layer_num_edges(layer_id); } + self.graph_stats.update_time(t.t()); + self.writer .insert_edge_internal(t, edge_pos, src, dst, layer_id, props); + edge_pos } @@ -102,23 +106,26 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen .delete_edge_internal(t, edge_pos, src, dst, layer_id); } + /// Adds a static edge to the graph. + /// + /// If `edge_pos` is `None`, a new position is allocated. If `Some`, the provided position + /// is used. + /// Set `already_counted` to `true` when bulk loading to avoid double-counting statistics. pub fn add_static_edge( &mut self, edge_pos: Option, src: impl Into, dst: impl Into, - exists_hint: bool, // used when edge_pos is Some but the is not counted, this is used in the bulk loader + already_counted: bool, ) -> LocalPOS { - let layer_id = 0; // assuming layer_id 0 for static edges, adjust as needed - - if edge_pos.is_some() && !exists_hint { + if edge_pos.is_some() && !already_counted { self.page.increment_num_edges(); - self.increment_layer_num_edges(layer_id); + self.increment_layer_num_edges(STATIC_GRAPH_LAYER_ID); } - let edge_pos = edge_pos.unwrap_or_else(|| self.new_local_pos(layer_id)); + let edge_pos = edge_pos.unwrap_or_else(|| self.new_local_pos(STATIC_GRAPH_LAYER_ID)); self.writer - .insert_static_edge_internal(edge_pos, src, dst, layer_id); + .insert_static_edge_internal(edge_pos, src, dst, STATIC_GRAPH_LAYER_ID); edge_pos } @@ -129,23 +136,24 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen edge_pos: LocalPOS, src: VID, dst: VID, - exists: bool, + edge_exists: bool, layer_id: usize, c_props: impl IntoIterator, t_props: impl IntoIterator, ) { - if !exists { - self.increment_layer_num_edges(0); + if !edge_exists { + self.increment_layer_num_edges(STATIC_GRAPH_LAYER_ID); self.increment_layer_num_edges(layer_id); + + self.writer + .insert_static_edge_internal(edge_pos, src, dst, STATIC_GRAPH_LAYER_ID); } - self.writer - .insert_static_edge_internal(edge_pos, src, dst, 0); + self.graph_stats.update_time(t.t()); self.writer .update_const_properties(edge_pos, src, dst, layer_id, c_props); - self.graph_stats.update_time(t.t()); self.writer .insert_edge_internal(t, edge_pos, src, dst, layer_id, t_props); } diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index d50f87cc1d..2f676e5246 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -17,7 +17,7 @@ use crate::{ segments::edge::segment::MemEdgeSegment, }; use parking_lot::{RwLock, RwLockWriteGuard}; -use raphtory_api::core::entities::{EID, VID, properties::meta::Meta}; +use raphtory_api::core::entities::{properties::meta::{Meta, STATIC_GRAPH_LAYER_ID}, EID, VID}; use raphtory_core::{ entities::{ELID, LayerIds}, storage::timeindex::{AsTime, TimeIndexEntry}, @@ -134,20 +134,24 @@ impl, EXT: Config> EdgeStorageInner let layer_mapper = empty.edge_meta().layer_meta(); let prop_mapper = empty.edge_meta().temporal_prop_mapper(); let metadata_mapper = empty.edge_meta().metadata_mapper(); + if layer_mapper.num_fields() > 0 || prop_mapper.num_fields() > 0 || metadata_mapper.num_fields() > 0 { - let segment = empty.get_or_create_segment(0); + let segment = empty.get_or_create_segment(STATIC_GRAPH_LAYER_ID); let mut head = segment.head_mut(); + for layer in layer_mapper.ids() { head.get_or_create_layer(layer); } + if prop_mapper.num_fields() > 0 { head.get_or_create_layer(0) .properties_mut() .set_has_properties() } + segment.set_dirty(true); } empty @@ -334,9 +338,11 @@ impl, EXT: Config> EdgeStorageInner if let Some(segment) = self.segments.get(segment_id) { return segment; } + let count = self.segments.count(); + if count > segment_id { - // something has allocated the segment, wait for it to be added + // Something has allocated the segment, wait for it to be added. loop { if let Some(segment) = self.segments.get(segment_id) { return segment; @@ -346,7 +352,7 @@ impl, EXT: Config> EdgeStorageInner } } } else { - // we need to create the segment + // We need to create the segment. self.segments.reserve(segment_id + 1 - count); loop { @@ -364,7 +370,7 @@ impl, EXT: Config> EdgeStorageInner if let Some(segment) = self.segments.get(segment_id) { return segment; } else { - // wait for the segment to be created + // Wait for the segment to be created. std::thread::yield_now(); } } diff --git a/db4-storage/src/pages/locked/edges.rs b/db4-storage/src/pages/locked/edges.rs index cee46e8981..ff01546c1d 100644 --- a/db4-storage/src/pages/locked/edges.rs +++ b/db4-storage/src/pages/locked/edges.rs @@ -80,7 +80,7 @@ impl<'a, ES: EdgeSegmentOps> WriteLockedEdgePages<'a, ES> { } #[inline] - pub fn get(&mut self, segment_id: usize) -> Option<&mut LockedEdgePage<'a, ES>> { + pub fn get_mut(&mut self, segment_id: usize) -> Option<&mut LockedEdgePage<'a, ES>> { self.writers.get_mut(segment_id) } diff --git a/db4-storage/src/pages/locked/nodes.rs b/db4-storage/src/pages/locked/nodes.rs index d715cd7af2..78aed9dbd5 100644 --- a/db4-storage/src/pages/locked/nodes.rs +++ b/db4-storage/src/pages/locked/nodes.rs @@ -87,18 +87,15 @@ impl<'a, NS: NodeSegmentOps> WriteLockedNodePages<'a, NS> { Self { writers } } - #[inline] - pub fn get( - &mut self, - segment_id: usize, - ) -> Option<&mut LockedNodePage<'a, NS>> { - self.writers.get_mut(segment_id) - } - pub fn len(&self) -> usize { self.writers.len() } + #[inline] + pub fn get_mut(&mut self, segment_id: usize) -> Option<&mut LockedNodePage<'a, NS>> { + self.writers.get_mut(segment_id) + } + pub fn par_iter_mut(&mut self) -> rayon::slice::IterMut<'_, LockedNodePage<'a, NS>> { self.writers.par_iter_mut() } diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 44fccfbfd2..b94924f1a0 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -354,19 +354,21 @@ impl, EXT: Config> NodeStorageInner if let Some(segment) = self.pages.get(segment_id) { return segment; } + let count = self.pages.count(); + if count > segment_id { - // something has allocated the segment, wait for it to be added + // Something has allocated the segment, wait for it to be added. loop { if let Some(segment) = self.pages.get(segment_id) { return segment; } else { - // wait for the segment to be created + // Wait for the segment to be created. std::thread::yield_now(); } } } else { - // we need to create the segment + // We need to create the segment. self.pages.reserve(segment_id + 1 - count); loop { @@ -385,7 +387,7 @@ impl, EXT: Config> NodeStorageInner if let Some(segment) = self.pages.get(segment_id) { return segment; } else { - // wait for the segment to be created + // Wait for the segment to be created. std::thread::yield_now(); } } diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index 6df54943af..a83bb9f899 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -9,7 +9,7 @@ use crate::{ wal::LSN, }; use parking_lot::RwLockWriteGuard; -use raphtory_api::core::{entities::properties::prop::Prop, storage::dict_mapper::MaybeNew}; +use raphtory_api::core::{entities::properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, storage::dict_mapper::MaybeNew}; use raphtory_core::{ entities::{EID, ELID, VID}, storage::timeindex::AsTime, @@ -171,7 +171,6 @@ impl< ) -> MaybeNew { let src = src.into(); let dst = dst.into(); - let layer_id = 0; // static graph goes to layer 0 let (_, src_pos) = self.graph.nodes().resolve_pos(src); let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); @@ -179,7 +178,7 @@ impl< let existing_eid = self .node_writers .get_mut_src() - .get_out_edge(src_pos, dst, layer_id); + .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); // Edge already exists, so no need to add it again. if let Some(eid) = existing_eid { @@ -187,9 +186,9 @@ impl< } let edge_pos = None; - let edge_exists_hint = false; + let already_counted = false; let edge_pos = - self.edge_writer.add_static_edge(edge_pos, src, dst, edge_exists_hint); + self.edge_writer.add_static_edge(edge_pos, src, dst, already_counted); let edge_id = edge_pos.as_eid(self.edge_writer.segment_id(), self.graph.edges().max_page_len()); diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index eb946ef868..f7e5a72923 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -228,8 +228,10 @@ impl MemEdgeSegment { let mut prop_entry: PropMutEntry<'_> = self.layers[layer_id] .properties_mut() .get_mut_entry(local_row); + let ts = TimeIndexEntry::new(t.t(), t.i()); prop_entry.append_t_props(ts, props); + let layer_est_size = self.layers[layer_id].est_size(); self.est_size += layer_est_size.saturating_sub(est_size); } @@ -276,7 +278,7 @@ impl MemEdgeSegment { fn ensure_layer(&mut self, layer_id: usize) { if layer_id >= self.layers.len() { - // Get details from first layer to create consistent new layers + // Get details from first layer to create consistent new layers. if let Some(first_layer) = self.layers.first() { let segment_id = first_layer.segment_id(); let max_page_len = first_layer.max_page_len(); diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index dc2680580f..7b0b0e6745 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -42,7 +42,7 @@ impl GraphWal for NoWal { fn replay_to_graph( _dir: impl AsRef, - _graph: &G, + _graph: &mut G, ) -> Result<(), StorageError> { todo!() } diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 72b26c0d41..9752c6ef4f 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -72,14 +72,14 @@ pub trait GraphWal { /// Replays and applies all the wal entries in the given directory to the given graph. fn replay_to_graph( dir: impl AsRef, - graph: &G, + graph: &mut G, ) -> Result<(), StorageError>; } /// Trait for defining callbacks for replaying from wal. pub trait GraphReplay { fn replay_add_edge( - &self, + &mut self, lsn: LSN, transaction_id: TransactionID, t: TimeIndexEntry, diff --git a/raphtory-api/src/core/entities/mod.rs b/raphtory-api/src/core/entities/mod.rs index cce5d1c80a..2256f86c6d 100644 --- a/raphtory-api/src/core/entities/mod.rs +++ b/raphtory-api/src/core/entities/mod.rs @@ -64,6 +64,10 @@ impl Default for EID { } impl EID { + pub fn index(&self) -> usize { + self.0 + } + pub fn as_u64(self) -> u64 { self.0 as u64 } diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index 769eb14e32..9d62c395d8 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -299,7 +299,6 @@ impl> + StaticGraphViewOps + Dura // All names, ids and values have been generated for this operation. // Create a wal entry to mark it as durable. - let props_for_wal = props_with_status .iter() .map(|maybe_new| { diff --git a/raphtory/src/db/mod.rs b/raphtory/src/db/mod.rs index 54e9c74f6c..63e711afda 100644 --- a/raphtory/src/db/mod.rs +++ b/raphtory/src/db/mod.rs @@ -1,4 +1,3 @@ pub mod api; pub mod graph; -pub mod replay; pub mod task; diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 5cfba8776d..5b5ec145c6 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -459,7 +459,7 @@ pub fn load_edges_from_df Date: Thu, 18 Dec 2025 11:16:49 +0400 Subject: [PATCH 20/95] Minor cleanup --- db4-storage/src/segments/graph_prop/segment.rs | 4 ++-- db4-storage/src/segments/node/segment.rs | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/db4-storage/src/segments/graph_prop/segment.rs b/db4-storage/src/segments/graph_prop/segment.rs index 2636ce8d9a..bdfbde7032 100644 --- a/db4-storage/src/segments/graph_prop/segment.rs +++ b/db4-storage/src/segments/graph_prop/segment.rs @@ -1,5 +1,5 @@ use crate::{ - error::StorageError, segments::{HasRow, SegmentContainer}, wal::LSN, LocalPOS + error::StorageError, segments::{HasRow, SegmentContainer}, wal::LSN, }; use raphtory_api::core::entities::properties::{meta::Meta, prop::Prop}; use raphtory_core::{ @@ -22,7 +22,7 @@ pub struct MemGraphPropSegment { #[derive(Debug, Default)] pub struct UnitEntry(usize); -// `UnitEntry` does not store data, but `HasRow has to be implemented +// UnitEntry does not store data, but HasRow has to be implemented // for SegmentContainer to work. impl HasRow for UnitEntry { fn row(&self) -> usize { diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 832828b990..e35eb6bbd3 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -118,10 +118,12 @@ impl MemNodeSegment { let max_page_len = self.layers[0].max_page_len(); let segment_id = self.layers[0].segment_id(); let meta = self.layers[0].meta().clone(); + self.layers.resize_with(layer_id + 1, || { SegmentContainer::new(segment_id, max_page_len, meta.clone()) }); } + &mut self.layers[layer_id] } From 5b7532adaf8b88d7b4361efae877641488bafeb1 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 6 Jan 2026 11:45:52 +0300 Subject: [PATCH 21/95] Check lsn before replaying wal entries --- db4-graph/src/replay.rs | 98 ++++++++++++++--------- db4-storage/src/api/edges.rs | 5 +- db4-storage/src/api/nodes.rs | 9 +-- db4-storage/src/pages/edge_page/writer.rs | 4 - db4-storage/src/segments/edge/segment.rs | 4 + db4-storage/src/segments/node/segment.rs | 4 + 6 files changed, 77 insertions(+), 47 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index b620c4fe83..5ca35b6fea 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -11,6 +11,8 @@ use raphtory_api::core::{ }; use raphtory_core::entities::GidRef; use storage::{ + api::nodes::NodeSegmentOps, + api::edges::EdgeSegmentOps, persist::strategy::PersistentStrategy, NS, ES, GS, error::StorageError, @@ -36,8 +38,6 @@ where layer_id: usize, props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError> { - // TODO: Check max lsn on disk to see if this record should be replayed. - let temporal_graph = self.graph(); let node_max_page_len = temporal_graph.storage().nodes().max_page_len(); let edge_max_page_len = temporal_graph.storage().edges().max_page_len(); @@ -70,61 +70,85 @@ where let num_nodes = src_id.index() + 1; self.resize_chunks_to_num_nodes(num_nodes); // Create enough segments. - let mut src_writer = self.nodes.get_mut(src_segment_id).unwrap().writer(); - src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&src_name)); + let segment = self.graph().storage().nodes().get_or_create_segment(src_segment_id); + let immut_lsn = segment.immut_lsn(); - let is_new_edge_static = src_writer.get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID).is_none(); - let is_new_edge_layer = src_writer.get_out_edge(src_pos, dst_id, layer_id).is_none(); + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let mut src_writer = self.nodes.get_mut(src_segment_id).unwrap().writer(); + src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&src_name)); - // Add the edge to the static graph if it doesn't already exist. - if is_new_edge_static { - src_writer.add_static_outbound_edge(src_pos, dst_id, eid); - } + let is_new_edge_static = src_writer.get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID).is_none(); + let is_new_edge_layer = src_writer.get_out_edge(src_pos, dst_id, layer_id).is_none(); - // Add the edge to the layer if it doesn't already exist, else just record the timestamp. - if is_new_edge_layer { - src_writer.add_outbound_edge(Some(t), src_pos, dst_id, eid.with_layer(layer_id)); - } else { - src_writer.update_timestamp(t, src_pos, eid.with_layer(layer_id)); - } + // Add the edge to the static graph if it doesn't already exist. + if is_new_edge_static { + src_writer.add_static_outbound_edge(src_pos, dst_id, eid); + } + + // Add the edge to the layer if it doesn't already exist, else just record the timestamp. + if is_new_edge_layer { + src_writer.add_outbound_edge(Some(t), src_pos, dst_id, eid.with_layer(layer_id)); + } else { + src_writer.update_timestamp(t, src_pos, eid.with_layer(layer_id)); + } - // Release the writer for mutable access to dst_writer. - drop(src_writer); + // Release the writer for mutable access to dst_writer. + drop(src_writer); + } // 5. Grab dst writer and add edge data. let (dst_segment_id, dst_pos) = resolve_pos(dst_id, node_max_page_len); let num_nodes = dst_id.index() + 1; self.resize_chunks_to_num_nodes(num_nodes); - let mut dst_writer = self.nodes.get_mut(dst_segment_id).unwrap().writer(); - dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&dst_name)); + let segment = self.graph().storage().nodes().get_or_create_segment(dst_segment_id); + let immut_lsn = segment.immut_lsn(); - if is_new_edge_static { - dst_writer.add_static_inbound_edge(dst_pos, src_id, eid); - } + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let mut dst_writer = self.nodes.get_mut(dst_segment_id).unwrap().writer(); + dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&dst_name)); - if is_new_edge_layer { - dst_writer.add_inbound_edge(Some(t), dst_pos, src_id, eid.with_layer(layer_id)); - } else { - dst_writer.update_timestamp(t, dst_pos, eid.with_layer(layer_id)); - } + let is_new_edge_static = dst_writer.get_inb_edge(dst_pos, src_id, STATIC_GRAPH_LAYER_ID).is_none(); + let is_new_edge_layer = dst_writer.get_inb_edge(dst_pos, src_id, layer_id).is_none(); + + if is_new_edge_static { + dst_writer.add_static_inbound_edge(dst_pos, src_id, eid); + } - drop(dst_writer); + if is_new_edge_layer { + dst_writer.add_inbound_edge(Some(t), dst_pos, src_id, eid.with_layer(layer_id)); + } else { + dst_writer.update_timestamp(t, dst_pos, eid.with_layer(layer_id)); + } + + drop(dst_writer); + } // 6. Grab edge writer and add temporal props & metadata. let (edge_segment_id, edge_pos) = resolve_pos(eid, edge_max_page_len); let num_edges = eid.index() + 1; self.resize_chunks_to_num_edges(num_edges); - let mut edge_writer = self.edges.get_mut(edge_segment_id).unwrap().writer(); - // Add edge into the static graph if it doesn't already exist. - if is_new_edge_static { - let already_counted = false; - edge_writer.add_static_edge(Some(edge_pos), src_id, dst_id, already_counted); - } + let segment = self.graph().storage().edges().get_or_create_segment(edge_segment_id); + let immut_lsn = segment.immut_lsn(); + + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let mut edge_writer = self.edges.get_mut(edge_segment_id).unwrap().writer(); - // Add edge into the specified layer with timestamp and props. - edge_writer.add_edge(t, edge_pos, src_id, dst_id, prop_ids_and_values, layer_id); + let is_new_edge_static = edge_writer.get_edge(STATIC_GRAPH_LAYER_ID, edge_pos).is_none(); + + // Add edge into the static graph if it doesn't already exist. + if is_new_edge_static { + let already_counted = false; + edge_writer.add_static_edge(Some(edge_pos), src_id, dst_id, already_counted); + } + + // Add edge into the specified layer with timestamp and props. + edge_writer.add_edge(t, edge_pos, src_id, dst_id, prop_ids_and_values, layer_id); + } Ok(()) } diff --git a/db4-storage/src/api/edges.rs b/db4-storage/src/api/edges.rs index 905f6ed64b..b504d43372 100644 --- a/db4-storage/src/api/edges.rs +++ b/db4-storage/src/api/edges.rs @@ -11,7 +11,7 @@ use std::{ sync::{Arc, atomic::AtomicU32}, }; -use crate::{LocalPOS, error::StorageError, segments::edge::segment::MemEdgeSegment}; +use crate::{LocalPOS, error::StorageError, segments::edge::segment::MemEdgeSegment, wal::LSN}; pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { type Extension; @@ -97,6 +97,9 @@ pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { &self, locked_head: impl DerefMut, ) -> Result<(), StorageError>; + + /// Returns the latest lsn for the immutable part of this segment. + fn immut_lsn(&self) -> LSN; } pub trait LockedESegment: Send + Sync + std::fmt::Debug { diff --git a/db4-storage/src/api/nodes.rs b/db4-storage/src/api/nodes.rs index 9f9b2c2283..ae7d045fc0 100644 --- a/db4-storage/src/api/nodes.rs +++ b/db4-storage/src/api/nodes.rs @@ -25,11 +25,7 @@ use std::{ }; use crate::{ - LocalPOS, - error::StorageError, - gen_ts::LayerIter, - segments::node::segment::MemNodeSegment, - utils::{Iter2, Iter3, Iter4}, + error::StorageError, gen_ts::LayerIter, segments::node::segment::MemNodeSegment, utils::{Iter2, Iter3, Iter4}, wal::LSN, LocalPOS }; pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { @@ -128,6 +124,9 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { &self, locked_head: impl DerefMut, ) -> Result<(), StorageError>; + + /// Returns the latest lsn for the immutable part of this segment. + fn immut_lsn(&self) -> LSN; } pub trait LockedNSSegment: std::fmt::Debug + Send + Sync { diff --git a/db4-storage/src/pages/edge_page/writer.rs b/db4-storage/src/pages/edge_page/writer.rs index c8ad00db36..efd956e20d 100644 --- a/db4-storage/src/pages/edge_page/writer.rs +++ b/db4-storage/src/pages/edge_page/writer.rs @@ -166,10 +166,6 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen self.graph_stats.increment(layer_id); } - pub fn contains_edge(&self, pos: LocalPOS, layer_id: usize) -> bool { - self.page.contains_edge(pos, layer_id, self.writer.deref()) - } - pub fn get_edge(&self, layer_id: usize, edge_pos: LocalPOS) -> Option<(VID, VID)> { self.page.get_edge(edge_pos, layer_id, self.writer.deref()) } diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index f7e5a72923..c0b03bf631 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -588,6 +588,10 @@ impl>> EdgeSegmentOps for EdgeSegm } fn set_dirty(&self, _dirty: bool) {} + + fn immut_lsn(&self) -> LSN { + panic!("immut_lsn not supported for EdgeSegmentView"); + } } #[cfg(test)] diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index e35eb6bbd3..75a295b28a 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -545,6 +545,10 @@ impl>> NodeSegmentOps for NodeSegm ) -> Result<(), StorageError> { Ok(()) } + + fn immut_lsn(&self) -> LSN { + panic!("immut_lsn not supported for NodeSegmentView"); + } } #[cfg(test)] From 987a17af4f21215376058cda8b57e8c270252aa0 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 6 Jan 2026 13:51:28 +0300 Subject: [PATCH 22/95] Implement take for node and edge segments --- db4-storage/src/segments/edge/segment.rs | 14 ++++++++++++++ db4-storage/src/segments/node/segment.rs | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index c0b03bf631..36a28d55b3 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -144,6 +144,20 @@ impl MemEdgeSegment { self.lsn = lsn; } + /// Replaces this segment with an empty instance, returning the old segment + /// with its data. + /// + /// The new segment will have the same number of layers as the original. + pub fn take(&mut self) -> Self { + let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); + + Self { + layers, + est_size: 0, + lsn: self.lsn, + } + } + pub fn max_page_len(&self) -> u32 { self.layers[0].max_page_len() } diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 75a295b28a..39ad342036 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -149,6 +149,21 @@ impl MemNodeSegment { } } + /// Replaces this segment with an empty instance, returning the old segment + /// with its data. + /// + /// The new segment will have the same number of layers as the original. + pub fn take(&mut self) -> Self { + let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); + + Self { + segment_id: self.segment_id, + max_page_len: self.max_page_len, + layers, + lsn: self.lsn, + } + } + pub fn to_vid(&self, pos: LocalPOS) -> VID { pos.as_vid(self.segment_id, self.max_page_len) } From c4d5d7cf071cae52d3243c6562be8d609c0e3345 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 6 Jan 2026 14:08:50 +0300 Subject: [PATCH 23/95] Set lsn during replay --- db4-graph/src/replay.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 5ca35b6fea..5b7ae7bed8 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -93,6 +93,8 @@ where src_writer.update_timestamp(t, src_pos, eid.with_layer(layer_id)); } + src_writer.mut_segment.set_lsn(lsn); + // Release the writer for mutable access to dst_writer. drop(src_writer); } @@ -123,6 +125,8 @@ where dst_writer.update_timestamp(t, dst_pos, eid.with_layer(layer_id)); } + dst_writer.mut_segment.set_lsn(lsn); + drop(dst_writer); } @@ -148,6 +152,8 @@ where // Add edge into the specified layer with timestamp and props. edge_writer.add_edge(t, edge_pos, src_id, dst_id, prop_ids_and_values, layer_id); + + edge_writer.writer.set_lsn(lsn); } Ok(()) From 01257e532af7f0e06c8b67615e402fceabeb33e9 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 7 Jan 2026 17:01:02 +0300 Subject: [PATCH 24/95] Rename wal sync to flush --- db4-storage/src/wal/mod.rs | 6 +++--- db4-storage/src/wal/no_wal.rs | 4 ++-- raphtory/src/db/api/mutation/addition_ops.rs | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 9752c6ef4f..e729ec6841 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -28,10 +28,10 @@ pub trait Wal { fn append(&self, data: &[u8]) -> Result; /// Immediately flushes in-memory WAL entries to disk. - fn sync(&self) -> Result<(), StorageError>; + fn flush(&self) -> Result<(), StorageError>; - /// Blocks until the WAL has fsynced the given LSN to disk. - fn wait_for_sync(&self, lsn: LSN); + /// Blocks until the WAL has flushed the given LSN to disk. + fn wait_for_flush(&self, lsn: LSN); /// Rotates the underlying WAL file. /// `cutoff_lsn` acts as a hint for which records can be safely discarded during rotation. diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs index 72e666fefa..6928d7d950 100644 --- a/db4-storage/src/wal/no_wal.rs +++ b/db4-storage/src/wal/no_wal.rs @@ -19,11 +19,11 @@ impl Wal for NoWal { Ok(0) } - fn sync(&self) -> Result<(), StorageError> { + fn flush(&self) -> Result<(), StorageError> { Ok(()) } - fn wait_for_sync(&self, _lsn: LSN) {} + fn wait_for_flush(&self, _lsn: LSN) {} fn rotate(&self, _cutoff_lsn: LSN) -> Result<(), StorageError> { Ok(()) diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index 9d62c395d8..b4d710316e 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -349,7 +349,7 @@ impl> + StaticGraphViewOps + Dura // drop(add_edge_op); // Flush the wal entry to disk. - self.wal().sync().unwrap(); + self.wal().flush().unwrap(); Ok(EdgeView::new( self.clone(), From 9221008d679436e711b89a8f2a9a25295b8c9cde Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 7 Jan 2026 18:17:54 +0300 Subject: [PATCH 25/95] Modify flush to take an LSN --- db4-storage/src/wal/mod.rs | 4 ++-- db4-storage/src/wal/no_wal.rs | 2 +- raphtory/src/db/api/mutation/addition_ops.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index e729ec6841..b6ca1c8707 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -27,8 +27,8 @@ pub trait Wal { /// Appends data to the WAL and returns the assigned LSN. fn append(&self, data: &[u8]) -> Result; - /// Immediately flushes in-memory WAL entries to disk. - fn flush(&self) -> Result<(), StorageError>; + /// Flushes in-memory WAL entries up to the given LSN to disk. + fn flush(&self, lsn: LSN) -> Result<(), StorageError>; /// Blocks until the WAL has flushed the given LSN to disk. fn wait_for_flush(&self, lsn: LSN); diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs index 6928d7d950..10a315902a 100644 --- a/db4-storage/src/wal/no_wal.rs +++ b/db4-storage/src/wal/no_wal.rs @@ -19,7 +19,7 @@ impl Wal for NoWal { Ok(0) } - fn flush(&self) -> Result<(), StorageError> { + fn flush(&self, _lsn: LSN) -> Result<(), StorageError> { Ok(()) } diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index b4d710316e..15b6ec98d6 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -349,7 +349,7 @@ impl> + StaticGraphViewOps + Dura // drop(add_edge_op); // Flush the wal entry to disk. - self.wal().flush().unwrap(); + self.wal().flush(lsn).unwrap(); Ok(EdgeView::new( self.clone(), From 0fddef374f92504b4d561b8dbf1e151258be175c Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 8 Jan 2026 11:40:31 +0300 Subject: [PATCH 26/95] Remove background wal flush --- db4-storage/src/pages/mod.rs | 8 ++++---- db4-storage/src/wal/mod.rs | 3 --- db4-storage/src/wal/no_wal.rs | 2 -- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index d0b6da23e1..fbb0fabb2b 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -139,7 +139,7 @@ impl< let node_meta = node_storage.prop_meta(); // Load graph temporal properties and metadata - let graph_props_storage = + let graph_prop_storage = Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); for node_type in ext.node_types().iter() { @@ -151,7 +151,7 @@ impl< Ok(Self { nodes: node_storage, edges: edge_storage, - graph_props: graph_props_storage, + graph_props: graph_prop_storage, event_id: AtomicUsize::new(t_len), graph_dir: Some(graph_dir.as_ref().to_path_buf()), _ext: ext, @@ -184,7 +184,7 @@ impl< edge_meta, ext.clone(), )); - let graph_storage = Arc::new(GraphPropStorageInner::new_with_meta( + let graph_prop_storage = Arc::new(GraphPropStorageInner::new_with_meta( graph_props_path.as_deref(), graph_props_meta, ext.clone(), @@ -198,7 +198,7 @@ impl< Self { nodes: node_storage, edges: edge_storage, - graph_props: graph_storage, + graph_props: graph_prop_storage, event_id: AtomicUsize::new(0), graph_dir: graph_dir.map(|p| p.to_path_buf()), _ext: ext, diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index b6ca1c8707..d8be9736a2 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -30,9 +30,6 @@ pub trait Wal { /// Flushes in-memory WAL entries up to the given LSN to disk. fn flush(&self, lsn: LSN) -> Result<(), StorageError>; - /// Blocks until the WAL has flushed the given LSN to disk. - fn wait_for_flush(&self, lsn: LSN); - /// Rotates the underlying WAL file. /// `cutoff_lsn` acts as a hint for which records can be safely discarded during rotation. fn rotate(&self, cutoff_lsn: LSN) -> Result<(), StorageError>; diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs index 10a315902a..4d4923272f 100644 --- a/db4-storage/src/wal/no_wal.rs +++ b/db4-storage/src/wal/no_wal.rs @@ -23,8 +23,6 @@ impl Wal for NoWal { Ok(()) } - fn wait_for_flush(&self, _lsn: LSN) {} - fn rotate(&self, _cutoff_lsn: LSN) -> Result<(), StorageError> { Ok(()) } From 612dc09b6af2c412fff87b7d88d1a4c0a88d2ec1 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 8 Jan 2026 16:14:33 +0300 Subject: [PATCH 27/95] Rename to PersistenceStrategy --- db4-graph/src/lib.rs | 10 +++++----- db4-graph/src/replay.rs | 4 ++-- db4-storage/src/pages/edge_store.rs | 6 +++--- db4-storage/src/pages/graph_prop_store.rs | 4 ++-- db4-storage/src/pages/mod.rs | 14 +++++++------- db4-storage/src/pages/node_store.rs | 8 ++++---- db4-storage/src/pages/session.rs | 6 +++--- db4-storage/src/pages/test_utils/checkers.rs | 12 ++++++------ db4-storage/src/persist/strategy.rs | 11 ++++++----- db4-storage/src/segments/edge/segment.rs | 4 ++-- db4-storage/src/segments/graph_prop/mod.rs | 6 +++--- db4-storage/src/segments/node/segment.rs | 4 ++-- raphtory-storage/src/mutation/addition_ops_ext.rs | 6 +++--- raphtory/src/db/api/storage/storage.rs | 10 ---------- 14 files changed, 48 insertions(+), 57 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 2b21a7d232..c2c67db3cc 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -21,7 +21,7 @@ use storage::{ nodes::WriteLockedNodePages, }, }, - persist::strategy::{Config, PersistentStrategy}, + persist::strategy::{PersistenceStrategy}, resolver::GIDResolverOps, Extension, GIDResolver, Layer, ReadLockedLayer, transaction::TransactionManager, WalImpl, ES, NS, GS, wal::Wal, @@ -31,7 +31,7 @@ use tempfile::TempDir; mod replay; #[derive(Debug)] -pub struct TemporalGraph { +pub struct TemporalGraph { // mapping between logical and physical ids pub logical_to_physical: Arc, pub node_count: AtomicUsize, @@ -88,7 +88,7 @@ impl Default for TemporalGraph { } } -impl, ES = ES, GS = GS>> TemporalGraph { +impl, ES = ES, GS = GS>> TemporalGraph { pub fn new(ext: EXT) -> Result { let node_meta = Meta::new_for_nodes(); let edge_meta = Meta::new_for_edges(); @@ -339,7 +339,7 @@ impl, ES = ES, GS = GS>> Temporal /// Holds write locks across all segments in the graph for fast bulk ingestion. pub struct WriteLockedGraph<'a, EXT> where - EXT: PersistentStrategy, ES = ES, GS = GS>, + EXT: PersistenceStrategy, ES = ES, GS = GS>, { pub nodes: WriteLockedNodePages<'a, storage::NS>, pub edges: WriteLockedEdgePages<'a, storage::ES>, @@ -347,7 +347,7 @@ where pub graph: &'a TemporalGraph, } -impl<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> +impl<'a, EXT: PersistenceStrategy, ES = ES, GS = GS>> WriteLockedGraph<'a, EXT> { pub fn new(graph: &'a TemporalGraph) -> Self { diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 5b7ae7bed8..cc665d70dc 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -13,7 +13,7 @@ use raphtory_core::entities::GidRef; use storage::{ api::nodes::NodeSegmentOps, api::edges::EdgeSegmentOps, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, NS, ES, GS, error::StorageError, wal::{GraphReplay, TransactionID, LSN}, @@ -22,7 +22,7 @@ use storage::resolver::GIDResolverOps; impl GraphReplay for WriteLockedGraph<'_, EXT> where - EXT: PersistentStrategy, ES = ES, GS = GS>, + EXT: PersistenceStrategy, ES = ES, GS = GS>, { fn replay_add_edge( &mut self, diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 2f676e5246..0fea64946b 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -13,7 +13,7 @@ use crate::{ layer_counter::GraphStats, locked::edges::{LockedEdgePage, WriteLockedEdgePages}, }, - persist::strategy::Config, + persist::strategy::PersistenceConfig, segments::edge::segment::MemEdgeSegment, }; use parking_lot::{RwLock, RwLockWriteGuard}; @@ -42,7 +42,7 @@ pub struct ReadLockedEdgeStorage, EXT> { locked_pages: Box<[ES::ArcLockedSegment]>, } -impl, EXT: Config> ReadLockedEdgeStorage { +impl, EXT: PersistenceConfig> ReadLockedEdgeStorage { pub fn storage(&self) -> &EdgeStorageInner { &self.storage } @@ -96,7 +96,7 @@ impl, EXT: Config> ReadLockedEdgeStorage, EXT: Config> EdgeStorageInner { +impl, EXT: PersistenceConfig> EdgeStorageInner { pub fn locked(self: &Arc) -> ReadLockedEdgeStorage { let locked_pages = self .segments diff --git a/db4-storage/src/pages/graph_prop_store.rs b/db4-storage/src/pages/graph_prop_store.rs index 0f1aaff698..92b0369cf5 100644 --- a/db4-storage/src/pages/graph_prop_store.rs +++ b/db4-storage/src/pages/graph_prop_store.rs @@ -7,7 +7,7 @@ use crate::{ graph_prop_page::writer::GraphPropWriter, locked::graph_props::{LockedGraphPropPage, WriteLockedGraphPropPages}, }, - persist::strategy::Config, + persist::strategy::PersistenceConfig, }; use std::{ @@ -31,7 +31,7 @@ pub struct GraphPropStorageInner { ext: EXT, } -impl, EXT: Config> GraphPropStorageInner { +impl, EXT: PersistenceConfig> GraphPropStorageInner { pub fn new_with_meta(path: Option<&Path>, meta: Arc, ext: EXT) -> Self { let page = Arc::new(GS::new(meta.clone(), path, ext.clone())); diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index fbb0fabb2b..5794fe8ccb 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -3,7 +3,7 @@ use crate::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, - persist::strategy::{Config, PersistentStrategy}, + persist::strategy::{PersistenceConfig, PersistenceStrategy}, properties::props_meta_writer::PropsMetaWriter, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, }; @@ -48,7 +48,7 @@ pub mod test_utils; // graph // (node/edges) // segment // layer_ids (0, 1, 2, ...) // actual graphy bits #[derive(Debug)] -pub struct GraphStore { +pub struct GraphStore { nodes: Arc>, edges: Arc>, graph_props: Arc>, @@ -62,7 +62,7 @@ pub struct ReadLockedGraphStore< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: Config, + EXT: PersistenceConfig, > { pub nodes: Arc>, pub edges: Arc>, @@ -73,7 +73,7 @@ impl< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, > GraphStore { pub fn read_locked(self: &Arc) -> ReadLockedGraphStore { @@ -445,7 +445,7 @@ impl< } } -impl Drop for GraphStore { +impl Drop for GraphStore { fn drop(&mut self) { let node_types = self.nodes.prop_meta().get_all_node_types(); self._ext.set_node_types(node_types); @@ -457,7 +457,7 @@ impl Drop for GraphStore { } } -fn write_graph_config( +fn write_graph_config( graph_dir: impl AsRef, config: &EXT, ) -> Result<(), StorageError> { @@ -467,7 +467,7 @@ fn write_graph_config( Ok(()) } -fn read_graph_config( +fn read_graph_config( graph_dir: impl AsRef, ) -> Result { let config_file = graph_dir.as_ref().join("graph_config.json"); diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index b94924f1a0..bc73e345b2 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -7,7 +7,7 @@ use crate::{ layer_counter::GraphStats, locked::nodes::{LockedNodePage, WriteLockedNodePages}, }, - persist::strategy::Config, + persist::strategy::PersistenceConfig, segments::node::segment::MemNodeSegment, }; use parking_lot::RwLockWriteGuard; @@ -41,7 +41,7 @@ pub struct ReadLockedNodeStorage, EXT> { locked_segments: Box<[NS::ArcLockedSegment]>, } -impl, EXT: Config> ReadLockedNodeStorage { +impl, EXT: PersistenceConfig> ReadLockedNodeStorage { pub fn node_ref( &self, node: impl Into, @@ -91,7 +91,7 @@ impl, EXT: Config> ReadLockedNodeStorage NodeStorageInner { +impl NodeStorageInner { pub fn prop_meta(&self) -> &Arc { &self.node_meta } @@ -130,7 +130,7 @@ impl NodeStorageInner { } } -impl, EXT: Config> NodeStorageInner { +impl, EXT: PersistenceConfig> NodeStorageInner { pub fn new_with_meta( nodes_path: Option, node_meta: Arc, diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index a83bb9f899..0684a19424 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -4,7 +4,7 @@ use super::{ use crate::{ LocalPOS, api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, - persist::strategy::{Config, PersistentStrategy}, + persist::strategy::{PersistenceConfig, PersistenceStrategy}, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, wal::LSN, }; @@ -20,7 +20,7 @@ pub struct WriteSession< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: Config, + EXT: PersistenceConfig, > { node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, edge_writer: EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES>, @@ -32,7 +32,7 @@ impl< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, > WriteSession<'a, NS, ES, GS, EXT> { pub fn new( diff --git a/db4-storage/src/pages/test_utils/checkers.rs b/db4-storage/src/pages/test_utils/checkers.rs index b1d7568b19..bc7d9b7766 100644 --- a/db4-storage/src/pages/test_utils/checkers.rs +++ b/db4-storage/src/pages/test_utils/checkers.rs @@ -19,7 +19,7 @@ use crate::{ }, error::StorageError, pages::GraphStore, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, }; use super::fixtures::{AddEdge, Fixture, NodeFixture}; @@ -28,7 +28,7 @@ pub fn make_graph_from_edges< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, >( edges: &[(VID, VID, Option)], // src, dst, optional layer_id graph_dir: &Path, @@ -92,7 +92,7 @@ pub fn check_edges_support< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, >( edges: Vec<(impl Into, impl Into, Option)>, // src, dst, optional layer_id par_load: bool, @@ -123,7 +123,7 @@ pub fn check_edges_support< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, >( stage: &str, expected_edges: &[(VID, VID, Option)], // (src, dst, layer_id) @@ -223,7 +223,7 @@ pub fn check_edges_support< } pub fn check_graph_with_nodes_support< - EXT: PersistentStrategy, + EXT: PersistenceStrategy, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, @@ -358,7 +358,7 @@ pub fn check_graph_with_nodes_support< } pub fn check_graph_with_props_support< - EXT: PersistentStrategy, + EXT: PersistenceStrategy, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 0b6b045d85..dc8804293a 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -1,4 +1,5 @@ use std::ops::DerefMut; +use std::fmt::Debug; use serde::{Deserialize, Serialize}; @@ -12,8 +13,8 @@ pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 pub const DEFAULT_MAX_MEMORY_BYTES: usize = 32 * 1024 * 1024; -pub trait Config: - Default + std::fmt::Debug + Clone + Send + Sync + 'static + for<'a> Deserialize<'a> + Serialize +pub trait PersistenceConfig: + Default + Debug + Clone + Send + Sync + 'static + for<'a> Deserialize<'a> + Serialize { fn max_node_page_len(&self) -> u32; fn max_edge_page_len(&self) -> u32; @@ -24,7 +25,7 @@ pub trait Config: fn set_node_types(&mut self, types: impl IntoIterator>); } -pub trait PersistentStrategy: Config { +pub trait PersistenceStrategy: PersistenceConfig { type NS; type ES; type GS; @@ -75,7 +76,7 @@ impl Default for NoOpStrategy { } } -impl Config for NoOpStrategy { +impl PersistenceConfig for NoOpStrategy { fn max_node_page_len(&self) -> u32 { self.max_node_page_len } @@ -102,7 +103,7 @@ impl Config for NoOpStrategy { } } -impl PersistentStrategy for NoOpStrategy { +impl PersistenceStrategy for NoOpStrategy { type ES = EdgeSegmentView; type NS = NodeSegmentView; type GS = GraphPropSegmentView; diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index 36a28d55b3..1f5edb598a 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -3,7 +3,7 @@ use crate::{ api::edges::{EdgeSegmentOps, LockedESegment}, error::StorageError, pages::resolve_pos, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, properties::PropMutEntry, segments::{ HasRow, SegmentContainer, @@ -462,7 +462,7 @@ impl LockedESegment for ArcLockedSegmentView { } } -impl>> EdgeSegmentOps for EdgeSegmentView

{ +impl>> EdgeSegmentOps for EdgeSegmentView

{ type Extension = P; type Entry<'a> = MemEdgeEntry<'a, parking_lot::RwLockReadGuard<'a, MemEdgeSegment>>; diff --git a/db4-storage/src/segments/graph_prop/mod.rs b/db4-storage/src/segments/graph_prop/mod.rs index d6f98c9038..c546c69765 100644 --- a/db4-storage/src/segments/graph_prop/mod.rs +++ b/db4-storage/src/segments/graph_prop/mod.rs @@ -4,7 +4,7 @@ pub mod segment; use crate::{ api::graph_props::GraphPropSegmentOps, error::StorageError, - persist::strategy::Config, + persist::strategy::PersistenceConfig, segments::graph_prop::{entry::MemGraphPropEntry, segment::MemGraphPropSegment}, }; use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; @@ -20,7 +20,7 @@ use std::{ /// `GraphPropSegmentView` manages graph temporal properties and graph metadata /// (constant properties). Reads / writes are always served from the in-memory segment. #[derive(Debug)] -pub struct GraphPropSegmentView { +pub struct GraphPropSegmentView { /// In-memory segment that contains the latest graph properties /// and graph metadata writes. head: Arc>, @@ -33,7 +33,7 @@ pub struct GraphPropSegmentView { _persistent: P, } -impl GraphPropSegmentOps for GraphPropSegmentView

{ +impl GraphPropSegmentOps for GraphPropSegmentView

{ type Extension = P; type Entry<'a> = MemGraphPropEntry<'a>; diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 39ad342036..be86e11853 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -1,5 +1,5 @@ use crate::{ - api::nodes::{LockedNSSegment, NodeSegmentOps}, error::StorageError, loop_lock_write, persist::strategy::PersistentStrategy, segments::{ + api::nodes::{LockedNSSegment, NodeSegmentOps}, error::StorageError, loop_lock_write, persist::strategy::PersistenceStrategy, segments::{ node::entry::{MemNodeEntry, MemNodeRef}, HasRow, SegmentContainer }, wal::LSN, LocalPOS }; @@ -398,7 +398,7 @@ impl LockedNSSegment for ArcLockedSegmentView { } } -impl>> NodeSegmentOps for NodeSegmentView

{ +impl>> NodeSegmentOps for NodeSegmentView

{ type Extension = P; type Entry<'a> = MemNodeEntry<'a, parking_lot::RwLockReadGuard<'a, MemNodeSegment>>; diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 612caef67d..c5f9b9160e 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -21,14 +21,14 @@ use raphtory_core::{ }; use storage::{ pages::{node_page::writer::node_info_as_props, session::WriteSession}, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, properties::props_meta_writer::PropsMetaWriter, resolver::GIDResolverOps, Extension, transaction::TransactionManager, WalImpl, ES, NS, GS, wal::LSN, }; -pub struct WriteS<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> { +pub struct WriteS<'a, EXT: PersistenceStrategy, ES = ES, GS = GS>> { static_session: WriteSession<'a, NS, ES, GS, EXT>, } @@ -37,7 +37,7 @@ pub struct UnlockedSession<'a> { graph: &'a TemporalGraph, } -impl<'a, EXT: PersistentStrategy, ES = ES, GS = GS>> EdgeWriteLock +impl<'a, EXT: PersistenceStrategy, ES = ES, GS = GS>> EdgeWriteLock for WriteS<'a, EXT> { fn internal_add_static_edge( diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index e188874f54..8e661f1ccb 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -106,16 +106,6 @@ impl Storage { } } - pub(crate) fn new_with_path_and_ext(path: impl AsRef, ext: Extension) -> Self { - Self { - graph: GraphStorage::Unlocked(Arc::new( - TemporalGraph::new_with_path(path, ext).unwrap(), - )), - #[cfg(feature = "search")] - index: RwLock::new(GraphIndex::Empty), - } - } - pub(crate) fn load_from(path: impl AsRef) -> Self { let graph = GraphStorage::Unlocked(Arc::new(TemporalGraph::load_from_path(path).unwrap())); Self { From 417ce5f79b5017412b49d28d2bdc53cdca2bda8b Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 9 Jan 2026 11:25:53 +0300 Subject: [PATCH 28/95] Rename graph_config to persistence_config --- db4-storage/src/pages/mod.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 268e22ea07..1cf28fcf52 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -131,7 +131,7 @@ impl< let edges_path = graph_dir.as_ref().join("edges"); let graph_props_path = graph_dir.as_ref().join("graph_props"); - let ext = read_graph_config::(graph_dir.as_ref())?; + let ext = read_persistence_config::(graph_dir.as_ref())?; let edge_storage = Arc::new(EdgeStorageInner::load(edges_path, ext.clone())?); let edge_meta = edge_storage.edge_meta().clone(); @@ -191,7 +191,7 @@ impl< )); if let Some(graph_dir) = graph_dir { - write_graph_config(graph_dir, &ext) + write_persistence_config(graph_dir, &ext) .expect("Unrecoverable! Failed to write graph config"); } @@ -450,28 +450,28 @@ impl Drop for GraphStore { let node_types = self.nodes.prop_meta().get_all_node_types(); self._ext.set_node_types(node_types); if let Some(graph_dir) = self.graph_dir.as_ref() { - if write_graph_config(graph_dir, &self._ext).is_err() { + if write_persistence_config(graph_dir, &self._ext).is_err() { eprintln!("Unrecoverable! Failed to write graph meta"); } } } } -fn write_graph_config( +fn write_persistence_config( graph_dir: impl AsRef, config: &EXT, ) -> Result<(), StorageError> { - let config_file = graph_dir.as_ref().join("graph_config.json"); + let config_file = graph_dir.as_ref().join("persistence_config.json"); let config_file = std::fs::File::create(&config_file)?; serde_json::to_writer_pretty(config_file, config)?; Ok(()) } -fn read_graph_config( +fn read_persistence_config( graph_dir: impl AsRef, ) -> Result { - let config_file = graph_dir.as_ref().join("graph_config.json"); + let config_file = graph_dir.as_ref().join("persistence_config.json"); let config_file = std::fs::File::open(config_file)?; let config = serde_json::from_reader(config_file)?; Ok(config) From 3528f71ad5a320a1254639c39ace3e93d441f129 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 9 Jan 2026 15:45:34 +0300 Subject: [PATCH 29/95] Use config through PersistenceStrategy --- db4-storage/src/pages/edge_store.rs | 10 +-- db4-storage/src/pages/graph_prop_store.rs | 4 +- db4-storage/src/pages/mod.rs | 40 +++++---- db4-storage/src/pages/node_store.rs | 10 +-- db4-storage/src/pages/session.rs | 4 +- db4-storage/src/persist/strategy.rs | 97 ++++++++++++---------- db4-storage/src/segments/edge/segment.rs | 2 +- db4-storage/src/segments/graph_prop/mod.rs | 6 +- db4-storage/src/segments/node/segment.rs | 2 +- 9 files changed, 96 insertions(+), 79 deletions(-) diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 0fea64946b..3383caf8f0 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -13,7 +13,7 @@ use crate::{ layer_counter::GraphStats, locked::edges::{LockedEdgePage, WriteLockedEdgePages}, }, - persist::strategy::PersistenceConfig, + persist::strategy::PersistenceStrategy, segments::edge::segment::MemEdgeSegment, }; use parking_lot::{RwLock, RwLockWriteGuard}; @@ -42,7 +42,7 @@ pub struct ReadLockedEdgeStorage, EXT> { locked_pages: Box<[ES::ArcLockedSegment]>, } -impl, EXT: PersistenceConfig> ReadLockedEdgeStorage { +impl, EXT: PersistenceStrategy> ReadLockedEdgeStorage { pub fn storage(&self) -> &EdgeStorageInner { &self.storage } @@ -96,7 +96,7 @@ impl, EXT: PersistenceConfig> ReadLockedEdge } } -impl, EXT: PersistenceConfig> EdgeStorageInner { +impl, EXT: PersistenceStrategy> EdgeStorageInner { pub fn locked(self: &Arc) -> ReadLockedEdgeStorage { let locked_pages = self .segments @@ -193,7 +193,7 @@ impl, EXT: PersistenceConfig> EdgeStorageInn pub fn load(edges_path: impl AsRef, ext: EXT) -> Result { let edges_path = edges_path.as_ref(); - let max_page_len = ext.max_edge_page_len(); + let max_page_len = ext.config().max_edge_page_len; let meta = Arc::new(Meta::new_for_edges()); @@ -381,7 +381,7 @@ impl, EXT: PersistenceConfig> EdgeStorageInn #[inline(always)] pub fn max_page_len(&self) -> u32 { - self.ext.max_edge_page_len() + self.ext.config().max_edge_page_len } pub fn write_locked<'a>(&'a self) -> WriteLockedEdgePages<'a, ES> { diff --git a/db4-storage/src/pages/graph_prop_store.rs b/db4-storage/src/pages/graph_prop_store.rs index 92b0369cf5..7f005d6319 100644 --- a/db4-storage/src/pages/graph_prop_store.rs +++ b/db4-storage/src/pages/graph_prop_store.rs @@ -7,7 +7,7 @@ use crate::{ graph_prop_page::writer::GraphPropWriter, locked::graph_props::{LockedGraphPropPage, WriteLockedGraphPropPages}, }, - persist::strategy::PersistenceConfig, + persist::strategy::PersistenceStrategy, }; use std::{ @@ -31,7 +31,7 @@ pub struct GraphPropStorageInner { ext: EXT, } -impl, EXT: PersistenceConfig> GraphPropStorageInner { +impl, EXT: PersistenceStrategy> GraphPropStorageInner { pub fn new_with_meta(path: Option<&Path>, meta: Arc, ext: EXT) -> Self { let page = Arc::new(GS::new(meta.clone(), path, ext.clone())); diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 1cf28fcf52..c16a757fd5 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -48,13 +48,13 @@ pub mod test_utils; // graph // (node/edges) // segment // layer_ids (0, 1, 2, ...) // actual graphy bits #[derive(Debug)] -pub struct GraphStore { +pub struct GraphStore { nodes: Arc>, edges: Arc>, graph_props: Arc>, graph_dir: Option, event_id: AtomicUsize, - _ext: EXT, + ext: EXT, } #[derive(Debug)] @@ -62,7 +62,7 @@ pub struct ReadLockedGraphStore< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistenceConfig, + EXT: PersistenceStrategy, > { pub nodes: Arc>, pub edges: Arc>, @@ -88,7 +88,7 @@ impl< } pub fn extension(&self) -> &EXT { - &self._ext + &self.ext } pub fn nodes(&self) -> &Arc> { @@ -126,12 +126,18 @@ impl< self.nodes.stats().latest().max(self.edges.stats().latest()) } - pub fn load(graph_dir: impl AsRef) -> Result { + pub fn load(graph_dir: impl AsRef) -> Result + where + EXT: Default, + { let nodes_path = graph_dir.as_ref().join("nodes"); let edges_path = graph_dir.as_ref().join("edges"); let graph_props_path = graph_dir.as_ref().join("graph_props"); - let ext = read_persistence_config::(graph_dir.as_ref())?; + let mut ext = EXT::default(); + if let Ok(loaded_config) = read_persistence_config(graph_dir.as_ref()) { + *ext.config_mut() = loaded_config; + } let edge_storage = Arc::new(EdgeStorageInner::load(edges_path, ext.clone())?); let edge_meta = edge_storage.edge_meta().clone(); @@ -142,7 +148,7 @@ impl< let graph_prop_storage = Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); - for node_type in ext.node_types().iter() { + for node_type in ext.config().node_types().iter() { node_meta.get_or_create_node_type_id(node_type); } @@ -154,7 +160,7 @@ impl< graph_props: graph_prop_storage, event_id: AtomicUsize::new(t_len), graph_dir: Some(graph_dir.as_ref().to_path_buf()), - _ext: ext, + ext, }) } @@ -191,7 +197,7 @@ impl< )); if let Some(graph_dir) = graph_dir { - write_persistence_config(graph_dir, &ext) + write_persistence_config(graph_dir, ext.config()) .expect("Unrecoverable! Failed to write graph config"); } @@ -201,7 +207,7 @@ impl< graph_props: graph_prop_storage, event_id: AtomicUsize::new(0), graph_dir: graph_dir.map(|p| p.to_path_buf()), - _ext: ext, + ext, } } @@ -445,21 +451,21 @@ impl< } } -impl Drop for GraphStore { +impl Drop for GraphStore { fn drop(&mut self) { let node_types = self.nodes.prop_meta().get_all_node_types(); - self._ext.set_node_types(node_types); + self.ext.config_mut().set_node_types(node_types); if let Some(graph_dir) = self.graph_dir.as_ref() { - if write_persistence_config(graph_dir, &self._ext).is_err() { + if write_persistence_config(graph_dir, self.ext.config()).is_err() { eprintln!("Unrecoverable! Failed to write graph meta"); } } } } -fn write_persistence_config( +fn write_persistence_config( graph_dir: impl AsRef, - config: &EXT, + config: &PersistenceConfig, ) -> Result<(), StorageError> { let config_file = graph_dir.as_ref().join("persistence_config.json"); let config_file = std::fs::File::create(&config_file)?; @@ -468,9 +474,9 @@ fn write_persistence_config( Ok(()) } -fn read_persistence_config( +fn read_persistence_config( graph_dir: impl AsRef, -) -> Result { +) -> Result { let config_file = graph_dir.as_ref().join("persistence_config.json"); let config_file = std::fs::File::open(config_file)?; let config = serde_json::from_reader(config_file)?; diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 786ba092f7..d5f33c87e4 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -7,7 +7,7 @@ use crate::{ layer_counter::GraphStats, locked::nodes::{LockedNodePage, WriteLockedNodePages}, }, - persist::strategy::PersistenceConfig, + persist::strategy::PersistenceStrategy, segments::node::segment::MemNodeSegment, }; use parking_lot::RwLockWriteGuard; @@ -43,7 +43,7 @@ pub struct ReadLockedNodeStorage, EXT> { locked_segments: Box<[NS::ArcLockedSegment]>, } -impl, EXT: PersistenceConfig> ReadLockedNodeStorage { +impl, EXT: PersistenceStrategy> ReadLockedNodeStorage { pub fn node_ref( &self, node: impl Into, @@ -93,7 +93,7 @@ impl, EXT: PersistenceConfig> ReadLockedNode } } -impl NodeStorageInner { +impl NodeStorageInner { pub fn prop_meta(&self) -> &Arc { &self.node_meta } @@ -128,11 +128,11 @@ impl NodeStorageInner { } pub fn max_page_len(&self) -> u32 { - self.ext.max_node_page_len() + self.ext.config().max_node_page_len } } -impl, EXT: PersistenceConfig> NodeStorageInner { +impl, EXT: PersistenceStrategy> NodeStorageInner { pub fn new_with_meta( nodes_path: Option, node_meta: Arc, diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index 0684a19424..e5eb7e249f 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -4,7 +4,7 @@ use super::{ use crate::{ LocalPOS, api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, - persist::strategy::{PersistenceConfig, PersistenceStrategy}, + persist::strategy::PersistenceStrategy, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, wal::LSN, }; @@ -20,7 +20,7 @@ pub struct WriteSession< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistenceConfig, + EXT: PersistenceStrategy, > { node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, edge_writer: EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES>, diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index dc8804293a..39a3c332b4 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -13,23 +13,48 @@ pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 pub const DEFAULT_MAX_MEMORY_BYTES: usize = 32 * 1024 * 1024; -pub trait PersistenceConfig: - Default + Debug + Clone + Send + Sync + 'static + for<'a> Deserialize<'a> + Serialize -{ - fn max_node_page_len(&self) -> u32; - fn max_edge_page_len(&self) -> u32; - - fn max_memory_bytes(&self) -> usize; - fn is_parallel(&self) -> bool; - fn node_types(&self) -> &[String]; - fn set_node_types(&mut self, types: impl IntoIterator>); +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PersistenceConfig { + pub max_node_page_len: u32, + pub max_edge_page_len: u32, + pub max_memory_bytes: usize, + pub is_parallel: bool, + pub node_types: Vec, } -pub trait PersistenceStrategy: PersistenceConfig { +impl PersistenceConfig { + pub fn node_types(&self) -> &[String] { + &self.node_types + } + + pub fn set_node_types(&mut self, types: impl IntoIterator>) { + self.node_types = types + .into_iter() + .map(|s| s.as_ref().to_string()) + .collect(); + } +} + +impl Default for PersistenceConfig { + fn default() -> Self { + Self { + max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, + max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, + max_memory_bytes: DEFAULT_MAX_MEMORY_BYTES, + is_parallel: false, + node_types: Vec::new(), + } + } +} + +pub trait PersistenceStrategy: Debug + Clone + Default + Send + Sync + 'static + for<'de> Deserialize<'de> + Serialize { type NS; type ES; type GS; + fn config(&self) -> &PersistenceConfig; + fn config_mut(&mut self) -> &mut PersistenceConfig; + fn persist_node_segment>( &self, node_page: &Self::NS, @@ -55,17 +80,21 @@ pub trait PersistenceStrategy: PersistenceConfig { fn disk_storage_enabled() -> bool; } -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct NoOpStrategy { - max_node_page_len: u32, - max_edge_page_len: u32, + config: PersistenceConfig, } impl NoOpStrategy { pub fn new(max_node_page_len: u32, max_edge_page_len: u32) -> Self { Self { - max_node_page_len, - max_edge_page_len, + config: PersistenceConfig { + max_node_page_len, + max_edge_page_len, + max_memory_bytes: usize::MAX, + is_parallel: false, + node_types: Vec::new(), + }, } } } @@ -76,38 +105,20 @@ impl Default for NoOpStrategy { } } -impl PersistenceConfig for NoOpStrategy { - fn max_node_page_len(&self) -> u32 { - self.max_node_page_len - } - - #[inline(always)] - fn max_edge_page_len(&self) -> u32 { - self.max_edge_page_len - } - - fn max_memory_bytes(&self) -> usize { - usize::MAX - } - - fn is_parallel(&self) -> bool { - false - } - - fn node_types(&self) -> &[String] { - &[] - } - - fn set_node_types(&mut self, _types: impl IntoIterator>) { - // No operation - } -} - impl PersistenceStrategy for NoOpStrategy { type ES = EdgeSegmentView; type NS = NodeSegmentView; type GS = GraphPropSegmentView; + fn config(&self) -> &PersistenceConfig { + &self.config + } + + // Use builder pattern with_config. + fn config_mut(&mut self) -> &mut PersistenceConfig { + &mut self.config + } + fn persist_node_segment>( &self, _node_page: &Self::NS, diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index 1f5edb598a..31b14ff149 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -497,7 +497,7 @@ impl>> EdgeSegmentOps for EdgeSeg } fn new(page_id: usize, meta: Arc, _path: Option, ext: Self::Extension) -> Self { - let max_page_len = ext.max_edge_page_len(); + let max_page_len = ext.config().max_edge_page_len; Self { segment: parking_lot::RwLock::new(MemEdgeSegment::new(page_id, max_page_len, meta)) .into(), diff --git a/db4-storage/src/segments/graph_prop/mod.rs b/db4-storage/src/segments/graph_prop/mod.rs index c546c69765..d7147524c2 100644 --- a/db4-storage/src/segments/graph_prop/mod.rs +++ b/db4-storage/src/segments/graph_prop/mod.rs @@ -4,7 +4,7 @@ pub mod segment; use crate::{ api::graph_props::GraphPropSegmentOps, error::StorageError, - persist::strategy::PersistenceConfig, + persist::strategy::PersistenceStrategy, segments::graph_prop::{entry::MemGraphPropEntry, segment::MemGraphPropSegment}, }; use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; @@ -20,7 +20,7 @@ use std::{ /// `GraphPropSegmentView` manages graph temporal properties and graph metadata /// (constant properties). Reads / writes are always served from the in-memory segment. #[derive(Debug)] -pub struct GraphPropSegmentView { +pub struct GraphPropSegmentView { /// In-memory segment that contains the latest graph properties /// and graph metadata writes. head: Arc>, @@ -33,7 +33,7 @@ pub struct GraphPropSegmentView { _persistent: P, } -impl GraphPropSegmentOps for GraphPropSegmentView

{ +impl GraphPropSegmentOps for GraphPropSegmentView

{ type Extension = P; type Entry<'a> = MemGraphPropEntry<'a>; diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 416a465700..609b343d2c 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -455,7 +455,7 @@ impl>> NodeSegmentOps for NodeSeg _path: Option, ext: Self::Extension, ) -> Self { - let max_page_len = ext.max_node_page_len(); + let max_page_len = ext.config().max_node_page_len; Self { inner: parking_lot::RwLock::new(MemNodeSegment::new(page_id, max_page_len, meta)) .into(), From 9979c50925bb47c098ffe823ff6aa77d58b85858 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 9 Jan 2026 18:43:34 +0300 Subject: [PATCH 30/95] Fix parallel_flush --- db4-storage/src/persist/strategy.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 39a3c332b4..77866d913d 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -18,7 +18,7 @@ pub struct PersistenceConfig { pub max_node_page_len: u32, pub max_edge_page_len: u32, pub max_memory_bytes: usize, - pub is_parallel: bool, + pub bg_flush_enabled: bool, pub node_types: Vec, } @@ -41,7 +41,7 @@ impl Default for PersistenceConfig { max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, max_memory_bytes: DEFAULT_MAX_MEMORY_BYTES, - is_parallel: false, + bg_flush_enabled: true, node_types: Vec::new(), } } @@ -92,7 +92,7 @@ impl NoOpStrategy { max_node_page_len, max_edge_page_len, max_memory_bytes: usize::MAX, - is_parallel: false, + bg_flush_enabled: true, node_types: Vec::new(), }, } From aa771630327579736669724abb92074eaab2c16d Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 9 Jan 2026 19:48:55 +0300 Subject: [PATCH 31/95] Remove defaults for Extension --- db4-graph/src/lib.rs | 10 ++-- db4-graph/src/replay.rs | 60 +++++++++++++++++------- db4-storage/src/pages/mod.rs | 10 ++-- db4-storage/src/persist/strategy.rs | 64 ++++++++++++++++++-------- raphtory/src/db/api/storage/storage.rs | 17 ++----- 5 files changed, 101 insertions(+), 60 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 6a316c1e58..91feebb79c 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -22,10 +22,11 @@ use storage::{ nodes::WriteLockedNodePages, }, }, - persist::strategy::{PersistenceStrategy}, + persist::strategy::PersistenceStrategy, resolver::GIDResolverOps, - Extension, GIDResolver, Layer, ReadLockedLayer, transaction::TransactionManager, - WalImpl, ES, NS, GS, wal::Wal, + transaction::TransactionManager, + wal::Wal, + Extension, GIDResolver, Layer, PersistenceConfig, ReadLockedLayer, WalImpl, ES, GS, NS, }; use tempfile::TempDir; @@ -85,7 +86,8 @@ impl<'a> From<&'a Path> for GraphDir { impl Default for TemporalGraph { fn default() -> Self { - Self::new(Extension::default()).unwrap() + let config = PersistenceConfig::default(); + Self::new(Extension::new(config)).unwrap() } } diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index cc665d70dc..bfff5fb7a8 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -2,23 +2,23 @@ //! Allows for fast replay by making use of one-time lock acquisition for //! all the segments in the graph. -use storage::pages::resolve_pos; -use crate::{WriteLockedGraph}; +use crate::WriteLockedGraph; use raphtory_api::core::{ + entities::properties::meta::STATIC_GRAPH_LAYER_ID, entities::{properties::prop::Prop, EID, GID, VID}, storage::timeindex::TimeIndexEntry, - entities::properties::meta::STATIC_GRAPH_LAYER_ID, }; use raphtory_core::entities::GidRef; +use storage::pages::resolve_pos; +use storage::resolver::GIDResolverOps; use storage::{ - api::nodes::NodeSegmentOps, api::edges::EdgeSegmentOps, - persist::strategy::PersistenceStrategy, - NS, ES, GS, + api::nodes::NodeSegmentOps, error::StorageError, + persist::strategy::PersistenceStrategy, wal::{GraphReplay, TransactionID, LSN}, + ES, GS, NS, }; -use storage::resolver::GIDResolverOps; impl GraphReplay for WriteLockedGraph<'_, EXT> where @@ -56,21 +56,33 @@ where } // 2. Insert node ids into resolver. - temporal_graph.logical_to_physical.set(GidRef::from(&src_name), src_id)?; - temporal_graph.logical_to_physical.set(GidRef::from(&dst_name), dst_id)?; + temporal_graph + .logical_to_physical + .set(GidRef::from(&src_name), src_id)?; + temporal_graph + .logical_to_physical + .set(GidRef::from(&dst_name), dst_id)?; // 3. Insert layer id into the layer meta of both edge and node. let node_meta = temporal_graph.node_meta(); - edge_meta.layer_meta().set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); - node_meta.layer_meta().set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); + edge_meta + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); + node_meta + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); // 4. Grab src writer and add edge data. let (src_segment_id, src_pos) = resolve_pos(src_id, node_max_page_len); let num_nodes = src_id.index() + 1; self.resize_chunks_to_num_nodes(num_nodes); // Create enough segments. - let segment = self.graph().storage().nodes().get_or_create_segment(src_segment_id); + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(src_segment_id); let immut_lsn = segment.immut_lsn(); // Replay this entry only if it doesn't exist in immut. @@ -78,7 +90,9 @@ where let mut src_writer = self.nodes.get_mut(src_segment_id).unwrap().writer(); src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&src_name)); - let is_new_edge_static = src_writer.get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID).is_none(); + let is_new_edge_static = src_writer + .get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID) + .is_none(); let is_new_edge_layer = src_writer.get_out_edge(src_pos, dst_id, layer_id).is_none(); // Add the edge to the static graph if it doesn't already exist. @@ -104,7 +118,11 @@ where let num_nodes = dst_id.index() + 1; self.resize_chunks_to_num_nodes(num_nodes); - let segment = self.graph().storage().nodes().get_or_create_segment(dst_segment_id); + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(dst_segment_id); let immut_lsn = segment.immut_lsn(); // Replay this entry only if it doesn't exist in immut. @@ -112,7 +130,9 @@ where let mut dst_writer = self.nodes.get_mut(dst_segment_id).unwrap().writer(); dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&dst_name)); - let is_new_edge_static = dst_writer.get_inb_edge(dst_pos, src_id, STATIC_GRAPH_LAYER_ID).is_none(); + let is_new_edge_static = dst_writer + .get_inb_edge(dst_pos, src_id, STATIC_GRAPH_LAYER_ID) + .is_none(); let is_new_edge_layer = dst_writer.get_inb_edge(dst_pos, src_id, layer_id).is_none(); if is_new_edge_static { @@ -135,14 +155,20 @@ where let num_edges = eid.index() + 1; self.resize_chunks_to_num_edges(num_edges); - let segment = self.graph().storage().edges().get_or_create_segment(edge_segment_id); + let segment = self + .graph() + .storage() + .edges() + .get_or_create_segment(edge_segment_id); let immut_lsn = segment.immut_lsn(); // Replay this entry only if it doesn't exist in immut. if immut_lsn < lsn { let mut edge_writer = self.edges.get_mut(edge_segment_id).unwrap().writer(); - let is_new_edge_static = edge_writer.get_edge(STATIC_GRAPH_LAYER_ID, edge_pos).is_none(); + let is_new_edge_static = edge_writer + .get_edge(STATIC_GRAPH_LAYER_ID, edge_pos) + .is_none(); // Add edge into the static graph if it doesn't already exist. if is_new_edge_static { diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index c16a757fd5..ff95242efc 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -127,17 +127,15 @@ impl< } pub fn load(graph_dir: impl AsRef) -> Result - where - EXT: Default, { let nodes_path = graph_dir.as_ref().join("nodes"); let edges_path = graph_dir.as_ref().join("edges"); let graph_props_path = graph_dir.as_ref().join("graph_props"); - let mut ext = EXT::default(); - if let Ok(loaded_config) = read_persistence_config(graph_dir.as_ref()) { - *ext.config_mut() = loaded_config; - } + let config = read_persistence_config(graph_dir.as_ref()) + .unwrap_or_else(|_| PersistenceConfig::default()); + + let ext = EXT::new(config); let edge_storage = Arc::new(EdgeStorageInner::load(edges_path, ext.clone())?); let edge_meta = edge_storage.edge_meta().clone(); diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 77866d913d..e28a72b5fc 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -22,7 +22,44 @@ pub struct PersistenceConfig { pub node_types: Vec, } +impl Default for PersistenceConfig { + fn default() -> Self { + Self { + max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, + max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, + max_memory_bytes: DEFAULT_MAX_MEMORY_BYTES, + bg_flush_enabled: true, + node_types: Vec::new(), + } + } +} + impl PersistenceConfig { + pub fn new_with_memory(max_memory_bytes: usize) -> Self { + Self { + max_memory_bytes, + ..Default::default() + } + } + + pub fn new_with_page_lens( + max_memory_bytes: usize, + max_node_page_len: u32, + max_edge_page_len: u32, + ) -> Self { + Self { + max_memory_bytes, + max_node_page_len, + max_edge_page_len, + ..Default::default() + } + } + + pub fn with_bg_flush(mut self) -> Self { + self.bg_flush_enabled = true; + self + } + pub fn node_types(&self) -> &[String] { &self.node_types } @@ -35,24 +72,15 @@ impl PersistenceConfig { } } -impl Default for PersistenceConfig { - fn default() -> Self { - Self { - max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, - max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, - max_memory_bytes: DEFAULT_MAX_MEMORY_BYTES, - bg_flush_enabled: true, - node_types: Vec::new(), - } - } -} - -pub trait PersistenceStrategy: Debug + Clone + Default + Send + Sync + 'static + for<'de> Deserialize<'de> + Serialize { +pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static + for<'de> Deserialize<'de> + Serialize { type NS; type ES; type GS; + fn new(config: PersistenceConfig) -> Self; + fn config(&self) -> &PersistenceConfig; + fn config_mut(&mut self) -> &mut PersistenceConfig; fn persist_node_segment>( @@ -99,17 +127,15 @@ impl NoOpStrategy { } } -impl Default for NoOpStrategy { - fn default() -> Self { - Self::new(DEFAULT_MAX_PAGE_LEN_NODES, DEFAULT_MAX_PAGE_LEN_EDGES) - } -} - impl PersistenceStrategy for NoOpStrategy { type ES = EdgeSegmentView; type NS = NodeSegmentView; type GS = GraphPropSegmentView; + fn new(config: PersistenceConfig) -> Self { + Self { config } + } + fn config(&self) -> &PersistenceConfig { &self.config } diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 56e56a3681..155df1aa41 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -105,22 +105,11 @@ impl Storage { } pub(crate) fn new_at_path(path: impl AsRef) -> Result { - Ok(Self { - graph: GraphStorage::Unlocked(Arc::new(TemporalGraph::new_with_path( - path, - Extension::default(), - )?)), - #[cfg(feature = "search")] - index: RwLock::new(GraphIndex::Empty), - }) - } + let config = PersistenceConfig::default(); + let temporal_graph = TemporalGraph::new_with_path(path, Extension::new(config))?; - pub(crate) fn new_with_path_and_ext( - path: impl AsRef, - ext: Extension, - ) -> Result { Ok(Self { - graph: GraphStorage::Unlocked(Arc::new(TemporalGraph::new_with_path(path, ext)?)), + graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), }) From aa55404d0ba6927ae9c4b6647f9b18ee8b8ce938 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 9 Jan 2026 20:06:07 +0300 Subject: [PATCH 32/95] Move read/write from dir methods to PersistenceConfig --- db4-storage/src/pages/mod.rs | 28 +++++----------------------- db4-storage/src/persist/strategy.rs | 20 ++++++++++++++++++-- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index ff95242efc..9e3fa20b08 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -132,7 +132,7 @@ impl< let edges_path = graph_dir.as_ref().join("edges"); let graph_props_path = graph_dir.as_ref().join("graph_props"); - let config = read_persistence_config(graph_dir.as_ref()) + let config = PersistenceConfig::load_from_dir(graph_dir.as_ref()) .unwrap_or_else(|_| PersistenceConfig::default()); let ext = EXT::new(config); @@ -142,7 +142,7 @@ impl< let node_storage = Arc::new(NodeStorageInner::load(nodes_path, edge_meta, ext.clone())?); let node_meta = node_storage.prop_meta(); - // Load graph temporal properties and metadata + // Load graph temporal properties and metadata. let graph_prop_storage = Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); @@ -195,7 +195,8 @@ impl< )); if let Some(graph_dir) = graph_dir { - write_persistence_config(graph_dir, ext.config()) + ext.config() + .save_to_dir(graph_dir) .expect("Unrecoverable! Failed to write graph config"); } @@ -454,32 +455,13 @@ impl Drop for GraphStore let node_types = self.nodes.prop_meta().get_all_node_types(); self.ext.config_mut().set_node_types(node_types); if let Some(graph_dir) = self.graph_dir.as_ref() { - if write_persistence_config(graph_dir, self.ext.config()).is_err() { + if self.ext.config().save_to_dir(graph_dir).is_err() { eprintln!("Unrecoverable! Failed to write graph meta"); } } } } -fn write_persistence_config( - graph_dir: impl AsRef, - config: &PersistenceConfig, -) -> Result<(), StorageError> { - let config_file = graph_dir.as_ref().join("persistence_config.json"); - let config_file = std::fs::File::create(&config_file)?; - - serde_json::to_writer_pretty(config_file, config)?; - Ok(()) -} - -fn read_persistence_config( - graph_dir: impl AsRef, -) -> Result { - let config_file = graph_dir.as_ref().join("persistence_config.json"); - let config_file = std::fs::File::open(config_file)?; - let config = serde_json::from_reader(config_file)?; - Ok(config) -} #[inline(always)] pub fn resolve_pos>(i: I, max_page_len: u32) -> (usize, LocalPOS) { diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index e28a72b5fc..3917429c41 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -1,8 +1,8 @@ use std::ops::DerefMut; use std::fmt::Debug; - +use std::path::Path; use serde::{Deserialize, Serialize}; - +use crate::error::StorageError; use crate::segments::{ edge::segment::{EdgeSegmentView, MemEdgeSegment}, graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, @@ -35,6 +35,22 @@ impl Default for PersistenceConfig { } impl PersistenceConfig { + const CONFIG_FILE: &str = "persistence_config.json"; + + pub fn load_from_dir(dir: impl AsRef) -> Result { + let config_file = dir.as_ref().join(Self::CONFIG_FILE); + let config_file = std::fs::File::open(config_file)?; + let config = serde_json::from_reader(config_file)?; + Ok(config) + } + + pub fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError> { + let config_file = dir.as_ref().join(Self::CONFIG_FILE); + let config_file = std::fs::File::create(&config_file)?; + serde_json::to_writer_pretty(config_file, self)?; + Ok(()) + } + pub fn new_with_memory(max_memory_bytes: usize) -> Self { Self { max_memory_bytes, From 934b3a84a5657c8e70d2cebb898e33f74e7536cc Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 12 Jan 2026 10:58:03 +0300 Subject: [PATCH 33/95] Add config_mut TODO --- db4-storage/src/pages/mod.rs | 1 + db4-storage/src/persist/strategy.rs | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 9e3fa20b08..4c0ee5b0ae 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -454,6 +454,7 @@ impl Drop for GraphStore fn drop(&mut self) { let node_types = self.nodes.prop_meta().get_all_node_types(); self.ext.config_mut().set_node_types(node_types); + if let Some(graph_dir) = self.graph_dir.as_ref() { if self.ext.config().save_to_dir(graph_dir).is_err() { eprintln!("Unrecoverable! Failed to write graph meta"); diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 3917429c41..e042f21431 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -97,6 +97,8 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static + for<'de> fn config(&self) -> &PersistenceConfig; + // Need this to set node_types. + // TODO: Remove this once we have a better way to set node_types. fn config_mut(&mut self) -> &mut PersistenceConfig; fn persist_node_segment>( @@ -156,7 +158,6 @@ impl PersistenceStrategy for NoOpStrategy { &self.config } - // Use builder pattern with_config. fn config_mut(&mut self) -> &mut PersistenceConfig { &mut self.config } From eb459fd5efdeb12a4d6033f4c333524b625782c7 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 12 Jan 2026 11:01:13 +0300 Subject: [PATCH 34/95] Apply some more page -> segment rename --- db4-storage/src/persist/strategy.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index e042f21431..4f92b7ad6f 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -103,21 +103,21 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static + for<'de> fn persist_node_segment>( &self, - node_page: &Self::NS, + node_segment: &Self::NS, writer: MP, ) where Self: Sized; - fn persist_edge_page>( + fn persist_edge_segment>( &self, - edge_page: &Self::ES, + edge_segment: &Self::ES, writer: MP, ) where Self: Sized; - fn persist_graph_props>( + fn persist_graph_prop_segment>( &self, - graph_segment: &Self::GS, + graph_prop_segment: &Self::GS, writer: MP, ) where Self: Sized; @@ -170,7 +170,7 @@ impl PersistenceStrategy for NoOpStrategy { // No operation } - fn persist_edge_page>( + fn persist_edge_segment>( &self, _edge_page: &Self::ES, _writer: MP, @@ -178,7 +178,7 @@ impl PersistenceStrategy for NoOpStrategy { // No operation } - fn persist_graph_props>( + fn persist_graph_prop_segment>( &self, _graph_segment: &Self::GS, _writer: MP, From 427f2a88676406286685131170f9ce79543a67a6 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 12 Jan 2026 11:55:21 +0300 Subject: [PATCH 35/95] Expose WalType through PersistenceStrategy --- db4-graph/src/lib.rs | 8 ++++---- db4-storage/src/lib.rs | 8 +++----- db4-storage/src/persist/strategy.rs | 4 ++++ raphtory-storage/src/mutation/addition_ops_ext.rs | 4 ++-- raphtory-storage/src/mutation/durability_ops.rs | 8 ++++---- raphtory/src/db/api/storage/storage.rs | 4 ++-- 6 files changed, 19 insertions(+), 17 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 91feebb79c..4a6172b861 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -26,7 +26,7 @@ use storage::{ resolver::GIDResolverOps, transaction::TransactionManager, wal::Wal, - Extension, GIDResolver, Layer, PersistenceConfig, ReadLockedLayer, WalImpl, ES, GS, NS, + Extension, GIDResolver, Layer, PersistenceConfig, ReadLockedLayer, WalType, ES, GS, NS, }; use tempfile::TempDir; @@ -40,7 +40,7 @@ pub struct TemporalGraph { storage: Arc>, graph_dir: Option, pub transaction_manager: Arc, - pub wal: Arc, + pub wal: Arc, } #[derive(Debug)] @@ -123,7 +123,7 @@ impl, ES = ES, GS = GS>> Tempora let resolver = GIDResolver::new_with_path(&gid_resolver_dir, id_type)?; let node_count = AtomicUsize::new(storage.nodes().num_nodes()); let wal_dir = path.join("wal"); - let wal = Arc::new(WalImpl::new(Some(wal_dir))?); + let wal = Arc::new(WalType::new(Some(wal_dir))?); Ok(Self { graph_dir: Some(path.into()), @@ -175,7 +175,7 @@ impl, ES = ES, GS = GS>> Tempora ); let wal_dir = graph_dir.as_ref().map(|dir| dir.wal_dir()); - let wal = Arc::new(WalImpl::new(wal_dir)?); + let wal = Arc::new(WalType::new(wal_dir)?); Ok(Self { graph_dir, diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index 10f7b74408..39a6beb0b1 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -11,10 +11,9 @@ use crate::{ }, generic_t_props::GenericTProps, pages::{ - GraphStore, ReadLockedGraphStore, edge_store::ReadLockedEdgeStorage, - node_store::ReadLockedNodeStorage, + edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage, GraphStore, ReadLockedGraphStore }, - persist::strategy::NoOpStrategy, + persist::strategy::{NoOpStrategy, PersistenceStrategy}, resolver::mapping_resolver::MappingResolver, segments::{ edge::{ @@ -27,7 +26,6 @@ use crate::{ segment::NodeSegmentView, }, }, - wal::no_wal::NoWal, }; use parking_lot::RwLock; use raphtory_api::core::entities::{EID, VID}; @@ -53,7 +51,7 @@ pub type ES

= EdgeSegmentView

; pub type GS

= GraphPropSegmentView

; pub type Layer

= GraphStore, ES

, GS

, P>; -pub type WalImpl = NoWal; +pub type WalType = ::WalType; pub type GIDResolver = MappingResolver; pub type ReadLockedLayer

= ReadLockedGraphStore, ES

, GS

, P>; diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 4f92b7ad6f..6371862be2 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -8,6 +8,8 @@ use crate::segments::{ graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, node::segment::{MemNodeSegment, NodeSegmentView}, }; +use crate::wal::no_wal::NoWal; +use crate::wal::Wal; pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 @@ -92,6 +94,7 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static + for<'de> type NS; type ES; type GS; + type WalType: Wal; fn new(config: PersistenceConfig) -> Self; @@ -149,6 +152,7 @@ impl PersistenceStrategy for NoOpStrategy { type ES = EdgeSegmentView; type NS = NodeSegmentView; type GS = GraphPropSegmentView; + type WalType = NoWal; fn new(config: PersistenceConfig) -> Self { Self { config } diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 9149e4bc8b..fb73863f90 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -24,7 +24,7 @@ use storage::{ persist::strategy::PersistenceStrategy, properties::props_meta_writer::PropsMetaWriter, resolver::GIDResolverOps, - Extension, transaction::TransactionManager, WalImpl, ES, NS, GS, + Extension, transaction::TransactionManager, WalType, ES, NS, GS, wal::LSN, }; @@ -357,7 +357,7 @@ impl DurabilityOps for TemporalGraph { &self.transaction_manager } - fn wal(&self) -> &WalImpl { + fn wal(&self) -> &WalType { &self.wal } } diff --git a/raphtory-storage/src/mutation/durability_ops.rs b/raphtory-storage/src/mutation/durability_ops.rs index 34713df7aa..18f229ac47 100644 --- a/raphtory-storage/src/mutation/durability_ops.rs +++ b/raphtory-storage/src/mutation/durability_ops.rs @@ -1,4 +1,4 @@ -use storage::{transaction::TransactionManager, WalImpl}; +use storage::{transaction::TransactionManager, WalType}; use crate::graph::graph::GraphStorage; use raphtory_api::inherit::Base; @@ -6,7 +6,7 @@ use raphtory_api::inherit::Base; pub trait DurabilityOps { fn transaction_manager(&self) -> &TransactionManager; - fn wal(&self) -> &WalImpl; + fn wal(&self) -> &WalType; } impl DurabilityOps for GraphStorage { @@ -14,7 +14,7 @@ impl DurabilityOps for GraphStorage { self.mutable().unwrap().transaction_manager.as_ref() } - fn wal(&self) -> &WalImpl { + fn wal(&self) -> &WalType { self.mutable().unwrap().wal.as_ref() } } @@ -31,7 +31,7 @@ where } #[inline] - fn wal(&self) -> &WalImpl { + fn wal(&self) -> &WalType { self.base().wal() } } diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 155df1aa41..aa45650027 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -36,7 +36,7 @@ use std::{ path::Path, sync::Arc, }; -use storage::{transaction::TransactionManager, WalImpl, wal::LSN}; +use storage::{transaction::TransactionManager, WalType, wal::LSN}; pub use storage::{ Extension, @@ -539,7 +539,7 @@ impl DurabilityOps for Storage { self.graph.mutable().unwrap().transaction_manager.as_ref() } - fn wal(&self) -> &WalImpl { + fn wal(&self) -> &WalType { self.graph.mutable().unwrap().wal.as_ref() } } From 7a13c79814e9d9470d7e065d86d0589135e5d6b5 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 12 Jan 2026 15:30:41 +0300 Subject: [PATCH 36/95] Pass wal as argument to constructor --- db4-graph/src/lib.rs | 3 ++- db4-storage/src/pages/mod.rs | 6 ++++-- db4-storage/src/persist/strategy.rs | 19 ++++++++++++++----- raphtory/src/db/api/storage/storage.rs | 9 ++++++--- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 4a6172b861..eb479f46ef 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -87,7 +87,8 @@ impl<'a> From<&'a Path> for GraphDir { impl Default for TemporalGraph { fn default() -> Self { let config = PersistenceConfig::default(); - Self::new(Extension::new(config)).unwrap() + let wal = Arc::new(WalType::new(None).unwrap()); + Self::new(Extension::new(config, wal)).unwrap() } } diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 4c0ee5b0ae..9207b83a53 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -1,11 +1,12 @@ use crate::{ - LocalPOS, + LocalPOS, WalType, api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, persist::strategy::{PersistenceConfig, PersistenceStrategy}, properties::props_meta_writer::PropsMetaWriter, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, + wal::Wal, }; use edge_page::writer::EdgeWriter; use edge_store::EdgeStorageInner; @@ -135,7 +136,8 @@ impl< let config = PersistenceConfig::load_from_dir(graph_dir.as_ref()) .unwrap_or_else(|_| PersistenceConfig::default()); - let ext = EXT::new(config); + let wal = Arc::new(EXT::WalType::new(Some(graph_dir.as_ref().to_path_buf()))?); + let ext = EXT::new(config, wal); let edge_storage = Arc::new(EdgeStorageInner::load(edges_path, ext.clone())?); let edge_meta = edge_storage.edge_meta().clone(); diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 6371862be2..3412ac7b10 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -1,6 +1,7 @@ use std::ops::DerefMut; use std::fmt::Debug; use std::path::Path; +use std::sync::Arc; use serde::{Deserialize, Serialize}; use crate::error::StorageError; use crate::segments::{ @@ -90,13 +91,13 @@ impl PersistenceConfig { } } -pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static + for<'de> Deserialize<'de> + Serialize { +pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { type NS; type ES; type GS; type WalType: Wal; - fn new(config: PersistenceConfig) -> Self; + fn new(config: PersistenceConfig, wal: Arc) -> Self; fn config(&self) -> &PersistenceConfig; @@ -104,6 +105,8 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static + for<'de> // TODO: Remove this once we have a better way to set node_types. fn config_mut(&mut self) -> &mut PersistenceConfig; + fn wal(&self) -> &Self::WalType; + fn persist_node_segment>( &self, node_segment: &Self::NS, @@ -129,9 +132,10 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static + for<'de> fn disk_storage_enabled() -> bool; } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone)] pub struct NoOpStrategy { config: PersistenceConfig, + wal: Arc, } impl NoOpStrategy { @@ -144,6 +148,7 @@ impl NoOpStrategy { bg_flush_enabled: true, node_types: Vec::new(), }, + wal: Arc::new(NoWal), } } } @@ -154,8 +159,8 @@ impl PersistenceStrategy for NoOpStrategy { type GS = GraphPropSegmentView; type WalType = NoWal; - fn new(config: PersistenceConfig) -> Self { - Self { config } + fn new(config: PersistenceConfig, wal: Arc) -> Self { + Self { config, wal } } fn config(&self) -> &PersistenceConfig { @@ -166,6 +171,10 @@ impl PersistenceStrategy for NoOpStrategy { &mut self.config } + fn wal(&self) -> &Self::WalType { + &self.wal + } + fn persist_node_segment>( &self, _node_page: &Self::NS, diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index aa45650027..99a6800217 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -6,7 +6,7 @@ use crate::{ }, errors::GraphError, }; -use db4_graph::{TemporalGraph, WriteLockedGraph}; +use db4_graph::{GraphDir, TemporalGraph, WriteLockedGraph}; use raphtory_api::core::{ entities::{ properties::{ @@ -36,7 +36,7 @@ use std::{ path::Path, sync::Arc, }; -use storage::{transaction::TransactionManager, WalType, wal::LSN}; +use storage::{transaction::TransactionManager, WalType, wal::{LSN, Wal}}; pub use storage::{ Extension, @@ -106,7 +106,10 @@ impl Storage { pub(crate) fn new_at_path(path: impl AsRef) -> Result { let config = PersistenceConfig::default(); - let temporal_graph = TemporalGraph::new_with_path(path, Extension::new(config))?; + let graph_dir = GraphDir::from(path.as_ref()); + let wal_dir = Some(graph_dir.wal_dir()); + let wal = Arc::new(WalType::new(wal_dir)?); + let temporal_graph = TemporalGraph::new_with_path(path, Extension::new(config, wal))?; Ok(Self { graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), From 00d4a3cbef9ef66c3aafcb5185948e154c8b9401 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 12 Jan 2026 15:57:20 +0300 Subject: [PATCH 37/95] Expose wal from extension --- db4-graph/src/lib.rs | 12 ++++-------- raphtory-storage/src/mutation/addition_ops_ext.rs | 2 +- raphtory-storage/src/mutation/durability_ops.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 2 +- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index eb479f46ef..79c52ada88 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -40,7 +40,6 @@ pub struct TemporalGraph { storage: Arc>, graph_dir: Option, pub transaction_manager: Arc, - pub wal: Arc, } #[derive(Debug)] @@ -123,8 +122,6 @@ impl, ES = ES, GS = GS>> Tempora let gid_resolver_dir = path.join("gid_resolver"); let resolver = GIDResolver::new_with_path(&gid_resolver_dir, id_type)?; let node_count = AtomicUsize::new(storage.nodes().num_nodes()); - let wal_dir = path.join("wal"); - let wal = Arc::new(WalType::new(Some(wal_dir))?); Ok(Self { graph_dir: Some(path.into()), @@ -132,7 +129,6 @@ impl, ES = ES, GS = GS>> Tempora node_count, storage: Arc::new(storage), transaction_manager: Arc::new(TransactionManager::new()), - wal, }) } @@ -175,16 +171,12 @@ impl, ES = ES, GS = GS>> Tempora ext, ); - let wal_dir = graph_dir.as_ref().map(|dir| dir.wal_dir()); - let wal = Arc::new(WalType::new(wal_dir)?); - Ok(Self { graph_dir, logical_to_physical, node_count: AtomicUsize::new(0), storage: Arc::new(storage), transaction_manager: Arc::new(TransactionManager::new()), - wal, }) } @@ -197,6 +189,10 @@ impl, ES = ES, GS = GS>> Tempora self.storage().extension() } + pub fn wal(&self) -> &EXT::WalType { + self.storage().extension().wal() + } + pub fn read_event_counter(&self) -> usize { self.storage().read_event_id() } diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index fb73863f90..008c3a5fa3 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -358,6 +358,6 @@ impl DurabilityOps for TemporalGraph { } fn wal(&self) -> &WalType { - &self.wal + &self.extension().wal() } } diff --git a/raphtory-storage/src/mutation/durability_ops.rs b/raphtory-storage/src/mutation/durability_ops.rs index 18f229ac47..ff2e70e894 100644 --- a/raphtory-storage/src/mutation/durability_ops.rs +++ b/raphtory-storage/src/mutation/durability_ops.rs @@ -15,7 +15,7 @@ impl DurabilityOps for GraphStorage { } fn wal(&self) -> &WalType { - self.mutable().unwrap().wal.as_ref() + self.mutable().unwrap().wal() } } diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 99a6800217..a227d2a187 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -543,7 +543,7 @@ impl DurabilityOps for Storage { } fn wal(&self) -> &WalType { - self.graph.mutable().unwrap().wal.as_ref() + self.graph.mutable().unwrap().wal() } } From f78279c3bb7d938277b36728473a1de08903dc43 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 12 Jan 2026 16:38:05 +0300 Subject: [PATCH 38/95] Add more docs to graph paths --- raphtory/src/db/graph/graph.rs | 7 ++++++- raphtory/src/serialise/graph_folder.rs | 23 ++++++++++++----------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index 0df299dc99..991fb15c9a 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -586,10 +586,15 @@ impl Graph { if !Extension::disk_storage_enabled() { return Err(GraphError::DiskGraphNotEnabled); } + path.init()?; + let graph_storage_path = path.graph_path()?; + let storage = Storage::new_at_path(graph_storage_path)?; + let graph = Self { - inner: Arc::new(Storage::new_at_path(path.graph_path()?)?), + inner: Arc::new(storage), }; + path.write_metadata(&graph)?; Ok(graph) } diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 375ac77838..9bdf9a258e 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -25,28 +25,29 @@ use std::{ use walkdir::WalkDir; use zip::{write::FileOptions, ZipArchive, ZipWriter}; -/// Stores graph data -pub const GRAPH_PATH: &str = "graph"; -pub const DEFAULT_GRAPH_PATH: &str = "graph0"; +/// Metadata file that stores path to the data folder. +pub const ROOT_META_PATH: &str = ".raph"; +/// Outer most directory containing all data. pub const DATA_PATH: &str = "data"; pub const DEFAULT_DATA_PATH: &str = "data0"; -/// Stores data folder path -pub const ROOT_META_PATH: &str = ".raph"; - -/// Stores graph folder path and graph metadata +/// Metadata file that stores path to the graph folder and graph metadata. pub const GRAPH_META_PATH: &str = ".meta"; -/// Temporary metadata for atomic replacement -pub const DIRTY_PATH: &str = ".dirty"; +/// Directory that stores graph data. +pub const GRAPH_PATH: &str = "graph"; +pub const DEFAULT_GRAPH_PATH: &str = "graph0"; -/// Directory that stores search indexes +/// Directory that stores search indexes. pub const INDEX_PATH: &str = "index"; -/// Directory that stores vector embeddings of the graph +/// Directory that stores vector embeddings of the graph. pub const VECTORS_PATH: &str = "vectors"; +/// Temporary metadata file for atomic replacement. +pub const DIRTY_PATH: &str = ".dirty"; + pub(crate) fn valid_relative_graph_path( relative_path: &str, prefix: &str, From 7c375dc513dbacbf4dfe6432de49beb663a78071 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 14 Jan 2026 09:58:04 -0500 Subject: [PATCH 39/95] Minor cleanup --- db4-graph/src/lib.rs | 38 ++-- db4-storage/src/pages/mod.rs | 178 +++++++++--------- .../src/mutation/addition_ops_ext.rs | 2 - raphtory/src/db/api/storage/storage.rs | 1 + raphtory/src/db/graph/graph.rs | 2 +- 5 files changed, 111 insertions(+), 110 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 79c52ada88..0bef50ad4f 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -114,24 +114,6 @@ impl, ES = ES, GS = GS>> Tempora ) } - pub fn load_from_path(path: impl AsRef) -> Result { - let path = path.as_ref(); - let storage = Layer::load(path)?; - let id_type = storage.nodes().id_type(); - - let gid_resolver_dir = path.join("gid_resolver"); - let resolver = GIDResolver::new_with_path(&gid_resolver_dir, id_type)?; - let node_count = AtomicUsize::new(storage.nodes().num_nodes()); - - Ok(Self { - graph_dir: Some(path.into()), - logical_to_physical: resolver.into(), - node_count, - storage: Arc::new(storage), - transaction_manager: Arc::new(TransactionManager::new()), - }) - } - pub fn new_with_meta( graph_dir: Option, node_meta: Meta, @@ -180,6 +162,24 @@ impl, ES = ES, GS = GS>> Tempora }) } + pub fn load_from_path(path: impl AsRef) -> Result { + let path = path.as_ref(); + let storage = Layer::load(path)?; + let id_type = storage.nodes().id_type(); + + let gid_resolver_dir = path.join("gid_resolver"); + let resolver = GIDResolver::new_with_path(&gid_resolver_dir, id_type)?; + let node_count = AtomicUsize::new(storage.nodes().num_nodes()); + + Ok(Self { + graph_dir: Some(path.into()), + logical_to_physical: resolver.into(), + node_count, + storage: Arc::new(storage), + transaction_manager: Arc::new(TransactionManager::new()), + }) + } + pub fn disk_storage_path(&self) -> Option<&Path> { self.graph_dir() .filter(|_| Extension::disk_storage_enabled()) @@ -215,10 +215,12 @@ impl, ES = ES, GS = GS>> Tempora .get_str(string) .or_else(|| self.logical_to_physical.get_u64(string.id())), }?; + // VIDs in the resolver may not be initialised yet, need to double-check the node actually exists! let nodes = self.storage().nodes(); let (page_id, pos) = nodes.resolve_pos(vid); let node_page = nodes.segments().get(page_id)?; + if pos.0 < node_page.num_nodes() { Some(vid) } else { diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 9207b83a53..4329c985d7 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -1,5 +1,5 @@ use crate::{ - LocalPOS, WalType, + LocalPOS, api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, @@ -77,54 +77,60 @@ impl< EXT: PersistenceStrategy, > GraphStore { - pub fn read_locked(self: &Arc) -> ReadLockedGraphStore { - let nodes = self.nodes.locked().into(); - let edges = self.edges.locked().into(); - - ReadLockedGraphStore { - nodes, - edges, - graph: self.clone(), - } - } - - pub fn extension(&self) -> &EXT { - &self.ext - } - - pub fn nodes(&self) -> &Arc> { - &self.nodes - } - - pub fn edges(&self) -> &Arc> { - &self.edges - } + pub fn new(graph_dir: Option<&Path>, ext: EXT) -> Self { + let node_meta = Meta::new_for_nodes(); + let edge_meta = Meta::new_for_edges(); + let graph_props_meta = Meta::new_for_graph_props(); - pub fn graph_props(&self) -> &Arc> { - &self.graph_props + Self::new_with_meta(graph_dir, node_meta, edge_meta, graph_props_meta, ext) } - pub fn edge_meta(&self) -> &Meta { - self.edges.edge_meta() - } + pub fn new_with_meta( + graph_dir: Option<&Path>, + node_meta: Meta, + edge_meta: Meta, + graph_props_meta: Meta, + ext: EXT, + ) -> Self { + let nodes_path = graph_dir.map(|graph_dir| graph_dir.join("nodes")); + let edges_path = graph_dir.map(|graph_dir| graph_dir.join("edges")); + let graph_props_path = graph_dir.map(|graph_dir| graph_dir.join("graph_props")); - pub fn node_meta(&self) -> &Meta { - self.nodes.prop_meta() - } + let node_meta = Arc::new(node_meta); + let edge_meta = Arc::new(edge_meta); + let graph_props_meta = Arc::new(graph_props_meta); - pub fn graph_props_meta(&self) -> &Meta { - self.graph_props.meta() - } + let node_storage = Arc::new(NodeStorageInner::new_with_meta( + nodes_path, + node_meta, + edge_meta.clone(), + ext.clone(), + )); + let edge_storage = Arc::new(EdgeStorageInner::new_with_meta( + edges_path, + edge_meta, + ext.clone(), + )); + let graph_prop_storage = Arc::new(GraphPropStorageInner::new_with_meta( + graph_props_path.as_deref(), + graph_props_meta, + ext.clone(), + )); - pub fn earliest(&self) -> i64 { - self.nodes - .stats() - .earliest() - .min(self.edges.stats().earliest()) - } + if let Some(graph_dir) = graph_dir { + ext.config() + .save_to_dir(graph_dir) + .expect("Unrecoverable! Failed to write graph config"); + } - pub fn latest(&self) -> i64 { - self.nodes.stats().latest().max(self.edges.stats().latest()) + Self { + nodes: node_storage, + edges: edge_storage, + graph_props: graph_prop_storage, + event_id: AtomicUsize::new(0), + graph_dir: graph_dir.map(|p| p.to_path_buf()), + ext, + } } pub fn load(graph_dir: impl AsRef) -> Result @@ -164,60 +170,54 @@ impl< }) } - pub fn new_with_meta( - graph_dir: Option<&Path>, - node_meta: Meta, - edge_meta: Meta, - graph_props_meta: Meta, - ext: EXT, - ) -> Self { - let nodes_path = graph_dir.map(|graph_dir| graph_dir.join("nodes")); - let edges_path = graph_dir.map(|graph_dir| graph_dir.join("edges")); - let graph_props_path = graph_dir.map(|graph_dir| graph_dir.join("graph_props")); + pub fn read_locked(self: &Arc) -> ReadLockedGraphStore { + let nodes = self.nodes.locked().into(); + let edges = self.edges.locked().into(); - let node_meta = Arc::new(node_meta); - let edge_meta = Arc::new(edge_meta); - let graph_props_meta = Arc::new(graph_props_meta); + ReadLockedGraphStore { + nodes, + edges, + graph: self.clone(), + } + } - let node_storage = Arc::new(NodeStorageInner::new_with_meta( - nodes_path, - node_meta, - edge_meta.clone(), - ext.clone(), - )); - let edge_storage = Arc::new(EdgeStorageInner::new_with_meta( - edges_path, - edge_meta, - ext.clone(), - )); - let graph_prop_storage = Arc::new(GraphPropStorageInner::new_with_meta( - graph_props_path.as_deref(), - graph_props_meta, - ext.clone(), - )); + pub fn extension(&self) -> &EXT { + &self.ext + } - if let Some(graph_dir) = graph_dir { - ext.config() - .save_to_dir(graph_dir) - .expect("Unrecoverable! Failed to write graph config"); - } + pub fn nodes(&self) -> &Arc> { + &self.nodes + } - Self { - nodes: node_storage, - edges: edge_storage, - graph_props: graph_prop_storage, - event_id: AtomicUsize::new(0), - graph_dir: graph_dir.map(|p| p.to_path_buf()), - ext, - } + pub fn edges(&self) -> &Arc> { + &self.edges } - pub fn new(graph_dir: Option<&Path>, ext: EXT) -> Self { - let node_meta = Meta::new_for_nodes(); - let edge_meta = Meta::new_for_edges(); - let graph_props_meta = Meta::new_for_graph_props(); + pub fn graph_props(&self) -> &Arc> { + &self.graph_props + } - Self::new_with_meta(graph_dir, node_meta, edge_meta, graph_props_meta, ext) + pub fn edge_meta(&self) -> &Meta { + self.edges.edge_meta() + } + + pub fn node_meta(&self) -> &Meta { + self.nodes.prop_meta() + } + + pub fn graph_props_meta(&self) -> &Meta { + self.graph_props.meta() + } + + pub fn earliest(&self) -> i64 { + self.nodes + .stats() + .earliest() + .min(self.edges.stats().earliest()) + } + + pub fn latest(&self) -> i64 { + self.nodes.stats().latest().max(self.edges.stats().latest()) } pub fn add_edge( diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 008c3a5fa3..8ab2495c16 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -175,9 +175,7 @@ impl<'a> SessionAdditionOps for UnlockedSession<'a> { impl InternalAdditionOps for TemporalGraph { type Error = MutationError; - type WS<'a> = UnlockedSession<'a>; - type AtomicAddEdge<'a> = WriteS<'a, Extension>; fn write_lock(&self) -> Result, Self::Error> { diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index a227d2a187..8694512c7c 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -120,6 +120,7 @@ impl Storage { pub(crate) fn load_from(path: impl AsRef) -> Result { let graph = GraphStorage::Unlocked(Arc::new(TemporalGraph::load_from_path(path)?)); + Ok(Self { graph, #[cfg(feature = "search")] diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index 991fb15c9a..2c86df5b92 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -611,7 +611,7 @@ impl Graph { /// #[cfg(feature = "io")] pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result { - //TODO: add support for loading indexes and vectors + // TODO: add support for loading indexes and vectors Ok(Self { inner: Arc::new(Storage::load_from(path.graph_path()?)?), }) From 334fef47b5c0ba6bd5c773333648f73f83af8a54 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 14 Jan 2026 10:29:50 -0500 Subject: [PATCH 40/95] Modify graph load to accept extension --- db4-graph/src/lib.rs | 10 +++++----- db4-storage/src/pages/mod.rs | 11 ++--------- db4-storage/src/pages/test_utils/checkers.rs | 9 ++++++--- raphtory/src/db/api/storage/storage.rs | 13 ++++++++++--- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 0bef50ad4f..930a1a61af 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -22,11 +22,11 @@ use storage::{ nodes::WriteLockedNodePages, }, }, - persist::strategy::PersistenceStrategy, + persist::strategy::{PersistenceConfig, PersistenceStrategy}, resolver::GIDResolverOps, transaction::TransactionManager, wal::Wal, - Extension, GIDResolver, Layer, PersistenceConfig, ReadLockedLayer, WalType, ES, GS, NS, + Extension, GIDResolver, Layer, ReadLockedLayer, WalType, ES, GS, NS, }; use tempfile::TempDir; @@ -87,7 +87,7 @@ impl Default for TemporalGraph { fn default() -> Self { let config = PersistenceConfig::default(); let wal = Arc::new(WalType::new(None).unwrap()); - Self::new(Extension::new(config, wal)).unwrap() + Self::new(::new(config, wal)).unwrap() } } @@ -162,9 +162,9 @@ impl, ES = ES, GS = GS>> Tempora }) } - pub fn load_from_path(path: impl AsRef) -> Result { + pub fn load_from_path(path: impl AsRef, ext: EXT) -> Result { let path = path.as_ref(); - let storage = Layer::load(path)?; + let storage = Layer::load(path, ext)?; let id_type = storage.nodes().id_type(); let gid_resolver_dir = path.join("gid_resolver"); diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 4329c985d7..5d7ad80edf 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -3,10 +3,9 @@ use crate::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, - persist::strategy::{PersistenceConfig, PersistenceStrategy}, + persist::strategy::PersistenceStrategy, properties::props_meta_writer::PropsMetaWriter, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, - wal::Wal, }; use edge_page::writer::EdgeWriter; use edge_store::EdgeStorageInner; @@ -133,18 +132,12 @@ impl< } } - pub fn load(graph_dir: impl AsRef) -> Result + pub fn load(graph_dir: impl AsRef, ext: EXT) -> Result { let nodes_path = graph_dir.as_ref().join("nodes"); let edges_path = graph_dir.as_ref().join("edges"); let graph_props_path = graph_dir.as_ref().join("graph_props"); - let config = PersistenceConfig::load_from_dir(graph_dir.as_ref()) - .unwrap_or_else(|_| PersistenceConfig::default()); - - let wal = Arc::new(EXT::WalType::new(Some(graph_dir.as_ref().to_path_buf()))?); - let ext = EXT::new(config, wal); - let edge_storage = Arc::new(EdgeStorageInner::load(edges_path, ext.clone())?); let edge_meta = edge_storage.edge_meta().clone(); let node_storage = Arc::new(NodeStorageInner::load(nodes_path, edge_meta, ext.clone())?); diff --git a/db4-storage/src/pages/test_utils/checkers.rs b/db4-storage/src/pages/test_utils/checkers.rs index bc7d9b7766..945395d8dc 100644 --- a/db4-storage/src/pages/test_utils/checkers.rs +++ b/db4-storage/src/pages/test_utils/checkers.rs @@ -207,9 +207,10 @@ pub fn check_edges_support< check("pre-drop", &edges, &graph); if check_load { + let ext = graph.extension().clone(); drop(graph); - let maybe_ns = GraphStore::::load(graph_dir.path()); + let maybe_ns = GraphStore::::load(graph_dir.path(), ext); match maybe_ns { Ok(graph) => { @@ -351,8 +352,9 @@ pub fn check_graph_with_nodes_support< check_fn(temp_props, const_props, &graph); if check_load { + let ext = graph.extension().clone(); drop(graph); - let graph = GraphStore::::load(graph_dir.path()).unwrap(); + let graph = GraphStore::::load(graph_dir.path(), ext).unwrap(); check_fn(temp_props, const_props, &graph); } } @@ -505,9 +507,10 @@ pub fn check_graph_with_props_support< if check_load { // Load the graph from disk and check again + let ext = graph.extension().clone(); drop(graph); - let graph = GraphStore::::load(graph_dir.path()).unwrap(); + let graph = GraphStore::::load(graph_dir.path(), ext).unwrap(); black_box(check_fn(edges, &graph)); } } diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 8694512c7c..2add677a0a 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -109,7 +109,8 @@ impl Storage { let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = Some(graph_dir.wal_dir()); let wal = Arc::new(WalType::new(wal_dir)?); - let temporal_graph = TemporalGraph::new_with_path(path, Extension::new(config, wal))?; + let ext = ::new(config, wal); + let temporal_graph = TemporalGraph::new_with_path(path, ext)?; Ok(Self { graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), @@ -119,10 +120,16 @@ impl Storage { } pub(crate) fn load_from(path: impl AsRef) -> Result { - let graph = GraphStorage::Unlocked(Arc::new(TemporalGraph::load_from_path(path)?)); + let config = PersistenceConfig::load_from_dir(path.as_ref()) + .unwrap_or_else(|_| PersistenceConfig::default()); + let graph_dir = GraphDir::from(path.as_ref()); + let wal_dir = Some(graph_dir.wal_dir()); + let wal = Arc::new(WalType::new(wal_dir)?); + let ext = ::new(config, wal); + let temporal_graph = TemporalGraph::load_from_path(path, ext)?; Ok(Self { - graph, + graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), }) From 30c27440f4736afa57d2895d963a66d714e51ce6 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 14 Jan 2026 12:40:47 -0500 Subject: [PATCH 41/95] Use PersistenceConfig::new instead of strategy constructors --- db4-graph/src/lib.rs | 2 +- db4-storage/src/pages/mod.rs | 32 +++++++++++++----------- db4-storage/src/persist/strategy.rs | 14 ----------- db4-storage/src/segments/node/segment.rs | 6 +++-- raphtory/src/db/api/storage/storage.rs | 4 +-- 5 files changed, 25 insertions(+), 33 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 930a1a61af..f91d299a45 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -87,7 +87,7 @@ impl Default for TemporalGraph { fn default() -> Self { let config = PersistenceConfig::default(); let wal = Arc::new(WalType::new(None).unwrap()); - Self::new(::new(config, wal)).unwrap() + Self::new(Extension::new(config, wal)).unwrap() } } diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 5d7ad80edf..942887de6e 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -471,14 +471,11 @@ pub fn resolve_pos>(i: I, max_page_len: u32) -> (usize, Lo mod test { use super::GraphStore; use crate::{ - Extension, Layer, - api::nodes::{NodeEntryOps, NodeRefOps}, - pages::test_utils::{ - AddEdge, Fixture, NodeFixture, check_edges_support, check_graph_with_nodes_support, - check_graph_with_props_support, edges_strat, edges_strat_with_layers, make_edges, - make_nodes, - }, + api::nodes::{NodeEntryOps, NodeRefOps}, pages::test_utils::{ + check_edges_support, check_graph_with_nodes_support, check_graph_with_props_support, edges_strat, edges_strat_with_layers, make_edges, make_nodes, AddEdge, Fixture, NodeFixture + }, persist::strategy::{PersistenceConfig, PersistenceStrategy, DEFAULT_MAX_MEMORY_BYTES}, wal::no_wal::NoWal, Extension, Layer }; + use std::sync::Arc; use chrono::DateTime; use proptest::prelude::*; use raphtory_api::core::entities::properties::prop::Prop; @@ -493,7 +490,8 @@ mod test { .collect(); check_edges_support(edges, par_load, false, |graph_dir| { - Layer::new(Some(graph_dir), Extension::new(chunk_size, chunk_size)) + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, chunk_size, chunk_size); + Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -503,7 +501,8 @@ mod test { par_load: bool, ) { check_edges_support(edges, par_load, false, |graph_dir| { - Layer::new(Some(graph_dir), Extension::new(chunk_size, chunk_size)) + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, chunk_size, chunk_size); + Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -575,7 +574,8 @@ mod test { #[test] fn test_add_one_edge_get_num_nodes() { let graph_dir = tempfile::tempdir().unwrap(); - let g = Layer::new(Some(graph_dir.path()), Extension::new(32, 32)); + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); + let g = Layer::new(Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal))); g.add_edge(4, 7, 3).unwrap(); assert_eq!(g.nodes().num_nodes(), 2); } @@ -583,7 +583,8 @@ mod test { #[test] fn test_node_additions_1() { let graph_dir = tempfile::tempdir().unwrap(); - let g = GraphStore::new(Some(graph_dir.path()), Extension::new(32, 32)); + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); + let g = GraphStore::new(Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal))); g.add_edge(4, 7, 3).unwrap(); let check = |g: &Layer| { @@ -625,7 +626,8 @@ mod test { #[test] fn node_temporal_props() { let graph_dir = tempfile::tempdir().unwrap(); - let g = Layer::new(Some(graph_dir.path()), Extension::new(32, 32)); + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); + let g = Layer::new(Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal))); g.add_node_props::(1, 0, 0, vec![]) .expect("Failed to add node props"); g.add_node_props::(2, 0, 0, vec![]) @@ -1428,13 +1430,15 @@ mod test { fn check_graph_with_nodes(node_page_len: u32, edge_page_len: u32, fixture: &NodeFixture) { check_graph_with_nodes_support(fixture, false, |path| { - Layer::new(Some(path), Extension::new(node_page_len, edge_page_len)) + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, node_page_len, edge_page_len); + Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } fn check_graph_with_props(node_page_len: u32, edge_page_len: u32, fixture: &Fixture) { check_graph_with_props_support(fixture, false, |path| { - Layer::new(Some(path), Extension::new(node_page_len, edge_page_len)) + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, node_page_len, edge_page_len); + Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } } diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 3412ac7b10..81e5ab81dc 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -138,20 +138,6 @@ pub struct NoOpStrategy { wal: Arc, } -impl NoOpStrategy { - pub fn new(max_node_page_len: u32, max_edge_page_len: u32) -> Self { - Self { - config: PersistenceConfig { - max_node_page_len, - max_edge_page_len, - max_memory_bytes: usize::MAX, - bg_flush_enabled: true, - node_types: Vec::new(), - }, - wal: Arc::new(NoWal), - } - } -} impl PersistenceStrategy for NoOpStrategy { type ES = EdgeSegmentView; diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 609b343d2c..65aee9463a 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -572,7 +572,8 @@ mod test { LocalPOS, NodeSegmentView, api::nodes::NodeSegmentOps, pages::{layer_counter::GraphStats, node_page::writer::NodeWriter}, - persist::strategy::NoOpStrategy, + persist::strategy::{NoOpStrategy, PersistenceConfig, PersistenceStrategy, DEFAULT_MAX_MEMORY_BYTES}, + wal::no_wal::NoWal, }; use raphtory_api::core::entities::properties::{ meta::Meta, @@ -587,7 +588,8 @@ mod test { let node_meta = Arc::new(Meta::default()); let edge_meta = Arc::new(Meta::default()); let path = tempdir().unwrap(); - let ext = NoOpStrategy::new(10, 10); + let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 10, 10); + let ext = NoOpStrategy::new(config, Arc::new(NoWal)); let segment = NodeSegmentView::new( 0, node_meta.clone(), diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 2add677a0a..48696bab8b 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -109,7 +109,7 @@ impl Storage { let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = Some(graph_dir.wal_dir()); let wal = Arc::new(WalType::new(wal_dir)?); - let ext = ::new(config, wal); + let ext = Extension::new(config, wal); let temporal_graph = TemporalGraph::new_with_path(path, ext)?; Ok(Self { @@ -125,7 +125,7 @@ impl Storage { let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = Some(graph_dir.wal_dir()); let wal = Arc::new(WalType::new(wal_dir)?); - let ext = ::new(config, wal); + let ext = Extension::new(config, wal); let temporal_graph = TemporalGraph::load_from_path(path, ext)?; Ok(Self { From 8ad9128aabc6e837d84d393fea0f144922ba6975 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 14 Jan 2026 12:41:45 -0500 Subject: [PATCH 42/95] Run fmt --- db4-graph/src/replay.rs | 13 +-- db4-storage/src/api/nodes.rs | 7 +- db4-storage/src/lib.rs | 3 +- db4-storage/src/pages/edge_page/writer.rs | 5 +- db4-storage/src/pages/edge_store.rs | 5 +- db4-storage/src/pages/graph_prop_store.rs | 4 +- db4-storage/src/pages/mod.rs | 74 ++++++++++++---- db4-storage/src/pages/node_page/writer.rs | 9 +- db4-storage/src/pages/session.rs | 53 +++++------ db4-storage/src/persist/strategy.rs | 28 +++--- .../src/segments/graph_prop/segment.rs | 9 +- db4-storage/src/segments/node/segment.rs | 24 ++--- raphtory-storage/src/mutation/addition_ops.rs | 2 +- .../src/mutation/addition_ops_ext.rs | 11 +-- .../src/mutation/durability_ops.rs | 2 +- raphtory-storage/src/mutation/mod.rs | 5 +- raphtory/src/db/api/mutation/addition_ops.rs | 55 ++++++++---- raphtory/src/db/api/storage/storage.rs | 13 +-- raphtory/src/db/graph/edge.rs | 10 +-- raphtory/src/io/arrow/df_loaders.rs | 88 ++++++++++--------- raphtory/src/serialise/serialise.rs | 2 +- 21 files changed, 243 insertions(+), 179 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index bfff5fb7a8..8398420052 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -4,18 +4,19 @@ use crate::WriteLockedGraph; use raphtory_api::core::{ - entities::properties::meta::STATIC_GRAPH_LAYER_ID, - entities::{properties::prop::Prop, EID, GID, VID}, + entities::{ + properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, + EID, GID, VID, + }, storage::timeindex::TimeIndexEntry, }; use raphtory_core::entities::GidRef; -use storage::pages::resolve_pos; -use storage::resolver::GIDResolverOps; use storage::{ - api::edges::EdgeSegmentOps, - api::nodes::NodeSegmentOps, + api::{edges::EdgeSegmentOps, nodes::NodeSegmentOps}, error::StorageError, + pages::resolve_pos, persist::strategy::PersistenceStrategy, + resolver::GIDResolverOps, wal::{GraphReplay, TransactionID, LSN}, ES, GS, NS, }; diff --git a/db4-storage/src/api/nodes.rs b/db4-storage/src/api/nodes.rs index ae7d045fc0..c05a6d42fd 100644 --- a/db4-storage/src/api/nodes.rs +++ b/db4-storage/src/api/nodes.rs @@ -25,7 +25,12 @@ use std::{ }; use crate::{ - error::StorageError, gen_ts::LayerIter, segments::node::segment::MemNodeSegment, utils::{Iter2, Iter3, Iter4}, wal::LSN, LocalPOS + LocalPOS, + error::StorageError, + gen_ts::LayerIter, + segments::node::segment::MemNodeSegment, + utils::{Iter2, Iter3, Iter4}, + wal::LSN, }; pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index 39a6beb0b1..4888a0186c 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -11,7 +11,8 @@ use crate::{ }, generic_t_props::GenericTProps, pages::{ - edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage, GraphStore, ReadLockedGraphStore + GraphStore, ReadLockedGraphStore, edge_store::ReadLockedEdgeStorage, + node_store::ReadLockedNodeStorage, }, persist::strategy::{NoOpStrategy, PersistenceStrategy}, resolver::mapping_resolver::MappingResolver, diff --git a/db4-storage/src/pages/edge_page/writer.rs b/db4-storage/src/pages/edge_page/writer.rs index efd956e20d..320a660f4a 100644 --- a/db4-storage/src/pages/edge_page/writer.rs +++ b/db4-storage/src/pages/edge_page/writer.rs @@ -3,7 +3,10 @@ use crate::{ segments::edge::segment::MemEdgeSegment, }; use arrow_array::{ArrayRef, BooleanArray}; -use raphtory_api::core::entities::{properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, VID}; +use raphtory_api::core::entities::{ + VID, + properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, +}; use raphtory_core::{ entities::EID, storage::timeindex::{AsTime, TimeIndexEntry}, diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 3383caf8f0..b2fa36fadd 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -17,7 +17,10 @@ use crate::{ segments::edge::segment::MemEdgeSegment, }; use parking_lot::{RwLock, RwLockWriteGuard}; -use raphtory_api::core::entities::{properties::meta::{Meta, STATIC_GRAPH_LAYER_ID}, EID, VID}; +use raphtory_api::core::entities::{ + EID, VID, + properties::meta::{Meta, STATIC_GRAPH_LAYER_ID}, +}; use raphtory_core::{ entities::{ELID, LayerIds}, storage::timeindex::{AsTime, TimeIndexEntry}, diff --git a/db4-storage/src/pages/graph_prop_store.rs b/db4-storage/src/pages/graph_prop_store.rs index 7f005d6319..105895db5d 100644 --- a/db4-storage/src/pages/graph_prop_store.rs +++ b/db4-storage/src/pages/graph_prop_store.rs @@ -31,7 +31,9 @@ pub struct GraphPropStorageInner { ext: EXT, } -impl, EXT: PersistenceStrategy> GraphPropStorageInner { +impl, EXT: PersistenceStrategy> + GraphPropStorageInner +{ pub fn new_with_meta(path: Option<&Path>, meta: Arc, ext: EXT) -> Self { let page = Arc::new(GS::new(meta.clone(), path, ext.clone())); diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 942887de6e..80d55f859a 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -9,8 +9,8 @@ use crate::{ }; use edge_page::writer::EdgeWriter; use edge_store::EdgeStorageInner; -use node_page::writer::{NodeWriter, NodeWriters}; use graph_prop_store::GraphPropStorageInner; +use node_page::writer::{NodeWriter, NodeWriters}; use node_store::NodeStorageInner; use parking_lot::RwLockWriteGuard; use raphtory_api::core::{ @@ -132,8 +132,7 @@ impl< } } - pub fn load(graph_dir: impl AsRef, ext: EXT) -> Result - { + pub fn load(graph_dir: impl AsRef, ext: EXT) -> Result { let nodes_path = graph_dir.as_ref().join("nodes"); let edges_path = graph_dir.as_ref().join("edges"); let graph_props_path = graph_dir.as_ref().join("graph_props"); @@ -355,12 +354,18 @@ impl< let src = self.node_writer(src_chunk); let dst = self.node_writer(dst_chunk); - NodeWriters { src, dst: Some(dst) } + NodeWriters { + src, + dst: Some(dst), + } } else if src_chunk > dst_chunk { let dst = self.node_writer(dst_chunk); let src = self.node_writer(src_chunk); - NodeWriters { src, dst: Some(dst) } + NodeWriters { + src, + dst: Some(dst), + } } else { let src = self.node_writer(src_chunk); @@ -404,7 +409,10 @@ impl< } } else { let writer = self.node_writer(src_chunk); - NodeWriters { src: writer, dst: None } + NodeWriters { + src: writer, + dst: None, + } }; let (_, src_pos) = self.nodes.resolve_pos(src); @@ -458,7 +466,6 @@ impl Drop for GraphStore } } - #[inline(always)] pub fn resolve_pos>(i: I, max_page_len: u32) -> (usize, LocalPOS) { let i = i.into(); @@ -471,15 +478,21 @@ pub fn resolve_pos>(i: I, max_page_len: u32) -> (usize, Lo mod test { use super::GraphStore; use crate::{ - api::nodes::{NodeEntryOps, NodeRefOps}, pages::test_utils::{ - check_edges_support, check_graph_with_nodes_support, check_graph_with_props_support, edges_strat, edges_strat_with_layers, make_edges, make_nodes, AddEdge, Fixture, NodeFixture - }, persist::strategy::{PersistenceConfig, PersistenceStrategy, DEFAULT_MAX_MEMORY_BYTES}, wal::no_wal::NoWal, Extension, Layer + Extension, Layer, + api::nodes::{NodeEntryOps, NodeRefOps}, + pages::test_utils::{ + AddEdge, Fixture, NodeFixture, check_edges_support, check_graph_with_nodes_support, + check_graph_with_props_support, edges_strat, edges_strat_with_layers, make_edges, + make_nodes, + }, + persist::strategy::{DEFAULT_MAX_MEMORY_BYTES, PersistenceConfig, PersistenceStrategy}, + wal::no_wal::NoWal, }; - use std::sync::Arc; use chrono::DateTime; use proptest::prelude::*; use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{entities::VID, storage::timeindex::TimeIndexOps}; + use std::sync::Arc; fn check_edges(edges: Vec<(impl Into, impl Into)>, chunk_size: u32, par_load: bool) { // Set optional layer_id to None @@ -490,7 +503,11 @@ mod test { .collect(); check_edges_support(edges, par_load, false, |graph_dir| { - let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, chunk_size, chunk_size); + let config = PersistenceConfig::new_with_page_lens( + DEFAULT_MAX_MEMORY_BYTES, + chunk_size, + chunk_size, + ); Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -501,7 +518,11 @@ mod test { par_load: bool, ) { check_edges_support(edges, par_load, false, |graph_dir| { - let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, chunk_size, chunk_size); + let config = PersistenceConfig::new_with_page_lens( + DEFAULT_MAX_MEMORY_BYTES, + chunk_size, + chunk_size, + ); Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -575,7 +596,10 @@ mod test { fn test_add_one_edge_get_num_nodes() { let graph_dir = tempfile::tempdir().unwrap(); let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); - let g = Layer::new(Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal))); + let g = Layer::new( + Some(graph_dir.path()), + Extension::new(config, Arc::new(NoWal)), + ); g.add_edge(4, 7, 3).unwrap(); assert_eq!(g.nodes().num_nodes(), 2); } @@ -584,7 +608,10 @@ mod test { fn test_node_additions_1() { let graph_dir = tempfile::tempdir().unwrap(); let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); - let g = GraphStore::new(Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal))); + let g = GraphStore::new( + Some(graph_dir.path()), + Extension::new(config, Arc::new(NoWal)), + ); g.add_edge(4, 7, 3).unwrap(); let check = |g: &Layer| { @@ -627,7 +654,10 @@ mod test { fn node_temporal_props() { let graph_dir = tempfile::tempdir().unwrap(); let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); - let g = Layer::new(Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal))); + let g = Layer::new( + Some(graph_dir.path()), + Extension::new(config, Arc::new(NoWal)), + ); g.add_node_props::(1, 0, 0, vec![]) .expect("Failed to add node props"); g.add_node_props::(2, 0, 0, vec![]) @@ -1430,14 +1460,22 @@ mod test { fn check_graph_with_nodes(node_page_len: u32, edge_page_len: u32, fixture: &NodeFixture) { check_graph_with_nodes_support(fixture, false, |path| { - let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, node_page_len, edge_page_len); + let config = PersistenceConfig::new_with_page_lens( + DEFAULT_MAX_MEMORY_BYTES, + node_page_len, + edge_page_len, + ); Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } fn check_graph_with_props(node_page_len: u32, edge_page_len: u32, fixture: &Fixture) { check_graph_with_props_support(fixture, false, |path| { - let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, node_page_len, edge_page_len); + let config = PersistenceConfig::new_with_page_lens( + DEFAULT_MAX_MEMORY_BYTES, + node_page_len, + edge_page_len, + ); Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } diff --git a/db4-storage/src/pages/node_page/writer.rs b/db4-storage/src/pages/node_page/writer.rs index b54c635569..981a9b9ad2 100644 --- a/db4-storage/src/pages/node_page/writer.rs +++ b/db4-storage/src/pages/node_page/writer.rs @@ -66,9 +66,7 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri let e_id = e_id.into(); let layer_id = e_id.layer(); - let (is_new_node, add) = self - .mut_segment - .add_outbound_edge(t, src_pos, dst, e_id); + let (is_new_node, add) = self.mut_segment.add_outbound_edge(t, src_pos, dst, e_id); self.page.increment_est_size(add); if is_new_node && !self.page.check_node(src_pos, layer_id) { @@ -110,9 +108,7 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri } let layer = e_id.layer(); let dst_pos = dst_pos.into(); - let (is_new_node, add) = self - .mut_segment - .add_inbound_edge(t, dst_pos, src, e_id); + let (is_new_node, add) = self.mut_segment.add_inbound_edge(t, dst_pos, src, e_id); self.page.increment_est_size(add); @@ -220,7 +216,6 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> Drop } } - /// Holds writers for src and dst node segments when adding an edge. /// If both nodes are in the same segment, `dst` is `None` and `src` is used for both. pub struct NodeWriters<'a, MP: DerefMut, NS: NodeSegmentOps> { diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index e5eb7e249f..9971e7b5e4 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -9,7 +9,10 @@ use crate::{ wal::LSN, }; use parking_lot::RwLockWriteGuard; -use raphtory_api::core::{entities::properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, storage::dict_mapper::MaybeNew}; +use raphtory_api::core::{ + entities::properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, + storage::dict_mapper::MaybeNew, +}; use raphtory_core::{ entities::{EID, ELID, VID}, storage::timeindex::AsTime, @@ -128,8 +131,7 @@ impl< .max_page_len(); let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - self.edge_writer - .delete_edge(t, edge_pos, src, dst, layer); + self.edge_writer.delete_edge(t, edge_pos, src, dst, layer); let edge_id = edge.inner(); @@ -141,18 +143,12 @@ impl< .get_out_edge(src_pos, dst, edge_id.layer()) .is_none() { - self.node_writers.get_mut_src().add_outbound_edge( - Some(t), - src_pos, - dst, - edge_id, - ); - self.node_writers.get_mut_dst().add_inbound_edge( - Some(t), - dst_pos, - src, - edge_id, - ); + self.node_writers + .get_mut_src() + .add_outbound_edge(Some(t), src_pos, dst, edge_id); + self.node_writers + .get_mut_dst() + .add_inbound_edge(Some(t), dst_pos, src, edge_id); } self.node_writers @@ -164,33 +160,32 @@ impl< } } - pub fn add_static_edge( - &mut self, - src: impl Into, - dst: impl Into, - ) -> MaybeNew { + pub fn add_static_edge(&mut self, src: impl Into, dst: impl Into) -> MaybeNew { let src = src.into(); let dst = dst.into(); let (_, src_pos) = self.graph.nodes().resolve_pos(src); let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); - let existing_eid = self - .node_writers - .get_mut_src() - .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); + let existing_eid = + self.node_writers + .get_mut_src() + .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); // Edge already exists, so no need to add it again. if let Some(eid) = existing_eid { - return MaybeNew::Existing(eid) + return MaybeNew::Existing(eid); } let edge_pos = None; let already_counted = false; - let edge_pos = - self.edge_writer.add_static_edge(edge_pos, src, dst, already_counted); - let edge_id = - edge_pos.as_eid(self.edge_writer.segment_id(), self.graph.edges().max_page_len()); + let edge_pos = self + .edge_writer + .add_static_edge(edge_pos, src, dst, already_counted); + let edge_id = edge_pos.as_eid( + self.edge_writer.segment_id(), + self.graph.edges().max_page_len(), + ); self.node_writers .get_mut_src() diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 81e5ab81dc..45a439dea4 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -1,16 +1,14 @@ -use std::ops::DerefMut; -use std::fmt::Debug; -use std::path::Path; -use std::sync::Arc; -use serde::{Deserialize, Serialize}; -use crate::error::StorageError; -use crate::segments::{ - edge::segment::{EdgeSegmentView, MemEdgeSegment}, - graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, - node::segment::{MemNodeSegment, NodeSegmentView}, +use crate::{ + error::StorageError, + segments::{ + edge::segment::{EdgeSegmentView, MemEdgeSegment}, + graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, + node::segment::{MemNodeSegment, NodeSegmentView}, + }, + wal::{Wal, no_wal::NoWal}, }; -use crate::wal::no_wal::NoWal; -use crate::wal::Wal; +use serde::{Deserialize, Serialize}; +use std::{fmt::Debug, ops::DerefMut, path::Path, sync::Arc}; pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 @@ -84,10 +82,7 @@ impl PersistenceConfig { } pub fn set_node_types(&mut self, types: impl IntoIterator>) { - self.node_types = types - .into_iter() - .map(|s| s.as_ref().to_string()) - .collect(); + self.node_types = types.into_iter().map(|s| s.as_ref().to_string()).collect(); } } @@ -138,7 +133,6 @@ pub struct NoOpStrategy { wal: Arc, } - impl PersistenceStrategy for NoOpStrategy { type ES = EdgeSegmentView; type NS = NodeSegmentView; diff --git a/db4-storage/src/segments/graph_prop/segment.rs b/db4-storage/src/segments/graph_prop/segment.rs index bdfbde7032..34ed599f83 100644 --- a/db4-storage/src/segments/graph_prop/segment.rs +++ b/db4-storage/src/segments/graph_prop/segment.rs @@ -1,5 +1,7 @@ use crate::{ - error::StorageError, segments::{HasRow, SegmentContainer}, wal::LSN, + error::StorageError, + segments::{HasRow, SegmentContainer}, + wal::LSN, }; use raphtory_api::core::entities::properties::{meta::Meta, prop::Prop}; use raphtory_core::{ @@ -83,7 +85,10 @@ impl MemGraphPropSegment { pub fn take(&mut self) -> Self { let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); - Self { layers, lsn: self.lsn } + Self { + layers, + lsn: self.lsn, + } } pub fn lsn(&self) -> LSN { diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 65aee9463a..c8106afdc7 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -1,7 +1,14 @@ use crate::{ - api::nodes::{LockedNSSegment, NodeSegmentOps}, error::StorageError, loop_lock_write, persist::strategy::PersistenceStrategy, segments::{ - node::entry::{MemNodeEntry, MemNodeRef}, HasRow, SegmentContainer - }, wal::LSN, LocalPOS + LocalPOS, + api::nodes::{LockedNSSegment, NodeSegmentOps}, + error::StorageError, + loop_lock_write, + persist::strategy::PersistenceStrategy, + segments::{ + HasRow, SegmentContainer, + node::entry::{MemNodeEntry, MemNodeRef}, + }, + wal::LSN, }; use either::Either; use parking_lot::lock_api::ArcRwLockReadGuard; @@ -279,12 +286,7 @@ impl MemNodeSegment { prop_mut_entry.addition_timestamp(ts, e_id); } - pub fn update_timestamp( - &mut self, - t: T, - node_pos: LocalPOS, - e_id: ELID, - ) -> usize { + pub fn update_timestamp(&mut self, t: T, node_pos: LocalPOS, e_id: ELID) -> usize { let layer_id = e_id.layer(); let (est_size, row) = { let segment_container = self.get_or_create_layer(layer_id); //&mut self.layers[e_id.layer()]; @@ -572,7 +574,9 @@ mod test { LocalPOS, NodeSegmentView, api::nodes::NodeSegmentOps, pages::{layer_counter::GraphStats, node_page::writer::NodeWriter}, - persist::strategy::{NoOpStrategy, PersistenceConfig, PersistenceStrategy, DEFAULT_MAX_MEMORY_BYTES}, + persist::strategy::{ + DEFAULT_MAX_MEMORY_BYTES, NoOpStrategy, PersistenceConfig, PersistenceStrategy, + }, wal::no_wal::NoWal, }; use raphtory_api::core::entities::properties::{ diff --git a/raphtory-storage/src/mutation/addition_ops.rs b/raphtory-storage/src/mutation/addition_ops.rs index 8922561c81..ec7b49da8b 100644 --- a/raphtory-storage/src/mutation/addition_ops.rs +++ b/raphtory-storage/src/mutation/addition_ops.rs @@ -20,7 +20,7 @@ use raphtory_api::{ inherit::Base, }; use raphtory_core::entities::{nodes::node_ref::NodeRef, ELID}; -use storage::{Extension, wal::LSN}; +use storage::{wal::LSN, Extension}; pub trait InternalAdditionOps { type Error: From; diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 8ab2495c16..53bafc8a66 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -17,15 +17,16 @@ use raphtory_core::{ nodes::node_ref::{AsNodeRef, NodeRef}, GidRef, EID, ELID, MAX_LAYER, VID, }, - storage::{timeindex::TimeIndexEntry}, + storage::timeindex::TimeIndexEntry, }; use storage::{ pages::{node_page::writer::node_info_as_props, session::WriteSession}, persist::strategy::PersistenceStrategy, properties::props_meta_writer::PropsMetaWriter, resolver::GIDResolverOps, - Extension, transaction::TransactionManager, WalType, ES, NS, GS, + transaction::TransactionManager, wal::LSN, + Extension, WalType, ES, GS, NS, }; pub struct WriteS<'a, EXT: PersistenceStrategy, ES = ES, GS = GS>> { @@ -56,7 +57,8 @@ impl<'a, EXT: PersistenceStrategy, ES = ES, GS = GS>> Edg eid: MaybeNew, props: impl IntoIterator, ) -> MaybeNew { - self.static_session.add_edge_into_layer(t, src, dst, eid, props); + self.static_session + .add_edge_into_layer(t, src, dst, eid, props); eid } @@ -75,8 +77,7 @@ impl<'a, EXT: PersistenceStrategy, ES = ES, GS = GS>> Edg .add_static_edge(src, dst) .map(|eid| eid.with_layer_deletion(layer)); - self.static_session - .delete_edge_from_layer(t, src, dst, eid); + self.static_session.delete_edge_from_layer(t, src, dst, eid); eid } diff --git a/raphtory-storage/src/mutation/durability_ops.rs b/raphtory-storage/src/mutation/durability_ops.rs index ff2e70e894..c31e578624 100644 --- a/raphtory-storage/src/mutation/durability_ops.rs +++ b/raphtory-storage/src/mutation/durability_ops.rs @@ -1,6 +1,6 @@ -use storage::{transaction::TransactionManager, WalType}; use crate::graph::graph::GraphStorage; use raphtory_api::inherit::Base; +use storage::{transaction::TransactionManager, WalType}; /// Accessor methods for transactions and write-ahead logging. pub trait DurabilityOps { diff --git a/raphtory-storage/src/mutation/mod.rs b/raphtory-storage/src/mutation/mod.rs index 7fb66b7a61..28cd67085d 100644 --- a/raphtory-storage/src/mutation/mod.rs +++ b/raphtory-storage/src/mutation/mod.rs @@ -3,8 +3,7 @@ use crate::{ graph::graph::Immutable, mutation::{ addition_ops::InheritAdditionOps, deletion_ops::InheritDeletionOps, - property_addition_ops::InheritPropertyAdditionOps, - durability_ops::InheritDurabilityOps, + durability_ops::InheritDurabilityOps, property_addition_ops::InheritPropertyAdditionOps, }, }; use parking_lot::RwLockWriteGuard; @@ -31,8 +30,8 @@ use thiserror::Error; pub mod addition_ops; pub mod addition_ops_ext; pub mod deletion_ops; -pub mod property_addition_ops; pub mod durability_ops; +pub mod property_addition_ops; pub type NodeWriterT<'a> = NodeWriter<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>; pub type EdgeWriterT<'a> = EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES>; diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index 15b6ec98d6..9dfb98ab91 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -15,11 +15,15 @@ use crate::{ }; use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::entities::GID; -use raphtory_storage::mutation::addition_ops::{EdgeWriteLock, InternalAdditionOps}; -use raphtory_storage::mutation::durability_ops::DurabilityOps; +use raphtory_storage::mutation::{ + addition_ops::{EdgeWriteLock, InternalAdditionOps}, + durability_ops::DurabilityOps, +}; use storage::wal::{GraphWal, Wal}; -pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> + DurabilityOps { +pub trait AdditionOps: + StaticGraphViewOps + InternalAdditionOps> + DurabilityOps +{ // TODO: Probably add vector reference here like add /// Add a node to the graph /// @@ -145,7 +149,9 @@ pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> + StaticGraphViewOps + DurabilityOps> AdditionOps for G { +impl> + StaticGraphViewOps + DurabilityOps> + AdditionOps for G +{ fn add_node< V: AsNodeRef, T: TryIntoInputTime, @@ -279,8 +285,18 @@ impl> + StaticGraphViewOps + Dura // FIXME: We are logging node -> node id mappings AFTER they are inserted into the // resolver. Make sure resolver mapping CANNOT get to disk before Wal. - let src_gid = src.as_node_ref().as_gid_ref().left().map(|gid_ref| GID::from(gid_ref)).unwrap(); - let dst_gid = dst.as_node_ref().as_gid_ref().left().map(|gid_ref| GID::from(gid_ref)).unwrap(); + let src_gid = src + .as_node_ref() + .as_gid_ref() + .left() + .map(|gid_ref| GID::from(gid_ref)) + .unwrap(); + let dst_gid = dst + .as_node_ref() + .as_gid_ref() + .left() + .map(|gid_ref| GID::from(gid_ref)) + .unwrap(); let src_id = src_id.inner(); let dst_id = dst_id.inner(); @@ -307,18 +323,21 @@ impl> + StaticGraphViewOps + Dura }) .collect::>(); - let lsn = self.wal().log_add_edge( - transaction_id, - ti, - src_gid, - src_id, - dst_gid, - dst_id, - edge_id.inner(), - layer, - layer_id, - props_for_wal, - ).unwrap(); + let lsn = self + .wal() + .log_add_edge( + transaction_id, + ti, + src_gid, + src_id, + dst_gid, + dst_id, + edge_id.inner(), + layer, + layer_id, + props_for_wal, + ) + .unwrap(); let props = props_with_status .into_iter() diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 48696bab8b..6bbae6cda5 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -24,9 +24,9 @@ use raphtory_storage::{ layer_ops::InheritLayerOps, mutation::{ addition_ops::{EdgeWriteLock, InternalAdditionOps, SessionAdditionOps}, - durability_ops::DurabilityOps, addition_ops_ext::{UnlockedSession, WriteS}, deletion_ops::InternalDeletionOps, + durability_ops::DurabilityOps, property_addition_ops::InternalPropertyAdditionOps, EdgeWriterT, NodeWriterT, }, @@ -36,11 +36,15 @@ use std::{ path::Path, sync::Arc, }; -use storage::{transaction::TransactionManager, WalType, wal::{LSN, Wal}}; +use storage::{ + transaction::TransactionManager, + wal::{Wal, LSN}, + WalType, +}; pub use storage::{ - Extension, persist::strategy::{PersistenceConfig, PersistenceStrategy}, + Extension, }; #[cfg(feature = "search")] use { @@ -328,8 +332,7 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { e_id: MaybeNew, props: impl IntoIterator, ) -> MaybeNew { - self.session - .internal_add_edge(t, src, dst, e_id, props) + self.session.internal_add_edge(t, src, dst, e_id, props) } fn internal_delete_edge( diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index 1fa9de0f73..79c57a4c12 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -40,8 +40,8 @@ use raphtory_storage::{ graph::edges::edge_storage_ops::EdgeStorageOps, mutation::{ addition_ops::{EdgeWriteLock, InternalAdditionOps}, - durability_ops::DurabilityOps, deletion_ops::InternalDeletionOps, + durability_ops::DurabilityOps, property_addition_ops::InternalPropertyAdditionOps, }, }; @@ -441,13 +441,7 @@ impl EdgeView { .atomic_add_edge(src, dst, Some(e_id), layer_id) .map_err(into_graph_err)?; - writer.internal_add_edge( - t, - src, - dst, - MaybeNew::New(e_id.with_layer(layer_id)), - props, - ); + writer.internal_add_edge(t, src, dst, MaybeNew::New(e_id.with_layer(layer_id)), props); Ok(()) } diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index f21f14a714..af0b342ea8 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -200,8 +200,7 @@ pub fn load_nodes_from_df< let layer_id = STATIC_GRAPH_LAYER_ID; update_time(t); - writer - .store_node_id_and_node_type(mut_node, layer_id, gid, *node_type); + writer.store_node_id_and_node_type(mut_node, layer_id, gid, *node_type); let t_props = prop_cols.iter_row(row); let c_props = metadata_cols @@ -508,48 +507,51 @@ pub fn load_edges_from_df = vec![]; - let mut c_props: Vec<(usize, Prop)> = vec![]; - - for (row, (src, dst, time, secondary_index, eid, layer, exists)) in - zip.enumerate() - { - if let Some(eid_pos) = locked_page.resolve_pos(*eid) { - let t = TimeIndexEntry(time, secondary_index); - let mut writer = locked_page.writer(); - - t_props.clear(); - t_props.extend(prop_cols.iter_row(row)); - - c_props.clear(); - c_props.extend(metadata_cols.iter_row(row)); - c_props.extend_from_slice(&shared_metadata); - - writer.bulk_add_edge( - t, - eid_pos, - *src, - *dst, - exists, - *layer, - c_props.drain(..), - t_props.drain(..), - ); + write_locked_graph + .edges + .par_iter_mut() + .for_each(|locked_page| { + let zip = izip!( + src_col_resolved.iter(), + dst_col_resolved.iter(), + time_col.iter(), + secondary_index_col.iter(), + eid_col_resolved.iter(), + layer_col_resolved.iter(), + eids_exist + .iter() + .map(|exists| exists.load(Ordering::Relaxed)) + ); + let mut t_props: Vec<(usize, Prop)> = vec![]; + let mut c_props: Vec<(usize, Prop)> = vec![]; + + for (row, (src, dst, time, secondary_index, eid, layer, exists)) in + zip.enumerate() + { + if let Some(eid_pos) = locked_page.resolve_pos(*eid) { + let t = TimeIndexEntry(time, secondary_index); + let mut writer = locked_page.writer(); + + t_props.clear(); + t_props.extend(prop_cols.iter_row(row)); + + c_props.clear(); + c_props.extend(metadata_cols.iter_row(row)); + c_props.extend_from_slice(&shared_metadata); + + writer.bulk_add_edge( + t, + eid_pos, + *src, + *dst, + exists, + *layer, + c_props.drain(..), + t_props.drain(..), + ); + } } - } - }); + }); }); }); diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index de133f71ea..9eda186868 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -2,7 +2,7 @@ use crate::prelude::IndexMutationOps; use crate::{ db::api::{ - mutation::AdditionOps, storage::storage::{PersistenceStrategy}, view::StaticGraphViewOps, + mutation::AdditionOps, storage::storage::PersistenceStrategy, view::StaticGraphViewOps, }, errors::GraphError, serialise::{ From 529a2cdf7282aaf83e1ad48a7c158e88612c750b Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 14 Jan 2026 15:15:29 -0500 Subject: [PATCH 43/95] Use &Path instead of PathBuf for wal --- db4-storage/src/wal/mod.rs | 2 +- db4-storage/src/wal/no_wal.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index d8be9736a2..992be05bf5 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -20,7 +20,7 @@ pub struct WalRecord { /// Core Wal methods. pub trait Wal { - fn new(dir: Option) -> Result + fn new(dir: Option<&Path>) -> Result where Self: Sized; diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs index 4d4923272f..4a30237821 100644 --- a/db4-storage/src/wal/no_wal.rs +++ b/db4-storage/src/wal/no_wal.rs @@ -11,7 +11,7 @@ use crate::{ pub struct NoWal; impl Wal for NoWal { - fn new(_dir: Option) -> Result { + fn new(_dir: Option<&Path>) -> Result { Ok(Self) } diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 6bbae6cda5..156c4b6d8b 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -111,8 +111,8 @@ impl Storage { pub(crate) fn new_at_path(path: impl AsRef) -> Result { let config = PersistenceConfig::default(); let graph_dir = GraphDir::from(path.as_ref()); - let wal_dir = Some(graph_dir.wal_dir()); - let wal = Arc::new(WalType::new(wal_dir)?); + let wal_dir = graph_dir.wal_dir(); + let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); let ext = Extension::new(config, wal); let temporal_graph = TemporalGraph::new_with_path(path, ext)?; @@ -127,8 +127,8 @@ impl Storage { let config = PersistenceConfig::load_from_dir(path.as_ref()) .unwrap_or_else(|_| PersistenceConfig::default()); let graph_dir = GraphDir::from(path.as_ref()); - let wal_dir = Some(graph_dir.wal_dir()); - let wal = Arc::new(WalType::new(wal_dir)?); + let wal_dir = graph_dir.wal_dir(); + let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); let ext = Extension::new(config, wal); let temporal_graph = TemporalGraph::load_from_path(path, ext)?; From 9b9af6e38854bd492d3d8291185d45f20c75d25f Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 15 Jan 2026 09:32:11 -0500 Subject: [PATCH 44/95] Set next_lsn after replay --- db4-storage/src/wal/entry.rs | 1 + db4-storage/src/wal/mod.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index 7b0b0e6745..46aa0da791 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -41,6 +41,7 @@ impl GraphWal for NoWal { } fn replay_to_graph( + &self, _dir: impl AsRef, _graph: &mut G, ) -> Result<(), StorageError> { diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 992be05bf5..ec82fa1fbf 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -68,6 +68,7 @@ pub trait GraphWal { /// Replays and applies all the wal entries in the given directory to the given graph. fn replay_to_graph( + &self, dir: impl AsRef, graph: &mut G, ) -> Result<(), StorageError>; From 8334e8381c2cf31e2b0fcc17cce28a8069d5f695 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 15 Jan 2026 10:35:10 -0500 Subject: [PATCH 45/95] Change replay to be an instance method --- db4-storage/src/wal/entry.rs | 11 +++----- db4-storage/src/wal/mod.rs | 47 ++++++++++++++++++++++++----------- db4-storage/src/wal/no_wal.rs | 6 ++--- 3 files changed, 39 insertions(+), 25 deletions(-) diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index 46aa0da791..be2f5c3fff 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -1,5 +1,3 @@ -use std::path::Path; - use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{ entities::{EID, GID, VID}, @@ -34,17 +32,14 @@ impl GraphWal for NoWal { Ok(0) } - fn replay_iter( - _dir: impl AsRef, - ) -> impl Iterator> { - std::iter::once(Ok((0, ()))) + fn replay_iter(&self) -> impl Iterator> { + std::iter::empty() } fn replay_to_graph( &self, - _dir: impl AsRef, _graph: &mut G, ) -> Result<(), StorageError> { - todo!() + panic!("NoWAL does not support replay") } } diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index ec82fa1fbf..712572ff19 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -12,12 +12,6 @@ pub mod no_wal; pub type LSN = u64; pub type TransactionID = u64; -#[derive(Debug)] -pub struct WalRecord { - pub lsn: LSN, - pub data: Vec, -} - /// Core Wal methods. pub trait Wal { fn new(dir: Option<&Path>) -> Result @@ -34,8 +28,36 @@ pub trait Wal { /// `cutoff_lsn` acts as a hint for which records can be safely discarded during rotation. fn rotate(&self, cutoff_lsn: LSN) -> Result<(), StorageError>; - /// Returns an iterator over the wal entries in the given directory. - fn replay(dir: impl AsRef) -> impl Iterator>; + /// Returns an iterator over the entries in the wal. + fn replay(&self) -> impl Iterator>; +} + +#[derive(Debug)] +pub struct ReplayRecord { + lsn: LSN, + + data: Vec, + + /// The raw bytes of the WAL entry stored on disk, including CRC data. + raw_bytes: Vec, +} + +impl ReplayRecord { + pub fn new(lsn: LSN, data: Vec, raw_bytes: Vec) -> Self { + Self { lsn, data, raw_bytes } + } + + pub fn lsn(&self) -> LSN { + self.lsn + } + + pub fn data(&self) -> &[u8] { + &self.data + } + + pub fn raw_bytes(&self) -> &[u8] { + &self.raw_bytes + } } // Raphtory-specific logging & replay methods. @@ -61,15 +83,12 @@ pub trait GraphWal { /// `lsn` has been persisted to disk. fn log_checkpoint(&self, lsn: LSN) -> Result; - /// Returns an iterator over the wal entries in the given directory. - fn replay_iter( - dir: impl AsRef, - ) -> impl Iterator>; + /// Returns an iterator over the entries in the wal. + fn replay_iter(&self) -> impl Iterator>; - /// Replays and applies all the wal entries in the given directory to the given graph. + /// Replays and applies all the entries in the wal to the given graph. fn replay_to_graph( &self, - dir: impl AsRef, graph: &mut G, ) -> Result<(), StorageError>; } diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs index 4a30237821..59271acc7c 100644 --- a/db4-storage/src/wal/no_wal.rs +++ b/db4-storage/src/wal/no_wal.rs @@ -1,8 +1,8 @@ -use std::path::{Path, PathBuf}; +use std::path::Path; use crate::{ error::StorageError, - wal::{LSN, Wal, WalRecord}, + wal::{LSN, Wal, ReplayRecord}, }; /// `NoWAL` is a no-op WAL implementation that discards all writes. @@ -27,7 +27,7 @@ impl Wal for NoWal { Ok(()) } - fn replay(_dir: impl AsRef) -> impl Iterator> { + fn replay(&self) -> impl Iterator> { let error = "Recovery is not supported for NoWAL"; std::iter::once(Err(StorageError::GenericFailure(error.to_string()))) } From 252ded19855a7c2d39fb433ac4606a5787b6a077 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 15 Jan 2026 10:55:20 -0500 Subject: [PATCH 46/95] Add load method to Wal --- db4-storage/src/wal/mod.rs | 7 ++++++- db4-storage/src/wal/no_wal.rs | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 712572ff19..d320c8faf1 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -4,7 +4,7 @@ use raphtory_core::{ entities::{EID, GID, VID}, storage::timeindex::TimeIndexEntry, }; -use std::path::{Path, PathBuf}; +use std::path::Path; pub mod entry; pub mod no_wal; @@ -18,6 +18,11 @@ pub trait Wal { where Self: Sized; + /// Loads an existing WAL file from the given directory in append mode. + fn load(dir: Option<&Path>) -> Result + where + Self: Sized; + /// Appends data to the WAL and returns the assigned LSN. fn append(&self, data: &[u8]) -> Result; diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs index 59271acc7c..85963ddb74 100644 --- a/db4-storage/src/wal/no_wal.rs +++ b/db4-storage/src/wal/no_wal.rs @@ -15,6 +15,10 @@ impl Wal for NoWal { Ok(Self) } + fn load(_dir: Option<&Path>) -> Result { + Ok(Self) + } + fn append(&self, _data: &[u8]) -> Result { Ok(0) } From 37bd656dee3d2fed304100fc5c1d6b96fbd9d964 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 15 Jan 2026 11:17:59 -0500 Subject: [PATCH 47/95] Run fmt --- db4-storage/src/wal/entry.rs | 5 +---- db4-storage/src/wal/mod.rs | 12 +++++++----- db4-storage/src/wal/no_wal.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 13 +++++++++---- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index be2f5c3fff..be151e4754 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -36,10 +36,7 @@ impl GraphWal for NoWal { std::iter::empty() } - fn replay_to_graph( - &self, - _graph: &mut G, - ) -> Result<(), StorageError> { + fn replay_to_graph(&self, _graph: &mut G) -> Result<(), StorageError> { panic!("NoWAL does not support replay") } } diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index d320c8faf1..a84f63d63e 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -49,7 +49,11 @@ pub struct ReplayRecord { impl ReplayRecord { pub fn new(lsn: LSN, data: Vec, raw_bytes: Vec) -> Self { - Self { lsn, data, raw_bytes } + Self { + lsn, + data, + raw_bytes, + } } pub fn lsn(&self) -> LSN { @@ -92,10 +96,8 @@ pub trait GraphWal { fn replay_iter(&self) -> impl Iterator>; /// Replays and applies all the entries in the wal to the given graph. - fn replay_to_graph( - &self, - graph: &mut G, - ) -> Result<(), StorageError>; + /// Subsequent appends to the WAL will start from the LSN of the last replayed entry. + fn replay_to_graph(&self, graph: &mut G) -> Result<(), StorageError>; } /// Trait for defining callbacks for replaying from wal. diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs index 85963ddb74..dbc66b8ffe 100644 --- a/db4-storage/src/wal/no_wal.rs +++ b/db4-storage/src/wal/no_wal.rs @@ -2,7 +2,7 @@ use std::path::Path; use crate::{ error::StorageError, - wal::{LSN, Wal, ReplayRecord}, + wal::{LSN, ReplayRecord, Wal}, }; /// `NoWAL` is a no-op WAL implementation that discards all writes. diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 156c4b6d8b..ea17233a42 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -38,7 +38,7 @@ use std::{ }; use storage::{ transaction::TransactionManager, - wal::{Wal, LSN}, + wal::{GraphWal, Wal, LSN}, WalType, }; @@ -113,7 +113,7 @@ impl Storage { let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); - let ext = Extension::new(config, wal); + let ext = Extension::new(config, wal.clone()); let temporal_graph = TemporalGraph::new_with_path(path, ext)?; Ok(Self { @@ -128,10 +128,15 @@ impl Storage { .unwrap_or_else(|_| PersistenceConfig::default()); let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); - let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); - let ext = Extension::new(config, wal); + let wal = Arc::new(WalType::load(Some(wal_dir.as_path()))?); + let ext = Extension::new(config, wal.clone()); let temporal_graph = TemporalGraph::load_from_path(path, ext)?; + // Replay any pending writes from the WAL. + let mut write_locked_graph = temporal_graph.write_lock()?; + wal.replay_to_graph(&mut write_locked_graph)?; + drop(write_locked_graph); + Ok(Self { graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), #[cfg(feature = "search")] From e30673107b4cab04d108c77ba14f46c54ffe5a02 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 16 Jan 2026 12:04:07 -0500 Subject: [PATCH 48/95] Run fmt --- db4-storage/src/wal/mod.rs | 1 + raphtory/src/db/api/mutation/addition_ops.rs | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index a84f63d63e..86382d3fcf 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -27,6 +27,7 @@ pub trait Wal { fn append(&self, data: &[u8]) -> Result; /// Flushes in-memory WAL entries up to the given LSN to disk. + /// Returns immediately if the given LSN is already flushed to disk. fn flush(&self, lsn: LSN) -> Result<(), StorageError>; /// Rotates the underlying WAL file. diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index 9dfb98ab91..1f7cfa9d5e 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -364,8 +364,7 @@ impl> + StaticGraphViewOps + Dura self.transaction_manager().end_transaction(transaction_id); // Drop to release all the segment locks. - // FIXME: Make sure segments cannot get to disk before wal entry is flushed. - // drop(add_edge_op); + drop(add_edge_op); // Flush the wal entry to disk. self.wal().flush(lsn).unwrap(); From bf3e0f757f1ceaa8ec3e1ad175f396cc1ecabf79 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 16 Jan 2026 17:03:18 -0500 Subject: [PATCH 49/95] Create new WAL file on materialize --- raphtory/src/db/api/view/graph.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 1a9f5a4c0d..0da9ce27d4 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -27,7 +27,7 @@ use crate::{ }; use ahash::HashSet; use chrono::{DateTime, Utc}; -use db4_graph::TemporalGraph; +use db4_graph::{GraphDir, TemporalGraph}; use itertools::Itertools; use raphtory_api::{ atomic_extra::atomic_usize_from_mut_slice, @@ -57,7 +57,7 @@ use std::{ path::Path, sync::{atomic::Ordering, Arc}, }; -use storage::{persist::strategy::PersistenceStrategy, Extension}; +use storage::{persist::strategy::PersistenceStrategy, Extension, WalType, wal::Wal}; #[cfg(feature = "search")] use crate::{ @@ -294,12 +294,19 @@ fn materialize_impl( node_meta.set_layer_mapper(layer_meta.clone()); + // Create new WAL file for the new materialized graph. + let graph_dir = path.map(|p| GraphDir::from(p)); + let wal_dir = graph_dir.map(|dir| dir.wal_dir()); + let wal = WalType::new(wal_dir.as_deref())?; + let config = storage.extension().config().clone(); + let ext = Extension::new(config, Arc::new(wal)); + let temporal_graph = TemporalGraph::new_with_meta( path.map(|p| p.into()), node_meta, edge_meta, graph_props_meta, - storage.extension().clone(), + ext, )?; if let Some(earliest) = graph.earliest_time() { From 3ebf9f9bef7edadc56c9b86d30f369738ddcbbf8 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 19 Jan 2026 12:06:25 -0500 Subject: [PATCH 50/95] Fix some more merge conflicts --- db4-storage/src/pages/edge_page/writer.rs | 7 +++---- db4-storage/src/segments/edge/segment.rs | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/db4-storage/src/pages/edge_page/writer.rs b/db4-storage/src/pages/edge_page/writer.rs index 95116e5a8b..fe26898c25 100644 --- a/db4-storage/src/pages/edge_page/writer.rs +++ b/db4-storage/src/pages/edge_page/writer.rs @@ -144,19 +144,18 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen dst: VID, exists: bool, layer_id: usize, - lsn: u64, ) { if !exists { - self.increment_layer_num_edges(0); + self.increment_layer_num_edges(STATIC_GRAPH_LAYER_ID); self.increment_layer_num_edges(layer_id); } self.writer - .insert_static_edge_internal(edge_pos, src, dst, 0, lsn); + .insert_static_edge_internal(edge_pos, src, dst, STATIC_GRAPH_LAYER_ID); self.graph_stats.update_time(t.t()); self.writer - .delete_edge_internal(t, edge_pos, src, dst, layer_id, lsn); + .delete_edge_internal(t, edge_pos, src, dst, layer_id); } pub fn segment_id(&self) -> usize { diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index 30a21854b8..991b370201 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -556,6 +556,7 @@ mod test { use super::*; use raphtory_api::core::entities::properties::prop::PropType; use raphtory_core::storage::timeindex::TimeIndexEntry; + use super::*; use raphtory_api::core::entities::properties::meta::Meta; fn create_test_segment() -> MemEdgeSegment { From 2e17df011ab126d1033ee0f1446d63448d68163c Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 20 Jan 2026 15:25:38 -0500 Subject: [PATCH 51/95] Fix all merge conflicts --- db4-graph/src/lib.rs | 18 +- db4-graph/src/replay.rs | 25 +- db4-storage/src/api/nodes.rs | 9 +- db4-storage/src/pages/edge_store.rs | 4 +- db4-storage/src/pages/mod.rs | 6 +- db4-storage/src/pages/node_page/writer.rs | 6 +- db4-storage/src/pages/node_store.rs | 4 +- db4-storage/src/segments/edge/segment.rs | 317 +---- db4-storage/src/segments/node/segment.rs | 2 +- .../src/mutation/addition_ops_ext.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 19 +- raphtory/src/db/api/view/graph.rs | 6 +- raphtory/src/db/graph/graph.rs | 288 ++--- raphtory/src/io/arrow/df_loaders.rs | 1029 ----------------- raphtory/src/io/arrow/df_loaders/edges.rs | 21 +- raphtory/src/io/arrow/df_loaders/nodes.rs | 15 +- raphtory/tests/df_loaders.rs | 3 - 17 files changed, 228 insertions(+), 1546 deletions(-) delete mode 100644 raphtory/src/io/arrow/df_loaders.rs diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 3f258c1591..4f3e19cbc6 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -112,7 +112,7 @@ where Self::new_with_meta(None, node_meta, edge_meta, graph_props_meta, ext) } - pub fn new_with_path(path: impl AsRef, ext: EXT) -> Result { + pub fn new_at_path_with_ext(path: impl AsRef, ext: EXT) -> Result { let node_meta = Meta::new_for_nodes(); let edge_meta = Meta::new_for_edges(); let graph_props_meta = Meta::new_for_graph_props(); @@ -404,17 +404,15 @@ where self.graph } - pub fn resize_chunks_to_num_nodes(&mut self, max_vid: Option) { - if let Some(max_vid) = max_vid { - let (chunks_needed, _) = self.graph.storage.nodes().resolve_pos(max_vid); - self.graph.storage().nodes().grow(chunks_needed + 1); - std::mem::take(&mut self.nodes); - self.nodes = self.graph.storage.nodes().write_locked(); - } + pub fn resize_chunks_to_vid(&mut self, vid: VID) { + let (chunks_needed, _) = self.graph.storage.nodes().resolve_pos(vid); + self.graph.storage().nodes().grow(chunks_needed + 1); + std::mem::take(&mut self.nodes); + self.nodes = self.graph.storage.nodes().write_locked(); } - pub fn resize_chunks_to_num_edges(&mut self, max_eid: EID) { - let (chunks_needed, _) = self.graph.storage.edges().resolve_pos(max_eid); + pub fn resize_chunks_to_eid(&mut self, eid: EID) { + let (chunks_needed, _) = self.graph.storage.edges().resolve_pos(eid); self.graph.storage().edges().grow(chunks_needed + 1); std::mem::take(&mut self.edges); self.edges = self.graph.storage.edges().write_locked(); diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 8398420052..2995fe4ec6 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -12,7 +12,7 @@ use raphtory_api::core::{ }; use raphtory_core::entities::GidRef; use storage::{ - api::{edges::EdgeSegmentOps, nodes::NodeSegmentOps}, + api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::resolve_pos, persist::strategy::PersistenceStrategy, @@ -24,6 +24,9 @@ use storage::{ impl GraphReplay for WriteLockedGraph<'_, EXT> where EXT: PersistenceStrategy, ES = ES, GS = GS>, + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, { fn replay_add_edge( &mut self, @@ -40,8 +43,8 @@ where props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError> { let temporal_graph = self.graph(); - let node_max_page_len = temporal_graph.storage().nodes().max_page_len(); - let edge_max_page_len = temporal_graph.storage().edges().max_page_len(); + let node_max_page_len = temporal_graph.extension().config().max_node_page_len; + let edge_max_page_len = temporal_graph.extension().config().max_edge_page_len; // 1. Insert prop ids into edge meta. // No need to validate props again since they are already validated before @@ -76,8 +79,8 @@ where // 4. Grab src writer and add edge data. let (src_segment_id, src_pos) = resolve_pos(src_id, node_max_page_len); - let num_nodes = src_id.index() + 1; - self.resize_chunks_to_num_nodes(num_nodes); // Create enough segments. + let resize_vid = VID::from(src_id.index() + 1); + self.resize_chunks_to_vid(resize_vid); // Create enough segments. let segment = self .graph() @@ -89,7 +92,7 @@ where // Replay this entry only if it doesn't exist in immut. if immut_lsn < lsn { let mut src_writer = self.nodes.get_mut(src_segment_id).unwrap().writer(); - src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&src_name)); + src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, src_name.into()); let is_new_edge_static = src_writer .get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID) @@ -116,8 +119,8 @@ where // 5. Grab dst writer and add edge data. let (dst_segment_id, dst_pos) = resolve_pos(dst_id, node_max_page_len); - let num_nodes = dst_id.index() + 1; - self.resize_chunks_to_num_nodes(num_nodes); + let resize_vid = VID::from(dst_id.index() + 1); + self.resize_chunks_to_vid(resize_vid); let segment = self .graph() @@ -129,7 +132,7 @@ where // Replay this entry only if it doesn't exist in immut. if immut_lsn < lsn { let mut dst_writer = self.nodes.get_mut(dst_segment_id).unwrap().writer(); - dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, GidRef::from(&dst_name)); + dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, dst_name.into()); let is_new_edge_static = dst_writer .get_inb_edge(dst_pos, src_id, STATIC_GRAPH_LAYER_ID) @@ -153,8 +156,8 @@ where // 6. Grab edge writer and add temporal props & metadata. let (edge_segment_id, edge_pos) = resolve_pos(eid, edge_max_page_len); - let num_edges = eid.index() + 1; - self.resize_chunks_to_num_edges(num_edges); + let resize_eid = EID::from(eid.index() + 1); + self.resize_chunks_to_eid(resize_eid); let segment = self .graph() diff --git a/db4-storage/src/api/nodes.rs b/db4-storage/src/api/nodes.rs index d60247173c..3e6eb638a4 100644 --- a/db4-storage/src/api/nodes.rs +++ b/db4-storage/src/api/nodes.rs @@ -21,7 +21,8 @@ use std::{ borrow::Cow, ops::{Deref, DerefMut, Range}, path::{Path, PathBuf}, - sync::{Arc, atomic::AtomicU32}, + sync::{Arc, atomic::{AtomicU32, Ordering}}, + fmt::Debug, }; use rayon::prelude::*; @@ -36,7 +37,7 @@ use crate::{ wal::LSN, }; -pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { +pub trait NodeSegmentOps: Send + Sync + Debug + 'static { type Extension; type Entry<'a>: NodeEntryOps<'a> @@ -130,7 +131,7 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { fn num_nodes(&self) -> u32 { self.nodes_counter() - .load(std::sync::atomic::Ordering::Relaxed) + .load(Ordering::Relaxed) } fn num_layers(&self) -> usize; @@ -138,7 +139,7 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { fn layer_count(&self, layer_id: usize) -> u32; } -pub trait LockedNSSegment: std::fmt::Debug + Send + Sync { +pub trait LockedNSSegment: Debug + Send + Sync { type EntryRef<'a>: NodeRefOps<'a> where Self: 'a; diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 63cbcbfd0a..0517983022 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -223,7 +223,7 @@ impl, EXT: PersistenceStrategy> EdgeStorageI pub fn load(edges_path: impl AsRef, ext: EXT) -> Result { let edges_path = edges_path.as_ref(); - let max_page_len = ext.persistence_config().max_edge_page_len; + let max_page_len = ext.config().max_edge_page_len; let meta = Arc::new(Meta::new_for_edges()); @@ -417,7 +417,7 @@ impl, EXT: PersistenceStrategy> EdgeStorageI #[inline(always)] pub fn max_page_len(&self) -> u32 { - self.ext.persistence_config().max_edge_page_len + self.ext.config().max_edge_page_len } pub fn write_locked<'a>(&'a self) -> WriteLockedEdgePages<'a, ES> { diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index c15c110176..a2360ee08d 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -73,7 +73,7 @@ impl< { pub fn flush(&self) -> Result<(), StorageError> { let node_types = self.nodes.prop_meta().get_all_node_types(); - let config = self.ext.persistence_config().with_node_types(node_types); + let config = self.ext.config().with_node_types(node_types); if let Some(graph_dir) = self.graph_dir.as_ref() { config.save_to_dir(graph_dir)?; @@ -146,7 +146,7 @@ impl< )); if let Some(graph_dir) = graph_dir { - ext.persistence_config() + ext.config() .save_to_dir(graph_dir) .expect("Unrecoverable! Failed to write graph config"); } @@ -175,7 +175,7 @@ impl< let graph_prop_storage = Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); - for node_type in ext.persistence_config().node_types().iter() { + for node_type in ext.config().node_types().iter() { node_meta.get_or_create_node_type_id(node_type); } diff --git a/db4-storage/src/pages/node_page/writer.rs b/db4-storage/src/pages/node_page/writer.rs index eb90bb0eba..3f2821f85c 100644 --- a/db4-storage/src/pages/node_page/writer.rs +++ b/db4-storage/src/pages/node_page/writer.rs @@ -186,10 +186,10 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), node_type)); } - pub fn store_node_id(&mut self, pos: LocalPOS, layer_id: usize, gid: GidRef<'_>) { + pub fn store_node_id(&mut self, pos: LocalPOS, layer_id: usize, gid: GID) { let gid = match gid { - GidRef::U64(id) => Prop::U64(id), - GidRef::Str(s) => Prop::str(s), + GID::U64(id) => Prop::U64(id), + GID::Str(s) => Prop::str(s), }; let props = [(NODE_ID_IDX, gid)]; self.update_c_props(pos, layer_id, props); diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index ebe924c219..a82d419bb1 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -161,7 +161,7 @@ impl NodeStorageInner { } pub fn max_segment_len(&self) -> u32 { - self.ext.persistence_config().max_node_page_len + self.ext.config().max_node_page_len } } @@ -335,7 +335,7 @@ impl, EXT: PersistenceStrategy> NodeStorageI ext: EXT, ) -> Result { let nodes_path = nodes_path.as_ref(); - let max_page_len = ext.persistence_config().max_node_page_len; + let max_page_len = ext.config().max_node_page_len; let node_meta = Arc::new(Meta::new_for_nodes()); if !nodes_path.exists() { diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index 08fd43343a..b72c2966b3 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -440,7 +440,7 @@ impl>> EdgeSegmentOps for EdgeSeg } fn new(page_id: usize, meta: Arc, _path: Option, ext: Self::Extension) -> Self { - let max_page_len = ext.persistence_config().max_edge_page_len; + let max_page_len = ext.config().max_edge_page_len; Self { segment: parking_lot::RwLock::new(MemEdgeSegment::new(page_id, max_page_len, meta)) .into(), @@ -558,10 +558,8 @@ impl>> EdgeSegmentOps for EdgeSeg #[cfg(test)] mod test { use super::*; - use arrow_array::{Array, BooleanArray, StringArray}; use raphtory_api::core::entities::properties::prop::PropType; - use raphtory_core::{entities::EID, storage::timeindex::TimeIndexEntry}; - use super::*; + use raphtory_core::storage::timeindex::TimeIndexEntry; use raphtory_api::core::entities::properties::meta::Meta; fn create_test_segment() -> MemEdgeSegment { @@ -615,317 +613,6 @@ mod test { assert_eq!(segment.t_len(), 3); } - #[test] - fn test_bulk_insert_edges_internal_basic() { - let mut segment = create_test_segment(); - - // Prepare bulk insert data - let mask = BooleanArray::from(vec![true, true, true]); - let times = vec![1i64, 2i64, 3i64]; - let eids = vec![EID(0), EID(1), EID(2)]; - let srcs = vec![VID(1), VID(3), VID(5)]; - let dsts = vec![VID(2), VID(4), VID(6)]; - let cols: Vec> = - vec![Arc::new(StringArray::from(vec!["test1", "test2", "test3"]))]; - let col_mapping = vec![0]; // property id 0 - - // Bulk insert edges - segment.bulk_insert_edges_internal( - &mask, - ×, - 0, // time_sec_index - &eids, - &srcs, - &dsts, - 0, // layer_id - &cols, - &col_mapping, - ); - - // Verify edges exist - assert!(segment.contains_edge(LocalPOS(0), 0)); - assert!(segment.contains_edge(LocalPOS(1), 0)); - assert!(segment.contains_edge(LocalPOS(2), 0)); - - // Verify edge data - assert_eq!(segment.get_edge(LocalPOS(0), 0), Some((VID(1), VID(2)))); - assert_eq!(segment.get_edge(LocalPOS(1), 0), Some((VID(3), VID(4)))); - assert_eq!(segment.get_edge(LocalPOS(2), 0), Some((VID(5), VID(6)))); - - // Verify time length increased - assert_eq!(segment.t_len(), 3); - - for (index, local_pos) in [LocalPOS(0), LocalPOS(1), LocalPOS(2)].iter().enumerate() { - let actual = segment.layers[0] - .t_prop(*local_pos, 0) - .into_iter() - .flat_map(|p| p.iter()) - .collect::>(); - - let i = local_pos.0 as i64; - assert_eq!( - actual, - vec![( - TimeIndexEntry::new(i + 1, index), - Prop::str(format!("test{}", i + 1)) - )] - ); - } - } - - #[test] - fn test_bulk_insert_with_mask() { - let mut segment = create_test_segment(); - - // Prepare bulk insert data with selective mask - let mask = BooleanArray::from(vec![true, false, true, false]); - let times = vec![1i64, 2i64, 3i64, 4i64]; - let eids = vec![EID(0), EID(1), EID(2), EID(3)]; - let srcs = vec![VID(1), VID(3), VID(5), VID(7)]; - let dsts = vec![VID(2), VID(4), VID(6), VID(8)]; - let cols: Vec> = vec![Arc::new(StringArray::from(vec![ - "test1", "test2", "test3", "test4", - ]))]; - let col_mapping = vec![0]; - - // Bulk insert edges - segment.bulk_insert_edges_internal( - &mask, - ×, - 0, - &eids, - &srcs, - &dsts, - 0, - &cols, - &col_mapping, - ); - - // Only edges at positions 0 and 2 should exist (mask was true) - assert!(segment.contains_edge(LocalPOS(0), 0)); - assert!(!segment.contains_edge(LocalPOS(1), 0)); - assert!(segment.contains_edge(LocalPOS(2), 0)); - assert!(!segment.contains_edge(LocalPOS(3), 0)); - - // Verify correct edge data for existing edges - assert_eq!(segment.get_edge(LocalPOS(0), 0), Some((VID(1), VID(2)))); - assert_eq!(segment.get_edge(LocalPOS(2), 0), Some((VID(5), VID(6)))); - - // Only 2 edges should contribute to time length - assert_eq!(segment.t_len(), 2); - } - - #[test] - fn test_bulk_vs_individual_equivalence() { - let mut segment1 = create_test_segment(); - let mut segment2 = create_test_segment(); - - // Individual insertions - segment1.insert_edge_internal( - TimeIndexEntry::new(1, 0), - LocalPOS(0), - VID(1), - VID(2), - 0, - vec![(0, Prop::from("test1"))], - ); - segment1.insert_edge_internal( - TimeIndexEntry::new(2, 1), - LocalPOS(1), - VID(3), - VID(4), - 0, - vec![(0, Prop::from("test2"))], - ); - segment1.insert_edge_internal( - TimeIndexEntry::new(3, 2), - LocalPOS(2), - VID(5), - VID(6), - 0, - vec![(0, Prop::from("test3"))], - ); - - // Equivalent bulk insertion - let mask = BooleanArray::from(vec![true, true, true]); - let times = vec![1i64, 2i64, 3i64]; - let eids = vec![EID(0), EID(1), EID(2)]; - let srcs = vec![VID(1), VID(3), VID(5)]; - let dsts = vec![VID(2), VID(4), VID(6)]; - let cols: Vec> = - vec![Arc::new(StringArray::from(vec!["test1", "test2", "test3"]))]; - let col_mapping = vec![0]; - - segment2.bulk_insert_edges_internal( - &mask, - ×, - 0, - &eids, - &srcs, - &dsts, - 0, - &cols, - &col_mapping, - ); - - // Both segments should have the same edges - for pos in [LocalPOS(0), LocalPOS(1), LocalPOS(2)] { - assert_eq!( - segment1.contains_edge(pos, 0), - segment2.contains_edge(pos, 0) - ); - assert_eq!(segment1.get_edge(pos, 0), segment2.get_edge(pos, 0)); - } - - // Both should have same time length - assert_eq!(segment1.t_len(), segment2.t_len()); - } - - #[test] - fn test_interleaved_operations() { - let mut segment = create_test_segment(); - - // Start with individual insertion - segment.insert_edge_internal( - TimeIndexEntry::new(1, 0), - LocalPOS(0), - VID(1), - VID(2), - 0, - vec![(0, Prop::from("individual1"))], - ); - - // Bulk insert some edges - let mask = BooleanArray::from(vec![true, true]); - let times = vec![2i64, 3i64]; - let eids = vec![EID(1), EID(2)]; - let srcs = vec![VID(3), VID(5)]; - let dsts = vec![VID(4), VID(6)]; - let cols: Vec> = vec![Arc::new(StringArray::from(vec!["bulk1", "bulk2"]))]; - let col_mapping = vec![0]; - - segment.bulk_insert_edges_internal( - &mask, - ×, - 1, // time_sec_index continues from previous - &eids, - &srcs, - &dsts, - 0, - &cols, - &col_mapping, - ); - - // Insert another individual edge - segment.insert_edge_internal( - TimeIndexEntry::new(4, 3), - LocalPOS(3), - VID(7), - VID(8), - 0, - vec![(0, Prop::from("individual2"))], - ); - - // Another bulk insert - let mask2 = BooleanArray::from(vec![true, false, true]); - let times2 = vec![5i64, 6i64, 7i64]; - let eids2 = vec![EID(4), EID(5), EID(6)]; - let srcs2 = vec![VID(9), VID(11), VID(13)]; - let dsts2 = vec![VID(10), VID(12), VID(14)]; - let cols2: Vec> = - vec![Arc::new(StringArray::from(vec!["bulk3", "bulk4", "bulk5"]))]; - - segment.bulk_insert_edges_internal( - &mask2, - ×2, - 4, // time_sec_index continues - &eids2, - &srcs2, - &dsts2, - 0, - &cols2, - &col_mapping, - ); - - // Verify all edges exist correctly - assert!(segment.contains_edge(LocalPOS(0), 0)); // individual1 - assert!(segment.contains_edge(LocalPOS(1), 0)); // bulk1 - assert!(segment.contains_edge(LocalPOS(2), 0)); // bulk2 - assert!(segment.contains_edge(LocalPOS(3), 0)); // individual2 - assert!(segment.contains_edge(LocalPOS(4), 0)); // bulk3 - assert!(!segment.contains_edge(LocalPOS(5), 0)); // masked out - assert!(segment.contains_edge(LocalPOS(6), 0)); // bulk5 - - // Verify edge data - assert_eq!(segment.get_edge(LocalPOS(0), 0), Some((VID(1), VID(2)))); - assert_eq!(segment.get_edge(LocalPOS(1), 0), Some((VID(3), VID(4)))); - assert_eq!(segment.get_edge(LocalPOS(2), 0), Some((VID(5), VID(6)))); - assert_eq!(segment.get_edge(LocalPOS(3), 0), Some((VID(7), VID(8)))); - assert_eq!(segment.get_edge(LocalPOS(4), 0), Some((VID(9), VID(10)))); - assert_eq!(segment.get_edge(LocalPOS(6), 0), Some((VID(13), VID(14)))); - - // Total time length should be 6 (4 individual + 2 from first bulk + 2 from second bulk) - assert_eq!(segment.t_len(), 6); - } - - #[test] - fn test_bulk_insert_multiple_layers() { - let mut segment = create_test_segment(); - - // Insert into layer 0 - let mask = BooleanArray::from(vec![true, true]); - let times = vec![1i64, 2i64]; - let eids = vec![EID(0), EID(1)]; - let srcs = vec![VID(1), VID(3)]; - let dsts = vec![VID(2), VID(4)]; - let cols: Vec> = - vec![Arc::new(StringArray::from(vec!["layer0_1", "layer0_2"]))]; - let col_mapping = vec![0]; - - segment.bulk_insert_edges_internal( - &mask, - ×, - 0, - &eids, - &srcs, - &dsts, - 0, // layer 0 - &cols, - &col_mapping, - ); - - // Insert into layer 1 - let mask2 = BooleanArray::from(vec![true]); - let times2 = vec![3i64]; - let eids2 = vec![EID(0)]; // same eid, different layer - let srcs2 = vec![VID(5)]; - let dsts2 = vec![VID(6)]; - let cols2: Vec> = vec![Arc::new(StringArray::from(vec!["layer1_1"]))]; - - segment.bulk_insert_edges_internal( - &mask2, - ×2, - 2, - &eids2, - &srcs2, - &dsts2, - 1, // layer 1 - &cols2, - &col_mapping, - ); - - // Verify edges in both layers - assert!(segment.contains_edge(LocalPOS(0), 0)); - assert!(segment.contains_edge(LocalPOS(1), 0)); - assert!(segment.contains_edge(LocalPOS(0), 1)); - assert!(!segment.contains_edge(LocalPOS(1), 1)); - - // Verify correct layer data - assert_eq!(segment.get_edge(LocalPOS(0), 0), Some((VID(1), VID(2)))); - assert_eq!(segment.get_edge(LocalPOS(1), 0), Some((VID(3), VID(4)))); - assert_eq!(segment.get_edge(LocalPOS(0), 1), Some((VID(5), VID(6)))); - } - #[test] fn est_size_changes() { let meta = Arc::new(Meta::default()); diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 16f8414b1f..77b1fe418d 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -460,7 +460,7 @@ impl>> NodeSegmentOps for NodeSeg _path: Option, ext: Self::Extension, ) -> Self { - let max_page_len = ext.persistence_config().max_node_page_len; + let max_page_len = ext.config().max_node_page_len; Self { inner: parking_lot::RwLock::new(MemNodeSegment::new(page_id, max_page_len, meta)) .into(), diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 3bfe27204c..680275793e 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -220,7 +220,7 @@ impl InternalAdditionOps for TemporalGraph { self.event_counter .fetch_add(1, std::sync::atomic::Ordering::Relaxed), ); - pos.as_vid(seg, self.extension().max_node_page_len()) + pos.as_vid(seg, self.extension().config().max_node_page_len) })?; Ok(id) diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index ea17233a42..8097ab8188 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -114,7 +114,24 @@ impl Storage { let wal_dir = graph_dir.wal_dir(); let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); let ext = Extension::new(config, wal.clone()); - let temporal_graph = TemporalGraph::new_with_path(path, ext)?; + let temporal_graph = TemporalGraph::new_at_path_with_ext(path, ext)?; + + Ok(Self { + graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), + #[cfg(feature = "search")] + index: RwLock::new(GraphIndex::Empty), + }) + } + + pub(crate) fn new_at_path_with_config( + path: impl AsRef, + config: PersistenceConfig, + ) -> Result { + let graph_dir = GraphDir::from(path.as_ref()); + let wal_dir = graph_dir.wal_dir(); + let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); + let ext = Extension::new(config, wal.clone()); + let temporal_graph = TemporalGraph::new_at_path_with_ext(path, ext)?; Ok(Self { graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 2beec0c2f7..aa71a1e6b5 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -299,7 +299,7 @@ fn materialize_impl( let graph_dir = path.map(|p| GraphDir::from(p)); let wal_dir = graph_dir.map(|dir| dir.wal_dir()); let wal = WalType::new(wal_dir.as_deref())?; - let config = storage.extension().persistence_config().clone(); + let config = storage.extension().config().clone(); let ext = Extension::new(config, Arc::new(wal)); let temporal_graph = TemporalGraph::new_with_meta( @@ -376,7 +376,7 @@ fn materialize_impl( new_type_id, ); } else { - writer.store_node_id(node_pos, STATIC_GRAPH_LAYER_ID, gid.as_ref()); + writer.store_node_id(node_pos, STATIC_GRAPH_LAYER_ID, gid.clone()); } graph_storage @@ -406,7 +406,7 @@ fn materialize_impl( new_eids.push(new_eid); max_eid = new_eid.0.max(max_eid); } - new_storage.resize_chunks_to_num_edges(EID(max_eid)); + new_storage.resize_chunks_to_eid(EID(max_eid)); for layer_id in &layer_map { new_storage.edges.ensure_layer(*layer_id); diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index 6b75163f23..e704226341 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -48,7 +48,7 @@ use std::{ ops::Deref, sync::Arc, }; -use storage::{persist::strategy::PersistenceStrategy, Extension}; +use storage::{persist::strategy::{PersistenceStrategy, PersistenceConfig}, Extension}; #[repr(transparent)] #[derive(Debug, Clone, Default)] @@ -56,8 +56,6 @@ pub struct Graph { pub(crate) inner: Arc, } -impl InheritCoreGraphOps for Graph {} -impl InheritLayerOps for Graph {} impl From> for Graph { fn from(inner: Arc) -> Self { Self { inner } @@ -72,8 +70,154 @@ impl From for Graph { } } +impl Base for Graph { + type Base = Storage; + + #[inline(always)] + fn base(&self) -> &Self::Base { + &self.inner + } +} + +impl InheritMutationOps for Graph {} + +impl InheritViewOps for Graph {} + +impl InheritStorageOps for Graph {} + +impl InheritNodeHistoryFilter for Graph {} + +impl InheritEdgeHistoryFilter for Graph {} + +impl InheritCoreGraphOps for Graph {} + +impl InheritLayerOps for Graph {} + impl Static for Graph {} +impl Display for Graph { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.inner) + } +} + +impl<'graph, G: GraphViewOps<'graph>> PartialEq for Graph +where + Self: 'graph, +{ + fn eq(&self, other: &G) -> bool { + graph_equal(self, other) + } +} + +impl Graph { + /// Create a new graph + /// + /// Returns: + /// + /// A raphtory graph + /// + /// # Example + /// + /// ``` + /// use raphtory::prelude::Graph; + /// let g = Graph::new(); + /// ``` + pub fn new() -> Self { + Self { + inner: Arc::new(Storage::default()), + } + } + + /// Create a new graph at a specific path + /// + /// # Arguments + /// * `path` - The path to the storage location + /// # Returns + /// A raphtory graph with storage at the specified path + /// # Example + /// ```no_run + /// use raphtory::prelude::Graph; + /// let g = Graph::new_at_path("/path/to/storage"); + /// ``` + #[cfg(feature = "io")] + pub fn new_at_path(path: &(impl GraphPaths + ?Sized)) -> Result { + if !Extension::disk_storage_enabled() { + return Err(GraphError::DiskGraphNotEnabled); + } + + path.init()?; + let graph_storage_path = path.graph_path()?; + let storage = Storage::new_at_path(graph_storage_path)?; + + let graph = Self { + inner: Arc::new(storage), + }; + + path.write_metadata(&graph)?; + Ok(graph) + } + + #[cfg(feature = "io")] + pub fn new_at_path_with_config( + path: &(impl GraphPaths + ?Sized), + config: PersistenceConfig, + ) -> Result { + if !Extension::disk_storage_enabled() { + return Err(GraphError::DiskGraphNotEnabled); + } + + path.init()?; + + let graph = Self { + inner: Arc::new(Storage::new_at_path_with_config(path.graph_path()?, config)?), + }; + + path.write_metadata(&graph)?; + Ok(graph) + } + + /// Load a graph from a specific path + /// # Arguments + /// * `path` - The path to the storage location + /// # Returns + /// A raphtory graph loaded from the specified path + /// # Example + /// ```no_run + /// use raphtory::prelude::Graph; + /// let g = Graph::load_from_path("/path/to/storage"); + /// + #[cfg(feature = "io")] + pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result { + // TODO: add support for loading indexes and vectors + Ok(Self { + inner: Arc::new(Storage::load_from(path.graph_path()?)?), + }) + } + + pub(crate) fn from_storage(inner: Arc) -> Self { + Self { inner } + } + + pub(crate) fn from_internal_graph(graph_storage: GraphStorage) -> Self { + let inner = Arc::new(Storage::from_inner(graph_storage)); + Self { inner } + } + + pub fn event_graph(&self) -> Graph { + self.clone() + } + + /// Get persistent graph + pub fn persistent_graph(&self) -> PersistentGraph { + PersistentGraph::from_storage(self.inner.clone()) + } +} + +// ########################################### +// Methods for checking equality of graphs +// ########################################### + pub fn graph_equal<'graph1, 'graph2, G1: GraphViewOps<'graph1>, G2: GraphViewOps<'graph2>>( g1: &G1, g2: &G2, @@ -504,7 +648,8 @@ pub fn assert_graph_equal<'graph, G1: GraphViewOps<'graph>, G2: GraphViewOps<'gr assert_graph_equal_inner(g1, g2, false) } -/// Equality check for materialized persistent graph that ignores the updates generated by the materialise at graph.earliest_time() +/// Equality check for materialized persistent graph that ignores the +/// updates generated by the materialise at graph.earliest_time(). #[track_caller] pub fn assert_persistent_materialize_graph_equal< 'graph, @@ -516,138 +661,3 @@ pub fn assert_persistent_materialize_graph_equal< ) { assert_graph_equal_inner(g1, g2, true) } - -impl Display for Graph { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.inner) - } -} - -impl<'graph, G: GraphViewOps<'graph>> PartialEq for Graph -where - Self: 'graph, -{ - fn eq(&self, other: &G) -> bool { - graph_equal(self, other) - } -} - -impl Base for Graph { - type Base = Storage; - - #[inline(always)] - fn base(&self) -> &Self::Base { - &self.inner - } -} - -impl InheritMutationOps for Graph {} - -impl InheritViewOps for Graph {} - -impl InheritStorageOps for Graph {} - -impl InheritNodeHistoryFilter for Graph {} - -impl InheritEdgeHistoryFilter for Graph {} - -impl Graph { - /// Create a new graph - /// - /// Returns: - /// - /// A raphtory graph - /// - /// # Example - /// - /// ``` - /// use raphtory::prelude::Graph; - /// let g = Graph::new(); - /// ``` - pub fn new() -> Self { - Self { - inner: Arc::new(Storage::default()), - } - } - - /// Create a new graph at a specific path - /// - /// # Arguments - /// * `path` - The path to the storage location - /// # Returns - /// A raphtory graph with storage at the specified path - /// # Example - /// ```no_run - /// use raphtory::prelude::Graph; - /// let g = Graph::new_at_path("/path/to/storage"); - /// ``` - #[cfg(feature = "io")] - pub fn new_at_path(path: &(impl GraphPaths + ?Sized)) -> Result { - if !Extension::disk_storage_enabled() { - return Err(GraphError::DiskGraphNotEnabled); - } - - path.init()?; - let graph_storage_path = path.graph_path()?; - let storage = Storage::new_at_path(graph_storage_path)?; - - let graph = Self { - inner: Arc::new(storage), - }; - - path.write_metadata(&graph)?; - Ok(graph) - } - - #[cfg(feature = "io")] - pub fn new_at_path_with_config( - path: &(impl GraphPaths + ?Sized), - config: Extension, - ) -> Result { - if !Extension::disk_storage_enabled() { - return Err(GraphError::DiskGraphNotEnabled); - } - path.init()?; - let graph = Self { - inner: Arc::new(Storage::new_with_path_and_ext(path.graph_path()?, config)?), - }; - path.write_metadata(&graph)?; - Ok(graph) - } - - /// Load a graph from a specific path - /// # Arguments - /// * `path` - The path to the storage location - /// # Returns - /// A raphtory graph loaded from the specified path - /// # Example - /// ```no_run - /// use raphtory::prelude::Graph; - /// let g = Graph::load_from_path("/path/to/storage"); - /// - #[cfg(feature = "io")] - pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result { - // TODO: add support for loading indexes and vectors - Ok(Self { - inner: Arc::new(Storage::load_from(path.graph_path()?)?), - }) - } - - pub(crate) fn from_storage(inner: Arc) -> Self { - Self { inner } - } - - pub(crate) fn from_internal_graph(graph_storage: GraphStorage) -> Self { - let inner = Arc::new(Storage::from_inner(graph_storage)); - Self { inner } - } - - pub fn event_graph(&self) -> Graph { - self.clone() - } - - /// Get persistent graph - pub fn persistent_graph(&self) -> PersistentGraph { - PersistentGraph::from_storage(self.inner.clone()) - } -} diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs deleted file mode 100644 index af0b342ea8..0000000000 --- a/raphtory/src/io/arrow/df_loaders.rs +++ /dev/null @@ -1,1029 +0,0 @@ -use crate::{ - core::entities::nodes::node_ref::AsNodeRef, - db::api::view::StaticGraphViewOps, - errors::{into_graph_err, GraphError, LoadError}, - io::arrow::{ - dataframe::{DFChunk, DFView, SecondaryIndexCol}, - layer_col::{lift_layer_col, lift_node_type_col}, - prop_handler::*, - }, - prelude::*, -}; -use bytemuck::checked::cast_slice_mut; -use db4_graph::WriteLockedGraph; -use either::Either; -use itertools::izip; -use raphtory_api::{ - atomic_extra::atomic_usize_from_mut_slice, - core::{ - entities::{ - properties::{meta::STATIC_GRAPH_LAYER_ID, prop::PropType}, - EID, - }, - storage::{dict_mapper::MaybeNew, timeindex::TimeIndexEntry}, - }, -}; -use raphtory_core::{ - entities::{graph::logical_to_physical::ResolverShardT, GidRef, VID}, - storage::timeindex::AsTime, -}; -use raphtory_storage::mutation::addition_ops::{InternalAdditionOps, SessionAdditionOps}; -use rayon::prelude::*; -use std::{ - borrow::{Borrow, Cow}, - collections::HashMap, - sync::atomic::{AtomicBool, AtomicUsize, Ordering}, -}; - -#[cfg(feature = "python")] -use kdam::{Bar, BarBuilder, BarExt}; - -#[cfg(feature = "python")] -fn build_progress_bar(des: String, num_rows: usize) -> Result { - BarBuilder::default() - .desc(des) - .animation(kdam::Animation::FillUp) - .total(num_rows) - .unit_scale(true) - .build() - .map_err(|_| GraphError::TqdmError) -} - -fn process_shared_properties( - props: Option<&HashMap>, - resolver: impl Fn(&str, PropType) -> Result, GraphError>, -) -> Result, GraphError> { - match props { - None => Ok(vec![]), - Some(props) => props - .iter() - .map(|(key, prop)| Ok((resolver(key, prop.dtype())?.inner(), prop.clone()))) - .collect(), - } -} - -pub fn load_nodes_from_df< - G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + std::fmt::Debug, ->( - df_view: DFView>>, - time: &str, - secondary_index: Option<&str>, - node_id: &str, - properties: &[&str], - metadata: &[&str], - shared_metadata: Option<&HashMap>, - node_type: Option<&str>, - node_type_col: Option<&str>, - graph: &G, -) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } - let properties_indices = properties - .iter() - .map(|name| df_view.get_index(name)) - .collect::, GraphError>>()?; - let metadata_indices = metadata - .iter() - .map(|name| df_view.get_index(name)) - .collect::, GraphError>>()?; - - let node_type_index = - node_type_col.map(|node_type_col| df_view.get_index(node_type_col.as_ref())); - let node_type_index = node_type_index.transpose()?; - - let node_id_index = df_view.get_index(node_id)?; - let time_index = df_view.get_index(time)?; - let secondary_index_index = secondary_index - .map(|col| df_view.get_index(col)) - .transpose()?; - - let session = graph.write_session().map_err(into_graph_err)?; - let shared_metadata = process_shared_properties(shared_metadata, |key, dtype| { - session - .resolve_node_property(key, dtype, true) - .map_err(into_graph_err) - })?; - - #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading nodes".to_string(), df_view.num_rows)?; - - let mut node_col_resolved = vec![]; - let mut node_type_col_resolved = vec![]; - - let mut write_locked_graph = graph.write_lock().map_err(into_graph_err)?; - - for chunk in df_view.chunks { - let df = chunk?; - let prop_cols = - combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { - session - .resolve_node_property(key, dtype, false) - .map_err(into_graph_err) - })?; - let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { - session - .resolve_node_property(key, dtype, true) - .map_err(into_graph_err) - })?; - let node_type_col = lift_node_type_col(node_type, node_type_index, &df)?; - - let time_col = df.time_col(time_index)?; - let node_col = df.node_col(node_id_index)?; - - // Load the secondary index column if it exists, otherwise generate from start_id. - let secondary_index_col = match secondary_index_index { - Some(col_index) => { - // Update the event_id to reflect ingesting new secondary indices. - let col = df.secondary_index_col(col_index)?; - session - .set_max_event_id(col.max()) - .map_err(into_graph_err)?; - col - } - None => { - let start_id = session - .reserve_event_ids(df.len()) - .map_err(into_graph_err)?; - SecondaryIndexCol::new_from_range(start_id, start_id + df.len()) - } - }; - - node_col_resolved.resize_with(df.len(), Default::default); - node_type_col_resolved.resize_with(df.len(), Default::default); - - // TODO: Using parallel iterators results in a 5x speedup, but - // needs to be implemented such that node VID order is preserved. - // See: https://github.com/Pometry/pometry-storage/issues/81 - for (gid, resolved, node_type, node_type_resolved) in izip!( - node_col.iter(), - node_col_resolved.iter_mut(), - node_type_col.iter(), - node_type_col_resolved.iter_mut() - ) { - let (vid, res_node_type) = write_locked_graph - .graph() - .resolve_node_and_type(gid.as_node_ref(), node_type) - .map_err(|_| LoadError::FatalError)?; - - *resolved = vid; - *node_type_resolved = res_node_type; - } - - let node_stats = write_locked_graph.node_stats().clone(); - let update_time = |time: TimeIndexEntry| { - let time = time.t(); - node_stats.update_time(time); - }; - - write_locked_graph - .resize_chunks_to_num_nodes(write_locked_graph.graph().internal_num_nodes()); - - write_locked_graph - .nodes - .par_iter_mut() - .try_for_each(|shard| { - // Zip all columns for iteration. - let zip = izip!( - node_col_resolved.iter(), - time_col.iter(), - secondary_index_col.iter(), - node_type_col_resolved.iter(), - node_col.iter() - ); - - for (row, (vid, time, secondary_index, node_type, gid)) in zip.enumerate() { - if let Some(mut_node) = shard.resolve_pos(*vid) { - let mut writer = shard.writer(); - let t = TimeIndexEntry(time, secondary_index); - let layer_id = STATIC_GRAPH_LAYER_ID; - - update_time(t); - writer.store_node_id_and_node_type(mut_node, layer_id, gid, *node_type); - - let t_props = prop_cols.iter_row(row); - let c_props = metadata_cols - .iter_row(row) - .chain(shared_metadata.iter().cloned()); - - writer.add_props(t, mut_node, layer_id, t_props); - writer.update_c_props(mut_node, layer_id, c_props); - }; - } - - Ok::<_, GraphError>(()) - })?; - - #[cfg(feature = "python")] - let _ = pb.update(df.len()); - } - - Ok(()) -} - -pub fn load_edges_from_df( - df_view: DFView>>, - time: &str, - secondary_index: Option<&str>, - src: &str, - dst: &str, - properties: &[&str], - metadata: &[&str], - shared_metadata: Option<&HashMap>, - layer: Option<&str>, - layer_col: Option<&str>, - graph: &G, -) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } - - let properties_indices = properties - .iter() - .map(|name| df_view.get_index(name)) - .collect::, GraphError>>()?; - let metadata_indices = metadata - .iter() - .map(|name| df_view.get_index(name)) - .collect::, GraphError>>()?; - - let src_index = df_view.get_index(src)?; - let dst_index = df_view.get_index(dst)?; - let time_index = df_view.get_index(time)?; - let secondary_index_index = secondary_index - .map(|col| df_view.get_index(col)) - .transpose()?; - let layer_index = if let Some(layer_col) = layer_col { - Some(df_view.get_index(layer_col.as_ref())?) - } else { - None - }; - let session = graph.write_session().map_err(into_graph_err)?; - let shared_metadata = process_shared_properties(shared_metadata, |key, dtype| { - session - .resolve_edge_property(key, dtype, true) - .map_err(into_graph_err) - })?; - - #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading edges".to_string(), df_view.num_rows)?; - - let mut src_col_resolved = vec![]; - let mut dst_col_resolved = vec![]; - let mut eid_col_resolved: Vec = vec![]; - let mut eids_exist: Vec = vec![]; // exists or needs to be created - let mut layer_eids_exist: Vec = vec![]; // exists or needs to be created - - let mut write_locked_graph = graph.write_lock().map_err(into_graph_err)?; - - // set the type of the resolver; - let chunks = df_view.chunks.peekable(); - - let num_nodes = AtomicUsize::new(write_locked_graph.graph().internal_num_nodes()); - - for chunk in chunks { - let df = chunk?; - let prop_cols = - combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { - session - .resolve_edge_property(key, dtype, false) - .map_err(into_graph_err) - })?; - let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { - session - .resolve_edge_property(key, dtype, true) - .map_err(into_graph_err) - })?; - - src_col_resolved.resize_with(df.len(), Default::default); - dst_col_resolved.resize_with(df.len(), Default::default); - - // let src_col_shared = atomic_usize_from_mut_slice(cast_slice_mut(&mut src_col_resolved)); - // let dst_col_shared = atomic_usize_from_mut_slice(cast_slice_mut(&mut dst_col_resolved)); - - let layer = lift_layer_col(layer, layer_index, &df)?; - let layer_col_resolved = layer.resolve(graph)?; - - let src_col = df.node_col(src_index)?; - src_col.validate(graph, LoadError::MissingSrcError)?; - - let dst_col = df.node_col(dst_index)?; - dst_col.validate(graph, LoadError::MissingDstError)?; - - // It's our graph, no one else can change it - src_col - .par_iter() - .zip(src_col_resolved.par_iter_mut()) - .try_for_each(|(gid, resolved)| { - let gid = gid.ok_or(LoadError::FatalError)?; - let vid = write_locked_graph - .graph() - .resolve_node(gid.as_node_ref()) - .map_err(|_| LoadError::FatalError)?; - - if vid.is_new() { - num_nodes.fetch_add(1, Ordering::Relaxed); - } - - *resolved = vid.inner(); - Ok::<(), LoadError>(()) - })?; - - dst_col - .par_iter() - .zip(dst_col_resolved.par_iter_mut()) - .try_for_each(|(gid, resolved)| { - let gid = gid.ok_or(LoadError::FatalError)?; - let vid = write_locked_graph - .graph() - .resolve_node(gid.as_node_ref()) - .map_err(|_| LoadError::FatalError)?; - - if vid.is_new() { - num_nodes.fetch_add(1, Ordering::Relaxed); - } - - *resolved = vid.inner(); - Ok::<(), LoadError>(()) - })?; - - let time_col = df.time_col(time_index)?; - - // Load the secondary index column if it exists, otherwise generate from start_id. - let secondary_index_col = match secondary_index_index { - Some(col_index) => { - // Update the event_id to reflect ingesting new secondary indices. - let col = df.secondary_index_col(col_index)?; - session - .set_max_event_id(col.max()) - .map_err(into_graph_err)?; - col - } - None => { - let start_id = session - .reserve_event_ids(df.len()) - .map_err(into_graph_err)?; - SecondaryIndexCol::new_from_range(start_id, start_id + df.len()) - } - }; - - write_locked_graph.resize_chunks_to_num_nodes(num_nodes.load(Ordering::Relaxed)); - - eid_col_resolved.resize_with(df.len(), Default::default); - eids_exist.resize_with(df.len(), Default::default); - layer_eids_exist.resize_with(df.len(), Default::default); - let eid_col_shared = atomic_usize_from_mut_slice(cast_slice_mut(&mut eid_col_resolved)); - - let edges = write_locked_graph.graph().storage().edges().clone(); - let next_edge_id = |row: usize| { - let (page, pos) = edges.reserve_free_pos(row); - pos.as_eid(page, edges.max_page_len()) - }; - - let mut per_segment_edge_count = Vec::with_capacity(write_locked_graph.nodes.len()); - per_segment_edge_count.resize_with(write_locked_graph.nodes.len(), || AtomicUsize::new(0)); - - let WriteLockedGraph { - nodes, ref edges, .. - } = &mut write_locked_graph; - - // Generate all edge_ids + add outbound edges - nodes - .iter_mut() // TODO: change to par_iter_mut but preserve edge_id order - .enumerate() - .for_each(|(page_id, locked_page)| { - // Zip all columns for iteration. - let zip = izip!( - src_col_resolved.iter(), - src_col.iter(), - dst_col_resolved.iter(), - time_col.iter(), - secondary_index_col.iter(), - layer_col_resolved.iter() - ); - - for (row, (src, src_gid, dst, time, secondary_index, layer)) in zip.enumerate() { - if let Some(src_pos) = locked_page.resolve_pos(*src) { - let mut writer = locked_page.writer(); - let t = TimeIndexEntry(time, secondary_index); - writer.store_node_id(src_pos, 0, src_gid); - - // find the original EID in the static graph if it exists - // otherwise create a new one - let edge_id = if let Some(edge_id) = writer.get_out_edge(src_pos, *dst, 0) { - eid_col_shared[row].store(edge_id.0, Ordering::Relaxed); - eids_exist[row].store(true, Ordering::Relaxed); - edge_id.with_layer(*layer) - } else { - let edge_id = next_edge_id(row); - - writer.add_static_outbound_edge(src_pos, *dst, edge_id); - eid_col_shared[row].store(edge_id.0, Ordering::Relaxed); - eids_exist[row].store(false, Ordering::Relaxed); - edge_id.with_layer(*layer) - }; - - if edges.exists(edge_id) - || writer.get_out_edge(src_pos, *dst, *layer).is_some() - { - layer_eids_exist[row].store(true, Ordering::Relaxed); - // node additions - writer.update_timestamp(t, src_pos, edge_id); - } else { - layer_eids_exist[row].store(false, Ordering::Relaxed); - // actually adds the edge - writer.add_outbound_edge(Some(t), src_pos, *dst, edge_id); - } - - per_segment_edge_count[page_id].fetch_add(1, Ordering::Relaxed); - } - } - }); - - let aprox_num_edges = write_locked_graph.graph().internal_num_edges() + df.len(); - - write_locked_graph.resize_chunks_to_num_edges(aprox_num_edges); - - rayon::scope(|sc| { - // Add inbound edges - sc.spawn(|_| { - write_locked_graph - .nodes - .par_iter_mut() - .enumerate() - .for_each(|(page_id, locked_page)| { - let zip = izip!( - src_col_resolved.iter(), - dst_col_resolved.iter(), - dst_col.iter(), - eid_col_resolved.iter(), - time_col.iter(), - secondary_index_col.iter(), - layer_col_resolved.iter(), - layer_eids_exist.iter().map(|a| a.load(Ordering::Relaxed)), - eids_exist.iter().map(|b| b.load(Ordering::Relaxed)) - ); - - for ( - src, - dst, - dst_gid, - eid, - time, - secondary_index, - layer, - edge_exists_in_layer, - edge_exists_in_static_graph, - ) in zip - { - if let Some(dst_pos) = locked_page.resolve_pos(*dst) { - let t = TimeIndexEntry(time, secondary_index); - let mut writer = locked_page.writer(); - - writer.store_node_id(dst_pos, 0, dst_gid); - - if !edge_exists_in_static_graph { - writer.add_static_inbound_edge(dst_pos, *src, *eid); - } - - if !edge_exists_in_layer { - writer.add_inbound_edge( - Some(t), - dst_pos, - *src, - eid.with_layer(*layer), - ); - } else { - writer.update_timestamp(t, dst_pos, eid.with_layer(*layer)); - } - - per_segment_edge_count[page_id].fetch_add(1, Ordering::Relaxed); - } - } - }); - }); - - // Add temporal & constant properties to edges - sc.spawn(|_| { - write_locked_graph - .edges - .par_iter_mut() - .for_each(|locked_page| { - let zip = izip!( - src_col_resolved.iter(), - dst_col_resolved.iter(), - time_col.iter(), - secondary_index_col.iter(), - eid_col_resolved.iter(), - layer_col_resolved.iter(), - eids_exist - .iter() - .map(|exists| exists.load(Ordering::Relaxed)) - ); - let mut t_props: Vec<(usize, Prop)> = vec![]; - let mut c_props: Vec<(usize, Prop)> = vec![]; - - for (row, (src, dst, time, secondary_index, eid, layer, exists)) in - zip.enumerate() - { - if let Some(eid_pos) = locked_page.resolve_pos(*eid) { - let t = TimeIndexEntry(time, secondary_index); - let mut writer = locked_page.writer(); - - t_props.clear(); - t_props.extend(prop_cols.iter_row(row)); - - c_props.clear(); - c_props.extend(metadata_cols.iter_row(row)); - c_props.extend_from_slice(&shared_metadata); - - writer.bulk_add_edge( - t, - eid_pos, - *src, - *dst, - exists, - *layer, - c_props.drain(..), - t_props.drain(..), - ); - } - } - }); - }); - }); - - #[cfg(feature = "python")] - let _ = pb.update(df.len()); - } - - Ok(()) -} - -fn load_into_shard( - src_col_shared: &[AtomicUsize], - dst_col_shared: &[AtomicUsize], - src_col: &super::node_col::NodeCol, - dst_col: &super::node_col::NodeCol, - node_count: &AtomicUsize, - shard: &mut ResolverShardT<'_, T>, - mut mapper_fn: impl FnMut(GidRef<'_>) -> Cow<'_, Q>, - mut fallback_fn: impl FnMut(&Q) -> Option, -) -> Result<(), LoadError> -where - T: Clone + Eq + std::hash::Hash + Borrow, - Q: Eq + std::hash::Hash + ToOwned + ?Sized, -{ - let src_iter = src_col.iter().map(&mut mapper_fn).enumerate(); - - for (id, gid) in src_iter { - if let Some(vid) = shard.resolve_node(&gid, |id| { - // fallback_fn(id).map(Either::Right).unwrap_or_else(|| { - // // If the node does not exist, create a new VID - // Either::Left(VID(node_count.fetch_add(1, Ordering::Relaxed))) - // }) - Either::Left(VID(node_count.fetch_add(1, Ordering::Relaxed))) - }) { - src_col_shared[id].store(vid.0, Ordering::Relaxed); - } - } - - let dst_iter = dst_col.iter().map(mapper_fn).enumerate(); - for (id, gid) in dst_iter { - if let Some(vid) = shard.resolve_node(&gid, |id| { - // fallback_fn(id).map(Either::Right).unwrap_or_else(|| { - // // If the node does not exist, create a new VID - // Either::Left(VID(node_count.fetch_add(1, Ordering::Relaxed))) - // }) - Either::Left(VID(node_count.fetch_add(1, Ordering::Relaxed))) - }) { - dst_col_shared[id].store(vid.0, Ordering::Relaxed); - } - } - Ok::<_, LoadError>(()) -} - -pub(crate) fn load_edge_deletions_from_df< - G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + DeletionOps, ->( - df_view: DFView>>, - time: &str, - secondary_index: Option<&str>, - src: &str, - dst: &str, - layer: Option<&str>, - layer_col: Option<&str>, - graph: &G, -) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } - let src_index = df_view.get_index(src)?; - let dst_index = df_view.get_index(dst)?; - let time_index = df_view.get_index(time)?; - let secondary_index_index = secondary_index - .map(|col| df_view.get_index(col)) - .transpose()?; - let layer_index = layer_col.map(|layer_col| df_view.get_index(layer_col.as_ref())); - let layer_index = layer_index.transpose()?; - #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading edge deletions".to_string(), df_view.num_rows)?; - let session = graph.write_session().map_err(into_graph_err)?; - - for chunk in df_view.chunks { - let df = chunk?; - let layer = lift_layer_col(layer, layer_index, &df)?; - let src_col = df.node_col(src_index)?; - let dst_col = df.node_col(dst_index)?; - let time_col = df.time_col(time_index)?; - - // Load the secondary index column if it exists, otherwise generate from start_id. - let secondary_index_col = match secondary_index_index { - Some(col_index) => { - // Update the event_id to reflect ingesting new secondary indices. - let col = df.secondary_index_col(col_index)?; - session - .set_max_event_id(col.max()) - .map_err(into_graph_err)?; - col - } - None => { - let start_id = session - .reserve_event_ids(df.len()) - .map_err(into_graph_err)?; - SecondaryIndexCol::new_from_range(start_id, start_id + df.len()) - } - }; - - src_col - .iter() - .zip(dst_col.iter()) - .zip(time_col.iter()) - .zip(secondary_index_col.iter()) - .zip(layer.iter()) - .try_for_each(|((((src, dst), time), secondary_index), layer)| { - // let src = src.ok_or(LoadError::MissingSrcError)?; - // let dst = dst.ok_or(LoadError::MissingDstError)?; - graph.delete_edge((time, secondary_index), src, dst, layer)?; - Ok::<(), GraphError>(()) - })?; - - #[cfg(feature = "python")] - let _ = pb.update(df.len()); - } - - Ok(()) -} - -pub(crate) fn load_node_props_from_df< - 'a, - G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + std::fmt::Debug, ->( - df_view: DFView>>, - node_id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - metadata: &[&str], - shared_metadata: Option<&HashMap>, - graph: &G, -) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } - let metadata_indices = metadata - .iter() - .map(|name| df_view.get_index(name)) - .collect::, GraphError>>()?; - - let node_type_index = - node_type_col.map(|node_type_col| df_view.get_index(node_type_col.as_ref())); - let node_type_index = node_type_index.transpose()?; - - let node_id_index = df_view.get_index(node_id)?; - let session = graph.write_session().map_err(into_graph_err)?; - - let shared_metadata = process_shared_properties(shared_metadata, |key, dtype| { - session - .resolve_node_property(key, dtype, true) - .map_err(into_graph_err) - })?; - - #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading node properties".to_string(), df_view.num_rows)?; - - let mut node_col_resolved = vec![]; - let mut node_type_col_resolved = vec![]; - - let mut write_locked_graph = graph.write_lock().map_err(into_graph_err)?; - - for chunk in df_view.chunks { - let df = chunk?; - let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { - session - .resolve_node_property(key, dtype, true) - .map_err(into_graph_err) - })?; - let node_type_col = lift_node_type_col(node_type, node_type_index, &df)?; - let node_col = df.node_col(node_id_index)?; - - node_col_resolved.resize_with(df.len(), Default::default); - node_type_col_resolved.resize_with(df.len(), Default::default); - - node_col - .iter() - .zip(node_col_resolved.iter_mut()) - .zip(node_type_col.iter()) - .zip(node_type_col_resolved.iter_mut()) - .try_for_each(|(((gid, resolved), node_type), node_type_resolved)| { - let (vid, res_node_type) = write_locked_graph - .graph() - .resolve_node_and_type(gid.as_node_ref(), node_type) - .map_err(|_| LoadError::FatalError)?; - *resolved = vid; - *node_type_resolved = res_node_type; - Ok::<(), LoadError>(()) - })?; - - write_locked_graph - .resize_chunks_to_num_nodes(write_locked_graph.graph().internal_num_nodes()); - - write_locked_graph.nodes.iter_mut().try_for_each(|shard| { - let mut c_props = vec![]; - - for (idx, ((vid, node_type), gid)) in node_col_resolved - .iter() - .zip(node_type_col_resolved.iter()) - .zip(node_col.iter()) - .enumerate() - { - if let Some(mut_node) = shard.resolve_pos(*vid) { - let mut writer = shard.writer(); - writer.store_node_id_and_node_type(mut_node, 0, gid, *node_type); - - c_props.clear(); - c_props.extend(metadata_cols.iter_row(idx)); - c_props.extend_from_slice(&shared_metadata); - writer.update_c_props(mut_node, 0, c_props.drain(..)); - }; - } - - Ok::<_, GraphError>(()) - })?; - - #[cfg(feature = "python")] - let _ = pb.update(df.len()); - } - Ok(()) -} - -pub(crate) fn load_edges_props_from_df< - G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps, ->( - df_view: DFView>>, - src: &str, - dst: &str, - metadata: &[&str], - shared_metadata: Option<&HashMap>, - layer: Option<&str>, - layer_col: Option<&str>, - graph: &G, -) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } - let metadata_indices = metadata - .iter() - .map(|name| df_view.get_index(name)) - .collect::, GraphError>>()?; - - let src_index = df_view.get_index(src)?; - let dst_index = df_view.get_index(dst)?; - let layer_index = if let Some(layer_col) = layer_col { - Some(df_view.get_index(layer_col.as_ref())?) - } else { - None - }; - let session = graph.write_session().map_err(into_graph_err)?; - let shared_metadata = process_shared_properties(shared_metadata, |key, dtype| { - session - .resolve_edge_property(key, dtype, true) - .map_err(into_graph_err) - })?; - - #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading edge properties".to_string(), df_view.num_rows)?; - #[cfg(feature = "python")] - let _ = pb.update(0); - - let mut src_col_resolved = vec![]; - let mut dst_col_resolved = vec![]; - let mut eid_col_resolved = vec![]; - - let mut write_locked_graph = graph.write_lock().map_err(into_graph_err)?; - - let g = write_locked_graph.graph; - - for chunk in df_view.chunks { - let df = chunk?; - let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { - session - .resolve_edge_property(key, dtype, true) - .map_err(into_graph_err) - })?; - let layer = lift_layer_col(layer, layer_index, &df)?; - let layer_col_resolved = layer.resolve(graph)?; - - let src_col = df.node_col(src_index)?; - src_col.validate(graph, LoadError::MissingSrcError)?; - - let dst_col = df.node_col(dst_index)?; - dst_col.validate(graph, LoadError::MissingDstError)?; - - // It's our graph, no one else can change it - src_col_resolved.resize_with(df.len(), Default::default); - src_col - .par_iter() - .zip(src_col_resolved.par_iter_mut()) - .try_for_each(|(gid, resolved)| { - let gid = gid.ok_or(LoadError::FatalError)?; - let vid = g - .resolve_node_ref(gid.as_node_ref()) - .ok_or(LoadError::MissingNodeError)?; - *resolved = vid; - Ok::<(), LoadError>(()) - })?; - - dst_col_resolved.resize_with(df.len(), Default::default); - dst_col - .par_iter() - .zip(dst_col_resolved.par_iter_mut()) - .try_for_each(|(gid, resolved)| { - let gid = gid.ok_or(LoadError::FatalError)?; - let vid = g - .resolve_node_ref(gid.as_node_ref()) - .ok_or(LoadError::MissingNodeError)?; - *resolved = vid; - Ok::<(), LoadError>(()) - })?; - - write_locked_graph - .resize_chunks_to_num_nodes(write_locked_graph.graph().internal_num_nodes()); - - // resolve all the edges - eid_col_resolved.resize_with(df.len(), Default::default); - let eid_col_shared = atomic_usize_from_mut_slice(cast_slice_mut(&mut eid_col_resolved)); - - write_locked_graph - .nodes - .par_iter_mut() - .try_for_each(|shard| { - for (row, (src, dst)) in src_col_resolved - .iter() - .zip(dst_col_resolved.iter()) - .enumerate() - { - if let Some(src_node) = shard.resolve_pos(*src) { - let writer = shard.writer(); - let EID(eid) = writer - .get_out_edge(src_node, *dst, 0) - .ok_or(LoadError::MissingEdgeError(*src, *dst))?; - eid_col_shared[row].store(eid, Ordering::Relaxed); - } - } - Ok::<_, LoadError>(()) - })?; - - write_locked_graph - .edges - .par_iter_mut() - .try_for_each(|shard| { - let mut c_props = vec![]; - for (idx, (((eid, layer), src), dst)) in eid_col_resolved - .iter() - .zip(layer_col_resolved.iter()) - .zip(&src_col_resolved) - .zip(&dst_col_resolved) - .enumerate() - { - if let Some(eid_pos) = shard.resolve_pos(*eid) { - let mut writer = shard.writer(); - c_props.clear(); - c_props.extend(metadata_cols.iter_row(idx)); - c_props.extend_from_slice(&shared_metadata); - writer.update_c_props(eid_pos, *src, *dst, *layer, c_props.drain(..)); - } - } - Ok::<(), GraphError>(()) - })?; - - #[cfg(feature = "python")] - let _ = pb.update(df.len()); - } - Ok(()) -} - -pub(crate) fn load_graph_props_from_df< - G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps, ->( - df_view: DFView>>, - time: &str, - secondary_index: Option<&str>, - properties: Option<&[&str]>, - metadata: Option<&[&str]>, - graph: &G, -) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } - let properties = properties.unwrap_or(&[]); - let metadata = metadata.unwrap_or(&[]); - - let properties_indices = properties - .iter() - .map(|name| df_view.get_index(name)) - .collect::, GraphError>>()?; - let metadata_indices = metadata - .iter() - .map(|name| df_view.get_index(name)) - .collect::, GraphError>>()?; - - let time_index = df_view.get_index(time)?; - let secondary_index_index = secondary_index - .map(|col| df_view.get_index(col)) - .transpose()?; - - #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading graph properties".to_string(), df_view.num_rows)?; - let session = graph.write_session().map_err(into_graph_err)?; - - for chunk in df_view.chunks { - let df = chunk?; - let prop_cols = - combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { - session - .resolve_graph_property(key, dtype, false) - .map_err(into_graph_err) - })?; - let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { - session - .resolve_graph_property(key, dtype, true) - .map_err(into_graph_err) - })?; - let time_col = df.time_col(time_index)?; - - // Load the secondary index column if it exists, otherwise generate from start_id. - let secondary_index_col = match secondary_index_index { - Some(col_index) => { - // Update the event_id to reflect ingesting new secondary indices. - let col = df.secondary_index_col(col_index)?; - session - .set_max_event_id(col.max()) - .map_err(into_graph_err)?; - col - } - None => { - let start_id = session - .reserve_event_ids(df.len()) - .map_err(into_graph_err)?; - let col = SecondaryIndexCol::new_from_range(start_id, start_id + df.len()); - col - } - }; - - time_col - .par_iter() - .zip(secondary_index_col.par_iter()) - .zip(prop_cols.par_rows()) - .zip(metadata_cols.par_rows()) - .try_for_each(|(((time, secondary_index), t_props), c_props)| { - let t = TimeIndexEntry(time, secondary_index); - let t_props: Vec<_> = t_props.collect(); - - if !t_props.is_empty() { - graph - .internal_add_properties(t, &t_props) - .map_err(into_graph_err)?; - } - - let c_props: Vec<_> = c_props.collect(); - - if !c_props.is_empty() { - graph - .internal_add_metadata(&c_props) - .map_err(into_graph_err)?; - } - - Ok::<(), GraphError>(()) - })?; - - #[cfg(feature = "python")] - let _ = pb.update(df.len()); - } - - Ok(()) -} diff --git a/raphtory/src/io/arrow/df_loaders/edges.rs b/raphtory/src/io/arrow/df_loaders/edges.rs index b0824349de..4068dbd037 100644 --- a/raphtory/src/io/arrow/df_loaders/edges.rs +++ b/raphtory/src/io/arrow/df_loaders/edges.rs @@ -21,7 +21,7 @@ use kdam::BarExt; use raphtory_api::{ atomic_extra::{atomic_usize_from_mut_slice, atomic_vid_from_mut_slice}, core::{ - entities::EID, + entities::{properties::meta::STATIC_GRAPH_LAYER_ID, EID}, storage::{dict_mapper::MaybeNew, timeindex::TimeIndexEntry, FxDashMap}, }, }; @@ -520,10 +520,9 @@ fn update_edge_properties<'a, ES: EdgeSegmentOps>( *layer, c_props.drain(..), t_props.drain(..), - 0, ); } else { - writer.bulk_delete_edge(t, eid_pos, *src, *dst, exists, *layer, 0); + writer.bulk_delete_edge(t, eid_pos, *src, *dst, exists, *layer); } } } @@ -551,7 +550,7 @@ fn update_inbound_edges<'a, NS: NodeSegmentOps>( let mut writer = shard.writer(); if !edge_exists_in_static_graph { - writer.add_static_inbound_edge(dst_pos, *src, *eid, 0); + writer.add_static_inbound_edge(dst_pos, *src, *eid); } let elid = if delete { eid.with_layer_deletion(*layer) @@ -561,14 +560,14 @@ fn update_inbound_edges<'a, NS: NodeSegmentOps>( if src != dst { if edge_exists_in_layer { - writer.update_timestamp(t, dst_pos, elid, 0); + writer.update_timestamp(t, dst_pos, elid); } else { - writer.add_inbound_edge(Some(t), dst_pos, *src, elid, 0); + writer.add_inbound_edge(Some(t), dst_pos, *src, elid); } } else { // self-loop edge, only add once if !edge_exists_in_layer { - writer.add_inbound_edge::(None, dst_pos, *src, elid, 0); + writer.add_inbound_edge::(None, dst_pos, *src, elid); } } } @@ -603,7 +602,7 @@ fn add_and_resolve_outbound_edges< MaybeNew::Existing(edge_id) } else { let edge_id = next_edge_id(row); - writer.add_static_outbound_edge(src_pos, *dst, edge_id, 0); + writer.add_static_outbound_edge(src_pos, *dst, edge_id); eid_col_shared[row].store(edge_id.0, Ordering::Relaxed); eids_exist[row].store(false, Ordering::Relaxed); MaybeNew::New(edge_id) @@ -626,9 +625,9 @@ fn add_and_resolve_outbound_edges< layer_eids_exist[row].store(exists, Ordering::Relaxed); if exists { - writer.update_timestamp(t, src_pos, edge_id.inner(), 0); + writer.update_timestamp(t, src_pos, edge_id.inner()); } else { - writer.add_outbound_edge(Some(t), src_pos, *dst, edge_id.inner(), 0); + writer.add_outbound_edge(Some(t), src_pos, *dst, edge_id.inner()); } } } @@ -643,7 +642,7 @@ pub fn store_node_ids>( if let Some(src_pos) = locked_page.resolve_pos(*vid) { let mut writer = locked_page.writer(); - writer.store_node_id_and_node_type(src_pos, 0, *gid, *node_type, 0); + writer.store_node_id_and_node_type(src_pos, STATIC_GRAPH_LAYER_ID, *gid, *node_type); } } } diff --git a/raphtory/tests/df_loaders.rs b/raphtory/tests/df_loaders.rs index f8eb2b6e19..0ad428dc13 100644 --- a/raphtory/tests/df_loaders.rs +++ b/raphtory/tests/df_loaders.rs @@ -1,7 +1,5 @@ #[cfg(feature = "io")] mod io_tests { - use std::any::Any; - use arrow::array::builder::{ ArrayBuilder, Int64Builder, LargeStringBuilder, StringViewBuilder, UInt64Builder, }; @@ -18,7 +16,6 @@ mod io_tests { nodes::{load_node_props_from_df, load_nodes_from_df}, }, }, - parquet_loaders::load_node_props_from_parquet, }, prelude::*, test_utils::{build_edge_list, build_edge_list_str, build_edge_list_with_secondary_index}, From 412c4bb2703d4577e239c99fb7267e2723561367 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 20 Jan 2026 15:52:24 -0500 Subject: [PATCH 52/95] Move merge.rs to db4-storage --- db4-storage/src/persist/merge.rs | 235 +++++++++++++++++++++++++++++++ db4-storage/src/persist/mod.rs | 1 + 2 files changed, 236 insertions(+) create mode 100644 db4-storage/src/persist/merge.rs diff --git a/db4-storage/src/persist/merge.rs b/db4-storage/src/persist/merge.rs new file mode 100644 index 0000000000..40cfdde153 --- /dev/null +++ b/db4-storage/src/persist/merge.rs @@ -0,0 +1,235 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct MergeConfig { + /// Maximum number of components before triggering a merge (size-tiered policy) + pub component_count_threshold: usize, + /// Maximum size ratio between smallest and largest components to consider merging + pub size_ratio_threshold: f64, + /// Maximum size of a component that can be considered for merging (in MB) + pub max_component_size_mb: usize, + /// Prefer merging smaller components first + pub prefer_small_merges: bool, +} + +impl Default for MergeConfig { + fn default() -> Self { + MergeConfig { + component_count_threshold: 4, + size_ratio_threshold: 10.0, + max_component_size_mb: 500, + prefer_small_merges: true, + } + } +} + +impl MergeConfig { + pub fn with_max_component_size_mb(mut self, size_mb: usize) -> Self { + self.max_component_size_mb = size_mb; + self + } + + pub fn unlimited_component_size(mut self) -> Self { + self.max_component_size_mb = usize::MAX / 1024 / 1024; + self + } + + pub fn with_component_count_threshold(mut self, threshold: usize) -> Self { + self.component_count_threshold = threshold; + self + } +} + +pub trait HasMergeConfig { + fn merge_config(&self) -> &MergeConfig; + + fn with_merge_config(self, config: MergeConfig) -> Self; +} + +pub trait HasSize { + fn size(&self) -> usize; +} + +impl MergeConfig { + /// Determines if disk components should be merged based on values in the config. + /// Returns Option<(usize, usize)> with the indexes of pages that should be merged. + pub fn should_merge_disk_segments( + &self, + disk_segments: &[T], + ) -> Option<(usize, usize)> { + let pages_len = disk_segments.len(); + + // Need at least 2 pages to merge + if pages_len < 2 { + return None; + } + + // Size-tiered check: enough components to trigger merge? + let enough_components = pages_len >= self.component_count_threshold; + + if !enough_components { + return None; + } + + // Collect sizes of all pages + let mut page_sizes: Vec<(usize, usize)> = disk_segments + .iter() + .enumerate() + .map(|(idx, page)| (idx, page.size())) + .collect(); + + // Sort by size for easy comparison and selection + page_sizes.sort_by_key(|(_, size)| *size); + + // Max component size in bytes + let max_size_bytes = self.max_component_size_mb * 1024 * 1024; + + // Find candidate pairs for merging + let mut merge_candidates = Vec::new(); + + // Compare adjacent components for possible merges + for i in 0..page_sizes.len() - 1 { + let (idx1, size1) = page_sizes[i]; + + for j in i + 1..page_sizes.len() { + let (idx2, size2) = page_sizes[j]; + + // Skip if either component is too large + if size1 > max_size_bytes || size2 > max_size_bytes { + continue; + } + + // Skip if combined size exceeds max size + if size1 + size2 > max_size_bytes { + continue; + } + + // Calculate size ratio + let ratio = size2 as f64 / size1 as f64; + + // If size ratio is within threshold, add as candidate + if ratio <= self.size_ratio_threshold { + // Score this candidate pair (lower is better) + let score = if self.prefer_small_merges { + // Prefer smaller components + size1 + size2 + } else { + // Prefer larger ratio differences (more balanced) + ((self.size_ratio_threshold - ratio) * 1000.0) as usize + }; + + merge_candidates.push((idx1, idx2, score)); + } + } + } + + // If we have candidates, choose the best one + if !merge_candidates.is_empty() { + // Sort by score (lower is better) + merge_candidates.sort_by_key(|(_, _, score)| *score); + let (idx1, idx2, _) = merge_candidates[0]; + + // Return the page indexes to merge + return Some((idx1, idx2)); + } + + None + } +} + +#[cfg(test)] +mod test { + use super::*; + + struct MockPage { + size: usize, + } + + impl HasSize for MockPage { + fn size(&self) -> usize { + self.size + } + } + + #[test] + fn test_one_page() { + let actual = MergeConfig::default().should_merge_disk_segments(&[MockPage { size: 1 }]); + assert_eq!(actual, None); + } + + #[test] + fn test_two_pages() { + let actual = MergeConfig::default() + .should_merge_disk_segments(&[MockPage { size: 1 }, MockPage { size: 2 }]); + assert_eq!(actual, None); + } + + #[test] + fn test_three_pages() { + let actual = MergeConfig::default().should_merge_disk_segments(&[ + MockPage { size: 1 }, + MockPage { size: 2 }, + MockPage { size: 3 }, + ]); + assert_eq!(actual, None); + } + + #[test] + fn dont_merge_small_page_into_big() { + let actual = MergeConfig::default().should_merge_disk_segments(&[ + MockPage { size: 1 }, + MockPage { size: 2_000_000 }, + MockPage { size: 3_000_000 }, + MockPage { size: 4_000_000 }, + ]); + assert_eq!(actual, Some((1, 2))); + } + + #[test] + fn dont_merge_big_page_into_small() { + let actual = MergeConfig::default().should_merge_disk_segments(&[ + MockPage { size: 1_000_000 }, + MockPage { size: 2 }, + MockPage { size: 3 }, + MockPage { size: 4 }, + ]); + assert_eq!(actual, Some((1, 2))); + } + + #[test] + fn skip_when_too_large() { + let actual = MergeConfig::default().should_merge_disk_segments(&[ + MockPage { size: 1 }, + MockPage { size: 600_000_000 }, + MockPage { size: 600_000_000 }, + MockPage { size: 600_000_000 }, + MockPage { size: 600_000_000 }, + ]); + assert_eq!(actual, None); + } + + #[test] + fn merge_the_small_ones() { + let actual = MergeConfig::default().should_merge_disk_segments(&[ + MockPage { size: 60_000_000 }, + MockPage { size: 1 }, + MockPage { size: 60_000_000 }, + MockPage { size: 60_000_000 }, + MockPage { size: 1 }, + MockPage { size: 60_000_000 }, + ]); + assert_eq!(actual, Some((1, 4))); + } + + #[test] + fn merge_small_pages() { + let actual = MergeConfig::default().should_merge_disk_segments(&[ + MockPage { size: 1 }, + MockPage { size: 2 }, + MockPage { size: 3 }, + MockPage { size: 4 }, + MockPage { size: 5 }, + ]); + assert_eq!(actual, Some((0, 1))); + } +} diff --git a/db4-storage/src/persist/mod.rs b/db4-storage/src/persist/mod.rs index 54eb972285..755834f64d 100644 --- a/db4-storage/src/persist/mod.rs +++ b/db4-storage/src/persist/mod.rs @@ -1 +1,2 @@ +pub mod merge; pub mod strategy; From d080a2684714031dcbd1e30702f6b6330d6b958d Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 20 Jan 2026 16:34:49 -0500 Subject: [PATCH 53/95] Modify Graph::new to take merge_config --- db4-graph/src/lib.rs | 3 ++- db4-storage/src/pages/mod.rs | 28 +++++++++++------------- db4-storage/src/persist/merge.rs | 6 ----- db4-storage/src/persist/strategy.rs | 18 ++++++++++++--- db4-storage/src/segments/node/segment.rs | 3 ++- raphtory/src/db/api/storage/storage.rs | 8 ++++--- raphtory/src/db/api/view/graph.rs | 7 ++++-- raphtory/src/db/graph/graph.rs | 6 ++++- 8 files changed, 47 insertions(+), 32 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 4f3e19cbc6..bc64fcb063 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -23,6 +23,7 @@ use storage::{ }, }, persist::strategy::{PersistenceConfig, PersistenceStrategy}, + persist::merge::MergeConfig, resolver::GIDResolverOps, transaction::TransactionManager, wal::Wal, @@ -93,7 +94,7 @@ impl Default for TemporalGraph { fn default() -> Self { let config = PersistenceConfig::default(); let wal = Arc::new(WalType::new(None).unwrap()); - Self::new(Extension::new(config, wal)).unwrap() + Self::new(Extension::new(config, MergeConfig::default(), wal)).unwrap() } } diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 63d4b28f04..58773edd53 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -3,6 +3,7 @@ use crate::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, + persist::merge::MergeConfig, persist::strategy::PersistenceStrategy, properties::props_meta_writer::PropsMetaWriter, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, @@ -592,15 +593,12 @@ pub fn row_group_par_iter>( mod test { use super::GraphStore; use crate::{ - Extension, Layer, - api::nodes::{NodeEntryOps, NodeRefOps}, - pages::test_utils::{ - AddEdge, Fixture, NodeFixture, check_edges_support, check_graph_with_nodes_support, - check_graph_with_props_support, edges_strat, edges_strat_with_layers, make_edges, - make_nodes, + api::nodes::{NodeEntryOps, NodeRefOps}, pages::test_utils::{ + check_edges_support, check_graph_with_nodes_support, check_graph_with_props_support, edges_strat, edges_strat_with_layers, make_edges, make_nodes, AddEdge, Fixture, NodeFixture }, - persist::strategy::{DEFAULT_MAX_MEMORY_BYTES, PersistenceConfig, PersistenceStrategy}, - wal::no_wal::NoWal, + persist::strategy::{PersistenceConfig, PersistenceStrategy, DEFAULT_MAX_MEMORY_BYTES}, + persist::merge::MergeConfig, + wal::no_wal::NoWal, Extension, Layer, }; use chrono::DateTime; use proptest::prelude::*; @@ -643,7 +641,7 @@ mod test { chunk_size, chunk_size, ); - Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) + Layer::new(Some(graph_dir), Extension::new(config, MergeConfig::default(), Arc::new(NoWal))) }) } @@ -658,7 +656,7 @@ mod test { chunk_size, chunk_size, ); - Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) + Layer::new(Some(graph_dir), Extension::new(config, MergeConfig::default(), Arc::new(NoWal))) }) } @@ -733,7 +731,7 @@ mod test { let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); let g = Layer::new( Some(graph_dir.path()), - Extension::new(config, Arc::new(NoWal)), + Extension::new(config, MergeConfig::default(), Arc::new(NoWal)), ); g.add_edge(4, 7, 3).unwrap(); assert_eq!(g.nodes().num_nodes(), 2); @@ -745,7 +743,7 @@ mod test { let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); let g = GraphStore::new( Some(graph_dir.path()), - Extension::new(config, Arc::new(NoWal)), + Extension::new(config, MergeConfig::default(), Arc::new(NoWal)), ); g.add_edge(4, 7, 3).unwrap(); @@ -791,7 +789,7 @@ mod test { let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); let g = Layer::new( Some(graph_dir.path()), - Extension::new(config, Arc::new(NoWal)), + Extension::new(config, MergeConfig::default(), Arc::new(NoWal)), ); g.add_node_props::(1, 0, 0, vec![]) .expect("Failed to add node props"); @@ -1600,7 +1598,7 @@ mod test { node_page_len, edge_page_len, ); - Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) + Layer::new(Some(path), Extension::new(config, MergeConfig::default(), Arc::new(NoWal))) }); } @@ -1611,7 +1609,7 @@ mod test { node_page_len, edge_page_len, ); - Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) + Layer::new(Some(path), Extension::new(config, MergeConfig::default(), Arc::new(NoWal))) }); } } diff --git a/db4-storage/src/persist/merge.rs b/db4-storage/src/persist/merge.rs index 40cfdde153..923e091431 100644 --- a/db4-storage/src/persist/merge.rs +++ b/db4-storage/src/persist/merge.rs @@ -40,12 +40,6 @@ impl MergeConfig { } } -pub trait HasMergeConfig { - fn merge_config(&self) -> &MergeConfig; - - fn with_merge_config(self, config: MergeConfig) -> Self; -} - pub trait HasSize { fn size(&self) -> usize; } diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 8d4cc23c3b..4acef95a0e 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -1,6 +1,7 @@ use crate::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, + persist::merge::MergeConfig, segments::{ edge::segment::{EdgeSegmentView, MemEdgeSegment}, graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, @@ -98,10 +99,12 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { type GS: GraphPropSegmentOps; type WalType: Wal; - fn new(config: PersistenceConfig, wal: Arc) -> Self; + fn new(config: PersistenceConfig, merge_config: MergeConfig, wal: Arc) -> Self; fn config(&self) -> &PersistenceConfig; + fn merge_config(&self) -> &MergeConfig; + fn wal(&self) -> &Self::WalType; fn persist_node_segment>( @@ -132,6 +135,7 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { #[derive(Debug, Clone)] pub struct NoOpStrategy { config: PersistenceConfig, + merge_config: MergeConfig, wal: Arc, } @@ -141,14 +145,22 @@ impl PersistenceStrategy for NoOpStrategy { type GS = GraphPropSegmentView; type WalType = NoWal; - fn new(config: PersistenceConfig, wal: Arc) -> Self { - Self { config, wal } + fn new(config: PersistenceConfig, merge_config: MergeConfig, wal: Arc) -> Self { + Self { + config, + merge_config, + wal, + } } fn config(&self) -> &PersistenceConfig { &self.config } + fn merge_config(&self) -> &MergeConfig { + &self.merge_config + } + fn wal(&self) -> &Self::WalType { &self.wal } diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 77b1fe418d..bcb688adb7 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -585,6 +585,7 @@ mod test { LocalPOS, NodeSegmentView, api::nodes::NodeSegmentOps, pages::{layer_counter::GraphStats, node_page::writer::NodeWriter}, + persist::merge::MergeConfig, persist::strategy::{ DEFAULT_MAX_MEMORY_BYTES, NoOpStrategy, PersistenceConfig, PersistenceStrategy, }, @@ -604,7 +605,7 @@ mod test { let edge_meta = Arc::new(Meta::default()); let path = tempdir().unwrap(); let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 10, 10); - let ext = NoOpStrategy::new(config, Arc::new(NoWal)); + let ext = NoOpStrategy::new(config, MergeConfig::default(), Arc::new(NoWal)); let segment = NodeSegmentView::new( 0, node_meta.clone(), diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 8097ab8188..9ed72ec285 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -43,6 +43,7 @@ use storage::{ }; pub use storage::{ + persist::merge::MergeConfig, persist::strategy::{PersistenceConfig, PersistenceStrategy}, Extension, }; @@ -113,7 +114,7 @@ impl Storage { let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); - let ext = Extension::new(config, wal.clone()); + let ext = Extension::new(config, MergeConfig::default(), wal.clone()); let temporal_graph = TemporalGraph::new_at_path_with_ext(path, ext)?; Ok(Self { @@ -126,11 +127,12 @@ impl Storage { pub(crate) fn new_at_path_with_config( path: impl AsRef, config: PersistenceConfig, + merge_config: MergeConfig, ) -> Result { let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); - let ext = Extension::new(config, wal.clone()); + let ext = Extension::new(config, merge_config, wal.clone()); let temporal_graph = TemporalGraph::new_at_path_with_ext(path, ext)?; Ok(Self { @@ -146,7 +148,7 @@ impl Storage { let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); let wal = Arc::new(WalType::load(Some(wal_dir.as_path()))?); - let ext = Extension::new(config, wal.clone()); + let ext = Extension::new(config, MergeConfig::default(), wal.clone()); let temporal_graph = TemporalGraph::load_from_path(path, ext)?; // Replay any pending writes from the WAL. diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 7c393426f4..c4bfe575cc 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -58,7 +58,10 @@ use std::{ path::Path, sync::{atomic::Ordering, Arc}, }; -use storage::{persist::strategy::PersistenceStrategy, wal::Wal, Extension, WalType}; +use storage::{ + persist::merge::MergeConfig, + persist::strategy::PersistenceStrategy, wal::Wal, Extension, WalType, +}; #[cfg(feature = "search")] use crate::{ @@ -300,7 +303,7 @@ fn materialize_impl( let wal_dir = graph_dir.map(|dir| dir.wal_dir()); let wal = WalType::new(wal_dir.as_deref())?; let config = storage.extension().config().clone(); - let ext = Extension::new(config, Arc::new(wal)); + let ext = Extension::new(config, MergeConfig::default(), Arc::new(wal)); let temporal_graph = TemporalGraph::new_with_meta( path.map(|p| p.into()), diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index 42052a5517..238d91d297 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -49,6 +49,7 @@ use std::{ sync::Arc, }; use storage::{ + persist::merge::MergeConfig, persist::strategy::{PersistenceConfig, PersistenceStrategy}, Extension, }; @@ -165,6 +166,7 @@ impl Graph { pub fn new_at_path_with_config( path: &(impl GraphPaths + ?Sized), config: PersistenceConfig, + merge_config: MergeConfig, ) -> Result { if !Extension::disk_storage_enabled() { return Err(GraphError::DiskGraphNotEnabled); @@ -176,6 +178,7 @@ impl Graph { inner: Arc::new(Storage::new_at_path_with_config( path.graph_path()?, config, + merge_config, )?), }; @@ -296,7 +299,8 @@ pub fn assert_node_equal_layer< n1.earliest_time(), n2.earliest_time() ); - // This doesn't hold for materialised windowed PersistentGraph (node is still present after the end of the window) + // This doesn't hold for materialised windowed PersistentGraph + // (node is still present after the end of the window) assert_eq!( n1.latest_time(), n2.latest_time(), From f30440aae90ee800514ffed8793a35714287e976 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 21 Jan 2026 12:37:12 -0500 Subject: [PATCH 54/95] Document re-exports --- raphtory/src/db/api/storage/storage.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 9ed72ec285..4f0fcc4fa4 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -42,11 +42,13 @@ use storage::{ WalType, }; +// Re-export for raphtory dependencies to use when creating graphs. pub use storage::{ persist::merge::MergeConfig, persist::strategy::{PersistenceConfig, PersistenceStrategy}, Extension, }; + #[cfg(feature = "search")] use { crate::{ From 41364f9ccddc78b748bf04e61916ee91df6a6d18 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 21 Jan 2026 12:44:28 -0500 Subject: [PATCH 55/95] Rename Wal trait to WalOps --- db4-graph/src/lib.rs | 4 ++-- db4-storage/src/lib.rs | 2 +- db4-storage/src/persist/strategy.rs | 14 +++++++------- db4-storage/src/wal/entry.rs | 4 ++-- db4-storage/src/wal/mod.rs | 4 ++-- db4-storage/src/wal/no_wal.rs | 4 ++-- raphtory/src/db/api/mutation/addition_ops.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 2 +- raphtory/src/db/api/view/graph.rs | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index bc64fcb063..b8026edd71 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -26,7 +26,7 @@ use storage::{ persist::merge::MergeConfig, resolver::GIDResolverOps, transaction::TransactionManager, - wal::Wal, + wal::WalOps, Extension, GIDResolver, Layer, ReadLockedLayer, WalType, ES, GS, NS, }; use tempfile::TempDir; @@ -205,7 +205,7 @@ where self.storage().extension() } - pub fn wal(&self) -> &EXT::WalType { + pub fn wal(&self) -> &EXT::Wal { self.storage().extension().wal() } diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index 5ffb46bff9..95a721adb7 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -53,7 +53,7 @@ pub type ES

= EdgeSegmentView

; pub type GS

= GraphPropSegmentView

; pub type Layer

= GraphStore, ES

, GS

, P>; -pub type WalType = ::WalType; +pub type WalType = ::Wal; pub type GIDResolver = MappingResolver; pub type ReadLockedLayer

= ReadLockedGraphStore, ES

, GS

, P>; diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 4acef95a0e..606a9fd9dc 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -7,7 +7,7 @@ use crate::{ graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, node::segment::{MemNodeSegment, NodeSegmentView}, }, - wal::{Wal, no_wal::NoWal}, + wal::{WalOps, no_wal::NoWal}, }; use serde::{Deserialize, Serialize}; use std::{fmt::Debug, ops::DerefMut, path::Path, sync::Arc}; @@ -97,15 +97,15 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { type NS: NodeSegmentOps; type ES: EdgeSegmentOps; type GS: GraphPropSegmentOps; - type WalType: Wal; + type Wal: WalOps; - fn new(config: PersistenceConfig, merge_config: MergeConfig, wal: Arc) -> Self; + fn new(config: PersistenceConfig, merge_config: MergeConfig, wal: Arc) -> Self; fn config(&self) -> &PersistenceConfig; fn merge_config(&self) -> &MergeConfig; - fn wal(&self) -> &Self::WalType; + fn wal(&self) -> &Self::Wal; fn persist_node_segment>( &self, @@ -143,9 +143,9 @@ impl PersistenceStrategy for NoOpStrategy { type ES = EdgeSegmentView; type NS = NodeSegmentView; type GS = GraphPropSegmentView; - type WalType = NoWal; + type Wal = NoWal; - fn new(config: PersistenceConfig, merge_config: MergeConfig, wal: Arc) -> Self { + fn new(config: PersistenceConfig, merge_config: MergeConfig, wal: Arc) -> Self { Self { config, merge_config, @@ -161,7 +161,7 @@ impl PersistenceStrategy for NoOpStrategy { &self.merge_config } - fn wal(&self) -> &Self::WalType { + fn wal(&self) -> &Self::Wal { &self.wal } diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index be151e4754..41b33522ab 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -6,10 +6,10 @@ use raphtory_core::{ use crate::{ error::StorageError, - wal::{GraphReplay, GraphWal, LSN, TransactionID, no_wal::NoWal}, + wal::{GraphReplay, GraphWalOps, LSN, TransactionID, no_wal::NoWal}, }; -impl GraphWal for NoWal { +impl GraphWalOps for NoWal { type ReplayEntry = (); fn log_add_edge( diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 86382d3fcf..e4da842cd4 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -13,7 +13,7 @@ pub type LSN = u64; pub type TransactionID = u64; /// Core Wal methods. -pub trait Wal { +pub trait WalOps { fn new(dir: Option<&Path>) -> Result where Self: Sized; @@ -71,7 +71,7 @@ impl ReplayRecord { } // Raphtory-specific logging & replay methods. -pub trait GraphWal { +pub trait GraphWalOps { /// ReplayEntry represents the type of the wal entry returned during replay. type ReplayEntry; diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs index dbc66b8ffe..ffd7b1ef3c 100644 --- a/db4-storage/src/wal/no_wal.rs +++ b/db4-storage/src/wal/no_wal.rs @@ -2,7 +2,7 @@ use std::path::Path; use crate::{ error::StorageError, - wal::{LSN, ReplayRecord, Wal}, + wal::{LSN, ReplayRecord, WalOps}, }; /// `NoWAL` is a no-op WAL implementation that discards all writes. @@ -10,7 +10,7 @@ use crate::{ #[derive(Debug)] pub struct NoWal; -impl Wal for NoWal { +impl WalOps for NoWal { fn new(_dir: Option<&Path>) -> Result { Ok(Self) } diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index eccab420e6..020dda5a0d 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -20,7 +20,7 @@ use raphtory_storage::mutation::{ durability_ops::DurabilityOps, MutationError, }; -use storage::wal::{GraphWal, Wal}; +use storage::wal::{GraphWalOps, WalOps}; pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> + DurabilityOps diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 4f0fcc4fa4..de7277ab75 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -38,7 +38,7 @@ use std::{ }; use storage::{ transaction::TransactionManager, - wal::{GraphWal, Wal, LSN}, + wal::{GraphWalOps, WalOps, LSN}, WalType, }; diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index c4bfe575cc..b3b22d8b3e 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -60,7 +60,7 @@ use std::{ }; use storage::{ persist::merge::MergeConfig, - persist::strategy::PersistenceStrategy, wal::Wal, Extension, WalType, + persist::strategy::PersistenceStrategy, wal::WalOps, Extension, WalType, }; #[cfg(feature = "search")] From dc45af95b26f2f23213277bc64c61c6bda27b624 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 21 Jan 2026 13:56:18 -0500 Subject: [PATCH 56/95] Rename WalType to Wal --- db4-graph/src/lib.rs | 4 ++-- db4-storage/src/lib.rs | 2 +- raphtory-storage/src/mutation/addition_ops_ext.rs | 4 ++-- raphtory-storage/src/mutation/durability_ops.rs | 8 ++++---- raphtory/src/db/api/storage/storage.rs | 10 +++++----- raphtory/src/db/api/view/graph.rs | 4 ++-- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index b8026edd71..64f0673423 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -27,7 +27,7 @@ use storage::{ resolver::GIDResolverOps, transaction::TransactionManager, wal::WalOps, - Extension, GIDResolver, Layer, ReadLockedLayer, WalType, ES, GS, NS, + Extension, GIDResolver, Layer, ReadLockedLayer, Wal, ES, GS, NS, }; use tempfile::TempDir; @@ -93,7 +93,7 @@ impl<'a> From<&'a Path> for GraphDir { impl Default for TemporalGraph { fn default() -> Self { let config = PersistenceConfig::default(); - let wal = Arc::new(WalType::new(None).unwrap()); + let wal = Arc::new(Wal::new(None).unwrap()); Self::new(Extension::new(config, MergeConfig::default(), wal)).unwrap() } } diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index 95a721adb7..599474efd2 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -53,7 +53,7 @@ pub type ES

= EdgeSegmentView

; pub type GS

= GraphPropSegmentView

; pub type Layer

= GraphStore, ES

, GS

, P>; -pub type WalType = ::Wal; +pub type Wal = ::Wal; pub type GIDResolver = MappingResolver; pub type ReadLockedLayer

= ReadLockedGraphStore, ES

, GS

, P>; diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 680275793e..148f5fad44 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -27,7 +27,7 @@ use storage::{ resolver::GIDResolverOps, transaction::TransactionManager, wal::LSN, - Extension, WalType, ES, GS, NS, + Extension, Wal, ES, GS, NS, }; pub struct WriteS<'a, EXT> @@ -369,7 +369,7 @@ impl DurabilityOps for TemporalGraph { &self.transaction_manager } - fn wal(&self) -> &WalType { + fn wal(&self) -> &Wal { &self.extension().wal() } } diff --git a/raphtory-storage/src/mutation/durability_ops.rs b/raphtory-storage/src/mutation/durability_ops.rs index c31e578624..5a1be35226 100644 --- a/raphtory-storage/src/mutation/durability_ops.rs +++ b/raphtory-storage/src/mutation/durability_ops.rs @@ -1,12 +1,12 @@ use crate::graph::graph::GraphStorage; use raphtory_api::inherit::Base; -use storage::{transaction::TransactionManager, WalType}; +use storage::{transaction::TransactionManager, Wal}; /// Accessor methods for transactions and write-ahead logging. pub trait DurabilityOps { fn transaction_manager(&self) -> &TransactionManager; - fn wal(&self) -> &WalType; + fn wal(&self) -> &Wal; } impl DurabilityOps for GraphStorage { @@ -14,7 +14,7 @@ impl DurabilityOps for GraphStorage { self.mutable().unwrap().transaction_manager.as_ref() } - fn wal(&self) -> &WalType { + fn wal(&self) -> &Wal { self.mutable().unwrap().wal() } } @@ -31,7 +31,7 @@ where } #[inline] - fn wal(&self) -> &WalType { + fn wal(&self) -> &Wal { self.base().wal() } } diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index de7277ab75..4b9431918d 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -39,7 +39,7 @@ use std::{ use storage::{ transaction::TransactionManager, wal::{GraphWalOps, WalOps, LSN}, - WalType, + Wal, }; // Re-export for raphtory dependencies to use when creating graphs. @@ -115,7 +115,7 @@ impl Storage { let config = PersistenceConfig::default(); let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); - let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); + let wal = Arc::new(Wal::new(Some(wal_dir.as_path()))?); let ext = Extension::new(config, MergeConfig::default(), wal.clone()); let temporal_graph = TemporalGraph::new_at_path_with_ext(path, ext)?; @@ -133,7 +133,7 @@ impl Storage { ) -> Result { let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); - let wal = Arc::new(WalType::new(Some(wal_dir.as_path()))?); + let wal = Arc::new(Wal::new(Some(wal_dir.as_path()))?); let ext = Extension::new(config, merge_config, wal.clone()); let temporal_graph = TemporalGraph::new_at_path_with_ext(path, ext)?; @@ -149,7 +149,7 @@ impl Storage { .unwrap_or_else(|_| PersistenceConfig::default()); let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); - let wal = Arc::new(WalType::load(Some(wal_dir.as_path()))?); + let wal = Arc::new(Wal::load(Some(wal_dir.as_path()))?); let ext = Extension::new(config, MergeConfig::default(), wal.clone()); let temporal_graph = TemporalGraph::load_from_path(path, ext)?; @@ -579,7 +579,7 @@ impl DurabilityOps for Storage { self.graph.mutable().unwrap().transaction_manager.as_ref() } - fn wal(&self) -> &WalType { + fn wal(&self) -> &Wal { self.graph.mutable().unwrap().wal() } } diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index b3b22d8b3e..deaba90f41 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -60,7 +60,7 @@ use std::{ }; use storage::{ persist::merge::MergeConfig, - persist::strategy::PersistenceStrategy, wal::WalOps, Extension, WalType, + persist::strategy::PersistenceStrategy, wal::WalOps, Extension, Wal, }; #[cfg(feature = "search")] @@ -301,7 +301,7 @@ fn materialize_impl( // Create new WAL file for the new materialized graph. let graph_dir = path.map(|p| GraphDir::from(p)); let wal_dir = graph_dir.map(|dir| dir.wal_dir()); - let wal = WalType::new(wal_dir.as_deref())?; + let wal = Wal::new(wal_dir.as_deref())?; let config = storage.extension().config().clone(); let ext = Extension::new(config, MergeConfig::default(), Arc::new(wal)); From 18dad10b320e350135521ca0abb8ac8979755e12 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 21 Jan 2026 17:42:55 -0500 Subject: [PATCH 57/95] Cleanup configs in OS --- db4-graph/src/lib.rs | 9 +- db4-graph/src/replay.rs | 4 +- db4-storage/src/lib.rs | 1 + db4-storage/src/pages/edge_store.rs | 4 +- db4-storage/src/pages/mod.rs | 103 ++++---- db4-storage/src/pages/node_store.rs | 4 +- db4-storage/src/persist/merge.rs | 229 ------------------ db4-storage/src/persist/mod.rs | 1 - db4-storage/src/persist/strategy.rs | 138 ++++------- db4-storage/src/segments/edge/segment.rs | 2 +- db4-storage/src/segments/node/segment.rs | 10 +- .../src/mutation/addition_ops_ext.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 20 +- raphtory/src/db/api/view/graph.rs | 2 +- raphtory/src/db/graph/graph.rs | 9 +- 15 files changed, 134 insertions(+), 404 deletions(-) delete mode 100644 db4-storage/src/persist/merge.rs diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 64f0673423..4de3f6a369 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -22,12 +22,11 @@ use storage::{ nodes::WriteLockedNodePages, }, }, - persist::strategy::{PersistenceConfig, PersistenceStrategy}, - persist::merge::MergeConfig, + persist::strategy::{PersistenceStrategy}, resolver::GIDResolverOps, transaction::TransactionManager, wal::WalOps, - Extension, GIDResolver, Layer, ReadLockedLayer, Wal, ES, GS, NS, + Extension, GIDResolver, Layer, ReadLockedLayer, Wal, ES, GS, NS, Config, }; use tempfile::TempDir; @@ -92,9 +91,9 @@ impl<'a> From<&'a Path> for GraphDir { impl Default for TemporalGraph { fn default() -> Self { - let config = PersistenceConfig::default(); + let config = Config::default(); let wal = Arc::new(Wal::new(None).unwrap()); - Self::new(Extension::new(config, MergeConfig::default(), wal)).unwrap() + Self::new(Extension::new(config, wal)).unwrap() } } diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 2995fe4ec6..e9725bdf47 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -43,8 +43,8 @@ where props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError> { let temporal_graph = self.graph(); - let node_max_page_len = temporal_graph.extension().config().max_node_page_len; - let edge_max_page_len = temporal_graph.extension().config().max_edge_page_len; + let node_max_page_len = temporal_graph.extension().max_node_page_len(); + let edge_max_page_len = temporal_graph.extension().max_edge_page_len(); // 1. Insert prop ids into edge meta. // No need to validate props again since they are already validated before diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index 599474efd2..33100cf651 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -54,6 +54,7 @@ pub type GS

= GraphPropSegmentView

; pub type Layer

= GraphStore, ES

, GS

, P>; pub type Wal = ::Wal; +pub type Config = ::Config; pub type GIDResolver = MappingResolver; pub type ReadLockedLayer

= ReadLockedGraphStore, ES

, GS

, P>; diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 0517983022..09557a54ca 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -223,7 +223,7 @@ impl, EXT: PersistenceStrategy> EdgeStorageI pub fn load(edges_path: impl AsRef, ext: EXT) -> Result { let edges_path = edges_path.as_ref(); - let max_page_len = ext.config().max_edge_page_len; + let max_page_len = ext.max_edge_page_len(); let meta = Arc::new(Meta::new_for_edges()); @@ -417,7 +417,7 @@ impl, EXT: PersistenceStrategy> EdgeStorageI #[inline(always)] pub fn max_page_len(&self) -> u32 { - self.ext.config().max_edge_page_len + self.ext.max_edge_page_len() } pub fn write_locked<'a>(&'a self) -> WriteLockedEdgePages<'a, ES> { diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 58773edd53..bb1528a8a5 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -3,7 +3,6 @@ use crate::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, - persist::merge::MergeConfig, persist::strategy::PersistenceStrategy, properties::props_meta_writer::PropsMetaWriter, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, @@ -73,12 +72,9 @@ impl< > GraphStore { pub fn flush(&self) -> Result<(), StorageError> { - let node_types = self.nodes.prop_meta().get_all_node_types(); - let config = self.ext.config().with_node_types(node_types); - - if let Some(graph_dir) = self.graph_dir.as_ref() { - config.save_to_dir(graph_dir)?; - } + // Config saving for WriteAndMerge is handled in db4-disk-storage's implementation + // This generic code in db4-storage doesn't have access to WriteAndMergeConfig types + // to avoid circular dependencies. For NoOpStrategy, config saving is not needed. self.nodes.flush()?; self.edges.flush()?; @@ -147,9 +143,12 @@ impl< )); if let Some(graph_dir) = graph_dir { - ext.config() - .save_to_dir(graph_dir) - .expect("Unrecoverable! Failed to write graph config"); + // Config saving for WriteAndMerge is handled in db4-disk-storage's implementation + // This generic code in db4-storage doesn't have access to WriteAndMergeConfig types + // to avoid circular dependencies. For NoOpStrategy, config saving is not needed. + if EXT::disk_storage_enabled() { + // Config will be saved by db4-disk-storage's GraphStore implementation + } } Self { @@ -176,8 +175,10 @@ impl< let graph_prop_storage = Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); - for node_type in ext.config().node_types().iter() { - node_meta.get_or_create_node_type_id(node_type); + // Node types handling for WriteAndMerge is done in db4-disk-storage + // For NoOpStrategy, disk_storage_enabled() returns false, so this is skipped + if EXT::disk_storage_enabled() { + // Node types will be handled by db4-disk-storage's implementation } let t_len = edge_storage.t_len(); @@ -594,10 +595,10 @@ mod test { use super::GraphStore; use crate::{ api::nodes::{NodeEntryOps, NodeRefOps}, pages::test_utils::{ - check_edges_support, check_graph_with_nodes_support, check_graph_with_props_support, edges_strat, edges_strat_with_layers, make_edges, make_nodes, AddEdge, Fixture, NodeFixture + check_edges_support, check_graph_with_nodes_support, check_graph_with_props_support, + edges_strat, edges_strat_with_layers, make_edges, make_nodes, AddEdge, Fixture, NodeFixture }, - persist::strategy::{PersistenceConfig, PersistenceStrategy, DEFAULT_MAX_MEMORY_BYTES}, - persist::merge::MergeConfig, + persist::strategy::{PersistenceStrategy}, wal::no_wal::NoWal, Extension, Layer, }; use chrono::DateTime; @@ -636,12 +637,12 @@ mod test { .collect(); check_edges_support(edges, par_load, false, |graph_dir| { - let config = PersistenceConfig::new_with_page_lens( - DEFAULT_MAX_MEMORY_BYTES, - chunk_size, - chunk_size, - ); - Layer::new(Some(graph_dir), Extension::new(config, MergeConfig::default(), Arc::new(NoWal))) + use crate::persist::strategy::NoOpConfig; + let config = NoOpConfig { + max_node_page_len: chunk_size, + max_edge_page_len: chunk_size, + }; + Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -651,12 +652,12 @@ mod test { par_load: bool, ) { check_edges_support(edges, par_load, false, |graph_dir| { - let config = PersistenceConfig::new_with_page_lens( - DEFAULT_MAX_MEMORY_BYTES, - chunk_size, - chunk_size, - ); - Layer::new(Some(graph_dir), Extension::new(config, MergeConfig::default(), Arc::new(NoWal))) + use crate::persist::strategy::NoOpConfig; + let config = NoOpConfig { + max_node_page_len: chunk_size, + max_edge_page_len: chunk_size, + }; + Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -728,10 +729,14 @@ mod test { #[test] fn test_add_one_edge_get_num_nodes() { let graph_dir = tempfile::tempdir().unwrap(); - let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); + use crate::persist::strategy::NoOpConfig; + let config = NoOpConfig { + max_node_page_len: 32, + max_edge_page_len: 32, + }; let g = Layer::new( Some(graph_dir.path()), - Extension::new(config, MergeConfig::default(), Arc::new(NoWal)), + Extension::new(config, Arc::new(NoWal)), ); g.add_edge(4, 7, 3).unwrap(); assert_eq!(g.nodes().num_nodes(), 2); @@ -740,10 +745,14 @@ mod test { #[test] fn test_node_additions_1() { let graph_dir = tempfile::tempdir().unwrap(); - let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); + use crate::persist::strategy::NoOpConfig; + let config = NoOpConfig { + max_node_page_len: 32, + max_edge_page_len: 32, + }; let g = GraphStore::new( Some(graph_dir.path()), - Extension::new(config, MergeConfig::default(), Arc::new(NoWal)), + Extension::new(config, Arc::new(NoWal)), ); g.add_edge(4, 7, 3).unwrap(); @@ -786,10 +795,14 @@ mod test { #[test] fn node_temporal_props() { let graph_dir = tempfile::tempdir().unwrap(); - let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 32, 32); + use crate::persist::strategy::NoOpConfig; + let config = NoOpConfig { + max_node_page_len: 32, + max_edge_page_len: 32, + }; let g = Layer::new( Some(graph_dir.path()), - Extension::new(config, MergeConfig::default(), Arc::new(NoWal)), + Extension::new(config, Arc::new(NoWal)), ); g.add_node_props::(1, 0, 0, vec![]) .expect("Failed to add node props"); @@ -1593,23 +1606,23 @@ mod test { fn check_graph_with_nodes(node_page_len: u32, edge_page_len: u32, fixture: &NodeFixture) { check_graph_with_nodes_support(fixture, false, |path| { - let config = PersistenceConfig::new_with_page_lens( - DEFAULT_MAX_MEMORY_BYTES, - node_page_len, - edge_page_len, - ); - Layer::new(Some(path), Extension::new(config, MergeConfig::default(), Arc::new(NoWal))) + use crate::persist::strategy::NoOpConfig; + let config = NoOpConfig { + max_node_page_len: node_page_len, + max_edge_page_len: edge_page_len, + }; + Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } fn check_graph_with_props(node_page_len: u32, edge_page_len: u32, fixture: &Fixture) { check_graph_with_props_support(fixture, false, |path| { - let config = PersistenceConfig::new_with_page_lens( - DEFAULT_MAX_MEMORY_BYTES, - node_page_len, - edge_page_len, - ); - Layer::new(Some(path), Extension::new(config, MergeConfig::default(), Arc::new(NoWal))) + use crate::persist::strategy::NoOpConfig; + let config = NoOpConfig { + max_node_page_len: node_page_len, + max_edge_page_len: edge_page_len, + }; + Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } } diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index a82d419bb1..6e762a4523 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -161,7 +161,7 @@ impl NodeStorageInner { } pub fn max_segment_len(&self) -> u32 { - self.ext.config().max_node_page_len + self.ext.max_node_page_len() } } @@ -335,7 +335,7 @@ impl, EXT: PersistenceStrategy> NodeStorageI ext: EXT, ) -> Result { let nodes_path = nodes_path.as_ref(); - let max_page_len = ext.config().max_node_page_len; + let max_page_len = ext.max_node_page_len(); let node_meta = Arc::new(Meta::new_for_nodes()); if !nodes_path.exists() { diff --git a/db4-storage/src/persist/merge.rs b/db4-storage/src/persist/merge.rs deleted file mode 100644 index 923e091431..0000000000 --- a/db4-storage/src/persist/merge.rs +++ /dev/null @@ -1,229 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct MergeConfig { - /// Maximum number of components before triggering a merge (size-tiered policy) - pub component_count_threshold: usize, - /// Maximum size ratio between smallest and largest components to consider merging - pub size_ratio_threshold: f64, - /// Maximum size of a component that can be considered for merging (in MB) - pub max_component_size_mb: usize, - /// Prefer merging smaller components first - pub prefer_small_merges: bool, -} - -impl Default for MergeConfig { - fn default() -> Self { - MergeConfig { - component_count_threshold: 4, - size_ratio_threshold: 10.0, - max_component_size_mb: 500, - prefer_small_merges: true, - } - } -} - -impl MergeConfig { - pub fn with_max_component_size_mb(mut self, size_mb: usize) -> Self { - self.max_component_size_mb = size_mb; - self - } - - pub fn unlimited_component_size(mut self) -> Self { - self.max_component_size_mb = usize::MAX / 1024 / 1024; - self - } - - pub fn with_component_count_threshold(mut self, threshold: usize) -> Self { - self.component_count_threshold = threshold; - self - } -} - -pub trait HasSize { - fn size(&self) -> usize; -} - -impl MergeConfig { - /// Determines if disk components should be merged based on values in the config. - /// Returns Option<(usize, usize)> with the indexes of pages that should be merged. - pub fn should_merge_disk_segments( - &self, - disk_segments: &[T], - ) -> Option<(usize, usize)> { - let pages_len = disk_segments.len(); - - // Need at least 2 pages to merge - if pages_len < 2 { - return None; - } - - // Size-tiered check: enough components to trigger merge? - let enough_components = pages_len >= self.component_count_threshold; - - if !enough_components { - return None; - } - - // Collect sizes of all pages - let mut page_sizes: Vec<(usize, usize)> = disk_segments - .iter() - .enumerate() - .map(|(idx, page)| (idx, page.size())) - .collect(); - - // Sort by size for easy comparison and selection - page_sizes.sort_by_key(|(_, size)| *size); - - // Max component size in bytes - let max_size_bytes = self.max_component_size_mb * 1024 * 1024; - - // Find candidate pairs for merging - let mut merge_candidates = Vec::new(); - - // Compare adjacent components for possible merges - for i in 0..page_sizes.len() - 1 { - let (idx1, size1) = page_sizes[i]; - - for j in i + 1..page_sizes.len() { - let (idx2, size2) = page_sizes[j]; - - // Skip if either component is too large - if size1 > max_size_bytes || size2 > max_size_bytes { - continue; - } - - // Skip if combined size exceeds max size - if size1 + size2 > max_size_bytes { - continue; - } - - // Calculate size ratio - let ratio = size2 as f64 / size1 as f64; - - // If size ratio is within threshold, add as candidate - if ratio <= self.size_ratio_threshold { - // Score this candidate pair (lower is better) - let score = if self.prefer_small_merges { - // Prefer smaller components - size1 + size2 - } else { - // Prefer larger ratio differences (more balanced) - ((self.size_ratio_threshold - ratio) * 1000.0) as usize - }; - - merge_candidates.push((idx1, idx2, score)); - } - } - } - - // If we have candidates, choose the best one - if !merge_candidates.is_empty() { - // Sort by score (lower is better) - merge_candidates.sort_by_key(|(_, _, score)| *score); - let (idx1, idx2, _) = merge_candidates[0]; - - // Return the page indexes to merge - return Some((idx1, idx2)); - } - - None - } -} - -#[cfg(test)] -mod test { - use super::*; - - struct MockPage { - size: usize, - } - - impl HasSize for MockPage { - fn size(&self) -> usize { - self.size - } - } - - #[test] - fn test_one_page() { - let actual = MergeConfig::default().should_merge_disk_segments(&[MockPage { size: 1 }]); - assert_eq!(actual, None); - } - - #[test] - fn test_two_pages() { - let actual = MergeConfig::default() - .should_merge_disk_segments(&[MockPage { size: 1 }, MockPage { size: 2 }]); - assert_eq!(actual, None); - } - - #[test] - fn test_three_pages() { - let actual = MergeConfig::default().should_merge_disk_segments(&[ - MockPage { size: 1 }, - MockPage { size: 2 }, - MockPage { size: 3 }, - ]); - assert_eq!(actual, None); - } - - #[test] - fn dont_merge_small_page_into_big() { - let actual = MergeConfig::default().should_merge_disk_segments(&[ - MockPage { size: 1 }, - MockPage { size: 2_000_000 }, - MockPage { size: 3_000_000 }, - MockPage { size: 4_000_000 }, - ]); - assert_eq!(actual, Some((1, 2))); - } - - #[test] - fn dont_merge_big_page_into_small() { - let actual = MergeConfig::default().should_merge_disk_segments(&[ - MockPage { size: 1_000_000 }, - MockPage { size: 2 }, - MockPage { size: 3 }, - MockPage { size: 4 }, - ]); - assert_eq!(actual, Some((1, 2))); - } - - #[test] - fn skip_when_too_large() { - let actual = MergeConfig::default().should_merge_disk_segments(&[ - MockPage { size: 1 }, - MockPage { size: 600_000_000 }, - MockPage { size: 600_000_000 }, - MockPage { size: 600_000_000 }, - MockPage { size: 600_000_000 }, - ]); - assert_eq!(actual, None); - } - - #[test] - fn merge_the_small_ones() { - let actual = MergeConfig::default().should_merge_disk_segments(&[ - MockPage { size: 60_000_000 }, - MockPage { size: 1 }, - MockPage { size: 60_000_000 }, - MockPage { size: 60_000_000 }, - MockPage { size: 1 }, - MockPage { size: 60_000_000 }, - ]); - assert_eq!(actual, Some((1, 4))); - } - - #[test] - fn merge_small_pages() { - let actual = MergeConfig::default().should_merge_disk_segments(&[ - MockPage { size: 1 }, - MockPage { size: 2 }, - MockPage { size: 3 }, - MockPage { size: 4 }, - MockPage { size: 5 }, - ]); - assert_eq!(actual, Some((0, 1))); - } -} diff --git a/db4-storage/src/persist/mod.rs b/db4-storage/src/persist/mod.rs index 755834f64d..54eb972285 100644 --- a/db4-storage/src/persist/mod.rs +++ b/db4-storage/src/persist/mod.rs @@ -1,2 +1 @@ -pub mod merge; pub mod strategy; diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 606a9fd9dc..55cfaa040b 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -1,7 +1,5 @@ use crate::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, - error::StorageError, - persist::merge::MergeConfig, segments::{ edge::segment::{EdgeSegmentView, MemEdgeSegment}, graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, @@ -9,101 +7,23 @@ use crate::{ }, wal::{WalOps, no_wal::NoWal}, }; +use crate::error::StorageError; use serde::{Deserialize, Serialize}; use std::{fmt::Debug, ops::DerefMut, path::Path, sync::Arc}; pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 -pub const DEFAULT_MAX_MEMORY_BYTES: usize = 32 * 1024 * 1024; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PersistenceConfig { - pub max_node_page_len: u32, - pub max_edge_page_len: u32, - pub max_memory_bytes: usize, - pub bg_flush_enabled: bool, - pub node_types: Vec, -} - -impl Default for PersistenceConfig { - fn default() -> Self { - Self { - max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, - max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, - max_memory_bytes: DEFAULT_MAX_MEMORY_BYTES, - bg_flush_enabled: true, - node_types: Vec::new(), - } - } -} - -impl PersistenceConfig { - const CONFIG_FILE: &str = "persistence_config.json"; - - pub fn load_from_dir(dir: impl AsRef) -> Result { - let config_file = dir.as_ref().join(Self::CONFIG_FILE); - let config_file = std::fs::File::open(config_file)?; - let config = serde_json::from_reader(config_file)?; - Ok(config) - } - - pub fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError> { - let config_file = dir.as_ref().join(Self::CONFIG_FILE); - let config_file = std::fs::File::create(&config_file)?; - serde_json::to_writer_pretty(config_file, self)?; - Ok(()) - } - - pub fn new_with_memory(max_memory_bytes: usize) -> Self { - Self { - max_memory_bytes, - ..Default::default() - } - } - - pub fn new_with_page_lens( - max_memory_bytes: usize, - max_node_page_len: u32, - max_edge_page_len: u32, - ) -> Self { - Self { - max_memory_bytes, - max_node_page_len, - max_edge_page_len, - ..Default::default() - } - } - - pub fn with_bg_flush(mut self) -> Self { - self.bg_flush_enabled = true; - self - } - - pub fn node_types(&self) -> &[String] { - &self.node_types - } - - pub fn with_node_types(&self, types: impl IntoIterator>) -> Self { - let node_types = types.into_iter().map(|s| s.as_ref().to_string()).collect(); - - Self { - node_types, - ..*self - } - } -} pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { type NS: NodeSegmentOps; type ES: EdgeSegmentOps; type GS: GraphPropSegmentOps; type Wal: WalOps; + type Config: ConfigOps; - fn new(config: PersistenceConfig, merge_config: MergeConfig, wal: Arc) -> Self; + fn new(config: Self::Config, wal: Arc) -> Self; - fn config(&self) -> &PersistenceConfig; - - fn merge_config(&self) -> &MergeConfig; + fn config(&self) -> &Self::Config; fn wal(&self) -> &Self::Wal; @@ -132,35 +52,67 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { fn disk_storage_enabled() -> bool; } +pub trait ConfigOps: Serialize + Deserialize<'static> { + fn load_from_dir(dir: impl AsRef) -> Result; + + fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError>; +} + #[derive(Debug, Clone)] pub struct NoOpStrategy { - config: PersistenceConfig, - merge_config: MergeConfig, + config: NoOpConfig, wal: Arc, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NoOpConfig { + pub max_node_page_len: u32, + pub max_edge_page_len: u32, +} + +impl NoOpConfig { + pub fn new(max_node_page_len: u32, max_edge_page_len: u32) -> Self { + Self { max_node_page_len, max_edge_page_len } + } +} + +impl Default for NoOpConfig { + fn default() -> Self { + Self { + max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, + max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, + } + } +} + +impl ConfigOps for NoOpConfig { + fn load_from_dir(_dir: impl AsRef) -> Result { + Ok(Self::default()) + } + + fn save_to_dir(&self, _dir: impl AsRef) -> Result<(), StorageError> { + Ok(()) + } +} + impl PersistenceStrategy for NoOpStrategy { type ES = EdgeSegmentView; type NS = NodeSegmentView; type GS = GraphPropSegmentView; type Wal = NoWal; + type Config = NoOpConfig; - fn new(config: PersistenceConfig, merge_config: MergeConfig, wal: Arc) -> Self { + fn new(config: Self::Config, wal: Arc) -> Self { Self { config, - merge_config, wal, } } - fn config(&self) -> &PersistenceConfig { + fn config(&self) -> &Self::Config { &self.config } - fn merge_config(&self) -> &MergeConfig { - &self.merge_config - } - fn wal(&self) -> &Self::Wal { &self.wal } diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index 5f6767b37a..f1e567da6e 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -440,7 +440,7 @@ impl>> EdgeSegmentOps for EdgeSeg } fn new(page_id: usize, meta: Arc, _path: Option, ext: Self::Extension) -> Self { - let max_page_len = ext.config().max_edge_page_len; + let max_page_len = ext.max_edge_page_len(); Self { segment: parking_lot::RwLock::new(MemEdgeSegment::new(page_id, max_page_len, meta)) .into(), diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index bcb688adb7..7d2b75a615 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -585,9 +585,8 @@ mod test { LocalPOS, NodeSegmentView, api::nodes::NodeSegmentOps, pages::{layer_counter::GraphStats, node_page::writer::NodeWriter}, - persist::merge::MergeConfig, persist::strategy::{ - DEFAULT_MAX_MEMORY_BYTES, NoOpStrategy, PersistenceConfig, PersistenceStrategy, + NoOpConfig, NoOpStrategy, PersistenceStrategy, }, wal::no_wal::NoWal, }; @@ -604,10 +603,11 @@ mod test { let node_meta = Arc::new(Meta::default()); let edge_meta = Arc::new(Meta::default()); let path = tempdir().unwrap(); - let config = PersistenceConfig::new_with_page_lens(DEFAULT_MAX_MEMORY_BYTES, 10, 10); - let ext = NoOpStrategy::new(config, MergeConfig::default(), Arc::new(NoWal)); + let config = NoOpConfig::new(10, 10); + let ext = NoOpStrategy::new(config, Arc::new(NoWal)); + let segment_id = 0; let segment = NodeSegmentView::new( - 0, + segment_id, node_meta.clone(), edge_meta, Some(path.path().to_path_buf()), diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 148f5fad44..43f29ee160 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -220,7 +220,7 @@ impl InternalAdditionOps for TemporalGraph { self.event_counter .fetch_add(1, std::sync::atomic::Ordering::Relaxed), ); - pos.as_vid(seg, self.extension().config().max_node_page_len) + pos.as_vid(seg, self.extension().max_node_page_len()) })?; Ok(id) diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 4b9431918d..01cf1fb1f0 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -40,13 +40,12 @@ use storage::{ transaction::TransactionManager, wal::{GraphWalOps, WalOps, LSN}, Wal, + ConfigOps, }; // Re-export for raphtory dependencies to use when creating graphs. pub use storage::{ - persist::merge::MergeConfig, - persist::strategy::{PersistenceConfig, PersistenceStrategy}, - Extension, + Extension, Config, PersistenceStrategy }; #[cfg(feature = "search")] @@ -112,11 +111,11 @@ impl Storage { } pub(crate) fn new_at_path(path: impl AsRef) -> Result { - let config = PersistenceConfig::default(); + let config = Config::default(); let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); let wal = Arc::new(Wal::new(Some(wal_dir.as_path()))?); - let ext = Extension::new(config, MergeConfig::default(), wal.clone()); + let ext = Extension::new(config, wal.clone()); let temporal_graph = TemporalGraph::new_at_path_with_ext(path, ext)?; Ok(Self { @@ -128,13 +127,12 @@ impl Storage { pub(crate) fn new_at_path_with_config( path: impl AsRef, - config: PersistenceConfig, - merge_config: MergeConfig, + config: Config, ) -> Result { let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); let wal = Arc::new(Wal::new(Some(wal_dir.as_path()))?); - let ext = Extension::new(config, merge_config, wal.clone()); + let ext = Extension::new(config, wal.clone()); let temporal_graph = TemporalGraph::new_at_path_with_ext(path, ext)?; Ok(Self { @@ -145,12 +143,12 @@ impl Storage { } pub(crate) fn load_from(path: impl AsRef) -> Result { - let config = PersistenceConfig::load_from_dir(path.as_ref()) - .unwrap_or_else(|_| PersistenceConfig::default()); + let config = Config::load_from_dir(path.as_ref()) + .unwrap_or_else(|_| Config::default()); let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); let wal = Arc::new(Wal::load(Some(wal_dir.as_path()))?); - let ext = Extension::new(config, MergeConfig::default(), wal.clone()); + let ext = Extension::new(config, wal.clone()); let temporal_graph = TemporalGraph::load_from_path(path, ext)?; // Replay any pending writes from the WAL. diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index deaba90f41..522f727da9 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -303,7 +303,7 @@ fn materialize_impl( let wal_dir = graph_dir.map(|dir| dir.wal_dir()); let wal = Wal::new(wal_dir.as_deref())?; let config = storage.extension().config().clone(); - let ext = Extension::new(config, MergeConfig::default(), Arc::new(wal)); + let ext = Extension::new(config, Arc::new(wal)); let temporal_graph = TemporalGraph::new_with_meta( path.map(|p| p.into()), diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index 238d91d297..d4b9b27950 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -49,9 +49,8 @@ use std::{ sync::Arc, }; use storage::{ - persist::merge::MergeConfig, - persist::strategy::{PersistenceConfig, PersistenceStrategy}, - Extension, + persist::strategy::PersistenceStrategy, + Extension, Config, }; #[repr(transparent)] @@ -165,8 +164,7 @@ impl Graph { #[cfg(feature = "io")] pub fn new_at_path_with_config( path: &(impl GraphPaths + ?Sized), - config: PersistenceConfig, - merge_config: MergeConfig, + config: Config, ) -> Result { if !Extension::disk_storage_enabled() { return Err(GraphError::DiskGraphNotEnabled); @@ -178,7 +176,6 @@ impl Graph { inner: Arc::new(Storage::new_at_path_with_config( path.graph_path()?, config, - merge_config, )?), }; From dc2f834e3b9691a7c4d5f26b3c8c49f7d0397d4a Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 22 Jan 2026 09:48:09 -0500 Subject: [PATCH 58/95] Use separate ConfigOps --- db4-graph/src/lib.rs | 2 +- db4-graph/src/replay.rs | 7 +- db4-storage/src/pages/edge_store.rs | 9 +- db4-storage/src/pages/mod.rs | 44 ++---- db4-storage/src/pages/node_store.rs | 9 +- db4-storage/src/persist/config.rs | 138 ++++++++++++++++++ db4-storage/src/persist/mod.rs | 1 + db4-storage/src/persist/strategy.rs | 45 +----- db4-storage/src/segments/edge/segment.rs | 8 +- db4-storage/src/segments/node/segment.rs | 23 ++- .../src/mutation/addition_ops_ext.rs | 4 +- raphtory/src/db/api/storage/storage.rs | 6 +- 12 files changed, 194 insertions(+), 102 deletions(-) create mode 100644 db4-storage/src/persist/config.rs diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 4de3f6a369..976fde4931 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -22,7 +22,7 @@ use storage::{ nodes::WriteLockedNodePages, }, }, - persist::strategy::{PersistenceStrategy}, + PersistenceStrategy, resolver::GIDResolverOps, transaction::TransactionManager, wal::WalOps, diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index e9725bdf47..9564bd17d1 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -15,10 +15,11 @@ use storage::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::resolve_pos, - persist::strategy::PersistenceStrategy, + PersistenceStrategy, resolver::GIDResolverOps, wal::{GraphReplay, TransactionID, LSN}, ES, GS, NS, + persist::config::ConfigOps, }; impl GraphReplay for WriteLockedGraph<'_, EXT> @@ -43,8 +44,8 @@ where props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError> { let temporal_graph = self.graph(); - let node_max_page_len = temporal_graph.extension().max_node_page_len(); - let edge_max_page_len = temporal_graph.extension().max_edge_page_len(); + let node_max_page_len = temporal_graph.extension().config().persistence().max_node_page_len; + let edge_max_page_len = temporal_graph.extension().config().persistence().max_edge_page_len; // 1. Insert prop ids into edge meta. // No need to validate props again since they are already validated before diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 09557a54ca..4584e05a1d 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -9,7 +9,10 @@ use crate::{ locked::edges::{LockedEdgePage, WriteLockedEdgePages}, row_group_par_iter, }, - persist::strategy::PersistenceStrategy, + persist::{ + config::ConfigOps, + strategy::PersistenceStrategy, + }, segments::edge::segment::MemEdgeSegment, }; use parking_lot::{RwLock, RwLockWriteGuard}; @@ -223,7 +226,7 @@ impl, EXT: PersistenceStrategy> EdgeStorageI pub fn load(edges_path: impl AsRef, ext: EXT) -> Result { let edges_path = edges_path.as_ref(); - let max_page_len = ext.max_edge_page_len(); + let max_page_len = ext.config().persistence().max_edge_page_len; let meta = Arc::new(Meta::new_for_edges()); @@ -417,7 +420,7 @@ impl, EXT: PersistenceStrategy> EdgeStorageI #[inline(always)] pub fn max_page_len(&self) -> u32 { - self.ext.max_edge_page_len() + self.ext.config().persistence().max_edge_page_len } pub fn write_locked<'a>(&'a self) -> WriteLockedEdgePages<'a, ES> { diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index bb1528a8a5..d29e19c840 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -602,11 +602,13 @@ mod test { wal::no_wal::NoWal, Extension, Layer, }; use chrono::DateTime; + use crate::persist::config::NoOpConfig; use proptest::prelude::*; use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{entities::VID, storage::timeindex::TimeIndexOps}; use rayon::iter::ParallelIterator; use std::sync::Arc; + use tempfile; #[test] fn test_iterleave() { @@ -637,11 +639,7 @@ mod test { .collect(); check_edges_support(edges, par_load, false, |graph_dir| { - use crate::persist::strategy::NoOpConfig; - let config = NoOpConfig { - max_node_page_len: chunk_size, - max_edge_page_len: chunk_size, - }; + let config = NoOpConfig::new(chunk_size, chunk_size); Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -652,11 +650,7 @@ mod test { par_load: bool, ) { check_edges_support(edges, par_load, false, |graph_dir| { - use crate::persist::strategy::NoOpConfig; - let config = NoOpConfig { - max_node_page_len: chunk_size, - max_edge_page_len: chunk_size, - }; + let config = NoOpConfig::new(chunk_size, chunk_size); Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -729,11 +723,7 @@ mod test { #[test] fn test_add_one_edge_get_num_nodes() { let graph_dir = tempfile::tempdir().unwrap(); - use crate::persist::strategy::NoOpConfig; - let config = NoOpConfig { - max_node_page_len: 32, - max_edge_page_len: 32, - }; + let config = NoOpConfig::new(32, 32); let g = Layer::new( Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal)), @@ -745,11 +735,7 @@ mod test { #[test] fn test_node_additions_1() { let graph_dir = tempfile::tempdir().unwrap(); - use crate::persist::strategy::NoOpConfig; - let config = NoOpConfig { - max_node_page_len: 32, - max_edge_page_len: 32, - }; + let config = NoOpConfig::new(32, 32); let g = GraphStore::new( Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal)), @@ -795,11 +781,7 @@ mod test { #[test] fn node_temporal_props() { let graph_dir = tempfile::tempdir().unwrap(); - use crate::persist::strategy::NoOpConfig; - let config = NoOpConfig { - max_node_page_len: 32, - max_edge_page_len: 32, - }; + let config = NoOpConfig::new(32, 32); let g = Layer::new( Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal)), @@ -1606,22 +1588,14 @@ mod test { fn check_graph_with_nodes(node_page_len: u32, edge_page_len: u32, fixture: &NodeFixture) { check_graph_with_nodes_support(fixture, false, |path| { - use crate::persist::strategy::NoOpConfig; - let config = NoOpConfig { - max_node_page_len: node_page_len, - max_edge_page_len: edge_page_len, - }; + let config = NoOpConfig::new(node_page_len, edge_page_len); Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } fn check_graph_with_props(node_page_len: u32, edge_page_len: u32, fixture: &Fixture) { check_graph_with_props_support(fixture, false, |path| { - use crate::persist::strategy::NoOpConfig; - let config = NoOpConfig { - max_node_page_len: node_page_len, - max_edge_page_len: edge_page_len, - }; + let config = NoOpConfig::new(node_page_len, edge_page_len); Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 6e762a4523..83b35c5c7f 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -9,7 +9,10 @@ use crate::{ locked::nodes::{LockedNodePage, WriteLockedNodePages}, row_group_par_iter, }, - persist::strategy::PersistenceStrategy, + persist::{ + config::ConfigOps, + strategy::PersistenceStrategy, + }, segments::node::segment::MemNodeSegment, }; use parking_lot::{RwLock, RwLockWriteGuard}; @@ -161,7 +164,7 @@ impl NodeStorageInner { } pub fn max_segment_len(&self) -> u32 { - self.ext.max_node_page_len() + self.ext.config().persistence().max_node_page_len } } @@ -335,7 +338,7 @@ impl, EXT: PersistenceStrategy> NodeStorageI ext: EXT, ) -> Result { let nodes_path = nodes_path.as_ref(); - let max_page_len = ext.max_node_page_len(); + let max_page_len = ext.config().persistence().max_node_page_len; let node_meta = Arc::new(Meta::new_for_nodes()); if !nodes_path.exists() { diff --git a/db4-storage/src/persist/config.rs b/db4-storage/src/persist/config.rs new file mode 100644 index 0000000000..5e58ab02b4 --- /dev/null +++ b/db4-storage/src/persist/config.rs @@ -0,0 +1,138 @@ +use crate::error::StorageError; +use serde::{Deserialize, Serialize}; +use std::path::Path; + +pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 +pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 +pub const DEFAULT_MAX_MEMORY_BYTES: usize = 32 * 1024 * 1024; + +pub trait ConfigOps: Serialize + Deserialize<'static> { + fn persistence(&self) -> &PersistenceConfig; + + fn load_from_dir(dir: impl AsRef) -> Result; + fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError>; +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PersistenceConfig { + pub max_node_page_len: u32, + pub max_edge_page_len: u32, + pub max_memory_bytes: usize, + pub bg_flush_enabled: bool, + pub node_types: Vec, +} + +impl Default for PersistenceConfig { + fn default() -> Self { + Self { + max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, + max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, + max_memory_bytes: DEFAULT_MAX_MEMORY_BYTES, + bg_flush_enabled: true, + node_types: Vec::new(), + } + } +} + +impl PersistenceConfig { + const CONFIG_FILE: &str = "persistence_config.json"; + + pub fn load_from_dir(dir: impl AsRef) -> Result { + let config_file = dir.as_ref().join(Self::CONFIG_FILE); + let config_file = std::fs::File::open(config_file)?; + let config = serde_json::from_reader(config_file)?; + Ok(config) + } + + pub fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError> { + let config_file = dir.as_ref().join(Self::CONFIG_FILE); + let config_file = std::fs::File::create(&config_file)?; + serde_json::to_writer_pretty(config_file, self)?; + Ok(()) + } + + pub fn new_with_memory(max_memory_bytes: usize) -> Self { + Self { + max_memory_bytes, + ..Default::default() + } + } + + pub fn new_with_page_lens( + max_memory_bytes: usize, + max_node_page_len: u32, + max_edge_page_len: u32, + ) -> Self { + Self { + max_memory_bytes, + max_node_page_len, + max_edge_page_len, + ..Default::default() + } + } + + pub fn with_bg_flush(mut self) -> Self { + self.bg_flush_enabled = true; + self + } + + pub fn node_types(&self) -> &[String] { + &self.node_types + } + + pub fn with_node_types(&self, types: impl IntoIterator>) -> Self { + let node_types = types.into_iter().map(|s| s.as_ref().to_string()).collect(); + + Self { + node_types, + ..*self + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NoOpConfig { + persistence: PersistenceConfig, +} + +impl NoOpConfig { + pub fn new(max_node_page_len: u32, max_edge_page_len: u32) -> Self { + let persistence = PersistenceConfig { + max_node_page_len, + max_edge_page_len, + max_memory_bytes: usize::MAX, + bg_flush_enabled: false, + node_types: Vec::new(), + }; + + Self { persistence } + } +} + +impl Default for NoOpConfig { + fn default() -> Self { + let persistence = PersistenceConfig { + max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, + max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, + max_memory_bytes: usize::MAX, + bg_flush_enabled: false, + node_types: Vec::new(), + }; + + Self { persistence } + } +} + +impl ConfigOps for NoOpConfig { + fn persistence(&self) -> &PersistenceConfig { + &self.persistence + } + + fn load_from_dir(_dir: impl AsRef) -> Result { + Ok(Self::default()) + } + + fn save_to_dir(&self, _dir: impl AsRef) -> Result<(), StorageError> { + Ok(()) + } +} diff --git a/db4-storage/src/persist/mod.rs b/db4-storage/src/persist/mod.rs index 54eb972285..43275c62a7 100644 --- a/db4-storage/src/persist/mod.rs +++ b/db4-storage/src/persist/mod.rs @@ -1 +1,2 @@ +pub mod config; pub mod strategy; diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 55cfaa040b..5256b2762a 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -1,5 +1,6 @@ use crate::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, + persist::config::{ConfigOps, NoOpConfig}, segments::{ edge::segment::{EdgeSegmentView, MemEdgeSegment}, graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, @@ -7,12 +8,7 @@ use crate::{ }, wal::{WalOps, no_wal::NoWal}, }; -use crate::error::StorageError; -use serde::{Deserialize, Serialize}; -use std::{fmt::Debug, ops::DerefMut, path::Path, sync::Arc}; - -pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 -pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 +use std::{fmt::Debug, ops::DerefMut, sync::Arc}; pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { type NS: NodeSegmentOps; @@ -52,49 +48,12 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { fn disk_storage_enabled() -> bool; } -pub trait ConfigOps: Serialize + Deserialize<'static> { - fn load_from_dir(dir: impl AsRef) -> Result; - - fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError>; -} - #[derive(Debug, Clone)] pub struct NoOpStrategy { config: NoOpConfig, wal: Arc, } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct NoOpConfig { - pub max_node_page_len: u32, - pub max_edge_page_len: u32, -} - -impl NoOpConfig { - pub fn new(max_node_page_len: u32, max_edge_page_len: u32) -> Self { - Self { max_node_page_len, max_edge_page_len } - } -} - -impl Default for NoOpConfig { - fn default() -> Self { - Self { - max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, - max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, - } - } -} - -impl ConfigOps for NoOpConfig { - fn load_from_dir(_dir: impl AsRef) -> Result { - Ok(Self::default()) - } - - fn save_to_dir(&self, _dir: impl AsRef) -> Result<(), StorageError> { - Ok(()) - } -} - impl PersistenceStrategy for NoOpStrategy { type ES = EdgeSegmentView; type NS = NodeSegmentView; diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index f1e567da6e..beeef4ee3b 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -2,7 +2,10 @@ use crate::{ LocalPOS, api::edges::{EdgeSegmentOps, LockedESegment}, error::StorageError, - persist::strategy::PersistenceStrategy, + persist::{ + config::ConfigOps, + strategy::PersistenceStrategy, + }, properties::PropMutEntry, segments::{ HasRow, SegmentContainer, @@ -440,7 +443,8 @@ impl>> EdgeSegmentOps for EdgeSeg } fn new(page_id: usize, meta: Arc, _path: Option, ext: Self::Extension) -> Self { - let max_page_len = ext.max_edge_page_len(); + let max_page_len = ext.config().persistence().max_edge_page_len; + Self { segment: parking_lot::RwLock::new(MemEdgeSegment::new(page_id, max_page_len, meta)) .into(), diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 7d2b75a615..a54b96255c 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -4,7 +4,10 @@ use crate::{ error::StorageError, loop_lock_write, pages::node_store::increment_and_clamp, - persist::strategy::PersistenceStrategy, + persist::{ + config::ConfigOps, + strategy::PersistenceStrategy, + }, segments::{ HasRow, SegmentContainer, node::entry::{MemNodeEntry, MemNodeRef}, @@ -13,6 +16,7 @@ use crate::{ }; use either::Either; use parking_lot::lock_api::ArcRwLockReadGuard; +use parking_lot::RwLock; use raphtory_api::core::{ Direction, entities::{ @@ -439,7 +443,7 @@ impl>> NodeSegmentOps for NodeSeg } fn load( - _page_id: usize, + _segment_id: usize, _node_meta: Arc, _edge_meta: Arc, _path: impl AsRef, @@ -454,17 +458,19 @@ impl>> NodeSegmentOps for NodeSeg } fn new( - page_id: usize, + segment_id: usize, meta: Arc, _edge_meta: Arc, _path: Option, ext: Self::Extension, ) -> Self { - let max_page_len = ext.config().max_node_page_len; + let max_page_len = ext.config().persistence().max_node_page_len; + let inner = RwLock::new(MemNodeSegment::new(segment_id, max_page_len, meta)); + let inner = Arc::new(inner); + Self { - inner: parking_lot::RwLock::new(MemNodeSegment::new(page_id, max_page_len, meta)) - .into(), - segment_id: page_id, + inner, + segment_id, _ext: ext, max_num_node: AtomicU32::new(0), est_size: AtomicUsize::new(0), @@ -586,8 +592,9 @@ mod test { api::nodes::NodeSegmentOps, pages::{layer_counter::GraphStats, node_page::writer::NodeWriter}, persist::strategy::{ - NoOpConfig, NoOpStrategy, PersistenceStrategy, + NoOpStrategy, PersistenceStrategy, }, + persist::config::NoOpConfig, wal::no_wal::NoWal, }; use raphtory_api::core::entities::properties::{ diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 43f29ee160..52eeffb18a 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -22,7 +22,7 @@ use raphtory_core::{ use storage::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, pages::{node_page::writer::node_info_as_props, session::WriteSession}, - persist::strategy::PersistenceStrategy, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, properties::props_meta_writer::PropsMetaWriter, resolver::GIDResolverOps, transaction::TransactionManager, @@ -220,7 +220,7 @@ impl InternalAdditionOps for TemporalGraph { self.event_counter .fetch_add(1, std::sync::atomic::Ordering::Relaxed), ); - pos.as_vid(seg, self.extension().max_node_page_len()) + pos.as_vid(seg, self.extension().config().persistence().max_node_page_len) })?; Ok(id) diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 01cf1fb1f0..7b0d5a55a1 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -40,12 +40,14 @@ use storage::{ transaction::TransactionManager, wal::{GraphWalOps, WalOps, LSN}, Wal, - ConfigOps, + persist::config::ConfigOps, }; // Re-export for raphtory dependencies to use when creating graphs. pub use storage::{ - Extension, Config, PersistenceStrategy + Extension, Config, + persist::strategy::PersistenceStrategy, + persist::config::PersistenceConfig }; #[cfg(feature = "search")] From 70b1d07f50f3f962eb9d23670fb648d6801bde64 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 22 Jan 2026 10:01:56 -0500 Subject: [PATCH 59/95] Use consistent re-exports --- db4-graph/src/lib.rs | 2 +- db4-graph/src/replay.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 976fde4931..7a139639f8 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -22,7 +22,7 @@ use storage::{ nodes::WriteLockedNodePages, }, }, - PersistenceStrategy, + persist::strategy::PersistenceStrategy, resolver::GIDResolverOps, transaction::TransactionManager, wal::WalOps, diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 9564bd17d1..6d993d9332 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -15,7 +15,7 @@ use storage::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::resolve_pos, - PersistenceStrategy, + persist::strategy::PersistenceStrategy, resolver::GIDResolverOps, wal::{GraphReplay, TransactionID, LSN}, ES, GS, NS, From 94ffe230a58b083851549a4bf415b447418a2cb1 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 22 Jan 2026 10:20:38 -0500 Subject: [PATCH 60/95] Remove MergeConfig --- db4-storage/src/pages/test_utils/checkers.rs | 3 +++ raphtory/src/db/api/view/graph.rs | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/db4-storage/src/pages/test_utils/checkers.rs b/db4-storage/src/pages/test_utils/checkers.rs index 5530ebec64..67e7dfc57e 100644 --- a/db4-storage/src/pages/test_utils/checkers.rs +++ b/db4-storage/src/pages/test_utils/checkers.rs @@ -36,6 +36,7 @@ pub fn make_graph_from_edges< make_graph: impl FnOnce(&Path) -> GraphStore, ) -> GraphStore { let graph = make_graph(graph_dir); + for (_, _, layer) in edges { if let Some(layer) = layer { for layer in 0..=*layer { @@ -49,6 +50,7 @@ pub fn make_graph_from_edges< } } } + if par_load { edges .par_iter() @@ -85,6 +87,7 @@ pub fn make_graph_from_edges< }) .expect("Failed to add edge"); } + graph } diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 522f727da9..3e89422c83 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -59,7 +59,6 @@ use std::{ sync::{atomic::Ordering, Arc}, }; use storage::{ - persist::merge::MergeConfig, persist::strategy::PersistenceStrategy, wal::WalOps, Extension, Wal, }; From 7755d280362bdc888303a49f3f3956a06b104c66 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 22 Jan 2026 10:27:12 -0500 Subject: [PATCH 61/95] Remove silly comment --- db4-storage/src/pages/mod.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index d29e19c840..45fe447b73 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -72,10 +72,6 @@ impl< > GraphStore { pub fn flush(&self) -> Result<(), StorageError> { - // Config saving for WriteAndMerge is handled in db4-disk-storage's implementation - // This generic code in db4-storage doesn't have access to WriteAndMergeConfig types - // to avoid circular dependencies. For NoOpStrategy, config saving is not needed. - self.nodes.flush()?; self.edges.flush()?; self.graph_props.flush()?; From 7aa753bcc107dbf6151adfe316542350805038e4 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 22 Jan 2026 13:36:32 -0500 Subject: [PATCH 62/95] Use getter methods for PersistenceConfig --- db4-graph/src/replay.rs | 4 +- db4-storage/src/pages/edge_store.rs | 4 +- db4-storage/src/pages/node_store.rs | 4 +- db4-storage/src/persist/config.rs | 62 ++++++++++++------- db4-storage/src/segments/edge/segment.rs | 2 +- db4-storage/src/segments/node/segment.rs | 2 +- .../src/mutation/addition_ops_ext.rs | 2 +- 7 files changed, 48 insertions(+), 32 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 6d993d9332..a9a9c086b3 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -44,8 +44,8 @@ where props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError> { let temporal_graph = self.graph(); - let node_max_page_len = temporal_graph.extension().config().persistence().max_node_page_len; - let edge_max_page_len = temporal_graph.extension().config().persistence().max_edge_page_len; + let node_max_page_len = temporal_graph.extension().config().persistence().max_node_page_len(); + let edge_max_page_len = temporal_graph.extension().config().persistence().max_edge_page_len(); // 1. Insert prop ids into edge meta. // No need to validate props again since they are already validated before diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 4584e05a1d..4910aa9569 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -226,7 +226,7 @@ impl, EXT: PersistenceStrategy> EdgeStorageI pub fn load(edges_path: impl AsRef, ext: EXT) -> Result { let edges_path = edges_path.as_ref(); - let max_page_len = ext.config().persistence().max_edge_page_len; + let max_page_len = ext.config().persistence().max_edge_page_len(); let meta = Arc::new(Meta::new_for_edges()); @@ -420,7 +420,7 @@ impl, EXT: PersistenceStrategy> EdgeStorageI #[inline(always)] pub fn max_page_len(&self) -> u32 { - self.ext.config().persistence().max_edge_page_len + self.ext.config().persistence().max_edge_page_len() } pub fn write_locked<'a>(&'a self) -> WriteLockedEdgePages<'a, ES> { diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 83b35c5c7f..7864322afc 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -164,7 +164,7 @@ impl NodeStorageInner { } pub fn max_segment_len(&self) -> u32 { - self.ext.config().persistence().max_node_page_len + self.ext.config().persistence().max_node_page_len() } } @@ -338,7 +338,7 @@ impl, EXT: PersistenceStrategy> NodeStorageI ext: EXT, ) -> Result { let nodes_path = nodes_path.as_ref(); - let max_page_len = ext.config().persistence().max_node_page_len; + let max_page_len = ext.config().persistence().max_node_page_len(); let node_meta = Arc::new(Meta::new_for_nodes()); if !nodes_path.exists() { diff --git a/db4-storage/src/persist/config.rs b/db4-storage/src/persist/config.rs index 5e58ab02b4..af83dc2cdb 100644 --- a/db4-storage/src/persist/config.rs +++ b/db4-storage/src/persist/config.rs @@ -15,11 +15,11 @@ pub trait ConfigOps: Serialize + Deserialize<'static> { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PersistenceConfig { - pub max_node_page_len: u32, - pub max_edge_page_len: u32, - pub max_memory_bytes: usize, - pub bg_flush_enabled: bool, - pub node_types: Vec, + max_node_page_len: u32, + max_edge_page_len: u32, + max_memory_bytes: usize, + bg_flush_enabled: bool, + node_types: Vec, } impl Default for PersistenceConfig { @@ -37,20 +37,6 @@ impl Default for PersistenceConfig { impl PersistenceConfig { const CONFIG_FILE: &str = "persistence_config.json"; - pub fn load_from_dir(dir: impl AsRef) -> Result { - let config_file = dir.as_ref().join(Self::CONFIG_FILE); - let config_file = std::fs::File::open(config_file)?; - let config = serde_json::from_reader(config_file)?; - Ok(config) - } - - pub fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError> { - let config_file = dir.as_ref().join(Self::CONFIG_FILE); - let config_file = std::fs::File::create(&config_file)?; - serde_json::to_writer_pretty(config_file, self)?; - Ok(()) - } - pub fn new_with_memory(max_memory_bytes: usize) -> Self { Self { max_memory_bytes, @@ -71,15 +57,25 @@ impl PersistenceConfig { } } + pub fn load_from_dir(dir: impl AsRef) -> Result { + let config_file = dir.as_ref().join(Self::CONFIG_FILE); + let config_file = std::fs::File::open(config_file)?; + let config = serde_json::from_reader(config_file)?; + Ok(config) + } + + pub fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError> { + let config_file = dir.as_ref().join(Self::CONFIG_FILE); + let config_file = std::fs::File::create(&config_file)?; + serde_json::to_writer_pretty(config_file, self)?; + Ok(()) + } + pub fn with_bg_flush(mut self) -> Self { self.bg_flush_enabled = true; self } - pub fn node_types(&self) -> &[String] { - &self.node_types - } - pub fn with_node_types(&self, types: impl IntoIterator>) -> Self { let node_types = types.into_iter().map(|s| s.as_ref().to_string()).collect(); @@ -88,6 +84,26 @@ impl PersistenceConfig { ..*self } } + + pub fn max_node_page_len(&self) -> u32 { + self.max_node_page_len + } + + pub fn max_edge_page_len(&self) -> u32 { + self.max_edge_page_len + } + + pub fn max_memory_bytes(&self) -> usize { + self.max_memory_bytes + } + + pub fn bg_flush_enabled(&self) -> bool { + self.bg_flush_enabled + } + + pub fn node_types(&self) -> &[String] { + &self.node_types + } } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index beeef4ee3b..1ac6c279b1 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -443,7 +443,7 @@ impl>> EdgeSegmentOps for EdgeSeg } fn new(page_id: usize, meta: Arc, _path: Option, ext: Self::Extension) -> Self { - let max_page_len = ext.config().persistence().max_edge_page_len; + let max_page_len = ext.config().persistence().max_edge_page_len(); Self { segment: parking_lot::RwLock::new(MemEdgeSegment::new(page_id, max_page_len, meta)) diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index a54b96255c..66de03b431 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -464,7 +464,7 @@ impl>> NodeSegmentOps for NodeSeg _path: Option, ext: Self::Extension, ) -> Self { - let max_page_len = ext.config().persistence().max_node_page_len; + let max_page_len = ext.config().persistence().max_node_page_len(); let inner = RwLock::new(MemNodeSegment::new(segment_id, max_page_len, meta)); let inner = Arc::new(inner); diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 52eeffb18a..aab8c47fa2 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -220,7 +220,7 @@ impl InternalAdditionOps for TemporalGraph { self.event_counter .fetch_add(1, std::sync::atomic::Ordering::Relaxed), ); - pos.as_vid(seg, self.extension().config().persistence().max_node_page_len) + pos.as_vid(seg, self.extension().config().persistence().max_node_page_len()) })?; Ok(id) From 7abc7c6d72d5324a31a71f2fa625b7eecbe50421 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Thu, 22 Jan 2026 20:02:03 -0500 Subject: [PATCH 63/95] Run fmt --- db4-graph/src/lib.rs | 2 +- db4-graph/src/replay.rs | 15 +++++++++++---- db4-storage/src/pages/edge_store.rs | 5 +---- db4-storage/src/pages/mod.rs | 14 ++++++++------ db4-storage/src/pages/node_store.rs | 5 +---- db4-storage/src/persist/strategy.rs | 5 +---- db4-storage/src/segments/edge/segment.rs | 8 ++------ db4-storage/src/segments/node/segment.rs | 14 +++++--------- raphtory-storage/src/mutation/addition_ops_ext.rs | 5 ++++- raphtory/src/db/api/storage/storage.rs | 10 ++++------ raphtory/src/db/api/view/graph.rs | 4 +--- raphtory/src/db/graph/graph.rs | 5 +---- 12 files changed, 40 insertions(+), 52 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 7a139639f8..f56fc812f5 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -26,7 +26,7 @@ use storage::{ resolver::GIDResolverOps, transaction::TransactionManager, wal::WalOps, - Extension, GIDResolver, Layer, ReadLockedLayer, Wal, ES, GS, NS, Config, + Config, Extension, GIDResolver, Layer, ReadLockedLayer, Wal, ES, GS, NS, }; use tempfile::TempDir; diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index a9a9c086b3..93088f759f 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -15,11 +15,10 @@ use storage::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::resolve_pos, - persist::strategy::PersistenceStrategy, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, resolver::GIDResolverOps, wal::{GraphReplay, TransactionID, LSN}, ES, GS, NS, - persist::config::ConfigOps, }; impl GraphReplay for WriteLockedGraph<'_, EXT> @@ -44,8 +43,16 @@ where props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError> { let temporal_graph = self.graph(); - let node_max_page_len = temporal_graph.extension().config().persistence().max_node_page_len(); - let edge_max_page_len = temporal_graph.extension().config().persistence().max_edge_page_len(); + let node_max_page_len = temporal_graph + .extension() + .config() + .persistence() + .max_node_page_len(); + let edge_max_page_len = temporal_graph + .extension() + .config() + .persistence() + .max_edge_page_len(); // 1. Insert prop ids into edge meta. // No need to validate props again since they are already validated before diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 4910aa9569..19c95e6d0f 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -9,10 +9,7 @@ use crate::{ locked::edges::{LockedEdgePage, WriteLockedEdgePages}, row_group_par_iter, }, - persist::{ - config::ConfigOps, - strategy::PersistenceStrategy, - }, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, segments::edge::segment::MemEdgeSegment, }; use parking_lot::{RwLock, RwLockWriteGuard}; diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 45fe447b73..a3afcd79b1 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -590,15 +590,17 @@ pub fn row_group_par_iter>( mod test { use super::GraphStore; use crate::{ - api::nodes::{NodeEntryOps, NodeRefOps}, pages::test_utils::{ - check_edges_support, check_graph_with_nodes_support, check_graph_with_props_support, - edges_strat, edges_strat_with_layers, make_edges, make_nodes, AddEdge, Fixture, NodeFixture + Extension, Layer, + api::nodes::{NodeEntryOps, NodeRefOps}, + pages::test_utils::{ + AddEdge, Fixture, NodeFixture, check_edges_support, check_graph_with_nodes_support, + check_graph_with_props_support, edges_strat, edges_strat_with_layers, make_edges, + make_nodes, }, - persist::strategy::{PersistenceStrategy}, - wal::no_wal::NoWal, Extension, Layer, + persist::{config::NoOpConfig, strategy::PersistenceStrategy}, + wal::no_wal::NoWal, }; use chrono::DateTime; - use crate::persist::config::NoOpConfig; use proptest::prelude::*; use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{entities::VID, storage::timeindex::TimeIndexOps}; diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 7864322afc..62acae85e4 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -9,10 +9,7 @@ use crate::{ locked::nodes::{LockedNodePage, WriteLockedNodePages}, row_group_par_iter, }, - persist::{ - config::ConfigOps, - strategy::PersistenceStrategy, - }, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, segments::node::segment::MemNodeSegment, }; use parking_lot::{RwLock, RwLockWriteGuard}; diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 5256b2762a..46866ff017 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -62,10 +62,7 @@ impl PersistenceStrategy for NoOpStrategy { type Config = NoOpConfig; fn new(config: Self::Config, wal: Arc) -> Self { - Self { - config, - wal, - } + Self { config, wal } } fn config(&self) -> &Self::Config { diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index 1ac6c279b1..7de1eeffa6 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -2,10 +2,7 @@ use crate::{ LocalPOS, api::edges::{EdgeSegmentOps, LockedESegment}, error::StorageError, - persist::{ - config::ConfigOps, - strategy::PersistenceStrategy, - }, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, properties::PropMutEntry, segments::{ HasRow, SegmentContainer, @@ -562,8 +559,7 @@ impl>> EdgeSegmentOps for EdgeSeg #[cfg(test)] mod test { use super::*; - use raphtory_api::core::entities::properties::meta::Meta; - use raphtory_api::core::entities::properties::prop::PropType; + use raphtory_api::core::entities::properties::{meta::Meta, prop::PropType}; use raphtory_core::storage::timeindex::TimeIndexEntry; fn create_test_segment() -> MemEdgeSegment { diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 66de03b431..3f225eba22 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -4,10 +4,7 @@ use crate::{ error::StorageError, loop_lock_write, pages::node_store::increment_and_clamp, - persist::{ - config::ConfigOps, - strategy::PersistenceStrategy, - }, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, segments::{ HasRow, SegmentContainer, node::entry::{MemNodeEntry, MemNodeRef}, @@ -15,8 +12,7 @@ use crate::{ wal::LSN, }; use either::Either; -use parking_lot::lock_api::ArcRwLockReadGuard; -use parking_lot::RwLock; +use parking_lot::{RwLock, lock_api::ArcRwLockReadGuard}; use raphtory_api::core::{ Direction, entities::{ @@ -591,10 +587,10 @@ mod test { LocalPOS, NodeSegmentView, api::nodes::NodeSegmentOps, pages::{layer_counter::GraphStats, node_page::writer::NodeWriter}, - persist::strategy::{ - NoOpStrategy, PersistenceStrategy, + persist::{ + config::NoOpConfig, + strategy::{NoOpStrategy, PersistenceStrategy}, }, - persist::config::NoOpConfig, wal::no_wal::NoWal, }; use raphtory_api::core::entities::properties::{ diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index aab8c47fa2..d9c8d52796 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -220,7 +220,10 @@ impl InternalAdditionOps for TemporalGraph { self.event_counter .fetch_add(1, std::sync::atomic::Ordering::Relaxed), ); - pos.as_vid(seg, self.extension().config().persistence().max_node_page_len()) + pos.as_vid( + seg, + self.extension().config().persistence().max_node_page_len(), + ) })?; Ok(id) diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 7b0d5a55a1..6a1265a367 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -37,17 +37,16 @@ use std::{ sync::Arc, }; use storage::{ + persist::config::ConfigOps, transaction::TransactionManager, wal::{GraphWalOps, WalOps, LSN}, Wal, - persist::config::ConfigOps, }; // Re-export for raphtory dependencies to use when creating graphs. pub use storage::{ - Extension, Config, - persist::strategy::PersistenceStrategy, - persist::config::PersistenceConfig + persist::{config::PersistenceConfig, strategy::PersistenceStrategy}, + Config, Extension, }; #[cfg(feature = "search")] @@ -145,8 +144,7 @@ impl Storage { } pub(crate) fn load_from(path: impl AsRef) -> Result { - let config = Config::load_from_dir(path.as_ref()) - .unwrap_or_else(|_| Config::default()); + let config = Config::load_from_dir(path.as_ref()).unwrap_or_else(|_| Config::default()); let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); let wal = Arc::new(Wal::load(Some(wal_dir.as_path()))?); diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 3e89422c83..38a24f0f46 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -58,9 +58,7 @@ use std::{ path::Path, sync::{atomic::Ordering, Arc}, }; -use storage::{ - persist::strategy::PersistenceStrategy, wal::WalOps, Extension, Wal, -}; +use storage::{persist::strategy::PersistenceStrategy, wal::WalOps, Extension, Wal}; #[cfg(feature = "search")] use crate::{ diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index d4b9b27950..e98d1bb3ec 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -48,10 +48,7 @@ use std::{ ops::Deref, sync::Arc, }; -use storage::{ - persist::strategy::PersistenceStrategy, - Extension, Config, -}; +use storage::{persist::strategy::PersistenceStrategy, Config, Extension}; #[repr(transparent)] #[derive(Debug, Clone, Default)] From 8153f573f55ccacb896af6d0a59b47131f8755c5 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 23 Jan 2026 15:21:48 -0500 Subject: [PATCH 64/95] Fix up defaults --- db4-storage/src/persist/config.rs | 33 +++++++++++++------------------ 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/db4-storage/src/persist/config.rs b/db4-storage/src/persist/config.rs index af83dc2cdb..6f94dc0de8 100644 --- a/db4-storage/src/persist/config.rs +++ b/db4-storage/src/persist/config.rs @@ -28,7 +28,7 @@ impl Default for PersistenceConfig { max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, max_memory_bytes: DEFAULT_MAX_MEMORY_BYTES, - bg_flush_enabled: true, + bg_flush_enabled: false, node_types: Vec::new(), } } @@ -113,29 +113,24 @@ pub struct NoOpConfig { impl NoOpConfig { pub fn new(max_node_page_len: u32, max_edge_page_len: u32) -> Self { - let persistence = PersistenceConfig { - max_node_page_len, - max_edge_page_len, - max_memory_bytes: usize::MAX, - bg_flush_enabled: false, - node_types: Vec::new(), - }; - - Self { persistence } + Self { + persistence: PersistenceConfig { + max_node_page_len, + max_edge_page_len, + ..NoOpConfig::default().persistence + } + } } } impl Default for NoOpConfig { fn default() -> Self { - let persistence = PersistenceConfig { - max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, - max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, - max_memory_bytes: usize::MAX, - bg_flush_enabled: false, - node_types: Vec::new(), - }; - - Self { persistence } + Self { + persistence: PersistenceConfig { + max_memory_bytes: usize::MAX, + ..PersistenceConfig::default() + }, + } } } From a3040659b6d36513e85360f91f303fd85f00164b Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Fri, 23 Jan 2026 17:22:15 -0500 Subject: [PATCH 65/95] Run fmt --- db4-storage/src/persist/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db4-storage/src/persist/config.rs b/db4-storage/src/persist/config.rs index 6f94dc0de8..003a6fd6f7 100644 --- a/db4-storage/src/persist/config.rs +++ b/db4-storage/src/persist/config.rs @@ -118,7 +118,7 @@ impl NoOpConfig { max_node_page_len, max_edge_page_len, ..NoOpConfig::default().persistence - } + }, } } } From eb13fe244cc86a26b301a22760db6323d9606a72 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 26 Jan 2026 10:45:47 -0500 Subject: [PATCH 66/95] Rename bg_flush_enabled to bg_flush --- db4-storage/src/pages/node_store.rs | 1 + db4-storage/src/persist/config.rs | 10 +++++----- raphtory/src/db/api/mutation/addition_ops.rs | 1 - raphtory/src/db/api/view/graph.rs | 1 - raphtory/src/db/graph/graph.rs | 7 ++++++- raphtory/src/db/graph/nodes.rs | 1 + 6 files changed, 13 insertions(+), 8 deletions(-) diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 62acae85e4..9cdf85afaa 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -241,6 +241,7 @@ impl, EXT: PersistenceStrategy> NodeStorageI let lock_slot = self.free_segments[slot_idx].read_recursive(); let page_id = *lock_slot; let page = self.segments.get(page_id); + page.and_then(|page| { self.reserve_segment_row(page) .map(|pos| (page.segment_id(), LocalPOS(pos))) diff --git a/db4-storage/src/persist/config.rs b/db4-storage/src/persist/config.rs index 003a6fd6f7..8babe4d7af 100644 --- a/db4-storage/src/persist/config.rs +++ b/db4-storage/src/persist/config.rs @@ -18,7 +18,7 @@ pub struct PersistenceConfig { max_node_page_len: u32, max_edge_page_len: u32, max_memory_bytes: usize, - bg_flush_enabled: bool, + bg_flush: bool, node_types: Vec, } @@ -28,7 +28,7 @@ impl Default for PersistenceConfig { max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, max_memory_bytes: DEFAULT_MAX_MEMORY_BYTES, - bg_flush_enabled: false, + bg_flush: false, node_types: Vec::new(), } } @@ -72,7 +72,7 @@ impl PersistenceConfig { } pub fn with_bg_flush(mut self) -> Self { - self.bg_flush_enabled = true; + self.bg_flush = true; self } @@ -97,8 +97,8 @@ impl PersistenceConfig { self.max_memory_bytes } - pub fn bg_flush_enabled(&self) -> bool { - self.bg_flush_enabled + pub fn bg_flush(&self) -> bool { + self.bg_flush } pub fn node_types(&self) -> &[String] { diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index 020dda5a0d..16245123b5 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -303,7 +303,6 @@ impl> + StaticGraphViewOps + Dura let src_id = src_id.inner(); let dst_id = dst_id.inner(); - let layer_id = layer_id.inner(); // Hold all locks for src node, dst node and edge until add_edge_op goes out of scope. diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 38a24f0f46..b1235e25c0 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -324,7 +324,6 @@ fn materialize_impl( .set_event_id(storage.read_event_id()); let temporal_graph = Arc::new(temporal_graph); - let graph_storage = GraphStorage::from(temporal_graph.clone()); { diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index e98d1bb3ec..b71b24dc79 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -429,14 +429,17 @@ pub fn assert_nodes_equal_layer< persistent: bool, ) { let mut nodes1: Vec<_> = nodes1.collect(); - nodes1.sort(); let mut nodes2: Vec<_> = nodes2.collect(); + + nodes1.sort(); nodes2.sort(); + assert_eq!( nodes1.len(), nodes2.len(), "mismatched number of nodes{layer_tag}", ); + for (n1, n2) in nodes1.into_iter().zip(nodes2) { assert_node_equal_layer(n1, n2, layer_tag, persistent); } @@ -623,8 +626,10 @@ fn assert_graph_equal_inner<'graph, G1: GraphViewOps<'graph>, G2: GraphViewOps<' ) { black_box({ assert_graph_equal_layer(g1, g2, None, persistent); + let left_layers: HashSet<_> = g1.unique_layers().collect(); let right_layers: HashSet<_> = g2.unique_layers().collect(); + assert_eq!( left_layers, right_layers, "mismatched layers: left {:?}, right {:?}", diff --git a/raphtory/src/db/graph/nodes.rs b/raphtory/src/db/graph/nodes.rs index 7be0cb75a2..6dc67a31d5 100644 --- a/raphtory/src/db/graph/nodes.rs +++ b/raphtory/src/db/graph/nodes.rs @@ -200,6 +200,7 @@ where fn iter_vids(&self, g: GraphStorage) -> impl Iterator + Send + Sync + 'graph { let node_types_filter = self.node_types_filter.clone(); let view = self.graph.clone(); + self.node_list().nodes_iter(&g).filter(move |&vid| { g.try_core_node(vid).is_some_and(|node| { node_types_filter From 3269c40478b0930c4e36e2325aacacef169c2ead Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 26 Jan 2026 19:33:29 -0500 Subject: [PATCH 67/95] Fix node_types --- db4-storage/src/pages/mod.rs | 14 +++----------- db4-storage/src/segments/node/segment.rs | 4 ---- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index eeea5b17a2..31e74ef6ad 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -1,11 +1,5 @@ use crate::{ - LocalPOS, - api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, - error::StorageError, - pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, - persist::strategy::PersistenceStrategy, - properties::props_meta_writer::PropsMetaWriter, - segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, + api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, persist::{config::ConfigOps, strategy::PersistenceStrategy}, properties::props_meta_writer::PropsMetaWriter, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, LocalPOS }; use edge_page::writer::EdgeWriter; use edge_store::EdgeStorageInner; @@ -171,10 +165,8 @@ impl< let graph_prop_storage = Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); - // Node types handling for WriteAndMerge is done in db4-disk-storage - // For NoOpStrategy, disk_storage_enabled() returns false, so this is skipped - if EXT::disk_storage_enabled() { - // Node types will be handled by db4-disk-storage's implementation + for node_type in ext.config().persistence().node_types().iter() { + node_meta.get_or_create_node_type_id(node_type); } let t_len = edge_storage.t_len(); diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 3f225eba22..91ff75867b 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -575,10 +575,6 @@ impl>> NodeSegmentOps for NodeSeg fn nodes_counter(&self) -> &AtomicU32 { &self.max_num_node } - - fn increment_num_nodes(&self, max_page_len: u32) { - increment_and_clamp(self.nodes_counter(), max_page_len); - } } #[cfg(test)] From b869ea6e5edd66f88cff966a107dfa23307be13f Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 26 Jan 2026 21:14:55 -0500 Subject: [PATCH 68/95] Rename check_node to has_node --- db4-storage/src/api/nodes.rs | 2 +- db4-storage/src/pages/node_page/writer.rs | 12 ++++++++---- db4-storage/src/segments/node/segment.rs | 2 +- raphtory/src/io/arrow/df_loaders/nodes.rs | 1 + 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/db4-storage/src/api/nodes.rs b/db4-storage/src/api/nodes.rs index f7e01e533d..1fdcf5416f 100644 --- a/db4-storage/src/api/nodes.rs +++ b/db4-storage/src/api/nodes.rs @@ -90,7 +90,7 @@ pub trait NodeSegmentOps: Send + Sync + Debug + 'static { fn set_dirty(&self, dirty: bool); - fn check_node(&self, pos: LocalPOS, layer_id: usize) -> bool; + fn has_node(&self, pos: LocalPOS, layer_id: usize) -> bool; fn get_out_edge( &self, diff --git a/db4-storage/src/pages/node_page/writer.rs b/db4-storage/src/pages/node_page/writer.rs index 3f2821f85c..a7be173916 100644 --- a/db4-storage/src/pages/node_page/writer.rs +++ b/db4-storage/src/pages/node_page/writer.rs @@ -69,7 +69,7 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri let (is_new_node, add) = self.mut_segment.add_outbound_edge(t, src_pos, dst, e_id); self.page.increment_est_size(add); - if is_new_node && !self.page.check_node(src_pos, layer_id) { + if is_new_node && !self.page.has_node(src_pos, layer_id) { self.l_counter.increment(layer_id); } } @@ -112,7 +112,7 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri self.page.increment_est_size(add); - if is_new_node && !self.page.check_node(dst_pos, layer) { + if is_new_node && !self.page.has_node(dst_pos, layer) { self.l_counter.increment(layer); } } @@ -127,7 +127,7 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri self.l_counter.update_time(t.t()); let (is_new_node, add) = self.mut_segment.add_props(t, pos, layer_id, props); self.page.increment_est_size(add); - if is_new_node && !self.page.check_node(pos, layer_id) { + if is_new_node && !self.page.has_node(pos, layer_id) { self.l_counter.increment(layer_id); } } @@ -149,7 +149,7 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri ) { let (is_new_node, add) = self.mut_segment.update_metadata(pos, layer_id, props); self.page.increment_est_size(add); - if is_new_node && !self.page.check_node(pos, layer_id) { + if is_new_node && !self.page.has_node(pos, layer_id) { self.l_counter.increment(layer_id); } } @@ -203,6 +203,10 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri self.page .increment_num_nodes(self.mut_segment.max_page_len()); } + + pub fn has_node(&self, node: LocalPOS, layer_id: usize) -> bool { + self.mut_segment.has_node(node, layer_id) + } } pub fn node_info_as_props( diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 91ff75867b..69f7fa89bc 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -506,7 +506,7 @@ impl>> NodeSegmentOps for NodeSeg fn set_dirty(&self, _dirty: bool) {} - fn check_node(&self, _pos: LocalPOS, _layer_id: usize) -> bool { + fn has_node(&self, _pos: LocalPOS, _layer_id: usize) -> bool { false } diff --git a/raphtory/src/io/arrow/df_loaders/nodes.rs b/raphtory/src/io/arrow/df_loaders/nodes.rs index 8597fd959b..7769da529f 100644 --- a/raphtory/src/io/arrow/df_loaders/nodes.rs +++ b/raphtory/src/io/arrow/df_loaders/nodes.rs @@ -277,6 +277,7 @@ pub fn load_node_props_from_df< c_props.clear(); c_props.extend(metadata_cols.iter_row(idx)); c_props.extend_from_slice(&shared_metadata); + if !c_props.is_empty() { writer.update_c_props(mut_node, STATIC_GRAPH_LAYER_ID, c_props.drain(..)); } From acda4a85306f05c82151412664c035085b9dae45 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 26 Jan 2026 21:34:31 -0500 Subject: [PATCH 69/95] Increment num_nodes for segment in wal replay --- db4-graph/src/replay.rs | 12 ++++++++++++ db4-storage/src/pages/node_page/writer.rs | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 93088f759f..366b6d071a 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -100,6 +100,12 @@ where // Replay this entry only if it doesn't exist in immut. if immut_lsn < lsn { let mut src_writer = self.nodes.get_mut(src_segment_id).unwrap().writer(); + + // Increment the node counter for this segment if this is a new node. + if !src_writer.has_node(src_pos, STATIC_GRAPH_LAYER_ID) { + src_writer.increment_seg_num_nodes(); + } + src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, src_name.into()); let is_new_edge_static = src_writer @@ -140,6 +146,12 @@ where // Replay this entry only if it doesn't exist in immut. if immut_lsn < lsn { let mut dst_writer = self.nodes.get_mut(dst_segment_id).unwrap().writer(); + + // Increment the node counter for this segment if this is a new node. + if !dst_writer.has_node(dst_pos, STATIC_GRAPH_LAYER_ID) { + dst_writer.increment_seg_num_nodes(); + } + dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, dst_name.into()); let is_new_edge_static = dst_writer diff --git a/db4-storage/src/pages/node_page/writer.rs b/db4-storage/src/pages/node_page/writer.rs index a7be173916..3656ce5d8e 100644 --- a/db4-storage/src/pages/node_page/writer.rs +++ b/db4-storage/src/pages/node_page/writer.rs @@ -205,7 +205,7 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri } pub fn has_node(&self, node: LocalPOS, layer_id: usize) -> bool { - self.mut_segment.has_node(node, layer_id) + self.mut_segment.has_node(node, layer_id) || self.page.has_node(node, layer_id) } } From cf65020a09133f1727c6d7d27c77d43306b912f9 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Mon, 26 Jan 2026 21:35:06 -0500 Subject: [PATCH 70/95] Run fmt --- db4-storage/src/pages/mod.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 31e74ef6ad..d81cc0dcad 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -1,5 +1,11 @@ use crate::{ - api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, persist::{config::ConfigOps, strategy::PersistenceStrategy}, properties::props_meta_writer::PropsMetaWriter, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, LocalPOS + LocalPOS, + api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, + error::StorageError, + pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, + properties::props_meta_writer::PropsMetaWriter, + segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, }; use edge_page::writer::EdgeWriter; use edge_store::EdgeStorageInner; From 4bf60cff4bebcb5881e117e1b568205232671a9b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 27 Jan 2026 03:07:46 +0000 Subject: [PATCH 71/95] chore: apply tidy-public auto-fixes --- docs/reference/graphql/graphql_API.md | 32 +++++++++++++-------------- raphtory-graphql/schema.graphql | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/reference/graphql/graphql_API.md b/docs/reference/graphql/graphql_API.md index c2d353ecfc..c83907a32d 100644 --- a/docs/reference/graphql/graphql_API.md +++ b/docs/reference/graphql/graphql_API.md @@ -2421,43 +2421,43 @@ This allows you to specify multiple operations together. -shortest_path -[ShortestPathOutput!]! +pagerank +[PagerankOutput!]! -source -String! +iterCount +Int! -targets -[String!]! +threads +Int -direction -String +tol +Float -pagerank -[PagerankOutput!]! +shortest_path +[ShortestPathOutput!]! -iterCount -Int! +source +String! -threads -Int +targets +[String!]! -tol -Float +direction +String diff --git a/raphtory-graphql/schema.graphql b/raphtory-graphql/schema.graphql index e309e55948..da5b4e420b 100644 --- a/raphtory-graphql/schema.graphql +++ b/raphtory-graphql/schema.graphql @@ -947,8 +947,8 @@ type Graph { } type GraphAlgorithmPlugin { - shortest_path(source: String!, targets: [String!]!, direction: String): [ShortestPathOutput!]! pagerank(iterCount: Int!, threads: Int, tol: Float): [PagerankOutput!]! + shortest_path(source: String!, targets: [String!]!, direction: String): [ShortestPathOutput!]! } type GraphSchema { From 6088eca99f0b46ec58f2b1566ef94f7484d9be0e Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 27 Jan 2026 11:31:55 +0100 Subject: [PATCH 72/95] fix handling of additions with internal ids and avoid cloning the id --- db4-graph/src/replay.rs | 29 +++++++---- db4-storage/src/wal/entry.rs | 6 +-- db4-storage/src/wal/mod.rs | 10 ++-- raphtory-api/src/core/entities/mod.rs | 34 +++++++++++++ raphtory/src/db/api/mutation/addition_ops.rs | 52 ++++++++++---------- 5 files changed, 87 insertions(+), 44 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index a165ed4ebb..af27655bd0 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -33,9 +33,9 @@ where lsn: LSN, transaction_id: TransactionID, t: EventTime, - src_name: GID, + src_name: Option, src_id: VID, - dst_name: GID, + dst_name: Option, dst_id: VID, eid: EID, layer_name: Option, @@ -68,12 +68,17 @@ where } // 2. Insert node ids into resolver. - temporal_graph - .logical_to_physical - .set(GidRef::from(&src_name), src_id)?; - temporal_graph - .logical_to_physical - .set(GidRef::from(&dst_name), dst_id)?; + if let Some(src_name) = src_name.as_ref() { + temporal_graph + .logical_to_physical + .set(src_name.as_ref(), src_id)?; + } + + if let Some(dst_name) = dst_name.as_ref() { + temporal_graph + .logical_to_physical + .set(dst_name.as_ref(), dst_id)?; + } // 3. Insert layer id into the layer meta of both edge and node. let node_meta = temporal_graph.node_meta(); @@ -106,7 +111,9 @@ where src_writer.increment_seg_num_nodes(); } - src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, src_name.into()); + if let Some(src_name) = src_name { + src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, src_name); + } let is_new_edge_static = src_writer .get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID) @@ -152,7 +159,9 @@ where dst_writer.increment_seg_num_nodes(); } - dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, dst_name.into()); + if let Some(dst_name) = dst_name { + dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, dst_name); + } let is_new_edge_static = dst_writer .get_inb_edge(dst_pos, src_id, STATIC_GRAPH_LAYER_ID) diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index aa05ed7f0b..10a04dbad1 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -1,4 +1,4 @@ -use raphtory_api::core::entities::properties::prop::Prop; +use raphtory_api::core::entities::{GidRef, properties::prop::Prop}; use raphtory_core::{ entities::{EID, GID, VID}, storage::timeindex::EventTime, @@ -16,9 +16,9 @@ impl GraphWalOps for NoWal { &self, _transaction_id: TransactionID, _t: EventTime, - _src_name: GID, + _src_name: Option>, _src_id: VID, - _dst_name: GID, + _dst_name: Option>, _dst_id: VID, _eid: EID, _layer_name: Option<&str>, diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index 37833db5ae..cb5e510201 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -1,5 +1,5 @@ use crate::error::StorageError; -use raphtory_api::core::entities::properties::prop::Prop; +use raphtory_api::core::entities::{GidRef, properties::prop::Prop}; use raphtory_core::{ entities::{EID, GID, VID}, storage::timeindex::EventTime, @@ -79,9 +79,9 @@ pub trait GraphWalOps { &self, transaction_id: TransactionID, t: EventTime, - src_name: GID, + src_name: Option>, src_id: VID, - dst_name: GID, + dst_name: Option>, dst_id: VID, eid: EID, layer_name: Option<&str>, @@ -108,9 +108,9 @@ pub trait GraphReplay { lsn: LSN, transaction_id: TransactionID, t: EventTime, - src_name: GID, + src_name: Option, src_id: VID, - dst_name: GID, + dst_name: Option, dst_id: VID, eid: EID, layer_name: Option, diff --git a/raphtory-api/src/core/entities/mod.rs b/raphtory-api/src/core/entities/mod.rs index 8c574abe3e..24da2d688c 100644 --- a/raphtory-api/src/core/entities/mod.rs +++ b/raphtory-api/src/core/entities/mod.rs @@ -305,6 +305,40 @@ pub enum GidRef<'a> { Str(&'a str), } +#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)] +pub enum GidCow<'a> { + U64(u64), + Str(Cow<'a, str>), +} + +impl<'a> From> for GidCow<'a> { + fn from(value: GidRef<'a>) -> Self { + match value { + GidRef::U64(v) => Self::U64(v), + GidRef::Str(v) => Self::Str(Cow::Borrowed(v)), + } + } +} + +impl<'a> GidCow<'a> { + pub fn as_ref<'b>(&'b self) -> GidRef<'b> + where + 'a: 'b, + { + match self { + GidCow::U64(v) => GidRef::U64(*v), + GidCow::Str(v) => GidRef::Str(v), + } + } + + pub fn into_owned(self) -> GID { + match self { + GidCow::U64(v) => GID::U64(v), + GidCow::Str(v) => GID::Str(v.into_owned()), + } + } +} + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum GidType { U64, diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index 2e2d5dc7e3..b40708ee68 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -7,13 +7,18 @@ use crate::{ errors::{into_graph_err, GraphError}, prelude::{GraphViewOps, NodeViewOps}, }; -use raphtory_api::core::entities::properties::prop::Prop; -use raphtory_api::core::utils::time::{IntoTimeWithFormat, TryIntoInputTime}; -use raphtory_core::entities::GID; -use raphtory_storage::mutation::{ - addition_ops::{EdgeWriteLock, InternalAdditionOps}, - durability_ops::DurabilityOps, - MutationError, +use raphtory_api::core::{ + entities::properties::prop::Prop, + utils::time::{IntoTimeWithFormat, TryIntoInputTime}, +}; +use raphtory_core::entities::{nodes::node_ref::NodeRef, GID}; +use raphtory_storage::{ + core_ops::CoreGraphOps, + mutation::{ + addition_ops::{EdgeWriteLock, InternalAdditionOps}, + durability_ops::DurabilityOps, + MutationError, + }, }; use storage::wal::{GraphWalOps, WalOps}; @@ -256,9 +261,11 @@ impl> + StaticGraphViewOps + Dura ) -> Result, GraphError> { let transaction_id = self.transaction_manager().begin_transaction(); let session = self.write_session().map_err(|err| err.into())?; + let src = src.as_node_ref(); + let dst = dst.as_node_ref(); self.validate_gids( - [src.as_node_ref(), dst.as_node_ref()] + [src, dst] .iter() .filter_map(|node_ref| node_ref.as_gid_ref().left()), ) @@ -273,28 +280,21 @@ impl> + StaticGraphViewOps + Dura .map_err(into_graph_err)?; let ti = time_from_input_session(&session, t)?; - let src_id = self - .resolve_node(src.as_node_ref()) - .map_err(into_graph_err)?; - let dst_id = self - .resolve_node(dst.as_node_ref()) - .map_err(into_graph_err)?; + let src_id = self.resolve_node(src).map_err(into_graph_err)?; + let dst_id = self.resolve_node(dst).map_err(into_graph_err)?; let layer_id = self.resolve_layer(layer).map_err(into_graph_err)?; // FIXME: We are logging node -> node id mappings AFTER they are inserted into the // resolver. Make sure resolver mapping CANNOT get to disk before Wal. - let src_gid = src - .as_node_ref() - .as_gid_ref() - .left() - .map(|gid_ref| GID::from(gid_ref)) - .unwrap(); - let dst_gid = dst - .as_node_ref() - .as_gid_ref() - .left() - .map(|gid_ref| GID::from(gid_ref)) - .unwrap(); + let src_gid = match src { + NodeRef::Internal(_) => None, + NodeRef::External(gid_ref) => Some(gid_ref), + }; + + let dst_gid = match dst { + NodeRef::Internal(_) => None, + NodeRef::External(gid_ref) => Some(gid_ref), + }; let src_id = src_id.inner(); let dst_id = dst_id.inner(); From 383bb9b89d28c03addc453f87d78c818ea0b0dc3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 27 Jan 2026 11:46:54 +0000 Subject: [PATCH 73/95] chore: apply tidy-public auto-fixes --- raphtory-graphql/schema.graphql | 20 ++++++++++---------- raphtory/src/db/graph/edge.rs | 3 ++- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/raphtory-graphql/schema.graphql b/raphtory-graphql/schema.graphql index ed312f4f39..3faa0d5db4 100644 --- a/raphtory-graphql/schema.graphql +++ b/raphtory-graphql/schema.graphql @@ -1190,7 +1190,7 @@ type History { """ Fetch one page of EventTime entries with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). - + For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ @@ -1198,7 +1198,7 @@ type History { """ Fetch one page of EventTime entries with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). - + For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ @@ -1255,7 +1255,7 @@ type HistoryDateTime { optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). If filter_broken is set to True, time conversion errors will be ignored. If set to False, a TimeError will be raised on time conversion error. Defaults to False. - + For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ @@ -1265,7 +1265,7 @@ type HistoryDateTime { optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). If filter_broken is set to True, time conversion errors will be ignored. If set to False, a TimeError will be raised on time conversion error. Defaults to False. - + For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ @@ -1287,7 +1287,7 @@ type HistoryEventId { """ Fetch one page of event ids with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). - + For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ @@ -1295,7 +1295,7 @@ type HistoryEventId { """ Fetch one page of event ids in reverse chronological order with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). - + For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ @@ -1317,7 +1317,7 @@ type HistoryTimestamp { """ Fetch one page of timestamps with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). - + For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ @@ -1325,7 +1325,7 @@ type HistoryTimestamp { """ Fetch one page of timestamps in reverse order with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). - + For example, if page_rev(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ @@ -1388,7 +1388,7 @@ type Intervals { """ Fetch one page of intervals between consecutive timestamps with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). - + For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ @@ -1396,7 +1396,7 @@ type Intervals { """ Fetch one page of intervals between consecutive timestamps in reverse order with a number of items up to a specified limit, optionally offset by a specified amount. The page_index sets the number of pages to skip (defaults to 0). - + For example, if page(5, 2, 1) is called, a page with 5 items, offset by 11 items (2 pages of 5 + 1), will be returned. """ diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index 404a567e89..c10ecdd9a3 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -157,7 +157,8 @@ impl< G: StaticGraphViewOps + InternalAdditionOps + InternalPropertyAdditionOps - + InternalDeletionOps + DurabilityOps, + + InternalDeletionOps + + DurabilityOps, > EdgeView { pub fn delete(&self, t: T, layer: Option<&str>) -> Result<(), GraphError> { From c38b67d2e2dac980f4d8c12de9a9a974c11a6531 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 27 Jan 2026 10:09:45 -0500 Subject: [PATCH 74/95] Remove silly comment and use STATIC_GRAPH_LAYER_ID --- db4-storage/src/pages/mod.rs | 9 +++------ db4-storage/src/pages/node_page/writer.rs | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 72b4f32a35..e47ba0ea1c 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -139,12 +139,9 @@ impl< )); if let Some(graph_dir) = graph_dir { - // Config saving for WriteAndMerge is handled in db4-disk-storage's implementation - // This generic code in db4-storage doesn't have access to WriteAndMergeConfig types - // to avoid circular dependencies. For NoOpStrategy, config saving is not needed. - if EXT::disk_storage_enabled() { - // Config will be saved by db4-disk-storage's GraphStore implementation - } + ext.config() + .save_to_dir(graph_dir) + .expect("Failed to write config to disk"); } Self { diff --git a/db4-storage/src/pages/node_page/writer.rs b/db4-storage/src/pages/node_page/writer.rs index 3656ce5d8e..018fd6d56e 100644 --- a/db4-storage/src/pages/node_page/writer.rs +++ b/db4-storage/src/pages/node_page/writer.rs @@ -5,7 +5,7 @@ use crate::{ use raphtory_api::core::entities::{ EID, GID, VID, properties::{ - meta::{NODE_ID_IDX, NODE_TYPE_IDX}, + meta::{NODE_ID_IDX, NODE_TYPE_IDX, STATIC_GRAPH_LAYER_ID}, prop::Prop, }, }; @@ -48,7 +48,12 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri e_id: impl Into, ) { let e_id = e_id.into(); - self.add_outbound_edge_inner::(None, src_pos, dst, e_id.with_layer(0)); + self.add_outbound_edge_inner::( + None, + src_pos, + dst, + e_id.with_layer(STATIC_GRAPH_LAYER_ID), + ); } fn add_outbound_edge_inner( @@ -91,7 +96,12 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri e_id: impl Into, ) { let e_id = e_id.into(); - self.add_inbound_edge_inner::(None, dst_pos, src, e_id.with_layer(0)); + self.add_inbound_edge_inner::( + None, + dst_pos, + src, + e_id.with_layer(STATIC_GRAPH_LAYER_ID), + ); } fn add_inbound_edge_inner( From 2f77e09dcd491f8122c6919192922a09a8fb46aa Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 27 Jan 2026 11:05:22 -0500 Subject: [PATCH 75/95] Add wal.has_entries --- db4-storage/src/wal/mod.rs | 3 +++ db4-storage/src/wal/no_wal.rs | 4 ++++ raphtory/src/db/api/storage/storage.rs | 9 +++++---- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index cb5e510201..dabe11f47d 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -36,6 +36,9 @@ pub trait WalOps { /// Returns an iterator over the entries in the wal. fn replay(&self) -> impl Iterator>; + + /// Returns true if there are entries in the WAL file on disk. + fn has_entries(&self) -> bool; } #[derive(Debug)] diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs index ffd7b1ef3c..87eccc154a 100644 --- a/db4-storage/src/wal/no_wal.rs +++ b/db4-storage/src/wal/no_wal.rs @@ -35,4 +35,8 @@ impl WalOps for NoWal { let error = "Recovery is not supported for NoWAL"; std::iter::once(Err(StorageError::GenericFailure(error.to_string()))) } + + fn has_entries(&self) -> bool { + false + } } diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 5a93f5edd5..52b4409d8b 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -144,7 +144,7 @@ impl Storage { } pub(crate) fn load_from(path: impl AsRef) -> Result { - let config = Config::load_from_dir(path.as_ref()).unwrap_or_else(|_| Config::default()); + let config = Config::load_from_dir(path.as_ref())?; let graph_dir = GraphDir::from(path.as_ref()); let wal_dir = graph_dir.wal_dir(); let wal = Arc::new(Wal::load(Some(wal_dir.as_path()))?); @@ -152,9 +152,10 @@ impl Storage { let temporal_graph = TemporalGraph::load_from_path(path, ext)?; // Replay any pending writes from the WAL. - let mut write_locked_graph = temporal_graph.write_lock()?; - wal.replay_to_graph(&mut write_locked_graph)?; - drop(write_locked_graph); + if wal.has_entries() { + let mut write_locked_graph = temporal_graph.write_lock()?; + wal.replay_to_graph(&mut write_locked_graph)?; + } Ok(Self { graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), From b2f67ee453ada7abfcb3f01a871088b4c1ad65c9 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 27 Jan 2026 17:37:45 +0100 Subject: [PATCH 76/95] json output for GraphFixture --- raphtory/src/test_utils.rs | 251 +++++++++++++++++++++++++++++++---- raphtory/tests/df_loaders.rs | 22 +-- 2 files changed, 242 insertions(+), 31 deletions(-) diff --git a/raphtory/src/test_utils.rs b/raphtory/src/test_utils.rs index c7f30c8735..84291cbb09 100644 --- a/raphtory/src/test_utils.rs +++ b/raphtory/src/test_utils.rs @@ -24,8 +24,15 @@ use raphtory_storage::{ mutation::addition_ops::{InternalAdditionOps, SessionAdditionOps}, }; use rayon::iter::ParallelIterator; +use serde::{ + de::{SeqAccess, Visitor}, + ser::SerializeSeq, + Deserialize, Deserializer, Serialize, Serializer, +}; use std::{ + borrow::Cow, collections::{hash_map, HashMap}, + fmt::{Debug, Formatter}, ops::{Range, RangeInclusive}, sync::Arc, }; @@ -249,7 +256,7 @@ pub fn assert_valid_graph(fixture: &GraphFixture, graph: &Graph) { Some(updates) => { assert_eq!( node.node_type().as_str(), - updates.node_type, + updates.node_type.as_str(), "mismatched node_type for node {node_id}" ); @@ -535,12 +542,19 @@ pub fn prop_type(nested_prop_size: usize) -> impl Strategy { }) } -#[derive(Debug, Clone)] +#[derive(Clone, PartialEq, Serialize, Deserialize)] pub struct GraphFixture { pub nodes: NodeFixture, pub edges: EdgeFixture, } +impl Debug for GraphFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } +} + impl GraphFixture { pub fn edges(&self) -> impl Iterator), &EdgeUpdatesFixture)> { self.edges.iter() @@ -551,9 +565,16 @@ impl GraphFixture { } } -#[derive(Debug, Default, Clone)] +#[derive(Default, Clone, PartialEq, Serialize, Deserialize)] pub struct NodeFixture(pub HashMap); +impl Debug for NodeFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } +} + impl FromIterator<(u64, NodeUpdatesFixture)> for NodeFixture { fn from_iter>(iter: T) -> Self { Self(iter.into_iter().collect()) @@ -575,47 +596,130 @@ impl IntoIterator for NodeFixture { } } -#[derive(Debug, Default, Clone)] +#[derive(Default, Clone, PartialEq, Serialize, Deserialize)] pub struct PropUpdatesFixture { pub t_props: Vec<(i64, Vec<(String, Prop)>)>, pub c_props: Vec<(String, Prop)>, } -#[derive(Debug, Default, Clone)] +impl Debug for PropUpdatesFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } +} + +#[derive(Default, Clone, PartialEq, Serialize, Deserialize)] pub struct NodeUpdatesFixture { pub props: PropUpdatesFixture, - pub node_type: Option<&'static str>, + pub node_type: Option>, +} + +impl Debug for NodeUpdatesFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } } -#[derive(Debug, Default, Clone)] +#[derive(Default, Clone, PartialEq, Serialize, Deserialize)] pub struct EdgeUpdatesFixture { pub props: PropUpdatesFixture, pub deletions: Vec, } -#[derive(Debug, Default, Clone)] -pub struct EdgeFixture(pub HashMap<(u64, u64, Option<&'static str>), EdgeUpdatesFixture>); +impl Debug for EdgeUpdatesFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } +} + +#[derive(Default, Clone, PartialEq)] +pub struct EdgeFixture(pub HashMap<(u64, u64, Option>), EdgeUpdatesFixture>); + +impl Serialize for EdgeFixture { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut seq = serializer.serialize_seq(Some(self.0.len()))?; + for v in self.iter() { + seq.serialize_element(&v)?; + } + seq.end() + } +} + +struct Elements; + +impl<'de> Visitor<'de> for Elements { + type Value = EdgeFixture; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + formatter.write_str("a sequence edge updates") + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + let mut elements = if let Some(size) = seq.size_hint() { + HashMap::with_capacity(size) + } else { + HashMap::new() + }; + while let Some((next_key, next_value)) = seq.next_element()? { + elements.insert(next_key, next_value); + } + Ok(EdgeFixture(elements)) + } +} + +impl<'de> Deserialize<'de> for EdgeFixture { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_seq(Elements) + } +} + +impl Debug for EdgeFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } +} impl EdgeFixture { pub fn iter(&self) -> impl Iterator), &EdgeUpdatesFixture)> { - self.0.iter().map(|(k, v)| (*k, v)) + self.0 + .iter() + .map(|((src, dst, layer), v)| ((*src, *dst, layer.as_str()), v)) } } impl IntoIterator for EdgeFixture { - type Item = ((u64, u64, Option<&'static str>), EdgeUpdatesFixture); - type IntoIter = hash_map::IntoIter<(u64, u64, Option<&'static str>), EdgeUpdatesFixture>; + type Item = ((u64, u64, Option>), EdgeUpdatesFixture); + type IntoIter = hash_map::IntoIter<(u64, u64, Option>), EdgeUpdatesFixture>; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() } } -impl FromIterator<((u64, u64, Option<&'static str>), EdgeUpdatesFixture)> for EdgeFixture { - fn from_iter), EdgeUpdatesFixture)>>( +impl>> FromIterator<((u64, u64, Option), EdgeUpdatesFixture)> + for EdgeFixture +{ + fn from_iter), EdgeUpdatesFixture)>>( iter: T, ) -> Self { - Self(iter.into_iter().collect()) + Self( + iter.into_iter() + .map(|((s, d, l), f)| ((s, d, l.map(|l| l.into())), f)) + .collect(), + ) } } @@ -668,8 +772,19 @@ impl From for GraphFixture { } } -impl, Option<&'static str>)>> From - for GraphFixture +impl< + V, + T, + I: IntoIterator< + Item = ( + V, + V, + T, + Vec<(String, Prop)>, + Option>>, + ), + >, + > From for GraphFixture where u64: TryFrom, i64: TryFrom, @@ -679,7 +794,11 @@ where .into_iter() .filter_map(|(src, dst, t, props, layer)| { Some(( - (src.try_into().ok()?, dst.try_into().ok()?, layer), + ( + src.try_into().ok()?, + dst.try_into().ok()?, + layer.map(|l| l.into()), + ), (t.try_into().ok()?, props), )) }) @@ -705,8 +824,12 @@ where } } -pub fn make_node_type() -> impl Strategy> { - proptest::sample::select(vec![None, Some("one"), Some("two")]) +pub fn make_node_type() -> impl Strategy>> { + proptest::sample::select(vec![ + None, + Some(Cow::Borrowed("one")), + Some(Cow::Borrowed("two")), + ]) } pub fn make_node_types() -> impl Strategy> { @@ -817,7 +940,11 @@ pub fn build_edge_list_dyn( ( num_nodes.clone().prop_map(|n| n as u64), num_nodes.clone().prop_map(|n| n as u64), - proptest::sample::select(vec![Some("a"), Some("b"), None]), + proptest::sample::select(vec![ + Some(Cow::Borrowed("a")), + Some(Cow::Borrowed("b")), + None, + ]), ), edge_updates(schema.clone(), num_updates.clone(), del_edges), num_edges.clone(), @@ -944,7 +1071,7 @@ pub fn build_graph(graph_fix: &GraphFixture) -> Arc { } if let Some(node) = g.node(node) { node.add_metadata(updates.props.c_props.clone()).unwrap(); - if let Some(node_type) = updates.node_type { + if let Some(node_type) = updates.node_type.as_str() { node.set_node_type(node_type).unwrap(); } } @@ -1016,7 +1143,7 @@ pub fn build_graph_layer(graph_fix: &GraphFixture, layers: &[&str]) -> Arc, ), ( 1, 2, 12, vec![("a".to_string(), Prop::List(vec![Prop::str("aa")].into()))], - None, + None::, ), ] .into(), @@ -967,7 +967,7 @@ mod parquet_tests { 0, 0, vec![("a".to_string(), Prop::List(vec![Prop::DTime(dt)].into()))], - None, + None::, )] .into(), ); @@ -986,7 +986,7 @@ mod parquet_tests { "a".to_string(), Prop::map([("a", Prop::DTime(dt)), ("b", Prop::str("s"))]), )], - None, + None::, )] .into(), ); @@ -1001,14 +1001,14 @@ mod parquet_tests { 0, 0, vec![("a".to_string(), Prop::map([("a", Prop::I32(1))]))], - None, + None::, ), ( 0, 0, 0, vec![("a".to_string(), Prop::map([("b", Prop::str("x"))]))], - None, + None::, ), ] .into(), @@ -1019,13 +1019,19 @@ mod parquet_tests { fn edges_maps3() { build_and_check_parquet_encoding( [ - (0, 0, 0, vec![("a".to_string(), Prop::U8(5))], None), + ( + 0, + 0, + 0, + vec![("a".to_string(), Prop::U8(5))], + None::, + ), ( 0, 0, 0, vec![("b".to_string(), Prop::map([("c", Prop::U8(66))]))], - None, + None::, ), ] .into(), From fbdff9268645ea58e4df0d3c45d42c23c6c8d32f Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 27 Jan 2026 12:28:34 -0500 Subject: [PATCH 77/95] Restrict DurabilityOps impls --- db4-graph/src/lib.rs | 4 --- db4-graph/src/replay.rs | 1 - raphtory-storage/src/graph/graph.rs | 2 +- .../src/mutation/addition_ops_ext.rs | 8 ++--- raphtory-storage/src/mutation/deletion_ops.rs | 4 ++- .../src/mutation/durability_ops.rs | 32 +++++-------------- raphtory-storage/src/mutation/mod.rs | 3 +- raphtory/src/db/api/mutation/addition_ops.rs | 28 +++++++--------- raphtory/src/db/api/storage/storage.rs | 10 ------ raphtory/src/db/graph/edge.rs | 1 - 10 files changed, 29 insertions(+), 64 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 9b9d7f8be1..7f28a5741f 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -204,10 +204,6 @@ where self.storage().extension() } - pub fn wal(&self) -> &EXT::Wal { - self.storage().extension().wal() - } - pub fn read_event_counter(&self) -> usize { self.storage().read_event_id() } diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index af27655bd0..3d4f55e6b4 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -10,7 +10,6 @@ use raphtory_api::core::{ }, storage::timeindex::EventTime, }; -use raphtory_core::entities::GidRef; use storage::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, diff --git a/raphtory-storage/src/graph/graph.rs b/raphtory-storage/src/graph/graph.rs index 754eccf6ab..46795e1bca 100644 --- a/raphtory-storage/src/graph/graph.rs +++ b/raphtory-storage/src/graph/graph.rs @@ -8,7 +8,7 @@ use crate::{ locked::LockedGraph, nodes::{nodes::NodesStorage, nodes_ref::NodesStorageEntry}, }, - mutation::MutationError, + mutation::{durability_ops::DurabilityOps, MutationError}, }; use db4_graph::TemporalGraph; use raphtory_api::core::entities::{properties::meta::Meta, LayerIds, LayerVariants, EID, VID}; diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 81c30e0630..1dd9c05686 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -368,11 +368,11 @@ impl InternalAdditionOps for TemporalGraph { } impl DurabilityOps for TemporalGraph { - fn transaction_manager(&self) -> &TransactionManager { - &self.transaction_manager + fn transaction_manager(&self) -> Result<&TransactionManager, MutationError> { + Ok(&self.transaction_manager) } - fn wal(&self) -> &Wal { - &self.extension().wal() + fn wal(&self) -> Result<&Wal, MutationError> { + Ok(&self.extension().wal()) } } diff --git a/raphtory-storage/src/mutation/deletion_ops.rs b/raphtory-storage/src/mutation/deletion_ops.rs index e5d893f5e0..d811df5f63 100644 --- a/raphtory-storage/src/mutation/deletion_ops.rs +++ b/raphtory-storage/src/mutation/deletion_ops.rs @@ -6,8 +6,10 @@ use raphtory_api::{ }, inherit::Base, }; +use db4_graph::TemporalGraph; use storage::Extension; + pub trait InternalDeletionOps { type Error: From; fn internal_delete_edge( @@ -25,7 +27,7 @@ pub trait InternalDeletionOps { ) -> Result<(), Self::Error>; } -impl InternalDeletionOps for db4_graph::TemporalGraph { +impl InternalDeletionOps for TemporalGraph { type Error = MutationError; fn internal_delete_edge( diff --git a/raphtory-storage/src/mutation/durability_ops.rs b/raphtory-storage/src/mutation/durability_ops.rs index 5a1be35226..8efdca06ac 100644 --- a/raphtory-storage/src/mutation/durability_ops.rs +++ b/raphtory-storage/src/mutation/durability_ops.rs @@ -1,37 +1,21 @@ use crate::graph::graph::GraphStorage; -use raphtory_api::inherit::Base; +use db4_graph::TemporalGraph; use storage::{transaction::TransactionManager, Wal}; +use crate::mutation::MutationError; /// Accessor methods for transactions and write-ahead logging. pub trait DurabilityOps { - fn transaction_manager(&self) -> &TransactionManager; + fn transaction_manager(&self) -> Result<&TransactionManager, MutationError>; - fn wal(&self) -> &Wal; + fn wal(&self) -> Result<&Wal, MutationError>; } impl DurabilityOps for GraphStorage { - fn transaction_manager(&self) -> &TransactionManager { - self.mutable().unwrap().transaction_manager.as_ref() + fn transaction_manager(&self) -> Result<&TransactionManager, MutationError> { + self.mutable()?.transaction_manager() } - fn wal(&self) -> &Wal { - self.mutable().unwrap().wal() - } -} - -pub trait InheritDurabilityOps: Base {} - -impl DurabilityOps for G -where - G::Base: DurabilityOps, -{ - #[inline] - fn transaction_manager(&self) -> &TransactionManager { - self.base().transaction_manager() - } - - #[inline] - fn wal(&self) -> &Wal { - self.base().wal() + fn wal(&self) -> Result<&Wal, MutationError> { + self.mutable()?.wal() } } diff --git a/raphtory-storage/src/mutation/mod.rs b/raphtory-storage/src/mutation/mod.rs index 28cd67085d..3b9e16ac33 100644 --- a/raphtory-storage/src/mutation/mod.rs +++ b/raphtory-storage/src/mutation/mod.rs @@ -3,7 +3,7 @@ use crate::{ graph::graph::Immutable, mutation::{ addition_ops::InheritAdditionOps, deletion_ops::InheritDeletionOps, - durability_ops::InheritDurabilityOps, property_addition_ops::InheritPropertyAdditionOps, + property_addition_ops::InheritPropertyAdditionOps, }, }; use parking_lot::RwLockWriteGuard; @@ -71,6 +71,5 @@ pub trait InheritMutationOps: Base {} impl InheritAdditionOps for G {} impl InheritPropertyAdditionOps for G {} impl InheritDeletionOps for G {} -impl InheritDurabilityOps for G {} impl InheritMutationOps for Arc {} diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index b40708ee68..1a87e9963c 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -1,7 +1,7 @@ use crate::{ core::entities::{edges::edge_ref::EdgeRef, nodes::node_ref::AsNodeRef}, db::{ - api::{mutation::time_from_input_session, view::StaticGraphViewOps}, + api::{mutation::time_from_input_session, view::{internal::InternalStorageOps, StaticGraphViewOps}}, graph::{edge::EdgeView, node::NodeView}, }, errors::{into_graph_err, GraphError}, @@ -11,19 +11,14 @@ use raphtory_api::core::{ entities::properties::prop::Prop, utils::time::{IntoTimeWithFormat, TryIntoInputTime}, }; -use raphtory_core::entities::{nodes::node_ref::NodeRef, GID}; -use raphtory_storage::{ - core_ops::CoreGraphOps, - mutation::{ - addition_ops::{EdgeWriteLock, InternalAdditionOps}, - durability_ops::DurabilityOps, - MutationError, - }, -}; +use raphtory_core::entities::{nodes::node_ref::NodeRef}; +use raphtory_storage::mutation::{ + addition_ops::{EdgeWriteLock, InternalAdditionOps}, durability_ops::DurabilityOps, MutationError + }; use storage::wal::{GraphWalOps, WalOps}; pub trait AdditionOps: - StaticGraphViewOps + InternalAdditionOps> + DurabilityOps + StaticGraphViewOps + InternalAdditionOps> + InternalStorageOps { // TODO: Probably add vector reference here like add /// Add a node to the graph @@ -152,7 +147,7 @@ pub trait AdditionOps: fn flush(&self) -> Result<(), Self::Error>; } -impl> + StaticGraphViewOps + DurabilityOps> +impl> + StaticGraphViewOps + InternalStorageOps> AdditionOps for G { fn add_node< @@ -259,7 +254,7 @@ impl> + StaticGraphViewOps + Dura props: PII, layer: Option<&str>, ) -> Result, GraphError> { - let transaction_id = self.transaction_manager().begin_transaction(); + let transaction_id = self.core_graph().transaction_manager()?.begin_transaction(); let session = self.write_session().map_err(|err| err.into())?; let src = src.as_node_ref(); let dst = dst.as_node_ref(); @@ -321,7 +316,8 @@ impl> + StaticGraphViewOps + Dura .collect::>(); let lsn = self - .wal() + .core_graph() + .wal()? .log_add_edge( transaction_id, ti, @@ -358,13 +354,13 @@ impl> + StaticGraphViewOps + Dura // Update the src, dst and edge segments with the lsn of the wal entry. add_edge_op.set_lsn(lsn); - self.transaction_manager().end_transaction(transaction_id); + self.core_graph().transaction_manager()?.end_transaction(transaction_id); // Drop to release all the segment locks. drop(add_edge_op); // Flush the wal entry to disk. - self.wal().flush(lsn).unwrap(); + self.core_graph().wal()?.flush(lsn).unwrap(); Ok(EdgeView::new( self.clone(), diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 52b4409d8b..f673ae6693 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -573,16 +573,6 @@ impl InternalAdditionOps for Storage { } } -impl DurabilityOps for Storage { - fn transaction_manager(&self) -> &TransactionManager { - self.graph.mutable().unwrap().transaction_manager.as_ref() - } - - fn wal(&self) -> &Wal { - self.graph.mutable().unwrap().wal() - } -} - impl InternalPropertyAdditionOps for Storage { type Error = GraphError; diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index c10ecdd9a3..bb66c30d86 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -158,7 +158,6 @@ impl< + InternalAdditionOps + InternalPropertyAdditionOps + InternalDeletionOps - + DurabilityOps, > EdgeView { pub fn delete(&self, t: T, layer: Option<&str>) -> Result<(), GraphError> { From 32f29743edac7a5cb8af77b521140c0682566924 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 27 Jan 2026 12:46:32 -0500 Subject: [PATCH 78/95] Run fmt --- raphtory-storage/src/mutation/deletion_ops.rs | 3 +-- raphtory-storage/src/mutation/durability_ops.rs | 3 +-- raphtory/src/db/api/mutation/addition_ops.rs | 17 ++++++++++++----- raphtory/src/db/graph/edge.rs | 2 +- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/raphtory-storage/src/mutation/deletion_ops.rs b/raphtory-storage/src/mutation/deletion_ops.rs index d811df5f63..cb69cc72cc 100644 --- a/raphtory-storage/src/mutation/deletion_ops.rs +++ b/raphtory-storage/src/mutation/deletion_ops.rs @@ -1,4 +1,5 @@ use crate::{graph::graph::GraphStorage, mutation::MutationError}; +use db4_graph::TemporalGraph; use raphtory_api::{ core::{ entities::{EID, VID}, @@ -6,10 +7,8 @@ use raphtory_api::{ }, inherit::Base, }; -use db4_graph::TemporalGraph; use storage::Extension; - pub trait InternalDeletionOps { type Error: From; fn internal_delete_edge( diff --git a/raphtory-storage/src/mutation/durability_ops.rs b/raphtory-storage/src/mutation/durability_ops.rs index 8efdca06ac..9b384993ba 100644 --- a/raphtory-storage/src/mutation/durability_ops.rs +++ b/raphtory-storage/src/mutation/durability_ops.rs @@ -1,7 +1,6 @@ -use crate::graph::graph::GraphStorage; +use crate::{graph::graph::GraphStorage, mutation::MutationError}; use db4_graph::TemporalGraph; use storage::{transaction::TransactionManager, Wal}; -use crate::mutation::MutationError; /// Accessor methods for transactions and write-ahead logging. pub trait DurabilityOps { diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index 1a87e9963c..e04a867d5e 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -1,7 +1,10 @@ use crate::{ core::entities::{edges::edge_ref::EdgeRef, nodes::node_ref::AsNodeRef}, db::{ - api::{mutation::time_from_input_session, view::{internal::InternalStorageOps, StaticGraphViewOps}}, + api::{ + mutation::time_from_input_session, + view::{internal::InternalStorageOps, StaticGraphViewOps}, + }, graph::{edge::EdgeView, node::NodeView}, }, errors::{into_graph_err, GraphError}, @@ -11,10 +14,12 @@ use raphtory_api::core::{ entities::properties::prop::Prop, utils::time::{IntoTimeWithFormat, TryIntoInputTime}, }; -use raphtory_core::entities::{nodes::node_ref::NodeRef}; +use raphtory_core::entities::nodes::node_ref::NodeRef; use raphtory_storage::mutation::{ - addition_ops::{EdgeWriteLock, InternalAdditionOps}, durability_ops::DurabilityOps, MutationError - }; + addition_ops::{EdgeWriteLock, InternalAdditionOps}, + durability_ops::DurabilityOps, + MutationError, +}; use storage::wal::{GraphWalOps, WalOps}; pub trait AdditionOps: @@ -354,7 +359,9 @@ impl> + StaticGraphViewOps + Inte // Update the src, dst and edge segments with the lsn of the wal entry. add_edge_op.set_lsn(lsn); - self.core_graph().transaction_manager()?.end_transaction(transaction_id); + self.core_graph() + .transaction_manager()? + .end_transaction(transaction_id); // Drop to release all the segment locks. drop(add_edge_op); diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index bb66c30d86..aac46f5633 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -157,7 +157,7 @@ impl< G: StaticGraphViewOps + InternalAdditionOps + InternalPropertyAdditionOps - + InternalDeletionOps + + InternalDeletionOps, > EdgeView { pub fn delete(&self, t: T, layer: Option<&str>) -> Result<(), GraphError> { From 30f10fd41d86bc1a2abf11c060e8ca8c06616dd9 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 27 Jan 2026 14:39:32 -0500 Subject: [PATCH 79/95] Minor cleanup --- db4-storage/src/pages/mod.rs | 6 ++--- db4-storage/src/segments/edge/segment.rs | 28 +++++-------------- db4-storage/src/segments/node/segment.rs | 34 ++++++------------------ 3 files changed, 17 insertions(+), 51 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index e47ba0ea1c..eb18bfa7aa 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -14,7 +14,7 @@ use node_page::writer::{NodeWriter, NodeWriters}; use node_store::NodeStorageInner; use parking_lot::RwLockWriteGuard; use raphtory_api::core::{ - entities::properties::{meta::Meta, prop::Prop}, + entities::properties::{meta::{Meta, STATIC_GRAPH_LAYER_ID}, prop::Prop}, storage::dict_mapper::MaybeNew, utils::time::{InputTime, TryIntoInputTime}, }; @@ -400,7 +400,7 @@ impl< }; let (_, src_pos) = self.nodes.resolve_pos(src); - let existing_eid = node_writers.src.get_out_edge(src_pos, dst, 0); + let existing_eid = node_writers.src.get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); let edge_writer = match e_id.or(existing_eid) { Some(e_id) => self.edge_writer(e_id), @@ -443,7 +443,7 @@ impl< }; let (_, src_pos) = self.nodes.resolve_pos(src); - let existing_eid = node_writers.src.get_out_edge(src_pos, dst, 0); + let existing_eid = node_writers.src.get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); let edge_writer = match e_id.or(existing_eid) { Some(e_id) => self.edge_writer(e_id), diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index bc53d66c88..0add964096 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -55,22 +55,6 @@ pub struct MemEdgeSegment { lsn: LSN, } -impl>> From for MemEdgeSegment { - fn from(inner: I) -> Self { - let layers: Vec<_> = inner.into_iter().collect(); - let est_size = layers.iter().map(|seg| seg.est_size()).sum(); - assert!( - !layers.is_empty(), - "MemEdgeSegment must have at least one layer" - ); - Self { - layers, - est_size, - lsn: 0, - } - } -} - impl AsRef<[SegmentContainer]> for MemEdgeSegment { fn as_ref(&self) -> &[SegmentContainer] { &self.layers @@ -559,7 +543,7 @@ impl>> EdgeSegmentOps for EdgeSeg #[cfg(test)] mod test { use super::*; - use raphtory_api::core::entities::properties::{meta::Meta, prop::PropType}; + use raphtory_api::core::entities::properties::{meta::{Meta, STATIC_GRAPH_LAYER_ID}, prop::PropType}; use raphtory_core::storage::timeindex::EventTime; fn create_test_segment() -> MemEdgeSegment { @@ -625,7 +609,7 @@ mod test { LocalPOS(0), VID(1), VID(2), - 0, + STATIC_GRAPH_LAYER_ID, vec![(0, Prop::from("test"))], ); @@ -633,7 +617,7 @@ mod test { assert!(est_size1 > 0); - segment.delete_edge_internal(EventTime::new(2, 3), LocalPOS(0), VID(5), VID(3), 0); + segment.delete_edge_internal(EventTime::new(2, 3), LocalPOS(0), VID(5), VID(3), STATIC_GRAPH_LAYER_ID); let est_size2 = segment.est_size(); @@ -648,7 +632,7 @@ mod test { LocalPOS(1), VID(4), VID(6), - 0, + STATIC_GRAPH_LAYER_ID, vec![(0, Prop::from("test2"))], ); @@ -660,7 +644,7 @@ mod test { // Insert a static edge - segment.insert_static_edge_internal(LocalPOS(1), 4, 6, 0); + segment.insert_static_edge_internal(LocalPOS(1), 4, 6, STATIC_GRAPH_LAYER_ID); let est_size4 = segment.est_size(); assert_eq!( @@ -674,7 +658,7 @@ mod test { .unwrap() .inner(); - segment.update_const_properties(LocalPOS(1), VID(4), VID(6), 0, [(prop_id, Prop::U8(2))]); + segment.update_const_properties(LocalPOS(1), VID(4), VID(6), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::U8(2))]); let est_size5 = segment.est_size(); assert!( diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index eac0fc7771..f39bf4b8b7 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -40,24 +40,6 @@ pub struct MemNodeSegment { lsn: LSN, } -impl>> From for MemNodeSegment { - fn from(inner: I) -> Self { - let layers = inner.into_iter().collect::>(); - assert!( - !layers.is_empty(), - "MemNodeSegment must have at least one layer" - ); - let segment_id = layers[0].segment_id(); - let max_page_len = layers[0].max_page_len(); - Self { - segment_id, - max_page_len, - layers, - lsn: 0, - } - } -} - #[derive(Debug, Default, serde::Serialize)] pub struct AdjEntry { row: usize, @@ -589,7 +571,7 @@ mod test { wal::no_wal::NoWal, }; use raphtory_api::core::entities::properties::{ - meta::Meta, + meta::{Meta, STATIC_GRAPH_LAYER_ID}, prop::{Prop, PropType}, }; use raphtory_core::entities::{EID, ELID, VID}; @@ -618,7 +600,7 @@ mod test { let est_size1 = segment.est_size(); assert_eq!(est_size1, 0); - writer.add_outbound_edge(Some(1), LocalPOS(1), VID(3), EID(7).with_layer(0)); + writer.add_outbound_edge(Some(1), LocalPOS(1), VID(3), EID(7).with_layer(STATIC_GRAPH_LAYER_ID)); let est_size2 = segment.est_size(); assert!( @@ -626,7 +608,7 @@ mod test { "Estimated size should be greater than 0 after adding an edge" ); - writer.add_inbound_edge(Some(1), LocalPOS(2), VID(4), EID(8).with_layer(0)); + writer.add_inbound_edge(Some(1), LocalPOS(2), VID(4), EID(8).with_layer(STATIC_GRAPH_LAYER_ID)); let est_size3 = segment.est_size(); assert!( @@ -636,7 +618,7 @@ mod test { // no change when adding the same edge again - writer.add_outbound_edge::(None, LocalPOS(1), VID(3), EID(7).with_layer(0)); + writer.add_outbound_edge::(None, LocalPOS(1), VID(3), EID(7).with_layer(STATIC_GRAPH_LAYER_ID)); let est_size4 = segment.est_size(); assert_eq!( est_size4, est_size3, @@ -651,7 +633,7 @@ mod test { .unwrap() .inner(); - writer.update_c_props(LocalPOS(1), 0, [(prop_id, Prop::U64(73))]); + writer.update_c_props(LocalPOS(1), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::U64(73))]); let est_size5 = segment.est_size(); assert!( @@ -659,7 +641,7 @@ mod test { "Estimated size should increase after adding constant properties" ); - writer.update_timestamp(17, LocalPOS(1), ELID::new(EID(0), 0)); + writer.update_timestamp(17, LocalPOS(1), ELID::new(EID(0), STATIC_GRAPH_LAYER_ID)); let est_size6 = segment.est_size(); assert!( @@ -674,7 +656,7 @@ mod test { .unwrap() .inner(); - writer.add_props(42, LocalPOS(1), 0, [(prop_id, Prop::F64(4.13))]); + writer.add_props(42, LocalPOS(1), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::F64(4.13))]); let est_size7 = segment.est_size(); assert!( @@ -682,7 +664,7 @@ mod test { "Estimated size should increase after adding temporal properties" ); - writer.add_props(72, LocalPOS(1), 0, [(prop_id, Prop::F64(5.41))]); + writer.add_props(72, LocalPOS(1), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::F64(5.41))]); let est_size8 = segment.est_size(); assert!( est_size8 > est_size7, From da23315a12cc80ef5ba4b266b086a0358604b392 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 27 Jan 2026 14:42:29 -0500 Subject: [PATCH 80/95] Use STATIC_GRAPH_LAYER_ID --- raphtory-storage/src/mutation/addition_ops_ext.rs | 14 +++++++------- .../src/mutation/property_addition_ops.rs | 8 ++++---- raphtory/src/db/api/mutation/addition_ops.rs | 4 ++-- raphtory/src/db/api/view/graph.rs | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 1dd9c05686..a9d85b1df0 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -6,7 +6,7 @@ use crate::mutation::{ use db4_graph::{TemporalGraph, WriteLockedGraph}; use raphtory_api::core::{ entities::properties::{ - meta::{Meta, NODE_ID_IDX, NODE_TYPE_IDX}, + meta::{Meta, NODE_ID_IDX, NODE_TYPE_IDX, STATIC_GRAPH_LAYER_ID}, prop::{Prop, PropType, PropUnwrap}, }, storage::dict_mapper::MaybeNew, @@ -100,7 +100,7 @@ where self.static_session .node_writers() .get_mut_src() - .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())]); + .update_c_props(pos, STATIC_GRAPH_LAYER_ID, [(NODE_ID_IDX, id.into())]); }; } @@ -111,7 +111,7 @@ where self.static_session .node_writers() .get_mut_dst() - .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())]); + .update_c_props(pos, STATIC_GRAPH_LAYER_ID, [(NODE_ID_IDX, id.into())]); }; } @@ -244,19 +244,19 @@ impl InternalAdditionOps for TemporalGraph { None => { writer.update_c_props( local_pos, - 0, + STATIC_GRAPH_LAYER_ID, node_info_as_props(id.as_gid_ref().left(), None), ); MaybeNew::Existing(0) } Some(node_type) => { - let old_type = writer.get_metadata(local_pos, 0, NODE_TYPE_IDX).into_u64(); + let old_type = writer.get_metadata(local_pos, STATIC_GRAPH_LAYER_ID, NODE_TYPE_IDX).into_u64(); match old_type { None => { let node_type_id = self.node_meta().get_or_create_node_type_id(node_type); writer.update_c_props( local_pos, - 0, + STATIC_GRAPH_LAYER_ID, node_info_as_props( id.as_gid_ref().left(), Some(node_type_id.inner()).filter(|&id| id != 0), @@ -324,7 +324,7 @@ impl InternalAdditionOps for TemporalGraph { ) -> Result<(), Self::Error> { let (segment, node_pos) = self.storage().nodes().resolve_pos(v); let mut node_writer = self.storage().node_writer(segment); - node_writer.add_props(t, node_pos, 0, props); + node_writer.add_props(t, node_pos, STATIC_GRAPH_LAYER_ID, props); Ok(()) } diff --git a/raphtory-storage/src/mutation/property_addition_ops.rs b/raphtory-storage/src/mutation/property_addition_ops.rs index 62e64f1b5f..406c3d9469 100644 --- a/raphtory-storage/src/mutation/property_addition_ops.rs +++ b/raphtory-storage/src/mutation/property_addition_ops.rs @@ -4,7 +4,7 @@ use crate::{ }; use raphtory_api::{ core::{ - entities::{properties::prop::Prop, EID, VID}, + entities::{properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, EID, VID}, storage::timeindex::EventTime, }, inherit::Base, @@ -86,8 +86,8 @@ impl InternalPropertyAdditionOps for db4_graph::TemporalGraph { ) -> Result, Self::Error> { let (segment_id, node_pos) = self.storage().nodes().resolve_pos(vid); let mut writer = self.storage().nodes().writer(segment_id); - writer.check_metadata(node_pos, 0, &props)?; - writer.update_c_props(node_pos, 0, props); + writer.check_metadata(node_pos, STATIC_GRAPH_LAYER_ID, &props)?; + writer.update_c_props(node_pos, STATIC_GRAPH_LAYER_ID, props); Ok(writer) } @@ -98,7 +98,7 @@ impl InternalPropertyAdditionOps for db4_graph::TemporalGraph { ) -> Result, Self::Error> { let (segment_id, node_pos) = self.storage().nodes().resolve_pos(vid); let mut writer = self.storage().nodes().writer(segment_id); - writer.update_c_props(node_pos, 0, props); + writer.update_c_props(node_pos, STATIC_GRAPH_LAYER_ID, props); Ok(writer) } diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index e04a867d5e..ac83b2c3c6 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -23,7 +23,7 @@ use raphtory_storage::mutation::{ use storage::wal::{GraphWalOps, WalOps}; pub trait AdditionOps: - StaticGraphViewOps + InternalAdditionOps> + InternalStorageOps + StaticGraphViewOps + InternalAdditionOps> { // TODO: Probably add vector reference here like add /// Add a node to the graph @@ -152,7 +152,7 @@ pub trait AdditionOps: fn flush(&self) -> Result<(), Self::Error>; } -impl> + StaticGraphViewOps + InternalStorageOps> +impl> + StaticGraphViewOps> AdditionOps for G { fn add_node< diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 2cc28c1d62..7a0e68ac6a 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -376,7 +376,7 @@ fn materialize_impl( .set_node(gid.as_ref(), new_id)?; for (t, row) in node.rows() { - writer.add_props(t, node_pos, 0, row); + writer.add_props(t, node_pos, STATIC_GRAPH_LAYER_ID, row); } writer.update_c_props( From 299c723c5d3bc577b315c8fb7004f5f01260f3f8 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Tue, 27 Jan 2026 15:35:33 -0500 Subject: [PATCH 81/95] Run fmt --- db4-storage/src/pages/mod.rs | 13 ++++-- db4-storage/src/segments/edge/segment.rs | 21 ++++++++-- db4-storage/src/segments/node/segment.rs | 41 ++++++++++++++++--- .../src/mutation/addition_ops_ext.rs | 4 +- .../src/mutation/property_addition_ops.rs | 5 ++- raphtory/src/db/api/mutation/addition_ops.rs | 8 +--- 6 files changed, 72 insertions(+), 20 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index eb18bfa7aa..1667f9ab7a 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -14,7 +14,10 @@ use node_page::writer::{NodeWriter, NodeWriters}; use node_store::NodeStorageInner; use parking_lot::RwLockWriteGuard; use raphtory_api::core::{ - entities::properties::{meta::{Meta, STATIC_GRAPH_LAYER_ID}, prop::Prop}, + entities::properties::{ + meta::{Meta, STATIC_GRAPH_LAYER_ID}, + prop::Prop, + }, storage::dict_mapper::MaybeNew, utils::time::{InputTime, TryIntoInputTime}, }; @@ -400,7 +403,9 @@ impl< }; let (_, src_pos) = self.nodes.resolve_pos(src); - let existing_eid = node_writers.src.get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); + let existing_eid = node_writers + .src + .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); let edge_writer = match e_id.or(existing_eid) { Some(e_id) => self.edge_writer(e_id), @@ -443,7 +448,9 @@ impl< }; let (_, src_pos) = self.nodes.resolve_pos(src); - let existing_eid = node_writers.src.get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); + let existing_eid = node_writers + .src + .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); let edge_writer = match e_id.or(existing_eid) { Some(e_id) => self.edge_writer(e_id), diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index 0add964096..b60850bd81 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -543,7 +543,10 @@ impl>> EdgeSegmentOps for EdgeSeg #[cfg(test)] mod test { use super::*; - use raphtory_api::core::entities::properties::{meta::{Meta, STATIC_GRAPH_LAYER_ID}, prop::PropType}; + use raphtory_api::core::entities::properties::{ + meta::{Meta, STATIC_GRAPH_LAYER_ID}, + prop::PropType, + }; use raphtory_core::storage::timeindex::EventTime; fn create_test_segment() -> MemEdgeSegment { @@ -617,7 +620,13 @@ mod test { assert!(est_size1 > 0); - segment.delete_edge_internal(EventTime::new(2, 3), LocalPOS(0), VID(5), VID(3), STATIC_GRAPH_LAYER_ID); + segment.delete_edge_internal( + EventTime::new(2, 3), + LocalPOS(0), + VID(5), + VID(3), + STATIC_GRAPH_LAYER_ID, + ); let est_size2 = segment.est_size(); @@ -658,7 +667,13 @@ mod test { .unwrap() .inner(); - segment.update_const_properties(LocalPOS(1), VID(4), VID(6), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::U8(2))]); + segment.update_const_properties( + LocalPOS(1), + VID(4), + VID(6), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::U8(2))], + ); let est_size5 = segment.est_size(); assert!( diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index f39bf4b8b7..4ec08c12a8 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -600,7 +600,12 @@ mod test { let est_size1 = segment.est_size(); assert_eq!(est_size1, 0); - writer.add_outbound_edge(Some(1), LocalPOS(1), VID(3), EID(7).with_layer(STATIC_GRAPH_LAYER_ID)); + writer.add_outbound_edge( + Some(1), + LocalPOS(1), + VID(3), + EID(7).with_layer(STATIC_GRAPH_LAYER_ID), + ); let est_size2 = segment.est_size(); assert!( @@ -608,7 +613,12 @@ mod test { "Estimated size should be greater than 0 after adding an edge" ); - writer.add_inbound_edge(Some(1), LocalPOS(2), VID(4), EID(8).with_layer(STATIC_GRAPH_LAYER_ID)); + writer.add_inbound_edge( + Some(1), + LocalPOS(2), + VID(4), + EID(8).with_layer(STATIC_GRAPH_LAYER_ID), + ); let est_size3 = segment.est_size(); assert!( @@ -618,7 +628,12 @@ mod test { // no change when adding the same edge again - writer.add_outbound_edge::(None, LocalPOS(1), VID(3), EID(7).with_layer(STATIC_GRAPH_LAYER_ID)); + writer.add_outbound_edge::( + None, + LocalPOS(1), + VID(3), + EID(7).with_layer(STATIC_GRAPH_LAYER_ID), + ); let est_size4 = segment.est_size(); assert_eq!( est_size4, est_size3, @@ -633,7 +648,11 @@ mod test { .unwrap() .inner(); - writer.update_c_props(LocalPOS(1), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::U64(73))]); + writer.update_c_props( + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::U64(73))], + ); let est_size5 = segment.est_size(); assert!( @@ -656,7 +675,12 @@ mod test { .unwrap() .inner(); - writer.add_props(42, LocalPOS(1), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::F64(4.13))]); + writer.add_props( + 42, + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::F64(4.13))], + ); let est_size7 = segment.est_size(); assert!( @@ -664,7 +688,12 @@ mod test { "Estimated size should increase after adding temporal properties" ); - writer.add_props(72, LocalPOS(1), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::F64(5.41))]); + writer.add_props( + 72, + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::F64(5.41))], + ); let est_size8 = segment.est_size(); assert!( est_size8 > est_size7, diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index a9d85b1df0..0984a4cdbf 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -250,7 +250,9 @@ impl InternalAdditionOps for TemporalGraph { MaybeNew::Existing(0) } Some(node_type) => { - let old_type = writer.get_metadata(local_pos, STATIC_GRAPH_LAYER_ID, NODE_TYPE_IDX).into_u64(); + let old_type = writer + .get_metadata(local_pos, STATIC_GRAPH_LAYER_ID, NODE_TYPE_IDX) + .into_u64(); match old_type { None => { let node_type_id = self.node_meta().get_or_create_node_type_id(node_type); diff --git a/raphtory-storage/src/mutation/property_addition_ops.rs b/raphtory-storage/src/mutation/property_addition_ops.rs index 406c3d9469..6b319d20ff 100644 --- a/raphtory-storage/src/mutation/property_addition_ops.rs +++ b/raphtory-storage/src/mutation/property_addition_ops.rs @@ -4,7 +4,10 @@ use crate::{ }; use raphtory_api::{ core::{ - entities::{properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, EID, VID}, + entities::{ + properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, + EID, VID, + }, storage::timeindex::EventTime, }, inherit::Base, diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index ac83b2c3c6..d6f7a8816f 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -22,9 +22,7 @@ use raphtory_storage::mutation::{ }; use storage::wal::{GraphWalOps, WalOps}; -pub trait AdditionOps: - StaticGraphViewOps + InternalAdditionOps> -{ +pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> { // TODO: Probably add vector reference here like add /// Add a node to the graph /// @@ -152,9 +150,7 @@ pub trait AdditionOps: fn flush(&self) -> Result<(), Self::Error>; } -impl> + StaticGraphViewOps> - AdditionOps for G -{ +impl> + StaticGraphViewOps> AdditionOps for G { fn add_node< V: AsNodeRef, T: TryIntoInputTime, From d8a5c643a5bb46b362a8ca717ca1d138f5410123 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 27 Jan 2026 21:09:16 +0000 Subject: [PATCH 82/95] chore: apply tidy-public auto-fixes --- db4-storage/src/pages/mod.rs | 13 ++++++-- db4-storage/src/segments/edge/segment.rs | 21 ++++++++++-- db4-storage/src/segments/node/segment.rs | 41 ++++++++++++++++++++---- 3 files changed, 63 insertions(+), 12 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index eb18bfa7aa..1667f9ab7a 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -14,7 +14,10 @@ use node_page::writer::{NodeWriter, NodeWriters}; use node_store::NodeStorageInner; use parking_lot::RwLockWriteGuard; use raphtory_api::core::{ - entities::properties::{meta::{Meta, STATIC_GRAPH_LAYER_ID}, prop::Prop}, + entities::properties::{ + meta::{Meta, STATIC_GRAPH_LAYER_ID}, + prop::Prop, + }, storage::dict_mapper::MaybeNew, utils::time::{InputTime, TryIntoInputTime}, }; @@ -400,7 +403,9 @@ impl< }; let (_, src_pos) = self.nodes.resolve_pos(src); - let existing_eid = node_writers.src.get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); + let existing_eid = node_writers + .src + .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); let edge_writer = match e_id.or(existing_eid) { Some(e_id) => self.edge_writer(e_id), @@ -443,7 +448,9 @@ impl< }; let (_, src_pos) = self.nodes.resolve_pos(src); - let existing_eid = node_writers.src.get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); + let existing_eid = node_writers + .src + .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); let edge_writer = match e_id.or(existing_eid) { Some(e_id) => self.edge_writer(e_id), diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index 0add964096..b60850bd81 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -543,7 +543,10 @@ impl>> EdgeSegmentOps for EdgeSeg #[cfg(test)] mod test { use super::*; - use raphtory_api::core::entities::properties::{meta::{Meta, STATIC_GRAPH_LAYER_ID}, prop::PropType}; + use raphtory_api::core::entities::properties::{ + meta::{Meta, STATIC_GRAPH_LAYER_ID}, + prop::PropType, + }; use raphtory_core::storage::timeindex::EventTime; fn create_test_segment() -> MemEdgeSegment { @@ -617,7 +620,13 @@ mod test { assert!(est_size1 > 0); - segment.delete_edge_internal(EventTime::new(2, 3), LocalPOS(0), VID(5), VID(3), STATIC_GRAPH_LAYER_ID); + segment.delete_edge_internal( + EventTime::new(2, 3), + LocalPOS(0), + VID(5), + VID(3), + STATIC_GRAPH_LAYER_ID, + ); let est_size2 = segment.est_size(); @@ -658,7 +667,13 @@ mod test { .unwrap() .inner(); - segment.update_const_properties(LocalPOS(1), VID(4), VID(6), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::U8(2))]); + segment.update_const_properties( + LocalPOS(1), + VID(4), + VID(6), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::U8(2))], + ); let est_size5 = segment.est_size(); assert!( diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index f39bf4b8b7..4ec08c12a8 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -600,7 +600,12 @@ mod test { let est_size1 = segment.est_size(); assert_eq!(est_size1, 0); - writer.add_outbound_edge(Some(1), LocalPOS(1), VID(3), EID(7).with_layer(STATIC_GRAPH_LAYER_ID)); + writer.add_outbound_edge( + Some(1), + LocalPOS(1), + VID(3), + EID(7).with_layer(STATIC_GRAPH_LAYER_ID), + ); let est_size2 = segment.est_size(); assert!( @@ -608,7 +613,12 @@ mod test { "Estimated size should be greater than 0 after adding an edge" ); - writer.add_inbound_edge(Some(1), LocalPOS(2), VID(4), EID(8).with_layer(STATIC_GRAPH_LAYER_ID)); + writer.add_inbound_edge( + Some(1), + LocalPOS(2), + VID(4), + EID(8).with_layer(STATIC_GRAPH_LAYER_ID), + ); let est_size3 = segment.est_size(); assert!( @@ -618,7 +628,12 @@ mod test { // no change when adding the same edge again - writer.add_outbound_edge::(None, LocalPOS(1), VID(3), EID(7).with_layer(STATIC_GRAPH_LAYER_ID)); + writer.add_outbound_edge::( + None, + LocalPOS(1), + VID(3), + EID(7).with_layer(STATIC_GRAPH_LAYER_ID), + ); let est_size4 = segment.est_size(); assert_eq!( est_size4, est_size3, @@ -633,7 +648,11 @@ mod test { .unwrap() .inner(); - writer.update_c_props(LocalPOS(1), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::U64(73))]); + writer.update_c_props( + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::U64(73))], + ); let est_size5 = segment.est_size(); assert!( @@ -656,7 +675,12 @@ mod test { .unwrap() .inner(); - writer.add_props(42, LocalPOS(1), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::F64(4.13))]); + writer.add_props( + 42, + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::F64(4.13))], + ); let est_size7 = segment.est_size(); assert!( @@ -664,7 +688,12 @@ mod test { "Estimated size should increase after adding temporal properties" ); - writer.add_props(72, LocalPOS(1), STATIC_GRAPH_LAYER_ID, [(prop_id, Prop::F64(5.41))]); + writer.add_props( + 72, + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::F64(5.41))], + ); let est_size8 = segment.est_size(); assert!( est_size8 > est_size7, From 85d1703646495d728eef1edcbb83d1cca4b9379b Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 28 Jan 2026 11:54:15 +0100 Subject: [PATCH 83/95] some cleanup for add_edge --- db4-graph/src/replay.rs | 18 ++- raphtory/src/db/api/mutation/addition_ops.rs | 146 ++++++++++--------- 2 files changed, 89 insertions(+), 75 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 3d4f55e6b4..7ef0084fa6 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -57,13 +57,10 @@ where // No need to validate props again since they are already validated before // being logged to the WAL. let edge_meta = temporal_graph.edge_meta(); - let mut prop_ids_and_values = Vec::new(); - for (prop_name, prop_id, prop_value) in props.into_iter() { + for (prop_name, prop_id, prop_value) in &props { let prop_mapper = edge_meta.temporal_prop_mapper(); - - prop_mapper.set_id_and_dtype(prop_name, prop_id, prop_value.dtype()); - prop_ids_and_values.push((prop_id, prop_value)); + prop_mapper.set_id_and_dtype(prop_name.as_str(), *prop_id, prop_value.dtype()); } // 2. Insert node ids into resolver. @@ -209,7 +206,16 @@ where } // Add edge into the specified layer with timestamp and props. - edge_writer.add_edge(t, edge_pos, src_id, dst_id, prop_ids_and_values, layer_id); + edge_writer.add_edge( + t, + edge_pos, + src_id, + dst_id, + props + .into_iter() + .map(|(_, prop_id, prop_value)| (prop_id, prop_value)), + layer_id, + ); edge_writer.writer.set_lsn(lsn); } diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index d6f7a8816f..14a6d9799f 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -15,10 +15,13 @@ use raphtory_api::core::{ utils::time::{IntoTimeWithFormat, TryIntoInputTime}, }; use raphtory_core::entities::nodes::node_ref::NodeRef; -use raphtory_storage::mutation::{ - addition_ops::{EdgeWriteLock, InternalAdditionOps}, - durability_ops::DurabilityOps, - MutationError, +use raphtory_storage::{ + core_ops::CoreGraphOps, + mutation::{ + addition_ops::{EdgeWriteLock, InternalAdditionOps}, + durability_ops::DurabilityOps, + MutationError, + }, }; use storage::wal::{GraphWalOps, WalOps}; @@ -255,7 +258,9 @@ impl> + StaticGraphViewOps> Addit props: PII, layer: Option<&str>, ) -> Result, GraphError> { - let transaction_id = self.core_graph().transaction_manager()?.begin_transaction(); + let transaction_manager = self.core_graph().transaction_manager()?; + let wal = self.core_graph().wal()?; + let transaction_id = transaction_manager.begin_transaction(); let session = self.write_session().map_err(|err| err.into())?; let src = src.as_node_ref(); let dst = dst.as_node_ref(); @@ -274,14 +279,8 @@ impl> + StaticGraphViewOps> Addit props.into_iter().map(|(k, v)| (k, v.into())), ) .map_err(into_graph_err)?; - let ti = time_from_input_session(&session, t)?; - let src_id = self.resolve_node(src).map_err(into_graph_err)?; - let dst_id = self.resolve_node(dst).map_err(into_graph_err)?; - let layer_id = self.resolve_layer(layer).map_err(into_graph_err)?; - // FIXME: We are logging node -> node id mappings AFTER they are inserted into the - // resolver. Make sure resolver mapping CANNOT get to disk before Wal. let src_gid = match src { NodeRef::Internal(_) => None, NodeRef::External(gid_ref) => Some(gid_ref), @@ -292,34 +291,40 @@ impl> + StaticGraphViewOps> Addit NodeRef::External(gid_ref) => Some(gid_ref), }; - let src_id = src_id.inner(); - let dst_id = dst_id.inner(); - let layer_id = layer_id.inner(); - - // Hold all locks for src node, dst node and edge until add_edge_op goes out of scope. - let mut add_edge_op = self - .atomic_add_edge(src_id, dst_id, None, layer_id) - .map_err(into_graph_err)?; + // At this point we start modifying the graph, any error after this point is fatal and should + // panic! + let (edge_id, src_id, dst_id, layer_id) = { + // FIXME: We are logging node -> node id mappings AFTER they are inserted into the + // resolver. Make sure resolver mapping CANNOT get to disk before Wal. + let src_id = self.resolve_node(src).map_err(into_graph_err)?; + let dst_id = self.resolve_node(dst).map_err(into_graph_err)?; + let layer_id = self.resolve_layer(layer).map_err(into_graph_err)?; + + let src_id = src_id.inner(); + let dst_id = dst_id.inner(); + let layer_id = layer_id.inner(); + + // Hold all locks for src node, dst node and edge until add_edge_op goes out of scope. + let mut add_edge_op = self + .atomic_add_edge(src_id, dst_id, None, layer_id) + .map_err(into_graph_err)?; + + // NOTE: We log edge id after it is inserted into the edge segment. + // This is fine as long as we hold onto the edge segment lock through add_edge_op + // for the entire operation. + let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id); + + // All names, ids and values have been generated for this operation. + // Create a wal entry to mark it as durable. + let props_for_wal = props_with_status + .iter() + .map(|maybe_new| { + let (prop_name, prop_id, prop) = maybe_new.as_ref().inner(); + (prop_name.as_ref(), *prop_id, prop.clone()) + }) + .collect::>(); - // NOTE: We log edge id after it is inserted into the edge segment. - // This is fine as long as we hold onto the edge segment lock through add_edge_op - // for the entire operation. - let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id); - - // All names, ids and values have been generated for this operation. - // Create a wal entry to mark it as durable. - let props_for_wal = props_with_status - .iter() - .map(|maybe_new| { - let (prop_name, prop_id, prop) = maybe_new.as_ref().inner(); - (prop_name.as_ref(), *prop_id, prop.clone()) - }) - .collect::>(); - - let lsn = self - .core_graph() - .wal()? - .log_add_edge( + let lsn = wal.log_add_edge( transaction_id, ti, src_gid, @@ -330,40 +335,43 @@ impl> + StaticGraphViewOps> Addit layer, layer_id, props_for_wal, - ) - .unwrap(); - - let props = props_with_status - .into_iter() - .map(|maybe_new| { - let (_, prop_id, prop) = maybe_new.inner(); - (prop_id, prop) - }) - .collect::>(); - - let edge_id = add_edge_op.internal_add_edge( - ti, - src_id, - dst_id, - edge_id.map(|eid| eid.with_layer(layer_id)), - props, - ); - - add_edge_op.store_src_node_info(src_id, src.as_node_ref().as_gid_ref().left()); - add_edge_op.store_dst_node_info(dst_id, dst.as_node_ref().as_gid_ref().left()); - - // Update the src, dst and edge segments with the lsn of the wal entry. - add_edge_op.set_lsn(lsn); + )?; - self.core_graph() - .transaction_manager()? - .end_transaction(transaction_id); + let props = props_with_status + .into_iter() + .map(|maybe_new| { + let (_, prop_id, prop) = maybe_new.inner(); + (prop_id, prop) + }) + .collect::>(); - // Drop to release all the segment locks. - drop(add_edge_op); + let edge_id = add_edge_op.internal_add_edge( + ti, + src_id, + dst_id, + edge_id.map(|eid| eid.with_layer(layer_id)), + props, + ); + + add_edge_op.store_src_node_info(src_id, src.as_node_ref().as_gid_ref().left()); + add_edge_op.store_dst_node_info(dst_id, dst.as_node_ref().as_gid_ref().left()); + + // Update the src, dst and edge segments with the lsn of the wal entry. + add_edge_op.set_lsn(lsn); - // Flush the wal entry to disk. - self.core_graph().wal()?.flush(lsn).unwrap(); + self.core_graph() + .transaction_manager()? + .end_transaction(transaction_id); + + // Drop to release all the segment locks. + drop(add_edge_op); + + // Flush the wal entry to disk. + // Any error here is fatal + self.core_graph().wal()?.flush(lsn)?; + Ok::<_, GraphError>((edge_id, src_id, dst_id, layer_id)) + } + .unwrap(); Ok(EdgeView::new( self.clone(), From 02264a40bc712a7763a10f2800418c0f3b58005d Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 27 Jan 2026 17:37:45 +0100 Subject: [PATCH 84/95] json output for GraphFixture --- raphtory/src/test_utils.rs | 251 +++++++++++++++++++++++++++++++---- raphtory/tests/df_loaders.rs | 22 +-- 2 files changed, 242 insertions(+), 31 deletions(-) diff --git a/raphtory/src/test_utils.rs b/raphtory/src/test_utils.rs index e72c9de451..9309ded034 100644 --- a/raphtory/src/test_utils.rs +++ b/raphtory/src/test_utils.rs @@ -24,8 +24,15 @@ use raphtory_storage::{ mutation::addition_ops::{InternalAdditionOps, SessionAdditionOps}, }; use rayon::iter::ParallelIterator; +use serde::{ + de::{SeqAccess, Visitor}, + ser::SerializeSeq, + Deserialize, Deserializer, Serialize, Serializer, +}; use std::{ + borrow::Cow, collections::{hash_map, HashMap}, + fmt::{Debug, Formatter}, ops::{Range, RangeInclusive}, sync::Arc, }; @@ -249,7 +256,7 @@ pub fn assert_valid_graph(fixture: &GraphFixture, graph: &Graph) { Some(updates) => { assert_eq!( node.node_type().as_str(), - updates.node_type, + updates.node_type.as_str(), "mismatched node_type for node {node_id}" ); @@ -535,12 +542,19 @@ pub fn prop_type(nested_prop_size: usize) -> impl Strategy { }) } -#[derive(Debug, Clone)] +#[derive(Clone, PartialEq, Serialize, Deserialize)] pub struct GraphFixture { pub nodes: NodeFixture, pub edges: EdgeFixture, } +impl Debug for GraphFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } +} + impl GraphFixture { pub fn edges(&self) -> impl Iterator), &EdgeUpdatesFixture)> { self.edges.iter() @@ -551,9 +565,16 @@ impl GraphFixture { } } -#[derive(Debug, Default, Clone)] +#[derive(Default, Clone, PartialEq, Serialize, Deserialize)] pub struct NodeFixture(pub HashMap); +impl Debug for NodeFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } +} + impl FromIterator<(u64, NodeUpdatesFixture)> for NodeFixture { fn from_iter>(iter: T) -> Self { Self(iter.into_iter().collect()) @@ -575,47 +596,130 @@ impl IntoIterator for NodeFixture { } } -#[derive(Debug, Default, Clone)] +#[derive(Default, Clone, PartialEq, Serialize, Deserialize)] pub struct PropUpdatesFixture { pub t_props: Vec<(i64, Vec<(String, Prop)>)>, pub c_props: Vec<(String, Prop)>, } -#[derive(Debug, Default, Clone)] +impl Debug for PropUpdatesFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } +} + +#[derive(Default, Clone, PartialEq, Serialize, Deserialize)] pub struct NodeUpdatesFixture { pub props: PropUpdatesFixture, - pub node_type: Option<&'static str>, + pub node_type: Option>, +} + +impl Debug for NodeUpdatesFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } } -#[derive(Debug, Default, Clone)] +#[derive(Default, Clone, PartialEq, Serialize, Deserialize)] pub struct EdgeUpdatesFixture { pub props: PropUpdatesFixture, pub deletions: Vec, } -#[derive(Debug, Default, Clone)] -pub struct EdgeFixture(pub HashMap<(u64, u64, Option<&'static str>), EdgeUpdatesFixture>); +impl Debug for EdgeUpdatesFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } +} + +#[derive(Default, Clone, PartialEq)] +pub struct EdgeFixture(pub HashMap<(u64, u64, Option>), EdgeUpdatesFixture>); + +impl Serialize for EdgeFixture { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut seq = serializer.serialize_seq(Some(self.0.len()))?; + for v in self.iter() { + seq.serialize_element(&v)?; + } + seq.end() + } +} + +struct Elements; + +impl<'de> Visitor<'de> for Elements { + type Value = EdgeFixture; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + formatter.write_str("a sequence edge updates") + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + let mut elements = if let Some(size) = seq.size_hint() { + HashMap::with_capacity(size) + } else { + HashMap::new() + }; + while let Some((next_key, next_value)) = seq.next_element()? { + elements.insert(next_key, next_value); + } + Ok(EdgeFixture(elements)) + } +} + +impl<'de> Deserialize<'de> for EdgeFixture { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_seq(Elements) + } +} + +impl Debug for EdgeFixture { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let json = serde_json::to_string(self).unwrap(); + f.write_str(&json) + } +} impl EdgeFixture { pub fn iter(&self) -> impl Iterator), &EdgeUpdatesFixture)> { - self.0.iter().map(|(k, v)| (*k, v)) + self.0 + .iter() + .map(|((src, dst, layer), v)| ((*src, *dst, layer.as_str()), v)) } } impl IntoIterator for EdgeFixture { - type Item = ((u64, u64, Option<&'static str>), EdgeUpdatesFixture); - type IntoIter = hash_map::IntoIter<(u64, u64, Option<&'static str>), EdgeUpdatesFixture>; + type Item = ((u64, u64, Option>), EdgeUpdatesFixture); + type IntoIter = hash_map::IntoIter<(u64, u64, Option>), EdgeUpdatesFixture>; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() } } -impl FromIterator<((u64, u64, Option<&'static str>), EdgeUpdatesFixture)> for EdgeFixture { - fn from_iter), EdgeUpdatesFixture)>>( +impl>> FromIterator<((u64, u64, Option), EdgeUpdatesFixture)> + for EdgeFixture +{ + fn from_iter), EdgeUpdatesFixture)>>( iter: T, ) -> Self { - Self(iter.into_iter().collect()) + Self( + iter.into_iter() + .map(|((s, d, l), f)| ((s, d, l.map(|l| l.into())), f)) + .collect(), + ) } } @@ -668,8 +772,19 @@ impl From for GraphFixture { } } -impl, Option<&'static str>)>> From - for GraphFixture +impl< + V, + T, + I: IntoIterator< + Item = ( + V, + V, + T, + Vec<(String, Prop)>, + Option>>, + ), + >, + > From for GraphFixture where u64: TryFrom, i64: TryFrom, @@ -679,7 +794,11 @@ where .into_iter() .filter_map(|(src, dst, t, props, layer)| { Some(( - (src.try_into().ok()?, dst.try_into().ok()?, layer), + ( + src.try_into().ok()?, + dst.try_into().ok()?, + layer.map(|l| l.into()), + ), (t.try_into().ok()?, props), )) }) @@ -705,8 +824,12 @@ where } } -pub fn make_node_type() -> impl Strategy> { - proptest::sample::select(vec![None, Some("one"), Some("two")]) +pub fn make_node_type() -> impl Strategy>> { + proptest::sample::select(vec![ + None, + Some(Cow::Borrowed("one")), + Some(Cow::Borrowed("two")), + ]) } pub fn make_node_types() -> impl Strategy> { @@ -817,7 +940,11 @@ pub fn build_edge_list_dyn( ( num_nodes.clone().prop_map(|n| n as u64), num_nodes.clone().prop_map(|n| n as u64), - proptest::sample::select(vec![Some("a"), Some("b"), None]), + proptest::sample::select(vec![ + Some(Cow::Borrowed("a")), + Some(Cow::Borrowed("b")), + None, + ]), ), edge_updates(schema.clone(), num_updates.clone(), del_edges), num_edges.clone(), @@ -944,7 +1071,7 @@ pub fn build_graph(graph_fix: &GraphFixture) -> Arc { } if let Some(node) = g.node(node) { node.add_metadata(updates.props.c_props.clone()).unwrap(); - if let Some(node_type) = updates.node_type { + if let Some(node_type) = updates.node_type.as_str() { node.set_node_type(node_type).unwrap(); } } @@ -1016,7 +1143,7 @@ pub fn build_graph_layer(graph_fix: &GraphFixture, layers: &[&str]) -> Arc, ), ( 1, 2, 12, vec![("a".to_string(), Prop::List(vec![Prop::str("aa")].into()))], - None, + None::, ), ] .into(), @@ -969,7 +969,7 @@ mod parquet_tests { 0, 0, vec![("a".to_string(), Prop::List(vec![Prop::DTime(dt)].into()))], - None, + None::, )] .into(), ); @@ -988,7 +988,7 @@ mod parquet_tests { "a".to_string(), Prop::map([("a", Prop::DTime(dt)), ("b", Prop::str("s"))]), )], - None, + None::, )] .into(), ); @@ -1003,14 +1003,14 @@ mod parquet_tests { 0, 0, vec![("a".to_string(), Prop::map([("a", Prop::I32(1))]))], - None, + None::, ), ( 0, 0, 0, vec![("a".to_string(), Prop::map([("b", Prop::str("x"))]))], - None, + None::, ), ] .into(), @@ -1021,13 +1021,19 @@ mod parquet_tests { fn edges_maps3() { build_and_check_parquet_encoding( [ - (0, 0, 0, vec![("a".to_string(), Prop::U8(5))], None), + ( + 0, + 0, + 0, + vec![("a".to_string(), Prop::U8(5))], + None::, + ), ( 0, 0, 0, vec![("b".to_string(), Prop::map([("c", Prop::U8(66))]))], - None, + None::, ), ] .into(), From e991fe1cc21e812cd622e7067079056f135b9748 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 28 Jan 2026 13:30:01 +0100 Subject: [PATCH 85/95] refactor the config --- db4-graph/src/replay.rs | 12 +- db4-storage/src/pages/edge_store.rs | 4 +- db4-storage/src/pages/mod.rs | 23 +-- db4-storage/src/pages/node_store.rs | 4 +- db4-storage/src/persist/config.rs | 139 +++++------------- db4-storage/src/persist/strategy.rs | 6 +- db4-storage/src/segments/edge/segment.rs | 2 +- db4-storage/src/segments/node/segment.rs | 6 +- db4-storage/src/wal/entry.rs | 2 +- .../src/mutation/addition_ops_ext.rs | 5 +- 10 files changed, 65 insertions(+), 138 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 7ef0084fa6..9293d7385b 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -42,16 +42,8 @@ where props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError> { let temporal_graph = self.graph(); - let node_max_page_len = temporal_graph - .extension() - .config() - .persistence() - .max_node_page_len(); - let edge_max_page_len = temporal_graph - .extension() - .config() - .persistence() - .max_edge_page_len(); + let node_max_page_len = temporal_graph.extension().config().max_node_page_len(); + let edge_max_page_len = temporal_graph.extension().config().max_edge_page_len(); // 1. Insert prop ids into edge meta. // No need to validate props again since they are already validated before diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index e4446b57a8..f773a8103c 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -223,7 +223,7 @@ impl, EXT: PersistenceStrategy> EdgeStorageI pub fn load(edges_path: impl AsRef, ext: EXT) -> Result { let edges_path = edges_path.as_ref(); - let max_page_len = ext.config().persistence().max_edge_page_len(); + let max_page_len = ext.config().max_edge_page_len(); let meta = Arc::new(Meta::new_for_edges()); @@ -417,7 +417,7 @@ impl, EXT: PersistenceStrategy> EdgeStorageI #[inline(always)] pub fn max_page_len(&self) -> u32 { - self.ext.config().persistence().max_edge_page_len() + self.ext.config().max_edge_page_len() } pub fn write_locked<'a>(&'a self) -> WriteLockedEdgePages<'a, ES> { diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 1667f9ab7a..92b99b0a65 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -75,6 +75,11 @@ impl< > GraphStore { pub fn flush(&self) -> Result<(), StorageError> { + let node_types = self.nodes.prop_meta().get_all_node_types(); + let config = self.ext.config().with_node_types(node_types); + if let Some(graph_dir) = self.graph_dir.as_ref() { + config.save_to_dir(graph_dir)?; + } self.nodes.flush()?; self.edges.flush()?; self.graph_props.flush()?; @@ -171,7 +176,7 @@ impl< let graph_prop_storage = Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); - for node_type in ext.config().persistence().node_types().iter() { + for node_type in ext.config().node_types().iter() { node_meta.get_or_create_node_type_id(node_type); } @@ -598,7 +603,7 @@ mod test { check_graph_with_props_support, edges_strat, edges_strat_with_layers, make_edges, make_nodes, }, - persist::{config::NoOpConfig, strategy::PersistenceStrategy}, + persist::{config::BaseConfig, strategy::PersistenceStrategy}, wal::no_wal::NoWal, }; use chrono::DateTime; @@ -638,7 +643,7 @@ mod test { .collect(); check_edges_support(edges, par_load, false, |graph_dir| { - let config = NoOpConfig::new(chunk_size, chunk_size); + let config = BaseConfig::new(chunk_size, chunk_size); Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -649,7 +654,7 @@ mod test { par_load: bool, ) { check_edges_support(edges, par_load, false, |graph_dir| { - let config = NoOpConfig::new(chunk_size, chunk_size); + let config = BaseConfig::new(chunk_size, chunk_size); Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -722,7 +727,7 @@ mod test { #[test] fn test_add_one_edge_get_num_nodes() { let graph_dir = tempfile::tempdir().unwrap(); - let config = NoOpConfig::new(32, 32); + let config = BaseConfig::new(32, 32); let g = Layer::new( Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal)), @@ -734,7 +739,7 @@ mod test { #[test] fn test_node_additions_1() { let graph_dir = tempfile::tempdir().unwrap(); - let config = NoOpConfig::new(32, 32); + let config = BaseConfig::new(32, 32); let g = GraphStore::new( Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal)), @@ -780,7 +785,7 @@ mod test { #[test] fn node_temporal_props() { let graph_dir = tempfile::tempdir().unwrap(); - let config = NoOpConfig::new(32, 32); + let config = BaseConfig::new(32, 32); let g = Layer::new( Some(graph_dir.path()), Extension::new(config, Arc::new(NoWal)), @@ -1587,14 +1592,14 @@ mod test { fn check_graph_with_nodes(node_page_len: u32, edge_page_len: u32, fixture: &NodeFixture) { check_graph_with_nodes_support(fixture, false, |path| { - let config = NoOpConfig::new(node_page_len, edge_page_len); + let config = BaseConfig::new(node_page_len, edge_page_len); Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } fn check_graph_with_props(node_page_len: u32, edge_page_len: u32, fixture: &Fixture) { check_graph_with_props_support(fixture, false, |path| { - let config = NoOpConfig::new(node_page_len, edge_page_len); + let config = BaseConfig::new(node_page_len, edge_page_len); Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 9cdf85afaa..6053b18688 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -161,7 +161,7 @@ impl NodeStorageInner { } pub fn max_segment_len(&self) -> u32 { - self.ext.config().persistence().max_node_page_len() + self.ext.config().max_node_page_len() } } @@ -336,7 +336,7 @@ impl, EXT: PersistenceStrategy> NodeStorageI ext: EXT, ) -> Result { let nodes_path = nodes_path.as_ref(); - let max_page_len = ext.config().persistence().max_node_page_len(); + let max_page_len = ext.config().max_node_page_len(); let node_meta = Arc::new(Meta::new_for_nodes()); if !nodes_path.exists() { diff --git a/db4-storage/src/persist/config.rs b/db4-storage/src/persist/config.rs index 8babe4d7af..11443c1689 100644 --- a/db4-storage/src/persist/config.rs +++ b/db4-storage/src/persist/config.rs @@ -1,142 +1,75 @@ use crate::error::StorageError; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Serialize, de::DeserializeOwned}; use std::path::Path; pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 pub const DEFAULT_MAX_MEMORY_BYTES: usize = 32 * 1024 * 1024; +pub const CONFIG_FILE: &str = "config.json"; -pub trait ConfigOps: Serialize + Deserialize<'static> { - fn persistence(&self) -> &PersistenceConfig; +pub trait ConfigOps: Serialize + DeserializeOwned { + fn max_node_page_len(&self) -> u32; - fn load_from_dir(dir: impl AsRef) -> Result; - fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError>; -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PersistenceConfig { - max_node_page_len: u32, - max_edge_page_len: u32, - max_memory_bytes: usize, - bg_flush: bool, - node_types: Vec, -} - -impl Default for PersistenceConfig { - fn default() -> Self { - Self { - max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, - max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, - max_memory_bytes: DEFAULT_MAX_MEMORY_BYTES, - bg_flush: false, - node_types: Vec::new(), - } - } -} - -impl PersistenceConfig { - const CONFIG_FILE: &str = "persistence_config.json"; + fn max_edge_page_len(&self) -> u32; - pub fn new_with_memory(max_memory_bytes: usize) -> Self { - Self { - max_memory_bytes, - ..Default::default() - } - } + fn node_types(&self) -> &[String]; - pub fn new_with_page_lens( - max_memory_bytes: usize, - max_node_page_len: u32, - max_edge_page_len: u32, - ) -> Self { - Self { - max_memory_bytes, - max_node_page_len, - max_edge_page_len, - ..Default::default() - } - } + fn with_node_types(&self, node_types: impl IntoIterator>) -> Self; - pub fn load_from_dir(dir: impl AsRef) -> Result { - let config_file = dir.as_ref().join(Self::CONFIG_FILE); + fn load_from_dir(dir: impl AsRef) -> Result { + let config_file = dir.as_ref().join(CONFIG_FILE); let config_file = std::fs::File::open(config_file)?; let config = serde_json::from_reader(config_file)?; Ok(config) } - pub fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError> { - let config_file = dir.as_ref().join(Self::CONFIG_FILE); + fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError> { + let config_file = dir.as_ref().join(CONFIG_FILE); let config_file = std::fs::File::create(&config_file)?; serde_json::to_writer_pretty(config_file, self)?; Ok(()) } - - pub fn with_bg_flush(mut self) -> Self { - self.bg_flush = true; - self - } - - pub fn with_node_types(&self, types: impl IntoIterator>) -> Self { - let node_types = types.into_iter().map(|s| s.as_ref().to_string()).collect(); - - Self { - node_types, - ..*self - } - } - - pub fn max_node_page_len(&self) -> u32 { - self.max_node_page_len - } - - pub fn max_edge_page_len(&self) -> u32 { - self.max_edge_page_len - } - - pub fn max_memory_bytes(&self) -> usize { - self.max_memory_bytes - } - - pub fn bg_flush(&self) -> bool { - self.bg_flush - } - - pub fn node_types(&self) -> &[String] { - &self.node_types - } } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct NoOpConfig { - persistence: PersistenceConfig, +#[derive(Debug, Copy, Clone, Serialize, Deserialize)] +pub struct BaseConfig { + max_node_page_len: u32, + max_edge_page_len: u32, } -impl NoOpConfig { +impl BaseConfig { pub fn new(max_node_page_len: u32, max_edge_page_len: u32) -> Self { Self { - persistence: PersistenceConfig { - max_node_page_len, - max_edge_page_len, - ..NoOpConfig::default().persistence - }, + max_node_page_len, + max_edge_page_len, } } } -impl Default for NoOpConfig { +impl Default for BaseConfig { fn default() -> Self { Self { - persistence: PersistenceConfig { - max_memory_bytes: usize::MAX, - ..PersistenceConfig::default() - }, + max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, + max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, } } } -impl ConfigOps for NoOpConfig { - fn persistence(&self) -> &PersistenceConfig { - &self.persistence +impl ConfigOps for BaseConfig { + fn max_node_page_len(&self) -> u32 { + self.max_node_page_len + } + + fn max_edge_page_len(&self) -> u32 { + self.max_edge_page_len + } + + fn node_types(&self) -> &[String] { + &[] + } + + fn with_node_types(&self, _node_types: impl IntoIterator>) -> Self { + *self } fn load_from_dir(_dir: impl AsRef) -> Result { diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 46866ff017..617b85a576 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -1,6 +1,6 @@ use crate::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, - persist::config::{ConfigOps, NoOpConfig}, + persist::config::{BaseConfig, ConfigOps}, segments::{ edge::segment::{EdgeSegmentView, MemEdgeSegment}, graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, @@ -50,7 +50,7 @@ pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { #[derive(Debug, Clone)] pub struct NoOpStrategy { - config: NoOpConfig, + config: BaseConfig, wal: Arc, } @@ -59,7 +59,7 @@ impl PersistenceStrategy for NoOpStrategy { type NS = NodeSegmentView; type GS = GraphPropSegmentView; type Wal = NoWal; - type Config = NoOpConfig; + type Config = BaseConfig; fn new(config: Self::Config, wal: Arc) -> Self { Self { config, wal } diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index b60850bd81..94347dafbe 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -424,7 +424,7 @@ impl>> EdgeSegmentOps for EdgeSeg } fn new(page_id: usize, meta: Arc, _path: Option, ext: Self::Extension) -> Self { - let max_page_len = ext.config().persistence().max_edge_page_len(); + let max_page_len = ext.config().max_edge_page_len(); Self { segment: parking_lot::RwLock::new(MemEdgeSegment::new(page_id, max_page_len, meta)) diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 4ec08c12a8..d4e1909b99 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -441,7 +441,7 @@ impl>> NodeSegmentOps for NodeSeg _path: Option, ext: Self::Extension, ) -> Self { - let max_page_len = ext.config().persistence().max_node_page_len(); + let max_page_len = ext.config().max_node_page_len(); let inner = RwLock::new(MemNodeSegment::new(segment_id, max_page_len, meta)); let inner = Arc::new(inner); @@ -565,7 +565,7 @@ mod test { api::nodes::NodeSegmentOps, pages::{layer_counter::GraphStats, node_page::writer::NodeWriter}, persist::{ - config::NoOpConfig, + config::BaseConfig, strategy::{NoOpStrategy, PersistenceStrategy}, }, wal::no_wal::NoWal, @@ -583,7 +583,7 @@ mod test { let node_meta = Arc::new(Meta::default()); let edge_meta = Arc::new(Meta::default()); let path = tempdir().unwrap(); - let config = NoOpConfig::new(10, 10); + let config = BaseConfig::new(10, 10); let ext = NoOpStrategy::new(config, Arc::new(NoWal)); let segment_id = 0; let segment = NodeSegmentView::new( diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index 10a04dbad1..f56e2cdc3c 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -1,6 +1,6 @@ use raphtory_api::core::entities::{GidRef, properties::prop::Prop}; use raphtory_core::{ - entities::{EID, GID, VID}, + entities::{EID, VID}, storage::timeindex::EventTime, }; diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 0984a4cdbf..ed99cb3e9f 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -220,10 +220,7 @@ impl InternalAdditionOps for TemporalGraph { self.event_counter .fetch_add(1, std::sync::atomic::Ordering::Relaxed), ); - pos.as_vid( - seg, - self.extension().config().persistence().max_node_page_len(), - ) + pos.as_vid(seg, self.extension().config().max_node_page_len()) })?; Ok(id) From 2edfb7e99cbe20082b484d8b5d6142b749f5891d Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 28 Jan 2026 14:07:03 +0100 Subject: [PATCH 86/95] check for self-loops in materialize --- raphtory/src/db/api/view/graph.rs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 4ba0765039..c076c9ee6b 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -501,19 +501,21 @@ fn materialize_impl( } for e in edge.explode() { - if let Some(node_pos) = maybe_src_pos { + if let Some(src_pos) = maybe_src_pos { let mut writer = shard.writer(); let t = e.time().expect("exploded edge should have time"); let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); + writer.update_timestamp(t, src_pos, eid.with_layer(l), 0); } - if let Some(node_pos) = maybe_dst_pos { - let mut writer = shard.writer(); + if let Some(dst_pos) = maybe_dst_pos { + if maybe_src_pos.is_none_or(|src_pos| src_pos != dst_pos) { + let mut writer = shard.writer(); - let t = e.time().expect("exploded edge should have time"); - let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); + let t = e.time().expect("exploded edge should have time"); + let l = layer_map[e.edge.layer().unwrap()]; + writer.update_timestamp(t, dst_pos, eid.with_layer(l), 0); + } } } @@ -525,13 +527,15 @@ fn materialize_impl( graph.layer_ids(), ) { let layer = layer_map[layer]; - if let Some(node_pos) = maybe_src_pos { + if let Some(src_pos) = maybe_src_pos { let mut writer = shard.writer(); - writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); + writer.update_timestamp(t, src_pos, eid.with_layer_deletion(layer), 0); } - if let Some(node_pos) = maybe_dst_pos { - let mut writer = shard.writer(); - writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); + if let Some(dst_pos) = maybe_dst_pos { + if maybe_src_pos.is_none_or(|src_pos| src_pos != dst_pos) { + let mut writer = shard.writer(); + writer.update_timestamp(t, dst_pos, eid.with_layer_deletion(layer), 0); + } } } } From 66036dd1570127c599b3e7bf202c87b72358e2c7 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 28 Jan 2026 15:12:05 +0100 Subject: [PATCH 87/95] property types need to be unified --- db4-graph/src/replay.rs | 19 +++++++++++++++++-- .../src/core/entities/properties/meta.rs | 6 +++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 9293d7385b..06cb346271 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -5,7 +5,10 @@ use crate::WriteLockedGraph; use raphtory_api::core::{ entities::{ - properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, + properties::{ + meta::STATIC_GRAPH_LAYER_ID, + prop::{check_for_unification, unify_types, Prop, PropType}, + }, EID, GID, VID, }, storage::timeindex::EventTime, @@ -52,7 +55,19 @@ where for (prop_name, prop_id, prop_value) in &props { let prop_mapper = edge_meta.temporal_prop_mapper(); - prop_mapper.set_id_and_dtype(prop_name.as_str(), *prop_id, prop_value.dtype()); + match prop_mapper.get_dtype(*prop_id) { + None => { + prop_mapper.set_id_and_dtype(prop_name.as_str(), *prop_id, prop_value.dtype()); + } + Some(old_dtype) => { + let dtype = prop_value.dtype(); + let mut unified = false; + let new_dtype = unify_types(&old_dtype, &dtype, &mut unified)?; + if unified { + prop_mapper.set_dtype(*prop_id, new_dtype); + } + } + } } // 2. Insert node ids into resolver. diff --git a/raphtory-api/src/core/entities/properties/meta.rs b/raphtory-api/src/core/entities/properties/meta.rs index 73d06784f7..c25621f855 100644 --- a/raphtory-api/src/core/entities/properties/meta.rs +++ b/raphtory-api/src/core/entities/properties/meta.rs @@ -332,8 +332,12 @@ impl PropMapper { } pub fn set_id_and_dtype(&self, key: impl Into, id: usize, dtype: PropType) { - let mut dtypes = self.dtypes.write(); self.set_id(key, id); + self.set_dtype(id, dtype); + } + + pub fn set_dtype(&self, id: usize, dtype: PropType) { + let mut dtypes = self.dtypes.write(); if dtypes.len() <= id { dtypes.resize(id + 1, PropType::Empty); } From 34115cad4b4926880bca2534ba805ed685ad6524 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 28 Jan 2026 15:20:59 +0100 Subject: [PATCH 88/95] move the property meta updates under the edge segment lsn --- db4-graph/src/replay.rs | 54 ++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 06cb346271..3422749ff4 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -51,24 +51,7 @@ where // 1. Insert prop ids into edge meta. // No need to validate props again since they are already validated before // being logged to the WAL. - let edge_meta = temporal_graph.edge_meta(); - for (prop_name, prop_id, prop_value) in &props { - let prop_mapper = edge_meta.temporal_prop_mapper(); - match prop_mapper.get_dtype(*prop_id) { - None => { - prop_mapper.set_id_and_dtype(prop_name.as_str(), *prop_id, prop_value.dtype()); - } - Some(old_dtype) => { - let dtype = prop_value.dtype(); - let mut unified = false; - let new_dtype = unify_types(&old_dtype, &dtype, &mut unified)?; - if unified { - prop_mapper.set_dtype(*prop_id, new_dtype); - } - } - } - } // 2. Insert node ids into resolver. if let Some(src_name) = src_name.as_ref() { @@ -83,15 +66,7 @@ where .set(dst_name.as_ref(), dst_id)?; } - // 3. Insert layer id into the layer meta of both edge and node. - let node_meta = temporal_graph.node_meta(); - edge_meta - .layer_meta() - .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); - node_meta - .layer_meta() - .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); // 4. Grab src writer and add edge data. let (src_segment_id, src_pos) = resolve_pos(src_id, node_max_page_len); @@ -200,6 +175,35 @@ where // Replay this entry only if it doesn't exist in immut. if immut_lsn < lsn { + let edge_meta = temporal_graph.edge_meta(); + + for (prop_name, prop_id, prop_value) in &props { + let prop_mapper = edge_meta.temporal_prop_mapper(); + match prop_mapper.get_dtype(*prop_id) { + None => { + prop_mapper.set_id_and_dtype(prop_name.as_str(), *prop_id, prop_value.dtype()); + } + Some(old_dtype) => { + let dtype = prop_value.dtype(); + let mut unified = false; + let new_dtype = unify_types(&old_dtype, &dtype, &mut unified)?; + if unified { + prop_mapper.set_dtype(*prop_id, new_dtype); + } + } + } + } + + // 3. Insert layer id into the layer meta of both edge and node. + let node_meta = temporal_graph.node_meta(); + + edge_meta + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); + node_meta + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); + let mut edge_writer = self.edges.get_mut(edge_segment_id).unwrap().writer(); let is_new_edge_static = edge_writer From 8e93a06742e2e6fab32fb325a3fc8ad2c628cd20 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 28 Jan 2026 10:08:46 -0500 Subject: [PATCH 89/95] Fix borrow checker issues in replay --- db4-graph/src/replay.rs | 15 ++++++--------- raphtory-storage/src/graph/graph.rs | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 3422749ff4..5dc4f4324f 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -44,9 +44,8 @@ where layer_id: usize, props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError> { - let temporal_graph = self.graph(); - let node_max_page_len = temporal_graph.extension().config().max_node_page_len(); - let edge_max_page_len = temporal_graph.extension().config().max_edge_page_len(); + let node_max_page_len = self.graph().extension().config().max_node_page_len(); + let edge_max_page_len = self.graph().extension().config().max_edge_page_len(); // 1. Insert prop ids into edge meta. // No need to validate props again since they are already validated before @@ -55,19 +54,17 @@ where // 2. Insert node ids into resolver. if let Some(src_name) = src_name.as_ref() { - temporal_graph + self.graph() .logical_to_physical .set(src_name.as_ref(), src_id)?; } if let Some(dst_name) = dst_name.as_ref() { - temporal_graph + self.graph() .logical_to_physical .set(dst_name.as_ref(), dst_id)?; } - - // 4. Grab src writer and add edge data. let (src_segment_id, src_pos) = resolve_pos(src_id, node_max_page_len); let resize_vid = VID::from(src_id.index() + 1); @@ -175,7 +172,7 @@ where // Replay this entry only if it doesn't exist in immut. if immut_lsn < lsn { - let edge_meta = temporal_graph.edge_meta(); + let edge_meta = self.graph().edge_meta(); for (prop_name, prop_id, prop_value) in &props { let prop_mapper = edge_meta.temporal_prop_mapper(); @@ -195,7 +192,7 @@ where } // 3. Insert layer id into the layer meta of both edge and node. - let node_meta = temporal_graph.node_meta(); + let node_meta = self.graph().node_meta(); edge_meta .layer_meta() diff --git a/raphtory-storage/src/graph/graph.rs b/raphtory-storage/src/graph/graph.rs index 46795e1bca..830619c714 100644 --- a/raphtory-storage/src/graph/graph.rs +++ b/raphtory-storage/src/graph/graph.rs @@ -8,7 +8,7 @@ use crate::{ locked::LockedGraph, nodes::{nodes::NodesStorage, nodes_ref::NodesStorageEntry}, }, - mutation::{durability_ops::DurabilityOps, MutationError}, + mutation::{MutationError}, }; use db4_graph::TemporalGraph; use raphtory_api::core::entities::{properties::meta::Meta, LayerIds, LayerVariants, EID, VID}; From af22b61b32e29ad5e8c9fa54fa83fcd2d12d1c30 Mon Sep 17 00:00:00 2001 From: Fadhil Abubaker Date: Wed, 28 Jan 2026 10:23:10 -0500 Subject: [PATCH 90/95] Move DEFAULT_MAX_MEMORY_BYTES to private --- db4-storage/src/persist/config.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/db4-storage/src/persist/config.rs b/db4-storage/src/persist/config.rs index 11443c1689..3958de59d7 100644 --- a/db4-storage/src/persist/config.rs +++ b/db4-storage/src/persist/config.rs @@ -4,7 +4,6 @@ use std::path::Path; pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 -pub const DEFAULT_MAX_MEMORY_BYTES: usize = 32 * 1024 * 1024; pub const CONFIG_FILE: &str = "config.json"; pub trait ConfigOps: Serialize + DeserializeOwned { From cab894c0426538c247f80e61d3ebf9189bfbb156 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 29 Jan 2026 10:52:01 +0100 Subject: [PATCH 91/95] move the count checks to the end as the other ones are more helpful for debugging --- raphtory/src/test_utils.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/raphtory/src/test_utils.rs b/raphtory/src/test_utils.rs index 84291cbb09..f19b796361 100644 --- a/raphtory/src/test_utils.rs +++ b/raphtory/src/test_utils.rs @@ -193,18 +193,6 @@ pub fn assert_valid_graph(fixture: &GraphFixture, graph: &Graph) { expected_node_ids, actual_node_ids ); - assert_eq!( - expected_edge_pairs.len(), - graph.count_edges(), - "mismatched number of unique edges (src,dst) pairs" - ); - - assert_eq!( - expected_exploded_edge_count, - graph.count_temporal_edges(), - "mismatched number of temporal (exploded) edge events" - ); - for ((_, _, layer), _) in &expected_edge_layer_updates { assert!( graph.has_layer(layer.as_ref()), @@ -357,6 +345,18 @@ pub fn assert_valid_graph(fixture: &GraphFixture, graph: &Graph) { panic!("graph should have edge {src}->{dst} in layer {layer_name:?}") }); } + + assert_eq!( + expected_edge_pairs.len(), + graph.count_edges(), + "mismatched number of unique edges (src,dst) pairs" + ); + + assert_eq!( + expected_exploded_edge_count, + graph.count_temporal_edges(), + "mismatched number of temporal (exploded) edge events" + ); } #[macro_export] From 714ad3fb59d029c6dcf91d17165450721c6bc3b8 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 29 Jan 2026 14:01:42 +0100 Subject: [PATCH 92/95] serialize PropArray as ipc --- raphtory-api/Cargo.toml | 1 + .../entities/properties/prop/prop_array.rs | 89 +++++++++++++++++-- 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 4c632688a9..b838234db8 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -46,6 +46,7 @@ display-error-chain = { workspace = true, optional = true } [dev-dependencies] proptest.workspace = true +serde_json.workspace = true [features] # Enables generating the pyo3 python bindings diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index e2dea8117d..d7365412dc 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -6,11 +6,14 @@ use crate::{ }; use arrow_array::{ cast::AsArray, types::*, Array, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray, + RecordBatch, }; -use arrow_schema::{DataType, Field, Fields, TimeUnit}; -use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer}; +use arrow_ipc::{reader::FileReader, writer::FileWriter}; +use arrow_schema::{DataType, Field, Fields, Schema, TimeUnit}; +use serde::{de, ser, ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer}; use std::{ hash::{Hash, Hasher}, + io::Cursor, sync::Arc, }; @@ -20,6 +23,12 @@ pub enum PropArray { Array(ArrayRef), } +#[derive(Debug, Clone, Deserialize, Serialize)] +enum SerializedPropArray { + Vec(Arc<[Prop]>), + Array(Vec), +} + impl Default for PropArray { fn default() -> Self { PropArray::Vec(vec![].into()) @@ -199,11 +208,27 @@ impl Serialize for PropArray { where S: Serializer, { - let mut state = serializer.serialize_seq(Some(self.len()))?; - for prop in self.iter_all() { - state.serialize_element(&prop)?; - } - state.end() + let serializable = match self { + PropArray::Vec(inner) => SerializedPropArray::Vec(inner.clone()), + PropArray::Array(array) => { + let mut bytes = Vec::new(); + let cursor = Cursor::new(&mut bytes); + let schema = + Schema::new(vec![Field::new("value", array.data_type().clone(), true)]); + let mut writer = FileWriter::try_new(cursor, &schema) + .map_err(|err| ser::Error::custom(err.to_string()))?; + let batch = RecordBatch::try_new(schema.into(), vec![array.clone()]) + .map_err(|err| ser::Error::custom(err.to_string()))?; + writer + .write(&batch) + .map_err(|err| ser::Error::custom(err.to_string()))?; + writer + .finish() + .map_err(|err| ser::Error::custom(err.to_string()))?; + SerializedPropArray::Array(bytes) + } + }; + serializable.serialize(serializer) } } @@ -212,8 +237,29 @@ impl<'de> Deserialize<'de> for PropArray { where D: Deserializer<'de>, { - let data = >::deserialize(deserializer)?; - Ok(PropArray::Vec(data.into())) + let data = SerializedPropArray::deserialize(deserializer)?; + let deserialized = match data { + SerializedPropArray::Vec(res) => PropArray::Vec(res), + SerializedPropArray::Array(bytes) => { + let cursor = Cursor::new(bytes); + let mut reader = FileReader::try_new(cursor, None) + .map_err(|err| de::Error::custom(err.to_string()))?; + let batch = reader.next().ok_or_else(|| { + de::Error::custom( + "Failed to deserialize PropArray: Array data missing.".to_owned(), + ) + })?; + let batch = batch.map_err(|err| de::Error::custom(err.to_string()))?; + let (_, arrays, _) = batch.into_parts(); + let array = arrays.into_iter().next().ok_or_else(|| { + de::Error::custom( + "Failed to deserialize PropArray: Array data missing.".to_owned(), + ) + })?; + PropArray::Array(array) + } + }; + Ok(deserialized) } } @@ -339,3 +385,28 @@ impl PropArrayUnwrap for Prop { } } } + +#[cfg(test)] +mod test { + use crate::core::entities::properties::prop::{Prop, PropArray}; + use arrow_array::Int64Array; + use std::sync::Arc; + + #[test] + fn test_prop_array_json() { + let array = PropArray::Array(Arc::new(Int64Array::from(vec![0, 1, 2]))); + let json = serde_json::to_string(&array).unwrap(); + println!("{json}"); + let recovered: PropArray = serde_json::from_str(&json).unwrap(); + assert_eq!(array, recovered); + } + + #[test] + fn test_prop_array_list_json() { + let array = PropArray::Vec([Prop::U64(1), Prop::U64(2)].into()); + let json = serde_json::to_string(&array).unwrap(); + println!("{json}"); + let recovered: PropArray = serde_json::from_str(&json).unwrap(); + assert_eq!(array, recovered); + } +} From e81f78d985f3424c6fdd14909daaa572d98ffeb5 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 29 Jan 2026 14:24:32 +0100 Subject: [PATCH 93/95] postcard cannot handle the multi-deserialize in BigDecimal --- raphtory-api/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index b838234db8..d768c1f2b7 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -17,7 +17,7 @@ edition.workspace = true [dependencies] serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, optional = true } -bigdecimal = { workspace = true } +bigdecimal = { workspace = true, features = ["string-only"] } thiserror = { workspace = true } bytemuck = { workspace = true } chrono.workspace = true From 88aaa056152b8f492eba250f4d1ad34a8a4e79f5 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 29 Jan 2026 14:43:45 +0100 Subject: [PATCH 94/95] fmt --- db4-graph/src/replay.rs | 7 +++++-- raphtory-storage/src/graph/graph.rs | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 5dc4f4324f..5b2c4941f1 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -51,7 +51,6 @@ where // No need to validate props again since they are already validated before // being logged to the WAL. - // 2. Insert node ids into resolver. if let Some(src_name) = src_name.as_ref() { self.graph() @@ -178,7 +177,11 @@ where let prop_mapper = edge_meta.temporal_prop_mapper(); match prop_mapper.get_dtype(*prop_id) { None => { - prop_mapper.set_id_and_dtype(prop_name.as_str(), *prop_id, prop_value.dtype()); + prop_mapper.set_id_and_dtype( + prop_name.as_str(), + *prop_id, + prop_value.dtype(), + ); } Some(old_dtype) => { let dtype = prop_value.dtype(); diff --git a/raphtory-storage/src/graph/graph.rs b/raphtory-storage/src/graph/graph.rs index 830619c714..754eccf6ab 100644 --- a/raphtory-storage/src/graph/graph.rs +++ b/raphtory-storage/src/graph/graph.rs @@ -8,7 +8,7 @@ use crate::{ locked::LockedGraph, nodes::{nodes::NodesStorage, nodes_ref::NodesStorageEntry}, }, - mutation::{MutationError}, + mutation::MutationError, }; use db4_graph::TemporalGraph; use raphtory_api::core::entities::{properties::meta::Meta, LayerIds, LayerVariants, EID, VID}; From f1cc3eec3e3741161eea91514edf332152e23028 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 29 Jan 2026 15:20:04 +0100 Subject: [PATCH 95/95] cleanup --- db4-graph/src/replay.rs | 2 +- db4-storage/src/pages/graph_prop_page/writer.rs | 3 +-- db4-storage/src/pages/locked/graph_props.rs | 7 +++---- .../src/core/entities/properties/prop/prop_array.rs | 2 +- raphtory-storage/src/mutation/durability_ops.rs | 1 - raphtory-storage/src/mutation/property_addition_ops.rs | 6 +++--- raphtory/Cargo.toml | 4 ++-- raphtory/src/db/api/mutation/addition_ops.rs | 5 +---- raphtory/src/db/api/storage/storage.rs | 7 +------ raphtory/src/db/api/view/graph.rs | 4 ++-- raphtory/src/db/graph/edge.rs | 1 - 11 files changed, 15 insertions(+), 27 deletions(-) diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs index 5b2c4941f1..f3a1e30d80 100644 --- a/db4-graph/src/replay.rs +++ b/db4-graph/src/replay.rs @@ -7,7 +7,7 @@ use raphtory_api::core::{ entities::{ properties::{ meta::STATIC_GRAPH_LAYER_ID, - prop::{check_for_unification, unify_types, Prop, PropType}, + prop::{unify_types, Prop}, }, EID, GID, VID, }, diff --git a/db4-storage/src/pages/graph_prop_page/writer.rs b/db4-storage/src/pages/graph_prop_page/writer.rs index 612a1be9cc..f3f0acce93 100644 --- a/db4-storage/src/pages/graph_prop_page/writer.rs +++ b/db4-storage/src/pages/graph_prop_page/writer.rs @@ -28,7 +28,6 @@ impl<'a, GS: GraphPropSegmentOps> GraphPropWriter<'a, GS> { &mut self, t: T, props: impl IntoIterator, - lsn: u64, ) { let add = self.mem_segment.add_properties(t, props); @@ -40,7 +39,7 @@ impl<'a, GS: GraphPropSegmentOps> GraphPropWriter<'a, GS> { self.mem_segment.check_metadata(props) } - pub fn update_metadata(&mut self, props: impl IntoIterator, lsn: u64) { + pub fn update_metadata(&mut self, props: impl IntoIterator) { let add = self.mem_segment.update_metadata(props); self.graph_props.increment_est_size(add); diff --git a/db4-storage/src/pages/locked/graph_props.rs b/db4-storage/src/pages/locked/graph_props.rs index 87d41dc222..b74a46a70a 100644 --- a/db4-storage/src/pages/locked/graph_props.rs +++ b/db4-storage/src/pages/locked/graph_props.rs @@ -24,7 +24,6 @@ impl<'a, GS: GraphPropSegmentOps> LockedGraphPropPage<'a, GS> { &mut self, t: T, props: impl IntoIterator, - lsn: u64, ) { let add = self.lock.add_properties(t, props); @@ -33,12 +32,12 @@ impl<'a, GS: GraphPropSegmentOps> LockedGraphPropPage<'a, GS> { } /// Add metadata (constant properties) to the graph - pub fn add_metadata(&mut self, props: impl IntoIterator, lsn: u64) { - self.update_metadata(props, lsn); + pub fn add_metadata(&mut self, props: impl IntoIterator) { + self.update_metadata(props); } /// Update metadata (constant properties) on the graph - pub fn update_metadata(&mut self, props: impl IntoIterator, lsn: u64) { + pub fn update_metadata(&mut self, props: impl IntoIterator) { let add = self.lock.update_metadata(props); self.page.increment_est_size(add); diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index d7365412dc..8906b0a59d 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -10,7 +10,7 @@ use arrow_array::{ }; use arrow_ipc::{reader::FileReader, writer::FileWriter}; use arrow_schema::{DataType, Field, Fields, Schema, TimeUnit}; -use serde::{de, ser, ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer}; +use serde::{de, ser, Deserialize, Deserializer, Serialize, Serializer}; use std::{ hash::{Hash, Hasher}, io::Cursor, diff --git a/raphtory-storage/src/mutation/durability_ops.rs b/raphtory-storage/src/mutation/durability_ops.rs index 9b384993ba..0a2205b5a8 100644 --- a/raphtory-storage/src/mutation/durability_ops.rs +++ b/raphtory-storage/src/mutation/durability_ops.rs @@ -1,5 +1,4 @@ use crate::{graph::graph::GraphStorage, mutation::MutationError}; -use db4_graph::TemporalGraph; use storage::{transaction::TransactionManager, Wal}; /// Accessor methods for transactions and write-ahead logging. diff --git a/raphtory-storage/src/mutation/property_addition_ops.rs b/raphtory-storage/src/mutation/property_addition_ops.rs index 6b319d20ff..180468843d 100644 --- a/raphtory-storage/src/mutation/property_addition_ops.rs +++ b/raphtory-storage/src/mutation/property_addition_ops.rs @@ -64,21 +64,21 @@ impl InternalPropertyAdditionOps for db4_graph::TemporalGraph { props: &[(usize, Prop)], ) -> Result<(), Self::Error> { let mut writer = self.storage().graph_props().writer(); - writer.add_properties(t, props.iter().map(|(id, prop)| (*id, prop.clone())), 0); + writer.add_properties(t, props.iter().map(|(id, prop)| (*id, prop.clone()))); Ok(()) } fn internal_add_metadata(&self, props: &[(usize, Prop)]) -> Result<(), Self::Error> { let mut writer = self.storage().graph_props().writer(); writer.check_metadata(props)?; - writer.update_metadata(props.iter().map(|(id, prop)| (*id, prop.clone())), 0); + writer.update_metadata(props.iter().map(|(id, prop)| (*id, prop.clone()))); Ok(()) } // FIXME: this can't fail fn internal_update_metadata(&self, props: &[(usize, Prop)]) -> Result<(), Self::Error> { let mut writer = self.storage().graph_props().writer(); - writer.update_metadata(props.iter().map(|(id, prop)| (*id, prop.clone())), 0); + writer.update_metadata(props.iter().map(|(id, prop)| (*id, prop.clone()))); Ok(()) } diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index 1b48830c08..69ba087003 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -54,7 +54,7 @@ uuid = { workspace = true } parquet = { workspace = true } arrow-json = { workspace = true } arrow-csv = { workspace = true } -arrow = { workspace = true, features = ["chrono-tz"]} +arrow = { workspace = true, features = ["chrono-tz"] } # io optional dependencies csv = { workspace = true, optional = true } @@ -155,7 +155,7 @@ python = [ "dep:numpy", "dep:num", "dep:display-error-chain", - "pyo3-arrow", + "dep:pyo3-arrow", "raphtory-api/python", "raphtory-core/python", "kdam/notebook", diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index 14a6d9799f..22da9e9326 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -1,10 +1,7 @@ use crate::{ core::entities::{edges::edge_ref::EdgeRef, nodes::node_ref::AsNodeRef}, db::{ - api::{ - mutation::time_from_input_session, - view::{internal::InternalStorageOps, StaticGraphViewOps}, - }, + api::{mutation::time_from_input_session, view::StaticGraphViewOps}, graph::{edge::EdgeView, node::NodeView}, }, errors::{into_graph_err, GraphError}, diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index f673ae6693..1fb0ea1dea 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -26,7 +26,6 @@ use raphtory_storage::{ addition_ops::{EdgeWriteLock, InternalAdditionOps, SessionAdditionOps}, addition_ops_ext::{UnlockedSession, WriteS}, deletion_ops::InternalDeletionOps, - durability_ops::DurabilityOps, property_addition_ops::InternalPropertyAdditionOps, EdgeWriterT, NodeWriterT, }, @@ -38,16 +37,12 @@ use std::{ }; use storage::{ persist::config::ConfigOps, - transaction::TransactionManager, wal::{GraphWalOps, WalOps, LSN}, Wal, }; // Re-export for raphtory dependencies to use when creating graphs. -pub use storage::{ - persist::{config::PersistenceConfig, strategy::PersistenceStrategy}, - Config, Extension, -}; +pub use storage::{persist::strategy::PersistenceStrategy, Config, Extension}; #[cfg(feature = "search")] use { diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 0c49dcaf4f..13959b082e 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -561,7 +561,7 @@ fn materialize_impl( for (t, prop_value) in temporal_prop.iter_indexed() { let lsn = 0; - graph_writer.add_properties(t, [(prop_id, prop_value)], lsn); + graph_writer.add_properties(t, [(prop_id, prop_value)]); } } @@ -581,7 +581,7 @@ fn materialize_impl( if !metadata_props.is_empty() { let lsn = 0; - graph_writer.update_metadata(metadata_props, lsn); + graph_writer.update_metadata(metadata_props); } } } diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index aac46f5633..62ff30ab0e 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -42,7 +42,6 @@ use raphtory_storage::{ mutation::{ addition_ops::{EdgeWriteLock, InternalAdditionOps}, deletion_ops::InternalDeletionOps, - durability_ops::DurabilityOps, property_addition_ops::InternalPropertyAdditionOps, }, };