diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index a986c3c3a3..7f28a5741f 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -1,10 +1,7 @@ use std::{ io, path::{Path, PathBuf}, - sync::{ - atomic::{self, AtomicU64, AtomicUsize}, - Arc, - }, + sync::{atomic::AtomicUsize, Arc}, }; use raphtory_api::core::{ @@ -25,17 +22,20 @@ use storage::{ nodes::WriteLockedNodePages, }, }, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, resolver::GIDResolverOps, - wal::{GraphWal, TransactionID, Wal}, - Extension, GIDResolver, Layer, ReadLockedLayer, WalImpl, ES, GS, NS, + transaction::TransactionManager, + wal::WalOps, + Config, Extension, GIDResolver, Layer, ReadLockedLayer, Wal, ES, GS, NS, }; use tempfile::TempDir; +mod replay; + #[derive(Debug)] pub struct TemporalGraph where - EXT: PersistentStrategy, ES = ES, GS = GS>, + EXT: PersistenceStrategy, ES = ES, GS = GS>, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, @@ -46,7 +46,6 @@ where storage: Arc>, graph_dir: Option, pub transaction_manager: Arc, - pub wal: Arc, } #[derive(Debug)] @@ -90,49 +89,17 @@ impl<'a> From<&'a Path> for GraphDir { } } -#[derive(Debug)] -pub struct TransactionManager { - last_transaction_id: AtomicU64, - wal: Arc, -} - -impl TransactionManager { - const STARTING_TRANSACTION_ID: TransactionID = 1; - - pub fn new(wal: Arc) -> Self { - Self { - last_transaction_id: AtomicU64::new(Self::STARTING_TRANSACTION_ID), - wal, - } - } - - pub fn load(self, last_transaction_id: TransactionID) { - self.last_transaction_id - .store(last_transaction_id, atomic::Ordering::SeqCst) - } - - pub fn begin_transaction(&self) -> TransactionID { - let transaction_id = self - .last_transaction_id - .fetch_add(1, atomic::Ordering::SeqCst); - self.wal.log_begin_transaction(transaction_id).unwrap(); - transaction_id - } - - pub fn end_transaction(&self, transaction_id: TransactionID) { - self.wal.log_end_transaction(transaction_id).unwrap(); - } -} - impl Default for TemporalGraph { fn default() -> Self { - Self::new(Extension::default()).unwrap() + let config = Config::default(); + let wal = Arc::new(Wal::new(None).unwrap()); + Self::new(Extension::new(config, wal)).unwrap() } } impl TemporalGraph where - EXT: PersistentStrategy, ES = ES, GS = GS>, + EXT: PersistenceStrategy, ES = ES, GS = GS>, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, @@ -145,7 +112,7 @@ where Self::new_with_meta(None, node_meta, edge_meta, graph_props_meta, ext) } - pub fn new_with_path(path: impl AsRef, ext: EXT) -> Result { + pub fn new_at_path_with_ext(path: impl AsRef, ext: EXT) -> Result { let node_meta = Meta::new_for_nodes(); let edge_meta = Meta::new_for_edges(); let graph_props_meta = Meta::new_for_graph_props(); @@ -159,26 +126,6 @@ where ) } - pub fn load_from_path(path: impl AsRef) -> Result { - let path = path.as_ref(); - let storage = Layer::load(path)?; - let id_type = storage.nodes().id_type(); - - let gid_resolver_dir = path.join("gid_resolver"); - let resolver = GIDResolver::new_with_path(&gid_resolver_dir, id_type)?; - let wal_dir = path.join("wal"); - let wal = Arc::new(WalImpl::new(Some(wal_dir))?); - - Ok(Self { - graph_dir: Some(path.into()), - event_counter: AtomicUsize::new(resolver.len()), - logical_to_physical: resolver.into(), - storage: Arc::new(storage), - transaction_manager: Arc::new(TransactionManager::new(wal.clone())), - wal, - }) - } - pub fn new_with_meta( graph_dir: Option, node_meta: Meta, @@ -218,16 +165,29 @@ where ext, ); - let wal_dir = 
graph_dir.as_ref().map(|dir| dir.wal_dir()); - let wal = Arc::new(WalImpl::new(wal_dir)?); - Ok(Self { graph_dir, logical_to_physical, storage: Arc::new(storage), - transaction_manager: Arc::new(TransactionManager::new(wal.clone())), + transaction_manager: Arc::new(TransactionManager::new()), event_counter: AtomicUsize::new(0), - wal, + }) + } + + pub fn load_from_path(path: impl AsRef, ext: EXT) -> Result { + let path = path.as_ref(); + let storage = Layer::load(path, ext)?; + let id_type = storage.nodes().id_type(); + + let gid_resolver_dir = path.join("gid_resolver"); + let resolver = GIDResolver::new_with_path(&gid_resolver_dir, id_type)?; + + Ok(Self { + graph_dir: Some(path.into()), + event_counter: AtomicUsize::new(resolver.len()), + logical_to_physical: resolver.into(), + storage: Arc::new(storage), + transaction_manager: Arc::new(TransactionManager::new()), }) } @@ -266,10 +226,12 @@ where .get_str(string) .or_else(|| self.logical_to_physical.get_u64(string.id())), }?; + // VIDs in the resolver may not be initialised yet, need to double-check the node actually exists! let nodes = self.storage().nodes(); let (page_id, pos) = nodes.resolve_pos(vid); let node_page = nodes.segments().get(page_id)?; + if pos.0 < node_page.num_nodes() { Some(vid) } else { @@ -404,9 +366,10 @@ where } } +/// Holds write locks across all segments in the graph for fast bulk ingestion. pub struct WriteLockedGraph<'a, EXT> where - EXT: PersistentStrategy, ES = ES, GS = GS>, + EXT: PersistenceStrategy, ES = ES, GS = GS>, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, @@ -419,7 +382,7 @@ where impl<'a, EXT> WriteLockedGraph<'a, EXT> where - EXT: PersistentStrategy, ES = ES, GS = GS>, + EXT: PersistenceStrategy, ES = ES, GS = GS>, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, @@ -437,17 +400,15 @@ where self.graph } - pub fn resize_chunks_to_num_nodes(&mut self, max_vid: Option) { - if let Some(max_vid) = max_vid { - let (chunks_needed, _) = self.graph.storage.nodes().resolve_pos(max_vid); - self.graph.storage().nodes().grow(chunks_needed + 1); - std::mem::take(&mut self.nodes); - self.nodes = self.graph.storage.nodes().write_locked(); - } + pub fn resize_chunks_to_vid(&mut self, vid: VID) { + let (chunks_needed, _) = self.graph.storage.nodes().resolve_pos(vid); + self.graph.storage().nodes().grow(chunks_needed + 1); + std::mem::take(&mut self.nodes); + self.nodes = self.graph.storage.nodes().write_locked(); } - pub fn resize_chunks_to_num_edges(&mut self, max_eid: EID) { - let (chunks_needed, _) = self.graph.storage.edges().resolve_pos(max_eid); + pub fn resize_chunks_to_eid(&mut self, eid: EID) { + let (chunks_needed, _) = self.graph.storage.edges().resolve_pos(eid); self.graph.storage().edges().grow(chunks_needed + 1); std::mem::take(&mut self.edges); self.edges = self.graph.storage.edges().write_locked(); diff --git a/db4-graph/src/replay.rs b/db4-graph/src/replay.rs new file mode 100644 index 0000000000..f3a1e30d80 --- /dev/null +++ b/db4-graph/src/replay.rs @@ -0,0 +1,236 @@ +//! Implements WAL replay for a `WriteLockedGraph`. +//! Allows for fast replay by making use of one-time lock acquisition for +//! all the segments in the graph. 
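// A minimal, self-contained sketch of the LSN gate the replay code below relies on:
// a WAL entry is applied to a segment only when its LSN is newer than the LSN
// already covered by the segment's immutable (flushed) part, which keeps replay
// idempotent. The names here (`SegmentState`, `ReplayEntry`, `should_replay`) are
// hypothetical and for illustration only; the real segment and WAL entry types differ.

struct SegmentState {
    /// Highest LSN already persisted in the immutable part of this segment.
    immut_lsn: u64,
}

struct ReplayEntry {
    lsn: u64,
}

fn should_replay(segment: &SegmentState, entry: &ReplayEntry) -> bool {
    // Entries at or below the flushed LSN are already durable in `immut`; skip them.
    entry.lsn > segment.immut_lsn
}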
+ +use crate::WriteLockedGraph; +use raphtory_api::core::{ + entities::{ + properties::{ + meta::STATIC_GRAPH_LAYER_ID, + prop::{unify_types, Prop}, + }, + EID, GID, VID, + }, + storage::timeindex::EventTime, +}; +use storage::{ + api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, + error::StorageError, + pages::resolve_pos, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, + resolver::GIDResolverOps, + wal::{GraphReplay, TransactionID, LSN}, + ES, GS, NS, +}; + +impl GraphReplay for WriteLockedGraph<'_, EXT> +where + EXT: PersistenceStrategy, ES = ES, GS = GS>, + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + GS: GraphPropSegmentOps, +{ + fn replay_add_edge( + &mut self, + lsn: LSN, + transaction_id: TransactionID, + t: EventTime, + src_name: Option, + src_id: VID, + dst_name: Option, + dst_id: VID, + eid: EID, + layer_name: Option, + layer_id: usize, + props: Vec<(String, usize, Prop)>, + ) -> Result<(), StorageError> { + let node_max_page_len = self.graph().extension().config().max_node_page_len(); + let edge_max_page_len = self.graph().extension().config().max_edge_page_len(); + + // 1. Insert prop ids into edge meta. + // No need to validate props again since they are already validated before + // being logged to the WAL. + + // 2. Insert node ids into resolver. + if let Some(src_name) = src_name.as_ref() { + self.graph() + .logical_to_physical + .set(src_name.as_ref(), src_id)?; + } + + if let Some(dst_name) = dst_name.as_ref() { + self.graph() + .logical_to_physical + .set(dst_name.as_ref(), dst_id)?; + } + + // 4. Grab src writer and add edge data. + let (src_segment_id, src_pos) = resolve_pos(src_id, node_max_page_len); + let resize_vid = VID::from(src_id.index() + 1); + self.resize_chunks_to_vid(resize_vid); // Create enough segments. + + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(src_segment_id); + let immut_lsn = segment.immut_lsn(); + + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let mut src_writer = self.nodes.get_mut(src_segment_id).unwrap().writer(); + + // Increment the node counter for this segment if this is a new node. + if !src_writer.has_node(src_pos, STATIC_GRAPH_LAYER_ID) { + src_writer.increment_seg_num_nodes(); + } + + if let Some(src_name) = src_name { + src_writer.store_node_id(src_pos, STATIC_GRAPH_LAYER_ID, src_name); + } + + let is_new_edge_static = src_writer + .get_out_edge(src_pos, dst_id, STATIC_GRAPH_LAYER_ID) + .is_none(); + let is_new_edge_layer = src_writer.get_out_edge(src_pos, dst_id, layer_id).is_none(); + + // Add the edge to the static graph if it doesn't already exist. + if is_new_edge_static { + src_writer.add_static_outbound_edge(src_pos, dst_id, eid); + } + + // Add the edge to the layer if it doesn't already exist, else just record the timestamp. + if is_new_edge_layer { + src_writer.add_outbound_edge(Some(t), src_pos, dst_id, eid.with_layer(layer_id)); + } else { + src_writer.update_timestamp(t, src_pos, eid.with_layer(layer_id)); + } + + src_writer.mut_segment.set_lsn(lsn); + + // Release the writer for mutable access to dst_writer. + drop(src_writer); + } + + // 5. Grab dst writer and add edge data. 
+ let (dst_segment_id, dst_pos) = resolve_pos(dst_id, node_max_page_len); + let resize_vid = VID::from(dst_id.index() + 1); + self.resize_chunks_to_vid(resize_vid); + + let segment = self + .graph() + .storage() + .nodes() + .get_or_create_segment(dst_segment_id); + let immut_lsn = segment.immut_lsn(); + + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let mut dst_writer = self.nodes.get_mut(dst_segment_id).unwrap().writer(); + + // Increment the node counter for this segment if this is a new node. + if !dst_writer.has_node(dst_pos, STATIC_GRAPH_LAYER_ID) { + dst_writer.increment_seg_num_nodes(); + } + + if let Some(dst_name) = dst_name { + dst_writer.store_node_id(dst_pos, STATIC_GRAPH_LAYER_ID, dst_name); + } + + let is_new_edge_static = dst_writer + .get_inb_edge(dst_pos, src_id, STATIC_GRAPH_LAYER_ID) + .is_none(); + let is_new_edge_layer = dst_writer.get_inb_edge(dst_pos, src_id, layer_id).is_none(); + + if is_new_edge_static { + dst_writer.add_static_inbound_edge(dst_pos, src_id, eid); + } + + if is_new_edge_layer { + dst_writer.add_inbound_edge(Some(t), dst_pos, src_id, eid.with_layer(layer_id)); + } else { + dst_writer.update_timestamp(t, dst_pos, eid.with_layer(layer_id)); + } + + dst_writer.mut_segment.set_lsn(lsn); + + drop(dst_writer); + } + + // 6. Grab edge writer and add temporal props & metadata. + let (edge_segment_id, edge_pos) = resolve_pos(eid, edge_max_page_len); + let resize_eid = EID::from(eid.index() + 1); + self.resize_chunks_to_eid(resize_eid); + + let segment = self + .graph() + .storage() + .edges() + .get_or_create_segment(edge_segment_id); + let immut_lsn = segment.immut_lsn(); + + // Replay this entry only if it doesn't exist in immut. + if immut_lsn < lsn { + let edge_meta = self.graph().edge_meta(); + + for (prop_name, prop_id, prop_value) in &props { + let prop_mapper = edge_meta.temporal_prop_mapper(); + match prop_mapper.get_dtype(*prop_id) { + None => { + prop_mapper.set_id_and_dtype( + prop_name.as_str(), + *prop_id, + prop_value.dtype(), + ); + } + Some(old_dtype) => { + let dtype = prop_value.dtype(); + let mut unified = false; + let new_dtype = unify_types(&old_dtype, &dtype, &mut unified)?; + if unified { + prop_mapper.set_dtype(*prop_id, new_dtype); + } + } + } + } + + // 3. Insert layer id into the layer meta of both edge and node. + let node_meta = self.graph().node_meta(); + + edge_meta + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); + node_meta + .layer_meta() + .set_id(layer_name.as_deref().unwrap_or("_default"), layer_id); + + let mut edge_writer = self.edges.get_mut(edge_segment_id).unwrap().writer(); + + let is_new_edge_static = edge_writer + .get_edge(STATIC_GRAPH_LAYER_ID, edge_pos) + .is_none(); + + // Add edge into the static graph if it doesn't already exist. + if is_new_edge_static { + let already_counted = false; + edge_writer.add_static_edge(Some(edge_pos), src_id, dst_id, already_counted); + } + + // Add edge into the specified layer with timestamp and props. 
+ edge_writer.add_edge( + t, + edge_pos, + src_id, + dst_id, + props + .into_iter() + .map(|(_, prop_id, prop_value)| (prop_id, prop_value)), + layer_id, + ); + + edge_writer.writer.set_lsn(lsn); + } + + Ok(()) + } +} diff --git a/db4-storage/src/api/edges.rs b/db4-storage/src/api/edges.rs index 3764b75feb..ecca18474e 100644 --- a/db4-storage/src/api/edges.rs +++ b/db4-storage/src/api/edges.rs @@ -11,7 +11,7 @@ use std::{ sync::{Arc, atomic::AtomicU32}, }; -use crate::{LocalPOS, error::StorageError, segments::edge::segment::MemEdgeSegment}; +use crate::{LocalPOS, error::StorageError, segments::edge::segment::MemEdgeSegment, wal::LSN}; pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { type Extension; @@ -59,8 +59,7 @@ pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { fn try_head_mut(&self) -> Option>; - /// mark segment as dirty without triggering a write - fn mark_dirty(&self); + fn set_dirty(&self, dirty: bool); /// notify that an edge was added (might need to write to disk) fn notify_write( @@ -102,6 +101,10 @@ pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { &self, locked_head: impl DerefMut, ) -> Result<(), StorageError>; + + /// Returns the latest lsn for the immutable part of this segment. + fn immut_lsn(&self) -> LSN; + fn flush(&self) -> Result<(), StorageError>; } diff --git a/db4-storage/src/api/graph_props.rs b/db4-storage/src/api/graph_props.rs index b4cca958f6..5e611748d1 100644 --- a/db4-storage/src/api/graph_props.rs +++ b/db4-storage/src/api/graph_props.rs @@ -29,7 +29,7 @@ where fn est_size(&self) -> usize; - fn mark_dirty(&self); + fn set_dirty(&self, dirty: bool); fn notify_write( &self, diff --git a/db4-storage/src/api/nodes.rs b/db4-storage/src/api/nodes.rs index f7beb408ef..c76b3f931f 100644 --- a/db4-storage/src/api/nodes.rs +++ b/db4-storage/src/api/nodes.rs @@ -19,9 +19,13 @@ use raphtory_core::{ }; use std::{ borrow::Cow, + fmt::Debug, ops::{Deref, DerefMut, Range}, path::{Path, PathBuf}, - sync::{Arc, atomic::AtomicU32}, + sync::{ + Arc, + atomic::{AtomicU32, Ordering}, + }, }; use rayon::prelude::*; @@ -33,9 +37,10 @@ use crate::{ pages::node_store::increment_and_clamp, segments::node::segment::MemNodeSegment, utils::{Iter2, Iter3, Iter4}, + wal::LSN, }; -pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { +pub trait NodeSegmentOps: Send + Sync + Debug + 'static { type Extension; type Entry<'a>: NodeEntryOps<'a> @@ -83,9 +88,9 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { head_lock: impl DerefMut, ) -> Result<(), StorageError>; - fn mark_dirty(&self); + fn set_dirty(&self, dirty: bool); - fn check_node(&self, pos: LocalPOS, layer_id: usize) -> bool; + fn has_node(&self, pos: LocalPOS, layer_id: usize) -> bool; fn get_out_edge( &self, @@ -118,6 +123,9 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { locked_head: impl DerefMut, ) -> Result<(), StorageError>; + /// Returns the latest lsn for the immutable part of this segment. 
+ fn immut_lsn(&self) -> LSN; + fn nodes_counter(&self) -> &AtomicU32; fn increment_num_nodes(&self, max_page_len: u32) { @@ -125,8 +133,7 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { } fn num_nodes(&self) -> u32 { - self.nodes_counter() - .load(std::sync::atomic::Ordering::Relaxed) + self.nodes_counter().load(Ordering::Relaxed) } fn num_layers(&self) -> usize; @@ -134,7 +141,7 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { fn layer_count(&self, layer_id: usize) -> u32; } -pub trait LockedNSSegment: std::fmt::Debug + Send + Sync { +pub trait LockedNSSegment: Debug + Send + Sync { type EntryRef<'a>: NodeRefOps<'a> where Self: 'a; diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index 8c3f87308f..fd3521869c 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -14,7 +14,7 @@ use crate::{ GraphStore, ReadLockedGraphStore, edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage, }, - persist::strategy::NoOpStrategy, + persist::strategy::{NoOpStrategy, PersistenceStrategy}, resolver::mapping_resolver::MappingResolver, segments::{ edge::{ @@ -27,7 +27,6 @@ use crate::{ segment::NodeSegmentView, }, }, - wal::no_wal::NoWal, }; use parking_lot::RwLock; use raphtory_api::core::entities::{EID, VID}; @@ -44,6 +43,7 @@ pub mod properties; pub mod resolver; pub mod segments; pub mod state; +pub mod transaction; pub mod utils; pub mod wal; @@ -53,7 +53,8 @@ pub type ES

= EdgeSegmentView; pub type GS = GraphPropSegmentView; pub type Layer = GraphStore, ES, GS, P>; -pub type WalImpl = NoWal; +pub type Wal = ::Wal; +pub type Config = ::Config; pub type GIDResolver = MappingResolver; pub type ReadLockedLayer = ReadLockedGraphStore, ES, GS
, P>; diff --git a/db4-storage/src/pages/edge_page/writer.rs b/db4-storage/src/pages/edge_page/writer.rs index 4df1262b98..253fb6c623 100644 --- a/db4-storage/src/pages/edge_page/writer.rs +++ b/db4-storage/src/pages/edge_page/writer.rs @@ -2,7 +2,10 @@ use crate::{ LocalPOS, api::edges::EdgeSegmentOps, error::StorageError, pages::layer_counter::GraphStats, segments::edge::segment::MemEdgeSegment, }; -use raphtory_api::core::entities::{VID, properties::prop::Prop}; +use raphtory_api::core::entities::{ + VID, + properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, +}; use raphtory_core::storage::timeindex::{AsTime, EventTime}; use std::ops::DerefMut; @@ -41,17 +44,20 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen dst: VID, props: impl IntoIterator, layer_id: usize, - lsn: u64, ) -> LocalPOS { - let existing_edge = self + let is_new_edge = !self .page .contains_edge(edge_pos, layer_id, self.writer.deref()); - if !existing_edge { + + if is_new_edge { self.increment_layer_num_edges(layer_id); } + self.graph_stats.update_time(t.t()); + self.writer - .insert_edge_internal(t, edge_pos, src, dst, layer_id, props, lsn); + .insert_edge_internal(t, edge_pos, src, dst, layer_id, props); + edge_pos } @@ -62,7 +68,6 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen src: VID, dst: VID, layer_id: usize, - lsn: u64, ) { let existing_edge = self .page @@ -72,27 +77,30 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen } self.graph_stats.update_time(t.t()); self.writer - .delete_edge_internal(t, edge_pos, src, dst, layer_id, lsn); + .delete_edge_internal(t, edge_pos, src, dst, layer_id); } + /// Adds a static edge to the graph. + /// + /// If `edge_pos` is `None`, a new position is allocated. If `Some`, the provided position + /// is used. + /// Set `already_counted` to `true` when bulk loading to avoid double-counting statistics. 
pub fn add_static_edge( &mut self, edge_pos: Option, src: impl Into, dst: impl Into, - lsn: u64, - exist: bool, // used when edge_pos is Some but the is not counted, this is used in the bulk loader + already_counted: bool, ) -> LocalPOS { - let layer_id = 0; // assuming layer_id 0 for static edges, adjust as needed - - if edge_pos.is_some() && !exist { + if edge_pos.is_some() && !already_counted { self.page.increment_num_edges(); - self.increment_layer_num_edges(layer_id); + self.increment_layer_num_edges(STATIC_GRAPH_LAYER_ID); } - let edge_pos = edge_pos.unwrap_or_else(|| self.new_local_pos(layer_id)); + let edge_pos = edge_pos.unwrap_or_else(|| self.new_local_pos(STATIC_GRAPH_LAYER_ID)); self.writer - .insert_static_edge_internal(edge_pos, src, dst, layer_id, lsn); + .insert_static_edge_internal(edge_pos, src, dst, STATIC_GRAPH_LAYER_ID); + edge_pos } @@ -102,26 +110,26 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen edge_pos: LocalPOS, src: VID, dst: VID, - exists: bool, + edge_exists: bool, layer_id: usize, c_props: impl IntoIterator, t_props: impl IntoIterator, - lsn: u64, ) { - if !exists { - self.increment_layer_num_edges(0); + if !edge_exists { + self.increment_layer_num_edges(STATIC_GRAPH_LAYER_ID); self.increment_layer_num_edges(layer_id); + + self.writer + .insert_static_edge_internal(edge_pos, src, dst, STATIC_GRAPH_LAYER_ID); } - self.writer - .insert_static_edge_internal(edge_pos, src, dst, 0, lsn); + self.graph_stats.update_time(t.t()); self.writer .update_const_properties(edge_pos, src, dst, layer_id, c_props); - self.graph_stats.update_time(t.t()); self.writer - .insert_edge_internal(t, edge_pos, src, dst, layer_id, t_props, lsn); + .insert_edge_internal(t, edge_pos, src, dst, layer_id, t_props); } pub fn bulk_delete_edge( @@ -132,19 +140,18 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen dst: VID, exists: bool, layer_id: usize, - lsn: u64, ) { if !exists { - self.increment_layer_num_edges(0); + self.increment_layer_num_edges(STATIC_GRAPH_LAYER_ID); self.increment_layer_num_edges(layer_id); } self.writer - .insert_static_edge_internal(edge_pos, src, dst, 0, lsn); + .insert_static_edge_internal(edge_pos, src, dst, STATIC_GRAPH_LAYER_ID); self.graph_stats.update_time(t.t()); self.writer - .delete_edge_internal(t, edge_pos, src, dst, layer_id, lsn); + .delete_edge_internal(t, edge_pos, src, dst, layer_id); } pub fn segment_id(&self) -> usize { @@ -155,10 +162,6 @@ impl<'a, MP: DerefMut + std::fmt::Debug, ES: EdgeSegmen self.graph_stats.increment(layer_id); } - pub fn contains_edge(&self, pos: LocalPOS, layer_id: usize) -> bool { - self.page.contains_edge(pos, layer_id, self.writer.deref()) - } - pub fn get_edge(&self, layer_id: usize, edge_pos: LocalPOS) -> Option<(VID, VID)> { self.page.get_edge(edge_pos, layer_id, self.writer.deref()) } diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 672f528731..f773a8103c 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -9,11 +9,14 @@ use crate::{ locked::edges::{LockedEdgePage, WriteLockedEdgePages}, row_group_par_iter, }, - persist::strategy::Config, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, segments::edge::segment::MemEdgeSegment, }; use parking_lot::{RwLock, RwLockWriteGuard}; -use raphtory_api::core::entities::{EID, VID, properties::meta::Meta}; +use raphtory_api::core::entities::{ + EID, VID, + properties::meta::{Meta, STATIC_GRAPH_LAYER_ID}, +}; use raphtory_core::{ entities::{ELID, LayerIds}, 
storage::timeindex::{AsTime, EventTime}, @@ -44,7 +47,7 @@ pub struct ReadLockedEdgeStorage, EXT> { locked_pages: Box<[ES::ArcLockedSegment]>, } -impl, EXT: Config> ReadLockedEdgeStorage { +impl, EXT: PersistenceStrategy> ReadLockedEdgeStorage { pub fn storage(&self) -> &EdgeStorageInner { &self.storage } @@ -123,7 +126,7 @@ impl, EXT: Config> ReadLockedEdgeStorage, EXT: Config> EdgeStorageInner { +impl, EXT: PersistenceStrategy> EdgeStorageInner { pub fn locked(self: &Arc) -> ReadLockedEdgeStorage { let locked_pages = self .segments @@ -144,6 +147,10 @@ impl, EXT: Config> EdgeStorageInner &self.layer_counter } + pub fn segments(&self) -> &boxcar::Vec> { + &self.segments + } + pub fn new_with_meta(edges_path: Option, edge_meta: Arc, ext: EXT) -> Self { let free_pages = (0..N).map(RwLock::new).collect::>(); let empty = Self { @@ -157,21 +164,25 @@ impl, EXT: Config> EdgeStorageInner let layer_mapper = empty.edge_meta().layer_meta(); let prop_mapper = empty.edge_meta().temporal_prop_mapper(); let metadata_mapper = empty.edge_meta().metadata_mapper(); + if layer_mapper.num_fields() > 0 || prop_mapper.num_fields() > 0 || metadata_mapper.num_fields() > 0 { - let segment = empty.get_or_create_segment(0); + let segment = empty.get_or_create_segment(STATIC_GRAPH_LAYER_ID); let mut head = segment.head_mut(); + for layer in layer_mapper.ids() { head.get_or_create_layer(layer); } + if prop_mapper.num_fields() > 0 { head.get_or_create_layer(0) .properties_mut() .set_has_properties() } - segment.mark_dirty(); + + segment.set_dirty(true); } empty } @@ -212,7 +223,7 @@ impl, EXT: Config> EdgeStorageInner pub fn load(edges_path: impl AsRef, ext: EXT) -> Result { let edges_path = edges_path.as_ref(); - let max_page_len = ext.max_edge_page_len(); + let max_page_len = ext.config().max_edge_page_len(); let meta = Arc::new(Meta::new_for_edges()); @@ -363,9 +374,11 @@ impl, EXT: Config> EdgeStorageInner if let Some(segment) = self.segments.get(segment_id) { return segment; } + let count = self.segments.count(); + if count > segment_id { - // something has allocated the segment, wait for it to be added + // Something has allocated the segment, wait for it to be added. loop { if let Some(segment) = self.segments.get(segment_id) { return segment; @@ -375,7 +388,7 @@ impl, EXT: Config> EdgeStorageInner } } } else { - // we need to create the segment + // We need to create the segment. self.segments.reserve(segment_id + 1 - count); loop { @@ -393,7 +406,7 @@ impl, EXT: Config> EdgeStorageInner if let Some(segment) = self.segments.get(segment_id) { return segment; } else { - // wait for the segment to be created + // Wait for the segment to be created. 
std::thread::yield_now(); } } @@ -404,7 +417,7 @@ impl, EXT: Config> EdgeStorageInner #[inline(always)] pub fn max_page_len(&self) -> u32 { - self.ext.max_edge_page_len() + self.ext.config().max_edge_page_len() } pub fn write_locked<'a>(&'a self) -> WriteLockedEdgePages<'a, ES> { diff --git a/db4-storage/src/pages/graph_prop_page/writer.rs b/db4-storage/src/pages/graph_prop_page/writer.rs index 50485d47c8..f3f0acce93 100644 --- a/db4-storage/src/pages/graph_prop_page/writer.rs +++ b/db4-storage/src/pages/graph_prop_page/writer.rs @@ -28,25 +28,22 @@ impl<'a, GS: GraphPropSegmentOps> GraphPropWriter<'a, GS> { &mut self, t: T, props: impl IntoIterator, - lsn: u64, ) { let add = self.mem_segment.add_properties(t, props); - self.mem_segment.layers_mut()[MemGraphPropSegment::DEFAULT_LAYER].set_lsn(lsn); self.graph_props.increment_est_size(add); - self.graph_props.mark_dirty(); + self.graph_props.set_dirty(true); } pub fn check_metadata(&self, props: &[(usize, Prop)]) -> Result<(), StorageError> { self.mem_segment.check_metadata(props) } - pub fn update_metadata(&mut self, props: impl IntoIterator, lsn: u64) { + pub fn update_metadata(&mut self, props: impl IntoIterator) { let add = self.mem_segment.update_metadata(props); - self.mem_segment.layers_mut()[MemGraphPropSegment::DEFAULT_LAYER].set_lsn(lsn); self.graph_props.increment_est_size(add); - self.graph_props.mark_dirty(); + self.graph_props.set_dirty(true); } } diff --git a/db4-storage/src/pages/graph_prop_store.rs b/db4-storage/src/pages/graph_prop_store.rs index c814fdf5ea..10cc93072f 100644 --- a/db4-storage/src/pages/graph_prop_store.rs +++ b/db4-storage/src/pages/graph_prop_store.rs @@ -7,7 +7,7 @@ use crate::{ graph_prop_page::writer::GraphPropWriter, locked::graph_props::{LockedGraphPropPage, WriteLockedGraphPropPages}, }, - persist::strategy::Config, + persist::strategy::PersistenceStrategy, }; use std::{ @@ -31,7 +31,9 @@ pub struct GraphPropStorageInner { ext: EXT, } -impl, EXT: Config> GraphPropStorageInner { +impl, EXT: PersistenceStrategy> + GraphPropStorageInner +{ pub fn new_with_meta(path: Option<&Path>, meta: Arc, ext: EXT) -> Self { let page = Arc::new(GS::new(meta.clone(), path, ext.clone())); @@ -66,6 +68,10 @@ impl, EXT: Config> GraphPropStorageInne self.page.entry() } + pub fn segment(&self) -> &Arc { + &self.page + } + pub fn writer(&self) -> GraphPropWriter<'_, GS> { let head = self.page.head_mut(); let graph_props = &self.page; diff --git a/db4-storage/src/pages/locked/edges.rs b/db4-storage/src/pages/locked/edges.rs index 1bfe0005d3..6f6280957d 100644 --- a/db4-storage/src/pages/locked/edges.rs +++ b/db4-storage/src/pages/locked/edges.rs @@ -79,6 +79,11 @@ impl<'a, ES: EdgeSegmentOps> WriteLockedEdgePages<'a, ES> { Self { writers } } + #[inline] + pub fn get_mut(&mut self, segment_id: usize) -> Option<&mut LockedEdgePage<'a, ES>> { + self.writers.get_mut(segment_id) + } + pub fn par_iter_mut(&mut self) -> rayon::slice::IterMut<'_, LockedEdgePage<'a, ES>> { self.writers.par_iter_mut() } diff --git a/db4-storage/src/pages/locked/graph_props.rs b/db4-storage/src/pages/locked/graph_props.rs index 5ef775dfdb..b74a46a70a 100644 --- a/db4-storage/src/pages/locked/graph_props.rs +++ b/db4-storage/src/pages/locked/graph_props.rs @@ -24,27 +24,24 @@ impl<'a, GS: GraphPropSegmentOps> LockedGraphPropPage<'a, GS> { &mut self, t: T, props: impl IntoIterator, - lsn: u64, ) { let add = self.lock.add_properties(t, props); - self.lock.layers_mut()[MemGraphPropSegment::DEFAULT_LAYER].set_lsn(lsn); self.page.increment_est_size(add); - 
self.page.mark_dirty(); + self.page.set_dirty(true); } /// Add metadata (constant properties) to the graph - pub fn add_metadata(&mut self, props: impl IntoIterator, lsn: u64) { - self.update_metadata(props, lsn); + pub fn add_metadata(&mut self, props: impl IntoIterator) { + self.update_metadata(props); } /// Update metadata (constant properties) on the graph - pub fn update_metadata(&mut self, props: impl IntoIterator, lsn: u64) { + pub fn update_metadata(&mut self, props: impl IntoIterator) { let add = self.lock.update_metadata(props); - self.lock.layers_mut()[MemGraphPropSegment::DEFAULT_LAYER].set_lsn(lsn); self.page.increment_est_size(add); - self.page.mark_dirty(); + self.page.set_dirty(true); } } diff --git a/db4-storage/src/pages/locked/nodes.rs b/db4-storage/src/pages/locked/nodes.rs index 04cca83328..cad23ca145 100644 --- a/db4-storage/src/pages/locked/nodes.rs +++ b/db4-storage/src/pages/locked/nodes.rs @@ -12,7 +12,7 @@ use std::ops::DerefMut; #[derive(Debug)] pub struct LockedNodePage<'a, NS> { - page_id: usize, + segment_id: usize, max_page_len: u32, layer_counter: &'a GraphStats, page: &'a NS, @@ -21,14 +21,14 @@ pub struct LockedNodePage<'a, NS> { impl<'a, NS: NodeSegmentOps> LockedNodePage<'a, NS> { pub fn new( - page_id: usize, + segment_id: usize, layer_counter: &'a GraphStats, max_page_len: u32, page: &'a NS, lock: RwLockWriteGuard<'a, MemNodeSegment>, ) -> Self { Self { - page_id, + segment_id, layer_counter, max_page_len, page, @@ -50,14 +50,15 @@ impl<'a, NS: NodeSegmentOps> LockedNodePage<'a, NS> { } #[inline(always)] - pub fn page_id(&self) -> usize { - self.page_id + pub fn segment_id(&self) -> usize { + self.segment_id } #[inline(always)] pub fn resolve_pos(&self, node_id: VID) -> Option { let (page, pos) = resolve_pos(node_id, self.max_page_len); - if page == self.page_id { + + if page == self.segment_id { Some(pos) } else { None @@ -87,6 +88,15 @@ impl<'a, EXT, NS: NodeSegmentOps> WriteLockedNodePages<'a, NS> Self { writers } } + pub fn len(&self) -> usize { + self.writers.len() + } + + #[inline] + pub fn get_mut(&mut self, segment_id: usize) -> Option<&mut LockedNodePage<'a, NS>> { + self.writers.get_mut(segment_id) + } + pub fn par_iter_mut(&mut self) -> rayon::slice::IterMut<'_, LockedNodePage<'a, NS>> { self.writers.par_iter_mut() } @@ -105,10 +115,6 @@ impl<'a, EXT, NS: NodeSegmentOps> WriteLockedNodePages<'a, NS> } } - pub fn len(&self) -> usize { - self.writers.len() - } - pub fn vacuum(&mut self) -> Result<(), StorageError> { for LockedNodePage { page, lock, .. 
} in &mut self.writers { page.vacuum(lock.deref_mut())?; diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index efb9ae0b80..92b99b0a65 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -3,18 +3,21 @@ use crate::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, error::StorageError, pages::{edge_store::ReadLockedEdgeStorage, node_store::ReadLockedNodeStorage}, - persist::strategy::{Config, PersistentStrategy}, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, properties::props_meta_writer::PropsMetaWriter, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, }; use edge_page::writer::EdgeWriter; use edge_store::EdgeStorageInner; use graph_prop_store::GraphPropStorageInner; -use node_page::writer::{NodeWriter, WriterPair}; +use node_page::writer::{NodeWriter, NodeWriters}; use node_store::NodeStorageInner; use parking_lot::RwLockWriteGuard; use raphtory_api::core::{ - entities::properties::{meta::Meta, prop::Prop}, + entities::properties::{ + meta::{Meta, STATIC_GRAPH_LAYER_ID}, + prop::Prop, + }, storage::dict_mapper::MaybeNew, utils::time::{InputTime, TryIntoInputTime}, }; @@ -54,28 +57,28 @@ pub struct GraphStore< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, > { nodes: Arc>, edges: Arc>, graph_props: Arc>, graph_dir: Option, event_id: AtomicUsize, - _ext: EXT, + ext: EXT, } impl< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, > GraphStore { pub fn flush(&self) -> Result<(), StorageError> { let node_types = self.nodes.prop_meta().get_all_node_types(); - let config = self._ext.with_node_types(node_types); + let config = self.ext.config().with_node_types(node_types); if let Some(graph_dir) = self.graph_dir.as_ref() { - write_graph_config(graph_dir, &config)?; + config.save_to_dir(graph_dir)?; } self.nodes.flush()?; self.edges.flush()?; @@ -89,7 +92,7 @@ pub struct ReadLockedGraphStore< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, > { pub nodes: Arc>, pub edges: Arc>, @@ -100,9 +103,95 @@ impl< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, > GraphStore { + pub fn new(graph_dir: Option<&Path>, ext: EXT) -> Self { + let node_meta = Meta::new_for_nodes(); + let edge_meta = Meta::new_for_edges(); + let graph_props_meta = Meta::new_for_graph_props(); + + Self::new_with_meta(graph_dir, node_meta, edge_meta, graph_props_meta, ext) + } + + pub fn new_with_meta( + graph_dir: Option<&Path>, + node_meta: Meta, + edge_meta: Meta, + graph_props_meta: Meta, + ext: EXT, + ) -> Self { + let nodes_path = graph_dir.map(|graph_dir| graph_dir.join("nodes")); + let edges_path = graph_dir.map(|graph_dir| graph_dir.join("edges")); + let graph_props_path = graph_dir.map(|graph_dir| graph_dir.join("graph_props")); + + let node_meta = Arc::new(node_meta); + let edge_meta = Arc::new(edge_meta); + let graph_props_meta = Arc::new(graph_props_meta); + + let node_storage = Arc::new(NodeStorageInner::new_with_meta( + nodes_path, + node_meta, + edge_meta.clone(), + ext.clone(), + )); + let edge_storage = Arc::new(EdgeStorageInner::new_with_meta( + edges_path, + edge_meta, + ext.clone(), + )); + let graph_prop_storage = Arc::new(GraphPropStorageInner::new_with_meta( + graph_props_path.as_deref(), + 
graph_props_meta, + ext.clone(), + )); + + if let Some(graph_dir) = graph_dir { + ext.config() + .save_to_dir(graph_dir) + .expect("Failed to write config to disk"); + } + + Self { + nodes: node_storage, + edges: edge_storage, + graph_props: graph_prop_storage, + event_id: AtomicUsize::new(0), + graph_dir: graph_dir.map(|p| p.to_path_buf()), + ext, + } + } + + pub fn load(graph_dir: impl AsRef, ext: EXT) -> Result { + let nodes_path = graph_dir.as_ref().join("nodes"); + let edges_path = graph_dir.as_ref().join("edges"); + let graph_props_path = graph_dir.as_ref().join("graph_props"); + + let edge_storage = Arc::new(EdgeStorageInner::load(edges_path, ext.clone())?); + let edge_meta = edge_storage.edge_meta().clone(); + let node_storage = Arc::new(NodeStorageInner::load(nodes_path, edge_meta, ext.clone())?); + let node_meta = node_storage.prop_meta(); + + // Load graph temporal properties and metadata. + let graph_prop_storage = + Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); + + for node_type in ext.config().node_types().iter() { + node_meta.get_or_create_node_type_id(node_type); + } + + let t_len = edge_storage.t_len(); + + Ok(Self { + nodes: node_storage, + edges: edge_storage, + graph_props: graph_prop_storage, + event_id: AtomicUsize::new(t_len), + graph_dir: Some(graph_dir.as_ref().to_path_buf()), + ext, + }) + } + pub fn read_locked(self: &Arc) -> ReadLockedGraphStore { let nodes = self.nodes.locked().into(); let edges = self.edges.locked().into(); @@ -115,7 +204,7 @@ impl< } pub fn extension(&self) -> &EXT { - &self._ext + &self.ext } pub fn nodes(&self) -> &Arc> { @@ -161,93 +250,6 @@ impl< self.edges.segment_counts() } - pub fn load(graph_dir: impl AsRef) -> Result { - let nodes_path = graph_dir.as_ref().join("nodes"); - let edges_path = graph_dir.as_ref().join("edges"); - let graph_props_path = graph_dir.as_ref().join("graph_props"); - - let ext = read_graph_config::(graph_dir.as_ref())?; - - let edge_storage = Arc::new(EdgeStorageInner::load(edges_path, ext.clone())?); - let edge_meta = edge_storage.edge_meta().clone(); - let node_storage = Arc::new(NodeStorageInner::load(nodes_path, edge_meta, ext.clone())?); - let node_meta = node_storage.prop_meta(); - - // Load graph temporal properties and metadata - let graph_props_storage = - Arc::new(GraphPropStorageInner::load(graph_props_path, ext.clone())?); - - for node_type in ext.node_types().iter() { - node_meta.get_or_create_node_type_id(node_type); - } - - let t_len = edge_storage.t_len(); - - Ok(Self { - nodes: node_storage, - edges: edge_storage, - graph_props: graph_props_storage, - event_id: AtomicUsize::new(t_len), - graph_dir: Some(graph_dir.as_ref().to_path_buf()), - _ext: ext, - }) - } - - pub fn new_with_meta( - graph_dir: Option<&Path>, - node_meta: Meta, - edge_meta: Meta, - graph_props_meta: Meta, - ext: EXT, - ) -> Self { - let nodes_path = graph_dir.map(|graph_dir| graph_dir.join("nodes")); - let edges_path = graph_dir.map(|graph_dir| graph_dir.join("edges")); - let graph_props_path = graph_dir.map(|graph_dir| graph_dir.join("graph_props")); - - let node_meta = Arc::new(node_meta); - let edge_meta = Arc::new(edge_meta); - let graph_props_meta = Arc::new(graph_props_meta); - - let node_storage = Arc::new(NodeStorageInner::new_with_meta( - nodes_path, - node_meta, - edge_meta.clone(), - ext.clone(), - )); - let edge_storage = Arc::new(EdgeStorageInner::new_with_meta( - edges_path, - edge_meta, - ext.clone(), - )); - let graph_storage = Arc::new(GraphPropStorageInner::new_with_meta( - 
graph_props_path.as_deref(), - graph_props_meta, - ext.clone(), - )); - - if let Some(graph_dir) = graph_dir { - write_graph_config(graph_dir, &ext) - .expect("Unrecoverable! Failed to write graph config"); - } - - Self { - nodes: node_storage, - edges: edge_storage, - graph_props: graph_storage, - event_id: AtomicUsize::new(0), - graph_dir: graph_dir.map(|p| p.to_path_buf()), - _ext: ext, - } - } - - pub fn new(graph_dir: Option<&Path>, ext: EXT) -> Self { - let node_meta = Meta::new_for_nodes(); - let edge_meta = Meta::new_for_edges(); - let graph_props_meta = Meta::new_for_graph_props(); - - Self::new_with_meta(graph_dir, node_meta, edge_meta, graph_props_meta, ext) - } - pub fn add_edge( &self, t: T, @@ -282,10 +284,11 @@ impl< let src = src.into(); let dst = dst.into(); let mut session = self.write_session(src, dst, None); + session.set_lsn(lsn); let elid = session - .add_static_edge(src, dst, lsn) + .add_static_edge(src, dst) .map(|eid| eid.with_layer(0)); - session.add_edge_into_layer(t, src, dst, elid, lsn, props); + session.add_edge_into_layer(t, src, dst, elid, props); Ok(elid) } @@ -350,7 +353,7 @@ impl< let (segment, node_pos) = self.nodes.resolve_pos(node); let mut node_writer = self.nodes.writer(segment); let prop_writer = PropsMetaWriter::constant(self.node_meta(), props.into_iter())?; - node_writer.update_c_props(node_pos, layer_id, prop_writer.into_props_const()?, 0); // TODO: LSN + node_writer.update_c_props(node_pos, layer_id, prop_writer.into_props_const()?); Ok(()) } @@ -368,7 +371,7 @@ impl< let mut node_writer = self.nodes.writer(segment); let prop_writer = PropsMetaWriter::temporal(self.node_meta(), props.into_iter())?; - node_writer.add_props(t, node_pos, layer_id, prop_writer.into_props_temporal()?, 0); // TODO: LSN + node_writer.add_props(t, node_pos, layer_id, prop_writer.into_props_temporal()?); Ok(()) } @@ -381,26 +384,38 @@ impl< let (src_chunk, _) = self.nodes.resolve_pos(src); let (dst_chunk, _) = self.nodes.resolve_pos(dst); + // Acquire locks in consistent order (lower chunk ID first) to prevent deadlocks. let node_writers = if src_chunk < dst_chunk { - let src_writer = self.node_writer(src_chunk); - let dst_writer = self.node_writer(dst_chunk); - WriterPair::Different { - src_writer, - dst_writer, + let src = self.node_writer(src_chunk); + let dst = self.node_writer(dst_chunk); + + NodeWriters { + src, + dst: Some(dst), } } else if src_chunk > dst_chunk { - let dst_writer = self.node_writer(dst_chunk); - let src_writer = self.node_writer(src_chunk); - WriterPair::Different { - src_writer, - dst_writer, + let dst = self.node_writer(dst_chunk); + let src = self.node_writer(src_chunk); + + NodeWriters { + src, + dst: Some(dst), } } else { - let writer = self.node_writer(src_chunk); - WriterPair::Same { writer } + let src = self.node_writer(src_chunk); + + NodeWriters { src, dst: None } }; - let edge_writer = e_id.map(|e_id| self.edge_writer(e_id)); + let (_, src_pos) = self.nodes.resolve_pos(src); + let existing_eid = node_writers + .src + .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); + + let edge_writer = match e_id.or(existing_eid) { + Some(e_id) => self.edge_writer(e_id), + None => self.get_free_writer(), + }; WriteSession::new(node_writers, edge_writer, self) } @@ -418,22 +433,34 @@ impl< self.nodes().get_or_create_segment(src_chunk); self.nodes().get_or_create_segment(dst_chunk); + // FIXME: This can livelock due to inconsistent lock acquisition order. 
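// A minimal, self-contained analogue of the ordering rule `write_session` uses
// above (always lock the lower segment id first), which is the usual fix for the
// livelock/deadlock risk flagged in the FIXME. `update_both_ordered` and `segs`
// are hypothetical names for illustration; the real code locks `MemNodeSegment`
// pages through their own writer API.
fn update_both_ordered<T>(
    segs: &[parking_lot::Mutex<T>],
    src: usize,
    dst: usize,
    mut update: impl FnMut(&mut T),
) {
    if src == dst {
        // Same segment: a single lock covers both endpoints.
        let mut guard = segs[src].lock();
        update(&mut *guard);
    } else {
        // Different segments: acquire the lower index first so two writers
        // touching the same pair can never hold one lock each and wait forever.
        let (lo, hi) = if src < dst { (src, dst) } else { (dst, src) };
        let mut first = segs[lo].lock();
        let mut second = segs[hi].lock();
        update(&mut *first);
        update(&mut *second);
    }
}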
loop { if let Some(src_writer) = self.nodes().try_writer(src_chunk) { if let Some(dst_writer) = self.nodes().try_writer(dst_chunk) { - break WriterPair::Different { - src_writer, - dst_writer, + break NodeWriters { + src: src_writer, + dst: Some(dst_writer), }; } } } } else { let writer = self.node_writer(src_chunk); - WriterPair::Same { writer } + NodeWriters { + src: writer, + dst: None, + } }; - let edge_writer = e_id.map(|e_id| self.edge_writer(e_id)); + let (_, src_pos) = self.nodes.resolve_pos(src); + let existing_eid = node_writers + .src + .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); + + let edge_writer = match e_id.or(existing_eid) { + Some(e_id) => self.edge_writer(e_id), + None => self.get_free_writer(), + }; WriteSession::new(node_writers, edge_writer, self) } @@ -459,8 +486,10 @@ impl< pub fn vacuum(self: &Arc) -> Result<(), StorageError> { let mut locked_nodes = self.nodes.write_locked(); let mut locked_edges = self.edges.write_locked(); + locked_nodes.vacuum()?; locked_edges.vacuum()?; + Ok(()) } } @@ -516,7 +545,7 @@ impl< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, > Drop for GraphStore { fn drop(&mut self) { @@ -529,26 +558,6 @@ impl< } } -pub fn write_graph_config( - graph_dir: impl AsRef, - config: &EXT, -) -> Result<(), StorageError> { - let config_file = graph_dir.as_ref().join("graph_config.json"); - let config_file = std::fs::File::create(&config_file)?; - - serde_json::to_writer_pretty(config_file, config)?; - Ok(()) -} - -fn read_graph_config( - graph_dir: impl AsRef, -) -> Result { - let config_file = graph_dir.as_ref().join("graph_config.json"); - let config_file = std::fs::File::open(config_file)?; - let config = serde_json::from_reader(config_file)?; - Ok(config) -} - #[inline(always)] pub fn resolve_pos>(i: I, max_page_len: u32) -> (usize, LocalPOS) { let i = i.into(); @@ -594,12 +603,16 @@ mod test { check_graph_with_props_support, edges_strat, edges_strat_with_layers, make_edges, make_nodes, }, + persist::{config::BaseConfig, strategy::PersistenceStrategy}, + wal::no_wal::NoWal, }; use chrono::DateTime; use proptest::prelude::*; use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{entities::VID, storage::timeindex::TimeIndexOps}; use rayon::iter::ParallelIterator; + use std::sync::Arc; + use tempfile; #[test] fn test_iterleave() { @@ -630,7 +643,8 @@ mod test { .collect(); check_edges_support(edges, par_load, false, |graph_dir| { - Layer::new(Some(graph_dir), Extension::new(chunk_size, chunk_size)) + let config = BaseConfig::new(chunk_size, chunk_size); + Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -640,7 +654,8 @@ mod test { par_load: bool, ) { check_edges_support(edges, par_load, false, |graph_dir| { - Layer::new(Some(graph_dir), Extension::new(chunk_size, chunk_size)) + let config = BaseConfig::new(chunk_size, chunk_size); + Layer::new(Some(graph_dir), Extension::new(config, Arc::new(NoWal))) }) } @@ -712,7 +727,11 @@ mod test { #[test] fn test_add_one_edge_get_num_nodes() { let graph_dir = tempfile::tempdir().unwrap(); - let g = Layer::new(Some(graph_dir.path()), Extension::new(32, 32)); + let config = BaseConfig::new(32, 32); + let g = Layer::new( + Some(graph_dir.path()), + Extension::new(config, Arc::new(NoWal)), + ); g.add_edge(4, 7, 3).unwrap(); assert_eq!(g.nodes().num_nodes(), 2); } @@ -720,7 +739,11 @@ mod test { #[test] fn test_node_additions_1() { let graph_dir = tempfile::tempdir().unwrap(); - let g = 
GraphStore::new(Some(graph_dir.path()), Extension::new(32, 32)); + let config = BaseConfig::new(32, 32); + let g = GraphStore::new( + Some(graph_dir.path()), + Extension::new(config, Arc::new(NoWal)), + ); g.add_edge(4, 7, 3).unwrap(); let check = |g: &Layer| { @@ -762,7 +785,11 @@ mod test { #[test] fn node_temporal_props() { let graph_dir = tempfile::tempdir().unwrap(); - let g = Layer::new(Some(graph_dir.path()), Extension::new(32, 32)); + let config = BaseConfig::new(32, 32); + let g = Layer::new( + Some(graph_dir.path()), + Extension::new(config, Arc::new(NoWal)), + ); g.add_node_props::(1, 0, 0, vec![]) .expect("Failed to add node props"); g.add_node_props::(2, 0, 0, vec![]) @@ -1565,13 +1592,15 @@ mod test { fn check_graph_with_nodes(node_page_len: u32, edge_page_len: u32, fixture: &NodeFixture) { check_graph_with_nodes_support(fixture, false, |path| { - Layer::new(Some(path), Extension::new(node_page_len, edge_page_len)) + let config = BaseConfig::new(node_page_len, edge_page_len); + Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } fn check_graph_with_props(node_page_len: u32, edge_page_len: u32, fixture: &Fixture) { check_graph_with_props_support(fixture, false, |path| { - Layer::new(Some(path), Extension::new(node_page_len, edge_page_len)) + let config = BaseConfig::new(node_page_len, edge_page_len); + Layer::new(Some(path), Extension::new(config, Arc::new(NoWal))) }); } } diff --git a/db4-storage/src/pages/node_page/writer.rs b/db4-storage/src/pages/node_page/writer.rs index fec7b2ced8..018fd6d56e 100644 --- a/db4-storage/src/pages/node_page/writer.rs +++ b/db4-storage/src/pages/node_page/writer.rs @@ -5,7 +5,7 @@ use crate::{ use raphtory_api::core::entities::{ EID, GID, VID, properties::{ - meta::{NODE_ID_IDX, NODE_TYPE_IDX}, + meta::{NODE_ID_IDX, NODE_TYPE_IDX, STATIC_GRAPH_LAYER_ID}, prop::Prop, }, }; @@ -37,9 +37,8 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri src_pos: impl Into, dst: impl Into, e_id: impl Into, - lsn: u64, ) { - self.add_outbound_edge_inner(t, src_pos, dst, e_id, lsn); + self.add_outbound_edge_inner(t, src_pos, dst, e_id); } pub fn add_static_outbound_edge( @@ -47,10 +46,14 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri src_pos: LocalPOS, dst: impl Into, e_id: impl Into, - lsn: u64, ) { let e_id = e_id.into(); - self.add_outbound_edge_inner::(None, src_pos, dst, e_id.with_layer(0), lsn); + self.add_outbound_edge_inner::( + None, + src_pos, + dst, + e_id.with_layer(STATIC_GRAPH_LAYER_ID), + ); } fn add_outbound_edge_inner( @@ -59,7 +62,6 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri src_pos: impl Into, dst: impl Into, e_id: impl Into, - lsn: u64, ) { let src_pos = src_pos.into(); let dst = dst.into(); @@ -69,12 +71,10 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri let e_id = e_id.into(); let layer_id = e_id.layer(); - let (is_new_node, add) = self - .mut_segment - .add_outbound_edge(t, src_pos, dst, e_id, lsn); + let (is_new_node, add) = self.mut_segment.add_outbound_edge(t, src_pos, dst, e_id); self.page.increment_est_size(add); - if is_new_node && !self.page.check_node(src_pos, layer_id) { + if is_new_node && !self.page.has_node(src_pos, layer_id) { self.l_counter.increment(layer_id); } } @@ -85,9 +85,8 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri dst_pos: impl Into, src: impl Into, e_id: impl Into, - lsn: u64, ) { - self.add_inbound_edge_inner(t, dst_pos, src, e_id, lsn); + self.add_inbound_edge_inner(t, dst_pos, src, e_id); } pub fn add_static_inbound_edge( @@ -95,10 
+94,14 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri dst_pos: LocalPOS, src: impl Into, e_id: impl Into, - lsn: u64, ) { let e_id = e_id.into(); - self.add_inbound_edge_inner::(None, dst_pos, src, e_id.with_layer(0), lsn); + self.add_inbound_edge_inner::( + None, + dst_pos, + src, + e_id.with_layer(STATIC_GRAPH_LAYER_ID), + ); } fn add_inbound_edge_inner( @@ -107,7 +110,6 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri dst_pos: impl Into, src: impl Into, e_id: impl Into, - lsn: u64, ) { let e_id = e_id.into(); let src = src.into(); @@ -116,13 +118,11 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri } let layer = e_id.layer(); let dst_pos = dst_pos.into(); - let (is_new_node, add) = self - .mut_segment - .add_inbound_edge(t, dst_pos, src, e_id, lsn); + let (is_new_node, add) = self.mut_segment.add_inbound_edge(t, dst_pos, src, e_id); self.page.increment_est_size(add); - if is_new_node && !self.page.check_node(dst_pos, layer) { + if is_new_node && !self.page.has_node(dst_pos, layer) { self.l_counter.increment(layer); } } @@ -133,13 +133,11 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri pos: LocalPOS, layer_id: usize, props: impl IntoIterator, - lsn: u64, ) { self.l_counter.update_time(t.t()); let (is_new_node, add) = self.mut_segment.add_props(t, pos, layer_id, props); - self.mut_segment.as_mut()[layer_id].set_lsn(lsn); self.page.increment_est_size(add); - if is_new_node && !self.page.check_node(pos, layer_id) { + if is_new_node && !self.page.has_node(pos, layer_id) { self.l_counter.increment(layer_id); } } @@ -158,12 +156,10 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri pos: LocalPOS, layer_id: usize, props: impl IntoIterator, - lsn: u64, ) { let (is_new_node, add) = self.mut_segment.update_metadata(pos, layer_id, props); - self.mut_segment.as_mut()[layer_id].set_lsn(lsn); self.page.increment_est_size(add); - if is_new_node && !self.page.check_node(pos, layer_id) { + if is_new_node && !self.page.has_node(pos, layer_id) { self.l_counter.increment(layer_id); } } @@ -172,9 +168,9 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri self.mut_segment.get_metadata(pos, layer_id, prop_id) } - pub fn update_timestamp(&mut self, t: T, pos: LocalPOS, e_id: ELID, lsn: u64) { + pub fn update_timestamp(&mut self, t: T, pos: LocalPOS, e_id: ELID) { self.l_counter.update_time(t.t()); - let add = self.mut_segment.update_timestamp(t, pos, e_id, lsn); + let add = self.mut_segment.update_timestamp(t, pos, e_id); self.page.increment_est_size(add); } @@ -195,28 +191,32 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> NodeWri layer_id: usize, gid: GidRef<'_>, node_type: usize, - lsn: u64, ) { let node_type = (node_type != 0).then_some(node_type); - self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), node_type), lsn); + self.update_c_props(pos, layer_id, node_info_as_props(Some(gid), node_type)); } - pub fn store_node_id(&mut self, pos: LocalPOS, layer_id: usize, gid: GID, lsn: u64) { + pub fn store_node_id(&mut self, pos: LocalPOS, layer_id: usize, gid: GID) { let gid = match gid { GID::U64(id) => Prop::U64(id), GID::Str(s) => Prop::str(s), }; - self.update_c_props(pos, layer_id, [(NODE_ID_IDX, gid)], lsn); + let props = [(NODE_ID_IDX, gid)]; + self.update_c_props(pos, layer_id, props); } - pub fn update_deletion_time(&mut self, t: T, node: LocalPOS, e_id: ELID, lsn: u64) { - self.update_timestamp(t, node, e_id, lsn); + pub fn update_deletion_time(&mut self, t: T, node: LocalPOS, e_id: ELID) { + self.update_timestamp(t, node, e_id); 
} pub fn increment_seg_num_nodes(&mut self) { self.page .increment_num_nodes(self.mut_segment.max_page_len()); } + + pub fn has_node(&self, node: LocalPOS, layer_id: usize) -> bool { + self.mut_segment.has_node(node, layer_id) || self.page.has_node(node, layer_id) + } } pub fn node_info_as_props( @@ -240,34 +240,19 @@ impl<'a, MP: DerefMut + 'a, NS: NodeSegmentOps> Drop } } -pub enum WriterPair<'a, MP: DerefMut, NS: NodeSegmentOps> { - Same { - writer: NodeWriter<'a, MP, NS>, - }, - Different { - src_writer: NodeWriter<'a, MP, NS>, - dst_writer: NodeWriter<'a, MP, NS>, - }, +/// Holds writers for src and dst node segments when adding an edge. +/// If both nodes are in the same segment, `dst` is `None` and `src` is used for both. +pub struct NodeWriters<'a, MP: DerefMut, NS: NodeSegmentOps> { + pub src: NodeWriter<'a, MP, NS>, + pub dst: Option>, } -impl<'a, MP: DerefMut, NS: NodeSegmentOps> WriterPair<'a, MP, NS> { +impl<'a, MP: DerefMut, NS: NodeSegmentOps> NodeWriters<'a, MP, NS> { pub fn get_mut_src(&mut self) -> &mut NodeWriter<'a, MP, NS> { - match self { - WriterPair::Same { writer, .. } => writer, - WriterPair::Different { - src_writer: writer_i, - .. - } => writer_i, - } + &mut self.src } pub fn get_mut_dst(&mut self) -> &mut NodeWriter<'a, MP, NS> { - match self { - WriterPair::Same { writer, .. } => writer, - WriterPair::Different { - dst_writer: writer_j, - .. - } => writer_j, - } + self.dst.as_mut().unwrap_or(&mut self.src) } } diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 99e0e7c28f..6053b18688 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -9,7 +9,7 @@ use crate::{ locked::nodes::{LockedNodePage, WriteLockedNodePages}, row_group_par_iter, }, - persist::strategy::Config, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, segments::node::segment::MemNodeSegment, }; use parking_lot::{RwLock, RwLockWriteGuard}; @@ -46,7 +46,7 @@ pub struct ReadLockedNodeStorage, EXT> { locked_segments: Box<[NS::ArcLockedSegment]>, } -impl, EXT: Config> ReadLockedNodeStorage { +impl, EXT: PersistenceStrategy> ReadLockedNodeStorage { pub fn node_ref( &self, node: impl Into, @@ -118,7 +118,7 @@ impl, EXT: Config> ReadLockedNodeStorage NodeStorageInner { +impl NodeStorageInner { pub fn prop_meta(&self) -> &Arc { &self.node_meta } @@ -161,11 +161,11 @@ impl NodeStorageInner { } pub fn max_segment_len(&self) -> u32 { - self.ext.max_node_page_len() + self.ext.config().max_node_page_len() } } -impl, EXT: Config> NodeStorageInner { +impl, EXT: PersistenceStrategy> NodeStorageInner { pub fn new_with_meta( nodes_path: Option, node_meta: Arc, @@ -196,7 +196,7 @@ impl, EXT: Config> NodeStorageInner .properties_mut() .set_has_properties() } - segment.mark_dirty(); + segment.set_dirty(true); } empty } @@ -241,6 +241,7 @@ impl, EXT: Config> NodeStorageInner let lock_slot = self.free_segments[slot_idx].read_recursive(); let page_id = *lock_slot; let page = self.segments.get(page_id); + page.and_then(|page| { self.reserve_segment_row(page) .map(|pos| (page.segment_id(), LocalPOS(pos))) @@ -335,7 +336,7 @@ impl, EXT: Config> NodeStorageInner ext: EXT, ) -> Result { let nodes_path = nodes_path.as_ref(); - let max_page_len = ext.max_node_page_len(); + let max_page_len = ext.config().max_node_page_len(); let node_meta = Arc::new(Meta::new_for_nodes()); if !nodes_path.exists() { @@ -483,19 +484,21 @@ impl, EXT: Config> NodeStorageInner if let Some(segment) = self.segments.get(segment_id) { return segment; } + let 
count = self.segments.count(); + if count > segment_id { - // something has allocated the segment, wait for it to be added + // Something has allocated the segment, wait for it to be added. loop { if let Some(segment) = self.segments.get(segment_id) { return segment; } else { - // wait for the segment to be created + // Wait for the segment to be created. std::thread::yield_now(); } } } else { - // we need to create the segment + // we need to create the segment. self.segments.reserve(segment_id + 1 - count); loop { @@ -514,7 +517,7 @@ impl, EXT: Config> NodeStorageInner if let Some(segment) = self.segments.get(segment_id) { return segment; } else { - // wait for the segment to be created + // Wait for the segment to be created. std::thread::yield_now(); } } diff --git a/db4-storage/src/pages/session.rs b/db4-storage/src/pages/session.rs index 1a13aa130b..87a5c8eb4d 100644 --- a/db4-storage/src/pages/session.rs +++ b/db4-storage/src/pages/session.rs @@ -1,14 +1,18 @@ use super::{ - GraphStore, edge_page::writer::EdgeWriter, node_page::writer::WriterPair, resolve_pos, + GraphStore, edge_page::writer::EdgeWriter, node_page::writer::NodeWriters, resolve_pos, }; use crate::{ LocalPOS, api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, segments::{edge::segment::MemEdgeSegment, node::segment::MemNodeSegment}, + wal::LSN, }; use parking_lot::RwLockWriteGuard; -use raphtory_api::core::{entities::properties::prop::Prop, storage::dict_mapper::MaybeNew}; +use raphtory_api::core::{ + entities::properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, + storage::dict_mapper::MaybeNew, +}; use raphtory_core::{ entities::{EID, ELID, VID}, storage::timeindex::AsTime, @@ -19,10 +23,10 @@ pub struct WriteSession< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, > { - node_writers: WriterPair<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, - edge_writer: Option, ES>>, + node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, + edge_writer: EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES>, graph: &'a GraphStore, } @@ -31,12 +35,12 @@ impl< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, > WriteSession<'a, NS, ES, GS, EXT> { pub fn new( - node_writers: WriterPair<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, - edge_writer: Option, ES>>, + node_writers: NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>, + edge_writer: EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES>, graph: &'a GraphStore, ) -> Self { Self { @@ -56,7 +60,6 @@ impl< src: impl Into, dst: impl Into, edge: MaybeNew, - lsn: u64, props: impl IntoIterator, ) { let src = src.into(); @@ -69,19 +72,15 @@ impl< let (_, src_pos) = self.graph.nodes().resolve_pos(src); let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); - if let Some(writer) = self.edge_writer.as_mut() { - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - - writer.add_edge(t, edge_pos, src, dst, props, layer, lsn); - } else { - let mut writer = self.graph.edge_writer(e_id.edge); - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); + let edge_max_page_len = self + .edge_writer + .writer + 
.get_or_create_layer(layer) + .max_page_len(); + let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - writer.add_edge(t, edge_pos, src, dst, props, layer, lsn); - self.edge_writer = Some(writer); // Attach edge_writer to hold onto locks - } + self.edge_writer + .add_edge(t, edge_pos, src, dst, props, layer); let edge_id = edge.inner(); @@ -94,18 +93,18 @@ impl< { self.node_writers .get_mut_src() - .add_outbound_edge(Some(t), src_pos, dst, edge_id, lsn); + .add_outbound_edge(Some(t), src_pos, dst, edge_id); self.node_writers .get_mut_dst() - .add_inbound_edge(Some(t), dst_pos, src, edge_id, lsn); + .add_inbound_edge(Some(t), dst_pos, src, edge_id); } self.node_writers .get_mut_src() - .update_timestamp(t, src_pos, e_id, lsn); + .update_timestamp(t, src_pos, e_id); self.node_writers .get_mut_dst() - .update_timestamp(t, dst_pos, e_id, lsn); + .update_timestamp(t, dst_pos, e_id); } pub fn delete_edge_from_layer( @@ -114,7 +113,6 @@ impl< src: impl Into, dst: impl Into, edge: MaybeNew, - lsn: u64, ) { let src = src.into(); let dst = dst.into(); @@ -126,19 +124,14 @@ impl< let (_, src_pos) = self.graph.nodes().resolve_pos(src); let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); - if let Some(writer) = self.edge_writer.as_mut() { - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - - writer.delete_edge(t, edge_pos, src, dst, layer, lsn); - } else { - let mut writer = self.graph.edge_writer(e_id.edge); - let edge_max_page_len = writer.writer.get_or_create_layer(layer).max_page_len(); - let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); + let edge_max_page_len = self + .edge_writer + .writer + .get_or_create_layer(layer) + .max_page_len(); + let (_, edge_pos) = resolve_pos(e_id.edge, edge_max_page_len); - writer.delete_edge(t, edge_pos, src, dst, layer, lsn); - self.edge_writer = Some(writer); // Attach edge_writer to hold onto locks - } + self.edge_writer.delete_edge(t, edge_pos, src, dst, layer); let edge_id = edge.inner(); @@ -150,81 +143,73 @@ impl< .get_out_edge(src_pos, dst, edge_id.layer()) .is_none() { - self.node_writers.get_mut_src().add_outbound_edge( - Some(t), - src_pos, - dst, - edge_id, - lsn, - ); - self.node_writers.get_mut_dst().add_inbound_edge( - Some(t), - dst_pos, - src, - edge_id, - lsn, - ); + self.node_writers + .get_mut_src() + .add_outbound_edge(Some(t), src_pos, dst, edge_id); + self.node_writers + .get_mut_dst() + .add_inbound_edge(Some(t), dst_pos, src, edge_id); } self.node_writers .get_mut_src() - .update_deletion_time(t, src_pos, e_id, lsn); + .update_deletion_time(t, src_pos, e_id); self.node_writers .get_mut_dst() - .update_deletion_time(t, dst_pos, e_id, lsn); + .update_deletion_time(t, dst_pos, e_id); } } - pub fn add_static_edge( - &mut self, - src: impl Into, - dst: impl Into, - lsn: u64, - ) -> MaybeNew { + pub fn add_static_edge(&mut self, src: impl Into, dst: impl Into) -> MaybeNew { let src = src.into(); let dst = dst.into(); - let layer_id = 0; // static graph goes to layer 0 let (_, src_pos) = self.graph.nodes().resolve_pos(src); let (_, dst_pos) = self.graph.nodes().resolve_pos(dst); - if let Some(e_id) = self - .node_writers - .get_mut_src() - .get_out_edge(src_pos, dst, layer_id) - { - // If edge_writer is not set, we need to create a new one - if self.edge_writer.is_none() { - self.edge_writer = Some(self.graph.edge_writer(e_id)); - } - let edge_writer = self.edge_writer.as_mut().unwrap(); - let (_, edge_pos) = 
self.graph.edges().resolve_pos(e_id); - - edge_writer.add_static_edge(Some(edge_pos), src, dst, lsn, true); - - MaybeNew::Existing(e_id) - } else { - let mut edge_writer = self.graph.get_free_writer(); - let edge_id = edge_writer.add_static_edge(None, src, dst, lsn, false); - let edge_id = - edge_id.as_eid(edge_writer.segment_id(), self.graph.edges().max_page_len()); - - self.edge_writer = Some(edge_writer); // Attach edge_writer to hold onto locks - + let existing_eid = self.node_writers .get_mut_src() - .add_static_outbound_edge(src_pos, dst, edge_id, lsn); - self.node_writers - .get_mut_dst() - .add_static_inbound_edge(dst_pos, src, edge_id, lsn); + .get_out_edge(src_pos, dst, STATIC_GRAPH_LAYER_ID); - MaybeNew::New(edge_id) + // Edge already exists, so no need to add it again. + if let Some(eid) = existing_eid { + return MaybeNew::Existing(eid); } + + let edge_pos = None; + let already_counted = false; + let edge_pos = self + .edge_writer + .add_static_edge(edge_pos, src, dst, already_counted); + let edge_id = edge_pos.as_eid( + self.edge_writer.segment_id(), + self.graph.edges().max_page_len(), + ); + + self.node_writers + .get_mut_src() + .add_static_outbound_edge(src_pos, dst, edge_id); + self.node_writers + .get_mut_dst() + .add_static_inbound_edge(dst_pos, src, edge_id); + + MaybeNew::New(edge_id) } pub fn node_writers( &mut self, - ) -> &mut WriterPair<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS> { + ) -> &mut NodeWriters<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS> { &mut self.node_writers } + + pub fn set_lsn(&mut self, lsn: LSN) { + self.node_writers.src.mut_segment.set_lsn(lsn); + + if let Some(dst) = &mut self.node_writers.dst { + dst.mut_segment.set_lsn(lsn); + } + + self.edge_writer.writer.set_lsn(lsn); + } } diff --git a/db4-storage/src/pages/test_utils/checkers.rs b/db4-storage/src/pages/test_utils/checkers.rs index 4527e492f1..67e7dfc57e 100644 --- a/db4-storage/src/pages/test_utils/checkers.rs +++ b/db4-storage/src/pages/test_utils/checkers.rs @@ -19,7 +19,7 @@ use crate::{ }, error::StorageError, pages::GraphStore, - persist::strategy::PersistentStrategy, + persist::strategy::PersistenceStrategy, }; use super::fixtures::{AddEdge, Fixture, NodeFixture}; @@ -28,7 +28,7 @@ pub fn make_graph_from_edges< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, >( edges: &[(VID, VID, Option)], // src, dst, optional layer_id graph_dir: &Path, @@ -36,6 +36,7 @@ pub fn make_graph_from_edges< make_graph: impl FnOnce(&Path) -> GraphStore, ) -> GraphStore { let graph = make_graph(graph_dir); + for (_, _, layer) in edges { if let Some(layer) = layer { for layer in 0..=*layer { @@ -49,6 +50,7 @@ pub fn make_graph_from_edges< } } } + if par_load { edges .par_iter() @@ -58,9 +60,10 @@ pub fn make_graph_from_edges< let layer_id = layer_id.unwrap_or(0); let mut session = graph.write_session(*src, *dst, None); - let eid = session.add_static_edge(*src, *dst, lsn); + session.set_lsn(lsn); + let eid = session.add_static_edge(*src, *dst); let elid = eid.map(|eid| eid.with_layer(layer_id)); - session.add_edge_into_layer(timestamp, *src, *dst, elid, lsn, []); + session.add_edge_into_layer(timestamp, *src, *dst, elid, []); Ok::<_, StorageError>(()) }) @@ -75,14 +78,16 @@ pub fn make_graph_from_edges< let layer_id = layer_id.unwrap_or(0); let mut session = graph.write_session(*src, *dst, None); - let eid = session.add_static_edge(*src, *dst, lsn); + session.set_lsn(lsn); + let eid = session.add_static_edge(*src, *dst); let elid = 
eid.map(|e| e.with_layer(layer_id)); - session.add_edge_into_layer(timestamp, *src, *dst, elid, lsn, []); + session.add_edge_into_layer(timestamp, *src, *dst, elid, []); Ok::<_, StorageError>(()) }) .expect("Failed to add edge"); } + graph } @@ -90,7 +95,7 @@ pub fn check_edges_support< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, >( edges: Vec<(impl Into, impl Into, Option)>, // src, dst, optional layer_id par_load: bool, @@ -121,7 +126,7 @@ pub fn check_edges_support< NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, - EXT: PersistentStrategy, + EXT: PersistenceStrategy, >( stage: &str, expected_edges: &[(VID, VID, Option)], // (src, dst, layer_id) @@ -205,9 +210,10 @@ pub fn check_edges_support< check("pre-drop", &edges, &graph); if check_load { + let ext = graph.extension().clone(); drop(graph); - let maybe_ns = GraphStore::::load(graph_dir.path()); + let maybe_ns = GraphStore::::load(graph_dir.path(), ext); match maybe_ns { Ok(graph) => { @@ -221,7 +227,7 @@ pub fn check_edges_support< } pub fn check_graph_with_nodes_support< - EXT: PersistentStrategy, + EXT: PersistenceStrategy, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, @@ -349,14 +355,15 @@ pub fn check_graph_with_nodes_support< check_fn(temp_props, const_props, &graph); if check_load { + let ext = graph.extension().clone(); drop(graph); - let graph = GraphStore::::load(graph_dir.path()).unwrap(); + let graph = GraphStore::::load(graph_dir.path(), ext).unwrap(); check_fn(temp_props, const_props, &graph); } } pub fn check_graph_with_props_support< - EXT: PersistentStrategy, + EXT: PersistenceStrategy, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, @@ -503,9 +510,10 @@ pub fn check_graph_with_props_support< if check_load { // Load the graph from disk and check again + let ext = graph.extension().clone(); drop(graph); - let graph = GraphStore::::load(graph_dir.path()).unwrap(); + let graph = GraphStore::::load(graph_dir.path(), ext).unwrap(); black_box(check_fn(edges, &graph)); } } diff --git a/db4-storage/src/persist/config.rs b/db4-storage/src/persist/config.rs new file mode 100644 index 0000000000..3958de59d7 --- /dev/null +++ b/db4-storage/src/persist/config.rs @@ -0,0 +1,81 @@ +use crate::error::StorageError; +use serde::{Deserialize, Serialize, de::DeserializeOwned}; +use std::path::Path; + +pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 +pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 +pub const CONFIG_FILE: &str = "config.json"; + +pub trait ConfigOps: Serialize + DeserializeOwned { + fn max_node_page_len(&self) -> u32; + + fn max_edge_page_len(&self) -> u32; + + fn node_types(&self) -> &[String]; + + fn with_node_types(&self, node_types: impl IntoIterator>) -> Self; + + fn load_from_dir(dir: impl AsRef) -> Result { + let config_file = dir.as_ref().join(CONFIG_FILE); + let config_file = std::fs::File::open(config_file)?; + let config = serde_json::from_reader(config_file)?; + Ok(config) + } + + fn save_to_dir(&self, dir: impl AsRef) -> Result<(), StorageError> { + let config_file = dir.as_ref().join(CONFIG_FILE); + let config_file = std::fs::File::create(&config_file)?; + serde_json::to_writer_pretty(config_file, self)?; + Ok(()) + } +} + +#[derive(Debug, Copy, Clone, Serialize, Deserialize)] +pub struct BaseConfig { + max_node_page_len: u32, + max_edge_page_len: u32, +} + +impl BaseConfig { + pub fn new(max_node_page_len: u32, max_edge_page_len: u32) -> Self { + Self { 
+ max_node_page_len, + max_edge_page_len, + } + } +} + +impl Default for BaseConfig { + fn default() -> Self { + Self { + max_node_page_len: DEFAULT_MAX_PAGE_LEN_NODES, + max_edge_page_len: DEFAULT_MAX_PAGE_LEN_EDGES, + } + } +} + +impl ConfigOps for BaseConfig { + fn max_node_page_len(&self) -> u32 { + self.max_node_page_len + } + + fn max_edge_page_len(&self) -> u32 { + self.max_edge_page_len + } + + fn node_types(&self) -> &[String] { + &[] + } + + fn with_node_types(&self, _node_types: impl IntoIterator>) -> Self { + *self + } + + fn load_from_dir(_dir: impl AsRef) -> Result { + Ok(Self::default()) + } + + fn save_to_dir(&self, _dir: impl AsRef) -> Result<(), StorageError> { + Ok(()) + } +} diff --git a/db4-storage/src/persist/mod.rs b/db4-storage/src/persist/mod.rs index 54eb972285..43275c62a7 100644 --- a/db4-storage/src/persist/mod.rs +++ b/db4-storage/src/persist/mod.rs @@ -1 +1,2 @@ +pub mod config; pub mod strategy; diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index 0bca7c9b72..617b85a576 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -1,53 +1,45 @@ -use std::ops::DerefMut; - use crate::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, + persist::config::{BaseConfig, ConfigOps}, segments::{ edge::segment::{EdgeSegmentView, MemEdgeSegment}, graph_prop::{GraphPropSegmentView, segment::MemGraphPropSegment}, node::segment::{MemNodeSegment, NodeSegmentView}, }, + wal::{WalOps, no_wal::NoWal}, }; -use serde::{Deserialize, Serialize}; - -pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 -pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 -pub const DEFAULT_MAX_MEMORY_BYTES: usize = 32 * 1024 * 1024; - -pub trait Config: - Default + std::fmt::Debug + Clone + Send + Sync + 'static + for<'a> Deserialize<'a> + Serialize -{ - fn max_node_page_len(&self) -> u32; - fn max_edge_page_len(&self) -> u32; - - fn max_memory_bytes(&self) -> usize; - fn is_parallel(&self) -> bool; - fn node_types(&self) -> &[String]; - fn with_node_types(&self, types: impl IntoIterator>) -> Self; -} +use std::{fmt::Debug, ops::DerefMut, sync::Arc}; -pub trait PersistentStrategy: Config { +pub trait PersistenceStrategy: Debug + Clone + Send + Sync + 'static { type NS: NodeSegmentOps; type ES: EdgeSegmentOps; type GS: GraphPropSegmentOps; + type Wal: WalOps; + type Config: ConfigOps; + + fn new(config: Self::Config, wal: Arc) -> Self; + + fn config(&self) -> &Self::Config; + + fn wal(&self) -> &Self::Wal; fn persist_node_segment>( &self, - node_page: &Self::NS, + node_segment: &Self::NS, writer: MP, ) where Self: Sized; - fn persist_edge_page>( + fn persist_edge_segment>( &self, - edge_page: &Self::ES, + edge_segment: &Self::ES, writer: MP, ) where Self: Sized; - fn persist_graph_props>( + fn persist_graph_prop_segment>( &self, - graph_segment: &Self::GS, + graph_prop_segment: &Self::GS, writer: MP, ) where Self: Sized; @@ -56,58 +48,30 @@ pub trait PersistentStrategy: Config { fn disk_storage_enabled() -> bool; } -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[derive(Debug, Clone)] pub struct NoOpStrategy { - max_node_page_len: u32, - max_edge_page_len: u32, + config: BaseConfig, + wal: Arc, } -impl NoOpStrategy { - pub fn new(max_node_page_len: u32, max_edge_page_len: u32) -> Self { - Self { - max_node_page_len, - max_edge_page_len, - } - } -} - -impl Default for NoOpStrategy { - fn default() -> Self { - Self::new(DEFAULT_MAX_PAGE_LEN_NODES, 
DEFAULT_MAX_PAGE_LEN_EDGES) - } -} - -impl Config for NoOpStrategy { - fn max_node_page_len(&self) -> u32 { - self.max_node_page_len - } - - #[inline(always)] - fn max_edge_page_len(&self) -> u32 { - self.max_edge_page_len - } - - fn max_memory_bytes(&self) -> usize { - usize::MAX - } +impl PersistenceStrategy for NoOpStrategy { + type ES = EdgeSegmentView; + type NS = NodeSegmentView; + type GS = GraphPropSegmentView; + type Wal = NoWal; + type Config = BaseConfig; - fn is_parallel(&self) -> bool { - false + fn new(config: Self::Config, wal: Arc) -> Self { + Self { config, wal } } - fn node_types(&self) -> &[String] { - &[] + fn config(&self) -> &Self::Config { + &self.config } - fn with_node_types(&self, _types: impl IntoIterator>) -> Self { - *self + fn wal(&self) -> &Self::Wal { + &self.wal } -} - -impl PersistentStrategy for NoOpStrategy { - type ES = EdgeSegmentView; - type NS = NodeSegmentView; - type GS = GraphPropSegmentView; fn persist_node_segment>( &self, @@ -117,7 +81,7 @@ impl PersistentStrategy for NoOpStrategy { // No operation } - fn persist_edge_page>( + fn persist_edge_segment>( &self, _edge_page: &Self::ES, _writer: MP, @@ -125,7 +89,7 @@ impl PersistentStrategy for NoOpStrategy { // No operation } - fn persist_graph_props>( + fn persist_graph_prop_segment>( &self, _graph_segment: &Self::GS, _writer: MP, diff --git a/db4-storage/src/segments/edge/segment.rs b/db4-storage/src/segments/edge/segment.rs index 72eca783d2..94347dafbe 100644 --- a/db4-storage/src/segments/edge/segment.rs +++ b/db4-storage/src/segments/edge/segment.rs @@ -2,13 +2,14 @@ use crate::{ LocalPOS, api::edges::{EdgeSegmentOps, LockedESegment}, error::StorageError, - persist::strategy::PersistentStrategy, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, properties::PropMutEntry, segments::{ HasRow, SegmentContainer, edge::entry::{MemEdgeEntry, MemEdgeRef}, }, utils::Iter4, + wal::LSN, }; use parking_lot::lock_api::ArcRwLockReadGuard; use raphtory_api::core::entities::{ @@ -51,18 +52,7 @@ impl HasRow for EdgeEntry { pub struct MemEdgeSegment { layers: Vec>, est_size: usize, -} - -impl>> From for MemEdgeSegment { - fn from(inner: I) -> Self { - let layers: Vec<_> = inner.into_iter().collect(); - let est_size = layers.iter().map(|seg| seg.est_size()).sum(); - assert!( - !layers.is_empty(), - "MemEdgeSegment must have at least one layer" - ); - Self { layers, est_size } - } + lsn: LSN, } impl AsRef<[SegmentContainer]> for MemEdgeSegment { @@ -82,6 +72,7 @@ impl MemEdgeSegment { Self { layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], est_size: 0, + lsn: 0, } } @@ -128,7 +119,25 @@ impl MemEdgeSegment { } pub fn lsn(&self) -> u64 { - self.layers.iter().map(|seg| seg.lsn()).min().unwrap_or(0) + self.lsn + } + + pub fn set_lsn(&mut self, lsn: u64) { + self.lsn = lsn; + } + + /// Replaces this segment with an empty instance, returning the old segment + /// with its data. + /// + /// The new segment will have the same number of layers as the original. 
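+ /// The current LSN is preserved on both this segment and the returned one.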
+ pub fn take(&mut self) -> Self { + let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); + + Self { + layers, + est_size: 0, + lsn: self.lsn, + } } pub fn max_page_len(&self) -> u32 { @@ -150,20 +159,20 @@ impl MemEdgeSegment { dst: VID, layer_id: usize, props: impl IntoIterator, - lsn: u64, ) { // Ensure we have enough layers self.ensure_layer(layer_id); let est_size = self.layers[layer_id].est_size(); - self.layers[layer_id].set_lsn(lsn); let local_row = self.reserve_local_row(edge_pos, src, dst, layer_id); let mut prop_entry: PropMutEntry<'_> = self.layers[layer_id] .properties_mut() .get_mut_entry(local_row); + let ts = EventTime::new(t.t(), t.i()); prop_entry.append_t_props(ts, props); + let layer_est_size = self.layers[layer_id].est_size(); self.est_size += layer_est_size.saturating_sub(est_size); } @@ -175,14 +184,12 @@ impl MemEdgeSegment { src: VID, dst: VID, layer_id: usize, - lsn: u64, ) { let t = EventTime::new(t.t(), t.i()); // Ensure we have enough layers self.ensure_layer(layer_id); let est_size = self.layers[layer_id].est_size(); - self.layers[layer_id].set_lsn(lsn); let local_row = self.reserve_local_row(edge_pos, src, dst, layer_id); let props = self.layers[layer_id].properties_mut(); @@ -197,14 +204,12 @@ impl MemEdgeSegment { src: impl Into, dst: impl Into, layer_id: usize, - lsn: u64, ) { let src = src.into(); let dst = dst.into(); // Ensure we have enough layers self.ensure_layer(layer_id); - self.layers[layer_id].set_lsn(lsn); let est_size = self.layers[layer_id].est_size(); self.reserve_local_row(edge_pos, src, dst, layer_id); @@ -214,7 +219,7 @@ impl MemEdgeSegment { fn ensure_layer(&mut self, layer_id: usize) { if layer_id >= self.layers.len() { - // Get details from first layer to create consistent new layers + // Get details from first layer to create consistent new layers. if let Some(first_layer) = self.layers.first() { let segment_id = first_layer.segment_id(); let max_page_len = first_layer.max_page_len(); @@ -384,7 +389,7 @@ impl LockedESegment for ArcLockedSegmentView { } } -impl>> EdgeSegmentOps for EdgeSegmentView

{ +impl>> EdgeSegmentOps for EdgeSegmentView

{ type Extension = P; type Entry<'a> = MemEdgeEntry<'a, parking_lot::RwLockReadGuard<'a, MemEdgeSegment>>; @@ -419,7 +424,8 @@ impl>> EdgeSegmentOps for EdgeSegm } fn new(page_id: usize, meta: Arc, _path: Option, ext: Self::Extension) -> Self { - let max_page_len = ext.max_edge_page_len(); + let max_page_len = ext.config().max_edge_page_len(); + Self { segment: parking_lot::RwLock::new(MemEdgeSegment::new(page_id, max_page_len, meta)) .into(), @@ -523,7 +529,11 @@ impl>> EdgeSegmentOps for EdgeSegm .map_or(0, |layer| layer.len()) } - fn mark_dirty(&self) {} + fn set_dirty(&self, _dirty: bool) {} + + fn immut_lsn(&self) -> LSN { + panic!("immut_lsn not supported for EdgeSegmentView"); + } fn flush(&self) -> Result<(), StorageError> { Ok(()) @@ -533,7 +543,10 @@ impl>> EdgeSegmentOps for EdgeSegm #[cfg(test)] mod test { use super::*; - use raphtory_api::core::entities::properties::prop::PropType; + use raphtory_api::core::entities::properties::{ + meta::{Meta, STATIC_GRAPH_LAYER_ID}, + prop::PropType, + }; use raphtory_core::storage::timeindex::EventTime; fn create_test_segment() -> MemEdgeSegment { @@ -553,7 +566,6 @@ mod test { VID(2), 0, vec![(0, Prop::from("test1"))], - 1, ); segment.insert_edge_internal( @@ -563,7 +575,6 @@ mod test { VID(4), 0, vec![(0, Prop::from("test2"))], - 2, ); segment.insert_edge_internal( @@ -573,7 +584,6 @@ mod test { VID(6), 0, vec![(0, Prop::from("test3"))], - 3, ); // Verify edges exist @@ -592,9 +602,6 @@ mod test { #[test] fn est_size_changes() { - use super::*; - use raphtory_api::core::entities::properties::meta::Meta; - let meta = Arc::new(Meta::default()); let mut segment = MemEdgeSegment::new(1, 100, meta.clone()); @@ -605,16 +612,21 @@ mod test { LocalPOS(0), VID(1), VID(2), - 0, + STATIC_GRAPH_LAYER_ID, vec![(0, Prop::from("test"))], - 1, ); let est_size1 = segment.est_size(); assert!(est_size1 > 0); - segment.delete_edge_internal(EventTime::new(2, 3), LocalPOS(0), VID(5), VID(3), 0, 0); + segment.delete_edge_internal( + EventTime::new(2, 3), + LocalPOS(0), + VID(5), + VID(3), + STATIC_GRAPH_LAYER_ID, + ); let est_size2 = segment.est_size(); @@ -629,9 +641,8 @@ mod test { LocalPOS(1), VID(4), VID(6), - 0, + STATIC_GRAPH_LAYER_ID, vec![(0, Prop::from("test2"))], - 1, ); let est_size3 = segment.est_size(); @@ -642,7 +653,7 @@ mod test { // Insert a static edge - segment.insert_static_edge_internal(LocalPOS(1), 4, 6, 0, 1); + segment.insert_static_edge_internal(LocalPOS(1), 4, 6, STATIC_GRAPH_LAYER_ID); let est_size4 = segment.est_size(); assert_eq!( @@ -656,7 +667,13 @@ mod test { .unwrap() .inner(); - segment.update_const_properties(LocalPOS(1), VID(4), VID(6), 0, [(prop_id, Prop::U8(2))]); + segment.update_const_properties( + LocalPOS(1), + VID(4), + VID(6), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::U8(2))], + ); let est_size5 = segment.est_size(); assert!( diff --git a/db4-storage/src/segments/graph_prop/mod.rs b/db4-storage/src/segments/graph_prop/mod.rs index f1a1f8ad1b..ce535d4395 100644 --- a/db4-storage/src/segments/graph_prop/mod.rs +++ b/db4-storage/src/segments/graph_prop/mod.rs @@ -4,7 +4,7 @@ pub mod segment; use crate::{ api::graph_props::GraphPropSegmentOps, error::StorageError, - persist::strategy::Config, + persist::strategy::PersistenceStrategy, segments::graph_prop::{entry::MemGraphPropEntry, segment::MemGraphPropSegment}, }; use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; @@ -20,7 +20,7 @@ use std::{ /// `GraphPropSegmentView` manages graph temporal properties and graph metadata /// (constant properties). 
Reads / writes are always served from the in-memory segment. #[derive(Debug)] -pub struct GraphPropSegmentView { +pub struct GraphPropSegmentView { /// In-memory segment that contains the latest graph properties /// and graph metadata writes. head: Arc>, @@ -33,7 +33,7 @@ pub struct GraphPropSegmentView { _persistent: P, } -impl GraphPropSegmentOps for GraphPropSegmentView

{ +impl GraphPropSegmentOps for GraphPropSegmentView

{ type Extension = P; type Entry<'a> = MemGraphPropEntry<'a>; @@ -79,8 +79,8 @@ impl GraphPropSegmentOps for GraphPropSegmentView

{ self.est_size.load(Ordering::Relaxed) } - fn mark_dirty(&self) { - self.is_dirty.store(true, Ordering::Relaxed); + fn set_dirty(&self, dirty: bool) { + self.is_dirty.store(dirty, Ordering::Release); } fn notify_write( diff --git a/db4-storage/src/segments/graph_prop/segment.rs b/db4-storage/src/segments/graph_prop/segment.rs index 5ea6710c85..2e634c90dd 100644 --- a/db4-storage/src/segments/graph_prop/segment.rs +++ b/db4-storage/src/segments/graph_prop/segment.rs @@ -1,6 +1,7 @@ use crate::{ error::StorageError, segments::{HasRow, SegmentContainer}, + wal::LSN, }; use raphtory_api::core::entities::properties::{meta::Meta, prop::Prop}; use raphtory_core::{ @@ -14,6 +15,7 @@ use std::sync::Arc; pub struct MemGraphPropSegment { /// Layers containing graph properties and metadata. layers: Vec>, + lsn: LSN, } /// A unit-like struct for use with `SegmentContainer`. @@ -22,7 +24,7 @@ pub struct MemGraphPropSegment { #[derive(Debug, Default)] pub struct UnitEntry(usize); -// `UnitEntry` does not store data, but `HasRow has to be implemented +// UnitEntry does not store data, but HasRow has to be implemented // for SegmentContainer to work. impl HasRow for UnitEntry { fn row(&self) -> usize { @@ -48,13 +50,10 @@ impl MemGraphPropSegment { Self { layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], + lsn: 0, } } - pub fn lsn(&self) -> u64 { - self.layers.iter().map(|seg| seg.lsn()).min().unwrap_or(0) - } - pub fn get_or_create_layer(&mut self, layer_id: usize) -> &mut SegmentContainer { if layer_id >= self.layers.len() { let max_page_len = self.layers[0].max_page_len(); @@ -86,7 +85,18 @@ impl MemGraphPropSegment { pub fn take(&mut self) -> Self { let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); - Self { layers } + Self { + layers, + lsn: self.lsn, + } + } + + pub fn lsn(&self) -> LSN { + self.lsn + } + + pub fn set_lsn(&mut self, lsn: LSN) { + self.lsn = lsn; } pub fn add_properties( diff --git a/db4-storage/src/segments/mod.rs b/db4-storage/src/segments/mod.rs index fed9654f12..f2c5bbe099 100644 --- a/db4-storage/src/segments/mod.rs +++ b/db4-storage/src/segments/mod.rs @@ -157,7 +157,6 @@ pub struct SegmentContainer { max_page_len: u32, properties: Properties, meta: Arc, - lsn: u64, } pub trait HasRow: Default + Send + Sync + Sized { @@ -176,7 +175,6 @@ impl SegmentContainer { max_page_len, properties: Default::default(), meta, - lsn: 0, } } @@ -286,16 +284,6 @@ impl SegmentContainer { self.segment_id } - #[inline(always)] - pub fn lsn(&self) -> u64 { - self.lsn - } - - #[inline(always)] - pub fn set_lsn(&mut self, lsn: u64) { - self.lsn = lsn; - } - pub fn len(&self) -> u32 { self.data.data.len() as u32 } diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index 5d7f39cb3d..d4e1909b99 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -3,15 +3,15 @@ use crate::{ api::nodes::{LockedNSSegment, NodeSegmentOps}, error::StorageError, loop_lock_write, - pages::node_store::increment_and_clamp, - persist::strategy::PersistentStrategy, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, segments::{ HasRow, SegmentContainer, node::entry::{MemNodeEntry, MemNodeRef}, }, + wal::LSN, }; use either::Either; -use parking_lot::lock_api::ArcRwLockReadGuard; +use parking_lot::{RwLock, lock_api::ArcRwLockReadGuard}; use raphtory_api::core::{ Direction, entities::{ @@ -37,23 +37,7 @@ pub struct MemNodeSegment { segment_id: usize, max_page_len: u32, layers: Vec>, -} - 
-impl>> From for MemNodeSegment { - fn from(inner: I) -> Self { - let layers = inner.into_iter().collect::>(); - assert!( - !layers.is_empty(), - "MemNodeSegment must have at least one layer" - ); - let segment_id = layers[0].segment_id(); - let max_page_len = layers[0].max_page_len(); - Self { - segment_id, - max_page_len, - layers, - } - } + lsn: LSN, } #[derive(Debug, Default, serde::Serialize)] @@ -123,10 +107,12 @@ impl MemNodeSegment { let max_page_len = self.layers[0].max_page_len(); let segment_id = self.layers[0].segment_id(); let meta = self.layers[0].meta().clone(); + self.layers.resize_with(layer_id + 1, || { SegmentContainer::new(segment_id, max_page_len, meta.clone()) }); } + &mut self.layers[layer_id] } @@ -142,8 +128,29 @@ impl MemNodeSegment { self.get_adj(n, layer_id).map_or(0, |adj| adj.degree(dir)) } - pub fn lsn(&self) -> u64 { - self.layers.iter().map(|seg| seg.lsn()).min().unwrap_or(0) + pub fn lsn(&self) -> LSN { + self.lsn + } + + pub fn set_lsn(&mut self, lsn: LSN) { + if lsn > self.lsn { + self.lsn = lsn; + } + } + + /// Replaces this segment with an empty instance, returning the old segment + /// with its data. + /// + /// The new segment will have the same number of layers as the original. + pub fn take(&mut self) -> Self { + let layers = self.layers.iter_mut().map(|layer| layer.take()).collect(); + + Self { + segment_id: self.segment_id, + max_page_len: self.max_page_len, + layers, + lsn: self.lsn, + } } pub fn to_vid(&self, pos: LocalPOS) -> VID { @@ -191,6 +198,7 @@ impl MemNodeSegment { segment_id, max_page_len, layers: vec![SegmentContainer::new(segment_id, max_page_len, meta)], + lsn: 0, } } @@ -200,14 +208,12 @@ impl MemNodeSegment { src_pos: LocalPOS, dst: impl Into, e_id: impl Into, - lsn: u64, ) -> (bool, usize) { let dst = dst.into(); let e_id = e_id.into(); let layer_id = e_id.layer(); let layer = self.get_or_create_layer(layer_id); let est_size = layer.est_size(); - layer.set_lsn(lsn); let add_out = layer.reserve_local_row(src_pos); let new_entry = add_out.is_new(); @@ -229,7 +235,6 @@ impl MemNodeSegment { dst_pos: impl Into, src: impl Into, e_id: impl Into, - lsn: u64, ) -> (bool, usize) { let src = src.into(); let e_id = e_id.into(); @@ -238,7 +243,6 @@ impl MemNodeSegment { let layer = self.get_or_create_layer(layer_id); let est_size = layer.est_size(); - layer.set_lsn(lsn); let add_in = layer.reserve_local_row(dst_pos); let new_entry = add_in.is_new(); @@ -264,17 +268,10 @@ impl MemNodeSegment { prop_mut_entry.addition_timestamp(ts, e_id); } - pub fn update_timestamp( - &mut self, - t: T, - node_pos: LocalPOS, - e_id: ELID, - lsn: u64, - ) -> usize { + pub fn update_timestamp(&mut self, t: T, node_pos: LocalPOS, e_id: ELID) -> usize { let layer_id = e_id.layer(); let (est_size, row) = { let segment_container = self.get_or_create_layer(layer_id); //&mut self.layers[e_id.layer()]; - segment_container.set_lsn(lsn); let est_size = segment_container.est_size(); let row = segment_container.reserve_local_row(node_pos).inner().row(); (est_size, row) @@ -403,7 +400,7 @@ impl LockedNSSegment for ArcLockedSegmentView { } } -impl>> NodeSegmentOps for NodeSegmentView

{ +impl>> NodeSegmentOps for NodeSegmentView

{ type Extension = P; type Entry<'a> = MemNodeEntry<'a, parking_lot::RwLockReadGuard<'a, MemNodeSegment>>; @@ -423,7 +420,7 @@ impl>> NodeSegmentOps for NodeSegm } fn load( - _page_id: usize, + _segment_id: usize, _node_meta: Arc, _edge_meta: Arc, _path: impl AsRef, @@ -438,17 +435,19 @@ impl>> NodeSegmentOps for NodeSegm } fn new( - page_id: usize, + segment_id: usize, meta: Arc, _edge_meta: Arc, _path: Option, ext: Self::Extension, ) -> Self { - let max_page_len = ext.max_node_page_len(); + let max_page_len = ext.config().max_node_page_len(); + let inner = RwLock::new(MemNodeSegment::new(segment_id, max_page_len, meta)); + let inner = Arc::new(inner); + Self { - inner: parking_lot::RwLock::new(MemNodeSegment::new(page_id, max_page_len, meta)) - .into(), - segment_id: page_id, + inner, + segment_id, _ext: ext, max_num_node: AtomicU32::new(0), est_size: AtomicUsize::new(0), @@ -486,9 +485,9 @@ impl>> NodeSegmentOps for NodeSegm Ok(()) } - fn mark_dirty(&self) {} + fn set_dirty(&self, _dirty: bool) {} - fn check_node(&self, _pos: LocalPOS, _layer_id: usize) -> bool { + fn has_node(&self, _pos: LocalPOS, _layer_id: usize) -> bool { false } @@ -550,12 +549,12 @@ impl>> NodeSegmentOps for NodeSegm Ok(()) } - fn nodes_counter(&self) -> &AtomicU32 { - &self.max_num_node + fn immut_lsn(&self) -> LSN { + panic!("immut_lsn not supported for NodeSegmentView"); } - fn increment_num_nodes(&self, max_page_len: u32) { - increment_and_clamp(self.nodes_counter(), max_page_len); + fn nodes_counter(&self) -> &AtomicU32 { + &self.max_num_node } } @@ -565,10 +564,14 @@ mod test { LocalPOS, NodeSegmentView, api::nodes::NodeSegmentOps, pages::{layer_counter::GraphStats, node_page::writer::NodeWriter}, - persist::strategy::NoOpStrategy, + persist::{ + config::BaseConfig, + strategy::{NoOpStrategy, PersistenceStrategy}, + }, + wal::no_wal::NoWal, }; use raphtory_api::core::entities::properties::{ - meta::Meta, + meta::{Meta, STATIC_GRAPH_LAYER_ID}, prop::{Prop, PropType}, }; use raphtory_core::entities::{EID, ELID, VID}; @@ -580,9 +583,11 @@ mod test { let node_meta = Arc::new(Meta::default()); let edge_meta = Arc::new(Meta::default()); let path = tempdir().unwrap(); - let ext = NoOpStrategy::new(10, 10); + let config = BaseConfig::new(10, 10); + let ext = NoOpStrategy::new(config, Arc::new(NoWal)); + let segment_id = 0; let segment = NodeSegmentView::new( - 0, + segment_id, node_meta.clone(), edge_meta, Some(path.path().to_path_buf()), @@ -595,7 +600,12 @@ mod test { let est_size1 = segment.est_size(); assert_eq!(est_size1, 0); - writer.add_outbound_edge(Some(1), LocalPOS(1), VID(3), EID(7).with_layer(0), 0); + writer.add_outbound_edge( + Some(1), + LocalPOS(1), + VID(3), + EID(7).with_layer(STATIC_GRAPH_LAYER_ID), + ); let est_size2 = segment.est_size(); assert!( @@ -603,7 +613,12 @@ mod test { "Estimated size should be greater than 0 after adding an edge" ); - writer.add_inbound_edge(Some(1), LocalPOS(2), VID(4), EID(8).with_layer(0), 0); + writer.add_inbound_edge( + Some(1), + LocalPOS(2), + VID(4), + EID(8).with_layer(STATIC_GRAPH_LAYER_ID), + ); let est_size3 = segment.est_size(); assert!( @@ -613,7 +628,12 @@ mod test { // no change when adding the same edge again - writer.add_outbound_edge::(None, LocalPOS(1), VID(3), EID(7).with_layer(0), 0); + writer.add_outbound_edge::( + None, + LocalPOS(1), + VID(3), + EID(7).with_layer(STATIC_GRAPH_LAYER_ID), + ); let est_size4 = segment.est_size(); assert_eq!( est_size4, est_size3, @@ -628,7 +648,11 @@ mod test { .unwrap() .inner(); - 
writer.update_c_props(LocalPOS(1), 0, [(prop_id, Prop::U64(73))], 0); + writer.update_c_props( + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::U64(73))], + ); let est_size5 = segment.est_size(); assert!( @@ -636,7 +660,7 @@ mod test { "Estimated size should increase after adding constant properties" ); - writer.update_timestamp(17, LocalPOS(1), ELID::new(EID(0), 0), 0); + writer.update_timestamp(17, LocalPOS(1), ELID::new(EID(0), STATIC_GRAPH_LAYER_ID)); let est_size6 = segment.est_size(); assert!( @@ -651,7 +675,12 @@ mod test { .unwrap() .inner(); - writer.add_props(42, LocalPOS(1), 0, [(prop_id, Prop::F64(4.13))], 0); + writer.add_props( + 42, + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::F64(4.13))], + ); let est_size7 = segment.est_size(); assert!( @@ -659,7 +688,12 @@ mod test { "Estimated size should increase after adding temporal properties" ); - writer.add_props(72, LocalPOS(1), 0, [(prop_id, Prop::F64(5.41))], 0); + writer.add_props( + 72, + LocalPOS(1), + STATIC_GRAPH_LAYER_ID, + [(prop_id, Prop::F64(5.41))], + ); let est_size8 = segment.est_size(); assert!( est_size8 > est_size7, diff --git a/db4-storage/src/transaction/mod.rs b/db4-storage/src/transaction/mod.rs new file mode 100644 index 0000000000..439e5b00de --- /dev/null +++ b/db4-storage/src/transaction/mod.rs @@ -0,0 +1,40 @@ +use std::sync::atomic::{self, AtomicU64}; + +use crate::wal::TransactionID; + +#[derive(Debug)] +pub struct TransactionManager { + last_transaction_id: AtomicU64, +} + +impl TransactionManager { + const STARTING_TRANSACTION_ID: TransactionID = 1; + + pub fn new() -> Self { + Self { + last_transaction_id: AtomicU64::new(Self::STARTING_TRANSACTION_ID), + } + } + + /// Restores the last used transaction ID to the specified value. + /// Intended for using during recovery. + pub fn restore_transaction_id(&self, last_transaction_id: TransactionID) { + self.last_transaction_id + .store(last_transaction_id, atomic::Ordering::SeqCst) + } + + pub fn begin_transaction(&self) -> TransactionID { + self.last_transaction_id + .fetch_add(1, atomic::Ordering::SeqCst) + } + + pub fn end_transaction(&self, _transaction_id: TransactionID) { + // No-op for now. 
+ } +} + +impl Default for TransactionManager { + fn default() -> Self { + Self::new() + } +} diff --git a/db4-storage/src/wal/entry.rs b/db4-storage/src/wal/entry.rs index c49d2b3b6b..f56e2cdc3c 100644 --- a/db4-storage/src/wal/entry.rs +++ b/db4-storage/src/wal/entry.rs @@ -1,91 +1,29 @@ -use std::path::Path; - -use raphtory_api::core::{entities::properties::prop::Prop, storage::dict_mapper::MaybeNew}; +use raphtory_api::core::entities::{GidRef, properties::prop::Prop}; use raphtory_core::{ - entities::{EID, GID, VID}, + entities::{EID, VID}, storage::timeindex::EventTime, }; use crate::{ error::StorageError, - wal::{GraphReplayer, GraphWal, LSN, TransactionID, no_wal::NoWal}, + wal::{GraphReplay, GraphWalOps, LSN, TransactionID, no_wal::NoWal}, }; -impl GraphWal for NoWal { +impl GraphWalOps for NoWal { type ReplayEntry = (); - fn log_begin_transaction(&self, _transaction_id: TransactionID) -> Result { - Ok(0) - } - - fn log_end_transaction(&self, _transaction_id: TransactionID) -> Result { - Ok(0) - } - - fn log_add_static_edge( - &self, - _transaction_id: TransactionID, - _t: EventTime, - _src: VID, - _dst: VID, - ) -> Result { - Ok(0) - } - fn log_add_edge( &self, _transaction_id: TransactionID, _t: EventTime, - _src: VID, - _dst: VID, - _eid: EID, - _layer_id: usize, - _props: &[(usize, Prop)], - ) -> Result { - Ok(0) - } - - fn log_node_id( - &self, - _transaction_id: TransactionID, - _gid: GID, - _vid: VID, - ) -> Result { - Ok(0) - } - - fn log_edge_id( - &self, - _transaction_id: TransactionID, - _src: VID, - _dst: VID, + _src_name: Option>, + _src_id: VID, + _dst_name: Option>, + _dst_id: VID, _eid: EID, + _layer_name: Option<&str>, _layer_id: usize, - ) -> Result { - Ok(0) - } - - fn log_const_prop_ids>( - &self, - _transaction_id: TransactionID, - _props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result { - Ok(0) - } - - fn log_temporal_prop_ids>( - &self, - _transaction_id: TransactionID, - _props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result { - Ok(0) - } - - fn log_layer_id( - &self, - _transaction_id: TransactionID, - _name: &str, - _id: usize, + _props: Vec<(&str, usize, Prop)>, ) -> Result { Ok(0) } @@ -94,16 +32,11 @@ impl GraphWal for NoWal { Ok(0) } - fn replay_iter( - _dir: impl AsRef, - ) -> impl Iterator> { - std::iter::once(Ok((0, ()))) + fn replay_iter(&self) -> impl Iterator> { + std::iter::empty() } - fn replay_to_graph( - _dir: impl AsRef, - _graph: &mut G, - ) -> Result<(), StorageError> { - todo!() + fn replay_to_graph(&self, _graph: &mut G) -> Result<(), StorageError> { + panic!("NoWAL does not support replay") } } diff --git a/db4-storage/src/wal/mod.rs b/db4-storage/src/wal/mod.rs index c557096b1b..dabe11f47d 100644 --- a/db4-storage/src/wal/mod.rs +++ b/db4-storage/src/wal/mod.rs @@ -1,10 +1,10 @@ use crate::error::StorageError; -use raphtory_api::core::{entities::properties::prop::Prop, storage::dict_mapper::MaybeNew}; +use raphtory_api::core::entities::{GidRef, properties::prop::Prop}; use raphtory_core::{ entities::{EID, GID, VID}, storage::timeindex::EventTime, }; -use std::path::{Path, PathBuf}; +use std::path::Path; pub mod entry; pub mod no_wal; @@ -12,207 +12,112 @@ pub mod no_wal; pub type LSN = u64; pub type TransactionID = u64; -#[derive(Debug)] -pub struct WalRecord { - pub lsn: LSN, - pub data: Vec, -} - /// Core Wal methods. -pub trait Wal { - fn new(dir: Option) -> Result +pub trait WalOps { + fn new(dir: Option<&Path>) -> Result + where + Self: Sized; + + /// Loads an existing WAL file from the given directory in append mode. 
+ fn load(dir: Option<&Path>) -> Result where Self: Sized; /// Appends data to the WAL and returns the assigned LSN. fn append(&self, data: &[u8]) -> Result; - /// Immediately flushes in-memory WAL entries to disk. - fn sync(&self) -> Result<(), StorageError>; - - /// Blocks until the WAL has fsynced the given LSN to disk. - fn wait_for_sync(&self, lsn: LSN); + /// Flushes in-memory WAL entries up to the given LSN to disk. + /// Returns immediately if the given LSN is already flushed to disk. + fn flush(&self, lsn: LSN) -> Result<(), StorageError>; /// Rotates the underlying WAL file. /// `cutoff_lsn` acts as a hint for which records can be safely discarded during rotation. fn rotate(&self, cutoff_lsn: LSN) -> Result<(), StorageError>; - /// Returns an iterator over the wal entries in the given directory. - fn replay(dir: impl AsRef) -> impl Iterator>; + /// Returns an iterator over the entries in the wal. + fn replay(&self) -> impl Iterator>; + + /// Returns true if there are entries in the WAL file on disk. + fn has_entries(&self) -> bool; } -// Raphtory-specific logging & replay methods. -pub trait GraphWal { - /// ReplayEntry represents the type of the wal entry returned during replay. - type ReplayEntry; +#[derive(Debug)] +pub struct ReplayRecord { + lsn: LSN, - fn log_begin_transaction(&self, transaction_id: TransactionID) -> Result; + data: Vec, - fn log_end_transaction(&self, transaction_id: TransactionID) -> Result; + /// The raw bytes of the WAL entry stored on disk, including CRC data. + raw_bytes: Vec, +} - /// Log a static edge addition. - /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `t` - The timestamp of the edge addition - /// * `src` - The source vertex ID - /// * `dst` - The destination vertex ID - fn log_add_static_edge( - &self, - transaction_id: TransactionID, - t: EventTime, - src: VID, - dst: VID, - ) -> Result; +impl ReplayRecord { + pub fn new(lsn: LSN, data: Vec, raw_bytes: Vec) -> Self { + Self { + lsn, + data, + raw_bytes, + } + } + + pub fn lsn(&self) -> LSN { + self.lsn + } + + pub fn data(&self) -> &[u8] { + &self.data + } + + pub fn raw_bytes(&self) -> &[u8] { + &self.raw_bytes + } +} + +// Raphtory-specific logging & replay methods. +pub trait GraphWalOps { + /// ReplayEntry represents the type of the wal entry returned during replay. + type ReplayEntry; - /// Log an edge addition to a layer with temporal props. - /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `t` - The timestamp of the edge addition - /// * `src` - The source vertex ID - /// * `dst` - The destination vertex ID - /// * `eid` - The edge ID - /// * `layer_id` - The layer ID - /// * `props` - The temporal properties of the edge fn log_add_edge( &self, transaction_id: TransactionID, t: EventTime, - src: VID, - dst: VID, - eid: EID, - layer_id: usize, - props: &[(usize, Prop)], - ) -> Result; - - fn log_node_id( - &self, - transaction_id: TransactionID, - gid: GID, - vid: VID, - ) -> Result; - - fn log_edge_id( - &self, - transaction_id: TransactionID, - src: VID, - dst: VID, + src_name: Option>, + src_id: VID, + dst_name: Option>, + dst_id: VID, eid: EID, + layer_name: Option<&str>, layer_id: usize, - ) -> Result; - - /// Log constant prop name -> prop id mappings. 
- /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `props` - A slice containing new or existing tuples of (prop name, id, value) - fn log_const_prop_ids>( - &self, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result; - - /// Log temporal prop name -> prop id mappings. - /// - /// # Arguments - /// - /// * `transaction_id` - The transaction ID - /// * `props` - A slice containing new or existing tuples of (prop name, id, value). - fn log_temporal_prop_ids>( - &self, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result; - - fn log_layer_id( - &self, - transaction_id: TransactionID, - name: &str, - id: usize, + props: Vec<(&str, usize, Prop)>, ) -> Result; /// Logs a checkpoint record, indicating that all Wal operations upto and including /// `lsn` has been persisted to disk. fn log_checkpoint(&self, lsn: LSN) -> Result; - /// Returns an iterator over the wal entries in the given directory. - fn replay_iter( - dir: impl AsRef, - ) -> impl Iterator>; + /// Returns an iterator over the entries in the wal. + fn replay_iter(&self) -> impl Iterator>; - /// Replays and applies all the wal entries in the given directory to the given graph. - fn replay_to_graph( - dir: impl AsRef, - graph: &mut G, - ) -> Result<(), StorageError>; + /// Replays and applies all the entries in the wal to the given graph. + /// Subsequent appends to the WAL will start from the LSN of the last replayed entry. + fn replay_to_graph(&self, graph: &mut G) -> Result<(), StorageError>; } -/// Trait for defining callbacks for replaying from wal -pub trait GraphReplayer { - fn replay_begin_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError>; - - fn replay_end_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError>; - - fn replay_add_static_edge( - &self, - lsn: LSN, - transaction_id: TransactionID, - t: EventTime, - src: VID, - dst: VID, - ) -> Result<(), StorageError>; - +/// Trait for defining callbacks for replaying from wal. +pub trait GraphReplay { fn replay_add_edge( - &self, + &mut self, lsn: LSN, transaction_id: TransactionID, t: EventTime, - src: VID, - dst: VID, + src_name: Option, + src_id: VID, + dst_name: Option, + dst_id: VID, eid: EID, + layer_name: Option, layer_id: usize, - props: &[(usize, Prop)], - ) -> Result<(), StorageError>; - - fn replay_node_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - gid: GID, - vid: VID, - ) -> Result<(), StorageError>; - - fn replay_const_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result<(), StorageError>; - - fn replay_temporal_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result<(), StorageError>; - - fn replay_layer_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - name: &str, - id: usize, + props: Vec<(String, usize, Prop)>, ) -> Result<(), StorageError>; } diff --git a/db4-storage/src/wal/no_wal.rs b/db4-storage/src/wal/no_wal.rs index 72e666fefa..87eccc154a 100644 --- a/db4-storage/src/wal/no_wal.rs +++ b/db4-storage/src/wal/no_wal.rs @@ -1,8 +1,8 @@ -use std::path::{Path, PathBuf}; +use std::path::Path; use crate::{ error::StorageError, - wal::{LSN, Wal, WalRecord}, + wal::{LSN, ReplayRecord, WalOps}, }; /// `NoWAL` is a no-op WAL implementation that discards all writes. 
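/// It is the WAL type used by `NoOpStrategy`.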
@@ -10,8 +10,12 @@ use crate::{ #[derive(Debug)] pub struct NoWal; -impl Wal for NoWal { - fn new(_dir: Option) -> Result { +impl WalOps for NoWal { + fn new(_dir: Option<&Path>) -> Result { + Ok(Self) + } + + fn load(_dir: Option<&Path>) -> Result { Ok(Self) } @@ -19,18 +23,20 @@ impl Wal for NoWal { Ok(0) } - fn sync(&self) -> Result<(), StorageError> { + fn flush(&self, _lsn: LSN) -> Result<(), StorageError> { Ok(()) } - fn wait_for_sync(&self, _lsn: LSN) {} - fn rotate(&self, _cutoff_lsn: LSN) -> Result<(), StorageError> { Ok(()) } - fn replay(_dir: impl AsRef) -> impl Iterator> { + fn replay(&self) -> impl Iterator> { let error = "Recovery is not supported for NoWAL"; std::iter::once(Err(StorageError::GenericFailure(error.to_string()))) } + + fn has_entries(&self) -> bool { + false + } } diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 4c632688a9..d768c1f2b7 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -17,7 +17,7 @@ edition.workspace = true [dependencies] serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, optional = true } -bigdecimal = { workspace = true } +bigdecimal = { workspace = true, features = ["string-only"] } thiserror = { workspace = true } bytemuck = { workspace = true } chrono.workspace = true @@ -46,6 +46,7 @@ display-error-chain = { workspace = true, optional = true } [dev-dependencies] proptest.workspace = true +serde_json.workspace = true [features] # Enables generating the pyo3 python bindings diff --git a/raphtory-api/src/core/entities/mod.rs b/raphtory-api/src/core/entities/mod.rs index 6235882469..24da2d688c 100644 --- a/raphtory-api/src/core/entities/mod.rs +++ b/raphtory-api/src/core/entities/mod.rs @@ -65,6 +65,10 @@ impl Default for EID { } impl EID { + pub fn index(&self) -> usize { + self.0 + } + pub fn as_u64(self) -> u64 { self.0 as u64 } @@ -301,6 +305,40 @@ pub enum GidRef<'a> { Str(&'a str), } +#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)] +pub enum GidCow<'a> { + U64(u64), + Str(Cow<'a, str>), +} + +impl<'a> From> for GidCow<'a> { + fn from(value: GidRef<'a>) -> Self { + match value { + GidRef::U64(v) => Self::U64(v), + GidRef::Str(v) => Self::Str(Cow::Borrowed(v)), + } + } +} + +impl<'a> GidCow<'a> { + pub fn as_ref<'b>(&'b self) -> GidRef<'b> + where + 'a: 'b, + { + match self { + GidCow::U64(v) => GidRef::U64(*v), + GidCow::Str(v) => GidRef::Str(v), + } + } + + pub fn into_owned(self) -> GID { + match self { + GidCow::U64(v) => GID::U64(v), + GidCow::Str(v) => GID::Str(v.into_owned()), + } + } +} + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum GidType { U64, diff --git a/raphtory-api/src/core/entities/properties/meta.rs b/raphtory-api/src/core/entities/properties/meta.rs index 73d06784f7..c25621f855 100644 --- a/raphtory-api/src/core/entities/properties/meta.rs +++ b/raphtory-api/src/core/entities/properties/meta.rs @@ -332,8 +332,12 @@ impl PropMapper { } pub fn set_id_and_dtype(&self, key: impl Into, id: usize, dtype: PropType) { - let mut dtypes = self.dtypes.write(); self.set_id(key, id); + self.set_dtype(id, dtype); + } + + pub fn set_dtype(&self, id: usize, dtype: PropType) { + let mut dtypes = self.dtypes.write(); if dtypes.len() <= id { dtypes.resize(id + 1, PropType::Empty); } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index e2dea8117d..8906b0a59d 100644 --- 
a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -6,11 +6,14 @@ use crate::{ }; use arrow_array::{ cast::AsArray, types::*, Array, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray, + RecordBatch, }; -use arrow_schema::{DataType, Field, Fields, TimeUnit}; -use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer}; +use arrow_ipc::{reader::FileReader, writer::FileWriter}; +use arrow_schema::{DataType, Field, Fields, Schema, TimeUnit}; +use serde::{de, ser, Deserialize, Deserializer, Serialize, Serializer}; use std::{ hash::{Hash, Hasher}, + io::Cursor, sync::Arc, }; @@ -20,6 +23,12 @@ pub enum PropArray { Array(ArrayRef), } +#[derive(Debug, Clone, Deserialize, Serialize)] +enum SerializedPropArray { + Vec(Arc<[Prop]>), + Array(Vec), +} + impl Default for PropArray { fn default() -> Self { PropArray::Vec(vec![].into()) @@ -199,11 +208,27 @@ impl Serialize for PropArray { where S: Serializer, { - let mut state = serializer.serialize_seq(Some(self.len()))?; - for prop in self.iter_all() { - state.serialize_element(&prop)?; - } - state.end() + let serializable = match self { + PropArray::Vec(inner) => SerializedPropArray::Vec(inner.clone()), + PropArray::Array(array) => { + let mut bytes = Vec::new(); + let cursor = Cursor::new(&mut bytes); + let schema = + Schema::new(vec![Field::new("value", array.data_type().clone(), true)]); + let mut writer = FileWriter::try_new(cursor, &schema) + .map_err(|err| ser::Error::custom(err.to_string()))?; + let batch = RecordBatch::try_new(schema.into(), vec![array.clone()]) + .map_err(|err| ser::Error::custom(err.to_string()))?; + writer + .write(&batch) + .map_err(|err| ser::Error::custom(err.to_string()))?; + writer + .finish() + .map_err(|err| ser::Error::custom(err.to_string()))?; + SerializedPropArray::Array(bytes) + } + }; + serializable.serialize(serializer) } } @@ -212,8 +237,29 @@ impl<'de> Deserialize<'de> for PropArray { where D: Deserializer<'de>, { - let data = >::deserialize(deserializer)?; - Ok(PropArray::Vec(data.into())) + let data = SerializedPropArray::deserialize(deserializer)?; + let deserialized = match data { + SerializedPropArray::Vec(res) => PropArray::Vec(res), + SerializedPropArray::Array(bytes) => { + let cursor = Cursor::new(bytes); + let mut reader = FileReader::try_new(cursor, None) + .map_err(|err| de::Error::custom(err.to_string()))?; + let batch = reader.next().ok_or_else(|| { + de::Error::custom( + "Failed to deserialize PropArray: Array data missing.".to_owned(), + ) + })?; + let batch = batch.map_err(|err| de::Error::custom(err.to_string()))?; + let (_, arrays, _) = batch.into_parts(); + let array = arrays.into_iter().next().ok_or_else(|| { + de::Error::custom( + "Failed to deserialize PropArray: Array data missing.".to_owned(), + ) + })?; + PropArray::Array(array) + } + }; + Ok(deserialized) } } @@ -339,3 +385,28 @@ impl PropArrayUnwrap for Prop { } } } + +#[cfg(test)] +mod test { + use crate::core::entities::properties::prop::{Prop, PropArray}; + use arrow_array::Int64Array; + use std::sync::Arc; + + #[test] + fn test_prop_array_json() { + let array = PropArray::Array(Arc::new(Int64Array::from(vec![0, 1, 2]))); + let json = serde_json::to_string(&array).unwrap(); + println!("{json}"); + let recovered: PropArray = serde_json::from_str(&json).unwrap(); + assert_eq!(array, recovered); + } + + #[test] + fn test_prop_array_list_json() { + let array = PropArray::Vec([Prop::U64(1), Prop::U64(2)].into()); + let 
json = serde_json::to_string(&array).unwrap(); + println!("{json}"); + let recovered: PropArray = serde_json::from_str(&json).unwrap(); + assert_eq!(array, recovered); + } +} diff --git a/raphtory-core/src/python/time.rs b/raphtory-core/src/python/time.rs index e5695b153e..3d22db4146 100644 --- a/raphtory-core/src/python/time.rs +++ b/raphtory-core/src/python/time.rs @@ -1,5 +1,5 @@ use crate::utils::time::{AlignmentUnit, Interval}; -use pyo3::{exceptions::PyTypeError, prelude::*, Bound, FromPyObject, PyAny, PyResult}; +use pyo3::{exceptions::PyTypeError, prelude::*, FromPyObject, PyAny, PyResult}; impl<'py> FromPyObject<'_, 'py> for Interval { type Error = PyErr; diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index 577c941dab..3e34abbccf 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -6,7 +6,7 @@ use crate::{ use async_graphql::Context; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result}; use raphtory::{ - db::api::storage::storage::{Extension, PersistentStrategy}, + db::api::storage::storage::{Extension, PersistenceStrategy}, prelude::{GraphViewOps, PropertiesOps}, serialise::{metadata::GraphMetadata, parquet::decode_graph_metadata}, }; diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index 95548913e0..229f0ae230 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -22,7 +22,7 @@ use itertools::Itertools; use raphtory::{ db::{ api::{ - storage::storage::{Extension, PersistentStrategy}, + storage::storage::{Extension, PersistenceStrategy}, view::MaterializedGraph, }, graph::views::deletion_graph::PersistentGraph, diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 518cb3faf1..dd79c5cdf3 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -2,7 +2,7 @@ use crate::{data::DIRTY_PATH, model::blocking_io, rayon::blocking_compute}; use futures_util::io; use raphtory::{ db::api::{ - storage::storage::{Extension, PersistentStrategy}, + storage::storage::{Extension, PersistenceStrategy}, view::{internal::InternalStorageOps, MaterializedGraph}, }, errors::{GraphError, InvalidPathReason}, diff --git a/raphtory-graphql/src/url_encode.rs b/raphtory-graphql/src/url_encode.rs index 98f4c09cb5..2069a74cbd 100644 --- a/raphtory-graphql/src/url_encode.rs +++ b/raphtory-graphql/src/url_encode.rs @@ -1,7 +1,7 @@ use base64::{prelude::BASE64_URL_SAFE, DecodeError, Engine}; use raphtory::{ db::api::{ - storage::storage::{Extension, PersistentStrategy}, + storage::storage::{Extension, PersistenceStrategy}, view::MaterializedGraph, }, errors::GraphError, diff --git a/raphtory-storage/src/mutation/addition_ops.rs b/raphtory-storage/src/mutation/addition_ops.rs index 41730387ef..ea81361f1a 100644 --- a/raphtory-storage/src/mutation/addition_ops.rs +++ b/raphtory-storage/src/mutation/addition_ops.rs @@ -5,7 +5,7 @@ use crate::{ MutationError, }, }; -use db4_graph::{TransactionManager, WriteLockedGraph}; +use db4_graph::WriteLockedGraph; use raphtory_api::{ core::{ entities::{ @@ -20,7 +20,7 @@ use raphtory_api::{ inherit::Base, }; use raphtory_core::entities::{nodes::node_ref::NodeRef, ELID}; -use storage::{Extension, WalImpl}; +use storage::{wal::LSN, Extension}; pub trait InternalAdditionOps { type Error: From; @@ -93,12 +93,6 @@ pub trait InternalAdditionOps { meta: &Meta, props: impl Iterator, ) -> Result>, Self::Error>; - - /// TODO: Not 
sure the below methods belong here... - - fn transaction_manager(&self) -> &TransactionManager; - - fn wal(&self) -> &WalImpl; } pub trait EdgeWriteLock: Send + Sync { @@ -106,7 +100,6 @@ pub trait EdgeWriteLock: Send + Sync { &mut self, src: impl Into, dst: impl Into, - lsn: u64, ) -> MaybeNew; /// add edge update @@ -116,7 +109,6 @@ pub trait EdgeWriteLock: Send + Sync { src: impl Into, dst: impl Into, eid: MaybeNew, - lsn: u64, props: impl IntoIterator, ) -> MaybeNew; @@ -125,12 +117,13 @@ pub trait EdgeWriteLock: Send + Sync { t: EventTime, src: impl Into, dst: impl Into, - lsn: u64, layer: usize, ) -> MaybeNew; fn store_src_node_info(&mut self, id: impl Into, node_id: Option); fn store_dst_node_info(&mut self, id: impl Into, node_id: Option); + + fn set_lsn(&mut self, lsn: LSN); } pub trait SessionAdditionOps: Send + Sync { @@ -260,14 +253,6 @@ impl InternalAdditionOps for GraphStorage { Ok(self.mutable()?.validate_gids(gids)?) } - fn transaction_manager(&self) -> &TransactionManager { - self.mutable().unwrap().transaction_manager.as_ref() - } - - fn wal(&self) -> &WalImpl { - self.mutable().unwrap().wal.as_ref() - } - fn resolve_node_and_type( &self, id: NodeRef, @@ -377,16 +362,6 @@ where self.base().validate_gids(gids) } - #[inline] - fn transaction_manager(&self) -> &TransactionManager { - self.base().transaction_manager() - } - - #[inline] - fn wal(&self) -> &WalImpl { - self.base().wal() - } - fn resolve_node_and_type( &self, id: NodeRef, diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index cfb3272a49..ed99cb3e9f 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -1,11 +1,12 @@ use crate::mutation::{ addition_ops::{EdgeWriteLock, InternalAdditionOps, SessionAdditionOps}, + durability_ops::DurabilityOps, MutationError, }; -use db4_graph::{TemporalGraph, TransactionManager, WriteLockedGraph}; +use db4_graph::{TemporalGraph, WriteLockedGraph}; use raphtory_api::core::{ entities::properties::{ - meta::{Meta, NODE_ID_IDX, NODE_TYPE_IDX}, + meta::{Meta, NODE_ID_IDX, NODE_TYPE_IDX, STATIC_GRAPH_LAYER_ID}, prop::{Prop, PropType, PropUnwrap}, }, storage::dict_mapper::MaybeNew, @@ -21,15 +22,17 @@ use raphtory_core::{ use storage::{ api::{edges::EdgeSegmentOps, graph_props::GraphPropSegmentOps, nodes::NodeSegmentOps}, pages::{node_page::writer::node_info_as_props, session::WriteSession}, - persist::strategy::{Config, PersistentStrategy}, + persist::{config::ConfigOps, strategy::PersistenceStrategy}, properties::props_meta_writer::PropsMetaWriter, resolver::GIDResolverOps, - Extension, WalImpl, ES, GS, NS, + transaction::TransactionManager, + wal::LSN, + Extension, Wal, ES, GS, NS, }; pub struct WriteS<'a, EXT> where - EXT: PersistentStrategy, ES = ES, GS = GS>, + EXT: PersistenceStrategy, ES = ES, GS = GS>, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, @@ -44,7 +47,7 @@ pub struct UnlockedSession<'a> { impl<'a, EXT> EdgeWriteLock for WriteS<'a, EXT> where - EXT: PersistentStrategy, ES = ES, GS = GS>, + EXT: PersistenceStrategy, ES = ES, GS = GS>, NS: NodeSegmentOps, ES: EdgeSegmentOps, GS: GraphPropSegmentOps, @@ -53,9 +56,8 @@ where &mut self, src: impl Into, dst: impl Into, - lsn: u64, ) -> MaybeNew { - self.static_session.add_static_edge(src, dst, lsn) + self.static_session.add_static_edge(src, dst) } fn internal_add_edge( @@ -64,11 +66,10 @@ where src: impl Into, dst: impl Into, eid: MaybeNew, - lsn: u64, props: impl 
IntoIterator, ) -> MaybeNew { self.static_session - .add_edge_into_layer(t, src, dst, eid, lsn, props); + .add_edge_into_layer(t, src, dst, eid, props); eid } @@ -78,18 +79,16 @@ where t: EventTime, src: impl Into, dst: impl Into, - lsn: u64, layer: usize, ) -> MaybeNew { let src = src.into(); let dst = dst.into(); let eid = self .static_session - .add_static_edge(src, dst, lsn) + .add_static_edge(src, dst) .map(|eid| eid.with_layer_deletion(layer)); - self.static_session - .delete_edge_from_layer(t, src, dst, eid, lsn); + self.static_session.delete_edge_from_layer(t, src, dst, eid); eid } @@ -101,7 +100,7 @@ where self.static_session .node_writers() .get_mut_src() - .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())], 0); + .update_c_props(pos, STATIC_GRAPH_LAYER_ID, [(NODE_ID_IDX, id.into())]); }; } @@ -112,9 +111,13 @@ where self.static_session .node_writers() .get_mut_dst() - .update_c_props(pos, 0, [(NODE_ID_IDX, id.into())], 0); + .update_c_props(pos, STATIC_GRAPH_LAYER_ID, [(NODE_ID_IDX, id.into())]); }; } + + fn set_lsn(&mut self, lsn: LSN) { + self.static_session.set_lsn(lsn); + } } impl<'a> SessionAdditionOps for UnlockedSession<'a> { @@ -184,9 +187,7 @@ impl<'a> SessionAdditionOps for UnlockedSession<'a> { impl InternalAdditionOps for TemporalGraph { type Error = MutationError; - type WS<'a> = UnlockedSession<'a>; - type AtomicAddEdge<'a> = WriteS<'a, Extension>; fn write_lock(&self) -> Result, Self::Error> { @@ -219,7 +220,7 @@ impl InternalAdditionOps for TemporalGraph { self.event_counter .fetch_add(1, std::sync::atomic::Ordering::Relaxed), ); - pos.as_vid(seg, self.extension().max_node_page_len()) + pos.as_vid(seg, self.extension().config().max_node_page_len()) })?; Ok(id) @@ -240,25 +241,25 @@ impl InternalAdditionOps for TemporalGraph { None => { writer.update_c_props( local_pos, - 0, + STATIC_GRAPH_LAYER_ID, node_info_as_props(id.as_gid_ref().left(), None), - 0, ); MaybeNew::Existing(0) } Some(node_type) => { - let old_type = writer.get_metadata(local_pos, 0, NODE_TYPE_IDX).into_u64(); + let old_type = writer + .get_metadata(local_pos, STATIC_GRAPH_LAYER_ID, NODE_TYPE_IDX) + .into_u64(); match old_type { None => { let node_type_id = self.node_meta().get_or_create_node_type_id(node_type); writer.update_c_props( local_pos, - 0, + STATIC_GRAPH_LAYER_ID, node_info_as_props( id.as_gid_ref().left(), Some(node_type_id.inner()).filter(|&id| id != 0), ), - 0, ); node_type_id } @@ -322,7 +323,7 @@ impl InternalAdditionOps for TemporalGraph { ) -> Result<(), Self::Error> { let (segment, node_pos) = self.storage().nodes().resolve_pos(v); let mut node_writer = self.storage().node_writer(segment); - node_writer.add_props(t, node_pos, 0, props, 0); + node_writer.add_props(t, node_pos, STATIC_GRAPH_LAYER_ID, props); Ok(()) } @@ -363,12 +364,14 @@ impl InternalAdditionOps for TemporalGraph { Ok(prop_ids) } } +} - fn transaction_manager(&self) -> &TransactionManager { - &self.transaction_manager +impl DurabilityOps for TemporalGraph { + fn transaction_manager(&self) -> Result<&TransactionManager, MutationError> { + Ok(&self.transaction_manager) } - fn wal(&self) -> &WalImpl { - &self.wal + fn wal(&self) -> Result<&Wal, MutationError> { + Ok(&self.extension().wal()) } } diff --git a/raphtory-storage/src/mutation/deletion_ops.rs b/raphtory-storage/src/mutation/deletion_ops.rs index 770110bd17..cb69cc72cc 100644 --- a/raphtory-storage/src/mutation/deletion_ops.rs +++ b/raphtory-storage/src/mutation/deletion_ops.rs @@ -1,4 +1,5 @@ use crate::{graph::graph::GraphStorage, 
mutation::MutationError}; +use db4_graph::TemporalGraph; use raphtory_api::{ core::{ entities::{EID, VID}, @@ -25,7 +26,7 @@ pub trait InternalDeletionOps { ) -> Result<(), Self::Error>; } -impl InternalDeletionOps for db4_graph::TemporalGraph { +impl InternalDeletionOps for TemporalGraph { type Error = MutationError; fn internal_delete_edge( @@ -36,8 +37,9 @@ impl InternalDeletionOps for db4_graph::TemporalGraph { layer: usize, ) -> Result, Self::Error> { let mut session = self.storage().write_session(src, dst, None); - let edge = session.add_static_edge(src, dst, 0); - session.delete_edge_from_layer(t, src, dst, edge.map(|eid| eid.with_layer(layer)), 0); + session.set_lsn(0); + let edge = session.add_static_edge(src, dst); + session.delete_edge_from_layer(t, src, dst, edge.map(|eid| eid.with_layer(layer))); Ok(edge) } @@ -52,7 +54,7 @@ impl InternalDeletionOps for db4_graph::TemporalGraph { let (src, dst) = writer.get_edge(0, edge_pos).unwrap_or_else(|| { panic!("Internal Error: Edge {eid:?} not found in storage"); }); - writer.delete_edge(t, edge_pos, src, dst, layer, 0); + writer.delete_edge(t, edge_pos, src, dst, layer); Ok(()) } } diff --git a/raphtory-storage/src/mutation/durability_ops.rs b/raphtory-storage/src/mutation/durability_ops.rs new file mode 100644 index 0000000000..0a2205b5a8 --- /dev/null +++ b/raphtory-storage/src/mutation/durability_ops.rs @@ -0,0 +1,19 @@ +use crate::{graph::graph::GraphStorage, mutation::MutationError}; +use storage::{transaction::TransactionManager, Wal}; + +/// Accessor methods for transactions and write-ahead logging. +pub trait DurabilityOps { + fn transaction_manager(&self) -> Result<&TransactionManager, MutationError>; + + fn wal(&self) -> Result<&Wal, MutationError>; +} + +impl DurabilityOps for GraphStorage { + fn transaction_manager(&self) -> Result<&TransactionManager, MutationError> { + self.mutable()?.transaction_manager() + } + + fn wal(&self) -> Result<&Wal, MutationError> { + self.mutable()?.wal() + } +} diff --git a/raphtory-storage/src/mutation/mod.rs b/raphtory-storage/src/mutation/mod.rs index 44f18037b9..3b9e16ac33 100644 --- a/raphtory-storage/src/mutation/mod.rs +++ b/raphtory-storage/src/mutation/mod.rs @@ -30,6 +30,7 @@ use thiserror::Error; pub mod addition_ops; pub mod addition_ops_ext; pub mod deletion_ops; +pub mod durability_ops; pub mod property_addition_ops; pub type NodeWriterT<'a> = NodeWriter<'a, RwLockWriteGuard<'a, MemNodeSegment>, NS>; diff --git a/raphtory-storage/src/mutation/property_addition_ops.rs b/raphtory-storage/src/mutation/property_addition_ops.rs index aaf7f3a952..180468843d 100644 --- a/raphtory-storage/src/mutation/property_addition_ops.rs +++ b/raphtory-storage/src/mutation/property_addition_ops.rs @@ -4,7 +4,10 @@ use crate::{ }; use raphtory_api::{ core::{ - entities::{properties::prop::Prop, EID, VID}, + entities::{ + properties::{meta::STATIC_GRAPH_LAYER_ID, prop::Prop}, + EID, VID, + }, storage::timeindex::EventTime, }, inherit::Base, @@ -61,21 +64,21 @@ impl InternalPropertyAdditionOps for db4_graph::TemporalGraph { props: &[(usize, Prop)], ) -> Result<(), Self::Error> { let mut writer = self.storage().graph_props().writer(); - writer.add_properties(t, props.iter().map(|(id, prop)| (*id, prop.clone())), 0); + writer.add_properties(t, props.iter().map(|(id, prop)| (*id, prop.clone()))); Ok(()) } fn internal_add_metadata(&self, props: &[(usize, Prop)]) -> Result<(), Self::Error> { let mut writer = self.storage().graph_props().writer(); writer.check_metadata(props)?; - 
writer.update_metadata(props.iter().map(|(id, prop)| (*id, prop.clone())), 0); + writer.update_metadata(props.iter().map(|(id, prop)| (*id, prop.clone()))); Ok(()) } // FIXME: this can't fail fn internal_update_metadata(&self, props: &[(usize, Prop)]) -> Result<(), Self::Error> { let mut writer = self.storage().graph_props().writer(); - writer.update_metadata(props.iter().map(|(id, prop)| (*id, prop.clone())), 0); + writer.update_metadata(props.iter().map(|(id, prop)| (*id, prop.clone()))); Ok(()) } @@ -86,8 +89,8 @@ impl InternalPropertyAdditionOps for db4_graph::TemporalGraph { ) -> Result, Self::Error> { let (segment_id, node_pos) = self.storage().nodes().resolve_pos(vid); let mut writer = self.storage().nodes().writer(segment_id); - writer.check_metadata(node_pos, 0, &props)?; - writer.update_c_props(node_pos, 0, props, 0); + writer.check_metadata(node_pos, STATIC_GRAPH_LAYER_ID, &props)?; + writer.update_c_props(node_pos, STATIC_GRAPH_LAYER_ID, props); Ok(writer) } @@ -98,7 +101,7 @@ impl InternalPropertyAdditionOps for db4_graph::TemporalGraph { ) -> Result, Self::Error> { let (segment_id, node_pos) = self.storage().nodes().resolve_pos(vid); let mut writer = self.storage().nodes().writer(segment_id); - writer.update_c_props(node_pos, 0, props, 0); + writer.update_c_props(node_pos, STATIC_GRAPH_LAYER_ID, props); Ok(writer) } diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index 1b48830c08..69ba087003 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -54,7 +54,7 @@ uuid = { workspace = true } parquet = { workspace = true } arrow-json = { workspace = true } arrow-csv = { workspace = true } -arrow = { workspace = true, features = ["chrono-tz"]} +arrow = { workspace = true, features = ["chrono-tz"] } # io optional dependencies csv = { workspace = true, optional = true } @@ -155,7 +155,7 @@ python = [ "dep:numpy", "dep:num", "dep:display-error-chain", - "pyo3-arrow", + "dep:pyo3-arrow", "raphtory-api/python", "raphtory-core/python", "kdam/notebook", diff --git a/raphtory/src/db/api/mutation/addition_ops.rs b/raphtory/src/db/api/mutation/addition_ops.rs index e1c0966e7d..22da9e9326 100644 --- a/raphtory/src/db/api/mutation/addition_ops.rs +++ b/raphtory/src/db/api/mutation/addition_ops.rs @@ -7,17 +7,20 @@ use crate::{ errors::{into_graph_err, GraphError}, prelude::{GraphViewOps, NodeViewOps}, }; - -use raphtory_storage::mutation::{ - addition_ops::{EdgeWriteLock, InternalAdditionOps}, - MutationError, -}; - use raphtory_api::core::{ entities::properties::prop::Prop, utils::time::{IntoTimeWithFormat, TryIntoInputTime}, }; -use storage::wal::{GraphWal, Wal}; +use raphtory_core::entities::nodes::node_ref::NodeRef; +use raphtory_storage::{ + core_ops::CoreGraphOps, + mutation::{ + addition_ops::{EdgeWriteLock, InternalAdditionOps}, + durability_ops::DurabilityOps, + MutationError, + }, +}; +use storage::wal::{GraphWalOps, WalOps}; pub trait AdditionOps: StaticGraphViewOps + InternalAdditionOps> { // TODO: Probably add vector reference here like add @@ -252,12 +255,15 @@ impl> + StaticGraphViewOps> Addit props: PII, layer: Option<&str>, ) -> Result, GraphError> { - // Log transaction start - let transaction_id = self.transaction_manager().begin_transaction(); + let transaction_manager = self.core_graph().transaction_manager()?; + let wal = self.core_graph().wal()?; + let transaction_id = transaction_manager.begin_transaction(); let session = self.write_session().map_err(|err| err.into())?; + let src = src.as_node_ref(); + let dst = dst.as_node_ref(); self.validate_gids( - 
[src.as_node_ref(), dst.as_node_ref()] + [src, dst] .iter() .filter_map(|node_ref| node_ref.as_gid_ref().left()), ) @@ -270,100 +276,99 @@ impl> + StaticGraphViewOps> Addit props.into_iter().map(|(k, v)| (k, v.into())), ) .map_err(into_graph_err)?; - - // Log prop name -> prop id mappings - self.wal() - .log_temporal_prop_ids(transaction_id, &props_with_status) - .unwrap(); - - let props = props_with_status - .into_iter() - .map(|maybe_new| { - let (_, prop_id, prop) = maybe_new.inner(); - (prop_id, prop) - }) - .collect::>(); - let ti = time_from_input_session(&session, t)?; - let src_id = self - .resolve_node(src.as_node_ref()) - .map_err(into_graph_err)?; - let dst_id = self - .resolve_node(dst.as_node_ref()) - .map_err(into_graph_err)?; - let layer_id = self.resolve_layer(layer).map_err(into_graph_err)?; - - // Log node -> node id mappings - // FIXME: We are logging node -> node id mappings AFTER they are inserted into the - // resolver. Make sure resolver mapping CANNOT get to disk before Wal. - if let Some(gid) = src.as_node_ref().as_gid_ref().left() { - self.wal() - .log_node_id(transaction_id, gid.into(), src_id.inner()) - .unwrap(); - } - if let Some(gid) = dst.as_node_ref().as_gid_ref().left() { - self.wal() - .log_node_id(transaction_id, gid.into(), dst_id.inner()) - .unwrap(); - } - - let src_id = src_id.inner(); - let dst_id = dst_id.inner(); - - // Log layer -> layer id mappings - if let Some(layer) = layer { - self.wal() - .log_layer_id(transaction_id, layer, layer_id.inner()) - .unwrap(); - } - - let layer_id = layer_id.inner(); - - // Holds all locks for nodes and edge until add_edge_op goes out of scope - let mut add_edge_op = self - .atomic_add_edge(src_id, dst_id, None, layer_id) - .map_err(into_graph_err)?; + let src_gid = match src { + NodeRef::Internal(_) => None, + NodeRef::External(gid_ref) => Some(gid_ref), + }; + + let dst_gid = match dst { + NodeRef::Internal(_) => None, + NodeRef::External(gid_ref) => Some(gid_ref), + }; + + // At this point we start modifying the graph, any error after this point is fatal and should + // panic! + let (edge_id, src_id, dst_id, layer_id) = { + // FIXME: We are logging node -> node id mappings AFTER they are inserted into the + // resolver. Make sure resolver mapping CANNOT get to disk before Wal. + let src_id = self.resolve_node(src).map_err(into_graph_err)?; + let dst_id = self.resolve_node(dst).map_err(into_graph_err)?; + let layer_id = self.resolve_layer(layer).map_err(into_graph_err)?; + + let src_id = src_id.inner(); + let dst_id = dst_id.inner(); + let layer_id = layer_id.inner(); + + // Hold all locks for src node, dst node and edge until add_edge_op goes out of scope. + let mut add_edge_op = self + .atomic_add_edge(src_id, dst_id, None, layer_id) + .map_err(into_graph_err)?; + + // NOTE: We log edge id after it is inserted into the edge segment. + // This is fine as long as we hold onto the edge segment lock through add_edge_op + // for the entire operation. + let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id); + + // All names, ids and values have been generated for this operation. + // Create a wal entry to mark it as durable. 
+ let props_for_wal = props_with_status + .iter() + .map(|maybe_new| { + let (prop_name, prop_id, prop) = maybe_new.as_ref().inner(); + (prop_name.as_ref(), *prop_id, prop.clone()) + }) + .collect::>(); - // Log edge addition - let add_static_edge_lsn = self - .wal() - .log_add_static_edge(transaction_id, ti, src_id, dst_id) - .unwrap(); - let edge_id = add_edge_op.internal_add_static_edge(src_id, dst_id, add_static_edge_lsn); - - // Log edge -> edge id mappings - // NOTE: We log edge id mappings after they are inserted into edge segments. - // This is fine as long as we hold onto segment locks for the entire operation. - let add_edge_lsn = self - .wal() - .log_add_edge( + let lsn = wal.log_add_edge( transaction_id, ti, + src_gid, src_id, + dst_gid, dst_id, edge_id.inner(), + layer, layer_id, - &props, - ) - .unwrap(); - let edge_id = add_edge_op.internal_add_edge( - ti, - src_id, - dst_id, - edge_id.map(|eid| eid.with_layer(layer_id)), - add_edge_lsn, - props, - ); - - add_edge_op.store_src_node_info(src_id, src.as_node_ref().as_gid_ref().left()); - add_edge_op.store_dst_node_info(dst_id, dst.as_node_ref().as_gid_ref().left()); - - // Log transaction end - self.transaction_manager().end_transaction(transaction_id); - - // Flush all wal entries to disk. - self.wal().sync().unwrap(); + props_for_wal, + )?; + + let props = props_with_status + .into_iter() + .map(|maybe_new| { + let (_, prop_id, prop) = maybe_new.inner(); + (prop_id, prop) + }) + .collect::>(); + + let edge_id = add_edge_op.internal_add_edge( + ti, + src_id, + dst_id, + edge_id.map(|eid| eid.with_layer(layer_id)), + props, + ); + + add_edge_op.store_src_node_info(src_id, src.as_node_ref().as_gid_ref().left()); + add_edge_op.store_dst_node_info(dst_id, dst.as_node_ref().as_gid_ref().left()); + + // Update the src, dst and edge segments with the lsn of the wal entry. + add_edge_op.set_lsn(lsn); + + self.core_graph() + .transaction_manager()? + .end_transaction(transaction_id); + + // Drop to release all the segment locks. + drop(add_edge_op); + + // Flush the wal entry to disk. 
+ // Any error here is fatal + self.core_graph().wal()?.flush(lsn)?; + Ok::<_, GraphError>((edge_id, src_id, dst_id, layer_id)) + } + .unwrap(); Ok(EdgeView::new( self.clone(), diff --git a/raphtory/src/db/api/mutation/deletion_ops.rs b/raphtory/src/db/api/mutation/deletion_ops.rs index 83b9b2bf9a..9fe2a04d3d 100644 --- a/raphtory/src/db/api/mutation/deletion_ops.rs +++ b/raphtory/src/db/api/mutation/deletion_ops.rs @@ -51,7 +51,7 @@ pub trait DeletionOps: .atomic_add_edge(src_id, dst_id, None, layer_id) .map_err(into_graph_err)?; - let edge_id = add_edge_op.internal_delete_edge(ti, src_id, dst_id, 0, layer_id); + let edge_id = add_edge_op.internal_delete_edge(ti, src_id, dst_id, layer_id); add_edge_op.store_src_node_info(src_id, src.as_node_ref().as_gid_ref().left()); add_edge_op.store_dst_node_info(dst_id, dst.as_node_ref().as_gid_ref().left()); diff --git a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs index 2d67cf6c41..15aedbd025 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs @@ -1,6 +1,6 @@ use super::GraphStorage; use crate::{ - core::{entities::LayerIds, storage::timeindex::TimeIndexOps}, + core::storage::timeindex::TimeIndexOps, db::api::view::internal::{GraphTimeSemanticsOps, TimeSemantics}, prelude::Prop, }; @@ -176,12 +176,11 @@ impl GraphTimeSemanticsOps for GraphStorage { } } -#[cfg(test)] +#[cfg(all(test, feature = "search"))] mod test_graph_storage { use crate::{db::api::view::StaticGraphViewOps, prelude::AdditionOps}; use raphtory_api::core::entities::properties::prop::Prop; - #[cfg(all(test, feature = "search"))] fn init_graph_for_nodes_tests(graph: G) -> G { let nodes = vec![ (6, "N1", vec![("p1", Prop::U64(2u64))]), @@ -207,7 +206,6 @@ mod test_graph_storage { graph } - #[cfg(all(test, feature = "search"))] fn init_graph_for_edges_tests(graph: G) -> G { let edges = vec![ (6, "N1", "N2", vec![("p1", Prop::U64(2u64))], Some("layer1")), @@ -236,7 +234,6 @@ mod test_graph_storage { graph } - #[cfg(all(test, feature = "search"))] mod search_nodes { use super::*; use crate::{ @@ -268,7 +265,6 @@ mod test_graph_storage { } } - #[cfg(all(test, feature = "search"))] mod search_edges { use super::*; use crate::{ diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index e38825b65c..1fb0ea1dea 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -6,7 +6,7 @@ use crate::{ }, errors::GraphError, }; -use db4_graph::{TemporalGraph, TransactionManager, WriteLockedGraph}; +use db4_graph::{GraphDir, TemporalGraph, WriteLockedGraph}; use raphtory_api::core::{ entities::{ properties::{ @@ -35,11 +35,15 @@ use std::{ path::Path, sync::Arc, }; - -pub use storage::{ - persist::strategy::{Config, PersistentStrategy}, - Extension, WalImpl, +use storage::{ + persist::config::ConfigOps, + wal::{GraphWalOps, WalOps, LSN}, + Wal, }; + +// Re-export for raphtory dependencies to use when creating graphs. 
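+// For example, `Config` and `PersistenceStrategy` are pulled from here by the `Graph` constructors and graph view modules;
+// an illustrative call (assuming `GraphPaths` is implemented for the path type) is `Graph::new_at_path_with_config("/path/to/storage", Config::default())`.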
+pub use storage::{persist::strategy::PersistenceStrategy, Config, Extension}; + #[cfg(feature = "search")] use { crate::{ @@ -103,31 +107,53 @@ impl Storage { } pub(crate) fn new_at_path(path: impl AsRef) -> Result { + let config = Config::default(); + let graph_dir = GraphDir::from(path.as_ref()); + let wal_dir = graph_dir.wal_dir(); + let wal = Arc::new(Wal::new(Some(wal_dir.as_path()))?); + let ext = Extension::new(config, wal.clone()); + let temporal_graph = TemporalGraph::new_at_path_with_ext(path, ext)?; + Ok(Self { - graph: GraphStorage::Unlocked(Arc::new(TemporalGraph::new_with_path( - path, - Extension::default(), - )?)), + graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), }) } - pub(crate) fn new_with_path_and_ext( + pub(crate) fn new_at_path_with_config( path: impl AsRef, - ext: Extension, + config: Config, ) -> Result { + let graph_dir = GraphDir::from(path.as_ref()); + let wal_dir = graph_dir.wal_dir(); + let wal = Arc::new(Wal::new(Some(wal_dir.as_path()))?); + let ext = Extension::new(config, wal.clone()); + let temporal_graph = TemporalGraph::new_at_path_with_ext(path, ext)?; + Ok(Self { - graph: GraphStorage::Unlocked(Arc::new(TemporalGraph::new_with_path(path, ext)?)), + graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), }) } pub(crate) fn load_from(path: impl AsRef) -> Result { - let graph = GraphStorage::Unlocked(Arc::new(TemporalGraph::load_from_path(path)?)); + let config = Config::load_from_dir(path.as_ref())?; + let graph_dir = GraphDir::from(path.as_ref()); + let wal_dir = graph_dir.wal_dir(); + let wal = Arc::new(Wal::load(Some(wal_dir.as_path()))?); + let ext = Extension::new(config, wal.clone()); + let temporal_graph = TemporalGraph::load_from_path(path, ext)?; + + // Replay any pending writes from the WAL. 
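+ // Take the graph write lock once and apply any outstanding WAL entries before exposing the storage.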
+ if wal.has_entries() { + let mut write_locked_graph = temporal_graph.write_lock()?; + wal.replay_to_graph(&mut write_locked_graph)?; + } + Ok(Self { - graph, + graph: GraphStorage::Unlocked(Arc::new(temporal_graph)), #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), }) @@ -314,9 +340,8 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { &mut self, src: impl Into, dst: impl Into, - lsn: u64, ) -> MaybeNew { - self.session.internal_add_static_edge(src, dst, lsn) + self.session.internal_add_static_edge(src, dst) } fn internal_add_edge( @@ -325,11 +350,9 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { src: impl Into, dst: impl Into, e_id: MaybeNew, - lsn: u64, props: impl IntoIterator, ) -> MaybeNew { - self.session - .internal_add_edge(t, src, dst, e_id, lsn, props) + self.session.internal_add_edge(t, src, dst, e_id, props) } fn internal_delete_edge( @@ -337,10 +360,9 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { t: EventTime, src: impl Into, dst: impl Into, - lsn: u64, layer: usize, ) -> MaybeNew { - self.session.internal_delete_edge(t, src, dst, lsn, layer) + self.session.internal_delete_edge(t, src, dst, layer) } fn store_src_node_info(&mut self, id: impl Into, node_id: Option) { @@ -350,6 +372,10 @@ impl EdgeWriteLock for AtomicAddEdgeSession<'_> { fn store_dst_node_info(&mut self, id: impl Into, node_id: Option) { self.session.store_dst_node_info(id, node_id); } + + fn set_lsn(&mut self, lsn: LSN) { + self.session.set_lsn(lsn); + } } impl<'a> SessionAdditionOps for StorageWriteSession<'a> { @@ -533,14 +559,6 @@ impl InternalAdditionOps for Storage { Ok(self.graph.validate_gids(gids)?) } - fn transaction_manager(&self) -> &TransactionManager { - self.graph.mutable().unwrap().transaction_manager.as_ref() - } - - fn wal(&self) -> &WalImpl { - self.graph.mutable().unwrap().wal.as_ref() - } - fn resolve_node_and_type( &self, id: NodeRef, diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index c076c9ee6b..13959b082e 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -23,13 +23,13 @@ use crate::{ prelude::*, }; use ahash::HashSet; -use db4_graph::TemporalGraph; +use db4_graph::{GraphDir, TemporalGraph}; use itertools::Itertools; use raphtory_api::{ atomic_extra::atomic_usize_from_mut_slice, core::{ entities::{ - properties::meta::{Meta, PropMapper}, + properties::meta::{Meta, PropMapper, STATIC_GRAPH_LAYER_ID}, EID, }, storage::{arc_str::ArcStr, timeindex::EventTime}, @@ -53,7 +53,7 @@ use std::{ path::Path, sync::{atomic::Ordering, Arc}, }; -use storage::{persist::strategy::PersistentStrategy, Extension}; +use storage::{persist::strategy::PersistenceStrategy, wal::WalOps, Extension, Wal}; #[cfg(feature = "search")] use crate::{ @@ -288,12 +288,19 @@ fn materialize_impl( node_meta.set_layer_mapper(layer_meta.clone()); + // Create new WAL file for the new materialized graph. 
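+ // Reuse the source graph's persistence config, but root the new WAL under the target path's wal directory when a path is given.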
+ let graph_dir = path.map(|p| GraphDir::from(p)); + let wal_dir = graph_dir.map(|dir| dir.wal_dir()); + let wal = Wal::new(wal_dir.as_deref())?; + let config = storage.extension().config().clone(); + let ext = Extension::new(config, Arc::new(wal)); + let temporal_graph = TemporalGraph::new_with_meta( path.map(|p| p.into()), node_meta, edge_meta, graph_props_meta, - storage.extension().clone(), + ext, )?; if let Some(earliest) = graph.earliest_time() { @@ -310,7 +317,6 @@ fn materialize_impl( .set_event_id(storage.read_event_id()); let temporal_graph = Arc::new(temporal_graph); - let graph_storage = GraphStorage::from(temporal_graph.clone()); { @@ -345,8 +351,10 @@ fn materialize_impl( for node in graph.nodes().iter() { let new_id = get_new_vid(node.node, &index, &node_map); let gid = node.id(); + if let Some(node_pos) = shard.resolve_pos(new_id) { let mut writer = shard.writer(); + if let Some(node_type) = node.node_type() { let new_type_id = graph_storage .node_meta() @@ -355,31 +363,31 @@ fn materialize_impl( .inner(); writer.store_node_id_and_node_type( node_pos, - 0, + STATIC_GRAPH_LAYER_ID, gid.as_ref(), new_type_id, - 0, ); } else { - writer.store_node_id(node_pos, 0, gid.clone(), 0); + writer.store_node_id(node_pos, STATIC_GRAPH_LAYER_ID, gid.clone()); } + graph_storage .write_session()? .set_node(gid.as_ref(), new_id)?; for (t, row) in node.rows() { - writer.add_props(t, node_pos, 0, row, 0); + writer.add_props(t, node_pos, STATIC_GRAPH_LAYER_ID, row); } writer.update_c_props( node_pos, - 0, + STATIC_GRAPH_LAYER_ID, node.metadata_ids() .filter_map(|id| node.get_metadata(id).map(|prop| (id, prop))), - 0, ); } } + Ok::<(), MutationError>(()) })?; @@ -390,7 +398,7 @@ fn materialize_impl( new_eids.push(new_eid); max_eid = new_eid.0.max(max_eid); } - new_storage.resize_chunks_to_num_edges(EID(max_eid)); + new_storage.resize_chunks_to_eid(EID(max_eid)); for layer_id in &layer_map { new_storage.edges.ensure_layer(*layer_id); @@ -404,13 +412,13 @@ fn materialize_impl( if let Some(edge_pos) = shard.resolve_pos(eid) { let mut writer = shard.writer(); // make the edge for the first time - writer.add_static_edge(Some(edge_pos), src, dst, 0, false); + writer.add_static_edge(Some(edge_pos), src, dst, false); for edge in edge.explode_layers() { let layer = layer_map[edge.edge.layer().unwrap()]; for edge in edge.explode() { let t = edge.edge.time().unwrap(); - writer.add_edge(t, edge_pos, src, dst, [], layer, 0); + writer.add_edge(t, edge_pos, src, dst, [], layer); } //TODO: move this in edge.row() for (t, t_props) in edge @@ -430,7 +438,7 @@ fn materialize_impl( let props = t_props .map(|(_, prop_id, prop)| (prop_id, prop)) .collect::>(); - writer.add_edge(t, edge_pos, src, dst, props, layer, 0); + writer.add_edge(t, edge_pos, src, dst, props, layer); } writer.update_c_props( edge_pos, @@ -451,7 +459,7 @@ fn materialize_impl( graph.layer_ids(), ) { let layer = layer_map[layer]; - writer.delete_edge(t, edge_pos, src, dst, layer, 0); + writer.delete_edge(t, edge_pos, src, dst, layer); } } } @@ -468,12 +476,12 @@ fn materialize_impl( if let Some(node_pos) = maybe_src_pos { let mut writer = shard.writer(); - writer.add_static_outbound_edge(node_pos, dst_id, eid, 0); + writer.add_static_outbound_edge(node_pos, dst_id, eid); } if let Some(node_pos) = maybe_dst_pos { let mut writer = shard.writer(); - writer.add_static_inbound_edge(node_pos, src_id, eid, 0); + writer.add_static_inbound_edge(node_pos, src_id, eid); } for e in edge.explode_layers() { @@ -485,7 +493,6 @@ fn materialize_impl( node_pos, 
dst_id, eid.with_layer(layer), - 0, ); } if let Some(node_pos) = maybe_dst_pos { @@ -495,7 +502,6 @@ fn materialize_impl( node_pos, src_id, eid.with_layer(layer), - 0, ); } } @@ -506,7 +512,7 @@ fn materialize_impl( let t = e.time().expect("exploded edge should have time"); let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, src_pos, eid.with_layer(l), 0); + writer.update_timestamp(t, src_pos, eid.with_layer(l)); } if let Some(dst_pos) = maybe_dst_pos { if maybe_src_pos.is_none_or(|src_pos| src_pos != dst_pos) { @@ -514,7 +520,7 @@ fn materialize_impl( let t = e.time().expect("exploded edge should have time"); let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, dst_pos, eid.with_layer(l), 0); + writer.update_timestamp(t, dst_pos, eid.with_layer(l)); } } } @@ -529,12 +535,12 @@ fn materialize_impl( let layer = layer_map[layer]; if let Some(src_pos) = maybe_src_pos { let mut writer = shard.writer(); - writer.update_timestamp(t, src_pos, eid.with_layer_deletion(layer), 0); + writer.update_timestamp(t, src_pos, eid.with_layer_deletion(layer)); } if let Some(dst_pos) = maybe_dst_pos { if maybe_src_pos.is_none_or(|src_pos| src_pos != dst_pos) { let mut writer = shard.writer(); - writer.update_timestamp(t, dst_pos, eid.with_layer_deletion(layer), 0); + writer.update_timestamp(t, dst_pos, eid.with_layer_deletion(layer)); } } } @@ -555,7 +561,7 @@ fn materialize_impl( for (t, prop_value) in temporal_prop.iter_indexed() { let lsn = 0; - graph_writer.add_properties(t, [(prop_id, prop_value)], lsn); + graph_writer.add_properties(t, [(prop_id, prop_value)]); } } @@ -575,7 +581,7 @@ fn materialize_impl( if !metadata_props.is_empty() { let lsn = 0; - graph_writer.update_metadata(metadata_props, lsn); + graph_writer.update_metadata(metadata_props); } } } diff --git a/raphtory/src/db/api/view/internal/materialize.rs b/raphtory/src/db/api/view/internal/materialize.rs index b8ce9a5e07..311042cbc5 100644 --- a/raphtory/src/db/api/view/internal/materialize.rs +++ b/raphtory/src/db/api/view/internal/materialize.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "io")] +use crate::serialise::GraphPaths; use crate::{ core::storage::timeindex::EventTime, db::{ @@ -11,9 +13,6 @@ use raphtory_api::{iter::BoxedLIter, GraphType}; use raphtory_storage::{graph::graph::GraphStorage, mutation::InheritMutationOps}; use std::ops::Range; -#[cfg(feature = "io")] -use crate::serialise::GraphPaths; - #[derive(Clone)] pub enum MaterializedGraph { EventGraph(Graph), diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index c52c51edd2..62ff30ab0e 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -394,14 +394,7 @@ impl EdgeView { .atomic_add_edge(src, dst, Some(e_id), layer_id) .map_err(into_graph_err)?; - writer.internal_add_edge( - t, - src, - dst, - MaybeNew::New(e_id.with_layer(layer_id)), - 0, - props, - ); + writer.internal_add_edge(t, src, dst, MaybeNew::New(e_id.with_layer(layer_id)), props); Ok(()) } diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index 729a22b1f7..cfcce085b7 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -22,7 +22,7 @@ use crate::{ db::{ api::{ state::ops::NodeFilterOp, - storage::storage::Storage, + storage::storage::{Config, PersistenceStrategy, Storage}, view::{ internal::{ GraphView, InheritEdgeHistoryFilter, InheritNodeHistoryFilter, @@ -52,7 +52,7 @@ use std::{ ops::Deref, sync::Arc, }; -use storage::{persist::strategy::PersistentStrategy, Extension}; 
+use storage::Extension; #[repr(transparent)] #[derive(Debug, Clone, Default)] @@ -60,8 +60,6 @@ pub struct Graph { pub(crate) inner: Arc, } -impl InheritCoreGraphOps for Graph {} -impl InheritLayerOps for Graph {} impl From> for Graph { fn from(inner: Arc) -> Self { Self { inner } @@ -76,8 +74,157 @@ impl From for Graph { } } +impl Base for Graph { + type Base = Storage; + + #[inline(always)] + fn base(&self) -> &Self::Base { + &self.inner + } +} + +impl InheritMutationOps for Graph {} + +impl InheritViewOps for Graph {} + +impl InheritStorageOps for Graph {} + +impl InheritNodeHistoryFilter for Graph {} + +impl InheritEdgeHistoryFilter for Graph {} + +impl InheritCoreGraphOps for Graph {} + +impl InheritLayerOps for Graph {} + impl Static for Graph {} +impl Display for Graph { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.inner) + } +} + +impl<'graph, G: GraphViewOps<'graph>> PartialEq for Graph +where + Self: 'graph, +{ + fn eq(&self, other: &G) -> bool { + graph_equal(self, other) + } +} + +impl Graph { + /// Create a new graph + /// + /// Returns: + /// + /// A raphtory graph + /// + /// # Example + /// + /// ``` + /// use raphtory::prelude::Graph; + /// let g = Graph::new(); + /// ``` + pub fn new() -> Self { + Self { + inner: Arc::new(Storage::default()), + } + } + + /// Create a new graph at a specific path + /// + /// # Arguments + /// * `path` - The path to the storage location + /// # Returns + /// A raphtory graph with storage at the specified path + /// # Example + /// ```no_run + /// use raphtory::prelude::Graph; + /// let g = Graph::new_at_path("/path/to/storage"); + /// ``` + #[cfg(feature = "io")] + pub fn new_at_path(path: &(impl GraphPaths + ?Sized)) -> Result { + if !Extension::disk_storage_enabled() { + return Err(GraphError::DiskGraphNotEnabled); + } + + path.init()?; + let graph_storage_path = path.graph_path()?; + let storage = Storage::new_at_path(graph_storage_path)?; + + let graph = Self { + inner: Arc::new(storage), + }; + + path.write_metadata(&graph)?; + Ok(graph) + } + + #[cfg(feature = "io")] + pub fn new_at_path_with_config( + path: &(impl GraphPaths + ?Sized), + config: Config, + ) -> Result { + if !Extension::disk_storage_enabled() { + return Err(GraphError::DiskGraphNotEnabled); + } + + path.init()?; + + let graph = Self { + inner: Arc::new(Storage::new_at_path_with_config( + path.graph_path()?, + config, + )?), + }; + + path.write_metadata(&graph)?; + Ok(graph) + } + + /// Load a graph from a specific path + /// # Arguments + /// * `path` - The path to the storage location + /// # Returns + /// A raphtory graph loaded from the specified path + /// # Example + /// ```no_run + /// use raphtory::prelude::Graph; + /// let g = Graph::load_from_path("/path/to/storage"); + /// + #[cfg(feature = "io")] + pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result { + // TODO: add support for loading indexes and vectors + Ok(Self { + inner: Arc::new(Storage::load_from(path.graph_path()?)?), + }) + } + + pub(crate) fn from_storage(inner: Arc) -> Self { + Self { inner } + } + + pub(crate) fn from_internal_graph(graph_storage: GraphStorage) -> Self { + let inner = Arc::new(Storage::from_inner(graph_storage)); + Self { inner } + } + + pub fn event_graph(&self) -> Graph { + self.clone() + } + + /// Get persistent graph + pub fn persistent_graph(&self) -> PersistentGraph { + PersistentGraph::from_storage(self.inner.clone()) + } +} + +// ########################################### +// Methods for checking equality of graphs 
+// ########################################### + pub fn graph_equal<'graph1, 'graph2, G1: GraphViewOps<'graph1>, G2: GraphViewOps<'graph2>>( g1: &G1, g2: &G2, @@ -363,14 +510,17 @@ pub fn assert_nodes_equal_layer< only_timestamps: bool, ) { let mut nodes1: Vec<_> = nodes1.collect(); - nodes1.sort(); let mut nodes2: Vec<_> = nodes2.collect(); + + nodes1.sort(); nodes2.sort(); + assert_eq!( nodes1.len(), nodes2.len(), "mismatched number of nodes{layer_tag}", ); + for (n1, n2) in nodes1.into_iter().zip(nodes2) { assert_node_equal_layer(n1, n2, layer_tag, persistent, only_timestamps); } @@ -645,8 +795,10 @@ fn assert_graph_equal_inner<'graph, G1: GraphViewOps<'graph>, G2: GraphViewOps<' ) { black_box({ assert_graph_equal_layer(g1, g2, None, persistent, only_timestamps); + let left_layers: HashSet<_> = g1.unique_layers().collect(); let right_layers: HashSet<_> = g2.unique_layers().collect(); + assert_eq!( left_layers, right_layers, "mismatched layers: left {:?}, right {:?}", @@ -682,7 +834,8 @@ pub fn assert_graph_equal_timestamps<'graph, G1: GraphViewOps<'graph>, G2: Graph assert_graph_equal_inner(g1, g2, false, true) } -/// Equality check for materialized persistent graph that ignores the updates generated by the materialise at graph.earliest_time() +/// Equality check for materialized persistent graph that ignores the +/// updates generated by the materialise at graph.earliest_time(). #[track_caller] pub fn assert_persistent_materialize_graph_equal< 'graph, @@ -694,133 +847,3 @@ pub fn assert_persistent_materialize_graph_equal< ) { assert_graph_equal_inner(g1, g2, true, false) } - -impl Display for Graph { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.inner) - } -} - -impl<'graph, G: GraphViewOps<'graph>> PartialEq for Graph -where - Self: 'graph, -{ - fn eq(&self, other: &G) -> bool { - graph_equal(self, other) - } -} - -impl Base for Graph { - type Base = Storage; - - #[inline(always)] - fn base(&self) -> &Self::Base { - &self.inner - } -} - -impl InheritMutationOps for Graph {} - -impl InheritViewOps for Graph {} - -impl InheritStorageOps for Graph {} - -impl InheritNodeHistoryFilter for Graph {} - -impl InheritEdgeHistoryFilter for Graph {} - -impl Graph { - /// Create a new graph - /// - /// Returns: - /// - /// A raphtory graph - /// - /// # Example - /// - /// ``` - /// use raphtory::prelude::Graph; - /// let g = Graph::new(); - /// ``` - pub fn new() -> Self { - Self { - inner: Arc::new(Storage::default()), - } - } - - /// Create a new graph at a specific path - /// - /// # Arguments - /// * `path` - The path to the storage location - /// # Returns - /// A raphtory graph with storage at the specified path - /// # Example - /// ```no_run - /// use raphtory::prelude::Graph; - /// let g = Graph::new_at_path("/path/to/storage"); - /// ``` - #[cfg(feature = "io")] - pub fn new_at_path(path: &(impl GraphPaths + ?Sized)) -> Result { - if !Extension::disk_storage_enabled() { - return Err(GraphError::DiskGraphNotEnabled); - } - path.init()?; - let graph = Self { - inner: Arc::new(Storage::new_at_path(path.graph_path()?)?), - }; - path.write_metadata(&graph)?; - Ok(graph) - } - - #[cfg(feature = "io")] - pub fn new_at_path_with_config( - path: &(impl GraphPaths + ?Sized), - config: Extension, - ) -> Result { - if !Extension::disk_storage_enabled() { - return Err(GraphError::DiskGraphNotEnabled); - } - path.init()?; - let graph = Self { - inner: Arc::new(Storage::new_with_path_and_ext(path.graph_path()?, config)?), - }; - path.write_metadata(&graph)?; - 
Ok(graph) - } - - /// Load a graph from a specific path - /// # Arguments - /// * `path` - The path to the storage location - /// # Returns - /// A raphtory graph loaded from the specified path - /// # Example - /// ```no_run - /// use raphtory::prelude::Graph; - /// let g = Graph::load_from_path("/path/to/storage"); - /// - #[cfg(feature = "io")] - pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result { - //TODO: add support for loading indexes and vectors - Ok(Self { - inner: Arc::new(Storage::load_from(path.graph_path()?)?), - }) - } - - pub(crate) fn from_storage(inner: Arc) -> Self { - Self { inner } - } - - pub(crate) fn from_internal_graph(graph_storage: GraphStorage) -> Self { - let inner = Arc::new(Storage::from_inner(graph_storage)); - Self { inner } - } - - pub fn event_graph(&self) -> Graph { - self.clone() - } - - /// Get persistent graph - pub fn persistent_graph(&self) -> PersistentGraph { - PersistentGraph::from_storage(self.inner.clone()) - } -} diff --git a/raphtory/src/db/graph/nodes.rs b/raphtory/src/db/graph/nodes.rs index ad49982700..5311ce8d1c 100644 --- a/raphtory/src/db/graph/nodes.rs +++ b/raphtory/src/db/graph/nodes.rs @@ -184,6 +184,7 @@ where fn iter_vids(&self, g: GraphStorage) -> impl Iterator + Send + Sync + 'graph { let view = self.base_graph.clone(); let selector = self.predicate.clone(); + self.node_list().nodes_iter(&g).filter(move |&vid| { g.try_core_node(vid) .is_some_and(|node| view.filter_node(node.as_ref()) && selector.apply(&g, vid)) diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index 5ed347f7e8..941dc25626 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -4,7 +4,8 @@ use crate::{ core::storage::timeindex::{AsTime, EventTime, TimeIndex, TimeIndexOps}, db::{ api::{ - properties::internal::InheritPropertiesOps, storage::storage::Storage, + properties::internal::InheritPropertiesOps, + storage::storage::{PersistenceStrategy, Storage}, view::internal::*, }, graph::graph::graph_equal, @@ -27,7 +28,6 @@ use std::{ }; use storage::{ api::graph_props::{GraphPropEntryOps, GraphPropRefOps}, - persist::strategy::PersistentStrategy, Extension, }; diff --git a/raphtory/src/db/graph/views/filter/model/graph_filter.rs b/raphtory/src/db/graph/views/filter/model/graph_filter.rs index bf1d5c15f0..417fd1baa8 100644 --- a/raphtory/src/db/graph/views/filter/model/graph_filter.rs +++ b/raphtory/src/db/graph/views/filter/model/graph_filter.rs @@ -1,6 +1,6 @@ use crate::{ db::{ - api::state::ops::{filter::NodeExistsOp, GraphView, NotANodeFilter}, + api::state::ops::{filter::NodeExistsOp, GraphView}, graph::views::filter::{ model::{ edge_filter::CompositeEdgeFilter, windowed_filter::Windowed, diff --git a/raphtory/src/db/mod.rs b/raphtory/src/db/mod.rs index 54e9c74f6c..63e711afda 100644 --- a/raphtory/src/db/mod.rs +++ b/raphtory/src/db/mod.rs @@ -1,4 +1,3 @@ pub mod api; pub mod graph; -pub mod replay; pub mod task; diff --git a/raphtory/src/db/replay/mod.rs b/raphtory/src/db/replay/mod.rs deleted file mode 100644 index d6ee286670..0000000000 --- a/raphtory/src/db/replay/mod.rs +++ /dev/null @@ -1,115 +0,0 @@ -use db4_graph::TemporalGraph; -use raphtory_api::core::{ - entities::{properties::prop::Prop, EID, GID, VID}, - storage::{dict_mapper::MaybeNew, timeindex::EventTime}, -}; -use storage::{ - api::edges::EdgeSegmentOps, - error::StorageError, - wal::{GraphReplayer, TransactionID, LSN}, - Extension, -}; - -/// Wrapper struct for 
implementing GraphReplayer for a TemporalGraph. -/// This is needed to workaround Rust's orphan rule since both ReplayGraph and TemporalGraph -/// are foreign to this crate. -#[derive(Debug)] -pub struct ReplayGraph { - graph: TemporalGraph, -} - -impl ReplayGraph { - pub fn new(graph: TemporalGraph) -> Self { - Self { graph } - } -} - -impl GraphReplayer for ReplayGraph { - fn replay_begin_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_end_transaction( - &self, - lsn: LSN, - transaction_id: TransactionID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_add_static_edge( - &self, - lsn: LSN, - transaction_id: TransactionID, - t: EventTime, - src: VID, - dst: VID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_add_edge( - &self, - lsn: LSN, - transaction_id: TransactionID, - t: EventTime, - src: VID, - dst: VID, - eid: EID, - layer_id: usize, - props: &[(usize, Prop)], - ) -> Result<(), StorageError> { - let edge_segment = self.graph.storage().edges().get_edge_segment(eid); - - match edge_segment { - Some(edge_segment) => { - edge_segment.head().lsn(); - } - _ => {} - } - - Ok(()) - } - - fn replay_node_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - gid: GID, - vid: VID, - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_const_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_temporal_prop_ids>( - &self, - lsn: LSN, - transaction_id: TransactionID, - props: &[MaybeNew<(PN, usize, Prop)>], - ) -> Result<(), StorageError> { - Ok(()) - } - - fn replay_layer_id( - &self, - lsn: LSN, - transaction_id: TransactionID, - name: &str, - id: usize, - ) -> Result<(), StorageError> { - Ok(()) - } -} diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index ed19091a16..4fff71fcf0 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -22,7 +22,6 @@ use std::{ sync::Arc, time::SystemTimeError, }; -use tracing::error; #[cfg(feature = "python")] use pyo3::PyErr; diff --git a/raphtory/src/graphgen/erdos_renyi.rs b/raphtory/src/graphgen/erdos_renyi.rs index de0a54bcc7..8cf62605cc 100644 --- a/raphtory/src/graphgen/erdos_renyi.rs +++ b/raphtory/src/graphgen/erdos_renyi.rs @@ -13,12 +13,11 @@ use crate::{ graph::graph::Graph, }, errors::GraphError, - prelude::{NodeStateOps, NO_PROPS}, + prelude::NO_PROPS, }; use rand::{rngs::StdRng, Rng, SeedableRng}; use raphtory_api::core::storage::timeindex::AsTime; use raphtory_core::entities::GID; -use tracing::error; /// Generates an Erdős-Rényi random graph and returns it. 
/// @@ -77,8 +76,7 @@ pub fn erdos_renyi(nodes_to_add: usize, p: f64, seed: Option) -> Result>( *layer, c_props.drain(..), t_props.drain(..), - 0, ); } else { - writer.bulk_delete_edge(t, eid_pos, *src, *dst, exists, *layer, 0); + writer.bulk_delete_edge(t, eid_pos, *src, *dst, exists, *layer); } } } @@ -553,7 +552,7 @@ fn update_inbound_edges<'a, NS: NodeSegmentOps>( let mut writer = shard.writer(); if !edge_exists_in_static_graph { - writer.add_static_inbound_edge(dst_pos, *src, *eid, 0); + writer.add_static_inbound_edge(dst_pos, *src, *eid); } let elid = if delete { eid.with_layer_deletion(*layer) @@ -563,14 +562,14 @@ fn update_inbound_edges<'a, NS: NodeSegmentOps>( if src != dst { if edge_exists_in_layer { - writer.update_timestamp(t, dst_pos, elid, 0); + writer.update_timestamp(t, dst_pos, elid); } else { - writer.add_inbound_edge(Some(t), dst_pos, *src, elid, 0); + writer.add_inbound_edge(Some(t), dst_pos, *src, elid); } } else { // self-loop edge, only add once if !edge_exists_in_layer { - writer.add_inbound_edge::(None, dst_pos, *src, elid, 0); + writer.add_inbound_edge::(None, dst_pos, *src, elid); } } } @@ -605,7 +604,7 @@ fn add_and_resolve_outbound_edges< MaybeNew::Existing(edge_id) } else { let edge_id = next_edge_id(row); - writer.add_static_outbound_edge(src_pos, *dst, edge_id, 0); + writer.add_static_outbound_edge(src_pos, *dst, edge_id); eid_col_shared[row].store(edge_id.0, Ordering::Relaxed); eids_exist[row].store(false, Ordering::Relaxed); MaybeNew::New(edge_id) @@ -628,9 +627,9 @@ fn add_and_resolve_outbound_edges< layer_eids_exist[row].store(exists, Ordering::Relaxed); if exists { - writer.update_timestamp(t, src_pos, edge_id.inner(), 0); + writer.update_timestamp(t, src_pos, edge_id.inner()); } else { - writer.add_outbound_edge(Some(t), src_pos, *dst, edge_id.inner(), 0); + writer.add_outbound_edge(Some(t), src_pos, *dst, edge_id.inner()); } } } @@ -645,7 +644,7 @@ pub fn store_node_ids>( if let Some(src_pos) = locked_page.resolve_pos(*vid) { let mut writer = locked_page.writer(); - writer.store_node_id_and_node_type(src_pos, 0, *gid, *node_type, 0); + writer.store_node_id_and_node_type(src_pos, STATIC_GRAPH_LAYER_ID, *gid, *node_type); } } } diff --git a/raphtory/src/python/filter/property_filter_builders.rs b/raphtory/src/python/filter/property_filter_builders.rs index f8537052ca..2c94c94162 100644 --- a/raphtory/src/python/filter/property_filter_builders.rs +++ b/raphtory/src/python/filter/property_filter_builders.rs @@ -7,9 +7,8 @@ use crate::{ ops::{ElemQualifierOps, ListAggOps, PropertyFilterOps}, }, DynInternalViewWrapPropOps, DynPropertyFilterBuilder, DynTemporalPropertyFilterBuilder, - DynView, DynViewFilter, EntityMarker, InternalPropertyFilterBuilder, - PropertyFilterFactory, TemporalPropertyFilterFactory, TryAsCompositeFilter, - ViewWrapOps, + DynView, EntityMarker, InternalPropertyFilterBuilder, PropertyFilterFactory, + TemporalPropertyFilterFactory, TryAsCompositeFilter, ViewWrapOps, }, CreateFilter, }, diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 6712732aed..47364b52e0 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -119,7 +119,7 @@ pub(crate) fn load_edges_from_arrow_c_stream< schema: Option>, event_id: Option<&str>, ) -> Result<(), GraphError> { - let mut cols_to_check = [src, dst, time] + let cols_to_check = [src, dst, time] .into_iter() .chain(properties.iter().copied()) .chain(metadata.iter().copied()) diff --git 
a/raphtory/src/python/graph/node_state/node_state_earliest_time.rs b/raphtory/src/python/graph/node_state/node_state_earliest_time.rs index 099b5e85a6..1c602c4638 100644 --- a/raphtory/src/python/graph/node_state/node_state_earliest_time.rs +++ b/raphtory/src/python/graph/node_state/node_state_earliest_time.rs @@ -36,7 +36,6 @@ use crate::{ state::{ops::IntoDynNodeOp, NodeStateGroupBy, OrderedNodeStateOps}, view::GraphViewOps, }, - py_borrowing_iter, python::graph::node_state::node_state::ops::NodeFilterOp, }; type EarliestTimeOp = ops::history::EarliestTime; diff --git a/raphtory/src/python/graph/node_state/node_state_history.rs b/raphtory/src/python/graph/node_state/node_state_history.rs index 327944895d..c04cc144a4 100644 --- a/raphtory/src/python/graph/node_state/node_state_history.rs +++ b/raphtory/src/python/graph/node_state/node_state_history.rs @@ -35,8 +35,7 @@ use std::{collections::HashMap, sync::Arc}; use crate::db::graph::nodes::IntoDynNodes; pub(crate) use crate::{ - db::api::state::ops::IntoDynNodeOp, py_borrowing_iter, - python::graph::node_state::node_state::ops::NodeFilterOp, + db::api::state::ops::IntoDynNodeOp, python::graph::node_state::node_state::ops::NodeFilterOp, }; /// A lazy view over History objects for each node. diff --git a/raphtory/src/python/graph/node_state/node_state_intervals.rs b/raphtory/src/python/graph/node_state/node_state_intervals.rs index 60b0c6ba35..5205195c8f 100644 --- a/raphtory/src/python/graph/node_state/node_state_intervals.rs +++ b/raphtory/src/python/graph/node_state/node_state_intervals.rs @@ -31,7 +31,6 @@ use crate::db::graph::nodes::IntoDynNodes; pub(crate) use crate::{ db::api::state::{ops::IntoDynNodeOp, NodeStateOps, OrderedNodeStateOps}, prelude::*, - py_borrowing_iter, python::graph::node_state::node_state::ops::NodeFilterOp, }; diff --git a/raphtory/src/python/graph/node_state/node_state_latest_time.rs b/raphtory/src/python/graph/node_state/node_state_latest_time.rs index f5a3286759..b988cfd54e 100644 --- a/raphtory/src/python/graph/node_state/node_state_latest_time.rs +++ b/raphtory/src/python/graph/node_state/node_state_latest_time.rs @@ -32,7 +32,6 @@ pub(crate) use crate::{ view::GraphViewOps, }, prelude::*, - py_borrowing_iter, python::graph::node_state::node_state::ops::NodeFilterOp, }; diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index b1d30e3fac..abd4a2c6df 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -25,28 +25,29 @@ use std::{ use walkdir::WalkDir; use zip::{write::FileOptions, ZipArchive, ZipWriter}; -/// Stores graph data -pub const GRAPH_PATH: &str = "graph"; -pub const DEFAULT_GRAPH_PATH: &str = "graph0"; +/// Metadata file that stores path to the data folder. +pub const ROOT_META_PATH: &str = ".raph"; +/// Outer most directory containing all data. pub const DATA_PATH: &str = "data"; pub const DEFAULT_DATA_PATH: &str = "data0"; -/// Stores data folder path -pub const ROOT_META_PATH: &str = ".raph"; - -/// Stores graph folder path and graph metadata +/// Metadata file that stores path to the graph folder and graph metadata. pub const GRAPH_META_PATH: &str = ".meta"; -/// Temporary metadata for atomic replacement -pub const DIRTY_PATH: &str = ".dirty"; +/// Directory that stores graph data. +pub const GRAPH_PATH: &str = "graph"; +pub const DEFAULT_GRAPH_PATH: &str = "graph0"; -/// Directory that stores search indexes +/// Directory that stores search indexes. 
pub const INDEX_PATH: &str = "index"; -/// Directory that stores vector embeddings of the graph +/// Directory that stores vector embeddings of the graph. pub const VECTORS_PATH: &str = "vectors"; +/// Temporary metadata file for atomic replacement. +pub const DIRTY_PATH: &str = ".dirty"; + pub(crate) fn valid_path_pointer(relative_path: &str, prefix: &str) -> Result<(), GraphError> { relative_path .strip_prefix(prefix) // should have the prefix diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index dab57dec81..b2a65a7cf3 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -2,7 +2,7 @@ use crate::prelude::IndexMutationOps; use crate::{ db::api::{ - mutation::AdditionOps, storage::storage::PersistentStrategy, view::StaticGraphViewOps, + mutation::AdditionOps, storage::storage::PersistenceStrategy, view::StaticGraphViewOps, }, errors::GraphError, serialise::{ @@ -22,6 +22,7 @@ use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter}; pub trait StableEncode: StaticGraphViewOps + AdditionOps { fn encode_to_zip(&self, writer: ZipWriter) -> Result<(), GraphError>; + /// Encode the graph into bytes. fn encode_to_bytes(&self) -> Result, GraphError>; diff --git a/raphtory/src/test_utils.rs b/raphtory/src/test_utils.rs index 9309ded034..f19b796361 100644 --- a/raphtory/src/test_utils.rs +++ b/raphtory/src/test_utils.rs @@ -16,7 +16,7 @@ use raphtory_api::core::{ entities::properties::prop::{PropType, DECIMAL_MAX}, storage::{ arc_str::{ArcStr, OptionAsStr}, - timeindex::{AsTime, EventTime}, + timeindex::AsTime, }, }; use raphtory_storage::{ @@ -65,7 +65,7 @@ pub fn assert_valid_graph(fixture: &GraphFixture, graph: &Graph) { let get_node_t_prop_map = |node: &NodeView<&Graph>| -> HashMap)>> { - let mut out: HashMap)>> = node + let out: HashMap)>> = node .properties() .temporal() .iter() @@ -93,7 +93,7 @@ pub fn assert_valid_graph(fixture: &GraphFixture, graph: &Graph) { }; let get_edge_t_prop_counts = |edge: &EdgeView<&Graph>| -> HashMap)>> { - let mut out: HashMap)>> = edge + let out: HashMap)>> = edge .properties() .temporal() .iter() @@ -193,18 +193,6 @@ pub fn assert_valid_graph(fixture: &GraphFixture, graph: &Graph) { expected_node_ids, actual_node_ids ); - assert_eq!( - expected_edge_pairs.len(), - graph.count_edges(), - "mismatched number of unique edges (src,dst) pairs" - ); - - assert_eq!( - expected_exploded_edge_count, - graph.count_temporal_edges(), - "mismatched number of temporal (exploded) edge events" - ); - for ((_, _, layer), _) in &expected_edge_layer_updates { assert!( graph.has_layer(layer.as_ref()), @@ -357,6 +345,18 @@ pub fn assert_valid_graph(fixture: &GraphFixture, graph: &Graph) { panic!("graph should have edge {src}->{dst} in layer {layer_name:?}") }); } + + assert_eq!( + expected_edge_pairs.len(), + graph.count_edges(), + "mismatched number of unique edges (src,dst) pairs" + ); + + assert_eq!( + expected_exploded_edge_count, + graph.count_temporal_edges(), + "mismatched number of temporal (exploded) edge events" + ); } #[macro_export] diff --git a/raphtory/tests/df_loaders.rs b/raphtory/tests/df_loaders.rs index 15488da86d..635559cff2 100644 --- a/raphtory/tests/df_loaders.rs +++ b/raphtory/tests/df_loaders.rs @@ -1,7 +1,5 @@ #[cfg(feature = "io")] mod io_tests { - use std::any::Any; - use arrow::array::builder::{ ArrayBuilder, Int64Builder, LargeStringBuilder, StringViewBuilder, UInt64Builder, }; diff --git a/raphtory/tests/exploded_edge_property_filter.rs 
b/raphtory/tests/exploded_edge_property_filter.rs index f332a5fb54..dc73a7d580 100644 --- a/raphtory/tests/exploded_edge_property_filter.rs +++ b/raphtory/tests/exploded_edge_property_filter.rs @@ -32,10 +32,7 @@ use raphtory_api::core::{ use raphtory_core::entities::nodes::node_ref::AsNodeRef; use raphtory_storage::{ core_ops::CoreGraphOps, - mutation::{ - addition_ops::{InternalAdditionOps, SessionAdditionOps}, - property_addition_ops::InternalPropertyAdditionOps, - }, + mutation::addition_ops::{InternalAdditionOps, SessionAdditionOps}, }; use std::collections::HashMap; diff --git a/raphtory/tests/node_property_filter.rs b/raphtory/tests/node_property_filter.rs index ecb9b7ae89..a1f0daa3ce 100644 --- a/raphtory/tests/node_property_filter.rs +++ b/raphtory/tests/node_property_filter.rs @@ -3,32 +3,23 @@ use proptest::{arbitrary::any, proptest}; use raphtory::{ db::{ api::{ - properties::internal::InheritPropertiesOps, state::ops::NodeOp, view::{ filter_ops::{Filter, NodeSelect}, - internal::{ - GraphView, Immutable, InheritAllEdgeFilterOps, InheritEdgeHistoryFilter, - InheritLayerOps, InheritListOps, InheritMaterialize, InheritNodeHistoryFilter, - InheritStorageOps, InheritTimeSemantics, InternalNodeFilterOps, Static, - }, EdgeSelect, }, }, graph::{ assertions::assert_ok_or_missing_nodes, graph::assert_edges_equal, - views::filter::{ - model::{ - node_filter::{ops::NodeFilterOps, NodeFilter}, - property_filter::ops::PropertyFilterOps, - ComposableFilter, PropertyFilterFactory, - }, - CreateFilter, + views::filter::model::{ + node_filter::{ops::NodeFilterOps, NodeFilter}, + property_filter::ops::PropertyFilterOps, + ComposableFilter, PropertyFilterFactory, }, }, }, - prelude::{AdditionOps, Graph, GraphViewOps, TimeOps, NO_PROPS, *}, + prelude::*, test_utils::{ add_node_props, build_edge_list, build_graph_from_edge_list, build_node_props, node_filtered_graph, diff --git a/raphtory/tests/test_filters.rs b/raphtory/tests/test_filters.rs index 674aa4f291..e184bad86f 100644 --- a/raphtory/tests/test_filters.rs +++ b/raphtory/tests/test_filters.rs @@ -132,8 +132,7 @@ mod test_property_semantics { assert_filter_nodes_results, assert_search_nodes_results, TestVariants, }, views::filter::model::{ - node_filter::NodeFilter, - property_filter::ops::{ListAggOps, PropertyFilterOps}, + node_filter::NodeFilter, property_filter::ops::PropertyFilterOps, PropertyFilterFactory, TemporalPropertyFilterFactory, }, }, @@ -4925,7 +4924,6 @@ mod test_node_property_filter_agg { use raphtory_storage::mutation::{ addition_ops::InternalAdditionOps, property_addition_ops::InternalPropertyAdditionOps, }; - use std::{sync::Arc, vec}; fn list_u8(xs: &[u8]) -> Prop { Prop::list(xs.iter().copied().map(Prop::U8)) diff --git a/raphtory/tests/tests_node_type_filtered_subgraph.rs b/raphtory/tests/tests_node_type_filtered_subgraph.rs index 7a951c635a..9191a7fa4d 100644 --- a/raphtory/tests/tests_node_type_filtered_subgraph.rs +++ b/raphtory/tests/tests_node_type_filtered_subgraph.rs @@ -487,7 +487,7 @@ mod test_filters_node_type_filtered_subgraph { }, views::filter::model::PropertyFilterFactory, }, - prelude::{EdgeFilter, EdgeViewOps, GraphViewOps}, + prelude::EdgeFilter, }; #[test] diff --git a/raphtory/tests/views_test.rs b/raphtory/tests/views_test.rs index e037bc079e..5af498d7ad 100644 --- a/raphtory/tests/views_test.rs +++ b/raphtory/tests/views_test.rs @@ -528,10 +528,6 @@ fn test_entity_history() { } mod test_filters_window_graph { - use raphtory::{ - db::{api::view::StaticGraphViewOps, 
graph::assertions::GraphTransformer}, - prelude::TimeOps, - }; mod test_nodes_filters_window_graph { use raphtory::{