From f3e2cb3395b90adabf153ab62b037a4a7909ec02 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 01/26] Revert "no bug code" This reverts commit 89f83b437b05e693de30df80bd40d58e7679c5a7. --- scripts/deploy_single_node/node_config.yaml | 2 +- src/main/src/general/app/mod.rs | 121 ++++++++---------- .../src/general/data/m_data_general/mod.rs | 89 +++++-------- src/main/src/general/m_os/mod.rs | 10 +- src/main/src/general/network/m_p2p.rs | 8 +- src/main/src/general/network/m_p2p_quic.rs | 5 +- src/main/src/master/data/m_data_master.rs | 9 +- src/main/src/worker/m_kv_user_client.rs | 8 +- 8 files changed, 100 insertions(+), 152 deletions(-) diff --git a/scripts/deploy_single_node/node_config.yaml b/scripts/deploy_single_node/node_config.yaml index cd5aa06..78aef3a 100644 --- a/scripts/deploy_single_node/node_config.yaml +++ b/scripts/deploy_single_node/node_config.yaml @@ -3,5 +3,5 @@ nodes: addr: 127.0.0.1:2600 spec: [meta,master] 2: - addr: 192.168.31.240:2602 + addr: 127.0.0.1:2605 spec: [meta,worker] diff --git a/src/main/src/general/app/mod.rs b/src/main/src/general/app/mod.rs index be46a7e..a0d57a8 100644 --- a/src/main/src/general/app/mod.rs +++ b/src/main/src/general/app/mod.rs @@ -781,36 +781,36 @@ impl AppMetaManager { // let appdir = self.fs_layer.concat_app_dir(app); let appmeta = self.fs_layer.read_app_meta(tmpapp).await?; - // TODO: 2.check project dir - // 3. if java, take snapshot - if let AppType::Jar = appmeta.app_type { - let _ = self - .meta - .write() - .await - .tmp_app_metas - .insert(tmpapp.to_owned(), appmeta.clone()); - tracing::debug!("record app meta to make checkpoint {}", tmpapp); - self.view - .instance_manager() - .make_checkpoint_for_app(tmpapp) - .await?; - self.view - .instance_manager() - .drap_app_instances(tmpapp) - .await; - // remove app_meta - tracing::debug!("checkpoint made, remove app meta {}", tmpapp); - let _ = self - .meta - .write() - .await - .tmp_app_metas - .remove(tmpapp) - .unwrap_or_else(|| { - panic!("remove app meta failed, app: {}", tmpapp); - }); - } + // // TODO: 2.check project dir + // // 3. 
if java, take snapshot + // if let AppType::Jar = appmeta.app_type { + // let _ = self + // .meta + // .write() + // .await + // .tmp_app_metas + // .insert(tmpapp.to_owned(), appmeta.clone()); + // tracing::debug!("record app meta to make checkpoint {}", tmpapp); + // self.view + // .instance_manager() + // .make_checkpoint_for_app(tmpapp) + // .await?; + // self.view + // .instance_manager() + // .drap_app_instances(tmpapp) + // .await; + // // remove app_meta + // tracing::debug!("checkpoint made, remove app meta {}", tmpapp); + // let _ = self + // .meta + // .write() + // .await + // .tmp_app_metas + // .remove(tmpapp) + // .unwrap_or_else(|| { + // panic!("remove app meta failed, app: {}", tmpapp); + // }); + // } Ok(appmeta) } @@ -1072,48 +1072,29 @@ impl AppMetaManager { pub fn set_app_meta_list(&self, list: Vec) { //发送逻辑处理 曾俊 - // self.view - // .kv_store_engine() - // .set( - // KeyTypeServiceList, - // &serde_json::to_string(&list).unwrap().into(), - // false, - // ) - // .todo_handle("This part of the code needs to be implemented."); - - //修改后代码:对set函数的返回类型进行处理 曾俊 - match self.view + self.view + .kv_store_engine() + .set( + KeyTypeServiceList, + &serde_json::to_string(&list).unwrap().into(), + false, + ) + .todo_handle("This part of the code needs to be implemented."); + } + pub fn get_app_meta_list(&self) -> Vec { + let res = self + .view .kv_store_engine() - .set( - KeyTypeServiceList, - &serde_json::to_string(&list).unwrap().into(), - false, - ) { - Ok((version, _)) => { - tracing::debug!("App meta list updated successfully, version: {}, list: {:?}", version, list); - }, - Err(e) => { - tracing::error!("Failed to set app meta list: {:?}", e); - } + .get(&KeyTypeServiceList, false, KvAdditionalConf {}) + .map(|(_version, list)| list) + .unwrap_or_else(|| { + return vec![]; + }); + serde_json::from_slice(&res).unwrap_or_else(|e| { + tracing::warn!("parse app meta list failed, err: {:?}", e); + vec![] + }) } -} - -pub fn get_app_meta_list(&self) -> Vec { - let res = self - .view - .kv_store_engine() - .get(&KeyTypeServiceList, false, KvAdditionalConf {}) - .map(|(_version, list)| list) - .unwrap_or_else(|| { - return vec![]; - }); - serde_json::from_slice(&res).unwrap_or_else(|e| { - tracing::warn!("parse app meta list failed, err: {:?}", e); - vec![] - }) -} - - // pub fn get_app_meta_basicinfo_list(&self) -> Vec { // let apps = self.get_app_meta_list(); diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index 19a372f..09892f3 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -660,30 +660,25 @@ impl DataGeneral { let message = "New data version overwrite".to_owned(); tracing::warn!("{}", message); - if let Err(e) = responsor //返回结果未处理 曾俊 + responsor //返回结果未处理 曾俊 .send_resp(WriteOneDataResponse { remote_version: 0, success: false, message, }) - .await{ - tracing::error!("Failed to send write one data response 1: {}", e); - } - // .todo_handle("1 err_comment waitting to fill"); - + .await + .todo_handle("1 err_comment waitting to fill"); }; let fail_with_msg = |message: String| async { tracing::warn!("{}", message); - if let Err(e) = responsor //返回结果未处理 曾俊 + responsor //返回结果未处理 曾俊 .send_resp(WriteOneDataResponse { remote_version: 0, success: false, message, }) - .await { - tracing::error!("Failed to send write one data response 2 : {}", e); - } - // .todo_handle("2 err_comment waitting to fill"); + .await + .todo_handle("2 err_comment waitting to 
fill"); }; loop { @@ -784,7 +779,7 @@ impl DataGeneral { || check_meta.as_ref().unwrap().0 != required_meta.as_ref().unwrap().0 { drop(guard); - if let Err(e) = responsor //返回结果未处理 曾俊 + responsor //返回结果未处理 曾俊 .send_resp(WriteOneDataResponse { remote_version: if check_meta.is_none() { 0 @@ -794,10 +789,8 @@ impl DataGeneral { success: false, message: "meta is updated again, cancel write".to_owned(), }) - .await{ - tracing::error!("Failed to send write one data response 3: {}", e); - } - // .todo_handle("3 err_comment waitting to fill"); + .await + .todo_handle("3 err_comment waitting to fill"); return; } @@ -827,16 +820,14 @@ impl DataGeneral { kv_store_engine.flush(); drop(guard); tracing::debug!("data partial is written"); - if let Err(e) = responsor //返回结果未使用 曾俊 + responsor //返回结果未使用 曾俊 .send_resp(WriteOneDataResponse { remote_version: req.version, success: true, message: "".to_owned(), }) - .await{ - tracing::error!("Failed to send write one data response 4: {}", e); - } - // .todo_handle("4 err_comment waitting to fill"); + .await + .todo_handle("4 err_comment waitting to fill"); } async fn rpc_handle_data_meta_update( @@ -876,30 +867,24 @@ impl DataGeneral { drop(_kv_write_lock_guard); let err_msg = "New data version is smaller, failed update"; tracing::warn!("{}", err_msg); - if let Err(e) = responsor //返回结果未处理 曾俊 + responsor //返回结果未处理 曾俊 .send_resp(proto::DataMetaUpdateResponse { version: old_meta.version, message: err_msg.to_owned(), }) - .await{ - tracing::error!("Failed to send data meta update response 5: {}", e); - } - // .todo_handle("5 err_comment waitting to fill"); + .await + .todo_handle("5 err_comment waitting to fill"); return; } old_meta.version = req.version; if req.serialized_meta.len() > 0 { - if let Err(e) = self.view.kv_store_engine() //返回结果未处理 曾俊 - .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true){ - tracing::error!("Failed to set raw data in kv store 6: {}", e); - } - // .todo_handle("6 err_comment waitting to fill"); + self.view.kv_store_engine() //返回结果未处理 曾俊 + .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) + .todo_handle("6 err_comment waitting to fill"); } else { - if let Err(e) = self.view.kv_store_engine() //返回结果未处理 曾俊 - .set(key, &old_meta, true){ - tracing::error!("Failed to set raw data in kv store 7: {}", e); - } - // .todo_handle("7 err_comment waitting to fill"); + self.view.kv_store_engine() //返回结果未处理 曾俊 + .set(key, &old_meta, true) + .todo_handle("7 err_comment waitting to fill"); } } else { if req.serialized_meta.len() > 0 { @@ -907,38 +892,32 @@ impl DataGeneral { "set new meta data, {:?}", bincode::deserialize::(&req.serialized_meta) ); - if let Err(e) = self.view.kv_store_engine() //返回结果未处理 曾俊 - .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true){ - tracing::error!("Failed to set raw data in kv store 8: {}", e); - } - // .todo_handle("8 err_comment waitting to fill"); + self.view.kv_store_engine() //返回结果未处理 曾俊 + .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) + .todo_handle("8 err_comment waitting to fill"); } else { drop(_kv_write_lock_guard); let err_msg = "Old meta data not found and missing new meta"; tracing::warn!("{}", err_msg); - if let Err(e) = responsor //返回结果未处理 曾俊 + responsor //返回结果未处理 曾俊 .send_resp(proto::DataMetaUpdateResponse { version: 0, message: err_msg.to_owned(), }) - .await{ - tracing::error!("Failed to send data meta update response 9: {}", e); - } - // .todo_handle("9 err_comment waitting to fill"); + .await + .todo_handle("9 err_comment waitting to 
fill"); return; } } drop(_kv_write_lock_guard); tracing::debug!("rpc_handle_data_meta_update success"); - if let Err(e) = responsor //返回结果未处理 曾俊 + responsor //返回结果未处理 曾俊 .send_resp(proto::DataMetaUpdateResponse { version: req.version, message: "Update success".to_owned(), }) - .await{ - tracing::error!("Failed to send data meta update response 10: {}", e); - } - // .todo_handle("10 err_comment waitting to fill"); + .await + .todo_handle("10 err_comment waitting to fill"); } async fn rpc_handle_get_data_meta( @@ -1609,11 +1588,9 @@ impl LogicalModule for DataGeneral { .regist(p2p, move |responsor, req| { let view = view.clone(); let _ = tokio::spawn(async move { - if let Err(e) = view.data_general().rpc_handle_get_data_meta(req, responsor) //返回结果未处理 曾俊 - .await{ - tracing::error!("Failed to handle get data meta: {}", e); - } - // .todo_handle("rpc_handle_get_data_meta err"); + view.data_general().rpc_handle_get_data_meta(req, responsor) //返回结果未处理 曾俊 + .await + .todo_handle("rpc_handle_get_data_meta err"); }); Ok(()) }); diff --git a/src/main/src/general/m_os/mod.rs b/src/main/src/general/m_os/mod.rs index e974f17..225f347 100644 --- a/src/main/src/general/m_os/mod.rs +++ b/src/main/src/general/m_os/mod.rs @@ -254,9 +254,7 @@ impl OperatingSystem { }) .await .unwrap(); - if let Err(e) = responser.send_resp(res).await { - tracing::error!("Failed to send run cmd response: {}", e); - } + responser.send_resp(res).await.todo_handle("This part of the code needs to be implemented."); //返回结果未处理 曾俊 } async fn remote_get_dir_content_handler( @@ -300,7 +298,9 @@ impl OperatingSystem { get_dir_content_resp::GetDirContentRespOk { files, dirs }, )), } + // 在这里使用 responser 将 dir_contents 发送回调用方 } else { + // 发生读取目录错误,可以选择使用 responser 发送错误消息 GetDirContentResp { dispatch: Some(get_dir_content_resp::Dispatch::Fail( GetDirContentRespFail { @@ -321,9 +321,7 @@ impl OperatingSystem { }) .await .unwrap(); - if let Err(e) = responser.send_resp(res).await { - tracing::error!("Failed to send get dir content response: {}", e); - } + responser.send_resp(res).await.todo_handle("This part of the code needs to be implemented."); //返回结果未处理 曾俊 } pub fn open_file(&self, fname: &str) -> WSResult { diff --git a/src/main/src/general/network/m_p2p.rs b/src/main/src/general/network/m_p2p.rs index 88675b8..33b4b67 100644 --- a/src/main/src/general/network/m_p2p.rs +++ b/src/main/src/general/network/m_p2p.rs @@ -415,18 +415,16 @@ impl P2PModule { let _ = self .waiting_tasks .insert((taskid, node_id), Some(tx).into()); - if let Err(e) = self.dispatch( //返回结果未处理 曾俊 + self.dispatch( node_id, r.msg_id(), taskid, DispatchPayload::Local(Box::new(r)), - ){ - tracing::error!("Failed to dispatch rpc: {}", e); - } + ) //.todo_handle(); //虞光勇修改,修改原因:在调用 todo_handle 方法时遇到了缺少参数的问题。需要确保在调用 todo_handle 方法时提供所需的字符串参数。 //修改内容:加入字符串参数。 - // .todo_handle("This part of the code needs to be implemented."); + .todo_handle("This part of the code needs to be implemented."); //返回结果未处理 曾俊 let resp = rx.await.unwrap(); let resp = resp.downcast::().unwrap(); diff --git a/src/main/src/general/network/m_p2p_quic.rs b/src/main/src/general/network/m_p2p_quic.rs index f3c96c9..b0226fd 100644 --- a/src/main/src/general/network/m_p2p_quic.rs +++ b/src/main/src/general/network/m_p2p_quic.rs @@ -361,10 +361,7 @@ async fn handle_connection( match deserialize_msg_id_task_id(&head) { Ok((msg_id, task_id)) => { //返回结果未处理 曾俊 - if let Err(e) = view.p2p().dispatch(remote_id, msg_id, task_id, bytes.into()){ - tracing::error!("Failed to dispatch rpc: {}", e); - } - // 
.todo_handle("This part of the code needs to be implemented."); + view.p2p().dispatch(remote_id, msg_id, task_id, bytes.into()).todo_handle("This part of the code needs to be implemented."); } Err(err) => { tracing::warn!("incoming deserial head error: {:?}", err); diff --git a/src/main/src/master/data/m_data_master.rs b/src/main/src/master/data/m_data_master.rs index 60f5d63..08848ad 100644 --- a/src/main/src/master/data/m_data_master.rs +++ b/src/main/src/master/data/m_data_master.rs @@ -336,7 +336,7 @@ impl DataMaster { ); //返回结果未处理 曾俊 - if let Err(e) = responsor + responsor .send_resp(DataVersionScheduleResponse { version: new_meta.version, cache_mode: new_meta.cache_mode.into_iter().map(|v| v as u32).collect(), @@ -347,10 +347,9 @@ impl DataMaster { .collect(), cache_nodes, }) - .await{ - tracing::error!("Failed to send data version schedule response: {}", e); - } - // .todo_handle("This part of the code needs to be implemented."); + .await + //.todo_handle(); + .todo_handle("This part of the code needs to be implemented."); Ok(()) } // async fn rpc_handler_dataversion_synced_on_node( diff --git a/src/main/src/worker/m_kv_user_client.rs b/src/main/src/worker/m_kv_user_client.rs index 8602458..9d18f6a 100644 --- a/src/main/src/worker/m_kv_user_client.rs +++ b/src/main/src/worker/m_kv_user_client.rs @@ -211,7 +211,7 @@ impl KvUserClient { let data_general = self.view.data_general(); //返回结果未处理 曾俊 - if let Err(e) = data_general + data_general .write_data( new_data_unique_id_fn_kv(&key), //原代码: @@ -229,10 +229,8 @@ impl KvUserClient { }), )), ) - .await{ - tracing::error!("Failed to write data: {}", e); - } - // .todo_handle("This part of the code needs to be implemented."); + .await + .todo_handle("This part of the code needs to be implemented."); KvResponse::new_common(vec![]) } From 79b60be8c7d111e71bd4e88f09d2ccbe507f4ecc Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 02/26] Revert "backup change" This reverts commit c60aecab8ab30c9d3b0eef1c235a5e2cedb8d7ae. 
--- Cargo.lock | 1 - Cargo.toml | 1 - compilelog | 306 +++- design_of_new_batch.md | 2 +- plan.md | 84 + src/data/dataitem.rs | 31 - src/data/write_split.rs | 40 - src/errors.rs | 18 - src/main/src/general/app/mod.rs | 44 +- .../src/general/data/m_data_general/batch.rs | 10 +- .../general/data/m_data_general/dataitem.rs | 207 +-- .../src/general/data/m_data_general/mod.rs | 83 +- .../general/data/m_data_general/mod.rs.bak | 1616 ----------------- .../src/general/data/m_kv_store_engine.rs | 3 +- src/main/src/general/m_os/mod.rs | 11 +- src/main/src/general/network/m_p2p.rs | 5 +- src/main/src/general/network/m_p2p_quic.rs | 3 +- src/main/src/general/network/proto_ext.rs | 14 +- .../src/general/network/proto_src/data.proto | 9 +- src/main/src/main.rs | 4 +- src/main/src/master/data/m_data_master.rs | 4 +- src/main/src/modules_global_bridge/mod.rs | 34 +- src/main/src/result.rs | 26 +- src/main/src/util/zip.rs | 21 +- src/main/src/worker/m_kv_user_client.rs | 13 +- 25 files changed, 471 insertions(+), 2119 deletions(-) create mode 100644 plan.md delete mode 100644 src/data/dataitem.rs delete mode 100644 src/data/write_split.rs delete mode 100644 src/errors.rs delete mode 100644 src/main/src/general/data/m_data_general/mod.rs.bak diff --git a/Cargo.lock b/Cargo.lock index c174000..01085c2 100755 --- a/Cargo.lock +++ b/Cargo.lock @@ -3570,7 +3570,6 @@ dependencies = [ "slotmap", "ssh2", "sysinfo", - "tempfile", "thiserror", "tokio", "tower", diff --git a/Cargo.toml b/Cargo.toml index 59351df..65dd577 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,7 +61,6 @@ path-absolutize = "3.0.13" dashmap = "6.1.0" base64 = "0.22.1" hex = "0.4.3" -tempfile="3.8" [profile.test] # 0: no optimizations diff --git a/compilelog b/compilelog index 15dee7d..f071912 100644 --- a/compilelog +++ b/compilelog @@ -1,6 +1,7 @@ warning: profiles for the non root package will be ignored, specify profiles at the workspace root: -package: /root/prjs/serverless_benchmark_plus/middlewares/waverless/waverless/src/main/Cargo.toml -workspace: /root/prjs/serverless_benchmark_plus/middlewares/waverless/waverless/Cargo.toml +package: /home/nature/padev/waverless/src/main/Cargo.toml +workspace: /home/nature/padev/waverless/Cargo.toml + Compiling wasm_serverless v0.1.0 (/home/nature/padev/waverless/src/main) warning: function `path_is_option` is never used --> ws_derive/src/lib.rs:21:4 | @@ -10,66 +11,283 @@ warning: function `path_is_option` is never used = note: `#[warn(dead_code)]` on by default warning: `ws_derive` (lib) generated 1 warning - Compiling wasm_serverless v0.1.0 (/root/prjs/serverless_benchmark_plus/middlewares/waverless/waverless/src/main) -warning: unused import: `crate::util::zip` - --> src/main/src/general/data/m_data_general/dataitem.rs:11:5 +warning: unused import: `crate::general::app::m_executor::FnExeCtxAsync` + --> src/main/src/general/app/app_owned/wasm_host_funcs/result.rs:2:5 + | +2 | use crate::general::app::m_executor::FnExeCtxAsync; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: `#[warn(unused_imports)]` on by default + +warning: unused import: `FnExeCtxBase` + --> src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs:16:58 + | +16 | use crate::general::app::m_executor::{FnExeCtxAsync, FnExeCtxBase}; + | ^^^^^^^^^^^^ + +warning: unused import: `WsFuncError` + --> src/main/src/general/app/app_owned/mod.rs:7:31 + | +7 | use crate::result::{WSResult, WsFuncError}; + | ^^^^^^^^^^^ + +warning: unused import: `std::path::Path` + --> src/main/src/general/app/app_shared/java.rs:9:5 + | +9 
| use std::path::Path; + | ^^^^^^^^^^^^^^^ + +warning: unused import: `WSError` + --> src/main/src/general/app/app_shared/process.rs:11:21 + | +11 | use crate::result::{WSError, WsFuncError}; + | ^^^^^^^ + +warning: unused import: `kv_interface::KvOps` + --> src/main/src/general/app/mod.rs:21:13 + | +21 | kv_interface::KvOps, + | ^^^^^^^^^^^^^^^^^^^ + +warning: unused import: `ErrCvt` + --> src/main/src/general/app/mod.rs:37:14 + | +37 | result::{ErrCvt, WSResult, WsFuncError}, + | ^^^^^^ + +warning: unused import: `std::path::PathBuf` + --> src/main/src/general/app/mod.rs:46:5 + | +46 | use std::path::PathBuf; + | ^^^^^^^^^^^^^^^^^^ + +warning: unused import: `super::CacheModeVisitor` + --> src/main/src/general/data/m_data_general/dataitem.rs:17:5 | -11 | use crate::util::zip; +17 | use super::CacheModeVisitor; + | ^^^^^^^^^^^^^^^^^^^^^^^ + +warning: unused import: `base64::Engine` + --> src/main/src/general/data/m_data_general/batch.rs:29:5 + | +29 | use base64::Engine; + | ^^^^^^^^^^^^^^ + +warning: unused import: `tokio::io::AsyncWriteExt` + --> src/main/src/general/data/m_data_general/batch.rs:31:5 + | +31 | use tokio::io::AsyncWriteExt; + | ^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: unused imports: `DataMetaGetRequest` and `DataVersionScheduleRequest` + --> src/main/src/general/data/m_data_general/mod.rs:16:29 + | +16 | self, DataMeta, DataMetaGetRequest, DataVersionScheduleRequest, WriteOneDataRequest, + | ^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: unused import: `WsRuntimeErr` + --> src/main/src/general/data/m_data_general/mod.rs:28:46 + | +28 | result::{WSError, WSResult, WSResultExt, WsRuntimeErr, WsSerialErr, WsNetworkLogicErr}, + | ^^^^^^^^^^^^ + +warning: unused import: `enum_as_inner::EnumAsInner` + --> src/main/src/general/data/m_data_general/mod.rs:36:5 + | +36 | use enum_as_inner::EnumAsInner; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: unused import: `dashmap::DashMap` + --> src/main/src/general/data/m_data_general/mod.rs:38:5 + | +38 | use dashmap::DashMap; | ^^^^^^^^^^^^^^^^ + +warning: unused import: `std::ops::Range` + --> src/main/src/general/data/m_data_general/mod.rs:40:5 + | +40 | use std::ops::Range; + | ^^^^^^^^^^^^^^^ + +warning: unused import: `std::future::Future` + --> src/main/src/general/data/m_data_general/mod.rs:51:5 | - = note: `#[warn(unused_imports)]` on by default +51 | use std::future::Future; + | ^^^^^^^^^^^^^^^^^^^ + +warning: unused imports: `m_data_general::DataItemIdx`, `network::proto`, and `self` + --> src/main/src/master/app/fddg.rs:6:16 + | +6 | data::{self, m_data_general::DataItemIdx}, + | ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +7 | network::proto, + | ^^^^^^^^^^^^^^ -warning: unused import: `crate::general::network::proto::DataItem` - --> src/main/src/general/data/m_data_general/mod.rs:10:5 +warning: unused import: `dashmap::DashMap` + --> src/main/src/master/app/fddg.rs:11:5 | -10 | use crate::general::network::proto::DataItem; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +11 | use dashmap::DashMap; + | ^^^^^^^^^^^^^^^^ -warning: unused import: `WriteSplitTaskResult` - --> src/main/src/general/data/m_data_general/mod.rs:11:36 +warning: unused import: `std::collections::HashSet` + --> src/main/src/master/app/fddg.rs:13:5 | -11 | use dataitem::{DataItemArgWrapper, WriteSplitTaskResult}; - | ^^^^^^^^^^^^^^^^^^^^ +13 | use std::collections::HashSet; + | ^^^^^^^^^^^^^^^^^^^^^^^^^ -warning: unused imports: `WSError`, `WSResult`, `WsIoErr` - --> src/main/src/general/m_os/zip.rs:2:21 +warning: unused imports: `AffinityPattern`, 
`AffinityRule`, `AppType`, `FnMeta`, and `NodeTag` + --> src/main/src/master/app/m_app_master.rs:3:27 | -2 | use crate::result::{WSError, WSResult, WsIoErr}; - | ^^^^^^^ ^^^^^^^^ ^^^^^^^ +3 | use crate::general::app::{AffinityPattern, AffinityRule, AppType, FnMeta, NodeTag}; + | ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^ ^^^^^^^ ^^^^^^ ^^^^^^^ -warning: unused imports: `Cursor`, `File`, `Read`, `Seek`, `Write`, `os::unix::fs::PermissionsExt`, `path::Path`, `self`, `self` - --> src/main/src/general/m_os/zip.rs:4:10 +warning: unused import: `crate::general::network::m_p2p::RPCCaller` + --> src/main/src/master/app/m_app_master.rs:5:5 | -4 | fs::{self, File}, - | ^^^^ ^^^^ -5 | io::{self, Cursor, Read, Seek, Write}, - | ^^^^ ^^^^^^ ^^^^ ^^^^ ^^^^^ -6 | os::unix::fs::PermissionsExt, - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -7 | path::Path, - | ^^^^^^^^^^ +5 | use crate::general::network::m_p2p::RPCCaller; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -warning: unused import: `walkdir::WalkDir` - --> src/main/src/general/m_os/zip.rs:9:5 +warning: unused imports: `distribute_task_req::Trigger` and `self` + --> src/main/src/master/app/m_app_master.rs:6:44 | -9 | use walkdir::WalkDir; - | ^^^^^^^^^^^^^^^^ +6 | use crate::general::network::proto::sche::{self, distribute_task_req::Trigger}; + | ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -warning: unused imports: `result::ZipError`, `write::FileOptions` - --> src/main/src/general/m_os/zip.rs:10:11 +warning: unused import: `FunctionTriggerContext` + --> src/main/src/master/app/m_app_master.rs:9:31 + | +9 | use crate::master::m_master::{FunctionTriggerContext, Master}; + | ^^^^^^^^^^^^^^^^^^^^^^ + +warning: unused import: `WsFuncError` + --> src/main/src/master/app/m_app_master.rs:10:31 | -10 | use zip::{result::ZipError, write::FileOptions}; - | ^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^ +10 | use crate::result::{WSResult, WsFuncError}; + | ^^^^^^^^^^^ -warning: unused import: `crate::general::data::m_data_general::CacheModeVisitor` +warning: unused import: `crate::sys::NodeID` + --> src/main/src/master/app/m_app_master.rs:11:5 + | +11 | use crate::sys::NodeID; + | ^^^^^^^^^^^^^^^^^^ + +warning: unused imports: `HashMap` and `HashSet` + --> src/main/src/master/app/m_app_master.rs:15:24 + | +15 | use std::collections::{HashMap, HashSet}; + | ^^^^^^^ ^^^^^^^ + +warning: unused imports: `AtomicU32` and `Ordering` + --> src/main/src/master/app/m_app_master.rs:16:25 + | +16 | use std::sync::atomic::{AtomicU32, Ordering}; + | ^^^^^^^^^ ^^^^^^^^ + +warning: unused import: `std::time::Duration` + --> src/main/src/master/app/m_app_master.rs:17:5 + | +17 | use std::time::Duration; + | ^^^^^^^^^^^^^^^^^^^ + +warning: unused import: `crate::general::app::m_executor::EventCtx` + --> src/main/src/master/data/m_data_master.rs:1:5 + | +1 | use crate::general::app::m_executor::EventCtx; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: unused import: `crate::general::app::m_executor::FnExeCtxAsync` + --> src/main/src/master/data/m_data_master.rs:3:5 + | +3 | use crate::general::app::m_executor::FnExeCtxAsync; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: unused import: `crate::general::app::m_executor::FnExeCtxAsyncAllowedType` --> src/main/src/master/data/m_data_master.rs:4:5 | -4 | use crate::general::data::m_data_general::CacheModeVisitor; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +4 | use crate::general::app::m_executor::FnExeCtxAsyncAllowedType; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -warning: unused imports: 
`CACHE_MODE_MAP_COMMON_KV_MASK`, `CACHE_MODE_TIME_FOREVER_MASK` - --> src/main/src/master/data/m_data_master.rs:17:28 +warning: unused imports: `AffinityPattern`, `AffinityRule`, and `NodeTag` + --> src/main/src/master/data/m_data_master.rs:7:27 + | +7 | use crate::general::app::{AffinityPattern, AffinityRule, NodeTag}; + | ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^ ^^^^^^^ + +warning: unused imports: `DataItemIdx` and `DataSetMeta` + --> src/main/src/master/data/m_data_master.rs:19:37 + | +19 | CacheMode, DataGeneral, DataItemIdx, DataSetMeta, DataSetMetaBuilder, DataSplit, + | ^^^^^^^^^^^ ^^^^^^^^^^^ + +warning: unused imports: `AffinityPattern`, `AffinityRule`, `AppType`, and `FnMeta` + --> src/main/src/master/m_master.rs:16:15 + | +16 | app::{AffinityPattern, AffinityRule, AppMetaManager, AppType, DataEventTrigger, FnMeta}, + | ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^ ^^^^^^^ ^^^^^^ + +warning: unused import: `RwLockReadGuard` + --> src/main/src/util/container/sync_trie.rs:1:27 + | +1 | use parking_lot::{RwLock, RwLockReadGuard}; + | ^^^^^^^^^^^^^^^ + +warning: unused import: `std::thread` + --> src/main/src/util/container/sync_trie.rs:5:5 + | +5 | use std::thread; + | ^^^^^^^^^^^ + +warning: unused import: `std::time::Duration` + --> src/main/src/util/container/sync_trie.rs:6:5 + | +6 | use std::time::Duration; + | ^^^^^^^^^^^^^^^^^^^ + +error: fields `version`, `block_type`, and `total_blocks` are never read + --> src/main/src/general/data/m_data_general/batch.rs:52:9 + | +50 | pub(super) struct BatchTransfer { + | ------------- fields in this struct +51 | pub unique_id: Vec, +52 | pub version: u64, + | ^^^^^^^ +53 | pub block_type: proto::BatchDataBlockType, + | ^^^^^^^^^^ +54 | pub total_blocks: u32, + | ^^^^^^^^^^^^ | -17 | EachNodeSplit, CACHE_MODE_MAP_COMMON_KV_MASK, CACHE_MODE_TIME_FOREVER_MASK, - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +note: the lint level is defined here + --> src/main/src/main.rs:7:5 + | +7 | dead_code, + | ^^^^^^^^^ + +error: method `add_block` is never used + --> src/main/src/general/data/m_data_general/batch.rs:104:18 + | +63 | impl BatchTransfer { + | ------------------ method in this implementation +... +104 | pub async fn add_block(&self, index: u32, data: Vec) -> WSResult { + | ^^^^^^^^^ + +error: method `handle_block` is never used + --> src/main/src/general/data/m_data_general/batch.rs:211:18 + | +173 | impl BatchManager { + | ----------------- method in this implementation +... 
+211 | pub async fn handle_block(
 | ^^^^^^^^^^^^

error: method `call_batch_data` is never used
 --> src/main/src/general/data/m_data_general/batch.rs:238:25
 |
236 | impl DataGeneral {
 | ---------------- method in this implementation
237 | /// 发起批量数据传输
238 | pub(super) async fn call_batch_data(
 | ^^^^^^^^^^^^^^^

warning: `wasm_serverless` (bin "wasm_serverless") generated 38 warnings
error: could not compile `wasm_serverless` (bin "wasm_serverless") due to 4 previous errors; 38 warnings emitted
diff --git a/design_of_new_batch.md b/design_of_new_batch.md
index bb6ba00..c360c6d 100755
--- a/design_of_new_batch.md
+++ b/design_of_new_batch.md
@@ -374,7 +374,7 @@ pub trait DataSource: Send + Sync + 'static {
 }
 
 /// 批量传输数据
-pub async fn (
+pub async fn batch_transfer(
 unique_id: Vec,
 version: u64,
 target_node: NodeID,
diff --git a/plan.md b/plan.md
new file mode 100644
index 0000000..a767a78
--- /dev/null
+++ b/plan.md
@@ -0,0 +1,84 @@
+\begin{abstract}
+Serverless computing has transformed cloud resource management; however, the separation of scaling and scheduling in current methods leads to low resource utilization, increased cold start delays, and high operational costs. This paper presents a joint optimization mechanism, Cejoss, which integrates a reinforcement learning-based scaling strategy (RELA) with a data-aware pre-scheduling mechanism (DECODS). By sharing scaling state and task information in real time, the system enables coordinated decision-making between scaling and scheduling to achieve joint optimization. With this joint optimization, the system is able to dynamically adjust the number of instances and pre-schedule tasks to reduce cold start delays, ultimately achieving a near-optimal resource allocation. Moreover, the scaler utilizes reinforcement learning to anticipate workload fluctuations and efficiently regulate instance counts, while the scheduler employs a data-aware pre-scheduling strategy to optimize task assignments and minimize latency. Experimental results show that in single-function scenarios the quality-price ratio improves by at least 50\%, while in multi-function scenarios it increases by at least 17\%, demonstrating significant advantages in enhancing system performance and reducing costs. This approach is adaptable to various workload scales and application types, offering a novel perspective for efficient serverless platform operations.
+\end{abstract}
+
+\begin{IEEEkeywords}
+Cloud Computing, Serverless, Scaler, Scheduler, Joint Optimization
+\end{IEEEkeywords}
+
+\section{Introduction}
+
+Serverless computing has become a paradigm shift in cloud resource management, offering auto-scaling, pay-as-you-go pricing, and rapid application development. This innovative approach offers versatile solutions for various workloads, including web services, data analysis, scientific computing, and machine learning inference. Architecturally, a serverless system can be divided into components such as the scaler, scheduler, observation, and storage. Among these, the scaler and scheduler play critical roles in the performance and cost efficiency of serverless applications. The scaler determines the number and placement of function instances, directly affecting resource utilization and cold starts, while the scheduler's decisions on task assignment impact execution latency and load distribution. Their interaction fundamentally shapes the system's ability to handle varying workloads efficiently.
+
+Current serverless systems face three critical challenges that significantly impact their performance and efficiency. At the architectural level, the lack of coordination between scaling and scheduling components leads to fundamental limitations. This independent operation creates information isolation, where scaling decisions are made without knowledge of scheduling states and vice versa, resulting in suboptimal resource allocation and increased cold start delays.
+
+In the scaling component, current mechanisms rely heavily on static parameters and manual tuning, making them unable to adapt to dynamic workloads. The delayed response to workload changes not only leads to resource inefficiency but also hinders the scheduler's ability to make informed placement decisions. This limitation becomes particularly evident in scenarios with varying request patterns, where static scaling rules fail to provide appropriate resource levels for optimal scheduling.
+
+On the scheduling side, existing strategies lack the sophistication needed for efficient task placement in a modern serverless environment. Traditional approaches either trigger tasks reactively after predecessor completion (causing unnecessary cold starts) or pre-schedule all tasks aggressively (wasting resources). Moreover, they often ignore critical factors like DAG data transmission delays, which significantly impact overall performance. These scheduling limitations, combined with the lack of coordination with scaling decisions, further compound the system's inefficiencies.
+
+To address these challenges, this paper presents Cejoss (Cost-effective Joint Optimization for Scaler and Scheduler), which makes the following key contributions:
+
+(1) The core of our contribution is a novel joint optimization framework, \textbf{Cejoss}, which merges the scaling node selection and scheduling node selection stages. This integration enables real-time sharing of resource views between components, allowing both the scaler and scheduler to maintain timely awareness of each other's decisions. Through this coordinated approach, the system achieves significant improvements in both latency and cost metrics.
+
+(2) To improve dynamic resource management, we introduce \textbf{RELA} (REinforce Learning-based scAler), which employs PPO-based reinforcement learning with a carefully designed state space, reward function, and action mapping. RELA achieves adaptive optimization across different application types and request frequencies, demonstrating superior performance in both single-function and multi-function scenarios and effectively addressing the limitations of static scaling parameters.
+
+(3) For efficient task scheduling, we develop \textbf{DECODS} (DEcoupled CO-scaling Data-aware Scheduler), which implements a three-stage scheduling approach comprising Task Collection, Scaling Nodes Selection, and Task Nodes Selection. DECODS introduces moderate pre-scheduling to reduce cold start time while avoiding over-allocation of resources. By considering DAG data transmission latency for optimized task placement, it achieves a balanced trade-off between pre-scheduling benefits and resource efficiency.
+
+(4) Our comprehensive experimental evaluation demonstrates that this integrated approach significantly improves system performance. The quality-price ratio improves by at least 50\% in single-function scenarios and 17\% in multi-function scenarios compared to state-of-the-art approaches.
These results validate the effectiveness of our joint optimization strategy across various workload scales and application types. + + +\section{Background and Motivation} + +\subsection{Notations and Terms} + +Before discussing the challenges in current serverless systems, we first define several key terms used throughout this paper: + +\textbf{Function:} A single, stateless piece of application logic deployed to the cloud. Each function has its own code and resource configuration (memory, timeout, etc.) and is invoked on-demand in response to events or requests. For example, a function might resize an image or process a database query. + +\textbf{Instance:} The runtime environment (typically a container or virtual machine) that executes function tasks. An instance has loaded the function code and can serve multiple tasks concurrently. New instances may be started (incurring cold start delays) or terminated by the platform based on demand. + +\textbf{Task (Invocation):} A runtime instance of a function execution, triggered by a specific event or request. Each task runs the function code with given input and produces output. Tasks are the units of work that must be scheduled for execution. + +\textbf{Quality-Price Ratio:} A comprehensive metric that evaluates both system performance and resource efficiency, calculated as $QP = \frac{Performance}{Cost}$. Performance considers factors like request latency and throughput, while Cost accounts for resource consumption and instance hours. + +\textbf{Single-Function Applications:} These applications involve independent function invocations without dependencies. Common examples include API endpoints (e.g., HTTP request handlers), event processors (e.g., image resizing, video transcoding), and stateless microservices. While simpler to manage, they still require efficient scaling to handle varying request rates and optimal instance placement for load balancing. + +\textbf{Multi-Function Applications:} These applications compose multiple functions into complex DAGs, where the output of one function serves as input to others. Different applications exhibit varying characteristics in terms of computation and data transfer requirements. Video processing pipelines, for example, involve significant data movement as they transform raw uploads through decode, filter, encode, and thumbnail generation stages. Machine learning applications, on the other hand, often emphasize computational intensity, chaining functions from data preprocessing and feature extraction to model inference and result ranking. Data analytics pipelines combine both aspects, processing logs through collection, parsing, aggregation, and visualization stages, with varying demands on computation and data transfer at different stages. Each stage in these workflows represents a separate function, with both data and control dependencies flowing between them. + + +\subsection{Scaler and Scheduler} + +The core of serverless resource management consists of two key components: + +\textbf{Scaler:} The component responsible for dynamic resource management in serverless systems. It continuously monitors system metrics (e.g., request rates, CPU utilization) to determine both the appropriate number of function instances and their optimal placement across nodes. When workload increases, the scaler must decide not only how many new instances to launch, but also which nodes should host them, considering factors like node capacity, current load, and network conditions. 
Conversely, during periods of low demand, it identifies underutilized instances for termination while maintaining sufficient capacity for incoming requests. These scaling decisions directly impact both system performance (through cold start latency and execution efficiency) and cost (through resource utilization and instance hours).
+
+\textbf{Scheduler:} The component that assigns incoming tasks to available instances. When a task arrives, the scheduler must decide which instance should execute it, considering factors like instance availability, load balancing, and data locality. In modern serverless systems, schedulers must also handle complex applications represented as DAGs (Directed Acyclic Graphs), where multiple functions have dependencies and data transfer requirements.
+
+The effectiveness of these components heavily depends on how they interact and coordinate their decisions. In practice, current serverless platforms have explored two fundamentally different architectural patterns. The \textbf{Global Scheduler Only} pattern relies solely on a global scheduler, without an explicit scaling component. When a task arrives, the scheduler assigns it to a node, triggering instance creation if necessary. The instance lifecycle is managed through node-local mechanisms: instances are automatically started when tasks are scheduled (incurring cold starts), and terminated based on local policies such as idle timeouts or cache eviction strategies. This approach simplifies the architecture but leaves scaling decisions to emerge from the combined effects of task placement and local instance management. The \textbf{Decoupled Scaler \& Scheduler} pattern adopts a different approach, where separate scaling and scheduling components work independently. The scaler proactively manages instance counts based on global metrics, while the scheduler focuses on task placement among available instances.
+
+Figure \ref{fig:arch_patterns} illustrates these contrasting architectures and their decision flows.
+
+\begin{figure}[htbp]
+\centerline{\includegraphics[width=1.0\linewidth]{ArchPatterns.png}}
+\caption{Two predominant architectural patterns in serverless platforms: (a) Global Scheduler Only pattern, where task placement triggers instance creation and local policies handle termination; (b) Decoupled pattern, where explicit scaling decisions are made independently of task scheduling.}
+\label{fig:arch_patterns}
+\end{figure}
+
+\begin{figure}[htbp]
+\centerline{\includegraphics[width=1.0\linewidth]{SimpleCompare.png}}
+\caption{The metrics of three basic strategies in terms of average request latency, average request cost, and the number of instances, tested under medium workload. (Each latency bar consists of a dark part and a light part: the dark part represents execution latency and the light part represents cold start latency; the details of the testing environment are described in Section \ref{sec:exp_env}.)}
+\label{fig:simplecmp}
+\end{figure}
+
+
+To evaluate these patterns, we implemented three representative strategies and analyzed their performance (Figure \ref{fig:simplecmp}). For the Global Scheduler Only pattern, we tested two approaches: the \textbf{Hash-Based Strategy}, which maintains a single instance per function to minimize cold starts but suffers from high execution latency under load, and the \textbf{No-Scaler Greedy Strategy}, which creates multiple replicas but leads to over-provisioning.
For the Decoupled pattern, we implemented the \textbf{HPA Scaler + Greedy Strategy}, which shows detection lag in scaling decisions and increased cold starts due to poor coordination. As shown in the experimental results, neither pattern achieves satisfactory performance, motivating the need for better coordination between scaling and scheduling decisions. + + +\subsection{Existing Improvements} + +\textbf{Advanced Scaling:} Systems like Hansel \cite{lstm_hansel} and Autopilot \cite{autopilot} have introduced predictive and learning-based scaling mechanisms to better handle dynamic workloads. However, these solutions still operate independently from scheduling decisions. + +\textbf{Improved Scheduling:} Platforms like FaaSFlow and FnSched \cite{fnsched} have developed DAG-aware and hybrid scheduling strategies. Yet, they typically lack coordination with scaling components. + +While these improvements have enhanced individual components, they fail to address the fundamental issue: the lack of coordination between scaling and scheduling decisions. This limitation motivates our investigation into a joint optimization approach that can bridge this gap. + diff --git a/src/data/dataitem.rs b/src/data/dataitem.rs deleted file mode 100644 index 3dc947e..0000000 --- a/src/data/dataitem.rs +++ /dev/null @@ -1,31 +0,0 @@ -/// A waiter for tracking completion of all write split data tasks -pub struct WriteSplitDataWaiter { - rx: broadcast::Receiver<()>, - total_tasks: usize, -} - -impl WriteSplitDataWaiter { - /// Wait for all tasks to complete - pub async fn wait(mut self) -> WSResult<()> { - let mut completed = 0; - while completed < self.total_tasks { - self.rx.recv().await.map_err(|e| { - WsDataError::WaitTaskError { - reason: format!("Failed to receive task completion: {}", e) - } - })?; - completed += 1; - } - Ok(()) - } -} - -impl Handle { - /// Gets a waiter that will complete when all tasks are finished - pub fn get_all_tasks_waiter(&self) -> WriteSplitDataWaiter { - WriteSplitDataWaiter { - rx: self.task_complete_tx.subscribe(), - total_tasks: self.tasks.lock().unwrap().len(), - } - } -} \ No newline at end of file diff --git a/src/data/write_split.rs b/src/data/write_split.rs deleted file mode 100644 index 22bf8b2..0000000 --- a/src/data/write_split.rs +++ /dev/null @@ -1,40 +0,0 @@ -/// A waiter for tracking completion of all write split data tasks -pub struct WriteSplitDataWaiter { - rx: broadcast::Receiver<()>, - total_tasks: usize, -} - -impl WriteSplitDataWaiter { - /// Wait for all tasks to complete - pub async fn wait(mut self) -> WSResult<()> { - let mut completed = 0; - while completed < self.total_tasks { - self.rx.recv().await.map_err(|e| { - WsDataError::WaitTaskError { - reason: format!("Failed to receive task completion: {}", e) - } - })?; - completed += 1; - } - Ok(()) - } -} - -impl Handle { - /// Gets a waiter that will complete when all tasks are finished - pub fn get_all_tasks_waiter(&self) -> WriteSplitDataWaiter { - WriteSplitDataWaiter { - rx: self.task_complete_tx.subscribe(), - total_tasks: self.tasks.lock().unwrap().len(), - } - } -} - -// 需要在 errors.rs 中添加新的错误类型 -#[derive(Debug)] -pub enum WsDataError { - // ... existing errors ... - WaitTaskError { - reason: String, - }, -} \ No newline at end of file diff --git a/src/errors.rs b/src/errors.rs deleted file mode 100644 index b4aca1a..0000000 --- a/src/errors.rs +++ /dev/null @@ -1,18 +0,0 @@ -#[derive(Debug)] -pub enum WsDataError { - // ... existing errors ... 
- WaitTaskError { - reason: String, - }, -} - -impl std::fmt::Display for WsDataError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - // ... existing error matches ... - WsDataError::WaitTaskError { reason } => { - write!(f, "Failed to wait for tasks: {}", reason) - } - } - } -} \ No newline at end of file diff --git a/src/main/src/general/app/mod.rs b/src/main/src/general/app/mod.rs index a0d57a8..01859e8 100644 --- a/src/main/src/general/app/mod.rs +++ b/src/main/src/general/app/mod.rs @@ -6,7 +6,6 @@ pub mod instance; pub mod m_executor; pub mod v_os; -use std::path::PathBuf; use super::data::m_data_general::{DataSetMetaV2, GetOrDelDataArg, GetOrDelDataArgType}; use super::m_os::APPS_REL_DIR; use crate::general::app::app_native::native_apps; @@ -1018,40 +1017,20 @@ impl AppMetaManager { // mv temp app to formal app dir let rel_app_dir = format!("{}/{}", APPS_REL_DIR, appname); - // 修改前: let formal_app_dir = self.view.os().file_path.join(rel_app_dir); rel_app_dir是字符串类型发生所有权转移,然而在下方还被使用了,选择修改为clone 曾俊 - let formal_app_dir = self.view.os().file_path.join(rel_app_dir.clone()); - //let _ = fs::rename(&tmpappdir, &formal_app_dir).map_err(|e| WSError::from(WsDataError::FileOpenErr { path: (), err: () })); - //虞光勇修改:因为在调用 fs::rename 并处理其结果时遇到了类型不匹配的问题。具体来说, - // 在构造WsDataError::FileOpenErr 时,path 字段的类型不匹配:期望的是 PathBuf 类型,但实际传入的是 ()(即单元类型)。 - //修改: - // let result = fs::rename(&tmpappdir, &formal_app_dir).map_err(|e| { - // 这里result变量下方没有再使用 加了一个标志 曾俊 - let _result = fs::rename(&tmpappdir, &formal_app_dir).map_err(|e| { - WSError::from(WsDataError::FileOpenErr { - path: PathBuf::from(formal_app_dir.clone()), - err: e, - }) - }); + let formal_app_dir = self.view.os().file_path.join(rel_app_dir); + let _ = fs::rename(&tmpappdir, &formal_app_dir).map_err(|e| WSError::from(WsDataError::FileOpenErr { path: (), err: () })); // 3. broadcast meta and appfile let write_data_id = format!("{}{}", DATA_UID_PREFIX_APP_META, appname); let write_datas = vec![ DataItemArgWrapper::from_bytes(bincode::serialize(&appmeta).unwrap()), - //DataItemArgWrapper::from_file(rel_app_dir), - //虞光勇修改,因为编译器提示在调用 DataItemArgWrapper::from_file 方法时,传递的参数类型不匹配。 - // 具体来说,from_file 方法期望的是一个 PathBuf 类型的参数,但你传递的是一个 String 类型。 - //修改后: - //DataItemArgWrapper::from_file(rel_app_dir.into()), - //这里的 from_file 方法返回一个 Result, - // 但你直接将其赋值给一个期望 DataItemArgWrapper 类型的变量或参数,导致类型不匹配。使用 ? 
操作符 - //DataItemArgWrapper::from_file(rel_app_dir.into())?, - DataItemArgWrapper::from_file(rel_app_dir.into())?, + DataItemArgWrapper::from_file(rel_app_dir), ]; tracing::debug!( "app data size: {:?}", write_datas .iter() - // 修改前:.map(|v| v.to_string()) 去掉了这一行,为结构体派生了debug特征 曾俊 + .map(|v| v.to_string()) .collect::>() ); self.view @@ -1071,15 +1050,14 @@ impl AppMetaManager { } pub fn set_app_meta_list(&self, list: Vec) { - //发送逻辑处理 曾俊 self.view - .kv_store_engine() - .set( - KeyTypeServiceList, - &serde_json::to_string(&list).unwrap().into(), - false, - ) - .todo_handle("This part of the code needs to be implemented."); + .kv_store_engine() + .set( + KeyTypeServiceList, + &serde_json::to_string(&list).unwrap().into(), + false, + ) + .todo_handle(); } pub fn get_app_meta_list(&self) -> Vec { let res = self diff --git a/src/main/src/general/data/m_data_general/batch.rs b/src/main/src/general/data/m_data_general/batch.rs index 4600187..e27d3cd 100644 --- a/src/main/src/general/data/m_data_general/batch.rs +++ b/src/main/src/general/data/m_data_general/batch.rs @@ -96,12 +96,10 @@ impl DataGeneral { }), dataset_unique_id: unique_id.clone(), data_item_idx: 0, // 因为是整体传输,所以使用0 - // block_type: match data.as_ref() { - // DataItemSource::Memory { .. } => proto::BatchDataBlockType::Memory as i32, - // DataItemSource::File { .. } => proto::BatchDataBlockType::File as i32, - // }, - //原代码block_type: data.as_ref(), 类型不匹配,使用自定义的to_data_item函数转化 曾俊 - block_type: Some(data.to_data_item()), + block_type: match data.as_ref() { + DataItemSource::Memory { .. } => proto::BatchDataBlockType::Memory as i32, + DataItemSource::File { .. } => proto::BatchDataBlockType::File as i32, + }, block_index: block_idx as u32, data: block_data, operation: proto::DataOpeType::Write as i32, diff --git a/src/main/src/general/data/m_data_general/dataitem.rs b/src/main/src/general/data/m_data_general/dataitem.rs index fd2c083..757c165 100644 --- a/src/main/src/general/data/m_data_general/dataitem.rs +++ b/src/main/src/general/data/m_data_general/dataitem.rs @@ -1,16 +1,9 @@ use crate::general::data::m_data_general::UniqueId; -use crate::LogicalModulesRef;//虞光勇修改,修改内容:增加use crate::LogicalModulesRef;来导入 LogicalModulesRef。 -use ::zip::CompressionMethod;//虞光勇修改,因为编译器无法找到 zip 模块中的 CompressionMethod,需加入头文件(860续) -use crate::general::m_os::OperatingSystem; use crate::general::network::proto; use crate::general::data::m_data_general::{DataItemIdx, DataSplitIdx, GetOrDelDataArgType}; use crate::general::network::proto_ext::{NewPartialFileDataArg, ProtoExtDataItem}; -use crate::logical_module_view_impl; -use crate::modules_global_bridge::try_get_modules_ref; -use crate::result::{WSError, WSResult, WSResultExt, WsDataError}; -use crate::util::zip; +use crate::result::{WSError, WSResult, WsDataError}; use futures::stream::{FuturesUnordered, StreamExt}; -use std::cell::RefCell; use std::collections::btree_set; use std::ops::Range; use std::path::PathBuf; @@ -20,13 +13,9 @@ use tokio::sync::mpsc; use tokio::sync::broadcast; use tracing; use base64::{engine::general_purpose::STANDARD, Engine as _}; -use std::sync::RwLock; const DEFAULT_BLOCK_SIZE: usize = 4096; -logical_module_view_impl!(DataItemView); -logical_module_view_impl!(DataItemView,os,OperatingSystem); - /// 用于遍历数据项索引的迭代器 #[derive(Debug)] pub(super) enum WantIdxIter<'a> { @@ -240,13 +229,10 @@ pub struct WriteSplitTaskResult { pub enum WriteSplitDataTaskGroup { /// 文件写入模式 ToFile { - is_dir: bool, /// 任务唯一标识 unique_id: UniqueId, - /// 临时文件路径,用作传输 - tmp_file_path: PathBuf, - /// 
目标文件路径, 用作最终使用 - target_file_path: PathBuf, + /// 目标文件路径 + file_path: PathBuf, /// 任务列表 tasks: Vec>, /// 接收新任务的通道 @@ -262,18 +248,8 @@ pub enum WriteSplitDataTaskGroup { ToMem { /// 任务唯一标识 unique_id: UniqueId, - - - // /// 共享内存区域 - // shared_mem: RefCell>>, - /// 费新文修改,修改内容:shared_mem: RefCell>>, - /// 修改原因:shared_mem: RefCell>>, 需要修改为 RefCell>, - /// 修改后:shared_mem: RefCell>, /// 共享内存区域 - /// - // shared_mem: RefCell>, 修改为RwLock>, 曾俊 - shared_mem: RwLock>, - + shared_mem: SharedMemHolder, /// 任务列表 tasks: Vec>, /// 接收新任务的通道 @@ -292,36 +268,30 @@ impl WriteSplitDataTaskGroup { pub async fn new( unique_id: UniqueId, total_size: usize, - block_type: proto::data_item::DataItemDispatch, + block_type: proto::BatchDataBlockType, version: u64, - // file_name: Option<&str>, 函数体并没有用到这个参数 查看引用发现也没有使用到这个参数 这里直接删除 曾俊 ) -> WSResult<(Self, WriteSplitDataTaskHandle)> { let (tx, rx) = mpsc::channel(32); let (broadcast_tx, _) = broadcast::channel::<()>(32); let broadcast_tx = Arc::new(broadcast_tx); - // let pathbase=DataItemView::new(try_get_modules_ref().todo_handle("Failed to get modules ref when create WriteSplitDataTaskGroup")?).os().file_path; - //所有权发生变化 添加克隆方法 曾俊 - let pathbase=DataItemView::new(try_get_modules_ref().todo_handle("Failed to get modules ref when create WriteSplitDataTaskGroup")?).os().file_path.clone(); match block_type { - proto::data_item::DataItemDispatch::File(file_data) => { - let tmp_file_path = pathbase.join(format!("{}.data", + proto::BatchDataBlockType::File => { + let file_path = PathBuf::from(format!("{}.data", STANDARD.encode(&unique_id))); let handle = WriteSplitDataTaskHandle { tx, write_type: WriteSplitDataType::File { - path: tmp_file_path.clone(), + path: file_path.clone(), }, version, broadcast_tx: broadcast_tx.clone(), }; let group = Self::ToFile { - is_dir: file_data.is_dir_opt, unique_id, - tmp_file_path, - target_file_path: pathbase.join(file_data.file_name_opt.as_str()), + file_path, tasks: Vec::new(), rx, expected_size: total_size, @@ -331,7 +301,7 @@ impl WriteSplitDataTaskGroup { Ok((group, handle)) } - proto::data_item::DataItemDispatch::RawBytes(_) => { + proto::BatchDataBlockType::Memory => { let shared_mem = SharedMemHolder { data: Arc::new(vec![0; total_size]), }; @@ -347,8 +317,7 @@ impl WriteSplitDataTaskGroup { let group = Self::ToMem { unique_id, - // 原代码:shared_mem, 类型不匹配 曾俊 - shared_mem:RwLock::new(Some(shared_mem)), + shared_mem, tasks: Vec::new(), rx, expected_size: total_size, @@ -380,7 +349,7 @@ impl WriteSplitDataTaskGroup { loop { // 1. 检查完成状态 - match self.try_complete().await.todo_handle("Failed to complete write split data tasks")? { + match self.try_complete()? { Some(item) => return Ok(item), None => {} // 继续等待 } @@ -421,9 +390,9 @@ impl WriteSplitDataTaskGroup { /// - Ok(Some(item)) - 写入完成,返回数据项 /// - Ok(None) - 写入未完成 /// - Err(e) - 写入出错 - async fn try_complete(&self) -> WSResult> { + fn try_complete(&self) -> WSResult> { match self { - Self::ToFile { current_size, expected_size, tmp_file_path, target_file_path, unique_id, is_dir, .. } => { + Self::ToFile { current_size, expected_size, file_path, unique_id, .. 
} => { if *current_size > *expected_size { Err(WSError::WsDataError(WsDataError::BatchTransferError { request_id: proto::BatchRequestId { @@ -434,44 +403,7 @@ impl WriteSplitDataTaskGroup { current_size, expected_size, unique_id) })) } else if *current_size == *expected_size { - if *is_dir{ - // unzip to file_path - // - open received file with std api - let file=std::fs::File::open(tmp_file_path).map_err(|e|{ - tracing::error!("Failed to open file: {}", e); - WSError::from(WsDataError::FileOpenErr { - path: tmp_file_path.clone(), - err: e, - }) - })?; - let tmp_file_path=tmp_file_path.clone(); - let target_file_path=target_file_path.clone(); - tokio::task::spawn_blocking(move || - zip_extract::extract(file,target_file_path.as_path() , false).map_err(|e|{ - WSError::from(WsDataError::UnzipErr { - path: tmp_file_path, - err: e, - }) - }) - ).await.unwrap().todo_handle("Failed to unzip file")?; - }else{ - // rename tmp_file_path to target_file_path - std::fs::rename(tmp_file_path, target_file_path).map_err(|e|{ - tracing::error!("Failed to rename file: {}", e); - WSError::from(WsDataError::FileRenameErr { - from: tmp_file_path.clone(), - to: target_file_path.clone(), - err: e, - }) - })?; - } - Ok(Some(proto::DataItem{ - data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(proto::FileData{ - file_name_opt: target_file_path.to_string_lossy().to_string(), - is_dir_opt: *is_dir, - file_content: vec![], - })), - })) + Ok(Some(proto::DataItem::new_file_data(file_path.clone(), false))) } else { Ok(None) } @@ -487,11 +419,7 @@ impl WriteSplitDataTaskGroup { current_size, expected_size, unique_id) })) } else if *current_size == *expected_size { - Ok(Some(proto::DataItem{ - //曾俊 随RwLock数据类型改动 - // data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(shared_mem.borrow_mut().take().unwrap().try_take_data().expect("only group can take data once"))), - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(shared_mem.write().expect("Failed to lock RwLock for writing").take().unwrap().try_take_data().expect("only group can take data once"))), - })) + Ok(Some(proto::DataItem::new_raw_bytes(shared_mem.clone()))) } else { Ok(None) } @@ -500,25 +428,6 @@ impl WriteSplitDataTaskGroup { } } -/// 简化的任务完成等待器 -pub struct WriteSplitDataWaiter { - rx: broadcast::Receiver<()>, -} - -impl WriteSplitDataWaiter { - /// 等待所有任务完成 - pub async fn wait(mut self) -> WSResult<()> { - // 持续接收直到通道关闭 - while let Ok(_) = self.rx.recv().await { - // 不需要处理具体消息内容,只需要知道有消息到达 - } - - // 通道关闭表示所有发送端都已释放 - Ok(()) - } -} - - /// 写入分片任务的句柄 /// 用于提交新的分片任务和等待任务完成 #[derive(Clone)] @@ -542,12 +451,6 @@ impl WriteSplitDataTaskHandle { self.version } - pub fn get_all_tasks_waiter(&self) -> WriteSplitDataWaiter { - WriteSplitDataWaiter { - rx: self.broadcast_tx.subscribe(), - } - } - /// 提交新的分片任务 /// /// # 参数 @@ -559,8 +462,7 @@ impl WriteSplitDataTaskHandle { /// * `Err(e)` - 任务提交失败,可能是通道已关闭 pub async fn submit_split(&self, idx: DataSplitIdx, data: proto::DataItem) -> WSResult<()> { let task = match &self.write_type { - // WriteSplitDataType::File { path } | WriteSplitDataType::Dir { path } => { 原WriteSplitDataType::Dir忽视了zip_file字段 发现没有用到修改为直接忽视 曾俊 - WriteSplitDataType::File { path } | WriteSplitDataType::Dir { path ,..} => { + WriteSplitDataType::File { path } => { let path = path.clone(); let offset = idx; let data = data.as_raw_bytes().unwrap_or(&[]).to_vec(); @@ -652,16 +554,6 @@ impl WriteSplitDataTaskHandle { Ok(()) } - - // 在任务处理逻辑中保持发送端的引用 - pub async fn process_tasks(&mut self) -> 
WSResult<()> { - let _tx_holder = self.broadcast_tx.clone(); // 保持发送端存活 - - // ...任务处理逻辑... - - // 当所有任务完成,_tx_holder被释放,广播通道自动关闭 - Ok(()) -} } #[derive(Debug)] @@ -699,21 +591,6 @@ impl DataItemSource { } } - //添加一个DataItemSource转换到DataItem的函数 曾俊 - pub fn to_data_item(&self) -> proto::DataItem { - match self { - DataItemSource::Memory { data } => proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(data.clone())), - }, - DataItemSource::File { path } => proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(proto::FileData { - file_name_opt: path.to_str().map_or_else(|| String::from(""), |s| s.to_string()), // 这里需要根据实际情况调整类型转换 - ..Default::default() // 假设 FileData 有其他字段,这里使用默认值 - })), - }, - } - } - pub async fn size(&self) -> WSResult { match self { DataItemSource::Memory { data } => Ok(data.len()), @@ -731,13 +608,13 @@ impl DataItemSource { } } } - - // pub fn block_type(&self) -> proto::BatchDataBlockType { - // match self { - // DataItemSource::Memory { .. } => proto::BatchDataBlockType::Memory, - // DataItemSource::File { .. } => proto::BatchDataBlockType::File, - // } - // } + + pub fn block_type(&self) -> proto::BatchDataBlockType { + match self { + DataItemSource::Memory { .. } => proto::BatchDataBlockType::Memory, + DataItemSource::File { .. } => proto::BatchDataBlockType::File, + } + } pub async fn get_block(&self, block_idx: usize) -> WSResult> { match self { @@ -818,7 +695,7 @@ impl DataItemExt for DataItemSource { } } -#[derive(Debug, Clone)] +#[derive(Debug)] enum DataItemZip { /// 未初始化状态 Uninitialized, @@ -830,8 +707,6 @@ enum DataItemZip { } } -//派生显示特征 曾俊 -#[derive(Debug, Clone)] pub struct DataItemArgWrapper { pub dataitem: proto::DataItem, /// 目录压缩状态 @@ -839,41 +714,22 @@ pub struct DataItemArgWrapper { } impl DataItemArgWrapper { - - // 根据传入的DataItem类型新建一个DataItemArgWrapper实例, tmpzipfile默认为Uninitialized。 曾俊 - pub fn new(value: Vec) -> Self { - DataItemArgWrapper { - dataitem:proto::DataItem {data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(value))}, - tmpzipfile: DataItemZip::Uninitialized, - } - } - pub fn from_file(filepath: PathBuf) -> WSResult { - let view=DataItemView::new(try_get_modules_ref().map_err(|err|{ - tracing::error!("Failed to get modules ref: {}", err); - err - })?); - - //let abs_filepath=view.os().abs_file_path(filepath); - //虞光勇修改 添加.clone() - let abs_filepath=view.os().abs_file_path(filepath.clone()); - - Ok(Self { + pub fn from_file(filepath: PathBuf) -> Self { + Self { dataitem: proto::DataItem{ data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(proto::FileData{ - is_dir_opt: abs_filepath.is_dir(), + is_dir_opt: filepath.is_dir(), file_name_opt: filepath.to_str().unwrap().to_string(), file_content: vec![], })), }, tmpzipfile: DataItemZip::Uninitialized, - }) + } } pub fn from_bytes(bytes: Vec) -> Self { Self { - dataitem: proto::DataItem{ - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(bytes)), - }, + dataitem: proto::DataItem::new_raw_bytes(bytes), tmpzipfile: DataItemZip::Uninitialized, } } @@ -927,8 +783,7 @@ impl DataItemArgWrapper { // 压缩目录到临时文件 crate::util::zip::zip_dir_2_file( &filedata.file_name_opt, - //zip::CompressionMethod::Stored, - CompressionMethod::Stored,//(续)虞光勇修改,修改内容删除zip:: + zip::CompressionMethod::Stored, tmp_file.into_file(), ).await?; diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index 09892f3..b16fe88 100644 --- 
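// The batch path slices each data item into fixed-size blocks; later in this
// patch DEFAULT_BLOCK_SIZE is 4 MiB and only the final block may be short.
// A sketch of that split computation (calculate_splits itself is not shown in
// this hunk, so this is an assumed shape, not the crate's exact code):
use std::ops::Range;

fn calculate_block_ranges(total: usize, block: usize) -> Vec<Range<usize>> {
    let mut out = Vec::new();
    let mut start = 0;
    while start < total {
        let end = usize::min(start + block, total);
        out.push(start..end);
        start = end;
    }
    out
}

fn main() {
    const BLOCK: usize = 4 * 1024 * 1024;
    let splits = calculate_block_ranges(9 * 1024 * 1024, BLOCK);
    assert_eq!(splits.len(), 3); // 4 MiB + 4 MiB + 1 MiB
    assert_eq!(splits[2].len(), 1024 * 1024);
}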
a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -7,8 +7,7 @@ pub mod batch_handler; use crate::general::data::m_data_general::dataitem::{calculate_splits, WantIdxIter, WriteSplitDataTaskGroup, DataItemSource}; use crate::general::data::m_data_general::batch_handler::{BatchReceiveState, SharedWithBatchHandler}; -use crate::general::network::proto::DataItem; -use dataitem::{DataItemArgWrapper, WriteSplitTaskResult}; +use dataitem::DataItemArgWrapper; use tokio::io::{AsyncSeekExt, AsyncReadExt}; use crate::general::{ @@ -211,14 +210,9 @@ impl DataGeneral { }), dataset_unique_id: unique_id.clone(), data_item_idx: data_item_idx as u32, - // 用空的 DataItem 代替 block_type: match data.as_ref() { - DataItemSource::Memory { .. } => Some(proto::DataItem{ - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(Vec::new())), - }), - DataItemSource::File { .. } => Some(proto::DataItem{ - data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(proto::FileData { file_name_opt: String::new(), is_dir_opt: true, file_content: Vec::new() })), - }), + DataItemSource::Memory { .. } => proto::BatchDataBlockType::Memory as i32, + DataItemSource::File { .. } => proto::BatchDataBlockType::File as i32, }, block_index: block_idx as u32, data: block_data, @@ -495,7 +489,7 @@ impl DataGeneral { pub async fn write_data( &self, unique_id: impl Into>, - mut datas: Vec, + datas: Vec, context_openode_opetype_operole: Option<( NodeID, proto::DataOpeType, @@ -524,8 +518,7 @@ impl DataGeneral { unique_id: unique_id.clone(), context: context_openode_opetype_operole.map(|(node, ope, role)| { proto::DataScheduleContext { - // each_data_sz_bytes: data_transfer_sizes, 原代码类型不匹配 曾俊 - each_data_sz_bytes: data_transfer_sizes.iter().map(|&x| x as u32).collect(), + each_data_sz_bytes: data_transfer_sizes, ope_node: node as i64, ope_type: ope as i32, ope_role: Some(role), @@ -544,7 +537,7 @@ impl DataGeneral { // 处理每个数据项 let mut iter = WantIdxIter::new(&GetOrDelDataArgType::All, datas.len() as u8); while let Some(data_item_idx) = iter.next() { - let data_item: &DataItemArgWrapper = &mut datas[data_item_idx as usize]; + let data_item: &DataItemArgWrapper = &datas[data_item_idx as usize]; let split = &splits[data_item_idx as usize]; let mut primary_tasks = Vec::new(); @@ -556,11 +549,7 @@ impl DataGeneral { log_tag, split_idx + 1, split.splits.len(), split_info.node_id, split_info.data_offset, split_info.data_size); let split_info = split_info.clone(); let unique_id_clone = unique_id.clone(); - // let data_item_primary = data_item.clone_split_range(split_info.data_offset..split_info.data_offset+split_info.data_size); 类型不匹配 曾俊 - // 生成一个复制的可变数据项 - let mut data_item_clone = (*data_item).clone(); - let data_item_primary = data_item_clone.clone_split_range(split_info.data_offset as usize..(split_info.data_offset+split_info.data_size)as usize).await.todo_handle("clone_split_range for write data err")?; - // let data_item_primary = data_item.clone_split_range(split_info.data_offset as usize..(split_info.data_offset+split_info.data_size)as usize).await.todo_handle("clone_split_range for write data err")?; + let data_item_primary = data_item.clone_split_range(split_info.data_offset..split_info.data_offset+split_info.data_size) let view = self.view.clone(); let version_copy = version; let task = tokio::spawn(async move { @@ -574,7 +563,6 @@ impl DataGeneral { version: version_copy, data: vec![proto::DataItemWithIdx { idx: data_item_idx as u32, - // data: 
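// write_data fans each data item out to its split owners as spawned tasks and
// fails the whole write if any primary or cache task errs. A shape-only
// sketch of that fan-out/join (tokio + futures; write_one_split is a stand-in
// for rpc_call_write_once_data.call):
use futures::future::join_all;

async fn write_one_split(node_id: u32) -> Result<(), String> {
    if node_id == 0 { Err("node 0 unreachable".into()) } else { Ok(()) }
}

#[tokio::main]
async fn main() {
    let nodes = vec![1u32, 2, 3];
    let tasks: Vec<_> = nodes
        .into_iter()
        .map(|n| tokio::spawn(write_one_split(n)))
        .collect();
    let results = join_all(tasks).await;
    // A JoinError and a task-level Err both count as failure, as in the patch.
    let failed = results
        .iter()
        .any(|r| match r { Ok(inner) => inner.is_err(), Err(_) => true });
    assert!(!failed);
}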
Some(data_item_primary), 类型不匹配 曾俊 data: Some(data_item_primary), }], }, @@ -611,8 +599,7 @@ impl DataGeneral { let task = tokio::spawn(async move { let _permit = permit; // 持有permit直到任务完成 view.data_general() - // .write_data_batch(unique_id_clone.clone(), version, data_item_cache, data_item_idx, node_id) //类型不匹配 曾俊 - .write_data_batch(unique_id_clone.clone(), version, data_item_cache.dataitem, data_item_idx, node_id) + .write_data_batch(unique_id_clone.clone(), version, data_item_cache, data_item_idx, node_id) .await?; Ok::(proto::WriteOneDataResponse { remote_version: version, @@ -659,26 +646,25 @@ impl DataGeneral { let fail_by_overwrite = || async { let message = "New data version overwrite".to_owned(); tracing::warn!("{}", message); - - responsor //返回结果未处理 曾俊 + responsor .send_resp(WriteOneDataResponse { remote_version: 0, success: false, message, }) .await - .todo_handle("1 err_comment waitting to fill"); + .todo_handle(); }; let fail_with_msg = |message: String| async { tracing::warn!("{}", message); - responsor //返回结果未处理 曾俊 + responsor .send_resp(WriteOneDataResponse { remote_version: 0, success: false, message, }) .await - .todo_handle("2 err_comment waitting to fill"); + .todo_handle(); }; loop { @@ -779,7 +765,7 @@ impl DataGeneral { || check_meta.as_ref().unwrap().0 != required_meta.as_ref().unwrap().0 { drop(guard); - responsor //返回结果未处理 曾俊 + responsor .send_resp(WriteOneDataResponse { remote_version: if check_meta.is_none() { 0 @@ -790,7 +776,7 @@ impl DataGeneral { message: "meta is updated again, cancel write".to_owned(), }) .await - .todo_handle("3 err_comment waitting to fill"); + .todo_handle(); return; } @@ -820,14 +806,14 @@ impl DataGeneral { kv_store_engine.flush(); drop(guard); tracing::debug!("data partial is written"); - responsor //返回结果未使用 曾俊 + responsor .send_resp(WriteOneDataResponse { remote_version: req.version, success: true, message: "".to_owned(), }) .await - .todo_handle("4 err_comment waitting to fill"); + .todo_handle(); } async fn rpc_handle_data_meta_update( @@ -867,24 +853,24 @@ impl DataGeneral { drop(_kv_write_lock_guard); let err_msg = "New data version is smaller, failed update"; tracing::warn!("{}", err_msg); - responsor //返回结果未处理 曾俊 + responsor .send_resp(proto::DataMetaUpdateResponse { version: old_meta.version, message: err_msg.to_owned(), }) .await - .todo_handle("5 err_comment waitting to fill"); + .todo_handle(); return; } old_meta.version = req.version; if req.serialized_meta.len() > 0 { - self.view.kv_store_engine() //返回结果未处理 曾俊 + self.view.kv_store_engine() .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) - .todo_handle("6 err_comment waitting to fill"); + .todo_handle(); } else { - self.view.kv_store_engine() //返回结果未处理 曾俊 + self.view.kv_store_engine() .set(key, &old_meta, true) - .todo_handle("7 err_comment waitting to fill"); + .todo_handle(); } } else { if req.serialized_meta.len() > 0 { @@ -892,32 +878,32 @@ impl DataGeneral { "set new meta data, {:?}", bincode::deserialize::(&req.serialized_meta) ); - self.view.kv_store_engine() //返回结果未处理 曾俊 + self.view.kv_store_engine() .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) - .todo_handle("8 err_comment waitting to fill"); + .todo_handle(); } else { drop(_kv_write_lock_guard); let err_msg = "Old meta data not found and missing new meta"; tracing::warn!("{}", err_msg); - responsor //返回结果未处理 曾俊 + responsor .send_resp(proto::DataMetaUpdateResponse { version: 0, message: err_msg.to_owned(), }) .await - .todo_handle("9 err_comment waitting to fill"); + 
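// rpc_handle_data_meta_update (below, and again in the deleted .bak copy)
// enforces monotonic versions: an update older than the stored meta is
// rejected with the current version echoed back. Logic-only sketch with
// stand-in types (the real handler also rejects a missing old meta when no
// serialized replacement is supplied):
struct StoredMeta { version: u64 }

enum UpdateOutcome { Rejected { current: u64 }, Applied }

fn apply_meta_update(stored: &mut Option<StoredMeta>, incoming: u64) -> UpdateOutcome {
    match stored {
        Some(meta) if meta.version > incoming => UpdateOutcome::Rejected { current: meta.version },
        Some(meta) => { meta.version = incoming; UpdateOutcome::Applied }
        None => { *stored = Some(StoredMeta { version: incoming }); UpdateOutcome::Applied }
    }
}

fn main() {
    let mut stored = Some(StoredMeta { version: 5 });
    assert!(matches!(apply_meta_update(&mut stored, 4), UpdateOutcome::Rejected { current: 5 }));
    assert!(matches!(apply_meta_update(&mut stored, 6), UpdateOutcome::Applied));
}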
.todo_handle(); return; } } drop(_kv_write_lock_guard); tracing::debug!("rpc_handle_data_meta_update success"); - responsor //返回结果未处理 曾俊 + responsor .send_resp(proto::DataMetaUpdateResponse { version: req.version, message: "Update success".to_owned(), }) .await - .todo_handle("10 err_comment waitting to fill"); + .todo_handle(); } async fn rpc_handle_get_data_meta( @@ -1035,7 +1021,7 @@ impl DataGeneral { Ok(()) } - // 处理批量数据写入请求 + /// 处理批量数据写入请求 pub async fn rpc_handle_batch_data( &self, responsor: RPCResponsor, @@ -1052,11 +1038,10 @@ impl DataGeneral { let state = match self.batch_receive_states .get_or_init(req.request_id.clone().unwrap(), async move { // 创建任务组和句柄 - let (mut group, handle) = match WriteSplitDataTaskGroup::new( + let (mut group, handle) = match WriteSplitDataTaskGroup::new( req.unique_id.clone(), req.total_size as usize, - // req.block_type(), 类型错误 曾俊 - req.block_type.unwrap().data_item_dispatch.unwrap(), + req.block_type(), req.version, ).await { Ok((group, handle)) => (group, handle), @@ -1067,7 +1052,7 @@ impl DataGeneral { }; // 再process之前订阅,避免通知先于订阅 - let waiter = handle.get_all_tasks_waiter(); + let mut waiter = handle.get_all_tasks_waiter(); // 启动process_tasks let _ = tokio::spawn(async move { @@ -1588,9 +1573,9 @@ impl LogicalModule for DataGeneral { .regist(p2p, move |responsor, req| { let view = view.clone(); let _ = tokio::spawn(async move { - view.data_general().rpc_handle_get_data_meta(req, responsor) //返回结果未处理 曾俊 + view.data_general().rpc_handle_get_data_meta(req, responsor) .await - .todo_handle("rpc_handle_get_data_meta err"); + .todo_handle(); }); Ok(()) }); diff --git a/src/main/src/general/data/m_data_general/mod.rs.bak b/src/main/src/general/data/m_data_general/mod.rs.bak deleted file mode 100644 index 6831f09..0000000 --- a/src/main/src/general/data/m_data_general/mod.rs.bak +++ /dev/null @@ -1,1616 +0,0 @@ -/// 缓存模式类型 -pub type CacheMode = u16; - -pub mod dataitem; -pub mod batch; -pub mod batch_handler; - -use crate::general::data::m_data_general::dataitem::{calculate_splits, WantIdxIter, WriteSplitDataTaskGroup, DataItemSource}; -use crate::general::data::m_data_general::batch_handler::{BatchReceiveState, SharedWithBatchHandler}; -use crate::general::network::proto::DataItem; -use dataitem::DataItemArgWrapper; -use tokio::io::{AsyncSeekExt, AsyncReadExt}; - -use crate::general::{ - data::m_kv_store_engine::{ - KeyTypeDataSetItem, KeyTypeDataSetMeta, KvAdditionalConf, KvStoreEngine, KvVersion, - }, - m_os::OperatingSystem, - network::{ - m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}, - proto::{ - self, DataMeta, WriteOneDataResponse, - }, - proto_ext::ProtoExtDataItem, - }, -}; -use crate::{ - general::{ - data::m_kv_store_engine::{KeyLockGuard, KeyType}, - network::{proto_ext::DataItemExt}, - }, - logical_module_view_impl, - result::{WSError, WSResult, WSResultExt, WsSerialErr, WsNetworkLogicErr}, - sys::{LogicalModule, LogicalModuleNewArgs, NodeID}, - util::{JoinHandleWrapper, container::async_init_map::AsyncInitMap}, -}; -use crate::{result::WsDataError, sys::LogicalModulesRef}; -use async_trait::async_trait; -use camelpaste::paste; -use core::str; - -use serde::{Deserialize, Serialize}; -use std::{ - collections::{BTreeSet, HashMap, HashSet}, - sync::Arc, - time::Duration, - sync::atomic::{AtomicU32, Ordering}, -}; -use tokio::sync::Semaphore; -use tokio::task::JoinError; -use ws_derive::LogicalModule; - -logical_module_view_impl!(DataGeneralView); -logical_module_view_impl!(DataGeneralView, p2p, P2PModule); 
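// batch_receive_states keys in-flight transfers by request id and builds the
// receive state exactly once per id, no matter how many blocks race in. A
// simplified sketch of that get-or-init shape (the crate's AsyncInitMap also
// dedups concurrent async initializers; this toy holds the lock across init):
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::Mutex;

#[derive(Default)]
struct BatchState { received_blocks: Mutex<u32> }

async fn get_or_init(map: &Mutex<HashMap<u64, Arc<BatchState>>>, request_id: u64) -> Arc<BatchState> {
    let mut guard = map.lock().await;
    guard
        .entry(request_id)
        .or_insert_with(|| Arc::new(BatchState::default()))
        .clone()
}

#[tokio::main]
async fn main() {
    let map = Mutex::new(HashMap::new());
    let a = get_or_init(&map, 7).await;
    let b = get_or_init(&map, 7).await;
    assert!(Arc::ptr_eq(&a, &b)); // both blocks see the same state
    *a.received_blocks.lock().await += 1;
}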
-logical_module_view_impl!(DataGeneralView, data_general, DataGeneral); -logical_module_view_impl!(DataGeneralView, kv_store_engine, KvStoreEngine); -logical_module_view_impl!(DataGeneralView, os, OperatingSystem); - -pub type DataVersion = u64; -pub type DataItemIdx = u8; - -pub const DATA_UID_PREFIX_APP_META: &str = "app"; -pub const DATA_UID_PREFIX_FN_KV: &str = "fkv"; - -/// 默认数据块大小 (4MB) -pub const DEFAULT_BLOCK_SIZE: usize = 4 * 1024 * 1024; - -pub const CACHE_MODE_TIME_MASK: u16 = 0xf000; -pub const CACHE_MODE_TIME_FOREVER_MASK: u16 = 0x0fff; -pub const CACHE_MODE_TIME_AUTO_MASK: u16 = 0x1fff; - -pub const CACHE_MODE_POS_MASK: u16 = 0x0f00; -pub const CACHE_MODE_POS_ALLNODE_MASK: u16 = 0xf0ff; -pub const CACHE_MODE_POS_SPECNODE_MASK: u16 = 0xf1ff; -pub const CACHE_MODE_POS_AUTO_MASK: u16 = 0xf2ff; - -pub const CACHE_MODE_MAP_MASK: u16 = 0x00f0; -pub const CACHE_MODE_MAP_COMMON_KV_MASK: u16 = 0xff0f; -pub const CACHE_MODE_MAP_FILE_MASK: u16 = 0xff1f; -// const DATA_UID_PREFIX_OBJ: &str = "obj"; - -pub fn new_data_unique_id_app(app_name: &str) -> String { - format!("{}{}", DATA_UID_PREFIX_APP_META, app_name) -} - -pub fn new_data_unique_id_fn_kv(key: &[u8]) -> Vec { - let mut temp = DATA_UID_PREFIX_FN_KV.as_bytes().to_owned(); - temp.extend(key); - temp - // let key_str = str::from_utf8(key).unwrap(); - // format!("{}{}", DATA_UID_PREFIX_FN_KV, key_str) -} - -/// 唯一标识符类型 -pub type UniqueId = Vec; - -#[derive(LogicalModule)] -pub struct DataGeneral { - view: DataGeneralView, - pub rpc_call_data_version_schedule: RPCCaller, - rpc_call_write_once_data: RPCCaller, - rpc_call_batch_data: RPCCaller, - rpc_call_get_data_meta: RPCCaller, - rpc_call_get_data: RPCCaller, - - rpc_handler_write_once_data: RPCHandler, - rpc_handler_batch_data: RPCHandler, - rpc_handler_data_meta_update: RPCHandler, - rpc_handler_get_data_meta: RPCHandler, - rpc_handler_get_data: RPCHandler, - - // 批量数据接收状态管理 - batch_receive_states: AsyncInitMap>, -} - -impl DataGeneral { - pub fn inner_new(args: LogicalModuleNewArgs) -> Self { - Self { - view: DataGeneralView::new(args.logical_modules_ref.clone()), - rpc_call_data_version_schedule: RPCCaller::new(), - rpc_call_write_once_data: RPCCaller::new(), - rpc_call_batch_data: RPCCaller::new(), - rpc_call_get_data_meta: RPCCaller::new(), - rpc_call_get_data: RPCCaller::new(), - rpc_handler_write_once_data: RPCHandler::new(), - rpc_handler_batch_data: RPCHandler::new(), - rpc_handler_data_meta_update: RPCHandler::new(), - rpc_handler_get_data_meta: RPCHandler::new(), - rpc_handler_get_data: RPCHandler::new(), - batch_receive_states: AsyncInitMap::new(), - } - } - - #[allow(dead_code)] - fn next_batch_id(&self) -> u32 { - static NEXT_BATCH_ID: AtomicU32 = AtomicU32::new(1); // 从1开始,保留0作为特殊值 - NEXT_BATCH_ID.fetch_add(1, Ordering::Relaxed) - } - - pub async fn write_data_batch( - &self, - unique_id: UniqueId, - version: u64, - data: proto::DataItem, - data_item_idx: DataItemIdx, - node_id: NodeID, - ) -> WSResult<()> { - // 调用 batch_transfer 函数处理数据传输 - async fn batch_transfer( - data_item_idx: DataItemIdx, - unique_id: UniqueId, - version: u64, - target_node: NodeID, - data: Arc, - view: DataGeneralView, - ) -> WSResult<()> { - let (tx, mut rx) = tokio::sync::mpsc::channel(32); - let mut handles = Vec::new(); - - let data_size = data.size().await?; - let splits = calculate_splits(data_size); - - tracing::debug!("batch_transfer total size({}), splits: {:?}, to node {}", data_size, splits, target_node); - - for (block_idx, split_range) in splits.iter().enumerate() { - let 
block_data = match data.as_ref() { - DataItemSource::Memory { data } => data[split_range.clone()].to_vec(), - DataItemSource::File { path } => { - // 读取文件对应块的数据 - let mut file = tokio::fs::File::open(path).await.map_err(|e| WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, - }, - reason: format!("Failed to open file: {}", e), - })?; - let mut buffer = vec![0; split_range.len()]; - // 验证seek结果 - let seek_pos = file.seek(std::io::SeekFrom::Start(split_range.start as u64)).await.map_err(|e| WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, - }, - reason: format!("Failed to seek file: {}", e), - })?; - if seek_pos != split_range.start as u64 { - return Err(WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, - }, - reason: format!("Seek position mismatch: expected {}, got {}", split_range.start, seek_pos), - }.into()); - } - // read_exact保证读取指定长度的数据或返回错误 - let _ = file.read_exact(&mut buffer).await.map_err(|e| WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, - }, - reason: format!("Failed to read file: {}", e), - })?; - buffer - } - }; - - let request = proto::BatchDataRequest { - request_id: Some(proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, - }), - dataset_unique_id: unique_id.clone(), - data_item_idx: data_item_idx as u32, - // 用空的 DataItem 代替 - block_type: match data.as_ref() { - DataItemSource::Memory { .. } => Some(proto::DataItem{ - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(Vec::new())), - }), - DataItemSource::File { .. 
} => Some(proto::DataItem{ - data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(proto::FileData { file_name_opt: String::new(), is_dir_opt: true, file_content: Vec::new() })), - }), - }, - block_index: block_idx as u32, - data: block_data, - operation: proto::DataOpeType::Write as i32, - unique_id: unique_id.clone(), - version, - total_size: data_size as u64, - }; - - let tx = tx.clone(); - let view = view.clone(); - - let handle = tokio::spawn(async move { - let result = view.data_general() - .rpc_call_batch_data - .call( - view.p2p(), - target_node, - request, - Some(Duration::from_secs(30)), - ) - .await; - - if let Err(e) = tx.send(result).await { - tracing::error!("Failed to send batch transfer result: {}", e); - } - }); - - handles.push(handle); - } - - drop(tx); - - while let Some(result) = rx.recv().await { - match result { - Ok(resp) if !resp.success => { - return Err(WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: 0, // TODO: Add proper sequence number - }, - reason: resp.error_message, - }.into()); - } - Ok(_) => continue, - Err(e) => { - return Err(WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: 0, - }, - reason: format!("RPC call failed: {}", e), - }.into()); - } - } - } - - for handle in handles { - handle.await.map_err(|e| { - WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: 0, - }, - reason: format!("Task join failed: {}", e), - } - })?; - } - - Ok(()) - } - - let data = Arc::new(data.to_data_item_source()); - batch_transfer(data_item_idx,unique_id, version, node_id, data, self.view.clone()).await - } - - - pub async fn get_or_del_datameta_from_master( - &self, - unique_id: &[u8], - delete: bool, - ) -> WSResult { - tracing::debug!("get_or_del_datameta_from_master uid: {:?}, delete: {}, whoami: {}", unique_id, delete, self.view.p2p().nodes_config.this.0); - let p2p = self.view.p2p(); - // get meta from master - let meta = self - .rpc_call_get_data_meta - .call( - p2p, - p2p.nodes_config.get_master_node(), - proto::DataMetaGetRequest { - unique_id: unique_id.to_vec(), - delete, - }, - Some(Duration::from_secs(60)), - ) - .await?; - - if meta.serialized_meta.is_empty() { - return Err(WsDataError::DataSetNotFound { - uniqueid: unique_id.to_vec(), - } - .into()); - } - - bincode::deserialize(&meta.serialized_meta).map_err(|err| { - WsSerialErr::BincodeErr { - err, - context: "get_or_del_datameta_from_master".to_owned(), - } - .into() - }) - } - - pub async fn get_or_del_data( - &self, - GetOrDelDataArg { - meta, - unique_id, - ty, - }: GetOrDelDataArg, - ) -> WSResult<(DataSetMetaV2, HashMap)> { - tracing::debug!("get_or_del_data uid: {:?}, maybe with meta: {:?}", unique_id, meta); - let mut data_map = HashMap::new(); - - // get meta from master - let meta = if let Some(meta) = meta { - meta - } else { - self.get_or_del_datameta_from_master(&unique_id, false) - .await? 
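// The File branch of batch_transfer reads each block with seek + read_exact
// and double-checks where the seek landed before trusting the buffer. A
// standalone sketch of that read path:
use std::io::SeekFrom;
use std::ops::Range;
use std::path::Path;
use tokio::fs::File;
use tokio::io::{AsyncReadExt, AsyncSeekExt};

async fn read_block(path: &Path, range: Range<usize>) -> std::io::Result<Vec<u8>> {
    let mut file = File::open(path).await?;
    let pos = file.seek(SeekFrom::Start(range.start as u64)).await?;
    if pos != range.start as u64 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("seek mismatch: expected {}, got {}", range.start, pos),
        ));
    }
    let mut buf = vec![0u8; range.len()];
    file.read_exact(&mut buf).await?; // errors out if the file is shorter than range.end
    Ok(buf)
}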
- }; - - tracing::debug!("start get_or_del_data uid: {:?},meta: {:?}", unique_id, meta); - - // basical verify - for idx in 0..meta.data_item_cnt() { - let idx = idx as DataItemIdx; - let check_cache_map = |meta: &DataSetMetaV2| -> WSResult<()> { - if !meta.cache_mode_visitor(idx).is_map_common_kv() - && !meta.cache_mode_visitor(idx).is_map_file() - { - return Err(WsDataError::UnknownCacheMapMode { - mode: meta.cache_mode_visitor(idx).0, - } - .into()); - } - Ok(()) - }; - check_cache_map(&meta)?; - } - - // get data - let p2p = self.view.p2p(); - - match ty { - GetOrDelDataArgType::All => { - for idx in 0..meta.data_item_cnt() { - let idx = idx as DataItemIdx; - let resp = self - .rpc_call_get_data - .call( - p2p, - meta.get_data_node(idx), - proto::GetOneDataRequest { - unique_id: unique_id.to_vec(), - idxs: vec![idx as u32], - delete: false, - return_data: true, - }, - Some(Duration::from_secs(60)), - ) - .await?; - - if !resp.success { - return Err(WsDataError::GetDataFailed { - unique_id: unique_id.to_vec(), - msg: resp.message, - } - .into()); - } - - let _ = data_map.insert(idx, resp.data[0].clone()); - } - } - GetOrDelDataArgType::Delete => { - for idx in 0..meta.data_item_cnt() { - let idx = idx as DataItemIdx; - let resp = self - .rpc_call_get_data - .call( - p2p, - meta.get_data_node(idx), - proto::GetOneDataRequest { - unique_id: unique_id.to_vec(), - idxs: vec![idx as u32], - delete: true, - return_data: true, - }, - Some(Duration::from_secs(60)), - ) - .await?; - - if !resp.success { - return Err(WsDataError::GetDataFailed { - unique_id: unique_id.to_vec(), - msg: resp.message, - } - .into()); - } - - let _ = data_map.insert(idx, resp.data[0].clone()); - } - } - GetOrDelDataArgType::PartialOne { idx } => { - let resp = self - .rpc_call_get_data - .call( - p2p, - meta.get_data_node(idx), - proto::GetOneDataRequest { - unique_id: unique_id.to_vec(), - idxs: vec![idx as u32], - delete: false, - return_data: true, - }, - Some(Duration::from_secs(60)), - ) - .await?; - - if !resp.success { - return Err(WsDataError::GetDataFailed { - unique_id: unique_id.to_vec(), - msg: resp.message, - } - .into()); - } - - let _ = data_map.insert(idx, resp.data[0].clone()); - } - GetOrDelDataArgType::PartialMany { idxs } => { - for idx in idxs { - let resp = self - .rpc_call_get_data - .call( - p2p, - meta.get_data_node(idx), - proto::GetOneDataRequest { - unique_id: unique_id.to_vec(), - idxs: vec![idx as u32], - delete: false, - return_data: true, - }, - Some(Duration::from_secs(60)), - ) - .await?; - - if !resp.success { - return Err(WsDataError::GetDataFailed { - unique_id: unique_id.to_vec(), - msg: resp.message, - } - .into()); - } - - let _ = data_map.insert(idx, resp.data[0].clone()); - } - } - } - - Ok((meta, data_map)) - } - - pub async fn write_data( - &self, - unique_id: impl Into>, - mut datas: Vec, - context_openode_opetype_operole: Option<( - NodeID, - proto::DataOpeType, - proto::data_schedule_context::OpeRole, - )>, - ) -> WSResult<()> { - let unique_id = unique_id.into(); - let log_tag = format!("[write_data({})]", String::from_utf8_lossy(&unique_id)); - tracing::debug!("{} start write data", log_tag); - - let mut data_transfer_sizes=Vec::new(); - data_transfer_sizes.reserve(datas.len()); - for d in datas.iter_mut(){ - data_transfer_sizes.push(d.transfer_size().await.map_err(|err|{ - tracing::error!("{} transfer size error: {}", log_tag, err); - err - })?); - } - // 获取数据调度计划 - let version_schedule_resp = self - .rpc_call_data_version_schedule - .call( - self.view.p2p(), - 
self.view.p2p().nodes_config.get_master_node(), - proto::DataVersionScheduleRequest { - unique_id: unique_id.clone(), - context: context_openode_opetype_operole.map(|(node, ope, role)| { - proto::DataScheduleContext { - // each_data_sz_bytes: data_transfer_sizes, 原代码类型不匹配 曾俊 - each_data_sz_bytes: data_transfer_sizes.iter().map(|&x| x as u32).collect(), - ope_node: node as i64, - ope_type: ope as i32, - ope_role: Some(role), - } - }), - version: 0, - }, - Some(Duration::from_secs(60)), - ) - .await?; - - // Clone the response to extend its lifetime - let version = version_schedule_resp.version; - let splits = version_schedule_resp.split.clone(); - - // 处理每个数据项 - let mut iter = WantIdxIter::new(&GetOrDelDataArgType::All, datas.len() as u8); - while let Some(data_item_idx) = iter.next() { - let data_item: &DataItemArgWrapper = &mut datas[data_item_idx as usize]; - let split = &splits[data_item_idx as usize]; - let mut primary_tasks = Vec::new(); - - // 1. 并行写入所有主数据分片 - let mut split_iter = WantIdxIter::new(&GetOrDelDataArgType::All, split.splits.len() as u8); - while let Some(split_idx) = split_iter.next() { - let split_info = &split.splits[split_idx as usize]; - tracing::debug!("{} creating split write task {}/{} for node {}, offset={}, size={}", - log_tag, split_idx + 1, split.splits.len(), split_info.node_id, split_info.data_offset, split_info.data_size); - let split_info = split_info.clone(); - let unique_id_clone = unique_id.clone(); - // let data_item_primary = data_item.clone_split_range(split_info.data_offset..split_info.data_offset+split_info.data_size); 类型不匹配 曾俊 - // 生成一个复制的可变数据项 - let mut data_item_clone = (*data_item).clone(); - let data_item_primary = data_item_clone.clone_split_range(split_info.data_offset as usize..(split_info.data_offset+split_info.data_size)as usize).await.todo_handle("clone_split_range for write data err")?; - // let data_item_primary = data_item.clone_split_range(split_info.data_offset as usize..(split_info.data_offset+split_info.data_size)as usize).await.todo_handle("clone_split_range for write data err")?; - let view = self.view.clone(); - let version_copy = version; - let task = tokio::spawn(async move { - view.data_general() - .rpc_call_write_once_data - .call( - view.p2p(), - split_info.node_id, - proto::WriteOneDataRequest { - unique_id: unique_id_clone.clone(), - version: version_copy, - data: vec![proto::DataItemWithIdx { - idx: data_item_idx as u32, - // data: Some(data_item_primary), 类型不匹配 曾俊 - data: Some(data_item_primary), - }], - }, - Some(Duration::from_secs(60)), - ) - .await - }); - primary_tasks.push(task); - } - - // 2. 
并行写入缓存数据(完整数据) - let visitor = CacheModeVisitor(version_schedule_resp.cache_mode[data_item_idx as usize] as u16); - let need_cache = visitor.is_map_common_kv() || visitor.is_map_file(); - let cache_nodes: Vec = if need_cache { - split.splits.iter().map(|s| s.node_id).collect() - } else { - vec![] - }; - - let mut cache_tasks = Vec::new(); - if !cache_nodes.is_empty() { - tracing::debug!("{} found {} cache nodes: {:?}", log_tag, cache_nodes.len(), cache_nodes); - const MAX_CONCURRENT_TRANSFERS: usize = 3; - let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_TRANSFERS)); - - let mut cache_iter = WantIdxIter::new(&GetOrDelDataArgType::All, cache_nodes.len() as u8); - while let Some(cache_idx) = cache_iter.next() { - let node_id = cache_nodes[cache_idx as usize]; - let permit = semaphore.clone().acquire_owned().await.unwrap(); - tracing::debug!("{} creating cache write task {}/{} for node {}", log_tag, cache_idx + 1, cache_nodes.len(), node_id); - let unique_id_clone = unique_id.clone(); - let data_item_cache = data_item.clone(); - let view = self.view.clone(); - let task = tokio::spawn(async move { - let _permit = permit; // 持有permit直到任务完成 - view.data_general() - // .write_data_batch(unique_id_clone.clone(), version, data_item_cache, data_item_idx, node_id) //类型不匹配 曾俊 - .write_data_batch(unique_id_clone.clone(), version, data_item_cache.dataitem, data_item_idx, node_id) - .await?; - Ok::(proto::WriteOneDataResponse { - remote_version: version, - success: true, - message: String::new(), - }) - }); - cache_tasks.push(task); - } - } - - let primary_results = futures::future::join_all(primary_tasks).await; - tracing::debug!("{} primary_results: {:?}", log_tag, primary_results); - let cache_results = futures::future::join_all(cache_tasks).await; - tracing::debug!("{} cache_results: {:?}", log_tag, cache_results); - - if primary_results.iter().any(|res| res.is_err()) || cache_results.iter().any(|res| res.is_err()) { - let error_msg = format!("主节点或缓存节点数据写入失败"); - tracing::error!("{}", error_msg); - return Err(WSError::WsDataError(WsDataError::WriteDataFailed { - unique_id: unique_id.clone(), - message: error_msg, - })); - } - } - - Ok(()) - } - - async fn rpc_handle_write_one_data( - &self, - responsor: RPCResponsor, - req: proto::WriteOneDataRequest, - ) { - tracing::debug!("verify data meta bf write data"); - let kv_store_engine = self.view.kv_store_engine(); - - // Step1: verify version - // take old meta - #[allow(unused_assignments)] - let mut required_meta: Option<(usize, DataSetMetaV2)> = None; - { - let keybytes: Vec = KeyTypeDataSetMeta(&req.unique_id).make_key(); - let fail_by_overwrite = || async { - let message = "New data version overwrite".to_owned(); - tracing::warn!("{}", message); - responsor - .send_resp(WriteOneDataResponse { - remote_version: 0, - success: false, - message, - }) - .await - .todo_handle("1 err_comment waitting to fill"); - }; - let fail_with_msg = |message: String| async { - tracing::warn!("{}", message); - responsor - .send_resp(WriteOneDataResponse { - remote_version: 0, - success: false, - message, - }) - .await - .todo_handle("2 err_comment waitting to fill"); - }; - - loop { - // tracing::debug!("verify version loop"); - let lock = - kv_store_engine.with_rwlock(&KeyTypeDataSetMeta(&req.unique_id).make_key()); - let guard = KeyLockGuard::Read(lock.read()); - required_meta = kv_store_engine.get( - &KeyTypeDataSetMeta(&req.unique_id), - true, - KvAdditionalConf {}, - ); //tofix, master send maybe not synced - let old_dataset_version = if 
required_meta.is_none() { - 0 - } else { - required_meta.as_ref().unwrap().1.version - }; - // need to wait for new version - if required_meta.is_none() - || required_meta.as_ref().unwrap().1.version < req.version - { - if required_meta.is_none() { - tracing::debug!("no data version, waiting for notify"); - } else { - tracing::debug!( - "data version is old({}) at node({}), waiting for new notify({})", - required_meta.as_ref().unwrap().1.version, - self.view.p2p().nodes_config.this_node(), - req.version - ); - } - - let (kv_version, new_value) = kv_store_engine - .register_waiter_for_new(&keybytes, guard) - .await - .unwrap_or_else(|err| { - panic!("fail to wait for new data version: {:?}", err); - }); - - let Some(new_value) = new_value.as_raw_data() else { - fail_with_msg(format!( - "fatal error, kv value supposed to be DataSetMeta, rathe than {:?}", - new_value - )) - .await; - return; - }; - - // deserialize - let new_value = bincode::deserialize::(&new_value); - if let Err(err) = new_value { - fail_with_msg(format!( - "fatal error, kv value deserialization failed: {}", - err - )) - .await; - return; - } - let new_value = new_value.unwrap(); - - // version check - if new_value.version > req.version { - fail_by_overwrite().await; - return; - } else if new_value.version < req.version { - tracing::debug!("recv data version({}) is old than required({}), waiting for new notify",new_value.version, req.version); - // still need to wait for new version - continue; - } else { - required_meta = Some((kv_version, new_value)); - break; - } - } else if old_dataset_version > req.version { - drop(guard); - fail_by_overwrite().await; - return; - } else { - tracing::debug!( - "data version is matched cur({}) require({}) // 0 should be invalid", - old_dataset_version, - req.version - ); - break; - } - } - } - - // Step3: write data - tracing::debug!("start to write partial data"); - let lock = kv_store_engine.with_rwlock(&KeyTypeDataSetMeta(&req.unique_id).make_key()); - let guard = KeyLockGuard::Write(lock.write()); - let check_meta = kv_store_engine.get( - &KeyTypeDataSetMeta(&req.unique_id), - true, - KvAdditionalConf {}, - ); //tofix, master send maybe not synced - if check_meta.is_none() - || check_meta.as_ref().unwrap().0 != required_meta.as_ref().unwrap().0 - { - drop(guard); - responsor - .send_resp(WriteOneDataResponse { - remote_version: if check_meta.is_none() { - 0 - } else { - check_meta.as_ref().unwrap().1.version - }, - success: false, - message: "meta is updated again, cancel write".to_owned(), - }) - .await - .todo_handle("3 err_comment waitting to fill"); - return; - } - - for data_with_idx in req.data.into_iter() { - let proto::DataItemWithIdx { idx, data } = data_with_idx; - let data = data.unwrap(); - let data_source = data.to_data_item_source(); - let data = Arc::new(data_source); - let serialize = data.as_ref().encode_persist(); - tracing::debug!( - "writing data part uid({:?}) idx({}) item({})", - req.unique_id, - idx, - data.to_debug_string() - ); - if let Err(err) = kv_store_engine.set( - KeyTypeDataSetItem { - uid: req.unique_id.as_ref(), - idx: idx as u8, - }, - &serialize, - true, - ) { - tracing::warn!("flush error: {}", err) - } - } - kv_store_engine.flush(); - drop(guard); - tracing::debug!("data partial is written"); - responsor - .send_resp(WriteOneDataResponse { - remote_version: req.version, - success: true, - message: "".to_owned(), - }) - .await - .todo_handle("4 err_comment waitting to fill"); - } - - async fn rpc_handle_data_meta_update( - &self, - responsor: 
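// The loop above parks a write until the locally stored meta catches up with
// the version the master scheduled, and aborts if a *newer* version lands
// first (fail_by_overwrite). A sketch of the same wait discipline with a
// tokio watch channel standing in for register_waiter_for_new:
use tokio::sync::watch;

async fn wait_for_version(rx: &mut watch::Receiver<u64>, required: u64) -> Result<u64, String> {
    loop {
        let current = *rx.borrow();
        if current == required {
            return Ok(current);
        }
        if current > required {
            return Err("new data version overwrite".into());
        }
        if rx.changed().await.is_err() {
            return Err("version publisher dropped".into());
        }
    }
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = watch::channel(0u64);
    tokio::spawn(async move { tx.send(2).unwrap(); });
    assert_eq!(wait_for_version(&mut rx, 2).await, Ok(2));
}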
RPCResponsor, - mut req: proto::DataMetaUpdateRequest, - ) { - struct Defer { - node: NodeID, - } - impl Drop for Defer { - fn drop(&mut self) { - tracing::debug!("rpc_handle_data_meta_update return at node({})", self.node); - } - } - let _defer = Defer { - node: self.view.p2p().nodes_config.this_node(), - }; - - let key = KeyTypeDataSetMeta(&req.unique_id); - let keybytes = key.make_key(); - - // test only log - #[cfg(test)] - tracing::debug!("rpc_handle_data_meta_update {:?}\n {:?}", req,bincode::deserialize::(&req.serialized_meta)); - // not test log - #[cfg(not(test))] - tracing::debug!("rpc_handle_data_meta_update {:?}", req); - - let kv_lock = self.view.kv_store_engine().with_rwlock(&keybytes); - let _kv_write_lock_guard = kv_lock.write(); - - if let Some((_old_version, mut old_meta)) = - self.view.kv_store_engine().get(&key, true, KvAdditionalConf {}) - { - if old_meta.version > req.version { - drop(_kv_write_lock_guard); - let err_msg = "New data version is smaller, failed update"; - tracing::warn!("{}", err_msg); - responsor - .send_resp(proto::DataMetaUpdateResponse { - version: old_meta.version, - message: err_msg.to_owned(), - }) - .await - .todo_handle("5 err_comment waitting to fill"); - return; - } - old_meta.version = req.version; - if req.serialized_meta.len() > 0 { - self.view.kv_store_engine() - .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) - .todo_handle("6 err_comment waitting to fill"); - } else { - self.view.kv_store_engine() - .set(key, &old_meta, true) - .todo_handle("7 err_comment waitting to fill"); - } - } else { - if req.serialized_meta.len() > 0 { - tracing::debug!( - "set new meta data, {:?}", - bincode::deserialize::(&req.serialized_meta) - ); - self.view.kv_store_engine() - .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) - .todo_handle("8 err_comment waitting to fill"); - } else { - drop(_kv_write_lock_guard); - let err_msg = "Old meta data not found and missing new meta"; - tracing::warn!("{}", err_msg); - responsor - .send_resp(proto::DataMetaUpdateResponse { - version: 0, - message: err_msg.to_owned(), - }) - .await - .todo_handle("9 err_comment waitting to fill"); - return; - } - } - drop(_kv_write_lock_guard); - tracing::debug!("rpc_handle_data_meta_update success"); - responsor - .send_resp(proto::DataMetaUpdateResponse { - version: req.version, - message: "Update success".to_owned(), - }) - .await - .todo_handle("10 err_comment waitting to fill"); - } - - async fn rpc_handle_get_data_meta( - &self, - req: proto::DataMetaGetRequest, - responsor: RPCResponsor, - ) -> WSResult<()> { - tracing::debug!("rpc_handle_get_data_meta with req({:?})", req); - let meta = self.view.get_data_meta_local(&req.unique_id, req.delete)?; - if meta.is_none() { - tracing::debug!("rpc_handle_get_data_meta data meta not found"); - } else { - tracing::debug!("rpc_handle_get_data_meta data meta found"); - } - let serialized_meta = meta.map_or(vec![], |(_kvversion, meta)| { - bincode::serialize(&meta).unwrap() - }); - - responsor - .send_resp(proto::DataMetaGetResponse { serialized_meta }) - .await?; - - Ok(()) - } - - async fn rpc_handle_get_one_data( - &self, - responsor: RPCResponsor, - req: proto::GetOneDataRequest, - ) -> WSResult<()> { - tracing::debug!("starting rpc_handle_get_one_data {:?}", req); - - let kv_store_engine = self.view.kv_store_engine(); - let _ = self.view - .get_metadata(&req.unique_id, req.delete) - .await - .map_err(|err| { - tracing::warn!("rpc_handle_get_one_data get_metadata failed: {:?}", err); - err - })?; 
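// rpc_handle_get_data_meta ships the meta as bincode bytes and sends an empty
// vec when no meta exists. A round-trip sketch with a stand-in meta type
// (bincode + serde, as used throughout this module):
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, PartialEq)]
struct MetaSketch { version: u64, cache_mode: Vec<u16> }

fn main() {
    let meta = Some(MetaSketch { version: 3, cache_mode: vec![0x0fff] });
    let serialized = meta.as_ref().map_or(vec![], |m| bincode::serialize(m).unwrap());
    assert!(!serialized.is_empty());
    let back: MetaSketch = bincode::deserialize(&serialized).unwrap();
    assert_eq!(back, meta.unwrap());
}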
- - let mut got_or_deleted = vec![]; - let mut kv_ope_err = vec![]; - - for idx in req.idxs { - let value = if req.delete { - match kv_store_engine.del( - KeyTypeDataSetItem { - uid: req.unique_id.as_ref(), - idx: idx as u8, - }, - false, - ) { - Ok(value) => value, - Err(e) => { - kv_ope_err.push(e); - None - } - } - } else { - kv_store_engine.get( - &KeyTypeDataSetItem { - uid: req.unique_id.as_ref(), - idx: idx as u8, - }, - false, - KvAdditionalConf {}, - ) - }; - got_or_deleted.push(value); - } - - let (mut success, mut message): (bool, String) = if kv_ope_err.len() > 0 { - (false, { - let mut msg = String::from("KvEngine operation failed: "); - for e in kv_ope_err.iter() { - msg.push_str(&format!("{:?}", e)); - } - msg - }) - } else if got_or_deleted.iter().all(|v| v.is_some()) { - (true, "success".to_owned()) - } else { - tracing::warn!("some data not found"); - (false, "some data not found".to_owned()) - }; - - let mut got_or_deleted_checked: Vec = vec![]; - if success { - for v in got_or_deleted { - let decode_res = proto::DataItem::decode_persist(v.unwrap().1); - match decode_res { - Ok(item) => { - tracing::debug!("decoded data item: {:?}", item.to_string()); - got_or_deleted_checked.push(item); - } - Err(e) => { - tracing::error!("Failed to decode data item: {:?}", e); - success = false; - message = format!("Failed to decode data item: {:?}", e); - break; - } - } - } - } - - responsor - .send_resp(proto::GetOneDataResponse { - success, - data: got_or_deleted_checked, - message, - }) - .await?; - - Ok(()) - } - - /// 处理批量数据写入请求 - pub async fn rpc_handle_batch_data( - &self, - responsor: RPCResponsor, - req: proto::BatchDataRequest, - ) -> WSResult<()> { - tracing::debug!("rpc_handle_batch_data with batchid({:?})", req.request_id.clone().unwrap()); - let batch_receive_states = self.batch_receive_states.clone(); - // 预先克隆闭包外需要的字段 - let block_index = req.block_index; - let data = req.data.clone(); - let request_id = req.request_id.clone().unwrap(); - - // 1. 
查找或创建状态 - let state = match self.batch_receive_states - .get_or_init(req.request_id.clone().unwrap(), async move { - // 创建任务组和句柄 - let (mut group, handle) = match WriteSplitDataTaskGroup::new( - req.unique_id.clone(), - req.total_size as usize, - // req.block_type(), 类型错误 曾俊 - req.block_type.unwrap().data_item_dispatch.unwrap(), - req.version, - ).await { - Ok((group, handle)) => (group, handle), - Err(e) => { - tracing::error!("Failed to create task group: {:?}", e); - return Err(e); - } - }; - - // 再process之前订阅,避免通知先于订阅 - let waiter = handle.get_all_tasks_waiter(); - - // 启动process_tasks - let _ = tokio::spawn(async move { - match group.process_tasks().await { - Ok(item) => Ok(item), - Err(e) => { - tracing::error!("Failed to process tasks: {}", e); - Err(e) - } - } - }); - - let state = Arc::new(BatchReceiveState::new(handle, SharedWithBatchHandler::new())); - let state_clone = state.clone(); - - // response task - let _=tokio::spawn(async move { - tracing::debug!("rpc_handle_batch_data response task started"); - // 等待所有任务完成 - if let Err(e) = waiter.wait().await { - tracing::error!("Failed to wait for tasks: {}", e); - todo!("use responsor to send error response"); - return; - } - - tracing::debug!("rpc_handle_batch_data response task wait all tasks done"); - - // 发送最终响应 - if let Some(final_responsor) = state_clone.shared.get_final_responsor().await { - if let Err(e) = final_responsor.send_resp(proto::BatchDataResponse { - request_id: Some(req.request_id.clone().unwrap()), - success: true, - error_message: String::new(), - version: state_clone.handle.version(), - }).await { - tracing::error!("Failed to send final response: {}", e); - } - } - - // 清理状态 - let _=batch_receive_states.remove(&req.request_id.unwrap()); - }); - - Ok(state) - }) - .await { - Err(e) => return Err(WSError::WsDataError(WsDataError::BatchTransferError { - request_id, - msg: format!("Failed to initialize batch state: {}", e) - })), - Ok(state) => state, - }; - - tracing::debug!("rpc_handle_batch_data ready with write_split_data_task_group"); - - // 2. 提交分片数据 - let data_item = proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(data)), - ..Default::default() - }; - - tracing::debug!("submit_split with data split idx: {}, at node: {}", block_index, self.view.p2p().nodes_config.this_node()); - state.handle.submit_split( - block_index as usize * DEFAULT_BLOCK_SIZE, - data_item, - ).await?; - - // 3. 
更新响应器 - state.shared.update_responsor(responsor).await; - - Ok(()) - } -} - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct DataMetaSys { - pub cache: i32, - pub distribute: i32, -} -impl From for DataMetaSys { - fn from(d: DataMeta) -> Self { - Self { - cache: d.cache, - distribute: d.distribute, - } - } -} -impl Into for DataMetaSys { - fn into(self) -> DataMeta { - DataMeta { - cache: self.cache, - distribute: self.distribute, - } - } -} - -/// 数据集元信息 -#[derive(Serialize, Deserialize)] -pub struct DataSetMetaV1 { - // unique_id: Vec, - pub version: u64, - pub data_metas: Vec, - pub synced_nodes: HashSet, -} - -/// 数据集元信息 -/// -/// 注意:新建元信息请使用 `DataSetMetaBuilder` -/// -/// https://fvd360f8oos.feishu.cn/docx/XoFudWhAgox84MxKC3ccP1TcnUh#share-Tqqkdxubpokwi5xREincb1sFnLc -#[derive(Serialize, Deserialize, Debug,Clone)] -pub struct DataSetMetaV2 { - // unique_id: Vec, - api_version: u8, - pub version: u64, - pub datas_splits: Vec, - pub data_metas: Vec, - pub synced_nodes: HashSet, - pub cache_mode: Vec, -} - -impl DataSetMetaV2 { - pub fn cache_mode_visitor(&self, idx: DataItemIdx) -> CacheModeVisitor { - CacheModeVisitor(self.cache_mode[idx as usize]) - } - - pub fn data_item_cnt(&self) -> usize { - self.datas_splits.len() - } - - pub fn get_data_node(&self, idx: DataItemIdx) -> NodeID { - // 获取指定数据项的主节点 - self.datas_splits[idx as usize].splits[0].node_id - } -} - -pub type DataSetMeta = DataSetMetaV2; - -// message EachNodeSplit{ -// uint32 node_id=1; -// uint32 data_offset=2; -// uint32 data_size=3; -// } - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct EachNodeSplit { - pub node_id: NodeID, - pub data_offset: u32, - pub data_size: u32, - pub cache_mode: u32, // 添加 cache_mode 字段 -} - -impl EachNodeSplit { - pub fn cache_mode_visitor(&self) -> CacheModeVisitor { - CacheModeVisitor(self.cache_mode as u16) - } -} - -/// 数据项的分片信息 -/// 我们需要知道每个数据项的分片大小 -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct DataSplit { - pub splits: Vec, -} - -pub type DataSplitIdx = usize; - -// impl DataSplit { -// /// node_2_datas will be consumed partially -// pub fn recorver_data( -// &self, -// unique_id: &[u8], -// idx: DataItemIdx, -// node_2_datas: &mut HashMap<(NodeID, DataItemIdx), proto::DataItem>, -// ) -> WSResult> { -// let nodes = node_2_datas -// .iter() -// .filter(|v| v.0 .1 == idx) -// .map(|v| v.0 .0) -// .collect::>(); - -// let mut each_node_splits: HashMap)> = -// HashMap::new(); - -// for node in nodes { -// let data = node_2_datas.remove(&(node, idx)).unwrap(); -// let _ = each_node_splits.insert(node, (data, None)); -// } - -// let mut max_size = 0; -// let mut missing = vec![]; - -// // zip with split info -// // by the way, check if the split is missing -// for split in &self.splits { -// let Some(find) = each_node_splits.get_mut(&split.node_id) else { -// missing.push((*split).clone()); -// continue; -// }; -// find.1 = Some(split.clone()); -// if split.data_offset + split.data_size > max_size { -// max_size = split.data_offset + split.data_size; -// } -// } - -// if missing.len() > 0 { -// return Err(WsDataError::SplitRecoverMissing { -// unique_id: unique_id.to_owned(), -// idx, -// missing, -// } -// .into()); -// } - -// let mut recover = vec![0; max_size.try_into().unwrap()]; - -// for (_node, (data, splitmeta)) in each_node_splits { -// let splitmeta = splitmeta.unwrap(); -// let begin = splitmeta.data_offset as usize; -// let end = begin + splitmeta.data_size as usize; -// recover[begin..end].copy_from_slice(data.as_ref()); -// } - -// 
Ok(recover) -// } -// } - -impl Into for EachNodeSplit { - fn into(self) -> proto::EachNodeSplit { - proto::EachNodeSplit { - node_id: self.node_id, - data_offset: self.data_offset, - data_size: self.data_size, - } - } -} - -impl Into for DataSplit { - fn into(self) -> proto::DataSplit { - proto::DataSplit { - splits: self.splits.into_iter().map(|s| s.into()).collect(), - } - } -} -// uint32 split_size = 1; -// repeated uint32 node_ids = 2; - -macro_rules! generate_cache_mode_methods { - // The macro takes a list of pairs of the form [time, mask] and generates methods. - ($(($group:ident, $mode:ident)),*) => { - paste!{ - impl CacheModeVisitor { - $( - pub fn [](&self) -> bool { - (self.0 & []) == - ([] & []) - } - )* - } - impl DataSetMetaBuilder { - $( - pub fn [](&mut self, idx: DataItemIdx) -> &mut Self { - self.assert_cache_mode_len(); - self.building.as_mut().unwrap().cache_mode[idx as usize] = - (self.building.as_mut().unwrap().cache_mode[idx as usize] & ![]) | - ([] & []); - self - } - )* - } - } - }; -} -generate_cache_mode_methods!( - (time, forever), - (time, auto), - (pos, allnode), - (pos, specnode), - (pos, auto), - (map, common_kv), - (map, file) -); - -#[test] -fn test_cache_mode_visitor() { - let cache_mode_visitor = CacheModeVisitor(CACHE_MODE_TIME_FOREVER_MASK); - assert!(cache_mode_visitor.is_time_forever()); - assert!(!cache_mode_visitor.is_time_auto()); - - let cache_mode_visitor = CacheModeVisitor(CACHE_MODE_POS_ALLNODE_MASK); - assert!(cache_mode_visitor.is_pos_allnode()); - assert!(!cache_mode_visitor.is_pos_specnode()); - assert!(!cache_mode_visitor.is_pos_auto()); - - let cache_mode_visitor = CacheModeVisitor(CACHE_MODE_MAP_FILE_MASK); - assert!(cache_mode_visitor.is_map_file()); - assert!(!cache_mode_visitor.is_map_common_kv()); - - // test builder - - let meta = DataSetMetaBuilder::new() - .set_data_splits(vec![DataSplit { splits: vec![] }]) - .cache_mode_map_file(0) - .cache_mode_time_forever(0) - .build(); - assert!(meta.cache_mode_visitor(0).is_map_file()); - assert!(!meta.cache_mode_visitor(0).is_map_common_kv()); - assert!(meta.cache_mode_visitor(0).is_time_forever()); - assert!(!meta.cache_mode_visitor(0).is_time_auto()); - let meta = DataSetMetaBuilder::new() - .set_data_splits(vec![DataSplit { splits: vec![] }]) - .cache_mode_map_common_kv(0) - .cache_mode_time_forever(0) - .build(); - assert!(meta.cache_mode_visitor(0).is_map_common_kv()); - assert!(!meta.cache_mode_visitor(0).is_map_file()); - assert!(meta.cache_mode_visitor(0).is_time_forever()); - assert!(!meta.cache_mode_visitor(0).is_time_auto()); -} - -pub struct DataSetMetaBuilder { - building: Option, -} -impl From for DataSetMetaBuilder { - fn from(d: DataSetMetaV2) -> Self { - Self { building: Some(d) } - } -} -impl DataSetMetaBuilder { - pub fn new() -> Self { - Self { - building: Some(DataSetMetaV2 { - version: 0, - datas_splits: vec![], - data_metas: vec![], - api_version: 2, - synced_nodes: HashSet::new(), - cache_mode: vec![], - }), - } - } - fn assert_cache_mode_len(&self) { - if self.building.as_ref().unwrap().cache_mode.len() == 0 { - panic!("please set_data_splits before set_cache_mode"); - } - } - - pub fn version(&mut self, version: u64) -> &mut Self { - self.building.as_mut().unwrap().version = version; - self - } - - #[must_use] - pub fn set_data_splits(&mut self, splits: Vec) -> &mut Self { - let building = self.building.as_mut().unwrap(); - building.datas_splits = splits; - building.cache_mode = vec![0; building.datas_splits.len()]; - self - } - - pub fn set_cache_mode(&mut 
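// The cache-mode word packs three fields (time / position / map) into one
// u16. In each generated is_* check, the group mask selects the field's bits
// and the variant mask supplies the expected value; the builder methods write
// a variant by clearing the group bits first. Worked example using the
// constants defined in this file:
const CACHE_MODE_TIME_MASK: u16 = 0xf000;
const CACHE_MODE_TIME_FOREVER_MASK: u16 = 0x0fff;
const CACHE_MODE_MAP_MASK: u16 = 0x00f0;
const CACHE_MODE_MAP_FILE_MASK: u16 = 0xff1f;

fn is_time_forever(mode: u16) -> bool {
    (mode & CACHE_MODE_TIME_MASK) == (CACHE_MODE_TIME_FOREVER_MASK & CACHE_MODE_TIME_MASK)
}

fn set_map_file(mode: u16) -> u16 {
    (mode & !CACHE_MODE_MAP_MASK) | (CACHE_MODE_MAP_FILE_MASK & CACHE_MODE_MAP_MASK)
}

fn main() {
    let mode = set_map_file(0);
    assert_eq!(mode & CACHE_MODE_MAP_MASK, 0x0010); // map nibble now means "file"
    assert!(is_time_forever(mode)); // time nibble still 0x0, i.e. "forever"
}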
self, idx: DataItemIdx, mode: u16) -> &mut Self { - self.building.as_mut().unwrap().cache_mode[idx as usize] = mode; - self - } - - pub fn set_cache_mode_for_all(&mut self, mode: Vec) -> &mut Self { - self.building.as_mut().unwrap().cache_mode = mode; - assert_eq!( - self.building.as_mut().unwrap().cache_mode.len(), - self.building.as_mut().unwrap().datas_splits.len(), - "cache mode len must be equal to data splits len" - ); - self - } - - pub fn build(&mut self) -> DataSetMetaV2 { - self.building.take().unwrap() - } -} - -pub struct GetOrDelDataArg { - pub meta: Option, - pub unique_id: Vec, - pub ty: GetOrDelDataArgType, -} - -#[derive(Debug, Clone)] -pub enum GetOrDelDataArgType { - All, - Delete, - PartialOne { idx: DataItemIdx }, - PartialMany { idxs: BTreeSet }, -} - -impl DataGeneralView { - fn get_data_meta_local( - &self, - unique_id: &[u8], - delete: bool, - ) -> WSResult> { - let ope_name = if delete { "delete" } else { "get" }; - tracing::debug!("{} data meta for uid({:?})", ope_name, unique_id); - - let kv_store_engine = self.kv_store_engine(); - let key = KeyTypeDataSetMeta(&unique_id); - let keybytes = key.make_key(); - - let write_lock = kv_store_engine.with_rwlock(&keybytes); - let _guard = write_lock.write(); - - let meta_opt = if delete { - kv_store_engine.del(key, true)? - } else { - kv_store_engine.get(&key, true, KvAdditionalConf {}) - }; - Ok(meta_opt) - } - - pub async fn get_metadata( - &self, - unique_id: &[u8], - delete: bool, - ) -> WSResult { - // 先尝试从本地获取 - if let Some((_version, meta)) = self.get_data_meta_local(unique_id, delete)? { - return Ok(meta); - } - - // 本地不存在,从 master 获取 - self.data_general().get_or_del_datameta_from_master(unique_id, delete).await - } -} - -impl From for WSError { - fn from(err: JoinError) -> Self { - WsNetworkLogicErr::TaskJoinError { err }.into() - } -} - -#[async_trait] -impl LogicalModule for DataGeneral { - fn inner_new(args: LogicalModuleNewArgs) -> Self - where - Self: Sized, - { - Self { - view: DataGeneralView::new(args.logical_modules_ref.clone()), - rpc_call_data_version_schedule: RPCCaller::new(), - rpc_call_write_once_data: RPCCaller::new(), - rpc_call_batch_data: RPCCaller::new(), - rpc_call_get_data_meta: RPCCaller::new(), - rpc_call_get_data: RPCCaller::new(), - - rpc_handler_write_once_data: RPCHandler::new(), - rpc_handler_batch_data: RPCHandler::new(), - rpc_handler_data_meta_update: RPCHandler::new(), - rpc_handler_get_data_meta: RPCHandler::new(), - rpc_handler_get_data: RPCHandler::new(), - - // 批量数据接收状态管理 - batch_receive_states: AsyncInitMap::new(), - } - } - - async fn start(&self) -> WSResult> { - tracing::info!("start as master"); - - let p2p = self.view.p2p(); - // register rpc callers - { - self.rpc_call_data_version_schedule.regist(p2p); - self.rpc_call_write_once_data.regist(p2p); - self.rpc_call_batch_data.regist(p2p); - self.rpc_call_get_data_meta.regist(p2p); - self.rpc_call_get_data.regist(p2p); - } - - // register rpc handlers - { - let view = self.view.clone(); - self.rpc_handler_write_once_data - .regist(p2p, move |responsor, req| { - let view = view.clone(); - let _ = tokio::spawn(async move { - view.data_general().rpc_handle_write_one_data(responsor, req).await; - }); - Ok(()) - }); - - let view = self.view.clone(); - self.rpc_handler_batch_data.regist( - p2p, - move |responsor: RPCResponsor, - req: proto::BatchDataRequest| { - let view = view.clone(); - let _ = tokio::spawn(async move { - let _ = view.data_general().rpc_handle_batch_data(responsor, req).await; - }); - Ok(()) - }, - ); - - let 
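// WantIdxIter (used by write_data and get_or_del_data) walks the item indices
// selected by a GetOrDelDataArgType. A reduced sketch of that selection for
// three of the shapes (Delete, which visits every index like All but with
// delete semantics, is omitted here):
use std::collections::BTreeSet;

enum Want { All, PartialOne { idx: u8 }, PartialMany { idxs: BTreeSet<u8> } }

fn selected_indices(want: &Want, item_cnt: u8) -> Vec<u8> {
    match want {
        Want::All => (0..item_cnt).collect(),
        Want::PartialOne { idx } => vec![*idx],
        Want::PartialMany { idxs } => idxs.iter().copied().collect(),
    }
}

fn main() {
    let many = Want::PartialMany { idxs: BTreeSet::from([0u8, 2]) };
    assert_eq!(selected_indices(&Want::All, 3), vec![0, 1, 2]);
    assert_eq!(selected_indices(&many, 3), vec![0, 2]);
}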
view = self.view.clone(); - self.rpc_handler_data_meta_update.regist( - p2p, - move |responsor: RPCResponsor, - req: proto::DataMetaUpdateRequest| { - let view = view.clone(); - let _ = tokio::spawn(async move { - view.data_general().rpc_handle_data_meta_update(responsor, req).await - }); - Ok(()) - }, - ); - - let view = self.view.clone(); - self.rpc_handler_get_data_meta - .regist(p2p, move |responsor, req| { - let view = view.clone(); - let _ = tokio::spawn(async move { - view.data_general().rpc_handle_get_data_meta(req, responsor) - .await - .todo_handle("rpc_handle_get_data_meta err"); - }); - Ok(()) - }); - - let view = self.view.clone(); - self.rpc_handler_get_data.regist( - p2p, - move |responsor: RPCResponsor, - req: proto::GetOneDataRequest| { - let view = view.clone(); - let _ = tokio::spawn(async move { - view.data_general().rpc_handle_get_one_data(responsor, req).await - }); - Ok(()) - }, - ); - } - - Ok(vec![]) - } -} - -#[derive(Debug, Clone, Copy)] -pub struct CacheModeVisitor(pub u16); \ No newline at end of file diff --git a/src/main/src/general/data/m_kv_store_engine.rs b/src/main/src/general/data/m_kv_store_engine.rs index b488f0a..8338032 100644 --- a/src/main/src/general/data/m_kv_store_engine.rs +++ b/src/main/src/general/data/m_kv_store_engine.rs @@ -580,8 +580,7 @@ mod test { .build(), false, ) - // .todo_handle(); - .todo_handle("This part of the code needs to be implemented."); + .todo_handle(); let set = view .kv_store_engine() .get( diff --git a/src/main/src/general/m_os/mod.rs b/src/main/src/general/m_os/mod.rs index 225f347..90ad580 100644 --- a/src/main/src/general/m_os/mod.rs +++ b/src/main/src/general/m_os/mod.rs @@ -111,13 +111,6 @@ pub enum OsProcessType { } impl OperatingSystem { - pub fn abs_file_path(&self, p: PathBuf) -> PathBuf { - if p.is_absolute() { - p - } else { - self.file_path.join(p) - } - } pub fn app_path(&self, app: &str) -> PathBuf { self.view.appmeta_manager().fs_layer.concat_app_dir(app) } @@ -254,7 +247,7 @@ impl OperatingSystem { }) .await .unwrap(); - responser.send_resp(res).await.todo_handle("This part of the code needs to be implemented."); //返回结果未处理 曾俊 + responser.send_resp(res).await.todo_handle(); } async fn remote_get_dir_content_handler( @@ -321,7 +314,7 @@ impl OperatingSystem { }) .await .unwrap(); - responser.send_resp(res).await.todo_handle("This part of the code needs to be implemented."); //返回结果未处理 曾俊 + responser.send_resp(res).await.todo_handle(); } pub fn open_file(&self, fname: &str) -> WSResult { diff --git a/src/main/src/general/network/m_p2p.rs b/src/main/src/general/network/m_p2p.rs index 33b4b67..82a9297 100644 --- a/src/main/src/general/network/m_p2p.rs +++ b/src/main/src/general/network/m_p2p.rs @@ -421,10 +421,7 @@ impl P2PModule { taskid, DispatchPayload::Local(Box::new(r)), ) - //.todo_handle(); - //虞光勇修改,修改原因:在调用 todo_handle 方法时遇到了缺少参数的问题。需要确保在调用 todo_handle 方法时提供所需的字符串参数。 - //修改内容:加入字符串参数。 - .todo_handle("This part of the code needs to be implemented."); //返回结果未处理 曾俊 + .todo_handle(); let resp = rx.await.unwrap(); let resp = resp.downcast::().unwrap(); diff --git a/src/main/src/general/network/m_p2p_quic.rs b/src/main/src/general/network/m_p2p_quic.rs index b0226fd..9e221e3 100644 --- a/src/main/src/general/network/m_p2p_quic.rs +++ b/src/main/src/general/network/m_p2p_quic.rs @@ -360,8 +360,7 @@ async fn handle_connection( let head=bytes.split_to(headlen as usize); match deserialize_msg_id_task_id(&head) { Ok((msg_id, task_id)) => { - //返回结果未处理 曾俊 - view.p2p().dispatch(remote_id, msg_id, task_id, 
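// The reverted abs_file_path helper (removed in the m_os hunk above) anchored
// relative paths at the OS module's file_path root. Equivalent standalone
// sketch; the base directory below is hypothetical:
use std::path::{Path, PathBuf};

fn abs_file_path(base: &Path, p: PathBuf) -> PathBuf {
    if p.is_absolute() { p } else { base.join(p) }
}

fn main() {
    let base = Path::new("/var/lib/node/files");
    assert_eq!(abs_file_path(base, PathBuf::from("/tmp/x")), PathBuf::from("/tmp/x"));
    assert_eq!(
        abs_file_path(base, PathBuf::from("apps/a.zip")),
        PathBuf::from("/var/lib/node/files/apps/a.zip")
    );
}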
bytes.into()).todo_handle("This part of the code needs to be implemented."); + view.p2p().dispatch(remote_id, msg_id, task_id, bytes.into()).todo_handle(); } Err(err) => { tracing::warn!("incoming deserial head error: {:?}", err); diff --git a/src/main/src/general/network/proto_ext.rs b/src/main/src/general/network/proto_ext.rs index ad1aafb..0dc42c4 100644 --- a/src/main/src/general/network/proto_ext.rs +++ b/src/main/src/general/network/proto_ext.rs @@ -55,8 +55,7 @@ impl ProtoExtDataItem for proto::DataItem { // 从文件读取指定范围的数据 async fn read_file_range(path: &Path, file: tokio::fs::File, range: Range) -> WSResult> { let mut file = tokio::io::BufReader::new(file); - // file.seek(std::io::SeekFrom::Start(range.start as u64)) 曾俊 没对正常返回值做处理 - let _ = file.seek(std::io::SeekFrom::Start(range.start as u64)) + file.seek(std::io::SeekFrom::Start(range.start as u64)) .await .map_err(|e| WSError::WsDataError(WsDataError::FileSeekErr { path: path.to_path_buf(), @@ -64,8 +63,7 @@ impl ProtoExtDataItem for proto::DataItem { }))?; let mut buffer = vec![0; range.end - range.start]; - // file.read_exact(&mut buffer) - let _ = file.read_exact(&mut buffer) + file.read_exact(&mut buffer) .await .map_err(|e| WSError::WsDataError(WsDataError::FileReadErr { path: path.to_path_buf(), @@ -84,12 +82,8 @@ impl ProtoExtDataItem for proto::DataItem { let actual_path = if path.is_dir() { zip_path.as_ref().ok_or_else(|| WSError::WsDataError( WsDataError::BatchTransferFailed { - // node: 0, - // batch: 0, - request_id:proto::BatchRequestId { - node_id: 0, - sequence: 0, - }, + node: 0, + batch: 0, reason: "Directory must have zip_path".to_string(), } ))? diff --git a/src/main/src/general/network/proto_src/data.proto b/src/main/src/general/network/proto_src/data.proto index e585b0d..7296f20 100644 --- a/src/main/src/general/network/proto_src/data.proto +++ b/src/main/src/general/network/proto_src/data.proto @@ -175,6 +175,13 @@ message GetOneDataResponse{ repeated DataItem data =2; string message=3; } + +enum BatchDataBlockType { + MEMORY = 0; // 内存数据块 + FILE = 1; // 文件数据块 + DIR=2; // 目录数据块 +} + message BatchRequestId { uint32 node_id = 1; // 节点ID uint64 sequence = 2; // 原子自增序列号 @@ -184,7 +191,7 @@ message BatchDataRequest { BatchRequestId request_id = 1; // 请求唯一标识(节点ID + 序列号) bytes dataset_unique_id = 2; // 数据集唯一标识 uint32 data_item_idx = 3; // 数据项索引 - DataItem block_type = 4; // 数据块类型(文件/内存), 将数据留空 + BatchDataBlockType block_type = 4; // 数据块类型(文件/内存) uint32 block_index = 5; // 数据块索引 bytes data = 6; // 数据块内容 DataOpeType operation = 7; // 操作类型 diff --git a/src/main/src/main.rs b/src/main/src/main.rs index fefd8ca..82e6707 100644 --- a/src/main/src/main.rs +++ b/src/main/src/main.rs @@ -43,8 +43,8 @@ async fn main() { // dist_kv_raft::tikvraft_proxy::start(); let mut sys=Sys::new(config); let modules_ref=sys.new_logical_modules_ref(); - // modules_global_bridge::modules_ref_scope(modules_ref, async move{sys.wait_for_end().await;}) 由于modules_ref_scope改为了异步函数,所以这里加上.await 曾俊 - modules_global_bridge::modules_ref_scope(modules_ref, async move{sys.wait_for_end().await;}).await; + modules_global_bridge::modules_ref_scope(modules_ref, async move{sys.wait_for_end().await;}) + } pub fn start_tracing() { diff --git a/src/main/src/master/data/m_data_master.rs b/src/main/src/master/data/m_data_master.rs index 08848ad..44a4d70 100644 --- a/src/main/src/master/data/m_data_master.rs +++ b/src/main/src/master/data/m_data_master.rs @@ -335,7 +335,6 @@ impl DataMaster { new_meta.version ); - //返回结果未处理 曾俊 responsor 
.send_resp(DataVersionScheduleResponse { version: new_meta.version, @@ -348,8 +347,7 @@ impl DataMaster { cache_nodes, }) .await - //.todo_handle(); - .todo_handle("This part of the code needs to be implemented."); + .todo_handle(); Ok(()) } // async fn rpc_handler_dataversion_synced_on_node( diff --git a/src/main/src/modules_global_bridge/mod.rs b/src/main/src/modules_global_bridge/mod.rs index bf86c32..c3de453 100644 --- a/src/main/src/modules_global_bridge/mod.rs +++ b/src/main/src/modules_global_bridge/mod.rs @@ -1,6 +1,5 @@ use std::future::Future; -use crate::result::WSError;//虞光勇修改,修改内容:增加use crate::result::WSError;来导入 WSError。 -use crate::result::WsRuntimeErr;//虞光勇修改,修改内容:增加use crate::result::WsRuntimeErr;来导入 WsRuntimeErr。 + use crate::result::WSResult; use crate::sys::LogicalModules; use crate::sys::LogicalModulesRef; @@ -16,32 +15,15 @@ tokio::task_local! { } pub fn try_get_modules_ref() -> WSResult { - //没有处理try_wth的错误返回 曾俊 - // let mut res=Err(WSError::WsRuntimeErr(WsRuntimeErr::ModulesRefOutofLifetime)); - // MODULES_REF.try_with(|m|{ - // res=Ok(m.clone()); - // }); - // res - - MODULES_REF.try_with(|m| { - // 克隆 m 并返回 Ok 结果 - Ok(m.clone()) - }) - // 如果 try_with 失败,则返回相应的错误 - .map_err(|_e| WSError::WsRuntimeErr(WsRuntimeErr::ModulesRefOutofLifetime))? + let mut res=Err(WSError::WsRuntimeErr(WsRuntimeErr::ModulesRefOutofLifetime)); + MODULES_REF.try_with(|m|{ + res=Ok(m.clone()); + }); + res } -//没有处理scope的返回值 曾俊 -// pub fn modules_ref_scope(modules_ref: LogicalModulesRef,future: impl Future) { -// MODULES_REF.scope(modules_ref,future); -// } -pub async fn modules_ref_scope(modules_ref: LogicalModulesRef, future: F) -where - F: Future + 'static, -{ - MODULES_REF.scope(modules_ref, async move { - let _ = future.await; - }).await; +pub fn modules_ref_scope(modules_ref: LogicalModulesRef,future: impl Future) { + MODULES_REF.scope(modules_ref,future); } fn modules() -> &'static LogicalModules { diff --git a/src/main/src/result.rs b/src/main/src/result.rs index 53ff8ff..2d6ac18 100644 --- a/src/main/src/result.rs +++ b/src/main/src/result.rs @@ -253,10 +253,6 @@ pub enum WsDataError { path: String, err: Infallible, }, - UnzipErr{ - path: PathBuf, - err: ZipExtractError, - }, SplitRecoverMissing { unique_id: Vec, idx: DataItemIdx, @@ -461,28 +457,18 @@ impl_err_convertor!(InitializeError, WsRaftErr, InitializeError); impl_err_convertor!(RaftError, WsRaftErr, RaftError); impl_err_convertor!(std::io::Error, WsIoErr, Io); -pub trait WSResultExt :Sized { - fn todo_handle(self, err_comment: &str) -> Self; +pub trait WSResultExt { + fn todo_handle(&self); } -impl WSResultExt for WSResult { +impl WSResultExt for WSResult { #[inline] - fn todo_handle(self, err_comment: &str) -> Self { - match &self { + fn todo_handle(&self) { + match self { Ok(_ok) => {} Err(err) => { - tracing::error!("{}, err: {:?}", err_comment, err); + tracing::warn!("result err: {:?}", err); } } - self - } -} - -// impl WSResultExt for WSError { WSError并没有泛型参数 去除特征约束 曾俊 -impl WSResultExt for WSError { - fn todo_handle(self, err_comment: &str) -> Self { - tracing::error!("{}, err: {:?}", err_comment, self); - self } } - diff --git a/src/main/src/util/zip.rs b/src/main/src/util/zip.rs index 791bc69..1cdfdc4 100644 --- a/src/main/src/util/zip.rs +++ b/src/main/src/util/zip.rs @@ -1,10 +1,9 @@ use std::path::Path; -use std::io::{self, Write, Seek, Cursor,Read}; +use std::io::{self, Write, Seek, Cursor}; use std::fs; use walkdir::WalkDir; -use zip::{write::FileOptions, ZipWriter, result::ZipError}; +use 
zip::{write::FileOptions, ZipWriter, ZipError}; use crate::result::{WSResult, WSError, WsIoErr}; -use std::os::unix::fs::PermissionsExt; // 添加这一行以引入PermissionsExt trait 针对下方的.mode()报错 曾俊 pub fn unzip_data_2_path(p: impl AsRef, data: Vec) -> WSResult<()> { // remove old dir @@ -64,7 +63,7 @@ where .metadata() .map_err(|e| WSError::from(e))? .permissions() - .mode(), // 修改!!! 在文件上方导入了一个PermissionsExt trait 曾俊 + .mode(), ); // Write file or directory explicitly @@ -120,25 +119,18 @@ pub async fn zip_dir_2_file( method: zip::CompressionMethod, mut dst_file: std::fs::File, ) -> WSResult<()> { - // // if !src_dir.is_dir() { //泛型参数不会自动解引用 曾俊 - // if !src_dir.as_ref().is_dir() { - // return Err(WsIoErr::Zip2(ZipError::FileNotFound).into()); - // } - let src_dir = src_dir.as_ref().to_path_buf(); // 将 src_dir 转换为 PathBuf - if !src_dir.is_dir() { return Err(WsIoErr::Zip2(ZipError::FileNotFound).into()); } - let walkdir = WalkDir::new(src_dir.clone()); + let walkdir = WalkDir::new(src_dir); let it = walkdir.into_iter(); // 使用阻塞线程执行 zip 操作,因为 zip 库不支持异步 IO tokio::task::spawn_blocking(move || { zip_dir( &mut it.filter_map(|e| e.ok()), - // src_dir, //泛型参数不会自动解引用 曾俊 - src_dir.as_ref(), + src_dir, &mut dst_file, method, ) @@ -175,8 +167,7 @@ mod tests { zip_dir_2_file( src_path, zip::CompressionMethod::Stored, - output_file, - // output_file.as_file_mut(), + output_file.as_file_mut(), ).await })?; diff --git a/src/main/src/worker/m_kv_user_client.rs b/src/main/src/worker/m_kv_user_client.rs index 9d18f6a..4de9d52 100644 --- a/src/main/src/worker/m_kv_user_client.rs +++ b/src/main/src/worker/m_kv_user_client.rs @@ -5,7 +5,6 @@ use crate::{ m_data_general::{ new_data_unique_id_fn_kv, DataGeneral, DataItemIdx, DataSetMetaV2, GetOrDelDataArg, GetOrDelDataArgType, - dataitem::DataItemArgWrapper }, m_dist_lock::DistLock, }, @@ -210,16 +209,12 @@ impl KvUserClient { tracing::debug!("handle_kv_set: key: {:?}", key); let data_general = self.view.data_general(); - //返回结果未处理 曾俊 data_general .write_data( new_data_unique_id_fn_kv(&key), - //原代码: - // vec![proto::DataItem { - // data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(value)), - // }], - //修改后封装成要求的DataItemArgWrapper类型 tmpzipfile设置为Uninitialized状态 在DataItemArgWrapper结构体中添加了一个new方法 曾俊 - vec![DataItemArgWrapper::new(value)], + vec![proto::DataItem { + data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(value)), + }], Some(( cur_node, proto::DataOpeType::Write, @@ -230,7 +225,7 @@ impl KvUserClient { )), ) .await - .todo_handle("This part of the code needs to be implemented."); + .todo_handle(); KvResponse::new_common(vec![]) } From 4de8e55801a997aebd435564e569e227f13b2491 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 03/26] Revert "backup" This reverts commit 7507ecd4023732747e113351b3de434c59dcb22b. 
---
 plan.md | 84 ------
 src/main/Cargo.toml | 1 -
 src/main/src/general/app/mod.rs | 103 ++++---
 .../general/data/m_data_general/dataitem.rs | 190 +-----------
 .../src/general/data/m_data_general/mod.rs | 45 +--
 src/main/src/general/m_os/mod.rs | 3 -
 src/main/src/general/m_os/zip.rs | 115 +++++++-
 src/main/src/general/network/proto_ext.rs | 127 ++------
 .../src/general/network/proto_src/data.proto | 6 -
 src/main/src/main.rs | 7 +-
 src/main/src/modules_global_bridge/mod.rs | 19 --
 src/main/src/result.rs | 28 +-
 src/main/src/sys.rs | 9 -
 src/main/src/util/mod.rs | 1 -
 src/main/src/util/zip.rs | 275 ------------------
 15 files changed, 225 insertions(+), 788 deletions(-)
 delete mode 100644 plan.md
 delete mode 100644 src/main/src/util/zip.rs

diff --git a/plan.md b/plan.md
deleted file mode 100644
index a767a78..0000000
--- a/plan.md
+++ /dev/null
@@ -1,84 +0,0 @@
-\begin{abstract}
-Serverless computing has transformed cloud resource management; however, the separation of scaling and scheduling in current methods leads to low resource utilization, increased cold start delays, and high operational costs. This paper presents a joint optimization mechanism, Cejoss, which integrates a reinforcement learning-based scaling strategy (RELA) with a data-aware pre-scheduling mechanism (DECODS). By sharing scaling state and task information in real time, the system coordinates scaling and scheduling decisions to achieve joint optimization. Under this joint optimization, the system dynamically adjusts the number of instances and pre-schedules tasks to reduce cold start delays, ultimately achieving near-optimal resource allocation. Moreover, the scaler utilizes reinforcement learning to anticipate workload fluctuations and efficiently regulate instance counts, while the scheduler employs a data-aware pre-scheduling strategy to optimize task assignments and minimize latency. Experimental results show that in single-function scenarios the quality-price ratio improves by at least 50\%, while in multi-function scenarios it increases by at least 17\%, demonstrating significant advantages in enhancing system performance and reducing costs. This approach is adaptable to various workload scales and application types, offering a novel perspective for efficient serverless platform operations.
-\end{abstract}
-
-\begin{IEEEkeywords}
-Cloud Computing, Serverless, Scaler, Scheduler, Joint Optimization
-\end{IEEEkeywords}
-
-\section{Introduction}
-
-Serverless computing represents a paradigm shift in cloud resource management, offering auto-scaling, pay-as-you-go pricing, and rapid application development. This approach offers versatile solutions for various workloads, including web services, data analysis, scientific computing, and machine learning inference. Architecturally, a serverless system can be divided into components such as the scaler, the scheduler, observation, and storage. Among these, the scaler and scheduler play critical roles in the performance and cost efficiency of serverless applications. The scaler determines the number and placement of function instances, directly affecting resource utilization and cold start latency, while the scheduler's decisions on task assignment impact execution latency and load distribution. Their interaction fundamentally shapes the system's ability to handle varying workloads efficiently.
-Current serverless systems face three critical challenges that significantly impact their performance and efficiency. At the architectural level, the lack of coordination between scaling and scheduling components leads to fundamental limitations. This independent operation creates information isolation, where scaling decisions are made without knowledge of scheduling states and vice versa, resulting in suboptimal resource allocation and increased cold start delays.
-
-In the scaling component, current mechanisms rely heavily on static parameters and manual tuning, making them unable to adapt to dynamic workloads. The delayed response to workload changes not only leads to resource inefficiency but also hinders the scheduler's ability to make informed placement decisions. This limitation becomes particularly evident in scenarios with varying request patterns, where static scaling rules fail to provide appropriate resource levels for optimal scheduling.
-
-On the scheduling side, existing strategies lack the sophistication needed for efficient task placement in modern serverless environments. Traditional approaches either trigger tasks reactively after predecessor completion (causing unnecessary cold starts) or pre-schedule all tasks aggressively (wasting resources). Moreover, they often ignore critical factors like DAG data transmission delays, which significantly impact overall performance. These scheduling limitations, combined with the lack of coordination with scaling decisions, further compound the system's inefficiencies.
-
-To address these challenges, this paper presents Cejoss (Cost-effective Joint Optimization for Scaler and Scheduler), which makes the following key contributions:
-
-(1) The core of our contribution is a novel joint optimization framework \textbf{Cejoss} that merges the scaling node selection and scheduling node selection stages. This integration enables real-time sharing of resource views between components, allowing both the scaler and the scheduler to maintain timely awareness of each other's decisions. Through this coordinated approach, the system achieves significant improvements in both latency and cost metrics.
-
-(2) To improve dynamic resource management, we introduce \textbf{RELA} (REinforce Learning-based scAler), which employs PPO-based reinforcement learning with a carefully designed state space, reward function, and action mapping. RELA adapts across different application types and request frequencies, demonstrating superior performance in both single-function and multi-function scenarios and effectively addressing the limitations of static scaling parameters.
-
-(3) For efficient task scheduling, we develop \textbf{DECODS} (DEcoupled CO-scaling Data-aware Scheduler), which implements a three-stage scheduling approach comprising Task Collection, Scaling Nodes Selection, and Task Nodes Selection. DECODS introduces moderate pre-scheduling to reduce cold start time while avoiding over-allocation of resources. By considering DAG data transmission latency for optimized task placement, it achieves a balanced trade-off between pre-scheduling benefits and resource efficiency.
-
-(4) Our comprehensive experimental evaluation demonstrates that this integrated approach significantly improves system performance. The quality-price ratio improves by at least 50\% in single-function scenarios and 17\% in multi-function scenarios compared to state-of-the-art approaches. These results validate the effectiveness of our joint optimization strategy across various workload scales and application types.
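To make the coordination described in contributions (1)-(3) concrete, the following minimal, self-contained Rust sketch illustrates the shared-view idea only: the scaler publishes a per-function instance target, and the scheduler pre-schedules tasks against that target before the instances finish warming. It is an illustration, not code from this repository or the Cejoss implementation; the RL policy is reduced to a trivial max, and every name in it (SharedView, scale, preschedule) is hypothetical.

// Illustrative sketch of scaler/scheduler state sharing; all names are
// hypothetical and the RL policy is replaced by a trivial max.
use std::collections::HashMap;
use std::sync::{Arc, Mutex};

// State that both components read and write in real time.
#[derive(Default)]
struct SharedView {
    // scaler output: desired warm instances per function
    desired: HashMap<String, usize>,
    // scheduler output: tasks already pre-scheduled per function
    prescheduled: HashMap<String, usize>,
}

// The scaler raises its target when the scheduler reports queued work,
// so instances start warming before the tasks are dispatched.
fn scale(view: &Arc<Mutex<SharedView>>, func: &str, queued: usize) {
    let mut v = view.lock().unwrap();
    let pre = v.prescheduled.get(func).copied().unwrap_or(0);
    let target = queued.max(pre); // naive stand-in for the RL policy
    v.desired.insert(func.to_string(), target);
}

// The scheduler pre-schedules against the scaler's target instead of
// waiting for instances to exist, trimming cold start latency.
fn preschedule(view: &Arc<Mutex<SharedView>>, func: &str, tasks: usize) -> usize {
    let mut v = view.lock().unwrap();
    let capacity = v.desired.get(func).copied().unwrap_or(0);
    let placed = tasks.min(capacity.max(1));
    *v.prescheduled.entry(func.to_string()).or_insert(0) += placed;
    placed
}

fn main() {
    let view = Arc::new(Mutex::new(SharedView::default()));
    scale(&view, "resize", 4); // scaler sees 4 queued requests
    let placed = preschedule(&view, "resize", 4);
    println!("pre-scheduled {} tasks against the shared scaling target", placed);
}

The ordering is the point of the sketch: because the scaler's target is visible before the instances exist, the scheduler can place work against capacity that is still warming, which is what reduces the cold start delays described above.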
-
-
-\section{Background and Motivation}
-
-\subsection{Notations and Terms}
-
-Before discussing the challenges in current serverless systems, we first define several key terms used throughout this paper:
-
-\textbf{Function:} A single, stateless piece of application logic deployed to the cloud. Each function has its own code and resource configuration (memory, timeout, etc.) and is invoked on demand in response to events or requests. For example, a function might resize an image or process a database query.
-
-\textbf{Instance:} The runtime environment (typically a container or virtual machine) that executes function tasks. An instance has loaded the function code and can serve multiple tasks concurrently. New instances may be started (incurring cold start delays) or terminated by the platform based on demand.
-
-\textbf{Task (Invocation):} A single execution of a function, triggered by a specific event or request. Each task runs the function code with given input and produces output. Tasks are the units of work that must be scheduled for execution.
-
-\textbf{Quality-Price Ratio:} A comprehensive metric that evaluates both system performance and resource efficiency, calculated as $QP = \frac{Performance}{Cost}$. Performance considers factors like request latency and throughput, while Cost accounts for resource consumption and instance hours.
-
-\textbf{Single-Function Applications:} These applications involve independent function invocations without dependencies. Common examples include API endpoints (e.g., HTTP request handlers), event processors (e.g., image resizing, video transcoding), and stateless microservices. While simpler to manage, they still require efficient scaling to handle varying request rates and optimal instance placement for load balancing.
-
-\textbf{Multi-Function Applications:} These applications compose multiple functions into complex DAGs, where the output of one function serves as input to others. Different applications exhibit varying characteristics in terms of computation and data transfer requirements. Video processing pipelines, for example, involve significant data movement as they transform raw uploads through decode, filter, encode, and thumbnail generation stages. Machine learning applications, on the other hand, often emphasize computational intensity, chaining functions from data preprocessing and feature extraction to model inference and result ranking. Data analytics pipelines combine both aspects, processing logs through collection, parsing, aggregation, and visualization stages, with varying demands on computation and data transfer at different stages. Each stage in these workflows represents a separate function, with both data and control dependencies flowing between them.
-
-
-\subsection{Scaler and Scheduler}
-
-The core of serverless resource management consists of two key components:
-
-\textbf{Scaler:} The component responsible for dynamic resource management in serverless systems. It continuously monitors system metrics (e.g., request rates, CPU utilization) to determine both the appropriate number of function instances and their optimal placement across nodes. When workload increases, the scaler must decide not only how many new instances to launch, but also which nodes should host them, considering factors like node capacity, current load, and network conditions. Conversely, during periods of low demand, it identifies underutilized instances for termination while maintaining sufficient capacity for incoming requests. These scaling decisions directly impact both system performance (through cold start latency and execution efficiency) and cost (through resource utilization and instance hours).
-
-\textbf{Scheduler:} The component that assigns incoming tasks to available instances. When a task arrives, the scheduler must decide which instance should execute it, considering factors like instance availability, load balancing, and data locality. In modern serverless systems, schedulers must also handle complex applications represented as DAGs (Directed Acyclic Graphs), where multiple functions have dependencies and data transfer requirements.
-
-The effectiveness of these components heavily depends on how they interact and coordinate their decisions. In practice, current serverless platforms have explored two fundamentally different architectural patterns. The \textbf{Global Scheduler Only} pattern relies solely on a global scheduler, without an explicit scaling component. When a task arrives, the scheduler assigns it to a node, triggering instance creation if necessary. The instance lifecycle is managed through node-local mechanisms: instances are automatically started when tasks are scheduled (incurring cold starts), and terminated based on local policies such as idle timeouts or cache eviction strategies. This approach simplifies the architecture but leaves scaling decisions to emerge from the combined effects of task placement and local instance management. The \textbf{Decoupled Scaler \& Scheduler} pattern adopts a different approach, where separate scaling and scheduling components work independently. The scaler proactively manages instance counts based on global metrics, while the scheduler focuses on task placement among available instances.
-
-Figure \ref{fig:arch_patterns} illustrates these contrasting architectures and their decision flows.
-
-\begin{figure}[htbp]
-\centerline{\includegraphics[width=1.0\linewidth]{ArchPatterns.png}}
-\caption{Two predominant architectural patterns in serverless platforms: (a) the Global Scheduler Only pattern, where task placement triggers instance creation and local policies handle termination; (b) the Decoupled pattern, where explicit scaling decisions are made independently of task scheduling.}
-\label{fig:arch_patterns}
-\end{figure}
-
-\begin{figure}[htbp]
-\centerline{\includegraphics[width=1.0\linewidth]{SimpleCompare.png}}
-\caption{The metrics of three basic strategies in terms of average request latency, average request cost, and the number of instances, tested under medium workload. (Each latency bar has a dark part and a light part: the dark part represents execution latency and the light part represents cold start latency; the specific details of the testing environment are described in Section \ref{sec:exp_env}.)}
-\label{fig:simplecmp}
-\end{figure}
-
-
-To evaluate these patterns, we implemented three representative strategies and analyzed their performance (Figure \ref{fig:simplecmp}). For the Global Scheduler Only pattern, we tested two approaches: the \textbf{Hash-Based Strategy}, which maintains a single instance per function to minimize cold starts but suffers from high execution latency under load, and the \textbf{No-Scaler Greedy Strategy}, which creates multiple replicas but leads to over-provisioning. For the Decoupled pattern, we implemented the \textbf{HPA Scaler + Greedy Strategy}, which shows detection lag in scaling decisions and increased cold starts due to poor coordination.
As shown in the experimental results, neither pattern achieves satisfactory performance, motivating the need for better coordination between scaling and scheduling decisions. - - -\subsection{Existing Improvements} - -\textbf{Advanced Scaling:} Systems like Hansel \cite{lstm_hansel} and Autopilot \cite{autopilot} have introduced predictive and learning-based scaling mechanisms to better handle dynamic workloads. However, these solutions still operate independently from scheduling decisions. - -\textbf{Improved Scheduling:} Platforms like FaaSFlow and FnSched \cite{fnsched} have developed DAG-aware and hybrid scheduling strategies. Yet, they typically lack coordination with scaling components. - -While these improvements have enhanced individual components, they fail to address the fundamental issue: the lack of coordination between scaling and scheduling decisions. This limitation motivates our investigation into a joint optimization approach that can bridge this gap. - diff --git a/src/main/Cargo.toml b/src/main/Cargo.toml index 12a8226..b61ac6c 100644 --- a/src/main/Cargo.toml +++ b/src/main/Cargo.toml @@ -63,7 +63,6 @@ path-absolutize.workspace = true dashmap.workspace = true base64.workspace = true hex = "0.4.3" -tempfile.workspace = true [dependencies.uuid] version = "1.8.0" diff --git a/src/main/src/general/app/mod.rs b/src/main/src/general/app/mod.rs index 01859e8..9a2a837 100644 --- a/src/main/src/general/app/mod.rs +++ b/src/main/src/general/app/mod.rs @@ -7,13 +7,11 @@ pub mod m_executor; pub mod v_os; use super::data::m_data_general::{DataSetMetaV2, GetOrDelDataArg, GetOrDelDataArgType}; -use super::m_os::APPS_REL_DIR; use crate::general::app::app_native::native_apps; use crate::general::app::instance::m_instance_manager::InstanceManager; use crate::general::app::m_executor::Executor; use crate::general::app::m_executor::FnExeCtxAsyncAllowedType; use crate::general::app::v_os::AppMetaVisitOs; -use crate::general::data::m_data_general::dataitem::DataItemArgWrapper; use crate::general::network::proto_ext::ProtoExtDataItem; use crate::util::VecExt; use crate::{general::network::proto, result::WSResultExt}; @@ -780,36 +778,36 @@ impl AppMetaManager { // let appdir = self.fs_layer.concat_app_dir(app); let appmeta = self.fs_layer.read_app_meta(tmpapp).await?; - // // TODO: 2.check project dir - // // 3. if java, take snapshot - // if let AppType::Jar = appmeta.app_type { - // let _ = self - // .meta - // .write() - // .await - // .tmp_app_metas - // .insert(tmpapp.to_owned(), appmeta.clone()); - // tracing::debug!("record app meta to make checkpoint {}", tmpapp); - // self.view - // .instance_manager() - // .make_checkpoint_for_app(tmpapp) - // .await?; - // self.view - // .instance_manager() - // .drap_app_instances(tmpapp) - // .await; - // // remove app_meta - // tracing::debug!("checkpoint made, remove app meta {}", tmpapp); - // let _ = self - // .meta - // .write() - // .await - // .tmp_app_metas - // .remove(tmpapp) - // .unwrap_or_else(|| { - // panic!("remove app meta failed, app: {}", tmpapp); - // }); - // } + // TODO: 2.check project dir + // 3. 
if java, take snapshot + if let AppType::Jar = appmeta.app_type { + let _ = self + .meta + .write() + .await + .tmp_app_metas + .insert(tmpapp.to_owned(), appmeta.clone()); + tracing::debug!("record app meta to make checkpoint {}", tmpapp); + self.view + .instance_manager() + .make_checkpoint_for_app(tmpapp) + .await?; + self.view + .instance_manager() + .drap_app_instances(tmpapp) + .await; + // remove app_meta + tracing::debug!("checkpoint made, remove app meta {}", tmpapp); + let _ = self + .meta + .write() + .await + .tmp_app_metas + .remove(tmpapp) + .unwrap_or_else(|| { + panic!("remove app meta failed, app: {}", tmpapp); + }); + } Ok(appmeta) } @@ -1012,19 +1010,46 @@ impl AppMetaManager { Ok(appmeta) => appmeta, }; + // 4. zip tmp dir to memory + let zipfiledata = { + tracing::debug!("zip tmp dir to memory"); + // if let Ok(direntries) = fs::read_dir(tmpappdir.join("checkpoint-dir")) { + // for f in direntries { + // tracing::debug!( + // "file in checkpoint-dir: {:?}", + // f.map(|v| v.file_name().to_str().unwrap().to_owned()) + // ); + // } + // } + let view = self.view.clone(); + tokio::task::spawn_blocking(move || { + view.os() + .zip_dir_2_data(&tmpappdir, zip::CompressionMethod::Deflated) + }) + .await + .unwrap() + }?; + // remove temp dir // let _ = fs::remove_dir_all(&tmpappdir).map_err(|e| WSError::from(WsIoErr::Io(e)))?; - // mv temp app to formal app dir - let rel_app_dir = format!("{}/{}", APPS_REL_DIR, appname); - let formal_app_dir = self.view.os().file_path.join(rel_app_dir); - let _ = fs::rename(&tmpappdir, &formal_app_dir).map_err(|e| WSError::from(WsDataError::FileOpenErr { path: (), err: () })); - // 3. broadcast meta and appfile let write_data_id = format!("{}{}", DATA_UID_PREFIX_APP_META, appname); let write_datas = vec![ - DataItemArgWrapper::from_bytes(bincode::serialize(&appmeta).unwrap()), - DataItemArgWrapper::from_file(rel_app_dir), + proto::DataItem { + data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes( + bincode::serialize(&appmeta).unwrap(), + )), + }, + proto::DataItem { + data_item_dispatch: Some(proto::data_item::DataItemDispatch::File( + proto::FileData { + file_name_opt: format!("apps/{}", appname), + is_dir_opt: true, + file_content: zipfiledata, + }, + )), + }, ]; tracing::debug!( "app data size: {:?}", diff --git a/src/main/src/general/data/m_data_general/dataitem.rs b/src/main/src/general/data/m_data_general/dataitem.rs index 757c165..fbec9a8 100644 --- a/src/main/src/general/data/m_data_general/dataitem.rs +++ b/src/main/src/general/data/m_data_general/dataitem.rs @@ -1,13 +1,12 @@ use crate::general::data::m_data_general::UniqueId; use crate::general::network::proto; use crate::general::data::m_data_general::{DataItemIdx, DataSplitIdx, GetOrDelDataArgType}; -use crate::general::network::proto_ext::{NewPartialFileDataArg, ProtoExtDataItem}; +use crate::general::network::proto_ext::ProtoExtDataItem; use crate::result::{WSError, WSResult, WsDataError}; use futures::stream::{FuturesUnordered, StreamExt}; use std::collections::btree_set; use std::ops::Range; use std::path::PathBuf; -use std::str::FromStr; use std::sync::Arc; use tokio::sync::mpsc; use tokio::sync::broadcast; @@ -198,12 +197,6 @@ pub fn calculate_splits(total_size: usize) -> Vec> { /// 支持写入文件或内存两种模式 #[derive(Debug, Clone)] pub enum WriteSplitDataType { - Dir{ - /// 接受的压缩文件形式 - zip_file: PathBuf, - /// 解压后的文件路径 - path: PathBuf, - }, /// 文件写入模式 File { /// 目标文件路径 @@ -694,184 +687,3 @@ impl DataItemExt for DataItemSource { } } } - -#[derive(Debug)] -enum 
DataItemZip { - /// 未初始化状态 - Uninitialized, - /// 不需要压缩(非目录) - NoNeed, - /// 已压缩的目录 - Directory { - zipped_file: PathBuf, - } -} - -pub struct DataItemArgWrapper { - pub dataitem: proto::DataItem, - /// 目录压缩状态 - tmpzipfile: DataItemZip, -} - -impl DataItemArgWrapper { - pub fn from_file(filepath: PathBuf) -> Self { - Self { - dataitem: proto::DataItem{ - data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(proto::FileData{ - is_dir_opt: filepath.is_dir(), - file_name_opt: filepath.to_str().unwrap().to_string(), - file_content: vec![], - })), - }, - tmpzipfile: DataItemZip::Uninitialized, - } - } - - pub fn from_bytes(bytes: Vec) -> Self { - Self { - dataitem: proto::DataItem::new_raw_bytes(bytes), - tmpzipfile: DataItemZip::Uninitialized, - } - } - - pub async fn get_tmpzipfile(&mut self) -> WSResult> { - match &self.tmpzipfile { - DataItemZip::Uninitialized => { - self.init_tmpzipfile().await?; - } - _ => {} - } - - match &self.tmpzipfile { - DataItemZip::Directory { zipped_file } => Ok(Some(zipped_file)), - DataItemZip::NoNeed => Ok(None), - DataItemZip::Uninitialized => unreachable!(), - } - } - - async fn init_tmpzipfile(&mut self) -> WSResult<()> { - // 确保只初始化一次 - if !matches!(self.tmpzipfile, DataItemZip::Uninitialized) { - return Ok(()); - } - - let filedata = match self.dataitem.data_item_dispatch.as_ref().unwrap() { - proto::data_item::DataItemDispatch::File(file_data) => file_data, - proto::data_item::DataItemDispatch::RawBytes(_) => { - self.tmpzipfile = DataItemZip::NoNeed; - return Ok(()); - }, - }; - - // 检查目录元数据 - let metadata = tokio::fs::metadata(&filedata.file_name_opt).await.map_err(|e| { - WSError::WsDataError(WsDataError::FileMetadataErr { - path: PathBuf::from(&filedata.file_name_opt), - err: e, - }) - })?; - - if metadata.is_dir() { - let tmp_file = tempfile::NamedTempFile::new().map_err(|e| { - WSError::WsDataError(WsDataError::FileMetadataErr { - path: PathBuf::from(&filedata.file_name_opt), - err: e, - }) - })?; - let tmp_path = tmp_file.path().to_path_buf(); - - // 压缩目录到临时文件 - crate::util::zip::zip_dir_2_file( - &filedata.file_name_opt, - zip::CompressionMethod::Stored, - tmp_file.into_file(), - ).await?; - - self.tmpzipfile = DataItemZip::Directory { - zipped_file: tmp_path, - }; - } else { - self.tmpzipfile = DataItemZip::NoNeed; - } - - Ok(()) - } - - pub async fn transfer_size(&mut self) -> WSResult { - match &self.dataitem.data_item_dispatch { - Some(proto::data_item::DataItemDispatch::RawBytes(bytes)) => return Ok(bytes.len()), - Some(proto::data_item::DataItemDispatch::File(_)) => { - // handle in following - } - None => return Ok(0), - } - - if let Some(tmp_path) = self.get_tmpzipfile().await? { - let metadata = tokio::fs::metadata(tmp_path).await?; - Ok(metadata.len() as usize) - } else { - let file_data=match &self.dataitem.data_item_dispatch { - Some(proto::data_item::DataItemDispatch::File(file_data)) => { - // handle in following - file_data - } - Some(proto::data_item::DataItemDispatch::RawBytes(_)) | None=>{panic!("these case should be handled in previous match")} - }; - let metadata = tokio::fs::metadata(&file_data.file_name_opt).await?; - Ok(metadata.len() as usize) - } - } - - pub async fn clone_split_range(&mut self, range: Range) -> WSResult { - match &self.dataitem.data_item_dispatch { - Some(proto::data_item::DataItemDispatch::RawBytes(bytes)) => { - return Ok(proto::DataItem::new_partial_raw_bytes(bytes.to_owned(), range).map_err(|err|{ - tracing::error!("Failed to clone split range: {}", err); - err - })?) 
- } - Some(proto::data_item::DataItemDispatch::File(_)) => { - - // handle in following - } - None => panic!("proto dataitem must be Some"), - } - - fn get_filedata(dataitem:&DataItemArgWrapper)->&proto::FileData{ - match &dataitem.dataitem.data_item_dispatch { - Some(proto::data_item::DataItemDispatch::File(file_data)) => file_data, - Some(proto::data_item::DataItemDispatch::RawBytes(_)) | None=>{panic!("these case should be handled in previous match")} - } - } - - // if zipped, use zipped file - // else use file_data.file_name_opt - if let Some(tmp_path) = self.get_tmpzipfile().await?.cloned() { - let file_data=get_filedata(self); - Ok(proto::DataItem::new_partial_file_data(NewPartialFileDataArg::FilePath { path: PathBuf::from_str(&file_data.file_name_opt).map_err(|err|{ - let err=WsDataError::FilePathParseErr { - path: file_data.file_name_opt.clone(), - err: err, - }; - tracing::error!("Failed to clone split range: {:?}", err); - err - })? , zip_path: Some(tmp_path.clone()) }, range).await.map_err(|err|{ - tracing::error!("Failed to clone split range: {}", err); - err - })?) - } else { - let file_data=get_filedata(self); - Ok(proto::DataItem::new_partial_file_data(NewPartialFileDataArg::FilePath { path: PathBuf::from_str(&file_data.file_name_opt).map_err(|err|{ - let err=WsDataError::FilePathParseErr { - path: file_data.file_name_opt.clone(), - err: err, - }; - tracing::error!("Failed to clone split range: {:?}", err); - err - })? , zip_path: None }, range).await.map_err(|err|{ - tracing::error!("Failed to clone split range: {}", err); - err - })?) - } - } -} diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index b16fe88..51779cb 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -7,7 +7,6 @@ pub mod batch_handler; use crate::general::data::m_data_general::dataitem::{calculate_splits, WantIdxIter, WriteSplitDataTaskGroup, DataItemSource}; use crate::general::data::m_data_general::batch_handler::{BatchReceiveState, SharedWithBatchHandler}; -use dataitem::DataItemArgWrapper; use tokio::io::{AsyncSeekExt, AsyncReadExt}; use crate::general::{ @@ -159,7 +158,7 @@ impl DataGeneral { let data_size = data.size().await?; let splits = calculate_splits(data_size); - tracing::debug!("batch_transfer total size({}), splits: {:?}, to node {}", data_size, splits, target_node); + tracing::debug!("batch_transfer total size({}), splits: {:?}", data_size, splits); for (block_idx, split_range) in splits.iter().enumerate() { let block_data = match data.as_ref() { @@ -489,7 +488,7 @@ impl DataGeneral { pub async fn write_data( &self, unique_id: impl Into>, - datas: Vec, + datas: Vec, context_openode_opetype_operole: Option<( NodeID, proto::DataOpeType, @@ -499,15 +498,7 @@ impl DataGeneral { let unique_id = unique_id.into(); let log_tag = format!("[write_data({})]", String::from_utf8_lossy(&unique_id)); tracing::debug!("{} start write data", log_tag); - - let mut data_transfer_sizes=Vec::new(); - data_transfer_sizes.reserve(datas.len()); - for d in datas.iter_mut(){ - data_transfer_sizes.push(d.transfer_size().await.map_err(|err|{ - tracing::error!("{} transfer size error: {}", log_tag, err); - err - })?); - } + // 获取数据调度计划 let version_schedule_resp = self .rpc_call_data_version_schedule @@ -518,7 +509,10 @@ impl DataGeneral { unique_id: unique_id.clone(), context: context_openode_opetype_operole.map(|(node, ope, role)| { proto::DataScheduleContext { - each_data_sz_bytes: 
data_transfer_sizes, + each_data_sz_bytes: datas + .iter() + .map(|d| d.data_sz_bytes() as u32) + .collect(), ope_node: node as i64, ope_type: ope as i32, ope_role: Some(role), @@ -537,7 +531,7 @@ impl DataGeneral { // 处理每个数据项 let mut iter = WantIdxIter::new(&GetOrDelDataArgType::All, datas.len() as u8); while let Some(data_item_idx) = iter.next() { - let data_item: &DataItemArgWrapper = &datas[data_item_idx as usize]; + let data_item = &datas[data_item_idx as usize]; let split = &splits[data_item_idx as usize]; let mut primary_tasks = Vec::new(); @@ -549,7 +543,10 @@ impl DataGeneral { log_tag, split_idx + 1, split.splits.len(), split_info.node_id, split_info.data_offset, split_info.data_size); let split_info = split_info.clone(); let unique_id_clone = unique_id.clone(); - let data_item_primary = data_item.clone_split_range(split_info.data_offset..split_info.data_offset+split_info.data_size) + let data_item_primary = data_item.clone_split_range( + split_info.data_offset as usize + ..(split_info.data_offset + split_info.data_size) as usize + ); let view = self.view.clone(); let version_copy = version; let task = tokio::spawn(async move { @@ -612,9 +609,7 @@ impl DataGeneral { } let primary_results = futures::future::join_all(primary_tasks).await; - tracing::debug!("{} primary_results: {:?}", log_tag, primary_results); let cache_results = futures::future::join_all(cache_tasks).await; - tracing::debug!("{} cache_results: {:?}", log_tag, cache_results); if primary_results.iter().any(|res| res.is_err()) || cache_results.iter().any(|res| res.is_err()) { let error_msg = format!("主节点或缓存节点数据写入失败"); @@ -753,7 +748,7 @@ impl DataGeneral { } // Step3: write data - tracing::debug!("start to write partial data"); + tracing::debug!("start to write data"); let lock = kv_store_engine.with_rwlock(&KeyTypeDataSetMeta(&req.unique_id).make_key()); let guard = KeyLockGuard::Write(lock.write()); let check_meta = kv_store_engine.get( @@ -805,7 +800,7 @@ impl DataGeneral { } kv_store_engine.flush(); drop(guard); - tracing::debug!("data partial is written"); + tracing::debug!("data is written"); responsor .send_resp(WriteOneDataResponse { remote_version: req.version, @@ -1027,7 +1022,6 @@ impl DataGeneral { responsor: RPCResponsor, req: proto::BatchDataRequest, ) -> WSResult<()> { - tracing::debug!("rpc_handle_batch_data with batchid({:?})", req.request_id.clone().unwrap()); let batch_receive_states = self.batch_receive_states.clone(); // 预先克隆闭包外需要的字段 let block_index = req.block_index; @@ -1051,9 +1045,6 @@ impl DataGeneral { } }; - // 再process之前订阅,避免通知先于订阅 - let mut waiter = handle.get_all_tasks_waiter(); - // 启动process_tasks let _ = tokio::spawn(async move { match group.process_tasks().await { @@ -1070,16 +1061,12 @@ impl DataGeneral { // response task let _=tokio::spawn(async move { - tracing::debug!("rpc_handle_batch_data response task started"); // 等待所有任务完成 - if let Err(e) = waiter.wait().await { + if let Err(e) = state_clone.handle.wait_all_tasks().await { tracing::error!("Failed to wait for tasks: {}", e); - todo!("use responsor to send error response"); return; } - tracing::debug!("rpc_handle_batch_data response task wait all tasks done"); - // 发送最终响应 if let Some(final_responsor) = state_clone.shared.get_final_responsor().await { if let Err(e) = final_responsor.send_resp(proto::BatchDataResponse { @@ -1106,8 +1093,6 @@ impl DataGeneral { Ok(state) => state, }; - tracing::debug!("rpc_handle_batch_data ready with write_split_data_task_group"); - // 2. 
提交分片数据 let data_item = proto::DataItem { data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(data)), diff --git a/src/main/src/general/m_os/mod.rs b/src/main/src/general/m_os/mod.rs index 90ad580..9e86d35 100644 --- a/src/main/src/general/m_os/mod.rs +++ b/src/main/src/general/m_os/mod.rs @@ -37,9 +37,6 @@ logical_module_view_impl!(OperatingSystemView, p2p, P2PModule); logical_module_view_impl!(OperatingSystemView, os, OperatingSystem); logical_module_view_impl!(OperatingSystemView, appmeta_manager, AppMetaManager); - -pub const APPS_REL_DIR: &str = "apps"; - #[derive(LogicalModule)] pub struct OperatingSystem { view: OperatingSystemView, diff --git a/src/main/src/general/m_os/zip.rs b/src/main/src/general/m_os/zip.rs index 4392411..ee6591d 100644 --- a/src/main/src/general/m_os/zip.rs +++ b/src/main/src/general/m_os/zip.rs @@ -10,5 +10,118 @@ use walkdir::WalkDir; use zip::{result::ZipError, write::FileOptions}; impl OperatingSystem { - + pub fn unzip_data_2_path(&self, p: impl AsRef, data: Vec) -> WSResult<()> { + // remove old dir + let p = p.as_ref(); + if p.exists() { + fs::remove_dir_all(p).unwrap(); + } + // create new dir + fs::create_dir_all(p).unwrap(); + // unzip + match zip_extract::extract(Cursor::new(data), &p, false) { + Ok(_) => (), + Err(e) => { + return Err(WsIoErr::Zip(e).into()); + } + } + + Ok(()) + } + + // pub fn zip_dir_2_data(&self, p: impl AsRef) -> WSResult> { + // let p = p.as_ref(); + // let mut data = Vec::new(); + // let writer = Cursor::new(&mut data); + // let mut list = self.list_dir_with_prefix(p, p.to_str().unwrap())?; + // self.zip_dir( + // &mut list.iter_mut(), + // p.to_str().unwrap(), + // ZipWriter::new(&data), + // zip::CompressionMethod::Stored, + // ) + // .map_err(|e| WsIoErr::Zip2(e))?; + // Ok(data) + // } + + fn zip_dir( + it: &mut dyn Iterator, + prefix: &Path, + writer: T, + method: zip::CompressionMethod, + ) -> WSResult<()> + where + T: Write + Seek, + { + let mut zip = zip::ZipWriter::new(writer); + // let options = FileOptions::default() + // .compression_method(method) + // .unix_permissions(0o755); + + let prefix = Path::new(prefix); + let mut buffer = Vec::new(); + for entry in it { + let path = entry.path(); + let name = path.strip_prefix(prefix).unwrap(); + let path_as_string = name.to_str().unwrap().to_owned(); + + let options = FileOptions::default() + .compression_method(method) + .unix_permissions( + entry + .metadata() + .map_err(|e| WSError::from(e))? + .permissions() + .mode(), + ); + + // Write file or directory explicitly + // Some unzip tools unzip files with directory paths correctly, some do not! + if path.is_file() { + tracing::debug!("adding file {path:?} as {name:?} ..."); + zip.start_file(path_as_string, options) + .map_err(|e| WSError::from(WsIoErr::Zip2(e)))?; + let mut f = File::open(path).map_err(|e| WSError::from(WsIoErr::Io(e)))?; + + let _ = f + .read_to_end(&mut buffer) + .map_err(|e| WSError::from(WsIoErr::Io(e)))?; + zip.write_all(&buffer) + .map_err(|e| WSError::from(WsIoErr::Io(e)))?; + buffer.clear(); + } else if !name.as_os_str().is_empty() { + // Only if not root! 
Avoids path spec / warning + // and mapname conversion failed error on unzip + tracing::debug!("adding dir {path_as_string:?} as {name:?} ..."); + zip.add_directory(path_as_string, options) + .map_err(|e| WSError::from(WsIoErr::Zip2(e)))?; + } + } + let _ = zip.finish().map_err(|e| WSError::from(WsIoErr::Zip2(e)))?; + Ok(()) + } + + pub fn zip_dir_2_data( + &self, + src_dir: &Path, + method: zip::CompressionMethod, + ) -> WSResult> { + if !Path::new(src_dir).is_dir() { + return Err(WsIoErr::Zip2(ZipError::FileNotFound).into()); + } + + let mut data = Vec::new(); + + let walkdir = WalkDir::new(src_dir); + let it = walkdir.into_iter(); + + Self::zip_dir( + &mut it.filter_map(|e| e.ok()), + src_dir, + io::Cursor::new(&mut data), + method, + )?; + + Ok(data) + } } diff --git a/src/main/src/general/network/proto_ext.rs b/src/main/src/general/network/proto_ext.rs index 0dc42c4..1fbbee4 100644 --- a/src/main/src/general/network/proto_ext.rs +++ b/src/main/src/general/network/proto_ext.rs @@ -10,129 +10,56 @@ use super::proto::{self, kv::KvResponse, FileData}; use std::{ops::Range, path::Path}; use crate::result::{WSResult, WSError, WsDataError}; -use std::path::PathBuf; -use std::fs::File; -use tokio; -use tokio::io::{AsyncSeekExt, AsyncReadExt}; -pub enum NewPartialFileDataArg { - FilePath{path: PathBuf, zip_path:Option}, - FileContent{path:PathBuf,content:Vec}, - File{path:PathBuf,file:File}, -} - -pub trait ProtoExtDataItem: Sized { +pub trait ProtoExtDataItem { fn data_sz_bytes(&self) -> usize; fn clone_split_range(&self, range: Range) -> Self; fn to_string(&self) -> String; + fn new_raw_bytes(rawbytes: impl Into>) -> Self; fn as_raw_bytes<'a>(&'a self) -> Option<&'a [u8]>; + fn new_file_data(filepath: impl AsRef, is_dir: bool) -> Self; fn as_file_data(&self) -> Option<&proto::FileData>; fn to_data_item_source(&self) -> DataItemSource; - async fn new_partial_file_data(arg: NewPartialFileDataArg, range: Range) -> WSResult; - fn new_partial_raw_bytes(rawbytes: impl Into>, range: Range) -> WSResult; } impl ProtoExtDataItem for proto::DataItem { - fn new_partial_raw_bytes(rawbytes: impl Into>, range: Range) -> WSResult { - let bytes = rawbytes.into(); - if range.end > bytes.len() { - return Err(WSError::WsDataError(WsDataError::SizeMismatch { - expected: range.end, - actual: bytes.len(), - })); - } - - Ok(Self { + fn new_raw_bytes(rawbytes: impl Into>) -> Self { + proto::DataItem { data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes( - bytes[range].to_vec() + rawbytes.into(), )), - }) - } - - async fn new_partial_file_data(arg: NewPartialFileDataArg, range: Range) -> WSResult { - let mut file_data = proto::FileData::default(); - - // 从文件读取指定范围的数据 - async fn read_file_range(path: &Path, file: tokio::fs::File, range: Range) -> WSResult> { - let mut file = tokio::io::BufReader::new(file); - file.seek(std::io::SeekFrom::Start(range.start as u64)) - .await - .map_err(|e| WSError::WsDataError(WsDataError::FileSeekErr { - path: path.to_path_buf(), - err: e, - }))?; - - let mut buffer = vec![0; range.end - range.start]; - file.read_exact(&mut buffer) - .await - .map_err(|e| WSError::WsDataError(WsDataError::FileReadErr { - path: path.to_path_buf(), - err: e, - }))?; - - Ok(buffer) } - - match arg { - NewPartialFileDataArg::FilePath { path, zip_path } => { - file_data.file_name_opt = path.to_string_lossy().to_string(); - file_data.is_dir_opt = path.is_dir(); - - // 如果是目录,使用zip文件 - let actual_path = if path.is_dir() { - zip_path.as_ref().ok_or_else(|| WSError::WsDataError( - 
WsDataError::BatchTransferFailed { - node: 0, - batch: 0, - reason: "Directory must have zip_path".to_string(), - } - ))? - } else { - &path - }; - - let file = tokio::fs::File::open(actual_path) - .await - .map_err(|e| WSError::WsDataError(WsDataError::FileOpenErr { - path: actual_path.to_path_buf(), - err: e, - }))?; - - file_data.file_content = read_file_range(actual_path, file, range).await?; - }, - NewPartialFileDataArg::FileContent { path, content } => { - if range.end > content.len() { - return Err(WSError::WsDataError(WsDataError::SizeMismatch { - expected: range.end, - actual: content.len(), - })); - } - file_data.file_name_opt = path.to_string_lossy().to_string(); - file_data.is_dir_opt = path.is_dir(); - file_data.file_content = content[range].to_vec(); - }, - NewPartialFileDataArg::File { path, file } => { - file_data.file_name_opt = path.to_string_lossy().to_string(); - file_data.is_dir_opt = path.is_dir(); - - let file = tokio::fs::File::from_std(file); - file_data.file_content = read_file_range(&path, file, range).await?; - } + } + fn new_file_data(filepath: impl AsRef, is_dir: bool) -> Self { + let file_content = std::fs::read(filepath.as_ref()).unwrap(); + Self { + data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(FileData { + file_name_opt: filepath.as_ref().to_string_lossy().to_string(), + is_dir_opt: is_dir, + file_content, + })), } - - Ok(Self { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(file_data)), - }) } fn data_sz_bytes(&self) -> usize { match self.data_item_dispatch.as_ref().unwrap() { proto::data_item::DataItemDispatch::File(file_data) => file_data.file_content.len(), proto::data_item::DataItemDispatch::RawBytes(vec) => vec.len(), + // proto::write_one_data_request::DataItem::Data(d) => d.data.len(), + // proto::write_one_data_request::DataItem::DataVersion(d) => d.data.len(), } } fn clone_split_range(&self, range: Range) -> Self { + // let data_length = match &self.data_item_dispatch.as_ref().unwrap() { + // proto::data_item::DataItemDispatch::File(file_data) => file_data.file_content.len(), + // proto::data_item::DataItemDispatch::RawBytes(vec) => vec.len(), + // }; + + // if range.start >= data_length || range.end > data_length { + // panic!("range out of bounds: {:?}", range); + // } + Self { data_item_dispatch: Some(match &self.data_item_dispatch.as_ref().unwrap() { proto::data_item::DataItemDispatch::File(file_data) => { @@ -337,6 +264,8 @@ impl DataItemExt for proto::DataItem { ret } proto::data_item::DataItemDispatch::RawBytes(bytes) => { + // tracing::debug!("writing data part{} bytes", idx); + // VecOrSlice::from(&bytes) let mut ret = vec![1]; ret.extend_from_slice(bytes); ret diff --git a/src/main/src/general/network/proto_src/data.proto b/src/main/src/general/network/proto_src/data.proto index 7296f20..fdd6fee 100644 --- a/src/main/src/general/network/proto_src/data.proto +++ b/src/main/src/general/network/proto_src/data.proto @@ -122,17 +122,12 @@ message DataMetaGetResponse{ // DataItem data = 2; // } - -// 使用proto ext初始化 message FileData { string file_name_opt = 1; // store in the first node - // 初始化时为空,只需要指定file_name_opt bool is_dir_opt =2; - // file_content should be empty except when send the data split bytes file_content = 3; } -// 使用proto ext初始化 message DataItem { oneof data_item_dispatch { FileData file = 1; @@ -179,7 +174,6 @@ message GetOneDataResponse{ enum BatchDataBlockType { MEMORY = 0; // 内存数据块 FILE = 1; // 文件数据块 - DIR=2; // 目录数据块 } message BatchRequestId { diff --git a/src/main/src/main.rs 
b/src/main/src/main.rs index 82e6707..8a9b56a 100644 --- a/src/main/src/main.rs +++ b/src/main/src/main.rs @@ -16,7 +16,7 @@ use clap::Parser; use cmd_arg::CmdArgs; -use sys::{LogicalModulesRef, Sys}; +use sys::Sys; use tracing::Level; use tracing_subscriber::{ prelude::__tracing_subscriber_SubscriberExt, util::SubscriberInitExt, Layer, @@ -41,10 +41,7 @@ async fn main() { let config = config::read_config(args.this_id, args.files_dir); tracing::info!("config: {:?}", config); // dist_kv_raft::tikvraft_proxy::start(); - let mut sys=Sys::new(config); - let modules_ref=sys.new_logical_modules_ref(); - modules_global_bridge::modules_ref_scope(modules_ref, async move{sys.wait_for_end().await;}) - + Sys::new(config).wait_for_end().await; } pub fn start_tracing() { diff --git a/src/main/src/modules_global_bridge/mod.rs b/src/main/src/modules_global_bridge/mod.rs index c3de453..c7618fa 100644 --- a/src/main/src/modules_global_bridge/mod.rs +++ b/src/main/src/modules_global_bridge/mod.rs @@ -1,6 +1,3 @@ -use std::future::Future; - -use crate::result::WSResult; use crate::sys::LogicalModules; use crate::sys::LogicalModulesRef; @@ -10,22 +7,6 @@ lazy_static::lazy_static! { static ref MODULES: Option=None; } -tokio::task_local! { - static MODULES_REF: LogicalModulesRef; -} - -pub fn try_get_modules_ref() -> WSResult { - let mut res=Err(WSError::WsRuntimeErr(WsRuntimeErr::ModulesRefOutofLifetime)); - MODULES_REF.try_with(|m|{ - res=Ok(m.clone()); - }); - res -} - -pub fn modules_ref_scope(modules_ref: LogicalModulesRef,future: impl Future) { - MODULES_REF.scope(modules_ref,future); -} - fn modules() -> &'static LogicalModules { #[cfg(feature = "unsafe-log")] tracing::debug!("modules begin"); diff --git a/src/main/src/result.rs b/src/main/src/result.rs index 2d6ac18..fe823c3 100644 --- a/src/main/src/result.rs +++ b/src/main/src/result.rs @@ -1,4 +1,4 @@ -use std::{convert::Infallible, fmt::Debug, os::unix::net::SocketAddr, path::PathBuf, sync::Arc}; +use std::{fmt::Debug, os::unix::net::SocketAddr, sync::Arc, path::PathBuf}; use async_raft::{InitializeError, RaftError}; use camelpaste::paste; @@ -64,7 +64,6 @@ pub enum WsIoErr { #[derive(Debug)] pub enum WsRuntimeErr { TokioJoin { err: JoinError, context: String }, - ModulesRefOutofLifetime, } impl From for WSError { @@ -228,31 +227,6 @@ pub enum WsDataError { actual: proto::data_item::DataItemDispatch, context: String, }, - FileMetadataErr { - path: PathBuf, - err: std::io::Error, - }, - FileSeekErr { - path: PathBuf, - err: std::io::Error, - }, - FileReadErr { - path: PathBuf, - err: std::io::Error, - }, - FileOpenErr { - path: PathBuf, - err: std::io::Error, - }, - FileRenameErr { - from: PathBuf, - to: PathBuf, - err: std::io::Error, - }, - FilePathParseErr { - path: String, - err: Infallible, - }, SplitRecoverMissing { unique_id: Vec, idx: DataItemIdx, diff --git a/src/main/src/sys.rs b/src/main/src/sys.rs index 40937c3..d10de92 100644 --- a/src/main/src/sys.rs +++ b/src/main/src/sys.rs @@ -37,21 +37,12 @@ impl Drop for Sys { impl Sys { pub fn new(config: NodesConfig) -> Sys { - - // chdir to file_path - std::env::set_current_dir(&config.file_dir).unwrap(); - tracing::info!("Running at dir: {:?}", std::env::current_dir()); - Sys { logical_modules: LogicalModules::new(config), sub_tasks: Vec::new().into(), } } - pub fn new_logical_modules_ref(&self) -> LogicalModulesRef { - LogicalModulesRef::new(self.logical_modules.clone()) - } - pub async fn wait_for_end(&mut self) { if let Err(err) = (*self.logical_modules).as_ref().unwrap().start(self).await { 
panic!("start logical nodes error: {:?}", err); diff --git a/src/main/src/util/mod.rs b/src/main/src/util/mod.rs index 221b76a..702b9f7 100644 --- a/src/main/src/util/mod.rs +++ b/src/main/src/util/mod.rs @@ -1,5 +1,4 @@ pub mod container; -pub mod zip; use std::{ fmt::Debug, diff --git a/src/main/src/util/zip.rs b/src/main/src/util/zip.rs deleted file mode 100644 index 1cdfdc4..0000000 --- a/src/main/src/util/zip.rs +++ /dev/null @@ -1,275 +0,0 @@ -use std::path::Path; -use std::io::{self, Write, Seek, Cursor}; -use std::fs; -use walkdir::WalkDir; -use zip::{write::FileOptions, ZipWriter, ZipError}; -use crate::result::{WSResult, WSError, WsIoErr}; - -pub fn unzip_data_2_path(p: impl AsRef, data: Vec) -> WSResult<()> { - // remove old dir - let p = p.as_ref(); - if p.exists() { - fs::remove_dir_all(p).unwrap(); - } - // create new dir - fs::create_dir_all(p).unwrap(); - // unzip - match zip_extract::extract(Cursor::new(data), &p, false) { - Ok(_) => (), - Err(e) => { - return Err(WsIoErr::Zip(e).into()); - } - } - - Ok(()) -} - -// pub fn zip_dir_2_data(&self, p: impl AsRef) -> WSResult> { -// let p = p.as_ref(); -// let mut data = Vec::new(); -// let writer = Cursor::new(&mut data); -// let mut list = self.list_dir_with_prefix(p, p.to_str().unwrap())?; -// self.zip_dir( -// &mut list.iter_mut(), -// p.to_str().unwrap(), -// ZipWriter::new(&data), -// zip::CompressionMethod::Stored, -// ) -// .map_err(|e| WsIoErr::Zip2(e))?; -// Ok(data) -// } - -fn zip_dir( - it: &mut dyn Iterator, - prefix: &Path, - writer: T, - method: zip::CompressionMethod, -) -> WSResult<()> -where - T: Write + Seek, -{ - let mut zip = ZipWriter::new(writer); - let prefix = Path::new(prefix); - let mut buffer = Vec::new(); - for entry in it { - let path = entry.path(); - let name = path.strip_prefix(prefix).unwrap(); - let path_as_string = name.to_str().unwrap().to_owned(); - - let options = FileOptions::default() - .compression_method(method) - .unix_permissions( - entry - .metadata() - .map_err(|e| WSError::from(e))? - .permissions() - .mode(), - ); - - // Write file or directory explicitly - // Some unzip tools unzip files with directory paths correctly, some do not! - if path.is_file() { - tracing::debug!("adding file {path:?} as {name:?} ..."); - zip.start_file(path_as_string, options) - .map_err(|e| WSError::from(WsIoErr::Zip2(e)))?; - let mut f = fs::File::open(path).map_err(|e| WSError::from(WsIoErr::Io(e)))?; - - let _ = f - .read_to_end(&mut buffer) - .map_err(|e| WSError::from(WsIoErr::Io(e)))?; - zip.write_all(&buffer) - .map_err(|e| WSError::from(WsIoErr::Io(e)))?; - buffer.clear(); - } else if !name.as_os_str().is_empty() { - // Only if not root! 
Avoids path spec / warning - // and mapname conversion failed error on unzip - tracing::debug!("adding dir {path_as_string:?} as {name:?} ..."); - zip.add_directory(path_as_string, options) - .map_err(|e| WSError::from(WsIoErr::Zip2(e)))?; - } - } - let _ = zip.finish().map_err(|e| WSError::from(WsIoErr::Zip2(e)))?; - Ok(()) -} - -pub fn zip_dir_2_mem( - src_dir: &Path, - method: zip::CompressionMethod, -) -> WSResult> { - if !src_dir.is_dir() { - return Err(WsIoErr::Zip2(ZipError::FileNotFound).into()); - } - - let mut data = Vec::new(); - let walkdir = WalkDir::new(src_dir); - let it = walkdir.into_iter(); - - zip_dir( - &mut it.filter_map(|e| e.ok()), - src_dir, - io::Cursor::new(&mut data), - method, - )?; - - Ok(data) -} - -pub async fn zip_dir_2_file( - src_dir: impl AsRef, - method: zip::CompressionMethod, - mut dst_file: std::fs::File, -) -> WSResult<()> { - if !src_dir.is_dir() { - return Err(WsIoErr::Zip2(ZipError::FileNotFound).into()); - } - - let walkdir = WalkDir::new(src_dir); - let it = walkdir.into_iter(); - - // 使用阻塞线程执行 zip 操作,因为 zip 库不支持异步 IO - tokio::task::spawn_blocking(move || { - zip_dir( - &mut it.filter_map(|e| e.ok()), - src_dir, - &mut dst_file, - method, - ) - }).await.map_err(|e| WsIoErr::Io(std::io::Error::new( - std::io::ErrorKind::Other, - format!("Failed to execute zip task: {}", e) - )))??; - - Ok(()) -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io::Write; - use tempfile::{tempdir, NamedTempFile}; - - #[test] - fn test_zip_and_unzip_single_file() -> WSResult<()> { - // 创建临时目录 - let src_dir = tempdir()?; - let src_path = src_dir.path(); - - // 创建测试文件 - let test_file_path = src_path.join("test.txt"); - let test_content = b"Hello, World!"; - fs::write(&test_file_path, test_content)?; - - // 创建临时输出文件 - let mut output_file = NamedTempFile::new()?; - - // 执行压缩 - tokio::runtime::Runtime::new()?.block_on(async { - zip_dir_2_file( - src_path, - zip::CompressionMethod::Stored, - output_file.as_file_mut(), - ).await - })?; - - // 读取压缩后的数据 - let zip_data = fs::read(output_file.path())?; - - // 创建临时解压目录 - let extract_dir = tempdir()?; - - // 执行解压 - unzip_data_2_path(extract_dir.path(), zip_data)?; - - // 验证解压后的文件内容 - let extracted_content = fs::read(extract_dir.path().join("test.txt"))?; - assert_eq!(extracted_content, test_content); - - Ok(()) - } - - #[test] - fn test_zip_and_unzip_directory() -> WSResult<()> { - // 创建临时目录结构 - let src_dir = tempdir()?; - let src_path = src_dir.path(); - - // 创建子目录和文件 - let sub_dir = src_path.join("subdir"); - fs::create_dir(&sub_dir)?; - - let test_files = vec![ - ("test1.txt", b"Content 1"), - ("subdir/test2.txt", b"Content 2"), - ]; - - for (path, content) in test_files.iter() { - let file_path = src_path.join(path); - if let Some(parent) = file_path.parent() { - fs::create_dir_all(parent)?; - } - fs::write(&file_path, content)?; - } - - // 创建临时输出文件 - let mut output_file = NamedTempFile::new()?; - - // 执行压缩 - tokio::runtime::Runtime::new()?.block_on(async { - zip_dir_2_file( - src_path, - zip::CompressionMethod::Stored, - output_file.as_file_mut(), - ).await - })?; - - // 读取压缩后的数据 - let zip_data = fs::read(output_file.path())?; - - // 创建临时解压目录 - let extract_dir = tempdir()?; - - // 执行解压 - unzip_data_2_path(extract_dir.path(), zip_data)?; - - // 验证解压后的文件内容 - for (path, content) in test_files.iter() { - let extracted_content = fs::read(extract_dir.path().join(path))?; - assert_eq!(&extracted_content, content); - } - - Ok(()) - } - - #[test] - fn test_zip_empty_directory() -> WSResult<()> { - // 创建空临时目录 - let src_dir = 
tempdir()?; - - // 创建临时输出文件 - let mut output_file = NamedTempFile::new()?; - - // 执行压缩 - tokio::runtime::Runtime::new()?.block_on(async { - zip_dir_2_file( - src_dir.path(), - zip::CompressionMethod::Stored, - output_file.as_file_mut(), - ).await - })?; - - // 读取压缩后的数据 - let zip_data = fs::read(output_file.path())?; - - // 创建临时解压目录 - let extract_dir = tempdir()?; - - // 执行解压 - unzip_data_2_path(extract_dir.path(), zip_data)?; - - // 验证目录是否为空 - let entries = fs::read_dir(extract_dir.path())?; - assert_eq!(entries.count(), 0); - - Ok(()) - } -} From c39f0b3d6e318e7ffc9f326751deb71cd1bb4200 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 04/26] Revert "pass kv client test" This reverts commit 24acd7a251a21ee2a7ae901aef7ee4f8cd5f131d. --- design.canvas | 124 ++++---- scripts/mount_s3fs.sh | 3 - scripts/sync_md_files.py | 274 +++--------------- scripts/test_design_json_tool.py | 240 --------------- .../src/general/data/m_data_general/batch.rs | 1 - .../general/data/m_data_general/dataitem.rs | 107 ++----- .../src/general/data/m_data_general/mod.rs | 62 ++-- src/main/src/general/network/proto_ext.rs | 11 - .../src/general/network/proto_src/data.proto | 1 - src/main/src/main.rs | 6 +- src/main/src/master/data/m_data_master.rs | 123 ++++---- src/main/src/worker/m_kv_user_client.rs | 1 - 12 files changed, 223 insertions(+), 730 deletions(-) delete mode 100644 scripts/mount_s3fs.sh delete mode 100644 scripts/test_design_json_tool.py diff --git a/design.canvas b/design.canvas index aca677a..6323eab 100755 --- a/design.canvas +++ b/design.canvas @@ -1,77 +1,75 @@ { "nodes":[ - {"id":"cb82b904dab26671","type":"group","x":-3420,"y":-1000,"width":6580,"height":3720,"label":"data"}, + {"id":"cb82b904dab26671","type":"group","x":-3420,"y":-1000,"width":6580,"height":3540,"label":"data"}, {"id":"batch_transfer_group","type":"group","x":-1580,"y":80,"width":4700,"height":1960,"label":"Batch数据传输实现"}, {"id":"batch_receiver_group","type":"group","x":60,"y":140,"width":2940,"height":1820,"label":"接收端 [DataGeneral]"}, - {"id":"7a2427112a116cd3","type":"group","x":-3360,"y":120,"width":1544,"height":2560,"label":"WriteSplitDataTaskGroup"}, + {"id":"7a2427112a116cd3","type":"group","x":-3280,"y":120,"width":1464,"height":2340,"label":"WriteSplitDataTaskGroup"}, {"id":"batch_sender_group","type":"group","x":-1520,"y":444,"width":1340,"height":1596,"label":"写入端 [DataGeneral]"}, {"id":"d3ff298bf342a238","type":"group","x":-1490,"y":817,"width":1290,"height":1195,"label":"fn batch_transfer"}, - {"id":"data_write_flow","type":"group","x":-1580,"y":-880,"width":2680,"height":520,"label":"数据写入流程"}, - {"id":"storage_write_flow","type":"group","x":20,"y":-820,"width":1020,"height":400,"label":"存储节点写入流程"}, + {"id":"data_write_flow","type":"group","x":-1620,"y":-640,"width":2680,"height":520,"label":"数据写入流程"}, + {"id":"storage_write_flow","type":"group","x":-20,"y":-580,"width":1020,"height":400,"label":"存储节点写入流程"}, {"id":"7127ed217f71f72d","type":"group","x":-3260,"y":1140,"width":1010,"height":375,"label":"fn register_handle("}, - {"id":"handle_lookup","type":"text","text":"# Handle查找 [条件分支]\n\n## batch_receive_states.get()\n- 已存在: 验证version\n- 不存在: 创建新handle\n","x":395,"y":765,"width":410,"height":210,"color":"2"}, - {"id":"rpc_handle_batch_data","type":"text","text":"# DataGeneral::rpc_handle_batch_data\n\n## 处理流程","x":150,"y":478,"width":570,"height":118,"color":"1"}, - {"id":"state_manager","type":"text","text":"# 状态管理器 [DataGeneral.batch_receive_states]\n\n## 
核心数据结构\n```rust\nDashMap\n```\n- BatchReceiveState\n\t- handle: WriteSplitDataTaskHandle\n\t- shared: SharedWithBatchHandler\n## 生命周期\n- 创建: 首次接收分片\n- 更新: 每次接收分片\n- 删除: 写入完成","x":840,"y":171,"width":640,"height":486,"color":"1"}, - {"id":"write_task_file","type":"text","text":"# ToFile 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToFile\n- file_path: PathBuf\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [文件IO阻塞]\n1. OpenOptions::new()\n .create(true)\n .write(true)\n2. seek(offset)\n3. write_all(data)\n4. 错误记录:\n tracing::error!(\"Failed to write file data at offset {}\")\n","x":-2236,"y":504,"width":400,"height":400,"color":"1"}, + {"id":"97d3d9fd7432a861","type":"text","text":"# WriteSplitDataTaskHandle::submit_split() 实现 [异步发送]\n\n## match write_type {\n- WriteSplitDataType::File => 文件写入任务\n- WriteSplitDataType::Mem => 内存写入任务\n}\n\n## 发送任务 [channel阻塞]\ntx.send(task).await","x":-2209,"y":1120,"width":347,"height":445}, + {"id":"4dbe01dc59cea4c2","type":"text","text":"pub struct WriteSplitDataTaskHandle {\n tx: mpsc::Sender>,\n write_type: WriteSplitDataType,\n}","x":-2572,"y":1660,"width":418,"height":160}, + {"id":"task_pool","type":"text","text":"# 任务池 [handles]\n\n- 收集任务句柄\n- 等待任务完成 [阻塞]\n- 错误聚合","x":-1414,"y":1732,"width":300,"height":260,"color":"5"}, + {"id":"86a8707f54d19c74","type":"text","text":"join all,并返回","x":-1389,"y":1549,"width":250,"height":60}, + {"id":"data_reader","type":"text","text":"# 数据读取器 [DataSource]\n\n- 计算数据范围\n- 读取数据块 [阻塞]\n- 错误传播","x":-970,"y":1163,"width":300,"height":200,"color":"3"}, + {"id":"write_handle_submit","type":"text","text":"# submit_split() [异步发送]\n\n## 执行流程\n1. 根据write_type构造任务\n2. 发送到任务通道\n3. 错误处理和日志\n\n## 阻塞特性\n- File写入: IO阻塞\n- Mem写入: 内存阻塞\n- 通道发送: channel阻塞","x":-2209,"y":1120,"width":347,"height":445,"color":"2"}, + {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1580,"y":-550,"width":200,"height":100,"color":"1"}, + {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1580,"y":-420,"width":200,"height":100,"color":"1"}, + {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1580,"y":-250,"width":200,"height":100,"color":"1"}, + {"id":"storage_node_3","type":"text","text":"存储节点1","x":-445,"y":-590,"width":150,"height":60,"color":"3"}, + {"id":"concurrency_controller","type":"text","text":"# 并发控制器 [Semaphore]\n\n- 最大并发数: 32\n- 许可获取 [阻塞]\n- 许可释放 [非阻塞]\n- RAII风格管理","x":-970,"y":1536,"width":300,"height":200,"color":"2"}, + {"id":"5009f9e4bcc6ed6c","type":"text","text":"### 加入任务池","x":-920,"y":1902,"width":250,"height":60}, + {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1470,"y":488,"width":300,"height":290,"color":"1"}, + {"id":"data_source_interface","type":"text","text":"# DataSource 接口设计\n\n## trait DataSource: Send + Sync + 'static\n```rust\nasync fn size(&self) -> WSResult;\nasync fn read_chunk(&self, offset: usize, size: usize) -> WSResult>;\nfn block_type(&self) -> BatchDataBlockType;\n```\n\n## 实现类型\n1. FileDataSource\n - 文件路径管理\n - 异步IO操作\n - 错误处理\n\n2. 
MemDataSource\n - Arc<[u8]>共享数据\n - 边界检查\n - 零拷贝优化","x":-1459,"y":864,"width":390,"height":646,"color":"4"}, + {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源管理","x":-2780,"y":-720,"width":340,"height":214,"color":"4"}, + {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-2310,"y":-662,"width":330,"height":156,"color":"4"}, + {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-2425,"y":-467,"width":280,"height":275,"color":"4"}, + {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-2952,"y":-132,"width":342,"height":158,"color":"4"}, + {"id":"data_item","type":"text","text":"# 数据项处理\n\n## enum WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n","x":-3010,"y":140,"width":450,"height":280,"color":"3"}, {"id":"b0205b4457afeb2b","type":"text","text":"## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-2350,"y":202,"width":364,"height":178}, - {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id (1)\n- dataset_unique_id (2)\n- data_item_idx (3)\n- block_type (4)\n- block_index: 0 (5)\n- data (6)\n- operation (7)\n- unique_id (8)\n- version (9)","x":-160,"y":544,"width":250,"height":120,"color":"2"}, - {"id":"4dbe01dc59cea4c2","type":"text","text":"### pub struct WriteSplitDataTaskHandle {\n tx: mpsc::Sender>,\n write_type: WriteSplitDataType,\n}","x":-2572,"y":1660,"width":418,"height":202}, {"id":"write_task_mem","type":"text","text":"# ToMem 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToMem\n- shared_mem: SharedMemHolder\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [内存写入阻塞]\n1. shared_mem.write(offset, data)\n2. 错误记录:\n tracing::error!(\"Failed to write memory data at offset {}\")\n","x":-2670,"y":486,"width":400,"height":436,"color":"2"}, - {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1540,"y":-660,"width":200,"height":100,"color":"1"}, - {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1540,"y":-490,"width":200,"height":100,"color":"1"}, - {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1540,"y":-790,"width":200,"height":100,"color":"1"}, + {"id":"write_task_file","type":"text","text":"# ToFile 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToFile\n- file_path: PathBuf\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [文件IO阻塞]\n1. OpenOptions::new()\n .create(true)\n .write(true)\n2. seek(offset)\n3. write_all(data)\n4. 
错误记录:\n tracing::error!(\"Failed to write file data at offset {}\")\n","x":-2236,"y":504,"width":400,"height":400,"color":"1"}, {"id":"02d1bafb13062e3b","type":"text","text":"### batch 接口要和 write作区分\n#### batch是主动推送完整数据\n#### write是将数据写入到系统\n\n- wirte中也会使用batch接口用来在写入之前并行推送缓存","x":-1514,"y":142,"width":445,"height":228}, - {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1470,"y":488,"width":300,"height":290,"color":"1"}, - {"id":"9fa1c2f8d08978bb","type":"text","text":"## 判断还有分片?","x":-935,"y":1404,"width":230,"height":80,"color":"3"}, - {"id":"data_reader","type":"text","text":"# 数据读取器 [DataSource]\n\n- 计算数据范围\n- 读取数据块 [阻塞]\n- 错误传播","x":-970,"y":1163,"width":300,"height":200,"color":"3"}, - {"id":"data_source_interface","type":"text","text":"# DataSource 接口设计\n\n## trait DataSource: Send + Sync + 'static\n```rust\nasync fn size(&self) -> WSResult;\nasync fn read_chunk(&self, offset: usize, size: usize) -> WSResult>;\nfn block_type(&self) -> BatchDataBlockType;\n```\n\n## 实现类型\n1. FileDataSource\n - 文件路径管理\n - 异步IO操作\n - 错误处理\n\n2. MemDataSource\n - Arc<[u8]>共享数据\n - 边界检查\n - 零拷贝优化","x":-1459,"y":864,"width":390,"height":646,"color":"4"}, - {"id":"batch_transfer_main","type":"text","text":"# batch_transfer [主控制器]\n\n- 初始化数据源\n- 创建并发控制器\n- 启动传输任务\n- 等待任务完成\n\n[阻塞执行]","x":-970,"y":837,"width":370,"height":294,"color":"1"}, - {"id":"97d3d9fd7432a861","type":"text","text":"# WriteSplitDataTaskHandle::submit_split() 实现 [异步发送]\n\n## match write_type {\n- WriteSplitDataType::File => 文件写入任务\n- WriteSplitDataType::Mem => 内存写入任务\n}\n\n## 发送任务 [channel阻塞]\ntx.send(task).await","x":-2209,"y":1120,"width":347,"height":445}, - {"id":"write_handle_submit","type":"text","text":"# submit_split() [异步发送]\n\n## 执行流程\n1. 根据write_type构造任务\n2. 发送到任务通道\n3. 错误处理和日志\n\n## 阻塞特性\n- File写入: IO阻塞\n- Mem写入: 内存阻塞\n- 通道发送: channel阻塞","x":-2209,"y":1120,"width":347,"height":445,"color":"2"}, + {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-3070,"y":-446,"width":330,"height":234,"color":"4"}, + {"id":"1ec171d545e8995d","type":"text","text":"## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理","x":-3105,"y":754,"width":300,"height":150}, + {"id":"06d4a92778dd83c8","type":"text","text":"# 第一个分片开始写入 [阻塞执行]\n\n## 初始化写入\nfn start_first_split(data: Vec) -> Result<(), WSError> {\n let task = self.build_task(data, 0);\n self.tasks.push(task);\n self.current_size += data.len();\n Ok(())\n}\n\n## 错误处理\n- 写入失败记录日志\n- 返回具体错误类型","x":-3240,"y":1161,"width":455,"height":310}, {"id":"f515ecb9aee18fc7","type":"text","text":"# 后续写入 [异步执行]\n\n## 状态管理\n- 写入任务追踪\n- 并发控制\n- 写入顺序保证","x":-2572,"y":1178,"width":302,"height":275}, + {"id":"e2576a54f3f852b3","type":"text","text":"# process_tasks() 实现 [阻塞循环]\n\n## 循环处理 [select阻塞]\n1. try_complete() 检查完成状态\n2. tokio::select! 
{\n - rx.recv() => 接收新任务\n - futures::future::select_all(tasks) => 等待任务完成\n}\n\n## 完成条件\n- current_size >= expected_size\n- 返回 proto::DataItem","x":-3055,"y":1780,"width":377,"height":460}, + {"id":"155106edf5eb3cd7","type":"text","text":"# try_complete() 实现 [同步检查]\n\n## 返回 Option\n- ToFile => proto::DataItem::new_file_data()\n- ToMem => proto::DataItem::new_mem_data()","x":-3094,"y":2260,"width":455,"height":180}, {"id":"223edf4677db9339","type":"text","text":"pub struct WriteSplitDataManager {\n // 只存储任务句柄\n handles: DashMap,\n}","x":-3110,"y":960,"width":610,"height":140}, - {"id":"06d4a92778dd83c8","type":"text","text":"# 第一个分片开始写入 [阻塞执行]\n\n## 初始化写入\nfn start_first_split(data: Vec) -> Result<(), WSError> {\n let task = self.build_task(data, 0);\n self.tasks.push(task);\n self.current_size += data.len();\n Ok(())\n}\n\n## 错误处理\n- 写入失败记录日志\n- 返回具体错误类型","x":-3240,"y":1161,"width":455,"height":310}, - {"id":"batch_data_request","type":"text","text":"# Batch RPC Proto定义\n\n## 数据块类型\nenum BatchDataBlockType {\n MEMORY = 0; // 内存数据块\n FILE = 1; // 文件数据块\n}\n\n## 操作类型\nenum DataOpeType {\n Read = 0;\n Write = 1;\n}\n\n## 请求ID\nmessage BatchRequestId {\n uint32 node_id = 1; // 节点ID\n uint64 sequence = 2; // 原子自增序列号\n}\n\n## 请求消息\nmessage BatchDataRequest {\n BatchRequestId request_id = 1; // 请求唯一标识(节点ID + 序列号)\n uint32 dataset_unique_id = 2; // 数据集唯一标识\n uint32 data_item_idx = 3; // 数据项索引\n BatchDataBlockType block_type = 4; // 数据块类型(文件/内存)\n uint32 block_index = 5; // 数据块索引\n bytes data = 6; // 数据块内容\n DataOpeType operation = 7; // 操作类型\n bytes unique_id = 8; // 数据唯一标识\n uint64 version = 9; // 数据版本\n}\n\n## 响应消息\nmessage BatchDataResponse {\n BatchRequestId request_id = 1; // 对应请求ID\n bool success = 2; // 处理状态\n string error_message = 3; // 错误信息\n uint64 version = 4; // 处理后的版本\n}\n","x":-155,"y":1536,"width":550,"height":1184,"color":"2"}, - {"id":"20145fd68e8aaa75","type":"text","text":"# 构造 [同步初始化]\n\n## fn new_task_group 任务组初始化\nfn new_task_group(type_: WriteSplitDataType) -> Self\n### fn calculate_split\n- calculate_spli 根据block size计算出每个split的range\n 支持range 以在分片大小不一时依旧可以用的灵活性\n- ","x":-3220,"y":1520,"width":542,"height":294}, - {"id":"1ec171d545e8995d","type":"text","text":"## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理","x":-3105,"y":754,"width":300,"height":150}, - {"id":"data_item","type":"text","text":"# 数据项处理\n\n## enum WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n","x":-3010,"y":140,"width":450,"height":280,"color":"3"}, - {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-2952,"y":-132,"width":342,"height":158,"color":"4"}, - {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-2425,"y":-467,"width":280,"height":275,"color":"4"}, - {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-3070,"y":-446,"width":330,"height":234,"color":"4"}, - {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源管理","x":-2780,"y":-720,"width":340,"height":214,"color":"4"}, - {"id":"completion_monitor","type":"text","text":"# 完成监控 [独立任务]\n\n## 1. 等待写入完成\n```rust\nhandle.wait_all_tasks().await?;\n```\n\n## 2. 发送最终响应\n```rust\nif let Some(final_responsor) = \n shared.get_final_responsor().await {\n final_responsor.response(Ok(()))\n .await?;\n}\n```\n\n## 3. 
清理状态\n```rust\nbatch_receive_states.remove(&unique_id);\n```","x":1635,"y":1335,"width":445,"height":571,"color":"4"}, - {"id":"2dbde64bc1dbac6a","type":"text","text":"## 响应任务(独立任务)","x":1760,"y":1132,"width":365,"height":110}, - {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-2310,"y":-662,"width":330,"height":156,"color":"4"}, + {"id":"20145fd68e8aaa75","type":"text","text":"# 构造 [同步初始化]\n\n## 任务组初始化\nfn new_task_group(type_: WriteSplitDataType) -> Self {\n let (tx, rx) = mpsc::channel(32);\n Self {\n type_,\n tasks: Vec::new(),\n rx,\n expected_size: 0,\n current_size: 0,\n }\n}\n\n## 参数验证\n- 检查写入类型\n- 验证初始参数","x":-3205,"y":1540,"width":450,"height":220}, + {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-440,"y":-240,"width":150,"height":60,"color":"5"}, + {"id":"storage_node_5","type":"text","text":"存储节点3","x":-440,"y":-440,"width":150,"height":60,"color":"3"}, + {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-160,"y":784,"width":250,"height":120,"color":"2"}, + {"id":"f8ade98240211305","type":"text","text":"### [tokio::spawn]\n","x":-945,"y":1784,"width":250,"height":60}, + {"id":"9fa1c2f8d08978bb","type":"text","text":"## 判断还有分片?","x":-935,"y":1404,"width":230,"height":80,"color":"3"}, + {"id":"rpc_caller","type":"text","text":"# RPC调用器 [view.rpc_call]\n\n- 构造请求\n- 发送数据 [阻塞]\n- 等待响应 [阻塞]\n- 错误处理","x":-520,"y":1267,"width":300,"height":200,"color":"4"}, + {"id":"parallel_task","type":"text","text":"# 并行任务 \n- 持有信号量许可\n- 执行RPC调用\n- 处理响应\n- 自动释放许可\n\n[独立执行]","x":-520,"y":1579,"width":300,"height":200,"color":"6"}, + {"id":"batch_transfer_main","type":"text","text":"# batch_transfer [主控制器]\n\n- 初始化数据源\n- 创建并发控制器\n- 启动传输任务\n- 等待任务完成\n\n[阻塞执行]","x":-970,"y":837,"width":370,"height":294,"color":"1"}, + {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 
返回调度决策","x":-1120,"y":-550,"width":200,"height":160,"color":"2"}, + {"id":"storage_group","type":"text","text":"存储节点组","x":-640,"y":-550,"width":150,"height":60,"color":"3"}, + {"id":"cache_group","type":"text","text":"缓存节点组","x":-640,"y":-350,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-440,"y":-400,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-440,"y":-320,"width":150,"height":60,"color":"5"}, + {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":20,"y":-540,"width":200,"height":280,"color":"1"}, + {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":360,"y":-540,"width":200,"height":120,"color":"2"}, + {"id":"storage_node_4","type":"text","text":"存储节点2","x":-440,"y":-520,"width":150,"height":60,"color":"3"}, + {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":620,"y":-320,"width":200,"height":100,"color":"4"}, + {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-160,"y":664,"width":250,"height":120,"color":"2"}, + {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-160,"y":424,"width":250,"height":240,"color":"2"}, + {"id":"handle_lookup","type":"text","text":"# Handle查找 [条件分支]\n\n## batch_receive_states.get()\n- 已存在: 验证version\n- 不存在: 创建新handle\n","x":395,"y":765,"width":410,"height":210,"color":"2"}, {"id":"task_spawn_flow","type":"text","text":"# 任务生成流程 [异步执行]\n\n## 1. 提交分片数据handle.submit_split\n```rust\nstate.handle.submit_split(\n request.block_idx * DEFAULT_BLOCK_SIZE,\n request.data\n).await?\n```\n\n## 2. 更新响应器shared.update_responsor\n```rust\nstate.shared.update_responsor(responsor).await;\n```\nupdate时,旧的reponsor要先返回","x":480,"y":1106,"width":405,"height":538,"color":"3"}, {"id":"e156c034cc9ec24f","type":"text","text":"## responsor send","x":595,"y":1755,"width":250,"height":60}, + {"id":"completion_monitor","type":"text","text":"# 完成监控 [独立任务]\n\n## 1. 等待写入完成\n```rust\nhandle.wait_all_tasks().await?;\n```\n\n## 2. 发送最终响应\n```rust\nif let Some(final_responsor) = \n shared.get_final_responsor().await {\n final_responsor.response(Ok(()))\n .await?;\n}\n```\n\n## 3. 
清理状态\n```rust\nbatch_receive_states.remove(&unique_id);\n```","x":1635,"y":1335,"width":445,"height":571,"color":"4"}, + {"id":"rpc_handle_batch_data","type":"text","text":"# DataGeneral::rpc_handle_batch_data\n\n## 处理流程","x":150,"y":478,"width":570,"height":118,"color":"1"}, + {"id":"2dbde64bc1dbac6a","type":"text","text":"## 响应任务(独立任务)","x":1760,"y":1132,"width":365,"height":110}, + {"id":"state_manager","type":"text","text":"# 状态管理器 [DataGeneral.batch_receive_states]\n\n## 核心数据结构\n```rust\nDashMap\n```\n- BatchReceiveState\n\t- handle: WriteSplitDataTaskHandle\n\t- shared: SharedWithBatchHandler\n## 生命周期\n- 创建: 首次接收分片\n- 更新: 每次接收分片\n- 删除: 写入完成","x":840,"y":171,"width":640,"height":486,"color":"1"}, {"id":"write_task_handle","type":"text","text":"# 写入任务句柄 [WriteSplitDataTaskHandle]\n\n## 关键对象\n```rust\npub struct WriteSplitDataTaskHandle {\n tx: mpsc::Sender>,\n write_type: WriteSplitDataType,\n}\n```\n\n## 核心函数\n```rust\nasync fn submit_split(\n &self,\n offset: usize,\n data: Vec\n) -> WSResult<()>\n```","x":956,"y":765,"width":505,"height":530,"color":"2"}, {"id":"task_spawner","type":"text","text":"# tokio::spawn 响应任务\n\n```\n\n## 核心函数\n```rust\nfn spawn_write_task(\n data: Vec,\n offset: usize\n) -> JoinHandle<()>\n```","x":1008,"y":1385,"width":400,"height":400,"color":"3"}, - {"id":"rpc_caller","type":"text","text":"# RPC调用器 [view.rpc_call]\n\n- 构造请求\n- 发送数据 [阻塞]\n- 等待响应 [阻塞]\n- 错误处理","x":-520,"y":1267,"width":300,"height":200,"color":"4"}, - {"id":"parallel_task","type":"text","text":"# 并行任务 \n- 持有信号量许可\n- 执行RPC调用\n- 处理响应\n- 自动释放许可\n\n[独立执行]","x":-520,"y":1579,"width":300,"height":200,"color":"6"}, - {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id (1)\n- dataset_unique_id (2)\n- data_item_idx (3)\n- block_type (4)\n- block_index: 2 (5)\n- data (6)\n- operation (7)\n- unique_id (8)\n- version (9)","x":-160,"y":784,"width":250,"height":120,"color":"2"}, - {"id":"storage_node_5","type":"text","text":"存储节点3","x":-400,"y":-680,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_4","type":"text","text":"存储节点2","x":-400,"y":-760,"width":150,"height":60,"color":"3"}, - {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-400,"y":-480,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-400,"y":-640,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-400,"y":-560,"width":150,"height":60,"color":"5"}, - {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":60,"y":-780,"width":200,"height":280,"color":"1"}, - {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":400,"y":-780,"width":200,"height":120,"color":"2"}, {"id":"batch_data_constants","type":"text","text":"# 批量数据常量定义\n\n## 数据块大小\n```rust\n/// 默认数据块大小 (4MB)\nconst DEFAULT_BLOCK_SIZE: usize = 4 * 1024 * 1024;\n```\n\n## 数据分片索引\n```rust\n/// 数据分片在整体数据中的偏移量\npub type DataSplitIdx = usize;\n```","x":-160,"y":1052,"width":400,"height":380,"color":"4"}, - {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id (1)\n- dataset_unique_id (2)\n- data_item_idx (3)\n- block_type (4)\n- block_index: 1 (5)\n- data (6)\n- operation (7)\n- unique_id (8)\n- version (9)","x":-160,"y":664,"width":250,"height":120,"color":"2"}, - 
{"id":"storage_node_3","type":"text","text":"存储节点1","x":-405,"y":-830,"width":150,"height":60,"color":"3"}, - {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 返回调度决策","x":-1080,"y":-790,"width":200,"height":160,"color":"2"}, - {"id":"storage_group","type":"text","text":"存储节点组","x":-600,"y":-790,"width":150,"height":60,"color":"3"}, - {"id":"cache_group","type":"text","text":"缓存节点组","x":-600,"y":-590,"width":150,"height":60,"color":"5"}, - {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":660,"y":-560,"width":200,"height":100,"color":"4"}, - {"id":"86a8707f54d19c74","type":"text","text":"join all,并返回","x":-1389,"y":1549,"width":250,"height":60}, - {"id":"task_pool","type":"text","text":"# 任务池 [handles]\n\n- 收集任务句柄\n- 等待任务完成 [阻塞]\n- 错误聚合","x":-1414,"y":1732,"width":300,"height":260,"color":"5"}, - {"id":"5009f9e4bcc6ed6c","type":"text","text":"### 加入任务池","x":-920,"y":1902,"width":250,"height":60}, - {"id":"f8ade98240211305","type":"text","text":"### [tokio::spawn]\n","x":-945,"y":1784,"width":250,"height":60}, - {"id":"concurrency_controller","type":"text","text":"# 并发控制器 [Semaphore]\n\n- 最大并发数: 32\n- 许可获取 [阻塞]\n- 许可释放 [非阻塞]\n- RAII风格管理","x":-970,"y":1536,"width":300,"height":200,"color":"2"}, - {"id":"handle_wait_all","type":"text","text":"# handle.wait_all_tasks [异步等待]\n\n## 核心职责\n- 等待所有分片任务完成\n- 处理任务执行结果\n- 清理任务资源\n\n## 实现细节\n```rust\nasync fn wait_all_tasks(&self) -> WSResult<()> {\n // 等待所有任务完成\n while let Some(task) = rx.recv().await {\n task.await??;\n }\n Ok(())\n}\n```\n\n## 调用时机\n1. 外部调用: 批量传输完成检查\n2. 内部调用: process_tasks完成时","x":-2209,"y":1922,"width":320,"height":400}, - {"id":"0dee80a0e2345514","type":"text","text":"# 完成处理 [同步]\n\n## 执行流程\n1. 合并所有分片数据\n2. 构造最终DataItem\n3. 返回Some(item)给process_tasks\n4. process_tasks收到完成信号后退出循环\n\n## 数据流向\nprocess_tasks -> try_complete -> handle.wait_all_tasks","x":-2176,"y":2380,"width":254,"height":260}, - {"id":"e2576a54f3f852b3","type":"text","text":"# process_tasks() 实现 [阻塞循环]\n\n## 循环处理 [select阻塞]\n1. try_complete() 检查完成状态\n2. tokio::select! {\n - rx.recv() => 接收新任务\n - futures::future::select_all(tasks) => 等待任务完成\n}\n\n## 完成条件\n- current_size >= expected_size\n- 返回 proto::DataItem\n\n## 核心职责\n- 作为group的主事件循环\n- 在new group后立即启动\n- 负责接收和处理所有提交的任务\n- 维护任务状态直到完成\n\n## 执行流程\n1. 循环开始前检查完成状态\n2. 使用select等待新任务或已有任务完成\n3. 处理完成的任务并更新状态\n4. 检查是否达到完成条件\n5. 未完成则继续循环\n6. 完成则返回合并后的数据","x":-3272,"y":1892,"width":517,"height":688}, - {"id":"155106edf5eb3cd7","type":"text","text":"# 检查完成状态 try_complete() 实现 [同步检查]\n\n## 核心职责\n- 是process_tasks内部使用的状态检查\n- 判断是否所有分片都完成\n- 返回最终合并的数据\n\n## 检查流程\n1. 验证current_size是否达到expected_size\n2. 检查所有任务是否完成\n3. 合并分片数据\n4. 
返回Option\n\n## 返回值\n- Some(item): 所有分片完成,返回合并数据\n- None: 未完成,继续等待\n\n## 错误处理\n- 分片数据不完整\n- 合并失败\n- 数据损坏","x":-2678,"y":2180,"width":455,"height":400} + {"id":"batch_data_request","type":"text","text":"# Batch RPC Proto定义\n\n## 数据块类型\nenum BatchDataBlockType {\n MEMORY = 0; // 内存数据块\n FILE = 1; // 文件数据块\n}\n\n## 操作类型\nenum DataOpeType {\n Read = 0;\n Write = 1;\n}\n\n## 请求ID\nmessage BatchRequestId {\n uint32 node_id = 1; // 节点ID\n uint64 sequence = 2; // 原子自增序列号\n}\n\n## 请求消息\nmessage BatchDataRequest {\n BatchRequestId request_id = 1; // 请求唯一标识(节点ID + 序列号)\n BatchDataBlockType block_type = 2; // 数据块类型(文件/内存)\n uint32 block_index = 3; // 数据块索引\n bytes data = 4; // 数据块内容\n DataOpeType operation = 5; // 操作类型\n bytes unique_id = 6; // 数据唯一标识\n uint64 version = 7; // 数据版本\n}\n\n## 响应消息\nmessage BatchDataResponse {\n BatchRequestId request_id = 1; // 对应请求ID\n bool success = 2; // 处理状态\n string error_message = 3; // 错误信息\n uint64 version = 4; // 处理后的版本\n}\n","x":-155,"y":1536,"width":490,"height":552,"color":"2"} ], "edges":[ {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, @@ -97,11 +95,12 @@ {"id":"f7105db89ffabd1e","fromNode":"20145fd68e8aaa75","fromSide":"bottom","toNode":"e2576a54f3f852b3","toSide":"top"}, {"id":"7504b1b3a99e992c","fromNode":"4dbe01dc59cea4c2","fromSide":"right","toNode":"97d3d9fd7432a861","toSide":"bottom","label":"获取到handle"}, {"id":"a993a3f4d7b2211d","fromNode":"97d3d9fd7432a861","fromSide":"left","toNode":"e2576a54f3f852b3","toSide":"right"}, - {"id":"a996588f6c59c88f","fromNode":"e2576a54f3f852b3","fromSide":"bottom","toNode":"155106edf5eb3cd7","toSide":"left"}, + {"id":"a996588f6c59c88f","fromNode":"e2576a54f3f852b3","fromSide":"bottom","toNode":"155106edf5eb3cd7","toSide":"top"}, {"id":"a42104592fedd4c7","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_mem","toSide":"bottom"}, {"id":"c45aaa564ae87a7c","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_file","toSide":"bottom"}, {"id":"write_flow_1","fromNode":"20145fd68e8aaa75","fromSide":"top","toNode":"06d4a92778dd83c8","toSide":"bottom","label":"初始化完成"}, {"id":"write_flow_2","fromNode":"06d4a92778dd83c8","fromSide":"right","toNode":"f515ecb9aee18fc7","toSide":"left","label":"首个分片写入完成"}, + {"id":"write_flow_5","fromNode":"e2576a54f3f852b3","fromSide":"left","toNode":"155106edf5eb3cd7","toSide":"left","label":"检查完成状态"}, {"id":"86a2aa913f7bd3d9","fromNode":"223edf4677db9339","fromSide":"bottom","toNode":"06d4a92778dd83c8","toSide":"top"}, {"id":"a99c309f19fd9853","fromNode":"batch_request1","fromSide":"right","toNode":"rpc_handle_batch_data","toSide":"left"}, {"id":"batch_data_flow2","fromNode":"batch_data_constants","fromSide":"top","toNode":"batch_request3","toSide":"bottom","label":"使用常量"}, @@ -129,13 +128,8 @@ {"id":"dcf437aa83674d1a","fromNode":"completion_monitor","fromSide":"left","toNode":"e156c034cc9ec24f","toSide":"right"}, {"id":"7ae0cf5ea0bc0b06","fromNode":"task_spawn_flow","fromSide":"bottom","toNode":"e156c034cc9ec24f","toSide":"top"}, {"id":"49b65724e2a3b08f","fromNode":"e156c034cc9ec24f","fromSide":"left","toNode":"batch_request3","toSide":"right"}, - {"id":"lookup_to_state","fromNode":"handle_lookup","fromSide":"top","toNode":"state_manager","toSide":"bottom","label":"查找/创建 proto::BatchRequestId"}, + {"id":"lookup_to_state","fromNode":"handle_lookup","fromSide":"top","toNode":"state_manager","toSide":"bottom","label":"查找/创建"}, 
{"id":"monitor_to_state","fromNode":"completion_monitor","fromSide":"right","toNode":"state_manager","toSide":"bottom","label":"清理"}, - {"id":"facc3fcfb55cf19d","fromNode":"batch_data_request","fromSide":"top","toNode":"batch_request3","toSide":"bottom"}, - {"id":"271f79d015a55fdf","fromNode":"batch_data_request","fromSide":"right","toNode":"e156c034cc9ec24f","toSide":"bottom"}, - {"id":"6a7413aedbbca964","fromNode":"155106edf5eb3cd7","fromSide":"top","toNode":"e2576a54f3f852b3","toSide":"right","label":"未完成"}, - {"id":"6604bc585e5ffe59","fromNode":"155106edf5eb3cd7","fromSide":"bottom","toNode":"0dee80a0e2345514","toSide":"bottom","label":"完成"}, - {"id":"handle_wait_flow","fromNode":"0dee80a0e2345514","fromSide":"right","toNode":"handle_wait_all","toSide":"right","label":"通知等待完成"}, - {"id":"e732f2950f5744ff","fromNode":"4dbe01dc59cea4c2","fromSide":"bottom","toNode":"handle_wait_all","toSide":"top"} + {"id":"facc3fcfb55cf19d","fromNode":"batch_data_request","fromSide":"top","toNode":"batch_request3","toSide":"bottom"} ] } \ No newline at end of file diff --git a/scripts/mount_s3fs.sh b/scripts/mount_s3fs.sh deleted file mode 100644 index 2e22278..0000000 --- a/scripts/mount_s3fs.sh +++ /dev/null @@ -1,3 +0,0 @@ -umount /mnt/s3fs -s3fs s3fs /mnt/s3fs -o passwd_file=/root/.passwd-s3fs -o url=http://127.0.0.1:9000 -o use_path_request_style -o umask=0022,uid=$(id -u),gid=$(id -g) -o use_cache=/var/cache/s3fs -echo "mount s3fs success" \ No newline at end of file diff --git a/scripts/sync_md_files.py b/scripts/sync_md_files.py index 97879e3..d4a3795 100644 --- a/scripts/sync_md_files.py +++ b/scripts/sync_md_files.py @@ -1,245 +1,47 @@ #!/usr/bin/env python3 -import json import os -import sys -from datetime import datetime -from typing import List, Dict, Optional +import shutil +import argparse +import datetime +import tarfile +from pathlib import Path -class Node: - def __init__(self, data: dict): - self.data = data - self.children = [] - self.parent = None - - @property - def id(self) -> str: - return self.data.get('id', '') - - @property - def type(self) -> str: - return self.data.get('type', '') - - @property - def x(self) -> float: - return float(self.data.get('x', 0)) - - @property - def y(self) -> float: - return float(self.data.get('y', 0)) - - @property - def width(self) -> float: - return float(self.data.get('width', 0)) - - @property - def height(self) -> float: - return float(self.data.get('height', 0)) - - def contains(self, other: 'Node') -> bool: - """判断当前节点是否在空间上包含另一个节点""" - if self.type != 'group': - return False - - # 考虑边界重叠的情况 - return (other.x >= self.x - 1 and - other.y >= self.y - 1 and - other.x + other.width <= self.x + self.width + 1 and - other.y + other.height <= self.y + self.height + 1) - - def to_dict(self) -> dict: - """转换为字典格式""" - result = self.data.copy() - if self.children: - result['children'] = [child.to_dict() for child in self.children] - return result - - def to_flat_dict(self) -> List[dict]: - """转换为扁平的字典列表""" - result = [] - if self.type != 'root': # 不包含根节点 - node_data = self.data.copy() - if 'children' in node_data: - del node_data['children'] # 移除children字段 - result.append(node_data) - for child in self.children: - result.extend(child.to_flat_dict()) - return result -def tree_to_flat_nodes(tree_data: dict) -> List[dict]: - """将树状结构转换为扁平的节点列表""" - result = [] - - # 处理当前节点 - if tree_data.get('type') != 'root': - node_data = tree_data.copy() - if 'children' in node_data: - del node_data['children'] - result.append(node_data) - - # 递归处理子节点 - for child in 
tree_data.get('children', []): - result.extend(tree_to_flat_nodes(child)) - - return result +def sync_md_files(source_dir, target_dir): + # read source file + toreplace=" " + withcontent=" " + with open(f"{source_dir}/design.canvas") as f: + canvas = f.read() + canvas=canvas.replace(toreplace,withcontent) + with open(f"{source_dir}/design.canvas","w") as f: + f.write(canvas) -class CanvasData: - def __init__(self, data: dict): - self.nodes = [] - self.groups = [] - self.edges = [] - self.parse_data(data) - - def parse_data(self, data: dict): - """解析canvas数据""" - # 处理所有节点 - for item in data: - node = Node(item) - self.nodes.append(node) - if node.type == 'group': - self.groups.append(node) - - def find_best_parent(self, node: Node) -> Optional[Node]: - """为节点找到最佳的父节点""" - candidates = [] - for group in self.groups: - if group.contains(node) and group != node: - candidates.append(group) - - if not candidates: - return None - - # 选择面积最小的包含组作为父节点 - return min(candidates, - key=lambda g: g.width * g.height) - - def build_tree(self) -> Node: - """构建树状结构""" - # 创建虚拟根节点 - root = Node({ - 'id': 'root', - 'type': 'root', - }) - - # 按面积从大到小排序groups - self.groups.sort(key=lambda g: g.width * g.height, reverse=True) - - # 构建节点关系 - assigned_nodes = set() - - # 先处理groups之间的关系 - for group in self.groups: - parent = self.find_best_parent(group) - if parent: - parent.children.append(group) - group.parent = parent - assigned_nodes.add(group.id) - else: - root.children.append(group) - group.parent = root - assigned_nodes.add(group.id) - - # 处理剩余节点 - for node in self.nodes: - if node.id not in assigned_nodes: - parent = self.find_best_parent(node) - if parent: - parent.children.append(node) - node.parent = parent - else: - root.children.append(node) - node.parent = root - - return root - - def to_tree_json(self) -> dict: - """转换为树状JSON结构""" - root = self.build_tree() - return root.to_dict() - - def to_flat_json(self) -> List[dict]: - """转换为扁平JSON结构""" - root = self.build_tree() - return root.to_flat_dict() + os.system(f"cp -r {source_dir}/design.canvas {target_dir}/design.canvas") -def backup_file(file_path: str): - """备份文件""" - if os.path.exists(file_path): - timestamp = datetime.now().strftime('%Y%m%d%H%M%S') - backup_path = f"{file_path}.{timestamp}.bak" - os.rename(file_path, backup_path) - print(f"Backup {file_path} to {backup_path}") - -def sync_from_s3fs(): - """从s3fs同步到本地,并生成树状结构""" - s3fs_dir = "/mnt/s3fs/waverless" - local_dir = "/root/prjs/waverless" - - print(f"Starting sync from {s3fs_dir} to {local_dir}") - - # 同步canvas文件 - canvas_path = os.path.join(local_dir, "design.canvas") - s3fs_canvas_path = os.path.join(s3fs_dir, "design.canvas") +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Sync markdown and canvas files between local and s3fs') + parser.add_argument('direction', choices=['to_s3fs', 'from_s3fs'], + help='Direction of sync: to_s3fs or from_s3fs') + args = parser.parse_args() - if os.path.exists(s3fs_canvas_path): - # 备份当前文件 - backup_file(canvas_path) - - # 读取s3fs中的canvas - with open(s3fs_canvas_path, 'r', encoding='utf-8') as f: - canvas_data = json.load(f) - - # 生成树状结构 - canvas = CanvasData(canvas_data.get('nodes', [])) - tree_data = canvas.to_tree_json() - - # 保存树状结构 - tree_path = os.path.join(local_dir, "design.json") - with open(tree_path, 'w', encoding='utf-8') as f: - json.dump(tree_data, f, ensure_ascii=False, indent=2) - - # 保存原始canvas - with open(canvas_path, 'w', encoding='utf-8') as f: - json.dump(canvas_data, f, ensure_ascii=False, indent=2) - 
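# sync_to_s3fs (deleted below) was the inverse of sync_from_s3fs: it read the
# tree-shaped design.json, flattened it with tree_to_flat_nodes, backed up the
# canvas on s3fs via backup_file, then wrote {'nodes': flat_nodes} back out.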
-def sync_to_s3fs(): - """从本地同步到s3fs,将树状结构转换回扁平结构""" - s3fs_dir = "/mnt/s3fs/waverless" local_dir = "/root/prjs/waverless" + s3fs_dir = "/mnt/s3fs/waverless" - print(f"Starting sync from {local_dir} to {s3fs_dir}") - - # 读取树状结构 - tree_path = os.path.join(local_dir, "design.json") - if not os.path.exists(tree_path): - print(f"Tree file {tree_path} not found") - return - - with open(tree_path, 'r', encoding='utf-8') as f: - tree_data = json.load(f) - - # 直接将树状结构转换为扁平节点列表 - flat_nodes = tree_to_flat_nodes(tree_data) - - # 保存到s3fs - s3fs_canvas_path = os.path.join(s3fs_dir, "design.canvas") - backup_file(s3fs_canvas_path) - - with open(s3fs_canvas_path, 'w', encoding='utf-8') as f: - json.dump({'nodes': flat_nodes}, f, ensure_ascii=False, indent=2) - -def main(): - if len(sys.argv) != 2: - print("Usage: python3 sync_md_files.py [from_s3fs|to_s3fs]") - sys.exit(1) - - command = sys.argv[1] - if command == "from_s3fs": - sync_from_s3fs() - elif command == "to_s3fs": - sync_to_s3fs() - else: - print(f"Unknown command: {command}") - sys.exit(1) - -if __name__ == "__main__": - main() + if args.direction == 'to_s3fs': + source_dir = local_dir + target_dir = s3fs_dir + else: # from_s3fs + source_dir = s3fs_dir + target_dir = local_dir + + # # Backup target directory before sync + # print(f"Creating backup of target directory: {target_dir}") + # backup_path = backup_files(target_dir) + + print(f"Starting sync from {source_dir} to {target_dir}") + sync_md_files(source_dir, target_dir) + if args.direction == 'from_s3fs': + timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + os.system(f"cp {target_dir}/design.canvas {target_dir}/design.canvas.{timestamp}.bak") + print(f"Backup design.canvas to design.canvas.{timestamp}.bak") diff --git a/scripts/test_design_json_tool.py b/scripts/test_design_json_tool.py deleted file mode 100644 index b3b761e..0000000 --- a/scripts/test_design_json_tool.py +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env python3 -import os -import json -import shutil -import unittest -from scripts.design_json_tool import DesignJson, Node - -class TestDesignJsonTool(unittest.TestCase): - def setUp(self): - """测试前准备工作""" - # 创建测试用的JSON文件 - self.test_json_path = 'test_design.json' - self.test_data = { - "id": "root", - "type": "root", - "children": [ - { - "id": "group1", - "type": "group", - "label": "测试组1", - "children": [ - { - "id": "node1", - "type": "text", - "text": "测试节点1" - } - ] - } - ], - "edges": [] - } - with open(self.test_json_path, 'w', encoding='utf-8') as f: - json.dump(self.test_data, f, ensure_ascii=False, indent=2) - - self.design = DesignJson(self.test_json_path) - - def tearDown(self): - """测试后清理工作""" - if os.path.exists(self.test_json_path): - os.remove(self.test_json_path) - - def test_read_all(self): - """测试读取整个JSON""" - root = self.design.root - self.assertEqual(root.id, "root") - self.assertEqual(root.type, "root") - self.assertEqual(len(root.children), 1) - - def test_read_node(self): - """测试读取单个节点""" - node = self.design.get_node("node1") - self.assertIsNotNone(node) - self.assertEqual(node.type, "text") - self.assertEqual(node.text, "测试节点1") - - def test_read_group(self): - """测试读取组内容""" - nodes = self.design.get_group_nodes("group1") - self.assertEqual(len(nodes), 1) - self.assertEqual(nodes[0].id, "node1") - - def test_create_node(self): - """测试创建新节点""" - node_data = { - "id": "new_node", - "type": "text", - "text": "新建节点" - } - node_id = self.design.create_node(node_data) - self.assertEqual(node_id, "new_node") - node = 
self.design.get_node(node_id) - self.assertIsNotNone(node) - self.assertEqual(node.text, "新建节点") - - def test_update_node(self): - """测试更新节点""" - updates = {"text": "更新后的文本"} - success = self.design.update_node("node1", updates) - self.assertTrue(success) - node = self.design.get_node("node1") - self.assertEqual(node.text, "更新后的文本") - - def test_move_to_group(self): - """测试移动节点到组""" - # 先创建新组 - group_data = { - "id": "group2", - "type": "group", - "label": "测试组2" - } - self.design.create_node(group_data) - - # 移动节点 - success = self.design.move_to_group("node1", "group2") - self.assertTrue(success) - - # 验证移动结果 - nodes = self.design.get_group_nodes("group2") - self.assertEqual(len(nodes), 1) - self.assertEqual(nodes[0].id, "node1") - - def test_edges(self): - """测试边操作""" - # 添加边 - success = self.design.add_edge("node1", "group1", "test_edge") - self.assertTrue(success) - - # 验证入度 - incoming = self.design.get_incoming_nodes("group1") - self.assertEqual(len(incoming), 1) - self.assertEqual(incoming[0], ("node1", "test_edge")) - - # 验证出度 - outgoing = self.design.get_outgoing_nodes("node1") - self.assertEqual(len(outgoing), 1) - self.assertEqual(outgoing[0], ("group1", "test_edge")) - - # 删除边 - success = self.design.remove_edge("node1", "group1", "test_edge") - self.assertTrue(success) - - # 验证边已删除 - incoming = self.design.get_incoming_nodes("group1") - self.assertEqual(len(incoming), 0) - - def test_nonexistent_node(self): - """测试操作不存在的节点""" - # 读取不存在的节点 - node = self.design.get_node("nonexistent") - self.assertIsNone(node) - - # 更新不存在的节点 - success = self.design.update_node("nonexistent", {"text": "新文本"}) - self.assertFalse(success) - - # 移动不存在的节点 - success = self.design.move_to_group("nonexistent", "group1") - self.assertFalse(success) - - # 添加包含不存在节点的边 - success = self.design.add_edge("nonexistent", "node1") - self.assertFalse(success) - - def test_duplicate_operations(self): - """测试重复操作""" - # 重复创建同ID节点 - node_data = { - "id": "node1", # 已存在的ID - "type": "text", - "text": "重复节点" - } - original_node = self.design.get_node("node1") - node_id = self.design.create_node(node_data) - self.assertEqual(node_id, "node1") - # 验证节点内容未被覆盖 - node = self.design.get_node("node1") - self.assertEqual(node.text, original_node.text) - - # 重复添加相同的边 - self.design.add_edge("node1", "group1", "test_edge") - success = self.design.add_edge("node1", "group1", "test_edge") - self.assertTrue(success) # 添加成功但不会重复 - incoming = self.design.get_incoming_nodes("group1") - self.assertEqual(len(incoming), 1) # 只有一条边 - - def test_nested_groups(self): - """测试嵌套组操作""" - # 创建嵌套的组结构 - group2_data = { - "id": "group2", - "type": "group", - "label": "测试组2" - } - group3_data = { - "id": "group3", - "type": "group", - "label": "测试组3" - } - self.design.create_node(group2_data) - self.design.create_node(group3_data) - - # 将group3移动到group2中 - success = self.design.move_to_group("group3", "group2") - self.assertTrue(success) - - # 验证嵌套结构 - nodes = self.design.get_group_nodes("group2") - self.assertEqual(len(nodes), 1) - self.assertEqual(nodes[0].id, "group3") - - # 将节点移动到最内层组 - success = self.design.move_to_group("node1", "group3") - self.assertTrue(success) - - # 验证节点位置 - nodes = self.design.get_group_nodes("group3") - self.assertEqual(len(nodes), 1) - self.assertEqual(nodes[0].id, "node1") - - def test_save_and_load(self): - """测试保存和加载功能""" - # 修改数据 - self.design.update_node("node1", {"text": "修改后的文本"}) - self.design.add_edge("node1", "group1", "test_edge") - - # 保存文件 - self.design.save() - - # 重新加载 - new_design = 
DesignJson(self.test_json_path) - - # 验证修改是否保持 - node = new_design.get_node("node1") - self.assertEqual(node.text, "修改后的文本") - - incoming = new_design.get_incoming_nodes("group1") - self.assertEqual(len(incoming), 1) - self.assertEqual(incoming[0], ("node1", "test_edge")) - - def test_invalid_operations(self): - """测试无效操作""" - # 测试移动到非组节点 - success = self.design.move_to_group("node1", "node1") # node1不是组 - self.assertFalse(success) - - # 测试更新不存在的属性 - success = self.design.update_node("node1", {"nonexistent_attr": "value"}) - self.assertTrue(success) # 更新成功但属性未添加 - node = self.design.get_node("node1") - self.assertFalse(hasattr(node, "nonexistent_attr")) - - # 测试创建缺少必要属性的节点 - invalid_node = { - "type": "text" # 缺少id - } - with self.assertRaises(KeyError): - self.design.create_node(invalid_node) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/src/main/src/general/data/m_data_general/batch.rs b/src/main/src/general/data/m_data_general/batch.rs index e27d3cd..9f04a8c 100644 --- a/src/main/src/general/data/m_data_general/batch.rs +++ b/src/main/src/general/data/m_data_general/batch.rs @@ -105,7 +105,6 @@ impl DataGeneral { operation: proto::DataOpeType::Write as i32, unique_id: unique_id.clone(), version, - total_size: total_size as u64, }; // 发送请求 diff --git a/src/main/src/general/data/m_data_general/dataitem.rs b/src/main/src/general/data/m_data_general/dataitem.rs index fbec9a8..cf40988 100644 --- a/src/main/src/general/data/m_data_general/dataitem.rs +++ b/src/main/src/general/data/m_data_general/dataitem.rs @@ -99,10 +99,6 @@ pub struct SharedMemHolder { } impl SharedMemHolder { - pub fn len(&self) -> usize { - self.data.len() - } - pub fn try_take_data(self) -> Option> { // SAFETY: // 1. We're only replacing the Arc with an empty Vec @@ -177,17 +173,16 @@ pub fn new_shared_mem(splits: &[Range]) -> (SharedMemHolder, Vec>` - 分片范围列表 #[must_use] -pub fn calculate_splits(total_size: usize) -> Vec> { - let total_blocks = (total_size + DEFAULT_BLOCK_SIZE - 1) / DEFAULT_BLOCK_SIZE; - let mut splits = Vec::with_capacity(total_blocks); +pub fn calculate_splits(total_blocks: u32) -> Vec> { + let mut splits = Vec::with_capacity(total_blocks as usize); for i in 0..total_blocks { - let start = i * DEFAULT_BLOCK_SIZE; - let end = (start + DEFAULT_BLOCK_SIZE).min(total_size); + let start = i as usize * DEFAULT_BLOCK_SIZE; + let end = start + DEFAULT_BLOCK_SIZE; splits.push(start..end); } splits @@ -209,13 +204,6 @@ pub enum WriteSplitDataType { }, } -/// 写入分片任务的结果 -#[derive(Debug)] -pub struct WriteSplitTaskResult { - /// 写入的数据大小 - pub written_size: usize, -} - /// 写入分片任务组 /// 管理一组相关的写入任务 #[derive(Debug)] @@ -227,9 +215,9 @@ pub enum WriteSplitDataTaskGroup { /// 目标文件路径 file_path: PathBuf, /// 任务列表 - tasks: Vec>, + tasks: Vec>, /// 接收新任务的通道 - rx: mpsc::Receiver>, + rx: mpsc::Receiver>, /// 预期总大小 expected_size: usize, /// 当前已写入大小 @@ -244,9 +232,9 @@ pub enum WriteSplitDataTaskGroup { /// 共享内存区域 shared_mem: SharedMemHolder, /// 任务列表 - tasks: Vec>, + tasks: Vec>, /// 接收新任务的通道 - rx: mpsc::Receiver>, + rx: mpsc::Receiver>, /// 预期总大小 expected_size: usize, /// 当前已写入大小 @@ -260,10 +248,11 @@ impl WriteSplitDataTaskGroup { /// 创建新的任务组 pub async fn new( unique_id: UniqueId, - total_size: usize, + splits: Vec>, block_type: proto::BatchDataBlockType, version: u64, ) -> WSResult<(Self, WriteSplitDataTaskHandle)> { + let expected_size = splits.iter().map(|range| range.len()).sum(); let (tx, rx) = mpsc::channel(32); let (broadcast_tx, _) = broadcast::channel::<()>(32); let 
broadcast_tx = Arc::new(broadcast_tx); @@ -287,7 +276,7 @@ impl WriteSplitDataTaskGroup { file_path, tasks: Vec::new(), rx, - expected_size: total_size, + expected_size, current_size: 0, broadcast_tx: broadcast_tx.clone(), }; @@ -296,7 +285,7 @@ impl WriteSplitDataTaskGroup { } proto::BatchDataBlockType::Memory => { let shared_mem = SharedMemHolder { - data: Arc::new(vec![0; total_size]), + data: Arc::new(vec![0; expected_size]), }; let handle = WriteSplitDataTaskHandle { @@ -313,7 +302,7 @@ impl WriteSplitDataTaskGroup { shared_mem, tasks: Vec::new(), rx, - expected_size: total_size, + expected_size, current_size: 0, broadcast_tx: broadcast_tx.clone(), }; @@ -329,7 +318,7 @@ impl WriteSplitDataTaskGroup { /// * `Ok(item)` - 所有数据写入完成,返回数据项 /// * `Err(e)` - 写入过程中出错 pub async fn process_tasks(&mut self) -> WSResult { - let mut pending_tasks: FuturesUnordered> = FuturesUnordered::new(); + let mut pending_tasks: FuturesUnordered> = FuturesUnordered::new(); match self { Self::ToFile { tasks, .. } | @@ -356,25 +345,25 @@ impl WriteSplitDataTaskGroup { pending_tasks.push(new_task); } Some(completed_result) = pending_tasks.next() => { - match completed_result { - Ok(result) => { - match self { - Self::ToFile { current_size, .. } | - Self::ToMem { current_size, .. } => { - *current_size += result.written_size; - } - } - } - Err(e) => { - tracing::error!("Task failed: {}", e); - return Err(WSError::WsDataError(WsDataError::BatchTransferTaskFailed { - reason: format!("Task failed: {}", e) - })); + if let Err(e) = completed_result { + tracing::error!("Task failed: {}", e); + return Err(WSError::WsDataError(WsDataError::BatchTransferTaskFailed { + reason: format!("Task failed: {}", e) + })); + } + match self { + Self::ToFile { current_size, .. } | + Self::ToMem { current_size, .. 
} => { + *current_size += DEFAULT_BLOCK_SIZE; // 每个任务写入一个块 } } } } } + + Err(WSError::WsDataError(WsDataError::BatchTransferTaskFailed { + reason: "Channel closed".to_string() + })) } /// 检查写入完成状态 @@ -426,7 +415,7 @@ impl WriteSplitDataTaskGroup { #[derive(Clone)] pub struct WriteSplitDataTaskHandle { /// 发送任务的通道 - tx: mpsc::Sender>, + tx: mpsc::Sender>, /// 写入类型(文件或内存) write_type: WriteSplitDataType, /// 数据版本号 @@ -459,7 +448,6 @@ impl WriteSplitDataTaskHandle { let path = path.clone(); let offset = idx; let data = data.as_raw_bytes().unwrap_or(&[]).to_vec(); - let written_size = data.len(); tokio::spawn(async move { let result = tokio::fs::OpenOptions::new() .create(true) @@ -484,13 +472,10 @@ impl WriteSplitDataTaskHandle { Ok::<_, std::io::Error>(()) }.await { tracing::error!("Failed to write file data at offset {}: {}", offset, e); - panic!("Failed to write file: {}", e); } - WriteSplitTaskResult { written_size } } Err(e) => { tracing::error!("Failed to open file at offset {}: {}", offset, e); - panic!("Failed to open file: {}", e); } } }) @@ -498,18 +483,7 @@ impl WriteSplitDataTaskHandle { WriteSplitDataType::Mem { shared_mem } => { let mem = shared_mem.clone(); let offset = idx; - let Some(data) = data.as_raw_bytes().map(|data| data.to_vec()) else { - return Err(WSError::WsDataError(WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: 0, - sequence: 0, - }, - reason: format!("mem data expected"), - })); - }; - let written_size = data.len(); - tracing::debug!("submit_split: Mem, len:{}, target len:{}", data.len(), shared_mem.len()); - + let data = data.as_raw_bytes().unwrap_or(&[]).to_vec(); tokio::spawn(async move { unsafe { let slice = std::slice::from_raw_parts_mut( @@ -518,7 +492,6 @@ impl WriteSplitDataTaskHandle { ); slice[offset..offset + data.len()].copy_from_slice(&data); } - WriteSplitTaskResult { written_size } }) } }; @@ -584,24 +557,6 @@ impl DataItemSource { } } - pub async fn size(&self) -> WSResult { - match self { - DataItemSource::Memory { data } => Ok(data.len()), - DataItemSource::File { path } => { - let metadata = tokio::fs::metadata(path).await.map_err(|e| - WSError::WsDataError(WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: 0, // 这里需要传入正确的node_id - sequence: 0, - }, - reason: format!("Failed to get file size: {}", e), - }) - )?; - Ok(metadata.len() as usize) - } - } - } - pub fn block_type(&self) -> proto::BatchDataBlockType { match self { DataItemSource::Memory { .. 
} => proto::BatchDataBlockType::Memory, diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index 51779cb..475d1d2 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -155,11 +155,24 @@ impl DataGeneral { let (tx, mut rx) = tokio::sync::mpsc::channel(32); let mut handles = Vec::new(); - let data_size = data.size().await?; - let splits = calculate_splits(data_size); + let data_size = match data.as_ref() { + DataItemSource::Memory { data } => data.len(), + DataItemSource::File { path } => { + let metadata = tokio::fs::metadata(path).await.map_err(|e| WsDataError::BatchTransferFailed { + request_id: proto::BatchRequestId { + node_id: target_node as u32, + sequence: 0, + }, + reason: format!("Failed to get file size: {}", e), + })?; + metadata.len() as usize + } + }; + + // 从 batch_handler 中获取总块数 + let total_blocks = (data_size + DEFAULT_BLOCK_SIZE - 1) / DEFAULT_BLOCK_SIZE; + let splits = calculate_splits(total_blocks as u32); - tracing::debug!("batch_transfer total size({}), splits: {:?}", data_size, splits); - for (block_idx, split_range) in splits.iter().enumerate() { let block_data = match data.as_ref() { DataItemSource::Memory { data } => data[split_range.clone()].to_vec(), @@ -218,7 +231,6 @@ impl DataGeneral { operation: proto::DataOpeType::Write as i32, unique_id: unique_id.clone(), version, - total_size: data_size as u64, }; let tx = tx.clone(); @@ -294,7 +306,6 @@ impl DataGeneral { unique_id: &[u8], delete: bool, ) -> WSResult { - tracing::debug!("get_or_del_datameta_from_master uid: {:?}, delete: {}, whoami: {}", unique_id, delete, self.view.p2p().nodes_config.this.0); let p2p = self.view.p2p(); // get meta from master let meta = self @@ -334,7 +345,6 @@ impl DataGeneral { ty, }: GetOrDelDataArg, ) -> WSResult<(DataSetMetaV2, HashMap)> { - tracing::debug!("get_or_del_data uid: {:?}, maybe with meta: {:?}", unique_id, meta); let mut data_map = HashMap::new(); // get meta from master @@ -345,7 +355,7 @@ impl DataGeneral { .await? 
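            // When the caller did not supply a meta, fall back to fetching it
            // from the master via get_or_del_datameta_from_master above.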
}; - tracing::debug!("start get_or_del_data uid: {:?},meta: {:?}", unique_id, meta); + tracing::debug!("get_or_del_data uid: {:?},meta: {:?}", unique_id, meta); // basical verify for idx in 0..meta.data_item_cnt() { @@ -830,14 +840,8 @@ impl DataGeneral { let key = KeyTypeDataSetMeta(&req.unique_id); let keybytes = key.make_key(); - - // test only log - #[cfg(test)] - tracing::debug!("rpc_handle_data_meta_update {:?}\n {:?}", req,bincode::deserialize::(&req.serialized_meta)); - // not test log - #[cfg(not(test))] - tracing::debug!("rpc_handle_data_meta_update {:?}", req); + tracing::debug!("rpc_handle_data_meta_update {:?}", req); let kv_lock = self.view.kv_store_engine().with_rwlock(&keybytes); let _kv_write_lock_guard = kv_lock.write(); @@ -907,15 +911,15 @@ impl DataGeneral { responsor: RPCResponsor, ) -> WSResult<()> { tracing::debug!("rpc_handle_get_data_meta with req({:?})", req); - let meta = self.view.get_data_meta_local(&req.unique_id, req.delete)?; - if meta.is_none() { - tracing::debug!("rpc_handle_get_data_meta data meta not found"); - } else { - tracing::debug!("rpc_handle_get_data_meta data meta found"); - } - let serialized_meta = meta.map_or(vec![], |(_kvversion, meta)| { - bincode::serialize(&meta).unwrap() - }); + let meta = self.view.get_metadata(&req.unique_id, req.delete).await?; + tracing::debug!("rpc_handle_get_data_meta data meta found"); + + let serialized_meta = bincode::serialize(&meta).map_err(|err| { + WsSerialErr::BincodeErr { + err, + context: "rpc_handle_get_data_meta".to_owned(), + } + })?; responsor .send_resp(proto::DataMetaGetResponse { serialized_meta }) @@ -1034,7 +1038,7 @@ impl DataGeneral { // 创建任务组和句柄 let (mut group, handle) = match WriteSplitDataTaskGroup::new( req.unique_id.clone(), - req.total_size as usize, + Vec::new(), // TODO: 根据实际需求设置分片范围 req.block_type(), req.version, ).await { @@ -1098,8 +1102,6 @@ impl DataGeneral { data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(data)), ..Default::default() }; - - tracing::debug!("submit_split with data split idx: {}, at node: {}", block_index, self.view.p2p().nodes_config.this_node()); state.handle.submit_split( block_index as usize * DEFAULT_BLOCK_SIZE, data_item, @@ -1433,7 +1435,7 @@ pub enum GetOrDelDataArgType { } impl DataGeneralView { - fn get_data_meta_local( + fn get_data_meta( &self, unique_id: &[u8], delete: bool, @@ -1462,7 +1464,7 @@ impl DataGeneralView { delete: bool, ) -> WSResult { // 先尝试从本地获取 - if let Some((_version, meta)) = self.get_data_meta_local(unique_id, delete)? { + if let Some((_version, meta)) = self.get_data_meta(unique_id, delete)? 
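// Illustrative sketch of the meta serialization used by
// rpc_handle_get_data_meta above: the dataset meta travels as bincode bytes
// and the peer decodes it with the same type. `MiniMeta` is a stand-in, not
// the real DataSetMetaV2:
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, PartialEq)]
struct MiniMeta {
    version: u64,
    split_offsets: Vec<u64>,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let meta = MiniMeta { version: 7, split_offsets: vec![0, 4096] };
    let bytes = bincode::serialize(&meta)?;             // handler side
    let back: MiniMeta = bincode::deserialize(&bytes)?; // caller side
    assert_eq!(back, meta);
    Ok(())
}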
{ return Ok(meta); } @@ -1584,4 +1586,4 @@ impl LogicalModule for DataGeneral { } #[derive(Debug, Clone, Copy)] -pub struct CacheModeVisitor(pub u16); \ No newline at end of file +pub struct CacheModeVisitor(pub u16); diff --git a/src/main/src/general/network/proto_ext.rs b/src/main/src/general/network/proto_ext.rs index 1fbbee4..0e15f7c 100644 --- a/src/main/src/general/network/proto_ext.rs +++ b/src/main/src/general/network/proto_ext.rs @@ -1,6 +1,5 @@ use crate::general::app::DataEventTrigger; use crate::general::data::m_data_general::dataitem::DataItemSource; -use crate::general::data::m_data_general::DataItemIdx; use crate::general::data::m_dist_lock::DistLockOpe; use crate::general::network::proto::sche::distribute_task_req::{ DataEventTriggerNew, DataEventTriggerWrite, Trigger, @@ -291,16 +290,6 @@ impl ProtoExtDataEventTrigger for DataEventTrigger { } } -pub trait ProtoExtDataScheduleContext { - fn dataitem_cnt(&self) -> DataItemIdx; -} - -impl ProtoExtDataScheduleContext for proto::DataScheduleContext { - fn dataitem_cnt(&self) -> DataItemIdx { - self.each_data_sz_bytes.len() as DataItemIdx - } -} - // Example usage in tests #[cfg(test)] mod tests { diff --git a/src/main/src/general/network/proto_src/data.proto b/src/main/src/general/network/proto_src/data.proto index fdd6fee..b6ae0d5 100644 --- a/src/main/src/general/network/proto_src/data.proto +++ b/src/main/src/general/network/proto_src/data.proto @@ -191,7 +191,6 @@ message BatchDataRequest { DataOpeType operation = 7; // 操作类型 bytes unique_id = 8; // 数据唯一标识 uint64 version = 9; // 数据版本 - uint64 total_size = 10; // 数据总大小 } message BatchDataResponse { diff --git a/src/main/src/main.rs b/src/main/src/main.rs index 8a9b56a..e3b2af3 100644 --- a/src/main/src/main.rs +++ b/src/main/src/main.rs @@ -65,9 +65,9 @@ pub fn start_tracing() { return false; } if *v.level() == Level::DEBUG { - // if mp.contains("wasm_serverless::worker::m_kv_user_client") { - // return false; - // } + if mp.contains("wasm_serverless::worker::m_kv_user_client") { + return false; + } // if mp.contains("wasm_serverless::general::m_data_general") { // return false; // } diff --git a/src/main/src/master/data/m_data_master.rs b/src/main/src/master/data/m_data_master.rs index 44a4d70..02a9501 100644 --- a/src/main/src/master/data/m_data_master.rs +++ b/src/main/src/master/data/m_data_master.rs @@ -1,7 +1,6 @@ use crate::general::app::m_executor::Executor; use crate::general::app::AppMetaManager; use crate::general::app::DataEventTrigger; -use crate::general::data::m_data_general::CacheModeVisitor; use crate::general::network::m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}; use crate::general::network::proto::{ self, DataVersionScheduleRequest, DataVersionScheduleResponse, @@ -191,12 +190,14 @@ impl DataMaster { // 设置数据分片 let _ = builder.set_data_splits(splits.clone()); - // 暂时用zui'lzuil - for idx in 0..splits.len() { - let _= builder.cache_mode_time_auto(idx as u8).cache_mode_pos_auto(idx as u8); - } - let cache_modes=builder.build().cache_mode; - tracing::debug!("planned for write data({:?}) cache_modes: {:?}", data_unique_id, cache_modes); + + // 设置缓存模式 - 对所有缓存节点启用永久缓存 + let cache_modes = vec![ + CACHE_MODE_TIME_FOREVER_MASK | CACHE_MODE_MAP_COMMON_KV_MASK; + context.each_data_sz_bytes.len() + ]; + let _ = builder.set_cache_mode_for_all(cache_modes.clone()); + Ok((cache_modes, splits, cache_nodes)) } @@ -266,68 +267,64 @@ impl DataMaster { }; // update version peers - { - tracing::debug!("updating meta({:?}) to peers for data({:?})", new_meta, 
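// Illustrative sketch of the cache-mode bitmask composition above: a mode is
// a u16 whose bit groups are OR-ed together, one combined mode per data item.
// The constants here are invented for the example; the real masks
// (CACHE_MODE_TIME_FOREVER_MASK, CACHE_MODE_MAP_COMMON_KV_MASK) live in
// m_data_general:
const TIME_FOREVER: u16 = 0b0000_0001; // stand-in bit, not the real value
const MAP_COMMON_KV: u16 = 0b0001_0000; // stand-in bit, not the real value

fn main() {
    let item_count = 3; // e.g. context.each_data_sz_bytes.len()
    let cache_modes = vec![TIME_FOREVER | MAP_COMMON_KV; item_count];
    assert!(cache_modes.iter().all(|m| m & TIME_FOREVER != 0));
    assert!(cache_modes.iter().all(|m| m & MAP_COMMON_KV != 0));
}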
req.unique_id); - let need_notify_nodes = { - let mut need_notify_nodes = HashSet::new(); - for one_data_splits in &new_meta.datas_splits { - for data_split in &one_data_splits.splits { - let _ = need_notify_nodes.insert(data_split.node_id); - } + let need_notify_nodes = { + let mut need_notify_nodes = HashSet::new(); + for one_data_splits in &new_meta.datas_splits { + for data_split in &one_data_splits.splits { + let _ = need_notify_nodes.insert(data_split.node_id); } - // TODO: do we need to notify cache nodes? - need_notify_nodes - }; + } + // TODO: do we need to notify cache nodes? + need_notify_nodes + }; - for need_notify_node in need_notify_nodes { - let view = self.view.clone(); - let serialized_meta = bincode::serialize(&new_meta).unwrap(); - let unique_id = req.unique_id.clone(); - let version = new_meta.version; - let _ = tokio::spawn(async move { - let p2p = view.p2p(); - let display_id = std::str::from_utf8(&unique_id) - .map_or_else(|_err| format!("{:?}", unique_id), |ok| ok.to_owned()); - tracing::debug!( - "updating version for data({:?}) to node: {}, this_node: {}", - display_id, - need_notify_node, - p2p.nodes_config.this_node() - ); + for need_notify_node in need_notify_nodes { + let view = self.view.clone(); + let serialized_meta = bincode::serialize(&new_meta).unwrap(); + let unique_id = req.unique_id.clone(); + let version = new_meta.version; + let _ = tokio::spawn(async move { + let p2p = view.p2p(); + let display_id = std::str::from_utf8(&unique_id) + .map_or_else(|_err| format!("{:?}", unique_id), |ok| ok.to_owned()); + tracing::debug!( + "updating version for data({:?}) to node: {}, this_node: {}", + display_id, + need_notify_node, + p2p.nodes_config.this_node() + ); - tracing::debug!( - "async notify `DataMetaUpdateRequest` to node {}", - need_notify_node + tracing::debug!( + "async notify `DataMetaUpdateRequest` to node {}", + need_notify_node + ); + let resp = view + .data_master() + .rpc_caller_data_meta_update + .call( + p2p, + need_notify_node, + proto::DataMetaUpdateRequest { + unique_id, + version, + serialized_meta, + }, + Some(Duration::from_secs(60)), + ) + .await; + if let Err(err) = resp { + tracing::error!( + "notify `DataMetaUpdateRequest` to node {} failed: {}", + need_notify_node, + err ); - let resp = view - .data_master() - .rpc_caller_data_meta_update - .call( - p2p, - need_notify_node, - proto::DataMetaUpdateRequest { - unique_id, - version, - serialized_meta, - }, - Some(Duration::from_secs(60)), - ) - .await; - if let Err(err) = resp { - tracing::error!( - "notify `DataMetaUpdateRequest` to node {} failed: {}", - need_notify_node, - err - ); - } else if let Ok(ok) = resp { - if ok.version != version { - tracing::error!("notify `DataMetaUpdateRequest` to node {} failed: version mismatch, expect: {}, remote: {}", need_notify_node, version, ok.version); - } + } else if let Ok(ok) = resp { + if ok.version != version { + tracing::error!("notify `DataMetaUpdateRequest` to node {} failed: version mismatch, expect: {}, remote: {}", need_notify_node, version, ok.version); } - }); - } + } + }); } - tracing::debug!( "data:{:?} version required({}) and schedule done, caller will do following thing after receive `DataVersionScheduleResponse`", diff --git a/src/main/src/worker/m_kv_user_client.rs b/src/main/src/worker/m_kv_user_client.rs index 4de9d52..c9e97d3 100644 --- a/src/main/src/worker/m_kv_user_client.rs +++ b/src/main/src/worker/m_kv_user_client.rs @@ -235,7 +235,6 @@ impl KvUserClient { _meta: DataSetMetaV2, splits: HashMap, ) -> WSResult> { - 
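// Illustrative sketch of the fan-out above: one spawned task per peer, a
// timeout per RPC, and a version echo check. `notify` is a hypothetical
// stand-in for rpc_caller_data_meta_update.call:
use std::time::Duration;

async fn notify(_node: u32, version: u64) -> Result<u64, String> {
    Ok(version) // a real peer echoes the version it now holds
}

async fn notify_all(nodes: Vec<u32>, version: u64) {
    let mut tasks = Vec::new();
    for node in nodes {
        tasks.push(tokio::spawn(async move {
            match tokio::time::timeout(Duration::from_secs(60), notify(node, version)).await {
                Err(_) => eprintln!("notify node {node} timed out"),
                Ok(Err(e)) => eprintln!("notify node {node} failed: {e}"),
                // success with a different version is still a failure
                Ok(Ok(v)) if v != version => {
                    eprintln!("node {node} version mismatch: expect {version}, got {v}")
                }
                Ok(Ok(_)) => {}
            }
        }));
    }
    for t in tasks {
        let _ = t.await; // join so failures are logged before returning
    }
}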
tracing::debug!("convert_get_data_res_to_kv_response uid: {:?}, split keys: {:?}", uid, splits.keys().collect::>()); if splits.len() != 1 { return Err(WSError::WsDataError( WsDataError::KvGotWrongSplitCountAndIdx { From 2a19714727e86df09f564f11ee461f6d212faedb Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 05/26] Revert "fix import warning" This reverts commit ae63a81b9fca946e41dc5b979285324399a7f8b0. --- src/main/src/general/app/app_owned/mod.rs | 2 +- .../general/app/app_owned/wasm_host_funcs/mod.rs | 2 +- .../app/app_owned/wasm_host_funcs/result.rs | 1 + src/main/src/general/app/app_shared/java.rs | 1 + src/main/src/general/app/app_shared/process.rs | 2 +- src/main/src/general/app/mod.rs | 4 +++- .../general/data/m_data_general/batch_handler.rs | 2 ++ src/main/src/general/data/m_data_general/mod.rs | 14 +++++++++++--- src/main/src/master/app/fddg.rs | 4 ++++ src/main/src/master/app/m_app_master.rs | 11 +++++++++-- src/main/src/master/data/m_data_master.rs | 6 +++++- src/main/src/master/m_master.rs | 2 +- src/main/src/util/container/async_init_map.rs | 1 + src/main/src/util/container/sync_trie.rs | 4 +++- 14 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/main/src/general/app/app_owned/mod.rs b/src/main/src/general/app/app_owned/mod.rs index 615bf55..782b24c 100644 --- a/src/main/src/general/app/app_owned/mod.rs +++ b/src/main/src/general/app/app_owned/mod.rs @@ -4,7 +4,7 @@ pub mod wasm_host_funcs; use crate::general::app::instance::InstanceTrait; use crate::general::app::instance::OwnedInstance; use crate::general::app::m_executor::{FnExeCtxAsync, FnExeCtxSync}; -use crate::result::{WSResult}; +use crate::result::{WSResult, WsFuncError}; use async_trait::async_trait; #[async_trait] diff --git a/src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs b/src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs index 30e3516..c3df65c 100644 --- a/src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs +++ b/src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs @@ -13,7 +13,7 @@ use result::ResultFuncsRegister; mod utils { use super::UnsafeFunctionCtx; - use crate::general::app::m_executor::{FnExeCtxAsync}; + use crate::general::app::m_executor::{FnExeCtxAsync, FnExeCtxBase}; use crate::general::app::InstanceManager; use crate::{ general::m_os::OperatingSystem, sys::LogicalModulesRef, util::SendNonNull, diff --git a/src/main/src/general/app/app_owned/wasm_host_funcs/result.rs b/src/main/src/general/app/app_owned/wasm_host_funcs/result.rs index 6d092cd..ff83530 100644 --- a/src/main/src/general/app/app_owned/wasm_host_funcs/result.rs +++ b/src/main/src/general/app/app_owned/wasm_host_funcs/result.rs @@ -1,4 +1,5 @@ use super::{utils, HostFuncRegister}; +use crate::general::app::m_executor::FnExeCtxAsync; #[cfg(target_os = "macos")] use wasmer::{imports, Function, FunctionType, Imports}; diff --git a/src/main/src/general/app/app_shared/java.rs b/src/main/src/general/app/app_shared/java.rs index 432edf5..d70304a 100644 --- a/src/main/src/general/app/app_shared/java.rs +++ b/src/main/src/general/app/app_shared/java.rs @@ -6,6 +6,7 @@ use crate::{ general::m_os::{OperatingSystem, OsProcessType}, result::{WSError, WSResult, WsFuncError}, }; +use std::path::Path; use super::process::PID; diff --git a/src/main/src/general/app/app_shared/process.rs b/src/main/src/general/app/app_shared/process.rs index 2f96d6a..298d13e 100644 --- a/src/main/src/general/app/app_shared/process.rs +++ 
b/src/main/src/general/app/app_shared/process.rs @@ -8,7 +8,7 @@ use crate::general::{ app::AppType, network::rpc_model::{self, HashValue}, }; -use crate::result::{WsFuncError}; +use crate::result::{WSError, WsFuncError}; use async_trait::async_trait; use enum_as_inner::EnumAsInner; use parking_lot::RwLock; diff --git a/src/main/src/general/app/mod.rs b/src/main/src/general/app/mod.rs index 9a2a837..a1f154c 100644 --- a/src/main/src/general/app/mod.rs +++ b/src/main/src/general/app/mod.rs @@ -18,6 +18,7 @@ use crate::{general::network::proto, result::WSResultExt}; use crate::{ general::{ data::{ + kv_interface::KvOps, m_data_general::{DataGeneral, DATA_UID_PREFIX_APP_META}, m_kv_store_engine::{KeyTypeServiceList, KvAdditionalConf, KvStoreEngine}, }, @@ -33,7 +34,7 @@ use crate::{ use crate::{ logical_module_view_impl, master::m_master::Master, - result::{WSResult, WsFuncError}, + result::{ErrCvt, WSResult, WsFuncError}, sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef, NodeID}, util::{self, JoinHandleWrapper}, }; @@ -42,6 +43,7 @@ use axum::body::Bytes; use enum_as_inner::EnumAsInner; use m_executor::FnExeCtxSyncAllowedType; use serde::{de::Error, Deserialize, Deserializer, Serialize}; +use std::path::PathBuf; use std::{ borrow::Borrow, collections::{BTreeMap, HashMap}, diff --git a/src/main/src/general/data/m_data_general/batch_handler.rs b/src/main/src/general/data/m_data_general/batch_handler.rs index c5420ce..6352c99 100644 --- a/src/main/src/general/data/m_data_general/batch_handler.rs +++ b/src/main/src/general/data/m_data_general/batch_handler.rs @@ -3,6 +3,8 @@ use crate::general::network::{ proto::BatchDataResponse, m_p2p::RPCResponsor, }; +use crate::general::data::m_data_general::dataitem::{WriteSplitDataTaskHandle, WriteSplitDataTaskGroup}; +use super::UniqueId; use std::sync::Arc; use tokio::sync::Mutex; use tracing; diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index 475d1d2..0db88f5 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -17,7 +17,8 @@ use crate::general::{ network::{ m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}, proto::{ - self, DataMeta, WriteOneDataResponse, + self, BatchDataBlockType, DataMeta, DataMetaGetRequest, DataVersionScheduleRequest, + WriteOneDataRequest, WriteOneDataResponse, }, proto_ext::ProtoExtDataItem, }, @@ -25,10 +26,10 @@ use crate::general::{ use crate::{ general::{ data::m_kv_store_engine::{KeyLockGuard, KeyType}, - network::{proto_ext::DataItemExt}, + network::{msg_pack::MsgPack, proto_ext::DataItemExt}, }, logical_module_view_impl, - result::{WSError, WSResult, WSResultExt, WsSerialErr, WsNetworkLogicErr}, + result::{WSError, WSResult, WSResultExt, WsRuntimeErr, WsSerialErr, WsNetworkLogicErr}, sys::{LogicalModule, LogicalModuleNewArgs, NodeID}, util::{JoinHandleWrapper, container::async_init_map::AsyncInitMap}, }; @@ -36,8 +37,11 @@ use crate::{result::WsDataError, sys::LogicalModulesRef}; use async_trait::async_trait; use camelpaste::paste; use core::str; +use enum_as_inner::EnumAsInner; +use dashmap::DashMap; use serde::{Deserialize, Serialize}; +use std::ops::Range; use std::{ collections::{BTreeSet, HashMap, HashSet}, sync::Arc, @@ -45,8 +49,12 @@ use std::{ sync::atomic::{AtomicU32, Ordering}, }; use tokio::sync::Semaphore; +use tokio::task::JoinHandle; use tokio::task::JoinError; use ws_derive::LogicalModule; +use std::future::Future; +use tokio::sync::mpsc; +use 
tokio::sync::oneshot; logical_module_view_impl!(DataGeneralView); logical_module_view_impl!(DataGeneralView, p2p, P2PModule); diff --git a/src/main/src/master/app/fddg.rs b/src/main/src/master/app/fddg.rs index 31ce5ee..3bbad97 100644 --- a/src/main/src/master/app/fddg.rs +++ b/src/main/src/master/app/fddg.rs @@ -3,10 +3,14 @@ use crate::util::container::sync_trie::SyncedTrie; use crate::{ general::{ app::{AppType, FnMeta}, + data::{self, m_data_general::DataItemIdx}, + network::proto, }, result::WSResult, }; +use dashmap::DashMap; use std::collections::HashMap; +use std::collections::HashSet; // function data dependency graph // - need update when app uploaded diff --git a/src/main/src/master/app/m_app_master.rs b/src/main/src/master/app/m_app_master.rs index cf25a67..52f2d86 100644 --- a/src/main/src/master/app/m_app_master.rs +++ b/src/main/src/master/app/m_app_master.rs @@ -1,13 +1,20 @@ use crate::general::app::m_executor::Executor; use crate::general::app::AppMetaManager; +use crate::general::app::{AffinityPattern, AffinityRule, AppType, FnMeta, NodeTag}; use crate::general::network::m_p2p::P2PModule; +use crate::general::network::m_p2p::RPCCaller; +use crate::general::network::proto::sche::{self, distribute_task_req::Trigger}; use crate::logical_module_view_impl; use crate::master::app::fddg::FDDGMgmt; -use crate::master::m_master::{Master}; -use crate::result::{WSResult}; +use crate::master::m_master::{FunctionTriggerContext, Master}; +use crate::result::{WSResult, WsFuncError}; +use crate::sys::NodeID; use crate::sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef}; use crate::util::JoinHandleWrapper; use async_trait::async_trait; +use std::collections::{HashMap, HashSet}; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::time::Duration; use ws_derive::LogicalModule; logical_module_view_impl!(MasterAppMgmtView); diff --git a/src/main/src/master/data/m_data_master.rs b/src/main/src/master/data/m_data_master.rs index 02a9501..bd6605c 100644 --- a/src/main/src/master/data/m_data_master.rs +++ b/src/main/src/master/data/m_data_master.rs @@ -1,6 +1,10 @@ +use crate::general::app::m_executor::EventCtx; use crate::general::app::m_executor::Executor; +use crate::general::app::m_executor::FnExeCtxAsync; +use crate::general::app::m_executor::FnExeCtxAsyncAllowedType; use crate::general::app::AppMetaManager; use crate::general::app::DataEventTrigger; +use crate::general::app::{AffinityPattern, AffinityRule, NodeTag}; use crate::general::network::m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}; use crate::general::network::proto::{ self, DataVersionScheduleRequest, DataVersionScheduleResponse, @@ -12,7 +16,7 @@ use crate::util::JoinHandleWrapper; use crate::{ general::data::{ m_data_general::{ - CacheMode, DataGeneral, DataSetMetaBuilder, DataSplit, + CacheMode, DataGeneral, DataItemIdx, DataSetMeta, DataSetMetaBuilder, DataSplit, EachNodeSplit, CACHE_MODE_MAP_COMMON_KV_MASK, CACHE_MODE_TIME_FOREVER_MASK, }, m_kv_store_engine::{KeyType, KeyTypeDataSetMeta, KvAdditionalConf, KvStoreEngine}, diff --git a/src/main/src/master/m_master.rs b/src/main/src/master/m_master.rs index 5c4849f..92e53f0 100644 --- a/src/main/src/master/m_master.rs +++ b/src/main/src/master/m_master.rs @@ -13,7 +13,7 @@ use ws_derive::LogicalModule; use crate::{ config::NodesConfig, general::{ - app::{AppMetaManager, DataEventTrigger}, + app::{AffinityPattern, AffinityRule, AppMetaManager, AppType, DataEventTrigger, FnMeta}, network::{ m_p2p::{P2PModule, RPCCaller}, proto::{ diff --git 
a/src/main/src/util/container/async_init_map.rs b/src/main/src/util/container/async_init_map.rs
index 953d1d0..3a22394 100644
--- a/src/main/src/util/container/async_init_map.rs
+++ b/src/main/src/util/container/async_init_map.rs
@@ -5,6 +5,7 @@
 use dashmap::DashMap;
 use tokio::sync::broadcast;
 use thiserror::Error;
+use crate::result::WSResult;
 
 /// Error type for AsyncInitMap
 #[derive(Debug, Error)]
diff --git a/src/main/src/util/container/sync_trie.rs b/src/main/src/util/container/sync_trie.rs
index 2043c35..a91fae0 100644
--- a/src/main/src/util/container/sync_trie.rs
+++ b/src/main/src/util/container/sync_trie.rs
@@ -1,7 +1,9 @@
-use parking_lot::{RwLock};
+use parking_lot::{RwLock, RwLockReadGuard};
 use std::collections::HashMap;
 use std::ops::{Deref, DerefMut};
 use std::sync::Arc;
+use std::thread;
+use std::time::Duration;
 
 pub struct TrieNode {
     children: HashMap>>>,

From 4e880247b0504f83b6529e56fc7fabf8bd3f5637 Mon Sep 17 00:00:00 2001
From: pa <1020401660@qq.com>
Date: Wed, 16 Apr 2025 01:26:06 +0800
Subject: [PATCH 06/26] Revert "batch finish"

This reverts commit 18b8ea52763f3305cab6d6241f0a05e6c1a4e6fa.
---
 .../src/general/data/m_data_general/batch.rs  | 367 +++++++----
 .../data/m_data_general/batch_handler.rs      |  23 +-
 .../general/data/m_data_general/dataitem.rs   | 404 +++---------
 .../src/general/data/m_data_general/mod.rs    | 579 +++++++++---------
 src/main/src/general/network/proto_ext.rs     |  56 +-
 .../src/general/network/proto_src/data.proto  |  14 +-
 src/main/src/result.rs                        |  43 +-
 src/main/src/util/container/async_init_map.rs |  64 +-
 8 files changed, 698 insertions(+), 852 deletions(-)

diff --git a/src/main/src/general/data/m_data_general/batch.rs b/src/main/src/general/data/m_data_general/batch.rs
index 9f04a8c..976c91d 100644
--- a/src/main/src/general/data/m_data_general/batch.rs
+++ b/src/main/src/general/data/m_data_general/batch.rs
@@ -26,11 +26,15 @@
 /// For detailed implementation of the regular data interface, see the data.rs module.
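// Illustrative sketch of the shape used by BatchTransfer below: blocks arrive
// in any order on one mpsc channel and a single writer task assembles them,
// so no lock guards the output buffer. Stand-in code, not part of this patch:
use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    let (block, total) = (4usize, 10usize);
    let (tx, mut rx) = mpsc::channel::<(usize, Vec<u8>)>(8);
    let writer = tokio::spawn(async move {
        let mut buf = vec![0u8; total];
        while let Some((idx, data)) = rx.recv().await {
            let start = idx * block;
            buf[start..start + data.len()].copy_from_slice(&data);
        }
        buf // channel closed => all blocks written
    });
    for idx in [2usize, 0, 1] { // out-of-order arrival is fine
        let start = idx * block;
        let len = ((idx + 1) * block).min(total) - start;
        tx.send((idx, vec![idx as u8 + 1; len])).await.unwrap();
    }
    drop(tx);
    assert_eq!(&writer.await.unwrap()[8..10], &[3, 3]);
}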
use super::*; use crate::general::network::proto; -use tokio::io::{AsyncReadExt, AsyncSeekExt}; -use tokio::sync::Semaphore; -use std::sync::Arc; +use base64::Engine; +use crate::general::network::m_p2p::RPCResponsor; +use tokio::io::AsyncWriteExt; +use dashmap::DashMap; +use std::sync::atomic::{AtomicU64, Ordering}; use std::time::Duration; -use crate::general::data::m_data_general::dataitem::DataItemSource; +use tokio::sync::mpsc; +use tokio::task::JoinHandle; +use std::ops::Range; impl proto::DataItem { pub fn size(&self) -> usize { @@ -42,119 +46,274 @@ impl proto::DataItem { } } +/// 管理单个批量传输的状态 +pub(super) struct BatchTransfer { + pub unique_id: Vec, + pub version: u64, + pub block_type: proto::BatchDataBlockType, + pub total_blocks: u32, + // 使用 channel 进行数据传输 + data_sender: mpsc::Sender>, + // 写入任务 + write_task: JoinHandle>, + // 完成通知 channel + pub tx: Option>>, +} + +impl BatchTransfer { + pub async fn new( + unique_id: Vec, + version: u64, + block_type: proto::BatchDataBlockType, + total_blocks: u32, + block_size: usize, + tx: mpsc::Sender>, + ) -> WSResult { + // 创建数据传输 channel + let (data_sender, data_receiver) = mpsc::channel(total_blocks as usize); + + // 计算数据分片 + let splits = Self::calculate_splits(total_blocks as usize * block_size, block_size); + + // 为异步任务克隆 unique_id + let unique_id_for_task = unique_id.clone(); + + // 创建写入任务 + let write_task = tokio::spawn(async move { + let group = WriteSplitDataTaskGroup::new( + unique_id_for_task, + splits, + data_receiver, + block_type, + ).await?; + + group.join().await + }); + + Ok(Self { + unique_id, + version, + block_type, + total_blocks, + data_sender, + write_task, + tx: Some(tx), + }) + } + + pub async fn add_block(&self, index: u32, data: Vec) -> WSResult { + if index >= self.total_blocks { + return Ok(false); + } + + // 通过 channel 发送数据块 + self.data_sender.send(Ok(( + index as usize, + proto::DataItem::new_raw_bytes(data), + ))).await.map_err(|_| WsDataError::BatchTransferError { + unique_id: self.unique_id.clone(), + msg: "failed to send data block".to_string(), + })?; + + Ok(index == self.total_blocks - 1) + } + + #[allow(dead_code)] + pub async fn complete(mut self) -> WSResult<()> { + // 定义错误转换函数 + let join_error = |e| WsDataError::BatchTransferError { + unique_id: self.unique_id.clone(), + msg: format!("write task join failed: {}", e), + }; + + let write_error = |e| WsDataError::BatchTransferError { + unique_id: self.unique_id.clone(), + msg: format!("write data failed: {}", e), + }; + + let send_error = || WsDataError::BatchTransferError { + unique_id: self.unique_id.clone(), + msg: "send result failed".to_string(), + }; + + drop(self.data_sender); + + if let Some(tx) = self.tx.take() { + let join_result = self.write_task.await + .map_err(join_error)?; + + let data_item = join_result + .map_err(write_error)?; + + tx.send(Ok(data_item)).await + .map_err(|_| send_error())?; + } + Ok(()) + } + + // 辅助函数:计算数据分片 + fn calculate_splits(total_size: usize, block_size: usize) -> Vec> { + let mut splits = Vec::new(); + let mut offset = 0; + while offset < total_size { + let end = (offset + block_size).min(total_size); + splits.push(offset..end); + offset = end; + } + splits + } +} + +/// 管理所有进行中的批量传输 +pub(super) struct BatchManager { + transfers: DashMap, + sequence: AtomicU64, +} + +impl BatchManager { + pub fn new() -> Self { + Self { + transfers: DashMap::new(), + sequence: AtomicU64::new(0), + } + } + + pub fn next_sequence(&self) -> u64 { + self.sequence.fetch_add(1, Ordering::Relaxed) + } + + pub async fn create_transfer( 
+ &self, + unique_id: Vec, + version: u64, + block_type: proto::BatchDataBlockType, + total_blocks: u32, + tx: mpsc::Sender>, + ) -> WSResult { + let request_id = proto::BatchRequestId { + node_id: 0, // TODO: Get from config + sequence: self.next_sequence(), + }; + + let transfer = BatchTransfer::new( + unique_id.clone(), + version, + block_type, + total_blocks, + 1024 * 1024, // 1MB block size + tx, + ).await?; + + let _ = self.transfers.insert(request_id.clone(), transfer); + Ok(request_id) + } + + pub async fn handle_block( + &self, + request_id: proto::BatchRequestId, + block_index: u32, + data: Vec, + ) -> WSResult { + if let Some(transfer) = self.transfers.get(&request_id) { + let is_complete = transfer.add_block(block_index, data).await?; + if is_complete { + // Remove and complete the transfer + if let Some((_, transfer)) = self.transfers.remove(&request_id) { + transfer.complete().await? + } + } + Ok(is_complete) + } else { + Err(WsDataError::BatchTransferNotFound { + node_id: request_id.node_id, + sequence: request_id.sequence, + } + .into()) + } + } +} + impl DataGeneral { /// 发起批量数据传输 - pub async fn call_batch_data( + pub(super) async fn call_batch_data( &self, node_id: NodeID, unique_id: Vec, version: u64, data: proto::DataItem, + block_type: proto::BatchDataBlockType, ) -> WSResult { - // 调用 batch_transfer 函数处理数据传输 - async fn batch_transfer( - unique_id: Vec, - version: u64, - target_node: NodeID, - data: Arc, - view: DataGeneralView, - ) -> WSResult<()> { - let total_size = match data.as_ref() { - DataItemSource::Memory { data } => data.len(), - DataItemSource::File { path } => { - tokio::fs::metadata(path).await?.len() as usize - } + // 将数据分割成块 + let block_size = 1024 * 1024; // 1MB per block + let data_bytes = match data { + proto::DataItem { data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(bytes)) } => bytes, + proto::DataItem { data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(file_data)) } => file_data.file_content, + _ => return Err(WsDataError::InvalidDataType.into()), + }; + + let total_blocks = (data_bytes.len() + block_size - 1) / block_size; + + // 创建channel用于接收响应 + let (tx, mut rx) = mpsc::channel(1); + + // 创建传输任务 + let request_id = self.batch_manager.create_transfer( + unique_id.clone(), + version, + block_type, + total_blocks as u32, + tx, + ).await?; + + // 发送数据块 + for (i, chunk) in data_bytes.chunks(block_size).enumerate() { + let request = proto::BatchDataRequest { + request_id: Some(request_id.clone()), + block_type: block_type as i32, + block_index: i as u32, + data: chunk.to_vec(), + operation: proto::DataOpeType::Write as i32, + unique_id: unique_id.clone(), + version, }; - let total_blocks = (total_size + DEFAULT_BLOCK_SIZE - 1) / DEFAULT_BLOCK_SIZE; - let semaphore = Arc::new(Semaphore::new(32)); - let mut handles: Vec>> = Vec::new(); - - // 发送所有数据块 - for block_idx in 0..total_blocks { - // 获取信号量许可 - let permit = semaphore.clone().acquire_owned().await.unwrap(); - let offset = block_idx as usize * DEFAULT_BLOCK_SIZE; - let size = DEFAULT_BLOCK_SIZE.min(total_size - offset); - - // 读取数据块 - let block_data = match data.as_ref() { - DataItemSource::Memory { data } => data[offset..offset + size].to_vec(), - DataItemSource::File { path } => { - let mut file = tokio::fs::File::open(path).await?; - let mut buffer = vec![0; size]; - let _ = file.seek(std::io::SeekFrom::Start(offset as u64)).await?; - let _ = file.read_exact(&mut buffer).await?; - buffer - } - }; - - // 构造请求 - let request = proto::BatchDataRequest { - 
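// Illustrative sketch of the registry pattern above: BatchManager keys live
// transfers by (node_id, sequence), where the sequence is a relaxed
// fetch_add -- only uniqueness matters, not ordering. Stand-in types, not the
// real BatchTransfer:
use dashmap::DashMap;
use std::sync::atomic::{AtomicU64, Ordering};

struct Registry {
    transfers: DashMap<(u32, u64), String>, // String stands in for transfer state
    sequence: AtomicU64,
}

impl Registry {
    fn start(&self, node: u32, label: String) -> (u32, u64) {
        let id = (node, self.sequence.fetch_add(1, Ordering::Relaxed));
        let _ = self.transfers.insert(id, label);
        id
    }

    fn finish(&self, id: (u32, u64)) -> Option<String> {
        // remove hands back the value so completion logic can consume it
        self.transfers.remove(&id).map(|(_, v)| v)
    }
}

fn main() {
    let r = Registry { transfers: DashMap::new(), sequence: AtomicU64::new(0) };
    let id = r.start(7, "upload".into());
    assert_eq!(r.finish(id).as_deref(), Some("upload"));
    assert_eq!(r.finish(id), None); // already completed
}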
request_id: Some(proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, // 修复:使用 u64 - }), - dataset_unique_id: unique_id.clone(), - data_item_idx: 0, // 因为是整体传输,所以使用0 - block_type: match data.as_ref() { - DataItemSource::Memory { .. } => proto::BatchDataBlockType::Memory as i32, - DataItemSource::File { .. } => proto::BatchDataBlockType::File as i32, - }, - block_index: block_idx as u32, - data: block_data, - operation: proto::DataOpeType::Write as i32, - unique_id: unique_id.clone(), - version, - }; - - // 发送请求 - let view = view.clone(); - let handle = tokio::spawn(async move { - let _permit = permit; // 持有permit直到任务完成 - let resp = view.data_general() - .rpc_call_batch_data - .call( - view.p2p(), - target_node, - request, - Some(Duration::from_secs(30)), - ) - .await?; - - if !resp.success { - return Err(WsDataError::BatchTransferError { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, // 修复:使用 u64 - }, - msg: resp.error_message, - }.into()); - } - Ok(()) - }); - - handles.push(handle); - } - // 等待所有请求完成 - for handle in handles { - handle.await??; - } + let response = self + .rpc_call_batch_data + .call( + self.view.p2p(), + node_id, + request, + Some(Duration::from_secs(30)), + ) + .await?; - Ok(()) + if !response.success { + return Ok(response); + } } - let data = Arc::new(DataItemSource::new(data)); - batch_transfer(unique_id.clone(), version, node_id, data, self.view.clone()).await?; - - Ok(proto::BatchDataResponse { - request_id: Some(proto::BatchRequestId { - node_id: node_id, - sequence: 0, + // 等待所有块处理完成 + match rx.recv().await { + Some(Ok(_data_item)) => Ok(proto::BatchDataResponse { + request_id: Some(request_id), + success: true, + error_message: String::new(), + version, }), - success: true, - error_message: String::new(), - version, - }) + Some(Err(err)) => Ok(proto::BatchDataResponse { + request_id: Some(request_id), + success: false, + error_message: err.to_string(), + version, + }), + None => Ok(proto::BatchDataResponse { + request_id: Some(request_id), + success: false, + error_message: "transfer channel closed unexpectedly".to_string(), + version, + }), + } } + + } diff --git a/src/main/src/general/data/m_data_general/batch_handler.rs b/src/main/src/general/data/m_data_general/batch_handler.rs index 6352c99..61c61d6 100644 --- a/src/main/src/general/data/m_data_general/batch_handler.rs +++ b/src/main/src/general/data/m_data_general/batch_handler.rs @@ -1,10 +1,7 @@ use crate::general::network::{ proto::BatchDataRequest, - proto::BatchDataResponse, m_p2p::RPCResponsor, }; -use crate::general::data::m_data_general::dataitem::{WriteSplitDataTaskHandle, WriteSplitDataTaskGroup}; -use super::UniqueId; use std::sync::Arc; use tokio::sync::Mutex; use tracing; @@ -20,7 +17,6 @@ pub struct SharedWithBatchHandler { impl SharedWithBatchHandler { /// 创建新的共享状态 - #[must_use] pub fn new() -> Self { Self { responsor: Arc::new(Mutex::new(None)), @@ -36,12 +32,7 @@ impl SharedWithBatchHandler { let mut guard = self.responsor.lock().await; if let Some(old_responsor) = guard.take() { // 旧的responsor直接返回成功 - if let Err(e) = old_responsor.send_resp(BatchDataResponse { - request_id: None, // 这里需要正确的 request_id - version: 0, // 这里需要正确的版本号 - success: true, - error_message: String::new(), - }).await { + if let Err(e) = old_responsor.response(Ok(())).await { tracing::error!("Failed to respond to old request: {}", e); } } @@ -62,6 +53,10 @@ pub struct BatchReceiveState { pub handle: 
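// Illustrative sketch of the backpressure pattern above: an owned semaphore
// permit is taken before each block task is spawned and dropped when the task
// ends, capping the number of in-flight sends. Stand-in code, not part of
// this patch:
use std::sync::Arc;
use tokio::sync::Semaphore;

#[tokio::main]
async fn main() {
    let limit = Arc::new(Semaphore::new(2)); // at most 2 blocks in flight
    let mut tasks = Vec::new();
    for block_idx in 0..5u32 {
        let permit = limit.clone().acquire_owned().await.unwrap();
        tasks.push(tokio::spawn(async move {
            let _permit = permit; // held for the whole send
            // stand-in for the per-block RPC call
            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
            block_idx
        }));
    }
    for t in tasks {
        let _ = t.await.unwrap();
    }
}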
super::dataitem::WriteSplitDataTaskHandle, /// 共享状态,用于处理请求响应 pub shared: SharedWithBatchHandler, + /// 任务组,持有以保持其生命周期 + /// 当 BatchReceiveState 被 drop 时,任务组也会被 drop + /// 确保所有相关资源都被正确释放 + pub task_group: super::dataitem::WriteSplitDataTaskGroup, } impl BatchReceiveState { @@ -69,11 +64,13 @@ impl BatchReceiveState { /// /// # 参数 /// * `handle` - 写入任务句柄 - /// * `shared` - 共享状态 - pub fn new(handle: super::dataitem::WriteSplitDataTaskHandle, shared: SharedWithBatchHandler) -> Self { + /// * `task_group` - 任务组 + pub fn new(handle: super::dataitem::WriteSplitDataTaskHandle, + task_group: super::dataitem::WriteSplitDataTaskGroup) -> Self { Self { handle, - shared, + shared: SharedWithBatchHandler::new(), + task_group, } } } diff --git a/src/main/src/general/data/m_data_general/dataitem.rs b/src/main/src/general/data/m_data_general/dataitem.rs index cf40988..d82f81f 100644 --- a/src/main/src/general/data/m_data_general/dataitem.rs +++ b/src/main/src/general/data/m_data_general/dataitem.rs @@ -1,33 +1,32 @@ -use crate::general::data::m_data_general::UniqueId; +use crate::general::data::m_data_general::DataItemIdx; +use crate::general::data::m_data_general::GetOrDelDataArgType; use crate::general::network::proto; -use crate::general::data::m_data_general::{DataItemIdx, DataSplitIdx, GetOrDelDataArgType}; use crate::general::network::proto_ext::ProtoExtDataItem; -use crate::result::{WSError, WSResult, WsDataError}; +use crate::result::WSError; +use crate::result::WSResult; +use crate::result::WsDataError; +use crate::result::WsIoErr; +use crate::result::WsRuntimeErr; +use base64::Engine; +use futures::future::join_all; use futures::stream::{FuturesUnordered, StreamExt}; use std::collections::btree_set; use std::ops::Range; use std::path::PathBuf; use std::sync::Arc; -use tokio::sync::mpsc; -use tokio::sync::broadcast; -use tracing; -use base64::{engine::general_purpose::STANDARD, Engine as _}; -const DEFAULT_BLOCK_SIZE: usize = 4096; +use super::CacheModeVisitor; +use super::DataSplitIdx; -/// 用于遍历数据项索引的迭代器 -#[derive(Debug)] +// iterator for wanted dataitem idxs pub(super) enum WantIdxIter<'a> { - /// 遍历多个指定索引 PartialMany { iter: btree_set::Iter<'a, DataItemIdx>, }, - /// 遍历单个索引 PartialOne { idx: DataItemIdx, itercnt: u8, }, - /// 遍历所有或删除操作的索引 Other { ty: GetOrDelDataArgType, itercnt: u8, @@ -36,12 +35,6 @@ pub(super) enum WantIdxIter<'a> { } impl<'a> WantIdxIter<'a> { - /// 创建新的索引迭代器 - /// - /// # 参数 - /// * `ty` - 迭代类型 - /// * `itemcnt` - 数据项总数 - #[must_use] pub(super) fn new(ty: &'a GetOrDelDataArgType, itemcnt: DataItemIdx) -> Self { match ty { GetOrDelDataArgType::PartialMany { idxs } => Self::PartialMany { iter: idxs.iter() }, @@ -79,22 +72,18 @@ impl<'a> Iterator for WantIdxIter<'a> { let ret = *itercnt; *itercnt += 1; Some(ret) - } - } + } + } GetOrDelDataArgType::PartialMany { .. } | GetOrDelDataArgType::PartialOne { .. 
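// Illustrative sketch of the responsor hand-off used by update_responsor
// above: the newest responsor is parked in an Arc<Mutex<Option<_>>> and any
// previous one is taken out to be acked immediately, so only the latest
// request waits for the final result. u32 stands in for the responsor type:
use std::sync::Arc;
use tokio::sync::Mutex;

#[derive(Clone, Default)]
struct Latest {
    slot: Arc<Mutex<Option<u32>>>,
}

impl Latest {
    async fn update(&self, next: u32) -> Option<u32> {
        let mut guard = self.slot.lock().await;
        let previous = guard.take(); // caller acks this one right away
        *guard = Some(next);
        previous
    }

    async fn take_final(&self) -> Option<u32> {
        self.slot.lock().await.take()
    }
}

#[tokio::main]
async fn main() {
    let latest = Latest::default();
    assert_eq!(latest.update(1).await, None);
    assert_eq!(latest.update(2).await, Some(1)); // 1 gets an early ack
    assert_eq!(latest.take_final().await, Some(2));
}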
} => { panic!("PartialMany should be handled by iter") - } +} }, } } } -/// 共享内存区域的持有者 -/// 负责管理共享内存的所有权和生命周期 -#[derive(Debug, Clone)] pub struct SharedMemHolder { - /// 共享内存数据 data: Arc>, } @@ -112,34 +101,15 @@ impl SharedMemHolder { None } } - - pub fn as_raw_bytes(&self) -> Option<&[u8]> { - Some(self.data.as_ref()) - } + // } } -impl From for Vec { - fn from(holder: SharedMemHolder) -> Self { - holder.as_raw_bytes().expect("Failed to get raw bytes").to_vec() - } -} - -/// 共享内存区域的访问者 -/// 提供对特定范围内存的安全访问 pub struct SharedMemOwnedAccess { - /// 共享内存数据 data: Arc>, - /// 访问范围 range: Range, } impl SharedMemOwnedAccess { - /// 获取可变字节切片 - /// - /// # Safety - /// 调用者必须确保: - /// 1. 没有其他线程同时访问这块内存 - /// 2. 访问范围不超过内存边界 pub unsafe fn as_bytes_mut(&self) -> &mut [u8] { // SAFETY: // 1. We have &mut self, so we have exclusive access to this data @@ -151,12 +121,7 @@ impl SharedMemOwnedAccess { } } -/// 创建新的共享内存和访问者 -/// -/// # 参数 -/// * `splits` - 内存分片范围列表 -#[must_use] -pub fn new_shared_mem(splits: &[Range]) -> (SharedMemHolder, Vec) { +pub fn new_shared_mem(splits: &Vec>) -> (SharedMemHolder, Vec) { let len = splits.iter().map(|range| range.len()).sum(); let data = Arc::new(vec![0; len]); let owned_accesses = splits @@ -170,97 +135,39 @@ pub fn new_shared_mem(splits: &[Range]) -> (SharedMemHolder, Vec>` - 分片范围列表 -#[must_use] -pub fn calculate_splits(total_blocks: u32) -> Vec> { - let mut splits = Vec::with_capacity(total_blocks as usize); - for i in 0..total_blocks { - let start = i as usize * DEFAULT_BLOCK_SIZE; - let end = start + DEFAULT_BLOCK_SIZE; - splits.push(start..end); - } - splits -} - -/// 写入类型 -/// 支持写入文件或内存两种模式 -#[derive(Debug, Clone)] -pub enum WriteSplitDataType { - /// 文件写入模式 - File { - /// 目标文件路径 - path: PathBuf, - }, - /// 内存写入模式 - Mem { - /// 共享内存区域 - shared_mem: SharedMemHolder, - }, -} - -/// 写入分片任务组 -/// 管理一组相关的写入任务 -#[derive(Debug)] pub enum WriteSplitDataTaskGroup { - /// 文件写入模式 ToFile { - /// 任务唯一标识 unique_id: UniqueId, - /// 目标文件路径 file_path: PathBuf, - /// 任务列表 tasks: Vec>, - /// 接收新任务的通道 rx: mpsc::Receiver>, - /// 预期总大小 expected_size: usize, - /// 当前已写入大小 current_size: usize, - /// 广播通道发送端,用于通知任务完成 - broadcast_tx: Arc>, }, - /// 内存写入模式 ToMem { - /// 任务唯一标识 unique_id: UniqueId, - /// 共享内存区域 shared_mem: SharedMemHolder, - /// 任务列表 tasks: Vec>, - /// 接收新任务的通道 rx: mpsc::Receiver>, - /// 预期总大小 expected_size: usize, - /// 当前已写入大小 current_size: usize, - /// 广播通道发送端,用于通知任务完成 - broadcast_tx: Arc>, }, } impl WriteSplitDataTaskGroup { - /// 创建新的任务组 pub async fn new( unique_id: UniqueId, - splits: Vec>, + splits: Vec>, block_type: proto::BatchDataBlockType, version: u64, - ) -> WSResult<(Self, WriteSplitDataTaskHandle)> { + ) -> (Self, WriteSplitDataTaskHandle) { let expected_size = splits.iter().map(|range| range.len()).sum(); let (tx, rx) = mpsc::channel(32); - let (broadcast_tx, _) = broadcast::channel::<()>(32); - let broadcast_tx = Arc::new(broadcast_tx); match block_type { proto::BatchDataBlockType::File => { let file_path = PathBuf::from(format!("{}.data", - STANDARD.encode(&unique_id))); + base64::engine::general_purpose::STANDARD.encode(&unique_id))); let handle = WriteSplitDataTaskHandle { tx, @@ -268,7 +175,6 @@ impl WriteSplitDataTaskGroup { path: file_path.clone(), }, version, - broadcast_tx: broadcast_tx.clone(), }; let group = Self::ToFile { @@ -278,15 +184,12 @@ impl WriteSplitDataTaskGroup { rx, expected_size, current_size: 0, - broadcast_tx: broadcast_tx.clone(), }; - Ok((group, handle)) + (group, handle) } - proto::BatchDataBlockType::Memory => { - let 
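// Illustrative sketch of the invariant behind SharedMemOwnedAccess above: the
// unsafe accessor is sound only because every access owns a disjoint range.
// With the splits known up front, safe code hands out the same disjoint views
// via chunks_mut, and the borrow checker enforces the non-overlap invariant:
fn main() {
    let mut buf = vec![0u8; 10];
    let views: Vec<&mut [u8]> = buf.chunks_mut(4).collect();
    // one non-overlapping &mut [u8] per split
    assert_eq!(views.iter().map(|v| v.len()).collect::<Vec<_>>(), vec![4, 4, 2]);
}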
shared_mem = SharedMemHolder { - data: Arc::new(vec![0; expected_size]), - }; + _ => { + let shared_mem = new_shared_mem(&splits).unwrap_or_default(); let handle = WriteSplitDataTaskHandle { tx, @@ -294,7 +197,6 @@ impl WriteSplitDataTaskGroup { shared_mem: shared_mem.clone(), }, version, - broadcast_tx: broadcast_tx.clone(), }; let group = Self::ToMem { @@ -304,21 +206,15 @@ impl WriteSplitDataTaskGroup { rx, expected_size, current_size: 0, - broadcast_tx: broadcast_tx.clone(), }; - Ok((group, handle)) + (group, handle) } } } - /// 处理所有写入任务 - /// - /// # 返回 - /// * `Ok(item)` - 所有数据写入完成,返回数据项 - /// * `Err(e)` - 写入过程中出错 - pub async fn process_tasks(&mut self) -> WSResult { - let mut pending_tasks: FuturesUnordered> = FuturesUnordered::new(); + async fn process_tasks(&mut self) -> WSResult { + let mut pending_tasks = FuturesUnordered::new(); match self { Self::ToFile { tasks, .. } | @@ -347,7 +243,11 @@ impl WriteSplitDataTaskGroup { Some(completed_result) = pending_tasks.next() => { if let Err(e) = completed_result { tracing::error!("Task failed: {}", e); - return Err(WSError::WsDataError(WsDataError::BatchTransferTaskFailed { + return Err(WSError::WsDataError(WsDataError::BatchTransferFailed { + request_id: match self { + Self::ToFile { unique_id, .. } | + Self::ToMem { unique_id, .. } => unique_id.clone() + }, reason: format!("Task failed: {}", e) })); } @@ -358,10 +258,38 @@ impl WriteSplitDataTaskGroup { } } } + None = match self { + Self::ToFile { rx, .. } | + Self::ToMem { rx, .. } => rx.recv() + } => { + while let Some(completed_result) = pending_tasks.next().await { + if let Err(e) = completed_result { + tracing::error!("Task failed during cleanup: {}", e); + return Err(WSError::WsDataError(WsDataError::BatchTransferFailed { + request_id: match self { + Self::ToFile { unique_id, .. } | + Self::ToMem { unique_id, .. } => unique_id.clone() + }, + reason: format!("Task failed during cleanup: {}", e) + })); + } + match self { + Self::ToFile { current_size, .. } | + Self::ToMem { current_size, .. } => { + *current_size += DEFAULT_BLOCK_SIZE; + } + } + } + break; + } } } - Err(WSError::WsDataError(WsDataError::BatchTransferTaskFailed { + Err(WSError::WsDataError(WsDataError::BatchTransferFailed { + request_id: match self { + Self::ToFile { unique_id, .. } | + Self::ToMem { unique_id, .. } => unique_id.clone() + }, reason: "Channel closed".to_string() })) } @@ -377,15 +305,11 @@ impl WriteSplitDataTaskGroup { Self::ToFile { current_size, expected_size, file_path, unique_id, .. } => { if *current_size > *expected_size { Err(WSError::WsDataError(WsDataError::BatchTransferError { - request_id: proto::BatchRequestId { - node_id: 0, // 这里需要传入正确的node_id - sequence: 0, - }, - msg: format!("Written size {} exceeds expected size {} for unique_id {:?}", - current_size, expected_size, unique_id) + request_id: unique_id.clone(), + msg: format!("Written size {} exceeds expected size {}", current_size, expected_size) })) } else if *current_size == *expected_size { - Ok(Some(proto::DataItem::new_file_data(file_path.clone(), false))) + Ok(Some(proto::DataItem::new_file_data(file_path.clone()))) } else { Ok(None) } @@ -393,15 +317,11 @@ impl WriteSplitDataTaskGroup { Self::ToMem { current_size, expected_size, shared_mem, unique_id, .. 
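// Illustrative sketch of the drain loop in process_tasks above: spawned
// writes land in a FuturesUnordered and complete in finish order, not
// submission order. Stand-in code, not part of this patch:
use futures::stream::{FuturesUnordered, StreamExt};

#[tokio::main]
async fn main() {
    let mut pending: FuturesUnordered<_> = (0..3u32)
        .map(|i| tokio::spawn(async move { i * 10 }))
        .collect();
    let mut done = 0usize;
    while let Some(res) = pending.next().await {
        let _written = res.expect("task panicked");
        done += 1; // e.g. current_size += block size
    }
    assert_eq!(done, 3);
}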
} => { if *current_size > *expected_size { Err(WSError::WsDataError(WsDataError::BatchTransferError { - request_id: proto::BatchRequestId { - node_id: 0, // 这里需要传入正确的node_id - sequence: 0, - }, - msg: format!("Written size {} exceeds expected size {} for unique_id {:?}", - current_size, expected_size, unique_id) + request_id: unique_id.clone(), + msg: format!("Written size {} exceeds expected size {}", current_size, expected_size) })) } else if *current_size == *expected_size { - Ok(Some(proto::DataItem::new_raw_bytes(shared_mem.clone()))) + Ok(Some(proto::DataItem::new_mem_data(shared_mem.clone()))) } else { Ok(None) } @@ -412,7 +332,6 @@ impl WriteSplitDataTaskGroup { /// 写入分片任务的句柄 /// 用于提交新的分片任务和等待任务完成 -#[derive(Clone)] pub struct WriteSplitDataTaskHandle { /// 发送任务的通道 tx: mpsc::Sender>, @@ -423,8 +342,6 @@ pub struct WriteSplitDataTaskHandle { /// 1. 防止旧版本数据覆盖新版本数据 /// 2. 客户端可以通过比较版本号确认数据是否最新 version: u64, - /// 广播通道发送端,用于通知任务完成 - broadcast_tx: Arc>, } impl WriteSplitDataTaskHandle { @@ -447,43 +364,33 @@ impl WriteSplitDataTaskHandle { WriteSplitDataType::File { path } => { let path = path.clone(); let offset = idx; - let data = data.as_raw_bytes().unwrap_or(&[]).to_vec(); + let data = data.as_bytes().to_vec(); + // 启动异步任务写入文件 + // 使用 spawn 是因为文件 IO 可能比较慢,不应该阻塞当前任务 tokio::spawn(async move { - let result = tokio::fs::OpenOptions::new() + if let Err(e) = tokio::fs::OpenOptions::new() .create(true) .write(true) .open(&path) - .await; - - match result { - Ok(mut file) => { + .await + .and_then(|mut file| async move { use tokio::io::{AsyncSeekExt, AsyncWriteExt}; - if let Err(e) = async move { - // 验证seek结果 - let seek_pos = file.seek(std::io::SeekFrom::Start(offset as u64)).await?; - if seek_pos != offset as u64 { - return Err(std::io::Error::new( - std::io::ErrorKind::Other, - format!("Seek position mismatch: expected {}, got {}", offset, seek_pos) - )); - } - // write_all保证写入所有数据或返回错误 - file.write_all(&data).await?; - Ok::<_, std::io::Error>(()) - }.await { - tracing::error!("Failed to write file data at offset {}: {}", offset, e); - } - } - Err(e) => { - tracing::error!("Failed to open file at offset {}: {}", offset, e); - } + file.seek(std::io::SeekFrom::Start(offset as u64)).await?; + file.write_all(&data).await + }) + .await + { + tracing::error!("Failed to write file data at offset {}: {}", offset, e); } }) } WriteSplitDataType::Mem { shared_mem } => { let mem = shared_mem.clone(); let offset = idx; - let data = data.as_raw_bytes().unwrap_or(&[]).to_vec(); + let data = data.as_bytes().to_vec(); + // 启动异步任务写入内存 + // 使用 spawn 是因为需要保证所有写入操作都在同一个线程上执行 + // 避免多线程并发写入同一块内存导致的数据竞争 tokio::spawn(async move { unsafe { let slice = std::slice::from_raw_parts_mut( @@ -496,149 +403,34 @@ impl WriteSplitDataTaskHandle { } }; - // 发送到通道 - let _ = self.broadcast_tx.send(()); self.tx.send(task).await.map_err(|e| { - tracing::error!("Failed to submit task: channel closed, idx: {:?}, error: {}", idx, e); - WSError::WsDataError(WsDataError::DataSplitTaskError { - msg: format!("Failed to submit task: channel closed, error: {}", e) + tracing::error!("Failed to submit task: channel closed, idx: {:?}", idx); + WSError::WsDataError(WsDataError::BatchTransferFailed { + request_id: idx.into(), + reason: "Failed to submit task: channel closed".to_string() }) }) } /// 等待所有已提交的写入任务完成 /// 关闭发送端,不再接收新任务 - pub async fn wait_all_tasks(&self) -> WSResult<()> { - // 等待广播通知 - let mut rx = self.broadcast_tx.subscribe(); - rx.recv().await.map_err(|e| { - tracing::error!("Failed to wait for tasks: {}", e); - 
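// Illustrative sketch of the completion signal used by the restored
// wait_all_tasks below: dropping the last Sender closes the channel, the
// receive loop sees recv() return None, and the group can finalize. Stand-in
// code, not part of this patch:
use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::channel::<u32>(4);
    let consumer = tokio::spawn(async move {
        let mut seen = Vec::new();
        while let Some(v) = rx.recv().await {
            seen.push(v); // recv() yields None once every Sender is gone
        }
        seen
    });
    tx.send(1).await.unwrap();
    tx.send(2).await.unwrap();
    drop(tx); // completion signal: channel closes
    assert_eq!(consumer.await.unwrap(), vec![1, 2]);
}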
WSError::WsDataError(WsDataError::BatchTransferTaskFailed { - reason: format!("Failed to wait for tasks: {}", e) - }) - })?; - + pub async fn wait_all_tasks(self) -> WSResult<()> { + drop(self.tx); Ok(()) } } -#[derive(Debug)] -pub enum DataItemSource { - Memory { - data: Vec, - }, +/// 写入类型 +/// 支持写入文件或内存两种模式 +pub enum WriteSplitDataType { + /// 文件写入模式 File { + /// 目标文件路径 path: PathBuf, }, -} - -impl DataItemSource { - pub fn to_debug_string(&self) -> String { - match self { - Self::Memory { data } => { - //limit range vec - format!("Memory({:?})", data[0..10.min(data.len())].to_vec()) - } - Self::File { path } => format!("File({})", path.to_string_lossy()), - } - } - - pub fn new(data: proto::DataItem) -> Self { - match &data.data_item_dispatch { - Some(proto::data_item::DataItemDispatch::RawBytes(bytes)) => Self::Memory { - data: bytes.clone(), - }, - Some(proto::data_item::DataItemDispatch::File(file_data)) => Self::File { - path: file_data.file_name_opt.clone().into(), - }, - _ => Self::Memory { - data: Vec::new(), - }, - } - } - - pub fn block_type(&self) -> proto::BatchDataBlockType { - match self { - DataItemSource::Memory { .. } => proto::BatchDataBlockType::Memory, - DataItemSource::File { .. } => proto::BatchDataBlockType::File, - } - } - - pub async fn get_block(&self, block_idx: usize) -> WSResult> { - match self { - DataItemSource::Memory { data } => { - if block_idx == 0 { - Ok(data.clone()) - } else { - Err(WSError::WsDataError(WsDataError::SizeMismatch { - expected: data.len(), - actual: 0, - })) - } - }, - DataItemSource::File { path } => { - let content = tokio::fs::read(path).await.map_err(|_e| { - WSError::WsDataError(WsDataError::ReadDataFailed { - path: path.clone(), - }) - })?; - if block_idx == 0 { - Ok(content) - } else { - Err(WSError::WsDataError(WsDataError::SizeMismatch { - expected: content.len(), - actual: 0, - })) - } - }, - } - } -} - -use crate::general::network::proto_ext::DataItemExt; - -impl DataItemExt for DataItemSource { - fn decode_persist(data: Vec) -> WSResult { - if data.is_empty() { - return Err(WSError::WsDataError(WsDataError::DataDecodeError { - reason: "Empty data".to_string(), - data_type: "DataItemSource".to_string(), - })); - } - match data[0] { - 0 => { - let path_str = String::from_utf8(data[1..].to_vec()).map_err(|e| { - WSError::WsDataError(WsDataError::DataDecodeError { - reason: format!("Failed to decode path string: {}", e), - data_type: "DataItemSource::File".to_string(), - }) - })?; - Ok(DataItemSource::File { - path: PathBuf::from(path_str), - }) - }, - 1 => Ok(DataItemSource::Memory { - data: data[1..].to_owned(), - }), - _ => Err(WSError::WsDataError(WsDataError::DataDecodeError { - reason: format!("Unknown data item type id: {}", data[0]), - data_type: "DataItemSource".to_string(), - })) - } - } - - fn encode_persist(&self) -> Vec { - match self { - DataItemSource::File { path } => { - let mut ret = vec![0]; - ret.extend_from_slice(path.to_string_lossy().as_bytes()); - ret - } - DataItemSource::Memory { data } => { - let mut ret = vec![1]; - ret.extend_from_slice(data); - ret - } - } - } + /// 内存写入模式 + Mem { + /// 共享内存区域 + shared_mem: SharedMemHolder, + }, } diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index 0db88f5..a31196e 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -1,13 +1,9 @@ -/// 缓存模式类型 -pub type CacheMode = u16; - -pub mod dataitem; -pub mod batch; -pub mod batch_handler; +mod 
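// Illustrative sketch of the persist encoding above: encode_persist prefixes
// the payload with a one-byte tag (0 = file path, 1 = in-memory bytes) so
// decode_persist can restore the right variant. Standalone stand-in types:
#[derive(Debug, PartialEq)]
enum Item {
    File(String),
    Memory(Vec<u8>),
}

fn encode(item: &Item) -> Vec<u8> {
    match item {
        Item::File(path) => {
            let mut out = vec![0u8]; // tag byte 0
            out.extend_from_slice(path.as_bytes());
            out
        }
        Item::Memory(data) => {
            let mut out = vec![1u8]; // tag byte 1
            out.extend_from_slice(data);
            out
        }
    }
}

fn decode(bytes: &[u8]) -> Option<Item> {
    match bytes.split_first()? {
        (&0, rest) => Some(Item::File(String::from_utf8(rest.to_vec()).ok()?)),
        (&1, rest) => Some(Item::Memory(rest.to_vec())),
        _ => None, // unknown tag byte
    }
}

fn main() {
    let item = Item::Memory(vec![9, 9]);
    assert_eq!(decode(&encode(&item)), Some(item));
}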
dataitem; +mod batch; +mod batch_handler; -use crate::general::data::m_data_general::dataitem::{calculate_splits, WantIdxIter, WriteSplitDataTaskGroup, DataItemSource}; +use crate::general::data::m_data_general::dataitem::{WantIdxIter, WriteSplitDataTaskGroup}; use crate::general::data::m_data_general::batch_handler::{BatchReceiveState, SharedWithBatchHandler}; -use tokio::io::{AsyncSeekExt, AsyncReadExt}; use crate::general::{ data::m_kv_store_engine::{ @@ -31,7 +27,7 @@ use crate::{ logical_module_view_impl, result::{WSError, WSResult, WSResultExt, WsRuntimeErr, WsSerialErr, WsNetworkLogicErr}, sys::{LogicalModule, LogicalModuleNewArgs, NodeID}, - util::{JoinHandleWrapper, container::async_init_map::AsyncInitMap}, + util::JoinHandleWrapper, }; use crate::{result::WsDataError, sys::LogicalModulesRef}; use async_trait::async_trait; @@ -54,7 +50,8 @@ use tokio::task::JoinError; use ws_derive::LogicalModule; use std::future::Future; use tokio::sync::mpsc; -use tokio::sync::oneshot; + +// use super::m_appmeta_manager::AppMeta; logical_module_view_impl!(DataGeneralView); logical_module_view_impl!(DataGeneralView, p2p, P2PModule); @@ -97,9 +94,6 @@ pub fn new_data_unique_id_fn_kv(key: &[u8]) -> Vec { // format!("{}{}", DATA_UID_PREFIX_FN_KV, key_str) } -/// 唯一标识符类型 -pub type UniqueId = Vec; - #[derive(LogicalModule)] pub struct DataGeneral { view: DataGeneralView, @@ -116,27 +110,10 @@ pub struct DataGeneral { rpc_handler_get_data: RPCHandler, // 批量数据接收状态管理 - batch_receive_states: AsyncInitMap>, + batch_receive_states: DashMap, } impl DataGeneral { - pub fn inner_new(args: LogicalModuleNewArgs) -> Self { - Self { - view: DataGeneralView::new(args.logical_modules_ref.clone()), - rpc_call_data_version_schedule: RPCCaller::new(), - rpc_call_write_once_data: RPCCaller::new(), - rpc_call_batch_data: RPCCaller::new(), - rpc_call_get_data_meta: RPCCaller::new(), - rpc_call_get_data: RPCCaller::new(), - rpc_handler_write_once_data: RPCHandler::new(), - rpc_handler_batch_data: RPCHandler::new(), - rpc_handler_data_meta_update: RPCHandler::new(), - rpc_handler_get_data_meta: RPCHandler::new(), - rpc_handler_get_data: RPCHandler::new(), - batch_receive_states: AsyncInitMap::new(), - } - } - #[allow(dead_code)] fn next_batch_id(&self) -> u32 { static NEXT_BATCH_ID: AtomicU32 = AtomicU32::new(1); // 从1开始,保留0作为特殊值 @@ -145,170 +122,96 @@ impl DataGeneral { pub async fn write_data_batch( &self, - unique_id: UniqueId, + unique_id: &[u8], version: u64, data: proto::DataItem, - data_item_idx: DataItemIdx, + data_item_idx: usize, node_id: NodeID, ) -> WSResult<()> { // 调用 batch_transfer 函数处理数据传输 - async fn batch_transfer( - data_item_idx: DataItemIdx, - unique_id: UniqueId, - version: u64, - target_node: NodeID, - data: Arc, - view: DataGeneralView, - ) -> WSResult<()> { - let (tx, mut rx) = tokio::sync::mpsc::channel(32); - let mut handles = Vec::new(); + batch_transfer( + unique_id.to_vec(), + version, + node_id, + Arc::new(DataItemSource::new(data)), + self.view.clone(), + ).await + } + + async fn batch_transfer( + unique_id: Vec, + version: u64, + target_node: NodeID, + data: Arc, + view: DataGeneralView, + ) -> WSResult<()> { + let total_size = data.size().await?; + let total_blocks = (total_size + DEFAULT_BLOCK_SIZE - 1) / DEFAULT_BLOCK_SIZE; + let semaphore = Arc::new(Semaphore::new(32)); + let mut handles = Vec::new(); + + // 发送所有数据块 + for block_idx in 0..total_blocks { + // 获取信号量许可 + let permit = semaphore.clone().acquire_owned().await.unwrap(); - let data_size = match data.as_ref() { - 
DataItemSource::Memory { data } => data.len(), - DataItemSource::File { path } => { - let metadata = tokio::fs::metadata(path).await.map_err(|e| WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: 0, - }, - reason: format!("Failed to get file size: {}", e), - })?; - metadata.len() as usize - } - }; + let offset = block_idx as usize * DEFAULT_BLOCK_SIZE; + let size = DEFAULT_BLOCK_SIZE.min(total_size - offset); - // 从 batch_handler 中获取总块数 - let total_blocks = (data_size + DEFAULT_BLOCK_SIZE - 1) / DEFAULT_BLOCK_SIZE; - let splits = calculate_splits(total_blocks as u32); + // 读取数据块 + let block_data = data.read_chunk(offset, size).await?; - for (block_idx, split_range) in splits.iter().enumerate() { - let block_data = match data.as_ref() { - DataItemSource::Memory { data } => data[split_range.clone()].to_vec(), - DataItemSource::File { path } => { - // 读取文件对应块的数据 - let mut file = tokio::fs::File::open(path).await.map_err(|e| WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, - }, - reason: format!("Failed to open file: {}", e), - })?; - let mut buffer = vec![0; split_range.len()]; - // 验证seek结果 - let seek_pos = file.seek(std::io::SeekFrom::Start(split_range.start as u64)).await.map_err(|e| WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, - }, - reason: format!("Failed to seek file: {}", e), - })?; - if seek_pos != split_range.start as u64 { - return Err(WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, - }, - reason: format!("Seek position mismatch: expected {}, got {}", split_range.start, seek_pos), - }.into()); - } - // read_exact保证读取指定长度的数据或返回错误 - let _ = file.read_exact(&mut buffer).await.map_err(|e| WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, - }, - reason: format!("Failed to read file: {}", e), - })?; - buffer - } - }; - - let request = proto::BatchDataRequest { - request_id: Some(proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u64, - }), - dataset_unique_id: unique_id.clone(), - data_item_idx: data_item_idx as u32, - block_type: match data.as_ref() { - DataItemSource::Memory { .. } => proto::BatchDataBlockType::Memory as i32, - DataItemSource::File { .. 
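// Illustrative sketch of the per-block file read above: seek to the split's
// start, verify the reported position, then read_exact so a short read is an
// error rather than a truncated block. Stand-in helper, not part of this
// patch:
use tokio::io::{AsyncReadExt, AsyncSeekExt};

async fn read_block(
    path: &std::path::Path,
    range: std::ops::Range<usize>,
) -> std::io::Result<Vec<u8>> {
    let mut file = tokio::fs::File::open(path).await?;
    let pos = file.seek(std::io::SeekFrom::Start(range.start as u64)).await?;
    if pos != range.start as u64 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("seek mismatch: expected {}, got {}", range.start, pos),
        ));
    }
    let mut buf = vec![0u8; range.len()];
    file.read_exact(&mut buf).await?; // all-or-error, never a short read
    Ok(buf)
}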
} => proto::BatchDataBlockType::File as i32, - }, - block_index: block_idx as u32, - data: block_data, - operation: proto::DataOpeType::Write as i32, - unique_id: unique_id.clone(), - version, - }; - - let tx = tx.clone(); - let view = view.clone(); - - let handle = tokio::spawn(async move { - let result = view.data_general() - .rpc_call_batch_data - .call( - view.p2p(), - target_node, - request, - Some(Duration::from_secs(30)), - ) - .await; - - if let Err(e) = tx.send(result).await { - tracing::error!("Failed to send batch transfer result: {}", e); - } - }); - - handles.push(handle); - } - - drop(tx); + // 构造请求 + let request = proto::BatchDataRequest { + request_id: Some(proto::BatchRequestId { + node_id: target_node as u32, + sequence: block_idx as u32, + }), + block_type: data.block_type() as i32, + block_index: block_idx as u32, + data: block_data, + operation: proto::DataOpeType::Write as i32, + unique_id: unique_id.clone(), + version, + }; - while let Some(result) = rx.recv().await { - match result { - Ok(resp) if !resp.success => { - return Err(WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: 0, // TODO: Add proper sequence number - }, - reason: resp.error_message, - }.into()); - } - Ok(_) => continue, - Err(e) => { - return Err(WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: 0, - }, - reason: format!("RPC call failed: {}", e), - }.into()); - } - } - } - - for handle in handles { - handle.await.map_err(|e| { - WsDataError::BatchTransferFailed { + // 发送请求 + let view = view.clone(); + let handle = tokio::spawn(async move { + let _permit = permit; // 持有permit直到任务完成 + let resp = view.data_general() + .rpc_call_batch_data + .call( + view.p2p(), + target_node, + request, + Some(Duration::from_secs(30)), + ) + .await?; + + if !resp.success { + return Err(WsDataError::BatchTransferFailed { request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: 0, + node_id: target_node.into(), + sequence: block_idx.into(), }, - reason: format!("Task join failed: {}", e), - } - })?; - } - - Ok(()) + reason: resp.error_message, + }.into()); + } + + Ok(()) + }); + handles.push(handle); } - - let data = Arc::new(data.to_data_item_source()); - batch_transfer(data_item_idx,unique_id, version, node_id, data, self.view.clone()).await + + // 等待所有请求完成 + for handle in handles { + handle.await??; + } + + Ok(()) } - pub async fn get_or_del_datameta_from_master( &self, unique_id: &[u8], @@ -574,7 +477,7 @@ impl DataGeneral { view.p2p(), split_info.node_id, proto::WriteOneDataRequest { - unique_id: unique_id_clone.clone(), + unique_id: unique_id_clone, version: version_copy, data: vec![proto::DataItemWithIdx { idx: data_item_idx as u32, @@ -614,7 +517,7 @@ impl DataGeneral { let task = tokio::spawn(async move { let _permit = permit; // 持有permit直到任务完成 view.data_general() - .write_data_batch(unique_id_clone.clone(), version, data_item_cache, data_item_idx, node_id) + .write_data_batch(&unique_id_clone, version, data_item_cache, data_item_idx as usize, node_id) .await?; Ok::(proto::WriteOneDataResponse { remote_version: version, @@ -644,8 +547,8 @@ impl DataGeneral { async fn rpc_handle_write_one_data( &self, - responsor: RPCResponsor, - req: proto::WriteOneDataRequest, + responsor: RPCResponsor, + req: WriteOneDataRequest, ) { tracing::debug!("verify data meta bf write data"); let kv_store_engine = self.view.kv_store_engine(); @@ -796,14 +699,12 @@ impl DataGeneral { 
for data_with_idx in req.data.into_iter() { let proto::DataItemWithIdx { idx, data } = data_with_idx; let data = data.unwrap(); - let data_source = data.to_data_item_source(); - let data = Arc::new(data_source); - let serialize = data.as_ref().encode_persist(); + let serialize = data.encode_persist(); tracing::debug!( "writing data part uid({:?}) idx({}) item({})", req.unique_id, idx, - data.to_debug_string() + data.to_string() ); if let Err(err) = kv_store_engine.set( KeyTypeDataSetItem { @@ -983,7 +884,7 @@ impl DataGeneral { got_or_deleted.push(value); } - let (mut success, mut message): (bool, String) = if kv_ope_err.len() > 0 { + let (success, message): (bool, String) = if kv_ope_err.len() > 0 { (false, { let mut msg = String::from("KvEngine operation failed: "); for e in kv_ope_err.iter() { @@ -1002,18 +903,8 @@ impl DataGeneral { if success { for v in got_or_deleted { let decode_res = proto::DataItem::decode_persist(v.unwrap().1); - match decode_res { - Ok(item) => { - tracing::debug!("decoded data item: {:?}", item.to_string()); - got_or_deleted_checked.push(item); - } - Err(e) => { - tracing::error!("Failed to decode data item: {:?}", e); - success = false; - message = format!("Failed to decode data item: {:?}", e); - break; - } - } + tracing::debug!("decode_res type: {:?}", decode_res.to_string()); + got_or_deleted_checked.push(decode_res); } } @@ -1028,101 +919,66 @@ impl DataGeneral { Ok(()) } - /// 处理批量数据写入请求 - pub async fn rpc_handle_batch_data( + async fn rpc_handle_batch_data( &self, responsor: RPCResponsor, req: proto::BatchDataRequest, ) -> WSResult<()> { - let batch_receive_states = self.batch_receive_states.clone(); - // 预先克隆闭包外需要的字段 - let block_index = req.block_index; - let data = req.data.clone(); - let request_id = req.request_id.clone().unwrap(); - // 1. 
查找或创建状态 - let state = match self.batch_receive_states - .get_or_init(req.request_id.clone().unwrap(), async move { - // 创建任务组和句柄 - let (mut group, handle) = match WriteSplitDataTaskGroup::new( - req.unique_id.clone(), + let (state, is_new_state) = self.batch_receive_states + .entry(req.request_id.clone()) + .or_insert_with(|| { + // 通过 WriteSplitDataTaskGroup::new 创建任务组和句柄 + let (group, handle) = super::dataitem::WriteSplitDataTaskGroup::new( + req.request_id.clone(), Vec::new(), // TODO: 根据实际需求设置分片范围 - req.block_type(), - req.version, - ).await { - Ok((group, handle)) => (group, handle), - Err(e) => { - tracing::error!("Failed to create task group: {:?}", e); - return Err(e); - } - }; - - // 启动process_tasks - let _ = tokio::spawn(async move { - match group.process_tasks().await { - Ok(item) => Ok(item), - Err(e) => { - tracing::error!("Failed to process tasks: {}", e); - Err(e) - } - } - }); - - let state = Arc::new(BatchReceiveState::new(handle, SharedWithBatchHandler::new())); - let state_clone = state.clone(); - - // response task - let _=tokio::spawn(async move { - // 等待所有任务完成 - if let Err(e) = state_clone.handle.wait_all_tasks().await { - tracing::error!("Failed to wait for tasks: {}", e); - return; - } - - // 发送最终响应 - if let Some(final_responsor) = state_clone.shared.get_final_responsor().await { - if let Err(e) = final_responsor.send_resp(proto::BatchDataResponse { - request_id: Some(req.request_id.clone().unwrap()), - success: true, - error_message: String::new(), - version: state_clone.handle.version(), - }).await { - tracing::error!("Failed to send final response: {}", e); - } - } - - // 清理状态 - let _=batch_receive_states.remove(&req.request_id.unwrap()); - }); - - Ok(state) - }) - .await { - Err(e) => return Err(WSError::WsDataError(WsDataError::BatchTransferError { - request_id, - msg: format!("Failed to initialize batch state: {}", e) - })), - Ok(state) => state, - }; + req.block_type, + 0, // TODO: 根据实际需求设置版本号 + ).await; + + (super::batch_handler::BatchReceiveState::new(handle, group), true) + }); // 2. 提交分片数据 - let data_item = proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(data)), - ..Default::default() - }; state.handle.submit_split( - block_index as usize * DEFAULT_BLOCK_SIZE, - data_item, + req.block_idx * DEFAULT_BLOCK_SIZE, + req.data ).await?; // 3. 更新响应器 state.shared.update_responsor(responsor).await; + // 4. 
只在首次创建状态时启动完成监控任务 + if is_new_state { + let state_clone = state.clone(); + let request_id = req.request_id.clone(); + let batch_receive_states = self.batch_receive_states.clone(); + + tokio::spawn(async move { + // 等待所有任务完成 + if let Err(e) = state_clone.handle.wait_all_tasks().await { + tracing::error!("Failed to wait for tasks: {}", e); + return; + } + + // 发送最终响应 + if let Some(final_responsor) = state_clone.shared.get_final_responsor().await { + if let Err(e) = final_responsor.response(Ok(())).await { + tracing::error!("Failed to send final response: {}", e); + } + } + + // 清理状态 + batch_receive_states.remove(&request_id); + }); + } + Ok(()) } } -#[derive(Serialize, Deserialize, Debug, Clone)] + +#[derive(Serialize, Deserialize)] pub struct DataMetaSys { pub cache: i32, pub distribute: i32, @@ -1144,6 +1000,69 @@ impl Into for DataMetaSys { } } + +/// DataItem 数据源 +pub enum DataItemSource { + Memory { + data: Arc>, + }, + File { + path: String, + }, +} + +impl DataItemSource { + pub fn new(data: proto::DataItem) -> Self { + match &data.data_item_dispatch { + Some(proto::data_item::DataItemDispatch::RawBytes(bytes)) => Self::Memory { + data: Arc::new(bytes.clone()), + }, + Some(proto::data_item::DataItemDispatch::File(file_data)) => Self::File { + path: file_data.file_path.clone(), + }, + _ => Self::Memory { + data: Arc::new(Vec::new()), + }, + } + } +} + +impl DataItemSource { + async fn size(&self) -> WSResult { + match self { + Self::Memory { data } => Ok(data.len()), + Self::File { path } => { + let metadata = tokio::fs::metadata(path).await?; + Ok(metadata.len() as usize) + } + } + } + + async fn read_chunk(&self, offset: usize, size: usize) -> WSResult> { + match self { + Self::Memory { data } => { + let end = (offset + size).min(data.len()); + Ok(data[offset..end].to_vec()) + } + Self::File { path } => { + let mut file = tokio::fs::File::open(path).await?; + file.seek(std::io::SeekFrom::Start(offset as u64)).await?; + let mut buffer = vec![0u8; size]; + let n = file.read(&mut buffer).await?; + buffer.truncate(n); + Ok(buffer) + } + } + } + + fn block_type(&self) -> proto::BatchDataBlockType { + match self { + Self::Memory { .. } => proto::BatchDataBlockType::Memory, + Self::File { .. 
} => proto::BatchDataBlockType::File, + } + } +} + /// 数据集元信息 #[derive(Serialize, Deserialize)] pub struct DataSetMetaV1 { @@ -1153,6 +1072,8 @@ pub struct DataSetMetaV1 { pub synced_nodes: HashSet, } +pub type CacheMode = u16; + /// 数据集元信息 /// /// 注意:新建元信息请使用 `DataSetMetaBuilder` @@ -1163,10 +1084,9 @@ pub struct DataSetMetaV2 { // unique_id: Vec, api_version: u8, pub version: u64, - pub datas_splits: Vec, - pub data_metas: Vec, - pub synced_nodes: HashSet, pub cache_mode: Vec, + /// 每个数据项的分片信息,索引为数据项索引 + pub datas_splits: Vec, } impl DataSetMetaV2 { @@ -1381,11 +1301,9 @@ impl DataSetMetaBuilder { Self { building: Some(DataSetMetaV2 { version: 0, - datas_splits: vec![], - data_metas: vec![], - api_version: 2, - synced_nodes: HashSet::new(), cache_mode: vec![], + api_version: 2, + datas_splits: vec![], }), } } @@ -1428,13 +1346,95 @@ impl DataSetMetaBuilder { } } +// impl From for DataSetMetaV2 { +// fn from( +// DataSetMetaV1 { +// version, +// data_metas: _, +// synced_nodes: _, +// }: DataSetMetaV1, +// ) -> Self { +// DataSetMetaBuilder::new() +// .version(version) +// .cache_mode_pos_allnode() +// .build() +// // DataSetMetaV2 { +// // version, +// // data_metas, +// // synced_nodes, +// // } +// } +// } + +mod test { + #[test] + fn test_option_and_vec_serialization_size() { + // 定义一个具体的值 + let value: i32 = 42; + + // 创建 Option 类型的变量 + let some_value: Option = Some(value); + let none_value: Option = None; + + // 创建 Vec 类型的变量 + let empty_vec: Vec = Vec::new(); + let single_element_vec: Vec = vec![value]; + + let some_empty_vec: Option> = Some(vec![]); + let some_one_vec: Option> = Some(vec![value]); + + // 序列化 + let serialized_some = bincode::serialize(&some_value).unwrap(); + let serialized_none = bincode::serialize(&none_value).unwrap(); + let serialized_empty_vec = bincode::serialize(&empty_vec).unwrap(); + let serialized_single_element_vec = bincode::serialize(&single_element_vec).unwrap(); + let serialized_some_empty_vec = bincode::serialize(&some_empty_vec).unwrap(); + let serialized_some_one_vec = bincode::serialize(&some_one_vec).unwrap(); + + // 获取序列化后的字节大小 + let size_some = serialized_some.len(); + let size_none = serialized_none.len(); + let size_empty_vec = serialized_empty_vec.len(); + let size_single_element_vec = serialized_single_element_vec.len(); + let size_some_empty_vec = serialized_some_empty_vec.len(); + let size_some_one_vec = serialized_some_one_vec.len(); + + // 打印结果 + println!("Size of serialized Some(42): {}", size_some); + println!("Size of serialized None: {}", size_none); + println!("Size of serialized empty Vec: {}", size_empty_vec); + println!( + "Size of serialized Vec with one element (42): {}", + size_single_element_vec + ); + println!( + "Size of serialized Some(empty Vec): {}", + size_some_empty_vec + ); + println!( + "Size of serialized Some(one element Vec): {}", + size_some_one_vec + ); + + // 比较大小 + assert!( + size_some > size_none, + "Expected serialized Some to be larger than serialized None" + ); + assert!( + size_single_element_vec > size_empty_vec, + "Expected serialized Vec with one element to be larger than serialized empty Vec" + ); + } +} + pub struct GetOrDelDataArg { pub meta: Option, pub unique_id: Vec, pub ty: GetOrDelDataArgType, } -#[derive(Debug, Clone)] +#[derive(Clone)] pub enum GetOrDelDataArgType { All, Delete, @@ -1508,7 +1508,7 @@ impl LogicalModule for DataGeneral { rpc_handler_get_data: RPCHandler::new(), // 批量数据接收状态管理 - batch_receive_states: AsyncInitMap::new(), + batch_receive_states: DashMap::new(), } } @@ -1592,6 
+1592,3 @@ impl LogicalModule for DataGeneral { Ok(vec![]) } } - -#[derive(Debug, Clone, Copy)] -pub struct CacheModeVisitor(pub u16); diff --git a/src/main/src/general/network/proto_ext.rs b/src/main/src/general/network/proto_ext.rs index 0e15f7c..60f64fd 100644 --- a/src/main/src/general/network/proto_ext.rs +++ b/src/main/src/general/network/proto_ext.rs @@ -1,5 +1,4 @@ use crate::general::app::DataEventTrigger; -use crate::general::data::m_data_general::dataitem::DataItemSource; use crate::general::data::m_dist_lock::DistLockOpe; use crate::general::network::proto::sche::distribute_task_req::{ DataEventTriggerNew, DataEventTriggerWrite, Trigger, @@ -8,7 +7,6 @@ use crate::general::network::proto::sche::distribute_task_req::{ use super::proto::{self, kv::KvResponse, FileData}; use std::{ops::Range, path::Path}; -use crate::result::{WSResult, WSError, WsDataError}; pub trait ProtoExtDataItem { fn data_sz_bytes(&self) -> usize; @@ -18,7 +16,6 @@ pub trait ProtoExtDataItem { fn as_raw_bytes<'a>(&'a self) -> Option<&'a [u8]>; fn new_file_data(filepath: impl AsRef, is_dir: bool) -> Self; fn as_file_data(&self) -> Option<&proto::FileData>; - fn to_data_item_source(&self) -> DataItemSource; } impl ProtoExtDataItem for proto::DataItem { @@ -98,20 +95,6 @@ impl ProtoExtDataItem for proto::DataItem { _ => None, } } - - fn to_data_item_source(&self) -> DataItemSource { - match &self.data_item_dispatch { - Some(proto::data_item::DataItemDispatch::RawBytes(bytes)) => DataItemSource::Memory { - data: bytes.clone(), - }, - Some(proto::data_item::DataItemDispatch::File(file_data)) => DataItemSource::File { - path: file_data.file_name_opt.clone().into(), - }, - _ => DataItemSource::Memory { - data: Vec::new(), - }, - } - } } impl AsRef<[u8]> for proto::DataItem { @@ -217,43 +200,26 @@ impl KvRequestExt for proto::kv::KvRequest { } pub trait DataItemExt { - fn decode_persist(data: Vec) -> WSResult where Self: Sized; + fn decode_persist(data: Vec) -> Self; fn encode_persist<'a>(&'a self) -> Vec; } impl DataItemExt for proto::DataItem { - fn decode_persist(data: Vec) -> WSResult where Self: Sized { - if data.is_empty() { - return Err(WSError::WsDataError(WsDataError::DataDecodeError { - reason: "Empty data".to_string(), - data_type: "proto::DataItem".to_string(), - })); - } + fn decode_persist(data: Vec) -> Self { let data_item_dispatch = match data[0] { - 0 => { - let path_str = String::from_utf8(data[1..].to_vec()).map_err(|e| { - WSError::WsDataError(WsDataError::DataDecodeError { - reason: format!("Failed to decode path string: {}", e), - data_type: "proto::DataItem::File".to_string(), - }) - })?; - proto::data_item::DataItemDispatch::File(FileData { - file_name_opt: path_str, - is_dir_opt: false, - file_content: Vec::new(), - }) - }, - 1 => proto::data_item::DataItemDispatch::RawBytes(data[1..].to_vec()), + 0 => proto::data_item::DataItemDispatch::File(FileData { + file_name_opt: String::new(), + is_dir_opt: false, + file_content: data[1..].to_owned(), + }), + 1 => proto::data_item::DataItemDispatch::RawBytes(data[1..].to_owned()), _ => { - return Err(WSError::WsDataError(WsDataError::DataDecodeError { - reason: format!("Unknown data item type id: {}", data[0]), - data_type: "proto::DataItem".to_string(), - })); + panic!("unknown data item type id: {}", data[0]) } }; - Ok(Self { + Self { data_item_dispatch: Some(data_item_dispatch), - }) + } } fn encode_persist<'a>(&'a self) -> Vec { match self.data_item_dispatch.as_ref().unwrap() { diff --git a/src/main/src/general/network/proto_src/data.proto 
b/src/main/src/general/network/proto_src/data.proto index b6ae0d5..7984fcf 100644 --- a/src/main/src/general/network/proto_src/data.proto +++ b/src/main/src/general/network/proto_src/data.proto @@ -183,14 +183,12 @@ message BatchRequestId { message BatchDataRequest { BatchRequestId request_id = 1; // 请求唯一标识(节点ID + 序列号) - bytes dataset_unique_id = 2; // 数据集唯一标识 - uint32 data_item_idx = 3; // 数据项索引 - BatchDataBlockType block_type = 4; // 数据块类型(文件/内存) - uint32 block_index = 5; // 数据块索引 - bytes data = 6; // 数据块内容 - DataOpeType operation = 7; // 操作类型 - bytes unique_id = 8; // 数据唯一标识 - uint64 version = 9; // 数据版本 + BatchDataBlockType block_type = 2; // 数据块类型(文件/内存) + uint32 block_index = 3; // 数据块索引 + bytes data = 4; // 数据块内容 + DataOpeType operation = 5; // 操作类型 + bytes unique_id = 6; // 数据唯一标识 + uint64 version = 7; // 数据版本 } message BatchDataResponse { diff --git a/src/main/src/result.rs b/src/main/src/result.rs index fe823c3..e45655d 100644 --- a/src/main/src/result.rs +++ b/src/main/src/result.rs @@ -1,4 +1,4 @@ -use std::{fmt::Debug, os::unix::net::SocketAddr, sync::Arc, path::PathBuf}; +use std::{fmt::Debug, os::unix::net::SocketAddr, sync::Arc}; use async_raft::{InitializeError, RaftError}; use camelpaste::paste; @@ -250,20 +250,6 @@ pub enum WsDataError { request_id: proto::BatchRequestId, idx: DataSplitIdx, }, - BatchTransferTaskFailed { - reason: String, - }, - BatchTransferFailed { - request_id: proto::BatchRequestId, - reason: String, - }, - BatchTransferNotFound { - request_id: proto::BatchRequestId, - }, - BatchTransferError { - request_id: proto::BatchRequestId, - msg: String, - }, UnknownCacheMapMode { mode: u16, }, @@ -278,27 +264,20 @@ pub enum WsDataError { len: u8, }, ItemIdxEmpty, - VersionMismatch { - expected: u64, - actual: u64, - }, - SizeMismatch { - expected: usize, // 预期的数据大小 - actual: usize, // 实际的数据大小 + BatchTransferFailed { + request_id: proto::BatchRequestId, + reason: String, }, - ReadDataFailed { - path: PathBuf, // 读取失败的文件路径 + BatchTransferNotFound { + request_id: proto::BatchRequestId, }, - /// 数据分片任务错误 - DataSplitTaskError { + BatchTransferError { + request_id: proto::BatchRequestId, msg: String, }, - /// 数据解码错误 - DataDecodeError { - /// 错误原因 - reason: String, - /// 数据类型(用于调试) - data_type: String, + VersionMismatch { + expected: u64, + actual: u64, }, } diff --git a/src/main/src/util/container/async_init_map.rs b/src/main/src/util/container/async_init_map.rs index 3a22394..71bc54e 100644 --- a/src/main/src/util/container/async_init_map.rs +++ b/src/main/src/util/container/async_init_map.rs @@ -5,8 +5,6 @@ use dashmap::DashMap; use tokio::sync::broadcast; use thiserror::Error; -use crate::result::WSResult; - /// AsyncInitMap 的错误类型 #[derive(Debug, Error)] pub enum AsyncInitError { @@ -15,31 +13,6 @@ pub enum AsyncInitError { WaitError(broadcast::error::RecvError), } -/// Map 值的包装器,用于异步初始化Map中的值 -#[derive(Clone)] -pub struct AsyncInitMapValue { - inner: ValueState -} - -impl AsyncInitMapValue { - /// 获取就绪值的引用 - pub fn get(&self) -> Option<&V> { - self.inner.as_ready() - } - - fn new_initializing(tx: broadcast::Sender) -> Self { - Self { - inner: ValueState::Initializing(tx) - } - } - - fn new_ready(value: V) -> Self { - Self { - inner: ValueState::Ready(value) - } - } -} - /// Map 值的状态 #[derive(Clone)] enum ValueState { @@ -67,14 +40,12 @@ impl ValueState { } /// 是否已经就绪 - #[allow(dead_code)] - pub(crate) fn is_ready(&self) -> bool { + fn is_ready(&self) -> bool { matches!(self, Self::Ready(_)) } /// 是否正在初始化 - #[allow(dead_code)] - pub(crate) fn 
is_initializing(&self) -> bool { + fn is_initializing(&self) -> bool { matches!(self, Self::Initializing(_)) } } @@ -85,7 +56,7 @@ where K: Eq + Hash + Clone + Send + Sync + 'static, V: Clone + Send + Sync+'static, { - inner: Arc>>, + inner: Arc>>, } impl AsyncInitMap @@ -100,18 +71,6 @@ where } } - /// 获取一个已经初始化的值,如果值不存在或未初始化完成则返回None - pub fn get(&self, key: &K) -> Option { - self.inner.get(key) - .and_then(|entry| entry.value().get().cloned()) - } - - /// 移除一个键值对,返回被移除的值(如果存在且已初始化) - pub fn remove(&self, key: &K) -> Option { - self.inner.remove(key) - .and_then(|(_, value)| value.get().cloned()) - } - /// 获取或初始化一个值 /// /// # 参数 @@ -120,14 +79,13 @@ where /// /// # 返回 /// 返回初始化完成的值,如果初始化失败则返回错误 - pub async fn get_or_init(&self, key: K, init_fut: Fut) -> Result + pub async fn get_or_init(&self, key: K, init_fut: Fut) -> Result where - Fut: std::future::Future> + Send + 'static, - FutErr: std::fmt::Debug, + Fut: std::future::Future> + Send + 'static, { // 先尝试只读获取 if let Some(entry) = self.inner.get(&key) { - match &entry.value().inner { + match entry.value() { ValueState::Ready(v) => return Ok(v.clone()), ValueState::Initializing(tx) => { let mut rx = tx.subscribe(); @@ -152,20 +110,20 @@ where // 先通过 channel 发送值 let _ = tx.send(value.clone()); // 然后更新状态 - let _ = inner.insert(key, AsyncInitMapValue::new_ready(value)); + inner.insert(key, ValueState::Ready(value)); } Err(e) => { - let _ = inner.remove(&key); + inner.remove(&key); tracing::error!("初始化失败: {:?}", e); drop(tx); // 关闭 channel 通知错误 } } }); - AsyncInitMapValue::new_initializing(tx_clone) + ValueState::Initializing(tx_clone) }); - entry.value().inner.as_initializing() + entry.value().as_initializing() .expect("刚插入的值必定处于初始化状态") .subscribe() }; @@ -202,7 +160,7 @@ where K: Eq + Hash + Clone + Send + Sync + 'static, V: Clone + Send + Sync+'static, { - type Target = DashMap>; + type Target = DashMap>; fn deref(&self) -> &Self::Target { &self.inner From e155529bedce1ec67e6ad638b132f64095700392 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 07/26] Revert "feat: async init map" This reverts commit 5625763ed985d6cc0bf3badc95034ec488dc22b5. --- async_init_map.md | 150 ---------------- src/main/src/util/container/async_init_map.rs | 168 ------------------ src/main/src/util/container/mod.rs | 2 - 3 files changed, 320 deletions(-) delete mode 100644 async_init_map.md delete mode 100644 src/main/src/util/container/async_init_map.rs diff --git a/async_init_map.md b/async_init_map.md deleted file mode 100644 index 711a124..0000000 --- a/async_init_map.md +++ /dev/null @@ -1,150 +0,0 @@ -# AsyncInitConcurrentMap 封装(基于dashmap) - -## 设计动机 - -在 Rust 异步编程中,我们经常遇到这样的场景:需要一个并发 Map,同时要支持异步初始化。 - -### 现有方案的问题 - -1. **DashMap 的 or_insert 限制**: -```rust -// DashMap 的 or_insert_with 是同步的 -map.entry(key).or_insert_with(|| { - // 这里不能直接用 async 函数 - // 如果在这里调用 block_on 会导致严重问题 -}); -``` - -2. 
**同步调用异步的问题**: - - 如果在同步上下文中调用异步函数(如使用 block_on) - - 当前线程会被阻塞 - - 导致其他异步任务无法调度 - - 可能引发死锁 - -### 解决方案 - -我们的方案是将异步初始化逻辑从 entry 的回调中分离出来: - -```rust -// 不在 or_insert_with 中执行异步初始化 -let entry = map.entry(key).or_insert_with(|| { - // 只创建初始状态 - ValueState::Initializing(tx) -}); - -// 在单独的异步任务中执行初始化 -tokio::spawn(async move { - // 这里可以安全地执行异步操作 - match init_fut.await { - Ok(value) => { - let _ = tx.send(value.clone()); // 先发送值 - inner.insert(key, ValueState::Ready(value)); // 再更新状态 - } - Err(e) => { - inner.remove(&key); - drop(tx); // 通知错误 - } - } -}); -``` - -## 核心实现 - -### 状态管理 - -**设计原因**: -- 使用枚举保证状态转换的类型安全 -- 将通知 channel 绑定到初始化状态,确保生命周期正确 -- 避免使用额外的标志位,保持内存效率 - -```rust -enum ValueState { - Initializing(broadcast::Sender), // channel 直接传递值 - Ready(V), -} -``` - -**关键细节**: -- `Initializing` 持有 `broadcast::Sender` 而不是 `oneshot`,支持多个等待者 -- `Ready` 直接持有值,避免额外的引用计数 -- 枚举设计使得状态检查在编译时完成 - -### 读写分离设计 - -**设计原因**: -- 读操作应该尽可能快速且无阻塞 -- 写操作需要保证原子性,但要最小化锁持有时间 -- 异步等待不能持有任何锁 - -1. **快速路径(读)**: -```rust -if let Some(entry) = self.inner.get(&key) { // 只获取读锁 - match entry.value() { - ValueState::Ready(v) => return Ok(v.clone()), - ValueState::Initializing(tx) => { - let mut rx = tx.subscribe(); - drop(entry); // 立即释放读锁 - return Ok(rx.recv().await?); - } - } -} -``` - -**关键细节**: -- 使用 `get()` 而不是 `entry()`,避免不必要的写锁 -- 获取 subscriber 后立即释放锁,允许其他读者访问 -- 值的克隆在锁外进行,最小化锁持有时间 - -2. **初始化路径(写)**: -```rust -let mut rx = { // 使用代码块控制写锁范围 - let entry = self.inner.entry(key.clone()).or_insert_with(|| { - let (tx, _) = broadcast::channel(1); - // 启动异步初始化... - ValueState::Initializing(tx_clone) - }); - entry.value().as_initializing() - .expect("刚插入的值必定处于初始化状态") - .subscribe() -}; // 写锁在这里释放 -``` - -**关键细节**: -- 使用代码块限制 entry 的生命周期,确保写锁及时释放 -- `or_insert_with` 保证检查和插入的原子性 -- 初始化任务在获取 subscriber 后启动,避免竞态条件 - -### 通过 Channel 传递值 - -**设计原因**: -- 直接通过 channel 传递值,避免等待者重新查询 map -- broadcast channel 支持多个等待者同时等待初始化结果 -- 错误处理更简单,关闭 channel 即可通知所有等待者 - -```rust -// 优化后的设计 -enum ValueState { - Initializing(broadcast::Sender), // channel 直接传递值 - Ready(V), -} - -// 初始化完成时 -match init_fut.await { - Ok(value) => { - let _ = tx.send(value.clone()); // 先发送值 - inner.insert(key, ValueState::Ready(value)); // 再更新状态 - } - // ... 
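// Editor's note: the elided arm here is the error path described just
// below — remove the entry, then drop `tx`; closing the channel is the
// failure signal that wakes every subscriber (sketch, matching the
// implementation shown earlier in this document):
// Err(e) => {
//     inner.remove(&key);
//     tracing::error!("init failed: {:?}", e);
//     drop(tx); // closed channel => waiters get a RecvError
// }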
-} - -// 等待初始化时 -let mut rx = tx.subscribe(); -drop(entry); -return Ok(rx.recv().await?); // 直接从 channel 获取值,无需再查询 map -``` - -**关键细节**: -- 等待者直接从 channel 接收值,无需再次获取锁查询 map -- 使用 broadcast channel 支持多个等待者,而不是 oneshot -- channel 容量为 1 即可,因为只需要传递一次初始化结果 -- 初始化失败时,直接关闭 channel 通知所有等待者,简化错误处理 diff --git a/src/main/src/util/container/async_init_map.rs b/src/main/src/util/container/async_init_map.rs deleted file mode 100644 index 71bc54e..0000000 --- a/src/main/src/util/container/async_init_map.rs +++ /dev/null @@ -1,168 +0,0 @@ -use std::hash::Hash; -use std::sync::Arc; -use std::ops::Deref; -use dashmap::DashMap; -use tokio::sync::broadcast; -use thiserror::Error; - -/// AsyncInitMap 的错误类型 -#[derive(Debug, Error)] -pub enum AsyncInitError { - /// 等待初始化完成时发生错误 - #[error("等待初始化完成时发生错误: {0}")] - WaitError(broadcast::error::RecvError), -} - -/// Map 值的状态 -#[derive(Clone)] -enum ValueState { - /// 正在初始化,包含一个通知 channel - Initializing(broadcast::Sender), - /// 初始化完成,包含实际值 - Ready(V), -} - -impl ValueState { - /// 获取就绪值的引用 - fn as_ready(&self) -> Option<&V> { - match self { - Self::Ready(v) => Some(v), - _ => None, - } - } - - /// 获取初始化中的 sender - fn as_initializing(&self) -> Option<&broadcast::Sender> { - match self { - Self::Initializing(tx) => Some(tx), - _ => None, - } - } - - /// 是否已经就绪 - fn is_ready(&self) -> bool { - matches!(self, Self::Ready(_)) - } - - /// 是否正在初始化 - fn is_initializing(&self) -> bool { - matches!(self, Self::Initializing(_)) - } -} - -/// 支持异步初始化的并发 Map -pub struct AsyncInitMap -where - K: Eq + Hash + Clone + Send + Sync + 'static, - V: Clone + Send + Sync+'static, -{ - inner: Arc>>, -} - -impl AsyncInitMap -where - K: Eq + Hash + Clone + Send + Sync + 'static, - V: Clone + Send + Sync+'static, -{ - /// 创建新的异步初始化 Map - pub fn new() -> Self { - Self { - inner: Arc::new(DashMap::new()), - } - } - - /// 获取或初始化一个值 - /// - /// # 参数 - /// * `key` - 键 - /// * `init_fut` - 初始化 Future - /// - /// # 返回 - /// 返回初始化完成的值,如果初始化失败则返回错误 - pub async fn get_or_init(&self, key: K, init_fut: Fut) -> Result - where - Fut: std::future::Future> + Send + 'static, - { - // 先尝试只读获取 - if let Some(entry) = self.inner.get(&key) { - match entry.value() { - ValueState::Ready(v) => return Ok(v.clone()), - ValueState::Initializing(tx) => { - let mut rx = tx.subscribe(); - drop(entry); - return Ok(rx.recv().await.map_err(AsyncInitError::WaitError)?); - } - } - } - - // 使用 or_insert_with 进行原子操作并获取 rx - let mut rx = { - let entry = self.inner.entry(key.clone()).or_insert_with(|| { - let (tx, _) = broadcast::channel(1); - let tx_clone = tx.clone(); - - let inner = self.inner.clone(); - let key = key.clone(); - - let _ = tokio::spawn(async move { - match init_fut.await { - Ok(value) => { - // 先通过 channel 发送值 - let _ = tx.send(value.clone()); - // 然后更新状态 - inner.insert(key, ValueState::Ready(value)); - } - Err(e) => { - inner.remove(&key); - tracing::error!("初始化失败: {:?}", e); - drop(tx); // 关闭 channel 通知错误 - } - } - }); - - ValueState::Initializing(tx_clone) - }); - - entry.value().as_initializing() - .expect("刚插入的值必定处于初始化状态") - .subscribe() - }; - - // 等待值通过 channel 传递 - Ok(rx.recv().await.map_err(AsyncInitError::WaitError)?) 
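// Editor's note: typical call-site shape for `get_or_init` (illustrative
// sketch; the key/value types and `connect()` are hypothetical):
//
//   let map: AsyncInitMap<String, Arc<Connection>> = AsyncInitMap::new();
//   let conn = map
//       .get_or_init("db".to_string(), async {
//           connect().await.map(Arc::new) // async constructor, runs once
//       })
//       .await?;
//
// Concurrent callers with the same key share a single initialization:
// the first inserts `Initializing(tx)` and spawns the future; the rest
// subscribe and receive the value over the broadcast channel.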
- } -} - -impl Default for AsyncInitMap -where - K: Eq + Hash + Clone + Send + Sync + 'static, - V: Clone + Send + Sync+'static, -{ - fn default() -> Self { - Self::new() - } -} - -impl Clone for AsyncInitMap -where - K: Eq + Hash + Clone + Send + Sync + 'static, - V: Clone + Send + Sync+'static, -{ - fn clone(&self) -> Self { - Self { - inner: self.inner.clone(), - } - } -} - -impl Deref for AsyncInitMap -where - K: Eq + Hash + Clone + Send + Sync + 'static, - V: Clone + Send + Sync+'static, -{ - type Target = DashMap>; - - fn deref(&self) -> &Self::Target { - &self.inner - } -} diff --git a/src/main/src/util/container/mod.rs b/src/main/src/util/container/mod.rs index 20198f6..1c9a676 100644 --- a/src/main/src/util/container/mod.rs +++ b/src/main/src/util/container/mod.rs @@ -1,4 +1,2 @@ pub mod map; pub mod sync_trie; - -pub mod async_init_map; From 0637469c1715a62e0bc12f7f5519193e44d401d9 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 08/26] Revert "group in progress" This reverts commit 5542e232fab525774a65e42b5f113158a6a1f75c. --- batch_data_enhancement_plan.md | 280 ---------- .../src/general/data/m_data_general/batch.rs | 127 +++++ .../data/m_data_general/batch_handler.rs | 76 --- .../general/data/m_data_general/dataitem.rs | 506 +++++++++--------- .../src/general/data/m_data_general/mod.rs | 72 +-- 5 files changed, 376 insertions(+), 685 deletions(-) delete mode 100644 batch_data_enhancement_plan.md delete mode 100644 src/main/src/general/data/m_data_general/batch_handler.rs diff --git a/batch_data_enhancement_plan.md b/batch_data_enhancement_plan.md deleted file mode 100644 index 5137616..0000000 --- a/batch_data_enhancement_plan.md +++ /dev/null @@ -1,280 +0,0 @@ -# 批量数据处理改进计划 - -## 1. 删除代码 [根据review.md] - -### 1.1 src/main/src/general/data/m_data_general/batch.rs -1. 删除 BatchManager 结构体及其实现 -2. 删除 BatchTransfer 结构体及其实现 - -### 1.2 src/main/src/general/data/m_data_general/mod.rs -1. 删除 DataGeneral 中的 batch_manager 字段 -2. 删除 DataGeneral::new() 中的相关初始化代码 - -## 2. 错误处理增强 [根据review.md] - -### 2.1 修改 src/main/src/result.rs -```rust -pub enum WsDataError { - BatchTransferFailed { - request_id: proto::BatchRequestId, - reason: String, - }, - BatchTransferNotFound { - request_id: proto::BatchRequestId, - }, - BatchTransferError { - request_id: proto::BatchRequestId, - msg: String, - }, - WriteDataFailed { - request_id: proto::BatchRequestId, - }, - SplitTaskFailed { - request_id: proto::BatchRequestId, - idx: DataSplitIdx, - }, - VersionMismatch { - expected: u64, - actual: u64, - }, -} -``` - -## 3. 
新增代码 [根据review.md] - -### 3.1 src/main/src/general/data/m_data_general/task.rs - -#### WriteSplitDataTaskHandle -```rust -pub struct WriteSplitDataTaskHandle { - tx: mpsc::Sender>, - write_type: WriteSplitDataType, - version: u64, -} - -enum WriteSplitDataType { - File { path: PathBuf }, - Mem { shared_mem: SharedMemHolder }, -} -``` - -#### WriteSplitDataTaskGroup -```rust -enum WriteSplitDataTaskGroup { - ToFile { - unique_id: UniqueId, - file_path: PathBuf, - tasks: Vec>, - rx: mpsc::Receiver>, - expected_size: usize, - current_size: usize, - }, - ToMem { - unique_id: UniqueId, - shared_mem: SharedMemHolder, - tasks: Vec>, - rx: mpsc::Receiver>, - expected_size: usize, - current_size: usize, - } -} -``` - -### 3.2 src/main/src/general/data/m_data_general/mod.rs - -#### SharedWithBatchHandler [根据review.md] -```rust -#[derive(Clone)] -struct SharedWithBatchHandler { - responsor: Arc>>>, -} - -impl SharedWithBatchHandler { - fn new() -> Self { - Self { - responsor: Arc::new(Mutex::new(None)), - } - } - - async fn update_responsor(&self, responsor: RPCResponsor) { - let mut guard = self.responsor.lock().await; - if let Some(old_responsor) = guard.take() { - // 旧的responsor直接返回成功 - if let Err(e) = old_responsor.response(Ok(())).await { - tracing::error!("Failed to respond to old request: {}", e); - } - } - *guard = Some(responsor); - } - - async fn get_final_responsor(&self) -> Option> { - self.responsor.lock().await.take() - } -} -``` - -#### BatchReceiveState [根据review.md] -```rust -// 由DataGeneral持有,存储在DashMap中 -// 用于管理每个批量数据传输请求的状态 -struct BatchReceiveState { - handle: WriteSplitDataTaskHandle, // 写入任务句柄 - shared: SharedWithBatchHandler, // 共享响应器 -} -``` - -impl DataGeneral { - pub fn new() -> Self { - Self { - batch_receive_states: DashMap::new(), - // ... 其他字段初始化 - } - } -} - -## 4. 功能实现 [根据design.canvas] - -### 4.1 process_tasks() 实现 [阻塞循环] -```rust -impl WriteSplitDataTaskGroup { - async fn process_tasks(&mut self) -> WSResult { - loop { - // 1. 检查完成状态 - if let Some(item) = self.try_complete() { - return Ok(item); - } - - // 2. 等待新任务或已有任务完成 - tokio::select! { - Some(new_task) = match self { - Self::ToFile { rx, .. } | - Self::ToMem { rx, .. } => rx.recv() - } => { - match self { - Self::ToFile { tasks, .. } | - Self::ToMem { tasks, .. } => { - tasks.push(new_task); - } - } - } - Some(completed_task) = futures::future::select_all(match self { - Self::ToFile { tasks, .. } | - Self::ToMem { tasks, .. } => tasks - }) => { - // 检查任务是否成功完成 - if let Err(e) = completed_task.0 { - tracing::error!("Task failed: {}", e); - return Err(WSError::WsDataError(WsDataError::BatchTransferFailed { - request_id: match self { - Self::ToFile { unique_id, .. } | - Self::ToMem { unique_id, .. } => unique_id.clone() - }, - reason: format!("Task failed: {}", e) - })); - } - // 从任务列表中移除已完成的任务 - match self { - Self::ToFile { tasks, current_size, .. } | - Self::ToMem { tasks, current_size, .. } => { - tasks.remove(completed_task.1); - // 更新当前大小 - *current_size += DEFAULT_BLOCK_SIZE; - } - } - } - None = match self { - Self::ToFile { rx, .. } | - Self::ToMem { rx, .. } => rx.recv() - } => { - // 通道关闭,直接退出 - break; - } - } - } - - Err(WSError::WsDataError(WsDataError::BatchTransferFailed { - request_id: match self { - Self::ToFile { unique_id, .. } | - Self::ToMem { unique_id, .. 
} => unique_id.clone() - }, - reason: "Channel closed".to_string() - })) - } -} -``` - -### 4.2 try_complete() 实现 [同步检查] -```rust -impl WriteSplitDataTaskGroup { - fn try_complete(&self) -> Option { - match self { - Self::ToFile { current_size, expected_size, file_path, .. } => { - if *current_size >= *expected_size { - Some(proto::DataItem::new_file_data(file_path.clone())) - } else { - None - } - } - Self::ToMem { current_size, expected_size, shared_mem, .. } => { - if *current_size >= *expected_size { - Some(proto::DataItem::new_mem_data(shared_mem.clone())) - } else { - None - } - } - } - } -} -``` - -## 5. 日志增强 [根据错误处理规范] - -### 5.1 关键点日志 -```rust -// 文件写入错误 -tracing::error!("Failed to write file data at offset {}: {}", offset, e); - -// 内存写入错误 -tracing::error!("Failed to write memory data at offset {}: {}", offset, e); - -// 任务提交错误 -tracing::error!("Failed to submit task: channel closed, idx: {:?}", idx); - -// 任务组创建 -tracing::debug!( - "Creating new task group: unique_id={:?}, block_type={:?}, version={}", - unique_id, block_type, version -); - -// 响应器更新错误 -tracing::error!("Failed to respond to old request: {}", e); -``` - -## 6. 测试计划 - -### 6.1 单元测试 -1. WriteSplitDataTaskHandle - - 版本号获取 - - 分片任务提交 - - 任务等待 - -2. WriteSplitDataTaskGroup - - 任务组创建 - - 任务处理循环 - - 完成状态检查 - -3. DataItemSource - - 内存数据读取 - - 文件数据读取 - - 块类型判断 - -4. SharedWithBatchHandler - - 响应器更新 - - 旧响应器处理 - - 最终响应器获取 - -### 6.2 集成测试 -1. 文件写入流程 -2. 内存写入流程 -3. 错误处理 -4. 并发控制 diff --git a/src/main/src/general/data/m_data_general/batch.rs b/src/main/src/general/data/m_data_general/batch.rs index 976c91d..c099321 100644 --- a/src/main/src/general/data/m_data_general/batch.rs +++ b/src/main/src/general/data/m_data_general/batch.rs @@ -315,5 +315,132 @@ impl DataGeneral { } } + /// 处理批量数据请求 + pub(super) async fn rpc_handle_batch_data( + &self, + responsor: RPCResponsor, + req: proto::BatchDataRequest, + ) -> WSResult<()> { + // Step 1: 获取数据元信息 + let meta = match self.view.get_metadata(&req.unique_id, false).await { + Ok(meta) => meta, + Err(err) => { + tracing::warn!("get data meta failed: {}", err); + responsor + .send_resp(proto::BatchDataResponse { + request_id: req.request_id, + success: false, + error_message: format!("get data meta failed: {}", err), + version: 0, + }) + .await?; + return Ok(()); + } + }; + + // Step 2: 复用 get_data 逻辑获取数据 + let get_arg = GetOrDelDataArg { + meta: Some(meta.clone()), + unique_id: req.unique_id.clone(), + ty: GetOrDelDataArgType::All, + }; + + let data_result = match self.get_or_del_data(get_arg).await { + Ok((_, data)) => data, + Err(err) => { + tracing::warn!("get data failed: {}", err); + responsor + .send_resp(proto::BatchDataResponse { + request_id: req.request_id, + success: false, + error_message: format!("get data failed: {}", err), + version: meta.version, + }) + .await?; + return Ok(()); + } + }; + + // Step 3: 创建数据分片并设置写入任务 + let mut splits = Vec::new(); + let mut offset = 0; + + for item in data_result.values() { + let size = item.size(); + splits.push(offset..offset + size); + offset += size; + } + + // 创建channel用于传输数据 + let (tx, rx) = mpsc::channel(splits.len()); + + // 发送数据到channel + for (idx, item) in data_result.into_iter() { + if let Err(err) = tx.send(Ok((idx as usize, item))).await { + tracing::error!("send data to channel failed: {}", err); + responsor + .send_resp(proto::BatchDataResponse { + request_id: req.request_id, + success: false, + error_message: format!("internal error: {}", err), + version: meta.version, + }) + .await?; + return Ok(()); + } + } + drop(tx); 
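// Editor's note: dropping the last `Sender` closes the channel, so the
// consumer's `rx.recv().await` yields `None` once the buffered items are
// drained — that is how the task group created below learns that no more
// splits are coming. Self-contained sketch of the pattern:
//
//   let (tx, mut rx) = tokio::sync::mpsc::channel::<u32>(8);
//   for i in 0..3 {
//       tx.send(i).await.unwrap();
//   }
//   drop(tx); // close the producer side
//   while let Some(v) = rx.recv().await {
//       println!("got {v}");
//   }
//   // loop ends here: channel closed and drained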
// 关闭发送端 + + // Step 4: 根据请求类型选择写入方式并执行 + let task_group = match WriteSplitDataTaskGroup::new( + req.unique_id, + splits, + rx, + proto::BatchDataBlockType::from_i32(req.block_type).unwrap_or(proto::BatchDataBlockType::Memory), + ) + .await + { + Ok(group) => group, + Err(err) => { + tracing::warn!("create write task group failed: {}", err); + responsor + .send_resp(proto::BatchDataResponse { + request_id: req.request_id, + success: false, + error_message: format!("create write task group failed: {}", err), + version: meta.version, + }) + .await?; + return Ok(()); + } + }; + + // Step 5: 等待所有写入任务完成 + match task_group.join().await { + Ok(_) => { + responsor + .send_resp(proto::BatchDataResponse { + request_id: req.request_id, + success: true, + error_message: String::new(), + version: meta.version, + }) + .await?; + Ok(()) + } + Err(err) => { + tracing::warn!("write data failed: {}", err); + responsor + .send_resp(proto::BatchDataResponse { + request_id: req.request_id, + success: false, + error_message: format!("write data failed: {}", err), + version: meta.version, + }) + .await?; + Ok(()) + } + } + } } diff --git a/src/main/src/general/data/m_data_general/batch_handler.rs b/src/main/src/general/data/m_data_general/batch_handler.rs deleted file mode 100644 index 61c61d6..0000000 --- a/src/main/src/general/data/m_data_general/batch_handler.rs +++ /dev/null @@ -1,76 +0,0 @@ -use crate::general::network::{ - proto::BatchDataRequest, - m_p2p::RPCResponsor, -}; -use std::sync::Arc; -use tokio::sync::Mutex; -use tracing; - -/// 共享状态,用于记录最新的请求响应器 -/// 当收到新的请求时,会更新响应器并自动处理旧的请求 -#[derive(Clone)] -pub struct SharedWithBatchHandler { - /// 当前活跃的响应器 - /// 使用 Arc 保证线程安全 - responsor: Arc>>>, -} - -impl SharedWithBatchHandler { - /// 创建新的共享状态 - pub fn new() -> Self { - Self { - responsor: Arc::new(Mutex::new(None)), - } - } - - /// 更新响应器 - /// 如果存在旧的响应器,会自动返回成功 - /// - /// # 参数 - /// * `responsor` - 新的响应器 - pub async fn update_responsor(&self, responsor: RPCResponsor) { - let mut guard = self.responsor.lock().await; - if let Some(old_responsor) = guard.take() { - // 旧的responsor直接返回成功 - if let Err(e) = old_responsor.response(Ok(())).await { - tracing::error!("Failed to respond to old request: {}", e); - } - } - *guard = Some(responsor); - } - - /// 获取最终的响应器 - /// 用于在所有数据都写入完成后发送最终响应 - pub async fn get_final_responsor(&self) -> Option> { - self.responsor.lock().await.take() - } -} - -/// 批量数据传输状态 -/// 用于管理单个批量数据传输请求的生命周期 -pub struct BatchReceiveState { - /// 写入任务句柄 - pub handle: super::dataitem::WriteSplitDataTaskHandle, - /// 共享状态,用于处理请求响应 - pub shared: SharedWithBatchHandler, - /// 任务组,持有以保持其生命周期 - /// 当 BatchReceiveState 被 drop 时,任务组也会被 drop - /// 确保所有相关资源都被正确释放 - pub task_group: super::dataitem::WriteSplitDataTaskGroup, -} - -impl BatchReceiveState { - /// 创建新的批量数据传输状态 - /// - /// # 参数 - /// * `handle` - 写入任务句柄 - /// * `task_group` - 任务组 - pub fn new(handle: super::dataitem::WriteSplitDataTaskHandle, - task_group: super::dataitem::WriteSplitDataTaskGroup) -> Self { - Self { - handle, - shared: SharedWithBatchHandler::new(), - task_group, - } - } -} diff --git a/src/main/src/general/data/m_data_general/dataitem.rs b/src/main/src/general/data/m_data_general/dataitem.rs index d82f81f..b755ab0 100644 --- a/src/main/src/general/data/m_data_general/dataitem.rs +++ b/src/main/src/general/data/m_data_general/dataitem.rs @@ -9,7 +9,6 @@ use crate::result::WsIoErr; use crate::result::WsRuntimeErr; use base64::Engine; use futures::future::join_all; -use futures::stream::{FuturesUnordered, StreamExt}; use 
std::collections::btree_set; use std::ops::Range; use std::path::PathBuf; @@ -137,300 +136,287 @@ pub fn new_shared_mem(splits: &Vec>) -> (SharedMemHolder, Vec>, - rx: mpsc::Receiver>, - expected_size: usize, - current_size: usize, + tasks: Vec>>, }, ToMem { - unique_id: UniqueId, shared_mem: SharedMemHolder, - tasks: Vec>, - rx: mpsc::Receiver>, - expected_size: usize, - current_size: usize, + tasks: Vec>>, }, } impl WriteSplitDataTaskGroup { pub async fn new( - unique_id: UniqueId, + unique_id: Vec, splits: Vec>, + mut rx: tokio::sync::mpsc::Receiver>, block_type: proto::BatchDataBlockType, - version: u64, - ) -> (Self, WriteSplitDataTaskHandle) { - let expected_size = splits.iter().map(|range| range.len()).sum(); - let (tx, rx) = mpsc::channel(32); + ) -> WSResult { + tracing::debug!( + "new merge task group for uid({:?}), block_type({:?})", + unique_id, + block_type + ); + if block_type == proto::BatchDataBlockType::File { + tracing::debug!("block_type is file"); + // base64 + // let file_path = PathBuf::from(format!("{:?}.data", unique_id)); + let file_path = PathBuf::from(format!( + "{}.data", + base64::engine::general_purpose::STANDARD.encode(&unique_id) + )); - match block_type { - proto::BatchDataBlockType::File => { - let file_path = PathBuf::from(format!("{}.data", - base64::engine::general_purpose::STANDARD.encode(&unique_id))); - - let handle = WriteSplitDataTaskHandle { - tx, - write_type: WriteSplitDataType::File { - path: file_path.clone(), - }, - version, - }; - - let group = Self::ToFile { - unique_id, - file_path, - tasks: Vec::new(), - rx, - expected_size, - current_size: 0, - }; - - (group, handle) - } - _ => { - let shared_mem = new_shared_mem(&splits).unwrap_or_default(); - - let handle = WriteSplitDataTaskHandle { - tx, - write_type: WriteSplitDataType::Mem { - shared_mem: shared_mem.clone(), - }, - version, - }; - - let group = Self::ToMem { - unique_id, - shared_mem, - tasks: Vec::new(), - rx, - expected_size, - current_size: 0, - }; - - (group, handle) - } - } - } + let file = std::fs::OpenOptions::new() + .create(true) + .write(true) + .open(&file_path)?; + let file = std::sync::Arc::new(file); - async fn process_tasks(&mut self) -> WSResult { - let mut pending_tasks = FuturesUnordered::new(); - - match self { - Self::ToFile { tasks, .. } | - Self::ToMem { tasks, .. } => { - for task in tasks.drain(..) { - pending_tasks.push(task); - } - } - } + let mut tasks = vec![]; + for _ in 0..splits.len() { + let parital_data = rx.recv().await.unwrap(); + match parital_data { + Err(e) => { + return Err(e); + } + Ok((splitidx, split_data_item)) => { + let file = file.clone(); + let unique_id = unique_id.clone(); + let split_range = splits[splitidx as usize].clone(); - loop { - // 1. 检查完成状态 - match self.try_complete()? { - Some(item) => return Ok(item), - None => {} // 继续等待 - } + let task = tokio::task::spawn_blocking(move || { + let Some(proto::FileData { + file_content: split_data_bytes, + .. + }) = split_data_item.as_file_data() + else { + return Err(WsDataError::SplitDataItemNotFileData { + unique_id: unique_id.clone(), + splitidx, + } + .into()); + }; - // 2. 等待新任务或已有任务完成 - tokio::select! { - Some(new_task) = match self { - Self::ToFile { rx, .. } | - Self::ToMem { rx, .. 
} => rx.recv() - } => { - pending_tasks.push(new_task); - } - Some(completed_result) = pending_tasks.next() => { - if let Err(e) = completed_result { - tracing::error!("Task failed: {}", e); - return Err(WSError::WsDataError(WsDataError::BatchTransferFailed { - request_id: match self { - Self::ToFile { unique_id, .. } | - Self::ToMem { unique_id, .. } => unique_id.clone() - }, - reason: format!("Task failed: {}", e) - })); - } - match self { - Self::ToFile { current_size, .. } | - Self::ToMem { current_size, .. } => { - *current_size += DEFAULT_BLOCK_SIZE; // 每个任务写入一个块 - } + if split_range.len() != split_data_bytes.len() { + return Err(WsDataError::SplitLenMismatch { + unique_id, + splitidx, + expect: split_range.len(), + actual: split_data_bytes.len(), + } + .into()); + } + // SAFETY: Each task writes to a different non-overlapping portion of the file + use std::os::unix::fs::FileExt; + if let Err(e) = + file.write_at(split_data_bytes, split_range.start as u64) + { + return Err(WSError::WsIoErr(WsIoErr::Io(e))); + } + Ok(()) + }); + tasks.push(task); } } - None = match self { - Self::ToFile { rx, .. } | - Self::ToMem { rx, .. } => rx.recv() - } => { - while let Some(completed_result) = pending_tasks.next().await { - if let Err(e) = completed_result { - tracing::error!("Task failed during cleanup: {}", e); - return Err(WSError::WsDataError(WsDataError::BatchTransferFailed { - request_id: match self { - Self::ToFile { unique_id, .. } | - Self::ToMem { unique_id, .. } => unique_id.clone() - }, - reason: format!("Task failed during cleanup: {}", e) - })); - } - match self { - Self::ToFile { current_size, .. } | - Self::ToMem { current_size, .. } => { - *current_size += DEFAULT_BLOCK_SIZE; + } + Ok(Self::ToFile { file_path, tasks }) + } else if block_type == proto::BatchDataBlockType::Memory { + tracing::debug!("block_type is memory"); + let (shared_mem, owned_accesses) = new_shared_mem(&splits); + let mut owned_accesses = owned_accesses + .into_iter() + .map(|access| Some(access)) + .collect::>(); + let mut tasks = vec![]; + for _ in 0..splits.len() { + let parital_data = rx.recv().await.unwrap(); + match parital_data { + Err(e) => { + return Err(e); + } + Ok((splitidx, split_data_item)) => { + let owned_access = owned_accesses[splitidx].take().unwrap(); + let unique_id = unique_id.clone(); + let task = tokio::spawn(async move { + // write to shared memory + let access = unsafe { owned_access.as_bytes_mut() }; + let Some(split_data_item) = split_data_item.as_raw_bytes() else { + return Err(WsDataError::SplitDataItemNotRawBytes { + unique_id: unique_id.clone(), + splitidx, + } + .into()); + }; + if access.len() != split_data_item.len() { + return Err(WsDataError::SplitLenMismatch { + unique_id: unique_id.clone(), + splitidx, + expect: access.len(), + actual: split_data_item.len(), + } + .into()); } - } + access.copy_from_slice(split_data_item); + Ok(()) + }); + tasks.push(task); } - break; } } + Ok(Self::ToMem { shared_mem, tasks }) + } else { + panic!("block_type should be file or memory"); } - - Err(WSError::WsDataError(WsDataError::BatchTransferFailed { - request_id: match self { - Self::ToFile { unique_id, .. } | - Self::ToMem { unique_id, .. } => unique_id.clone() - }, - reason: "Channel closed".to_string() - })) } - /// 检查写入完成状态 - /// - /// 返回: - /// - Ok(Some(item)) - 写入完成,返回数据项 - /// - Ok(None) - 写入未完成 - /// - Err(e) - 写入出错 - fn try_complete(&self) -> WSResult> { + pub async fn join(self) -> WSResult { match self { - Self::ToFile { current_size, expected_size, file_path, unique_id, .. 
} => { - if *current_size > *expected_size { - Err(WSError::WsDataError(WsDataError::BatchTransferError { - request_id: unique_id.clone(), - msg: format!("Written size {} exceeds expected size {}", current_size, expected_size) - })) - } else if *current_size == *expected_size { - Ok(Some(proto::DataItem::new_file_data(file_path.clone()))) - } else { - Ok(None) + WriteSplitDataTaskGroup::ToFile { file_path, tasks } => { + let taskress = join_all(tasks).await; + for res in taskress { + if res.is_err() { + return Err(WSError::from(WsRuntimeErr::TokioJoin { + err: res.unwrap_err(), + context: "write split data to file".to_owned(), + })); + } + if res.as_ref().unwrap().is_err() { + return Err(res.unwrap().unwrap_err()); + } } + Ok(proto::DataItem::new_file_data(file_path, false)) } - Self::ToMem { current_size, expected_size, shared_mem, unique_id, .. } => { - if *current_size > *expected_size { - Err(WSError::WsDataError(WsDataError::BatchTransferError { - request_id: unique_id.clone(), - msg: format!("Written size {} exceeds expected size {}", current_size, expected_size) - })) - } else if *current_size == *expected_size { - Ok(Some(proto::DataItem::new_mem_data(shared_mem.clone()))) - } else { - Ok(None) + WriteSplitDataTaskGroup::ToMem { + shared_mem: shared_mems, + tasks, + } => { + let taskress = join_all(tasks).await; + for res in taskress { + if res.is_err() { + return Err(WSError::from(WsRuntimeErr::TokioJoin { + err: res.unwrap_err(), + context: "write split data to file".to_owned(), + })); + } + if res.as_ref().unwrap().is_err() { + return Err(res.unwrap().unwrap_err()); + } } + // convert to dataitem + Ok(proto::DataItem::new_raw_bytes( + shared_mems + .try_take_data() + .expect("shared_mems should be take when all partial task stoped"), + )) } } } } -/// 写入分片任务的句柄 -/// 用于提交新的分片任务和等待任务完成 -pub struct WriteSplitDataTaskHandle { - /// 发送任务的通道 - tx: mpsc::Sender>, - /// 写入类型(文件或内存) - write_type: WriteSplitDataType, - /// 数据版本号 - /// 用于防止数据覆盖和保证数据一致性: - /// 1. 防止旧版本数据覆盖新版本数据 - /// 2. 
客户端可以通过比较版本号确认数据是否最新 - version: u64, -} +// pub async fn read_splitdata_from_nodes_to_file<'a>( +// ty: &GetOrDelDataArgType, +// unique_id: &[u8], +// view: &DataGeneralView, +// meta: &DataSetMetaV2, +// each_node_data: HashMap, +// ) ->ReadSplitDataTask{ +// // prepare file with meta size +// let file_path = format!("{}.data", unique_id); +// let file = File::create(file_path)?; -impl WriteSplitDataTaskHandle { - /// 获取当前数据版本号 - pub fn version(&self) -> u64 { - self.version - } +// // parallel read and write to position of file with pwrite +// let mut tasks = vec![]; +// // get idxs, one idx one file - /// 提交新的分片任务 - /// - /// # 参数 - /// * `idx` - 分片索引,表示数据在整体中的偏移位置 - /// * `data` - 分片数据 - /// - /// # 返回 - /// * `Ok(())` - 任务提交成功 - /// * `Err(e)` - 任务提交失败,可能是通道已关闭 - pub async fn submit_split(&self, idx: DataSplitIdx, data: proto::DataItem) -> WSResult<()> { - let task = match &self.write_type { - WriteSplitDataType::File { path } => { - let path = path.clone(); - let offset = idx; - let data = data.as_bytes().to_vec(); - // 启动异步任务写入文件 - // 使用 spawn 是因为文件 IO 可能比较慢,不应该阻塞当前任务 - tokio::spawn(async move { - if let Err(e) = tokio::fs::OpenOptions::new() - .create(true) - .write(true) - .open(&path) - .await - .and_then(|mut file| async move { - use tokio::io::{AsyncSeekExt, AsyncWriteExt}; - file.seek(std::io::SeekFrom::Start(offset as u64)).await?; - file.write_all(&data).await - }) - .await - { - tracing::error!("Failed to write file data at offset {}: {}", offset, e); - } - }) - } - WriteSplitDataType::Mem { shared_mem } => { - let mem = shared_mem.clone(); - let offset = idx; - let data = data.as_bytes().to_vec(); - // 启动异步任务写入内存 - // 使用 spawn 是因为需要保证所有写入操作都在同一个线程上执行 - // 避免多线程并发写入同一块内存导致的数据竞争 - tokio::spawn(async move { - unsafe { - let slice = std::slice::from_raw_parts_mut( - mem.data.as_ptr() as *mut u8, - mem.data.len() - ); - slice[offset..offset + data.len()].copy_from_slice(&data); - } - }) - } - }; +// for (node_id, req) in each_node_data { +// let view = view.clone(); +// let task = tokio::spawn(async move { +// let res = view +// .data_general() +// .rpc_call_get_data +// .call(view.p2p(), node_id, req, Some(Duration::from_secs(30))) +// .await; +// match res { +// Err(err) => { +// tracing::warn!("get/delete data failed {}", err); +// vec![] +// } +// Ok(res) => { +// res. 
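// Editor's note: the file path in this module writes each split at its
// byte offset with `FileExt::write_at` (one pwrite per split, no shared
// cursor, so tasks can write disjoint ranges concurrently). Minimal
// sketch of a positioned write:
//
//   use std::os::unix::fs::FileExt;
//   fn write_split(file: &std::fs::File, offset: u64, data: &[u8]) -> std::io::Result<()> {
//       // write_all_at retries short writes until `data` is fully written
//       file.write_all_at(data, offset)
//   }
//
// The async handle above instead seeks then writes; both end up as
// positioned writes into the same target file.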
+// // get offset and size by meta with got - self.tx.send(task).await.map_err(|e| { - tracing::error!("Failed to submit task: channel closed, idx: {:?}", idx); - WSError::WsDataError(WsDataError::BatchTransferFailed { - request_id: idx.into(), - reason: "Failed to submit task: channel closed".to_string() - }) - }) - } +// vec![] +// }, +// } +// }); +// tasks.push(task); +// } +// Ok(HashMap::new()) +// } - /// 等待所有已提交的写入任务完成 - /// 关闭发送端,不再接收新任务 - pub async fn wait_all_tasks(self) -> WSResult<()> { - drop(self.tx); - Ok(()) - } -} +// pub async fn read_splitdata_from_nodes_to_mem<'a>( +// ty: &GetOrDelDataArgType, +// unique_id: &[u8], +// view: &DataGeneralView, +// meta: &DataSetMetaV2, +// each_node_data: HashMap, +// ) -> ReadSplitDataTask { +// // read to mem +// let mut tasks = vec![]; +// for (node_id, req) in each_node_data { +// let view = view.clone(); +// let task = tokio::spawn(async move { +// let req_idxs = req.idxs.clone(); +// tracing::debug!("rpc_call_get_data start, remote({})", node_id); +// let res = view +// .data_general() +// .rpc_call_get_data +// .call(view.p2p(), node_id, req, Some(Duration::from_secs(30))) +// .await; +// tracing::debug!("rpc_call_get_data returned, remote({})", node_id); +// let res: WSResult> = res.map(|response| { +// if !response.success { +// tracing::warn!("get/delete data failed {}", response.message); +// vec![] +// } else { +// req_idxs.into_iter().zip(response.data).collect() +// } +// }); +// (node_id, res) +// }); +// tasks.push(task); +// } -/// 写入类型 -/// 支持写入文件或内存两种模式 -pub enum WriteSplitDataType { - /// 文件写入模式 - File { - /// 目标文件路径 - path: PathBuf, - }, - /// 内存写入模式 - Mem { - /// 共享内存区域 - shared_mem: SharedMemHolder, - }, -} +// let mut node_partialdatas: HashMap<(NodeID, DataItemIdx), proto::DataItem> = HashMap::new(); +// for tasks in tasks { +// let (node_id, partdata) = tasks.await.map_err(|err| { +// WSError::from(WsRuntimeErr::TokioJoin { +// err, +// context: "get_or_del_data - get_or_del ing remote data".to_owned(), +// }) +// })?; + +// match partdata { +// Err(err) => { +// return Err(err); +// } +// Ok(partdata) => { +// for (idx, data_item) in partdata { +// let _ = node_partialdatas.insert((node_id, idx as u8), data_item); +// } +// } +// } +// } + +// let mut idx_2_data_item: HashMap = HashMap::new(); +// for idx in WantIdxIter::new(&ty) { +// let data_split = &meta.datas_splits[idx as usize]; +// let data_item = data_split.recorver_data(unique_id, idx, &mut node_partialdatas)?; + +// idx_2_data_item +// .insert(idx, proto::DataItem::new_raw_bytes(data_item)) +// .expect("dataitem should be unique with idx"); +// } + +// Ok(idx_2_data_item) +// } diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index a31196e..cf03c01 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -1,10 +1,8 @@ mod dataitem; -mod batch; -mod batch_handler; - -use crate::general::data::m_data_general::dataitem::{WantIdxIter, WriteSplitDataTaskGroup}; -use crate::general::data::m_data_general::batch_handler::{BatchReceiveState, SharedWithBatchHandler}; +// mod batch; +use crate::general::data::m_data_general::dataitem::WantIdxIter; +use crate::general::data::m_data_general::dataitem::WriteSplitDataTaskGroup; use crate::general::{ data::m_kv_store_engine::{ KeyTypeDataSetItem, KeyTypeDataSetMeta, KvAdditionalConf, KvStoreEngine, KvVersion, @@ -108,9 +106,6 @@ pub struct DataGeneral { rpc_handler_data_meta_update: 
RPCHandler, rpc_handler_get_data_meta: RPCHandler, rpc_handler_get_data: RPCHandler, - - // 批量数据接收状态管理 - batch_receive_states: DashMap, } impl DataGeneral { @@ -918,66 +913,8 @@ impl DataGeneral { Ok(()) } - - async fn rpc_handle_batch_data( - &self, - responsor: RPCResponsor, - req: proto::BatchDataRequest, - ) -> WSResult<()> { - // 1. 查找或创建状态 - let (state, is_new_state) = self.batch_receive_states - .entry(req.request_id.clone()) - .or_insert_with(|| { - // 通过 WriteSplitDataTaskGroup::new 创建任务组和句柄 - let (group, handle) = super::dataitem::WriteSplitDataTaskGroup::new( - req.request_id.clone(), - Vec::new(), // TODO: 根据实际需求设置分片范围 - req.block_type, - 0, // TODO: 根据实际需求设置版本号 - ).await; - - (super::batch_handler::BatchReceiveState::new(handle, group), true) - }); - - // 2. 提交分片数据 - state.handle.submit_split( - req.block_idx * DEFAULT_BLOCK_SIZE, - req.data - ).await?; - - // 3. 更新响应器 - state.shared.update_responsor(responsor).await; - - // 4. 只在首次创建状态时启动完成监控任务 - if is_new_state { - let state_clone = state.clone(); - let request_id = req.request_id.clone(); - let batch_receive_states = self.batch_receive_states.clone(); - - tokio::spawn(async move { - // 等待所有任务完成 - if let Err(e) = state_clone.handle.wait_all_tasks().await { - tracing::error!("Failed to wait for tasks: {}", e); - return; - } - - // 发送最终响应 - if let Some(final_responsor) = state_clone.shared.get_final_responsor().await { - if let Err(e) = final_responsor.response(Ok(())).await { - tracing::error!("Failed to send final response: {}", e); - } - } - - // 清理状态 - batch_receive_states.remove(&request_id); - }); - } - - Ok(()) - } } - #[derive(Serialize, Deserialize)] pub struct DataMetaSys { pub cache: i32, @@ -1506,9 +1443,6 @@ impl LogicalModule for DataGeneral { rpc_handler_data_meta_update: RPCHandler::new(), rpc_handler_get_data_meta: RPCHandler::new(), rpc_handler_get_data: RPCHandler::new(), - - // 批量数据接收状态管理 - batch_receive_states: DashMap::new(), } } From 180e1f5b9f4626fa9252cee0064be1b44c3da418 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 09/26] Revert "update_write_data_batch" This reverts commit 6c7807d763da744da656d42bac2cfd7f68bd609c. --- review.md | 83 +++--- .../src/general/data/m_data_general/mod.rs | 248 +++++++----------- update_batch_transfer.md | 70 ----- update_write_data_batch.md | 172 ------------ 4 files changed, 125 insertions(+), 448 deletions(-) delete mode 100644 update_batch_transfer.md delete mode 100644 update_write_data_batch.md diff --git a/review.md b/review.md index 6c016c1..9569cea 100644 --- a/review.md +++ b/review.md @@ -4,43 +4,43 @@ ```rust // 1. src/main/src/general/data/m_data_general/batch.rs 中删除 // 1.1 删除 BatchManager -// pub(super) struct BatchManager { -// transfers: DashMap, -// sequence: AtomicU64, -// } - -// impl BatchManager { -// pub fn new() -> Self -// pub fn next_sequence(&self) -> u64 -// pub async fn create_transfer(...) -// pub async fn handle_block(...) -// } +pub(super) struct BatchManager { + transfers: DashMap, + sequence: AtomicU64, +} + +impl BatchManager { + pub fn new() -> Self + pub fn next_sequence(&self) -> u64 + pub async fn create_transfer(...) + pub async fn handle_block(...) +} // 1.2 删除 BatchTransfer -// pub(super) struct BatchTransfer { -// pub unique_id: Vec, -// pub version: u64, -// pub block_type: proto::BatchDataBlockType, -// pub total_blocks: u32, -// data_sender: mpsc::Sender>, -// write_task: JoinHandle>, -// pub tx: Option>>, -// } - -// impl BatchTransfer { -// pub async fn new(...) 
-// pub async fn add_block(...) -// pub async fn complete(...) -// fn calculate_splits(...) -// } +pub(super) struct BatchTransfer { + pub unique_id: Vec, + pub version: u64, + pub block_type: proto::BatchDataBlockType, + pub total_blocks: u32, + data_sender: mpsc::Sender>, + write_task: JoinHandle>, + pub tx: Option>>, +} + +impl BatchTransfer { + pub async fn new(...) + pub async fn add_block(...) + pub async fn complete(...) + fn calculate_splits(...) +} // 2. src/main/src/general/data/m_data_general/mod.rs 中删除 -// struct DataGeneral { -// batch_manager: Arc, // 删除此字段 -// } +struct DataGeneral { + batch_manager: Arc, // 删除此字段 +} // DataGeneral::new() 中删除 -// batch_manager: Arc::new(BatchManager::new()), +batch_manager: Arc::new(BatchManager::new()), ``` ## 2. 新增代码 @@ -325,27 +325,6 @@ impl WriteSplitDataTaskGroup { } } } - -/// DataItem 数据源 -pub enum DataItemSource { - Memory { - data: Arc>, - }, - File { - path: String, - }, -} - -DataItemSource 采用枚举设计,优点: -1. 类型安全:使用枚举确保数据源类型的互斥性 -2. 内存效率:文件类型只存储路径,避免一次性加载 -3. 延迟读取:只在实际需要时才读取文件数据 -4. 符合分层:配合 WriteSplitDataTaskGroup 的文件/内存写入流程 - -实现了 DataSource trait: -- size(): 获取数据总大小 -- read_chunk(): 读取指定范围的数据 -- block_type(): 返回对应的 BlockType ``` ### src/main/src/general/data/m_data_general/mod.rs diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index cf03c01..34fc0ed 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -1,5 +1,7 @@ mod dataitem; -// mod batch; +mod batch; + +use crate::general::data::m_data_general::batch::BatchManager; use crate::general::data::m_data_general::dataitem::WantIdxIter; use crate::general::data::m_data_general::dataitem::WriteSplitDataTaskGroup; @@ -11,8 +13,8 @@ use crate::general::{ network::{ m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}, proto::{ - self, BatchDataBlockType, DataMeta, DataMetaGetRequest, DataVersionScheduleRequest, - WriteOneDataRequest, WriteOneDataResponse, + self, DataMeta, DataMetaGetRequest, DataVersionScheduleRequest, WriteOneDataRequest, + WriteOneDataResponse, }, proto_ext::ProtoExtDataItem, }, @@ -47,7 +49,6 @@ use tokio::task::JoinHandle; use tokio::task::JoinError; use ws_derive::LogicalModule; use std::future::Future; -use tokio::sync::mpsc; // use super::m_appmeta_manager::AppMeta; @@ -63,9 +64,6 @@ pub type DataItemIdx = u8; pub const DATA_UID_PREFIX_APP_META: &str = "app"; pub const DATA_UID_PREFIX_FN_KV: &str = "fkv"; -/// 默认数据块大小 (4MB) -pub const DEFAULT_BLOCK_SIZE: usize = 4 * 1024 * 1024; - pub const CACHE_MODE_TIME_MASK: u16 = 0xf000; pub const CACHE_MODE_TIME_FOREVER_MASK: u16 = 0x0fff; pub const CACHE_MODE_TIME_AUTO_MASK: u16 = 0x1fff; @@ -95,6 +93,7 @@ pub fn new_data_unique_id_fn_kv(key: &[u8]) -> Vec { #[derive(LogicalModule)] pub struct DataGeneral { view: DataGeneralView, + batch_manager: Arc, pub rpc_call_data_version_schedule: RPCCaller, rpc_call_write_once_data: RPCCaller, rpc_call_batch_data: RPCCaller, @@ -115,93 +114,51 @@ impl DataGeneral { NEXT_BATCH_ID.fetch_add(1, Ordering::Relaxed) } - pub async fn write_data_batch( + async fn write_data_batch( &self, unique_id: &[u8], version: u64, data: proto::DataItem, data_item_idx: usize, node_id: NodeID, + _batch_size: usize, ) -> WSResult<()> { - // 调用 batch_transfer 函数处理数据传输 - batch_transfer( + let block_type = proto::BatchDataBlockType::Memory; + + // 创建 channel 接收数据块 + let (tx, _rx) = tokio::sync::mpsc::channel(1); + + // 创建传输任务 + let request_id = self.batch_manager.create_transfer( 
unique_id.to_vec(), version, - node_id, - Arc::new(DataItemSource::new(data)), - self.view.clone(), - ).await - } - - async fn batch_transfer( - unique_id: Vec, - version: u64, - target_node: NodeID, - data: Arc, - view: DataGeneralView, - ) -> WSResult<()> { - let total_size = data.size().await?; - let total_blocks = (total_size + DEFAULT_BLOCK_SIZE - 1) / DEFAULT_BLOCK_SIZE; - let semaphore = Arc::new(Semaphore::new(32)); - let mut handles = Vec::new(); + block_type, + data.data_sz_bytes() as u32, + tx, + ).await?; - // 发送所有数据块 - for block_idx in 0..total_blocks { - // 获取信号量许可 - let permit = semaphore.clone().acquire_owned().await.unwrap(); - - let offset = block_idx as usize * DEFAULT_BLOCK_SIZE; - let size = DEFAULT_BLOCK_SIZE.min(total_size - offset); - - // 读取数据块 - let block_data = data.read_chunk(offset, size).await?; - - // 构造请求 - let request = proto::BatchDataRequest { - request_id: Some(proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u32, - }), - block_type: data.block_type() as i32, - block_index: block_idx as u32, - data: block_data, - operation: proto::DataOpeType::Write as i32, - unique_id: unique_id.clone(), + // 使用现有的 call_batch_data 函数发送数据 + let response = self.rpc_call_batch_data.call( + self.view.p2p(), + node_id, + proto::BatchDataRequest { + unique_id: unique_id.to_vec(), version, - }; - - // 发送请求 - let view = view.clone(); - let handle = tokio::spawn(async move { - let _permit = permit; // 持有permit直到任务完成 - let resp = view.data_general() - .rpc_call_batch_data - .call( - view.p2p(), - target_node, - request, - Some(Duration::from_secs(30)), - ) - .await?; - - if !resp.success { - return Err(WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node.into(), - sequence: block_idx.into(), - }, - reason: resp.error_message, - }.into()); - } - - Ok(()) - }); - handles.push(handle); - } + request_id: Some(request_id.clone()), + block_type: block_type as i32, + block_index: data_item_idx as u32, + operation: proto::DataOpeType::Write as i32, + data: data.encode_persist(), + }, + Some(Duration::from_secs(60)), + ).await?; - // 等待所有请求完成 - for handle in handles { - handle.await??; + if !response.success { + return Err(WsDataError::BatchTransferFailed { + node: node_id, + batch: 0, + reason: response.error_message, + }.into()); } Ok(()) @@ -466,7 +423,7 @@ impl DataGeneral { let view = self.view.clone(); let version_copy = version; let task = tokio::spawn(async move { - view.data_general() + view.data_general() .rpc_call_write_once_data .call( view.p2p(), @@ -510,9 +467,9 @@ impl DataGeneral { let data_item_cache = data_item.clone(); let view = self.view.clone(); let task = tokio::spawn(async move { - let _permit = permit; // 持有permit直到任务完成 + let _permit = permit; view.data_general() - .write_data_batch(&unique_id_clone, version, data_item_cache, data_item_idx as usize, node_id) + .write_data_batch(&unique_id_clone, version, data_item_cache, data_item_idx as usize, node_id, 1024 * 1024) .await?; Ok::(proto::WriteOneDataResponse { remote_version: version, @@ -937,70 +894,9 @@ impl Into for DataMetaSys { } } - -/// DataItem 数据源 -pub enum DataItemSource { - Memory { - data: Arc>, - }, - File { - path: String, - }, -} - -impl DataItemSource { - pub fn new(data: proto::DataItem) -> Self { - match &data.data_item_dispatch { - Some(proto::data_item::DataItemDispatch::RawBytes(bytes)) => Self::Memory { - data: Arc::new(bytes.clone()), - }, - Some(proto::data_item::DataItemDispatch::File(file_data)) => Self::File { - 
path: file_data.file_path.clone(), - }, - _ => Self::Memory { - data: Arc::new(Vec::new()), - }, - } - } -} - -impl DataItemSource { - async fn size(&self) -> WSResult { - match self { - Self::Memory { data } => Ok(data.len()), - Self::File { path } => { - let metadata = tokio::fs::metadata(path).await?; - Ok(metadata.len() as usize) - } - } - } - - async fn read_chunk(&self, offset: usize, size: usize) -> WSResult> { - match self { - Self::Memory { data } => { - let end = (offset + size).min(data.len()); - Ok(data[offset..end].to_vec()) - } - Self::File { path } => { - let mut file = tokio::fs::File::open(path).await?; - file.seek(std::io::SeekFrom::Start(offset as u64)).await?; - let mut buffer = vec![0u8; size]; - let n = file.read(&mut buffer).await?; - buffer.truncate(n); - Ok(buffer) - } - } - } - - fn block_type(&self) -> proto::BatchDataBlockType { - match self { - Self::Memory { .. } => proto::BatchDataBlockType::Memory, - Self::File { .. } => proto::BatchDataBlockType::File, - } - } -} - -/// 数据集元信息 +/// depracated, latest is v2 +/// the data's all in one meta +/// https://fvd360f8oos.feishu.cn/docx/XoFudWhAgox84MxKC3ccP1TcnUh#share-Tqqkdxubpokwi5xREincb1sFnLc #[derive(Serialize, Deserialize)] pub struct DataSetMetaV1 { // unique_id: Vec, @@ -1011,9 +907,9 @@ pub struct DataSetMetaV1 { pub type CacheMode = u16; -/// 数据集元信息 +/// the data's all in one meta /// -/// 注意:新建元信息请使用 `DataSetMetaBuilder` +/// attention: new from `DataSetMetaBuilder` /// /// https://fvd360f8oos.feishu.cn/docx/XoFudWhAgox84MxKC3ccP1TcnUh#share-Tqqkdxubpokwi5xREincb1sFnLc #[derive(Serialize, Deserialize, Debug,Clone)] @@ -1022,7 +918,7 @@ pub struct DataSetMetaV2 { api_version: u8, pub version: u64, pub cache_mode: Vec, - /// 每个数据项的分片信息,索引为数据项索引 + /// the data splits for each data item, the index is the data item index pub datas_splits: Vec, } @@ -1063,8 +959,8 @@ impl EachNodeSplit { } } -/// 数据项的分片信息 -/// 我们需要知道每个数据项的分片大小 +/// the split of one dataitem +/// we need to know the split size for one data #[derive(Serialize, Deserialize, Debug, Clone)] pub struct DataSplit { pub splits: Vec, @@ -1152,6 +1048,9 @@ impl Into for DataSplit { // uint32 split_size = 1; // repeated uint32 node_ids = 2; +#[derive(Debug, Clone, Copy)] +pub struct CacheModeVisitor(pub u16); + macro_rules! generate_cache_mode_methods { // The macro takes a list of pairs of the form [time, mask] and generates methods. 
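    // sketch (hypothetical expansion, not from the source): an invocation like
    //   generate_cache_mode_methods!((time, forever), (pos, allnode));
    // would emit mask-based getter/setter pairs on CacheModeVisitor built from
    // the CACHE_MODE_* constants above; the exact method names are an assumption.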
($(($group:ident, $mode:ident)),*) => { @@ -1432,6 +1331,7 @@ impl LogicalModule for DataGeneral { { Self { view: DataGeneralView::new(args.logical_modules_ref.clone()), + batch_manager: Arc::new(BatchManager::new()), rpc_call_data_version_schedule: RPCCaller::new(), rpc_call_write_once_data: RPCCaller::new(), rpc_call_batch_data: RPCCaller::new(), @@ -1526,3 +1426,43 @@ impl LogicalModule for DataGeneral { Ok(vec![]) } } +#[allow(dead_code)] +fn flush_the_data( + log_tag: &str, + unique_id: &[u8], + version: u64, + split_size: usize, + view: &DataGeneralView, + one_data_item: &proto::DataItem, + nodeid: NodeID, + offset: usize, + dataitem_idx: usize, + write_source_data_tasks: &mut Vec>>, +) { + let log_tag = log_tag.to_owned(); + let unique_id = unique_id.to_owned(); + let view = view.clone(); + let one_data_item_split = one_data_item.clone_split_range(offset..offset + split_size); + let t = tokio::spawn(async move { + let req = WriteOneDataRequest { + unique_id, + version, + data: vec![proto::DataItemWithIdx { + idx: dataitem_idx as u32, + data: Some(one_data_item_split), + }], + }; + tracing::debug!( + "[{}] write_data flushing, target node: {}, `WriteOneDataRequest` msg_id: {}", + log_tag, + nodeid, + req.msg_id() + ); + view.data_general() + .rpc_call_write_once_data + .call(view.p2p(), nodeid, req, Some(Duration::from_secs(60))) + .await + }); + write_source_data_tasks.push(t); +} + diff --git a/update_batch_transfer.md b/update_batch_transfer.md deleted file mode 100644 index c78818a..0000000 --- a/update_batch_transfer.md +++ /dev/null @@ -1,70 +0,0 @@ -# 更新 batch_transfer 函数 - -## 1. 改动目标 -更新 batch_transfer 函数,使其严格遵循设计文档规范。 - -## 2. 相关文件 -1. `/root/prjs/waverless/src/main/src/general/data/m_data_general/mod.rs` - - batch_transfer 函数 - - write_data_batch 函数 - - DataItemSource 结构 - -## 3. 设计文档分析 -1. review.md: - - 保持使用 dyn trait 接口 - - 使用新的错误类型 WsDataError::BatchTransferFailed - - 不删除现有功能代码 - -2. design.canvas: - - batch_sender_group 组件定义了接口规范 - - 使用 DEFAULT_BLOCK_SIZE 常量 (4MB) - - 保持四层架构设计 - -## 4. 改动步骤 -1. 添加块大小常量: - ```rust - /// 默认数据块大小 (4MB) - const DEFAULT_BLOCK_SIZE: usize = 4 * 1024 * 1024; - ``` - -2. 保持 batch_transfer 函数签名: - ```rust - async fn batch_transfer( - unique_id: Vec, - version: u64, - target_node: NodeID, - data: Arc, - view: DataGeneralView, - ) -> WSResult<()> - ``` - -3. 使用正确的错误类型: - ```rust - WsDataError::BatchTransferFailed { - request_id: proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u32, - }, - reason: String, - } - ``` - -## 5. 改动分析 -1. 符合分层设计: - - 接收层:保持 dyn trait 接口 - - 写入任务层:使用 DEFAULT_BLOCK_SIZE - - 本地存储层:支持文件和内存数据 - - 结果返回层:使用新的错误类型 - -2. 保持兼容性: - - 函数签名不变 - - 错误处理规范化 - - 分块大小标准化 - -## 6. 删除内容分析 -本次改动不涉及删除操作,只是规范化和标准化现有代码。 - -## 7. 后续任务 -1. 添加更多错误处理日志 -2. 更新相关文档 -3. 添加单元测试 diff --git a/update_write_data_batch.md b/update_write_data_batch.md deleted file mode 100644 index 5c59e5b..0000000 --- a/update_write_data_batch.md +++ /dev/null @@ -1,172 +0,0 @@ -# 更新写入数据批处理函数 - -## 1. 删除代码分析(>500字) - -我们需要删除以下代码: - -```rust -// 在 src/main/src/general/data/m_data_general/mod.rs 中 -async fn transfer_data( - &self, - node_id: NodeID, - unique_id: Vec, - version: u64, - data: proto::DataItem, - data_item_idx: usize, - batch_size: usize, -) -> WSResult<()> -``` - -删除原因分析: -1. 功能重叠:transfer_data 函数与设计文档中的 batch_transfer 函数功能重叠,但实现不符合规范 -2. 参数不一致: - - transfer_data 使用了 data_item_idx 和 batch_size 参数,这在设计中并不需要 - - 缺少了 DataSource trait 的抽象 -3. 错误处理: - - 原实现的错误处理不符合四层架构的要求 - - 缺少对版本号的验证 -4. 
并发控制: - - 原实现使用了固定的信号量大小(10) - - 新设计中使用32作为并发限制 -5. 代码组织: - - 原实现将所有逻辑放在一个函数中 - - 新设计通过 DataSource trait 实现更好的抽象 -6. 资源管理: - - 原实现没有很好地管理资源生命周期 - - 新设计通过 Arc 更好地管理资源 - -删除这段代码不会影响其他功能,因为: -1. write_data_batch 函数会调用新的 batch_transfer 函数 -2. 错误处理逻辑会更加完善 -3. 并发控制更加合理 -4. 代码结构更加清晰 - -## 2. 新增代码 - -### 2.1 DataSource Trait -```rust -/// 数据源接口 -#[async_trait] -pub trait DataSource: Send + Sync + 'static { - /// 获取数据总大小 - async fn size(&self) -> WSResult; - /// 读取指定范围的数据 - async fn read_chunk(&self, offset: usize, size: usize) -> WSResult>; - /// 获取数据块类型 - fn block_type(&self) -> BatchDataBlockType; -} -``` - -### 2.2 批量传输函数 -```rust -/// 批量传输数据 -pub async fn batch_transfer( - unique_id: Vec, - version: u64, - target_node: NodeID, - data: Arc, - view: DataGeneralView, -) -> WSResult<()> { - let total_size = data.size().await?; - let total_blocks = (total_size + DEFAULT_BLOCK_SIZE - 1) / DEFAULT_BLOCK_SIZE; - let semaphore = Arc::new(Semaphore::new(32)); - let mut handles = Vec::new(); - - // 发送所有数据块 - for block_idx in 0..total_blocks { - // 获取信号量许可 - let permit = semaphore.clone().acquire_owned().await.unwrap(); - - let offset = block_idx as usize * DEFAULT_BLOCK_SIZE; - let size = DEFAULT_BLOCK_SIZE.min(total_size - offset); - - // 读取数据块 - let block_data = data.read_chunk(offset, size).await?; - - // 构造请求 - let request = proto::BatchDataRequest { - request_id: Some(proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u32, - }), - block_type: data.block_type() as i32, - block_index: block_idx as u32, - data: block_data, - operation: proto::DataOpeType::Write as i32, - unique_id: unique_id.clone(), - version, - }; - - // 发送请求 - let view = view.clone(); - let handle = tokio::spawn(async move { - let _permit = permit; // 持有permit直到任务完成 - let resp = view.data_general().rpc_call_batch_data.call( - view.p2p(), - target_node, - request, - Some(Duration::from_secs(30)), - ).await?; - - if !resp.success { - return Err(WsDataError::BatchTransferFailed { - node: target_node, - batch: block_idx as u32, - reason: resp.error_message, - }.into()); - } - - Ok(()) - }); - - handles.push(handle); - } - - // 等待所有请求完成 - for handle in handles { - handle.await??; - } - - Ok(()) -} -``` - -### 2.3 更新 write_data_batch 函数 -```rust -pub async fn write_data_batch( - &self, - unique_id: &[u8], - version: u64, - data: proto::DataItem, - data_item_idx: usize, - node_id: NodeID, - batch_size: usize, -) -> WSResult<()> { - // 创建 DataSource - let data_source = Arc::new(DataItemSource::new(data)); - - // 调用 batch_transfer 函数处理数据传输 - batch_transfer( - unique_id.to_vec(), - version, - node_id, - data_source, - self.view.clone(), - ).await -} -``` - -## 3. 实现说明 - -1. 严格按照设计文档实现 -2. 保持四层架构设计 -3. 遵循错误处理规范 -4. 使用规范中定义的数据类型 -5. 保持代码清晰可维护 - -## 4. 下一步计划 - -1. 实现 DataItemSource 结构体 -2. 添加必要的单元测试 -3. 完善错误处理 -4. 添加详细的文档注释 From a8b6714caf1aafc290a86c6d98768bf6d86ecb2b Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 10/26] Revert "backup" This reverts commit e36c89014dc80005a45fb5e6b87a4e3503005766. 
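
Note on the result.rs hunk below: after this revert,
`WsDataError::BatchTransferFailed` again carries the target node and batch
index instead of a `proto::BatchRequestId`. A minimal self-contained sketch of
the variant's shape (assuming `NodeID` is the crate's `u32` alias; the values
are hypothetical):

```rust
type NodeID = u32; // assumption: mirrors the crate-level alias

#[derive(Debug)]
enum WsDataError {
    BatchTransferFailed { node: NodeID, batch: u32, reason: String },
}

fn main() {
    // hypothetical failure: peer node 2 rejected block 0
    let e = WsDataError::BatchTransferFailed {
        node: 2,
        batch: 0,
        reason: "demo rejection".to_owned(),
    };
    println!("{e:?}");
}
```
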
--- src/main/src/result.rs | 23 +++++++------- update_error_types.md | 71 ------------------------------------------ 2 files changed, 12 insertions(+), 82 deletions(-) delete mode 100644 update_error_types.md diff --git a/src/main/src/result.rs b/src/main/src/result.rs index e45655d..62afdfe 100644 --- a/src/main/src/result.rs +++ b/src/main/src/result.rs @@ -246,10 +246,6 @@ pub enum WsDataError { expect: usize, actual: usize, }, - SplitTaskFailed { - request_id: proto::BatchRequestId, - idx: DataSplitIdx, - }, UnknownCacheMapMode { mode: u16, }, @@ -265,20 +261,25 @@ pub enum WsDataError { }, ItemIdxEmpty, BatchTransferFailed { - request_id: proto::BatchRequestId, + node: NodeID, + batch: u32, reason: String, }, + BatchTransferNotFound { - request_id: proto::BatchRequestId, + node_id: u32, + sequence: u64, + }, + + BatchBlockMissing { + unique_id: Vec, + block_index: u32, }, + BatchTransferError { - request_id: proto::BatchRequestId, + unique_id: Vec, msg: String, }, - VersionMismatch { - expected: u64, - actual: u64, - }, } #[derive(Error, Debug)] diff --git a/update_error_types.md b/update_error_types.md deleted file mode 100644 index a3db43f..0000000 --- a/update_error_types.md +++ /dev/null @@ -1,71 +0,0 @@ -# 更新错误类型结构 - -## 改动说明 -本次改动主要针对错误类型结构的更新,将在 `src/main/src/result.rs` 中修改 `WsDataError` 枚举。 - -### 1. 修改目标 -- 更新 `WsDataError` 枚举中的错误类型 -- 统一使用 `request_id` 替代之前的节点和批次号 -- 添加新的错误类型以支持分片任务 -- 确保错误信息更加明确和具体 - -### 2. 关联性分析(>500字) -本次错误类型修改与多个部分密切相关: - -1. 与批量传输模块的关联: - - 新的错误类型直接支持 `WriteSplitDataTaskHandle` 和 `WriteSplitDataTaskGroup` 的错误处理 - - 通过 `request_id` 统一标识批量传输任务,替代之前分散的节点和批次号 - - 错误类型的修改为后续删除 `BatchManager` 和 `BatchTransfer` 做准备 - -2. 与四层架构的关联: - - 错误类型覆盖了所有四层的错误场景: - * 接收层:BatchTransferNotFound 用于处理请求接收错误 - * 写入任务层:SplitTaskFailed 用于处理分片任务错误 - * 本地存储层:WriteDataFailed 用于处理写入错误 - * 结果返回层:BatchTransferError 用于处理一般性错误 - -3. 与状态管理的关联: - - 错误类型中包含 version 相关错误,支持版本验证 - - 通过 request_id 可以准确定位出错的任务状态 - - 错误信息包含足够的上下文,便于状态恢复和清理 - -4. 与日志记录的关联: - - 错误类型设计符合 tracing 库的使用规范 - - 每个错误变体都包含足够的信息用于日志记录 - - 错误信息的结构化有助于日志分析和问题定位 - -### 3. 影响分析(>500字) -本次修改将产生以下影响: - -1. 代码结构影响: - - 简化了错误处理逻辑,统一使用 request_id - - 提供了更清晰的错误类型层次 - - 改进了错误信息的可读性和可追踪性 - -2. 功能影响: - - 支持更细粒度的错误处理 - - 提供更准确的错误定位 - - 便于实现错误重试机制 - - 有助于问题诊断和调试 - -3. 性能影响: - - 错误类型的修改不会对性能造成明显影响 - - 结构化的错误信息可能略微增加内存使用 - - 日志记录的信息更加完整,可能略微增加IO开销 - -4. 维护性影响: - - 提高了代码的可维护性 - - 简化了错误处理的代码编写 - - 使错误追踪和修复更加容易 - - 有助于系统监控和问题诊断 - -5. 兼容性影响: - - 需要修改所有使用旧错误类型的代码 - - 需要更新相关的测试用例 - - 可能需要更新错误处理相关的文档 - -### 4. 执行计划 -1. 修改 src/main/src/result.rs 中的 WsDataError 枚举 -2. 更新错误类型的使用位置 -3. 添加必要的注释和文档 -4. 确保与 tracing 日志记录的集成 From fb125ff4082047d3492184833b2212a28929e5c1 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 11/26] Revert "basical edit plan" This reverts commit 4d7def389070f3ce591dd6365c1420614e14bbb1. 
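
Note on the review.md content restored below: it centres on
`WriteSplitDataTaskHandle`, whose `submit_split` spawns one write task per
split and pushes its JoinHandle into a bounded channel that the task group
drains. A minimal runnable sketch of that pattern, with simplified stand-in
types (`Handle` is not the crate's real API):

```rust
use tokio::{sync::mpsc, task::JoinHandle};

struct Handle {
    // sender half only; the task group owns the receiver
    tx: mpsc::Sender<JoinHandle<()>>,
}

impl Handle {
    async fn submit_split(&self, offset: usize, data: Vec<u8>) {
        // each split becomes its own spawned write task
        let task = tokio::spawn(async move {
            // the real code seeks to `offset` in a file or shared memory here
            let _ = (offset, data.len());
        });
        // send() only blocks when the bounded buffer is full
        if self.tx.send(task).await.is_err() {
            eprintln!("channel closed: task group already finished");
        }
    }
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::channel(32); // mirrors mpsc::channel(32) in the design
    let handle = Handle { tx };
    handle.submit_split(0, vec![0u8; 16]).await;
    drop(handle); // dropping the sender closes the channel, as wait_all_tasks does
    while let Some(task) = rx.recv().await {
        task.await.unwrap(); // the group side awaits each completed write
    }
}
```
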
--- design_of_new_batch.md | 699 ----------------------------------------- review.md | 642 ++++++++++++++++++++++++++----------- 2 files changed, 460 insertions(+), 881 deletions(-) delete mode 100755 design_of_new_batch.md mode change 100644 => 100755 review.md diff --git a/design_of_new_batch.md b/design_of_new_batch.md deleted file mode 100755 index c360c6d..0000000 --- a/design_of_new_batch.md +++ /dev/null @@ -1,699 +0,0 @@ -# 项目分析与修改计划 - - -### 变更 - -#### 核心接口定义 -```rust - - -#### WriteSplitDataTaskGroup 核心实现 -```rust -// 写入任务相关错误 -#[derive(Debug)] -pub enum WsDataErr { - WriteDataFailed { - unique_id: Vec, - }, - SplitTaskFailed { - idx: DataSplitIdx, - }, -} - -// 写入任务句柄,用于提交新的分片任务 -pub struct WriteSplitDataTaskHandle { - tx: mpsc::Sender>, - write_type: WriteSplitDataType, -} - -// 写入类型 -enum WriteSplitDataType { - File { - path: PathBuf, - }, - Mem { - shared_mem: SharedMemHolder, - }, -} - -impl WriteSplitDataTaskHandle { - // 提交新的分片任务 - pub async fn submit_split(&self, idx: DataSplitIdx, data: proto::DataItem) { - let task = match &self.write_type { - WriteSplitDataType::File { path } => { - let path = path.clone(); - let offset = idx.offset; - let data = data.as_bytes().to_vec(); - tokio::spawn(async move { - if let Err(e) = tokio::fs::OpenOptions::new() - .create(true) - .write(true) - .open(&path) - .await - .and_then(|mut file| async move { - file.seek(SeekFrom::Start(offset)).await?; - file.write_all(&data).await - }) - .await - { - tracing::error!("Failed to write file data at offset {}: {}", offset, e); - } - }) - } - WriteSplitDataType::Mem { shared_mem } => { - let mem = shared_mem.clone(); - let offset = idx.offset as usize; - let data = data.as_bytes().to_vec(); - tokio::spawn(async move { - if let Err(e) = mem.write(offset, &data).await { - tracing::error!("Failed to write memory data at offset {}: {}", offset, e); - } - }) - } - }; - - if let Err(e) = self.tx.send(task).await { - tracing::error!("Failed to submit task: channel closed, idx: {:?}", idx); - } - } -} - -// 写入任务组 -enum WriteSplitDataTaskGroup { - // 文件写入模式 - ToFile { - unique_id: UniqueId, // 任务唯一标识 - file_path: PathBuf, // 文件路径 - tasks: Vec>, // 写入任务列表 - rx: mpsc::Receiver>, // 任务接收通道 - expected_size: usize, // 预期总大小 - current_size: usize, // 当前写入大小 - manager: Arc, // 管理器引用 - }, - // 内存写入模式 - ToMem { - unique_id: UniqueId, // 任务唯一标识 - shared_mem: SharedMemHolder, // 共享内存 - tasks: Vec>, // 写入任务列表 - rx: mpsc::Receiver>, // 任务接收通道 - expected_size: usize, // 预期总大小 - current_size: usize, // 当前写入大小 - manager: Arc, // 管理器引用 - } -} - -impl WriteSplitDataTaskGroup { - // 创建新任务组 - async fn new( - unique_id: UniqueId, - splits: Vec>, - block_type: proto::BatchDataBlockType, - manager: Arc, - ) -> (Self, WriteSplitDataTaskHandle) { - // 计算预期总大小 - let expected_size = splits.iter().map(|range| range.len()).sum(); - - // 创建通道 - let (tx, rx) = mpsc::channel(32); - - match block_type { - proto::BatchDataBlockType::File => { - let file_path = PathBuf::from(format!("{}.data", - base64::engine::general_purpose::STANDARD.encode(&unique_id))); - - let handle = WriteSplitDataTaskHandle { - tx, - write_type: WriteSplitDataType::File { - path: file_path.clone(), - }, - }; - - let group = Self::ToFile { - unique_id, - file_path, - tasks: Vec::new(), - rx, - expected_size, - current_size: 0, - manager: manager.clone(), - }; - - (group, handle) - } - _ => { - let shared_mem = new_shared_mem(&splits).unwrap_or_default(); - - let handle = WriteSplitDataTaskHandle { - tx, - write_type: WriteSplitDataType::Mem { - shared_mem: 
shared_mem.clone(), - }, - }; - - let group = Self::ToMem { - unique_id, - shared_mem, - tasks: Vec::new(), - rx, - expected_size, - current_size: 0, - manager: manager.clone(), - }; - - (group, handle) - } - } - } - - // 处理任务完成 - async fn handle_completion(&self) { - match self { - Self::ToFile { unique_id, manager, .. } | - Self::ToMem { unique_id, manager, .. } => { - // 从管理器中移除句柄 - manager.remove_handle(unique_id); - } - } - } - - // 任务处理循环 - async fn process_tasks(&mut self) -> WSResult { - loop { - // 检查是否已完成所有写入 - if let Some(result) = self.try_complete() { - // 处理完成,清理资源 - self.handle_completion().await; - return Ok(result); - } - - // 等待新任务或已有任务完成 - tokio::select! { - Some(new_task) = match self { - Self::ToFile { rx, .. } | - Self::ToMem { rx, .. } => rx.recv() - } => { - match self { - Self::ToFile { tasks, .. } | - Self::ToMem { tasks, .. } => { - tasks.push(new_task); - } - } - } - else => { - // 通道关闭,清理资源 - self.handle_completion().await; - break; - } - } - } - - Err(WSError::WsDataError(WsDataErr::WriteDataFailed { - unique_id: match self { - Self::ToFile { unique_id, .. } | - Self::ToMem { unique_id, .. } => unique_id.clone(), - } - })) - } -} - -// WriteSplitDataManager 管理器 -pub struct WriteSplitDataManager { - // 只存储任务句柄 - handles: DashMap, -} - -impl WriteSplitDataManager { - pub fn new() -> Arc { - Arc::new(Self { - handles: DashMap::new(), - }) - } - - // 注册新的任务句柄 - pub fn register_handle( - &self, - request_id: proto::BatchRequestId, - handle: WriteSplitDataTaskHandle, - ) -> WSResult<()> { - // 检查是否已存在 - if self.handles.contains_key(&request_id) { - return Err(WSError::WsDataError(WsDataErr::WriteDataFailed { - request_id, - })); - } - - // 存储句柄 - self.handles.insert(request_id, handle); - Ok(()) - } - - // 获取已存在的任务句柄 - pub fn get_handle(&self, request_id: &proto::BatchRequestId) -> Option { - self.handles.get(request_id).map(|h| h.clone()) - } - - // 移除任务句柄 - pub fn remove_handle(&self, request_id: &proto::BatchRequestId) { - self.handles.remove(request_id); - } -} - -## 修改 使用情况以适配新接口 计划 - -### 1. 
修改 get_or_del_data 函数 - -```diff - pub async fn get_or_del_data(&self, GetOrDelDataArg { meta, unique_id, ty }: GetOrDelDataArg) - -> WSResult<(DataSetMetaV2, HashMap)> - { - let want_idxs: Vec = WantIdxIter::new(&ty, meta.data_item_cnt() as DataItemIdx).collect(); - - let mut groups = Vec::new(); - let mut idxs = Vec::new(); - let p2p = self.view.p2p(); - let mut ret = HashMap::new(); - - for idx in want_idxs { - // 为每个数据项创建独立的任务组 - let (tx, rx) = tokio::sync::mpsc::channel(1); - let splits = vec![0..1]; - let splits = vec![0..1]; - let (mut group, handle) = WriteSplitDataTaskGroup::new( - unique_id.clone(), - splits, - match ty { - GetOrDelDataArgType::Delete => proto::BatchDataBlockType::Delete, - _ => proto::BatchDataBlockType::Memory, - }, - Arc::clone(&self.manager), - ).await; - - let p2p = p2p.clone(); - let unique_id = unique_id.clone(); - let data_node = meta.get_data_node(idx); - let delete = matches!(ty, GetOrDelDataArgType::Delete); - let rpc_call = self.rpc_call_get_data.clone(); - - let handle_clone = handle.clone(); - let handle = tokio::spawn(async move { - let resp = rpc_call.call( - p2p, - data_node, - proto::GetOneDataRequest { - unique_id: unique_id.to_vec(), - idxs: vec![idx as u32], - delete, - return_data: true, - }, - Some(Duration::from_secs(60)), - ).await?; - - if !resp.success { - tracing::error!("Failed to get data for idx {}: {}", idx, resp.message); - return Err(WsDataError::GetDataFailed { - unique_id: unique_id.to_vec(), - msg: resp.message, - }.into()); - } - - handle_clone.submit_split(0, resp.data[0].clone()).await; - Ok::<_, WSError>(()) - }); - - groups.push(group); - idxs.push((idx, handle)); - } - - // 等待所有RPC任务完成 - for (group, (idx, handle)) in groups.into_iter().zip(idxs.into_iter()) { - if let Err(e) = handle.await.map_err(|e| WSError::from(e))?.map_err(|e| e) { - tracing::error!("RPC task failed for idx {}: {}", idx, e); - continue; - } - - match group.join().await { - Ok(data_item) => { - ret.insert(idx, data_item); - } - Err(e) => { - tracing::error!("Task group join failed for idx {}: {}", idx, e); - } - } - } - - Ok(ret) -} -``` - -### 2. Batch数据处理流程更新 - -#### 2.1 WriteSplitDataTaskHandle扩展 等待全部完成的函数 - -```rust -impl WriteSplitDataTaskHandle { - ... 
- - /// 等待所有已提交的写入任务完成 - pub async fn wait_all_tasks(self) -> WSResult<()> { - } -} -``` - -#### 2.2 BatchTransfer 实现 - -```rust -/// 数据源接口 -#[async_trait] -pub trait DataSource: Send + Sync + 'static { - /// 获取数据总大小 - async fn size(&self) -> WSResult; - /// 读取指定范围的数据 - async fn read_chunk(&self, offset: usize, size: usize) -> WSResult>; - /// 获取数据块类型 - fn block_type(&self) -> BatchDataBlockType; -} - -/// 批量传输数据 -pub async fn batch_transfer( - unique_id: Vec, - version: u64, - target_node: NodeID, - data: Arc, - view: DataGeneralView, -) -> WSResult<()> { - let total_size = data.size().await?; - let total_blocks = (total_size + DEFAULT_BLOCK_SIZE - 1) / DEFAULT_BLOCK_SIZE; - let semaphore = Arc::new(Semaphore::new(32)); - let mut handles = Vec::new(); - - // 发送所有数据块 - for block_idx in 0..total_blocks { - // 获取信号量许可 - let permit = semaphore.clone().acquire_owned().await.unwrap(); - - let offset = block_idx as usize * DEFAULT_BLOCK_SIZE; - let size = DEFAULT_BLOCK_SIZE.min(total_size - offset); - - // 读取数据块 - let block_data = data.read_chunk(offset, size).await?; - - // 构造请求 - let request = proto::BatchDataRequest { - request_id: Some(proto::BatchRequestId { - node_id: target_node as u32, - sequence: block_idx as u32, - }), - block_type: data.block_type() as i32, - block_index: block_idx as u32, - data: block_data, - operation: proto::DataOpeType::Write as i32, - unique_id: unique_id.clone(), - version, - }; - - // 发送请求 - let view = view.clone(); - let handle = tokio::spawn(async move { - let _permit = permit; // 持有permit直到任务完成 - let resp = view.data_general().rpc_call_batch_data.call( - view.p2p(), - target_node, - request, - Some(Duration::from_secs(30)), - ).await?; - - if !resp.success { - return Err(WsDataError::BatchTransferFailed { - node: target_node, - batch: block_idx as u32, - reason: resp.error_message, - }.into()); - } - - Ok(()) - }); - - handles.push(handle); - } - - // 等待所有请求完成 - for handle in handles { - handle.await??; - } - - Ok(()) -} -``` - -#### 2.3 DataGeneral RPC处理实现 - -```rust -/// 默认数据块大小 (4MB) -const DEFAULT_BLOCK_SIZE: usize = 4 * 1024 * 1024; - -/// 批量数据传输状态 -struct BatchTransferState { - handle: WriteSplitDataTaskHandle, - shared: SharedWithBatchHandler, -} - -/// 共享状态,用于记录最新的请求响应器 -#[derive(Clone)] -struct SharedWithBatchHandler { - responsor: Arc>>>, -} - -impl SharedWithBatchHandler { - fn new() -> Self { - Self { - responsor: Arc::new(Mutex::new(None)), - } - } - - async fn update_responsor(&self, responsor: RPCResponsor) { - let mut guard = self.responsor.lock().await; - if let Some(old_responsor) = guard.take() { - // 旧的responsor直接返回成功 - if let Err(e) = old_responsor.response(Ok(())).await { - tracing::error!("Failed to respond to old request: {}", e); - } - } - *guard = Some(responsor); - } - - async fn get_final_responsor(&self) -> Option> { - self.responsor.lock().await.take() - } -} - -impl DataGeneral { - /// 创建新的DataGeneral实例 - pub fn new() -> Self { - Self { - batch_receive_states: DashMap::new(), - // ...其他字段 - } - } -} - -impl DataGeneral { - /// 处理批量数据写入请求 - /// - /// # 处理流程 - /// 1. 从batch_receive_states查询或创建传输状态 - /// 2. 使用WriteSplitDataTaskHandle提交写入任务 - /// 3. 等待写入完成并返回结果 - pub async fn rpc_handle_batch_data( - &self, - request: BatchDataRequest, - responsor: RPCResponsor, - ) -> WSResult<()> { - // 1. 
从batch_receive_states查询或创建传输状态 - let state = if let Some(state) = self.batch_receive_states.get(&request.unique_id) { - // 验证版本号 - if state.handle.version() != request.version { - tracing::error!( - "Version mismatch for transfer {}, expected {}, got {}", - hex::encode(&request.unique_id), - state.handle.version(), - request.version - ); - return Err(WSError::BatchError(WsBatchErr::VersionMismatch { - expected: state.handle.version(), - actual: request.version, - })); - } - state - } else { - // 创建新的写入任务组 - let (group, handle) = WriteSplitDataTaskGroup::new( - request.unique_id.clone(), - calculate_splits(request.total_blocks), - request.block_type, - ).await?; - - // 创建共享状态 - let shared = SharedWithBatchHandler::new(); - let state = BatchTransferState { handle: handle.clone(), shared: shared.clone() }; - - // 启动等待完成的任务 - let unique_id = request.unique_id.clone(); - let batch_receive_states = self.batch_receive_states.clone(); - tokio::spawn(async move { - // 等待所有任务完成 - if let Err(e) = handle.wait_all_tasks().await { - tracing::error!( - "Failed to complete transfer {}: {}", - hex::encode(&unique_id), - e - ); - // 获取最后的responsor并返回错误 - if let Some(final_responsor) = shared.get_final_responsor().await { - if let Err(e) = final_responsor.response(Err(e)).await { - tracing::error!("Failed to send error response: {}", e); - } - } - // 清理状态 - batch_receive_states.remove(&unique_id); - return; - } - - // 获取最后的responsor并返回成功 - if let Some(final_responsor) = shared.get_final_responsor().await { - if let Err(e) = final_responsor.response(Ok(())).await { - tracing::error!("Failed to send success response: {}", e); - } - } - // 清理状态 - batch_receive_states.remove(&unique_id); - }); - - // 插入新状态 - self.batch_receive_states.insert(request.unique_id.clone(), state); - self.batch_receive_states.get(&request.unique_id).unwrap() - }; - - // 2. 使用WriteSplitDataTaskHandle提交写入任务 - let offset = request.block_idx as usize * DEFAULT_BLOCK_SIZE; - - if let Err(e) = state.handle.submit_split(offset, request.data).await { - tracing::error!( - "Failed to submit split for transfer {}, block {}: {}", - hex::encode(&request.unique_id), - request.block_idx, - e - ); - return Err(e); - } - - // 3. 
更新共享状态中的responsor - state.shared.update_responsor(responsor).await; - - tracing::debug!( - "Successfully submitted block {} for transfer {}", - request.block_idx, - hex::encode(&request.unique_id) - ); - - Ok(()) - } -} - -/// 计算数据分片范围 -fn calculate_splits(total_blocks: u32) -> Vec> { - let mut splits = Vec::with_capacity(total_blocks as usize); - for i in 0..total_blocks { - let start = i as usize * DEFAULT_BLOCK_SIZE; - let end = start + DEFAULT_BLOCK_SIZE; - splits.push(start..end); - } - splits -} - -/// 数据源实现 -pub struct FileDataSource { - path: PathBuf, - file: Option, -} - -impl FileDataSource { - pub fn new(path: PathBuf) -> Self { - Self { - path, - file: None, - } - } -} - -#[async_trait] -impl DataSource for FileDataSource { - async fn size(&self) -> WSResult { - tokio::fs::metadata(&self.path) - .await - .map(|m| m.len() as usize) - .map_err(|e| WsDataError::ReadSourceFailed { - source: format!("{}", self.path.display()), - error: e.to_string(), - }.into()) - } - - async fn read_chunk(&self, offset: usize, size: usize) -> WSResult> { - let mut file = tokio::fs::File::open(&self.path).await - .map_err(|e| WsDataError::ReadSourceFailed { - source: format!("{}", self.path.display()), - error: e.to_string(), - })?; - - file.seek(SeekFrom::Start(offset as u64)).await - .map_err(|e| WsDataError::ReadSourceFailed { - source: format!("{}", self.path.display()), - error: e.to_string(), - })?; - - let mut buf = vec![0; size]; - file.read_exact(&mut buf).await - .map_err(|e| WsDataError::ReadSourceFailed { - source: format!("{}", self.path.display()), - error: e.to_string(), - })?; - - Ok(buf) - } - - fn block_type(&self) -> BatchDataBlockType { - BatchDataBlockType::File - } -} - -pub struct MemDataSource { - data: Arc<[u8]>, -} - -impl MemDataSource { - pub fn new(data: Vec) -> Self { - Self { - data: data.into() - } - } -} - -#[async_trait] -impl DataSource for MemDataSource { - async fn size(&self) -> WSResult { - Ok(self.data.len()) - } - - async fn read_chunk(&self, offset: usize, size: usize) -> WSResult> { - if offset + size > self.data.len() { - return Err(WsDataError::ReadSourceFailed { - source: "memory".into(), - error: "read beyond bounds".into(), - }.into()); - } - Ok(self.data[offset..offset + size].to_vec()) - } - - fn block_type(&self) -> BatchDataBlockType { - BatchDataBlockType::Memory - } -} diff --git a/review.md b/review.md old mode 100644 new mode 100755 index 9569cea..4636297 --- a/review.md +++ b/review.md @@ -1,86 +1,85 @@ -# 代码修改清单 - -## 1. 删除代码 +# 项目分析与修改计划 + + +### 现有 + +#### DataGeneral +- 功能:数据管理核心模块 +- 职责: + 1. 提供数据读写接口 + 2. 管理元数据 + 3. 协调各子模块功能 + 4. 错误处理和恢复 + 5. 资源生命周期管理 + +#### DataSplit +- 功能:数据分片管理 +- 核心组件: + 1. EachNodeSplit:单节点分片信息 + ```protobuf + message EachNodeSplit { + uint32 node_id = 1; + uint32 data_offset = 2; + uint32 data_size = 3; + } + ``` + 2. DataSplit:分片集合 + ```protobuf + message DataSplit { + repeated EachNodeSplit splits = 1; + } + ``` + +#### BatchTransfer +- 功能:管理单个批量传输的状态 +- 核心字段: + ```rust + struct BatchTransfer { + unique_id: Vec, + version: u64, + block_type: BatchDataBlockType, + total_blocks: u32, + received_blocks: DashMap>, + tx: Option>> + } + ``` + +#### WriteSplitDataTaskGroup +- 功能:管理数据分片写入任务组 +- 实现类型: + 1. ToFile:文件写入任务组 + - 文件路径管理 + - 文件操作错误处理 + - 磁盘同步策略 + 2. ToMem:内存写入任务组 + - SharedMemHolder管理 + - 内存访问安全 + - 资源自动回收 + + +### 变更 + +#### 核心接口定义 ```rust -// 1. 
src/main/src/general/data/m_data_general/batch.rs 中删除 -// 1.1 删除 BatchManager -pub(super) struct BatchManager { - transfers: DashMap, - sequence: AtomicU64, -} - -impl BatchManager { - pub fn new() -> Self - pub fn next_sequence(&self) -> u64 - pub async fn create_transfer(...) - pub async fn handle_block(...) -} - -// 1.2 删除 BatchTransfer -pub(super) struct BatchTransfer { - pub unique_id: Vec, - pub version: u64, - pub block_type: proto::BatchDataBlockType, - pub total_blocks: u32, - data_sender: mpsc::Sender>, - write_task: JoinHandle>, - pub tx: Option>>, -} - -impl BatchTransfer { - pub async fn new(...) - pub async fn add_block(...) - pub async fn complete(...) - fn calculate_splits(...) -} - -// 2. src/main/src/general/data/m_data_general/mod.rs 中删除 -struct DataGeneral { - batch_manager: Arc, // 删除此字段 -} -// DataGeneral::new() 中删除 -batch_manager: Arc::new(BatchManager::new()), -``` - -## 2. 新增代码 -### src/main/src/result.rs +#### WriteSplitDataTaskGroup 核心实现 ```rust -pub enum WsDataError { - // 修改错误类型 - BatchTransferFailed { - request_id: proto::BatchRequestId, // 改为 request_id - reason: String, - }, - BatchTransferNotFound { - request_id: proto::BatchRequestId, // 改为 request_id - }, - BatchTransferError { - request_id: proto::BatchRequestId, // 改为 request_id - msg: String, - }, +// 写入任务相关错误 +#[derive(Debug)] +pub enum WsDataErr { WriteDataFailed { - request_id: proto::BatchRequestId, + unique_id: Vec, }, SplitTaskFailed { - request_id: proto::BatchRequestId, idx: DataSplitIdx, }, - VersionMismatch { - expected: u64, - actual: u64, - }, } -``` -### src/main/src/general/data/m_data_general/task.rs -```rust // 写入任务句柄,用于提交新的分片任务 pub struct WriteSplitDataTaskHandle { tx: mpsc::Sender>, write_type: WriteSplitDataType, - version: u64, // 添加版本号字段 } // 写入类型 @@ -94,13 +93,8 @@ enum WriteSplitDataType { } impl WriteSplitDataTaskHandle { - // 获取版本号 - pub fn version(&self) -> u64 { - self.version - } - // 提交新的分片任务 - pub async fn submit_split(&self, idx: DataSplitIdx, data: proto::DataItem) -> WSResult<()> { + pub async fn submit_split(&self, idx: DataSplitIdx, data: proto::DataItem) { let task = match &self.write_type { WriteSplitDataType::File { path } => { let path = path.clone(); @@ -134,21 +128,9 @@ impl WriteSplitDataTaskHandle { } }; - self.tx.send(task).await.map_err(|e| { + if let Err(e) = self.tx.send(task).await { tracing::error!("Failed to submit task: channel closed, idx: {:?}", idx); - WSError::WsDataError(WsDataError::BatchTransferFailed { - request_id: idx.into(), // 需要实现 From for BatchRequestId - reason: "Failed to submit task: channel closed".to_string() - }) - }) - } - - /// 等待所有已提交的写入任务完成 - pub async fn wait_all_tasks(self) -> WSResult<()> { - // 关闭发送端,不再接收新任务 - drop(self.tx); - - Ok(()) + } } } @@ -162,6 +144,7 @@ enum WriteSplitDataTaskGroup { rx: mpsc::Receiver>, // 任务接收通道 expected_size: usize, // 预期总大小 current_size: usize, // 当前写入大小 + manager: Arc, // 管理器引用 }, // 内存写入模式 ToMem { @@ -171,6 +154,7 @@ enum WriteSplitDataTaskGroup { rx: mpsc::Receiver>, // 任务接收通道 expected_size: usize, // 预期总大小 current_size: usize, // 当前写入大小 + manager: Arc, // 管理器引用 } } @@ -180,7 +164,7 @@ impl WriteSplitDataTaskGroup { unique_id: UniqueId, splits: Vec>, block_type: proto::BatchDataBlockType, - version: u64, // 添加版本号参数 + manager: Arc, ) -> (Self, WriteSplitDataTaskHandle) { // 计算预期总大小 let expected_size = splits.iter().map(|range| range.len()).sum(); @@ -198,7 +182,6 @@ impl WriteSplitDataTaskGroup { write_type: WriteSplitDataType::File { path: file_path.clone(), }, - version, // 设置版本号 }; let 
group = Self::ToFile { @@ -208,6 +191,7 @@ impl WriteSplitDataTaskGroup { rx, expected_size, current_size: 0, + manager: manager.clone(), }; (group, handle) @@ -220,7 +204,6 @@ impl WriteSplitDataTaskGroup { write_type: WriteSplitDataType::Mem { shared_mem: shared_mem.clone(), }, - version, // 设置版本号 }; let group = Self::ToMem { @@ -230,6 +213,7 @@ impl WriteSplitDataTaskGroup { rx, expected_size, current_size: 0, + manager: manager.clone(), }; (group, handle) @@ -237,11 +221,24 @@ impl WriteSplitDataTaskGroup { } } + // 处理任务完成 + async fn handle_completion(&self) { + match self { + Self::ToFile { unique_id, manager, .. } | + Self::ToMem { unique_id, manager, .. } => { + // 从管理器中移除句柄 + manager.remove_handle(unique_id); + } + } + } + // 任务处理循环 async fn process_tasks(&mut self) -> WSResult { loop { // 检查是否已完成所有写入 if let Some(result) = self.try_complete() { + // 处理完成,清理资源 + self.handle_completion().await; return Ok(result); } @@ -255,80 +252,267 @@ impl WriteSplitDataTaskGroup { Self::ToFile { tasks, .. } | Self::ToMem { tasks, .. } => { tasks.push(new_task); - // 不需要更新current_size,因为是在任务完成时更新 - } - } - } - Some(completed_task) = futures::future::select_all(match self { - Self::ToFile { tasks, .. } | - Self::ToMem { tasks, .. } => tasks - }) => { - // 检查任务是否成功完成 - if let Err(e) = completed_task.0 { - tracing::error!("Task failed: {}", e); - return Err(WSError::WsDataError(WsDataError::BatchTransferFailed { - request_id: match self { - Self::ToFile { unique_id, .. } | - Self::ToMem { unique_id, .. } => unique_id.clone() - }, - reason: format!("Task failed: {}", e) - })); - } - // 从任务列表中移除已完成的任务 - match self { - Self::ToFile { tasks, current_size, .. } | - Self::ToMem { tasks, current_size, .. } => { - tasks.remove(completed_task.1); - // 更新当前大小 - *current_size += DEFAULT_BLOCK_SIZE; // 每个任务写入一个块 } } } - None = match self { - Self::ToFile { rx, .. } | - Self::ToMem { rx, .. } => rx.recv() - } => { - // 通道关闭,直接退出 + else => { + // 通道关闭,清理资源 + self.handle_completion().await; break; } } } - Err(WSError::WsDataError(WsDataError::BatchTransferFailed { - request_id: match self { + Err(WSError::WsDataError(WsDataErr::WriteDataFailed { + unique_id: match self { Self::ToFile { unique_id, .. } | - Self::ToMem { unique_id, .. } => unique_id.clone() - }, - reason: "Channel closed".to_string() + Self::ToMem { unique_id, .. } => unique_id.clone(), + } })) } +} - /// 检查是否已完成所有写入 - fn try_complete(&self) -> Option { - match self { - Self::ToFile { current_size, expected_size, file_path, .. } => { - if *current_size >= *expected_size { - // 所有数据已写入,返回文件数据项 - Some(proto::DataItem::new_file_data(file_path.clone())) - } else { - None - } +// WriteSplitDataManager 管理器 +pub struct WriteSplitDataManager { + // 只存储任务句柄 + handles: DashMap, +} + +impl WriteSplitDataManager { + pub fn new() -> Arc { + Arc::new(Self { + handles: DashMap::new(), + }) + } + + // 注册新的任务句柄 + pub fn register_handle( + &self, + request_id: proto::BatchRequestId, + handle: WriteSplitDataTaskHandle, + ) -> WSResult<()> { + // 检查是否已存在 + if self.handles.contains_key(&request_id) { + return Err(WSError::WsDataError(WsDataErr::WriteDataFailed { + request_id, + })); + } + + // 存储句柄 + self.handles.insert(request_id, handle); + Ok(()) + } + + // 获取已存在的任务句柄 + pub fn get_handle(&self, request_id: &proto::BatchRequestId) -> Option { + self.handles.get(request_id).map(|h| h.clone()) + } + + // 移除任务句柄 + pub fn remove_handle(&self, request_id: &proto::BatchRequestId) { + self.handles.remove(request_id); + } +} + +## 修改 使用情况以适配新接口 计划 + +### 1. 
修改 get_or_del_data 函数 + +```diff + pub async fn get_or_del_data(&self, GetOrDelDataArg { meta, unique_id, ty }: GetOrDelDataArg) + -> WSResult<(DataSetMetaV2, HashMap)> + { + let want_idxs: Vec = WantIdxIter::new(&ty, meta.data_item_cnt() as DataItemIdx).collect(); + + let mut groups = Vec::new(); + let mut idxs = Vec::new(); + let p2p = self.view.p2p(); + let mut ret = HashMap::new(); + + for idx in want_idxs { + // 为每个数据项创建独立的任务组 + let (tx, rx) = tokio::sync::mpsc::channel(1); + let splits = vec![0..1]; + let splits = vec![0..1]; + let (mut group, handle) = WriteSplitDataTaskGroup::new( + unique_id.clone(), + splits, + match ty { + GetOrDelDataArgType::Delete => proto::BatchDataBlockType::Delete, + _ => proto::BatchDataBlockType::Memory, + }, + Arc::clone(&self.manager), + ).await; + + let p2p = p2p.clone(); + let unique_id = unique_id.clone(); + let data_node = meta.get_data_node(idx); + let delete = matches!(ty, GetOrDelDataArgType::Delete); + let rpc_call = self.rpc_call_get_data.clone(); + + let handle_clone = handle.clone(); + let handle = tokio::spawn(async move { + let resp = rpc_call.call( + p2p, + data_node, + proto::GetOneDataRequest { + unique_id: unique_id.to_vec(), + idxs: vec![idx as u32], + delete, + return_data: true, + }, + Some(Duration::from_secs(60)), + ).await?; + + if !resp.success { + tracing::error!("Failed to get data for idx {}: {}", idx, resp.message); + return Err(WsDataError::GetDataFailed { + unique_id: unique_id.to_vec(), + msg: resp.message, + }.into()); } - Self::ToMem { current_size, expected_size, shared_mem, .. } => { - if *current_size >= *expected_size { - // 所有数据已写入,返回内存数据项 - Some(proto::DataItem::new_mem_data(shared_mem.clone())) - } else { - None - } + + handle_clone.submit_split(0, resp.data[0].clone()).await; + Ok::<_, WSError>(()) + }); + + groups.push(group); + idxs.push((idx, handle)); + } + + // 等待所有RPC任务完成 + for (group, (idx, handle)) in groups.into_iter().zip(idxs.into_iter()) { + if let Err(e) = handle.await.map_err(|e| WSError::from(e))?.map_err(|e| e) { + tracing::error!("RPC task failed for idx {}: {}", idx, e); + continue; + } + + match group.join().await { + Ok(data_item) => { + ret.insert(idx, data_item); + } + Err(e) => { + tracing::error!("Task group join failed for idx {}: {}", idx, e); } } } + + Ok(ret) +} +``` + +### 2. Batch数据处理流程更新 + +#### 2.1 WriteSplitDataTaskHandle扩展 等待全部完成的函数 + +```rust +impl WriteSplitDataTaskHandle { + ... 
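+    // note (annotation, not part of the original plan): the `...` above
+    // elides the submit_split method shown earlier; this section only
+    // sketches the new waiting API. The removed lines earlier in this diff
+    // implemented it by dropping `self.tx`, which closes the channel
+    // feeding the task group.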
+ + /// 等待所有已提交的写入任务完成 + pub async fn wait_all_tasks(self) -> WSResult<()> { + } +} +``` + +#### 2.2 BatchTransfer 实现 + +```rust +/// 数据源接口 +#[async_trait] +pub trait DataSource: Send + Sync + 'static { + /// 获取数据总大小 + async fn size(&self) -> WSResult; + /// 读取指定范围的数据 + async fn read_chunk(&self, offset: usize, size: usize) -> WSResult>; + /// 获取数据块类型 + fn block_type(&self) -> BatchDataBlockType; +} + +/// 批量传输数据 +pub async fn batch_transfer( + unique_id: Vec, + version: u64, + target_node: NodeID, + data: Arc, + view: DataGeneralView, +) -> WSResult<()> { + let total_size = data.size().await?; + let total_blocks = (total_size + DEFAULT_BLOCK_SIZE - 1) / DEFAULT_BLOCK_SIZE; + let semaphore = Arc::new(Semaphore::new(32)); + let mut handles = Vec::new(); + + // 发送所有数据块 + for block_idx in 0..total_blocks { + // 获取信号量许可 + let permit = semaphore.clone().acquire_owned().await.unwrap(); + + let offset = block_idx as usize * DEFAULT_BLOCK_SIZE; + let size = DEFAULT_BLOCK_SIZE.min(total_size - offset); + + // 读取数据块 + let block_data = data.read_chunk(offset, size).await?; + + // 构造请求 + let request = proto::BatchDataRequest { + request_id: Some(proto::BatchRequestId { + node_id: target_node as u32, + sequence: block_idx as u32, + }), + block_type: data.block_type() as i32, + block_index: block_idx as u32, + data: block_data, + operation: proto::DataOpeType::Write as i32, + unique_id: unique_id.clone(), + version, + }; + + // 发送请求 + let view = view.clone(); + let handle = tokio::spawn(async move { + let _permit = permit; // 持有permit直到任务完成 + let resp = view.data_general().rpc_call_batch_data.call( + view.p2p(), + target_node, + request, + Some(Duration::from_secs(30)), + ).await?; + + if !resp.success { + return Err(WsDataError::BatchTransferFailed { + node: target_node, + batch: block_idx as u32, + reason: resp.error_message, + }.into()); + } + + Ok(()) + }); + + handles.push(handle); + } + + // 等待所有请求完成 + for handle in handles { + handle.await??; + } + + Ok(()) } ``` -### src/main/src/general/data/m_data_general/mod.rs +#### 2.3 DataGeneral RPC处理实现 + ```rust +/// 默认数据块大小 (4MB) +const DEFAULT_BLOCK_SIZE: usize = 4 * 1024 * 1024; + +/// 批量数据传输状态 +struct BatchTransferState { + handle: WriteSplitDataTaskHandle, + shared: SharedWithBatchHandler, +} + /// 共享状态,用于记录最新的请求响应器 #[derive(Clone)] struct SharedWithBatchHandler { @@ -358,45 +542,41 @@ impl SharedWithBatchHandler { } } -/// 批量数据传输状态 -struct BatchReceiveState { - handle: WriteSplitDataTaskHandle, - shared: SharedWithBatchHandler, -} - -pub struct DataGeneral { - // 批量数据接收状态管理 - batch_receive_states: DashMap, - // ... 其他字段 -} - impl DataGeneral { + /// 创建新的DataGeneral实例 pub fn new() -> Self { Self { batch_receive_states: DashMap::new(), - // ... 其他字段初始化 + // ...其他字段 } } +} +impl DataGeneral { /// 处理批量数据写入请求 + /// + /// # 处理流程 + /// 1. 从batch_receive_states查询或创建传输状态 + /// 2. 使用WriteSplitDataTaskHandle提交写入任务 + /// 3. 等待写入完成并返回结果 pub async fn rpc_handle_batch_data( &self, request: BatchDataRequest, responsor: RPCResponsor, ) -> WSResult<()> { - let state = if let Some(state) = self.batch_receive_states.get(&request.request_id) { + // 1. 
从batch_receive_states查询或创建传输状态 + let state = if let Some(state) = self.batch_receive_states.get(&request.unique_id) { // 验证版本号 if state.handle.version() != request.version { tracing::error!( - "Version mismatch for transfer {:?}, expected {}, got {}", - request.request_id, + "Version mismatch for transfer {}, expected {}, got {}", + hex::encode(&request.unique_id), state.handle.version(), request.version ); - return Err(WSError::WsDataError(WsDataError::BatchTransferError { - request_id: request.request_id, - msg: format!("Version mismatch, expected {}, got {}", - state.handle.version(), request.version) + return Err(WSError::BatchError(WsBatchErr::VersionMismatch { + expected: state.handle.version(), + actual: request.version, })); } state @@ -406,22 +586,21 @@ impl DataGeneral { request.unique_id.clone(), calculate_splits(request.total_blocks), request.block_type, - request.version, // 传递版本号 ).await?; // 创建共享状态 let shared = SharedWithBatchHandler::new(); - let state = BatchReceiveState { handle: handle.clone(), shared: shared.clone() }; + let state = BatchTransferState { handle: handle.clone(), shared: shared.clone() }; // 启动等待完成的任务 - let request_id = request.request_id.clone(); // 使用 request_id + let unique_id = request.unique_id.clone(); let batch_receive_states = self.batch_receive_states.clone(); tokio::spawn(async move { // 等待所有任务完成 if let Err(e) = handle.wait_all_tasks().await { tracing::error!( - "Failed to complete transfer {:?}: {}", - request_id, // 使用 request_id + "Failed to complete transfer {}: {}", + hex::encode(&unique_id), e ); // 获取最后的responsor并返回错误 @@ -431,7 +610,7 @@ impl DataGeneral { } } // 清理状态 - batch_receive_states.remove(&request_id); // 使用 request_id + batch_receive_states.remove(&unique_id); return; } @@ -442,22 +621,22 @@ impl DataGeneral { } } // 清理状态 - batch_receive_states.remove(&request_id); // 使用 request_id + batch_receive_states.remove(&unique_id); }); // 插入新状态 - self.batch_receive_states.insert(request.request_id.clone(), state); - self.batch_receive_states.get(&request.request_id).unwrap() + self.batch_receive_states.insert(request.unique_id.clone(), state); + self.batch_receive_states.get(&request.unique_id).unwrap() }; // 2. 
使用WriteSplitDataTaskHandle提交写入任务 - let offset = request.block_index as usize * DEFAULT_BLOCK_SIZE; // 使用 block_index + let offset = request.block_idx as usize * DEFAULT_BLOCK_SIZE; if let Err(e) = state.handle.submit_split(offset, request.data).await { tracing::error!( - "Failed to submit split for transfer {:?}, block {}: {}", - request.request_id, - request.block_index, // 使用 block_index + "Failed to submit split for transfer {}, block {}: {}", + hex::encode(&request.unique_id), + request.block_idx, e ); return Err(e); @@ -467,11 +646,110 @@ impl DataGeneral { state.shared.update_responsor(responsor).await; tracing::debug!( - "Successfully submitted block {} for transfer {:?}", - request.block_index, - request.request_id + "Successfully submitted block {} for transfer {}", + request.block_idx, + hex::encode(&request.unique_id) ); Ok(()) } -} \ No newline at end of file +} + +/// 计算数据分片范围 +fn calculate_splits(total_blocks: u32) -> Vec> { + let mut splits = Vec::with_capacity(total_blocks as usize); + for i in 0..total_blocks { + let start = i as usize * DEFAULT_BLOCK_SIZE; + let end = start + DEFAULT_BLOCK_SIZE; + splits.push(start..end); + } + splits +} + +/// 数据源实现 +pub struct FileDataSource { + path: PathBuf, + file: Option, +} + +impl FileDataSource { + pub fn new(path: PathBuf) -> Self { + Self { + path, + file: None, + } + } +} + +#[async_trait] +impl DataSource for FileDataSource { + async fn size(&self) -> WSResult { + tokio::fs::metadata(&self.path) + .await + .map(|m| m.len() as usize) + .map_err(|e| WsDataError::ReadSourceFailed { + source: format!("{}", self.path.display()), + error: e.to_string(), + }.into()) + } + + async fn read_chunk(&self, offset: usize, size: usize) -> WSResult> { + let mut file = tokio::fs::File::open(&self.path).await + .map_err(|e| WsDataError::ReadSourceFailed { + source: format!("{}", self.path.display()), + error: e.to_string(), + })?; + + file.seek(SeekFrom::Start(offset as u64)).await + .map_err(|e| WsDataError::ReadSourceFailed { + source: format!("{}", self.path.display()), + error: e.to_string(), + })?; + + let mut buf = vec![0; size]; + file.read_exact(&mut buf).await + .map_err(|e| WsDataError::ReadSourceFailed { + source: format!("{}", self.path.display()), + error: e.to_string(), + })?; + + Ok(buf) + } + + fn block_type(&self) -> BatchDataBlockType { + BatchDataBlockType::File + } +} + +pub struct MemDataSource { + data: Arc<[u8]>, +} + +impl MemDataSource { + pub fn new(data: Vec) -> Self { + Self { + data: data.into() + } + } +} + +#[async_trait] +impl DataSource for MemDataSource { + async fn size(&self) -> WSResult { + Ok(self.data.len()) + } + + async fn read_chunk(&self, offset: usize, size: usize) -> WSResult> { + if offset + size > self.data.len() { + return Err(WsDataError::ReadSourceFailed { + source: "memory".into(), + error: "read beyond bounds".into(), + }.into()); + } + Ok(self.data[offset..offset + size].to_vec()) + } + + fn block_type(&self) -> BatchDataBlockType { + BatchDataBlockType::Memory + } +} From 3697fa8d34bbfd376cb4efbfb65bb0e0ca40a652 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 12/26] Revert "general design of batch" This reverts commit e526b4a5aeb287d35baf48228983d9ae1b832468. 
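
Note on the .cursorrules hunk below: it describes the crate's error
convention, a parent `WSError` that derives `Error` and wraps per-domain
`WsXXXErr` sub-enums that only derive `Debug`. A minimal sketch of that
shape, assuming the `thiserror` crate backs `#[derive(Error)]`:

```rust
use thiserror::Error; // assumption: thiserror provides the Error derive

// Sub-error: per-domain enum, Debug only, per the rule below.
#[derive(Debug)]
pub enum WsDataErr {
    WriteDataFailed { unique_id: Vec<u8> },
}

// Parent error: derives Error and wraps each sub-enum as one variant.
#[derive(Error, Debug)]
pub enum WSError {
    #[error("data error: {0:?}")]
    WsDataError(WsDataErr),
}

fn demo() -> Result<(), WSError> {
    Err(WSError::WsDataError(WsDataErr::WriteDataFailed {
        unique_id: b"app-meta".to_vec(),
    }))
}

fn main() {
    println!("{:?}", demo());
}
```
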
--- .cursorrules | 68 +--- design 1.canvas | 90 +++++ design.canvas | 166 ++++----- design.canvas.tmp.20250206220621 | 78 ++++ design.canvas.tmp.20250206221714 | 82 +++++ design.canvas.tmp.20250206221714.backup | 75 ++++ review.md | 469 ++++++++++-------------- scripts/sync_md_files.py | 4 - 8 files changed, 595 insertions(+), 437 deletions(-) create mode 100755 design 1.canvas create mode 100644 design.canvas.tmp.20250206220621 create mode 100755 design.canvas.tmp.20250206221714 create mode 100755 design.canvas.tmp.20250206221714.backup diff --git a/.cursorrules b/.cursorrules index 74ffc49..adfa3b7 100755 --- a/.cursorrules +++ b/.cursorrules @@ -12,67 +12,12 @@ 使用细致的图表达并行或顺序结构,条件结构;以及数据流转 一个阻塞执行的角色应该强化在块里,如子并行task,rpc caller,rpc handler,任务池 -- 修改canvas要求 - - 每次修改都必须,更新项目下canvas,阅读最新内容 - - 不可擅自删除内容,除非是目标修改内容,其他内容都得保留 - - 要结合原本canvas内的关联内容修改 - - 分离关键执行角色,如rpc caller,rpc handler,任务池,子并行task - - 将代码函数名,类型名都反映在关联逻辑的位置 - - 函数具体逻辑要反映成流程图结构,而不是黏贴代码 - - 例如函数里会spawn任务,就要分离spawn任务和当前函数的对象(概念),然后用图表现他们的关系 - - 例如多个task直接会通过channel通信,就要展现数据流向,以及两边怎么处理数据的发送接收(阻塞or 非阻塞) - - 示例: - pub async fn batch_transfer(unique_id: Vec,version: u64,target_node: NodeID,data: Arc,view: DataGeneralView,) -> WSResult<()> { - let total_size = data.size().await?; - let total_blocks = (total_size + DEFAULT_BLOCK_SIZE - 1) / DEFAULT_BLOCK_SIZE; - let semaphore = Arc::new(Semaphore::new(32)); - let mut handles = Vec::new(); - // 发送所有数据块 - for block_idx in 0..total_blocks { - // 获取信号量许可 - let permit = semaphore.clone().acquire_owned().await.unwrap(); - let offset = block_idx as usize * DEFAULT_BLOCK_SIZE; - let size = DEFAULT_BLOCK_SIZE.min(total_size - offset); - // 读取数据块 - let block_data = data.read_chunk(offset, size).await?; - // 构造请求 - let request = proto::BatchDataRequest {request_id: Some(proto::BatchRequestId {node_id: target_node as u32,sequence: block_idx as u32,}),block_type: data.block_type() as i32,block_index: block_idx as u32,data: block_data,operation: proto::DataOpeType::Write as i32,unique_id: unique_id.clone(),version,}; - // 发送请求 - let view = view.clone(); - let handle = tokio::spawn(async move { - let _permit = permit; // 持有permit直到任务完成 - let resp = view.data_general().rpc_call_batch_data.call(view.p2p(),target_node,request,Some(Duration::from_secs(30)),).await?; - if !resp.success {return Err(WsDataError::BatchTransferFailed {node: target_node,batch: block_idx as u32,reason: resp.error_message,}.into());} - Ok(()) - }); - handles.push(handle); - } - // 等待所有请求完成 - for handle in handles { handle.await??;} - Ok(()) - } - 对象(表上关键类型名) - - 当前函数进程 - - spawn的进程 - - Semaphore - 流程结构 - - 条件和循环 - - 多个task并行 - 数据流向 - - 发送数据转移给子进程 - - semaphore clone 转移给子进程 - 操作(需要表上关键函数名) - - 当前函数进程.预先准备 - - 当前函数进程.阻塞申请semaphore - - 当前函数进程.spawn子进程 - - 子进程.rpc_call - - 子进程释放semaphore - - 更新canvas流程 - - 更新项目下canvas 以进行编辑 - 使用 python3 scripts/sync_md_files.py from_s3fs, 将从s3fs目录获取最新编辑,将在项目目录下访问到 design.canvas - - 更新s3fs canvas以反馈review最新修改 - 使用 python3 scripts/sync_md_files.py to_s3fs, 将项目目录下的design.canvas 更新到s3fs目录 + 将 /mnt/s3fs/waverless/design.canvas 拷贝成待时间戳的tmp和tmp.bak + 如 {项目根路径}/design.canvas.1703171246.tmp + 和 {项目根路径}/design.canvas.1703171246.tmp.bak + 然后在 {项目根路径}/design.canvas.1703171246.tmp 中进行修改 + 然后覆盖原来 /mnt/s3fs/waverless/design.canvas 以及{项目根路径}/design.canvas - 提到“我更新了canvas”的情况,执行下python3 scripts/sync_md_files.py from_s3fs 这样项目下的 {项目根路径}/design.canvas 才是最新的 @@ -85,9 +30,6 @@ - error的结构是一个 WSError,包含子error结构形如 WsXXXErr,父结构实现Error derive,子结构只需要实现debug 子结构尽量实现现有分类 -- 修改代码原则 - 现在review中迭代代码草稿 - 确认草稿后,在更新到当前项目中 ## 1. 
任务执行强制等待规则 - 制定计划后必须等待用户确认: diff --git a/design 1.canvas b/design 1.canvas new file mode 100755 index 0000000..9605161 --- /dev/null +++ b/design 1.canvas @@ -0,0 +1,90 @@ +{ + "nodes":[ + {"id":"cb82b904dab26671","type":"group","x":-3400,"y":-960,"width":4560,"height":3500,"label":"data"}, + {"id":"batch_transfer_group","type":"group","x":-1560,"y":120,"width":2300,"height":2040,"label":"Batch数据传输实现"}, + {"id":"write_split_group","type":"group","x":-3260,"y":120,"width":1470,"height":2360,"label":"WriteSplitDataTaskGroup 写入流程"}, + {"id":"data_write_flow","type":"group","x":-1600,"y":-600,"width":2680,"height":520,"label":"数据写入流程"}, + {"id":"batch_sender_group","type":"group","x":-1500,"y":200,"width":1000,"height":1000,"label":"写入端 [DataGeneral]"}, + {"id":"batch_receiver_group","type":"group","x":-400,"y":200,"width":1000,"height":900,"label":"接收端 [DataGeneral]"}, + {"id":"storage_write_flow","type":"group","x":0,"y":-540,"width":1020,"height":400,"label":"存储节点写入流程"}, + {"id":"7127ed217f71f72d","type":"group","x":-3240,"y":1180,"width":1010,"height":375,"label":"fn register_handle("}, + {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-3050,"y":-406,"width":330,"height":234,"color":"4"}, + {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-2290,"y":-622,"width":330,"height":156,"color":"4"}, + {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源生命周期","x":-2760,"y":-680,"width":340,"height":214,"color":"4"}, + {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-2405,"y":-427,"width":280,"height":275,"color":"4"}, + {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1560,"y":-380,"width":200,"height":100,"color":"1"}, + {"id":"storage_node_3","type":"text","text":"存储节点1","x":-425,"y":-550,"width":150,"height":60,"color":"3"}, + {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 
返回调度决策","x":-1100,"y":-510,"width":200,"height":160,"color":"2"}, + {"id":"storage_group","type":"text","text":"存储节点组","x":-620,"y":-510,"width":150,"height":60,"color":"3"}, + {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-2932,"y":-92,"width":342,"height":158,"color":"4"}, + {"id":"cache_group","type":"text","text":"缓存节点组","x":-620,"y":-310,"width":150,"height":60,"color":"5"}, + {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1560,"y":-510,"width":200,"height":100,"color":"1"}, + {"id":"data_item","type":"text","text":"# 数据项处理\n\n## enum WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n","x":-2990,"y":180,"width":450,"height":280,"color":"3"}, + {"id":"1ec171d545e8995d","type":"text","text":"## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理","x":-3085,"y":794,"width":300,"height":150}, + {"id":"223edf4677db9339","type":"text","text":"pub struct WriteSplitDataManager {\n    // 只存储任务句柄\n    handles: DashMap,\n}","x":-3090,"y":1000,"width":610,"height":140}, + {"id":"f515ecb9aee18fc7","type":"text","text":"# 后续写入 [异步执行]\n\n## 状态管理\n- 写入任务追踪\n- 并发控制\n- 写入顺序保证","x":-2552,"y":1218,"width":302,"height":275}, + {"id":"06d4a92778dd83c8","type":"text","text":"# 第一个分片开始写入 [阻塞执行]\n\n## 初始化写入\nfn start_first_split(data: Vec) -> Result<(), WSError> {\n let task = self.build_task(data, 0);\n self.tasks.push(task);\n self.current_size += data.len();\n Ok(())\n}\n\n## 错误处理\n- 写入失败记录日志\n- 返回具体错误类型","x":-3220,"y":1201,"width":455,"height":310}, + {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1560,"y":-210,"width":200,"height":100,"color":"1"}, + {"id":"storage_node_4","type":"text","text":"存储节点2","x":-420,"y":-480,"width":150,"height":60,"color":"3"}, + {"id":"storage_node_5","type":"text","text":"存储节点3","x":-420,"y":-400,"width":150,"height":60,"color":"3"}, + {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-420,"y":-360,"width":150,"height":60,"color":"5"}, + {"id":"97d3d9fd7432a861","type":"text","text":"# WriteSplitDataTaskHandle::submit_split() 实现 [异步发送]\n\n## match write_type {\n- WriteSplitDataType::File => 文件写入任务\n- WriteSplitDataType::Mem => 内存写入任务\n}\n\n## 发送任务 [channel阻塞]\ntx.send(task).await","x":-2189,"y":1160,"width":347,"height":445}, + {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-620,"y":190,"width":250,"height":240,"color":"2"}, + {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1400,"y":331,"width":300,"height":300,"color":"1"}, + {"id":"batch_manager","type":"text","text":"# BatchTransfer","x":-1100,"y":744,"width":300,"height":300,"color":"1"}, + {"id":"5c4357fc2216ea51","type":"text","text":"## batch写入接口\n- 缓存主动推送\n- 并行写入支持\n- 错误恢复机制\n- 内存复用优化","x":-2180,"y":-92,"width":250,"height":120,"color":"4"}, + {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-420,"y":-280,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-420,"y":-200,"width":150,"height":60,"color":"5"}, + {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":40,"y":-500,"width":200,"height":280,"color":"1"}, + {"id":"write_task_1","type":"text","text":"写入任务1\n- 
分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":380,"y":-500,"width":200,"height":120,"color":"2"}, + {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":-280,"width":200,"height":100,"color":"4"}, + {"id":"write_task_file","type":"text","text":"# ToFile 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToFile\n- file_path: PathBuf\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [文件IO阻塞]\n1. OpenOptions::new()\n .create(true)\n .write(true)\n2. seek(offset)\n3. write_all(data)\n4. 错误记录:\n tracing::error!(\"Failed to write file data at offset {}\")\n","x":-2216,"y":544,"width":400,"height":400,"color":"1"}, + {"id":"write_task_mem","type":"text","text":"# ToMem 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToMem\n- shared_mem: SharedMemHolder\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [内存写入阻塞]\n1. shared_mem.write(offset, data)\n2. 错误记录:\n tracing::error!(\"Failed to write memory data at offset {}\")\n","x":-2650,"y":526,"width":400,"height":436,"color":"2"}, + {"id":"b0205b4457afeb2b","type":"text","text":"## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-2330,"y":242,"width":364,"height":178}, + {"id":"e2576a54f3f852b3","type":"text","text":"# process_tasks() 实现 [阻塞循环]\n\n## 循环处理 [select阻塞]\n1. try_complete() 检查完成状态\n2. tokio::select! {\n - rx.recv() => 接收新任务\n - futures::future::select_all(tasks) => 等待任务完成\n}\n\n## 完成条件\n- current_size >= expected_size\n- 返回 proto::DataItem","x":-3035,"y":1820,"width":377,"height":460}, + {"id":"155106edf5eb3cd7","type":"text","text":"# try_complete() 实现 [同步检查]\n\n## 返回 Option\n- ToFile => proto::DataItem::new_file_data()\n- ToMem => proto::DataItem::new_mem_data()","x":-3074,"y":2300,"width":455,"height":180}, + {"id":"4dbe01dc59cea4c2","type":"text","text":"pub struct WriteSplitDataTaskHandle {\n    tx: mpsc::Sender>,\n    write_type: WriteSplitDataType,\n}","x":-2552,"y":1700,"width":418,"height":160}, + {"id":"20145fd68e8aaa75","type":"text","text":"# 构造 [同步初始化]\n\n## 任务组初始化\nfn new_task_group(type_: WriteSplitDataType) -> Self {\n let (tx, rx) = mpsc::channel(32);\n Self {\n type_,\n tasks: Vec::new(),\n rx,\n expected_size: 0,\n current_size: 0,\n }\n}\n\n## 参数验证\n- 检查写入类型\n- 验证初始参数","x":-3185,"y":1580,"width":450,"height":220}, + {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-620,"y":470,"width":250,"height":120,"color":"2"}, + {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-620,"y":610,"width":250,"height":120,"color":"2"}, + {"id":"batch_receiver_tasks","type":"text","text":"WriteSplitDataTaskGroup","x":-160,"y":570,"width":400,"height":300,"color":"1"} + ], + "edges":[ + {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, + {"id":"phase2_to_phase3","fromNode":"general_phase2","fromSide":"bottom","toNode":"general_phase3","toSide":"top","label":"决策信息"}, + {"id":"phase3_to_storage","fromNode":"general_phase3","fromSide":"right","toNode":"storage_group","toSide":"left","label":"分发存储任务"}, + {"id":"storage_to_nodes","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_3","toSide":"left"}, + {"id":"storage_to_nodes2","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_4","toSide":"left"}, + 
{"id":"storage_to_nodes3","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_5","toSide":"left"}, + {"id":"phase3_to_cache","fromNode":"general_phase3","fromSide":"right","toNode":"cache_group","toSide":"left","label":"分发缓存任务"}, + {"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, + {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, + {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, + {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_manager","toSide":"left","label":"创建批量传输"}, + {"id":"initiator_to_request1","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, + {"id":"initiator_to_request2","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, + {"id":"initiator_to_request3","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, + {"id":"b5a17c0afede8e4a","fromNode":"data_general_core","fromSide":"right","toNode":"133214da264cfe72","toSide":"bottom"}, + {"id":"2ad5991c43fd6098","fromNode":"data_general_core","fromSide":"right","toNode":"821e415b6438e20d","toSide":"top"}, + {"id":"caa45c92a135042c","fromNode":"data_general_core","fromSide":"right","toNode":"core_functions","toSide":"left"}, + {"id":"09c7b9957992d62d","fromNode":"data_general_core","fromSide":"right","toNode":"b31695207931d96e","toSide":"left"}, + {"id":"adfa1cca1009ff43","fromNode":"data_general_core","fromSide":"right","toNode":"5c4357fc2216ea51","toSide":"left"}, + {"id":"ef995a514a2210bb","fromNode":"5c4357fc2216ea51","fromSide":"right","toNode":"batch_transfer_group","toSide":"top"}, + {"id":"3d79872a234731c0","fromNode":"cache_node_3","fromSide":"bottom","toNode":"batch_transfer_group","toSide":"top"}, + {"id":"9094221953b6c685","fromNode":"write_task_mem","fromSide":"top","toNode":"b0205b4457afeb2b","toSide":"bottom"}, + {"id":"77ec04f5deef7cee","fromNode":"write_task_mem","fromSide":"left","toNode":"1ec171d545e8995d","toSide":"top"}, + {"id":"7b99fb72410f07d9","fromNode":"06d4a92778dd83c8","fromSide":"bottom","toNode":"20145fd68e8aaa75","toSide":"top"}, + {"id":"df9b4bc9170fdec1","fromNode":"20145fd68e8aaa75","fromSide":"right","toNode":"4dbe01dc59cea4c2","toSide":"left"}, + {"id":"61e0637af4beba94","fromNode":"f515ecb9aee18fc7","fromSide":"left","toNode":"4dbe01dc59cea4c2","toSide":"left"}, + {"id":"f7105db89ffabd1e","fromNode":"20145fd68e8aaa75","fromSide":"bottom","toNode":"e2576a54f3f852b3","toSide":"top"}, + {"id":"7504b1b3a99e992c","fromNode":"4dbe01dc59cea4c2","fromSide":"right","toNode":"97d3d9fd7432a861","toSide":"bottom","label":"获取到handle"}, + {"id":"a993a3f4d7b2211d","fromNode":"97d3d9fd7432a861","fromSide":"left","toNode":"e2576a54f3f852b3","toSide":"right"}, + {"id":"a996588f6c59c88f","fromNode":"e2576a54f3f852b3","fromSide":"bottom","toNode":"155106edf5eb3cd7","toSide":"top"}, + {"id":"a42104592fedd4c7","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_mem","toSide":"bottom"}, + {"id":"c45aaa564ae87a7c","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_file","toSide":"bottom"}, + {"id":"write_flow_1","fromNode":"20145fd68e8aaa75","fromSide":"top","toNode":"06d4a92778dd83c8","toSide":"bottom","label":"初始化完成"}, + 
{"id":"write_flow_2","fromNode":"06d4a92778dd83c8","fromSide":"right","toNode":"f515ecb9aee18fc7","toSide":"left","label":"首个分片写入完成"}, + {"id":"write_flow_5","fromNode":"e2576a54f3f852b3","fromSide":"left","toNode":"155106edf5eb3cd7","toSide":"left","label":"检查完成状态"}, + {"id":"86a2aa913f7bd3d9","fromNode":"223edf4677db9339","fromSide":"bottom","toNode":"06d4a92778dd83c8","toSide":"top"} + ] +} \ No newline at end of file diff --git a/design.canvas b/design.canvas index 6323eab..346eb9d 100755 --- a/design.canvas +++ b/design.canvas @@ -1,75 +1,58 @@ { "nodes":[ - {"id":"cb82b904dab26671","type":"group","x":-3420,"y":-1000,"width":6580,"height":3540,"label":"data"}, - {"id":"batch_transfer_group","type":"group","x":-1580,"y":80,"width":4700,"height":1960,"label":"Batch数据传输实现"}, - {"id":"batch_receiver_group","type":"group","x":60,"y":140,"width":2940,"height":1820,"label":"接收端 [DataGeneral]"}, - {"id":"7a2427112a116cd3","type":"group","x":-3280,"y":120,"width":1464,"height":2340,"label":"WriteSplitDataTaskGroup"}, - {"id":"batch_sender_group","type":"group","x":-1520,"y":444,"width":1340,"height":1596,"label":"写入端 [DataGeneral]"}, - {"id":"d3ff298bf342a238","type":"group","x":-1490,"y":817,"width":1290,"height":1195,"label":"fn batch_transfer"}, - {"id":"data_write_flow","type":"group","x":-1620,"y":-640,"width":2680,"height":520,"label":"数据写入流程"}, - {"id":"storage_write_flow","type":"group","x":-20,"y":-580,"width":1020,"height":400,"label":"存储节点写入流程"}, - {"id":"7127ed217f71f72d","type":"group","x":-3260,"y":1140,"width":1010,"height":375,"label":"fn register_handle("}, - {"id":"97d3d9fd7432a861","type":"text","text":"# WriteSplitDataTaskHandle::submit_split() 实现 [异步发送]\n\n## match write_type {\n- WriteSplitDataType::File => 文件写入任务\n- WriteSplitDataType::Mem => 内存写入任务\n}\n\n## 发送任务 [channel阻塞]\ntx.send(task).await","x":-2209,"y":1120,"width":347,"height":445}, - {"id":"4dbe01dc59cea4c2","type":"text","text":"pub struct WriteSplitDataTaskHandle {\n tx: mpsc::Sender>,\n write_type: WriteSplitDataType,\n}","x":-2572,"y":1660,"width":418,"height":160}, - {"id":"task_pool","type":"text","text":"# 任务池 [handles]\n\n- 收集任务句柄\n- 等待任务完成 [阻塞]\n- 错误聚合","x":-1414,"y":1732,"width":300,"height":260,"color":"5"}, - {"id":"86a8707f54d19c74","type":"text","text":"join all,并返回","x":-1389,"y":1549,"width":250,"height":60}, - {"id":"data_reader","type":"text","text":"# 数据读取器 [DataSource]\n\n- 计算数据范围\n- 读取数据块 [阻塞]\n- 错误传播","x":-970,"y":1163,"width":300,"height":200,"color":"3"}, - {"id":"write_handle_submit","type":"text","text":"# submit_split() [异步发送]\n\n## 执行流程\n1. 根据write_type构造任务\n2. 发送到任务通道\n3. 
错误处理和日志\n\n## 阻塞特性\n- File写入: IO阻塞\n- Mem写入: 内存阻塞\n- 通道发送: channel阻塞","x":-2209,"y":1120,"width":347,"height":445,"color":"2"}, - {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1580,"y":-550,"width":200,"height":100,"color":"1"}, - {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1580,"y":-420,"width":200,"height":100,"color":"1"}, - {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1580,"y":-250,"width":200,"height":100,"color":"1"}, - {"id":"storage_node_3","type":"text","text":"存储节点1","x":-445,"y":-590,"width":150,"height":60,"color":"3"}, - {"id":"concurrency_controller","type":"text","text":"# 并发控制器 [Semaphore]\n\n- 最大并发数: 32\n- 许可获取 [阻塞]\n- 许可释放 [非阻塞]\n- RAII风格管理","x":-970,"y":1536,"width":300,"height":200,"color":"2"}, - {"id":"5009f9e4bcc6ed6c","type":"text","text":"### 加入任务池","x":-920,"y":1902,"width":250,"height":60}, - {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1470,"y":488,"width":300,"height":290,"color":"1"}, - {"id":"data_source_interface","type":"text","text":"# DataSource 接口设计\n\n## trait DataSource: Send + Sync + 'static\n```rust\nasync fn size(&self) -> WSResult;\nasync fn read_chunk(&self, offset: usize, size: usize) -> WSResult>;\nfn block_type(&self) -> BatchDataBlockType;\n```\n\n## 实现类型\n1. FileDataSource\n - 文件路径管理\n - 异步IO操作\n - 错误处理\n\n2. MemDataSource\n - Arc<[u8]>共享数据\n - 边界检查\n - 零拷贝优化","x":-1459,"y":864,"width":390,"height":646,"color":"4"}, - {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源管理","x":-2780,"y":-720,"width":340,"height":214,"color":"4"}, - {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-2310,"y":-662,"width":330,"height":156,"color":"4"}, - {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-2425,"y":-467,"width":280,"height":275,"color":"4"}, - {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-2952,"y":-132,"width":342,"height":158,"color":"4"}, - {"id":"data_item","type":"text","text":"# 数据项处理\n\n## enum WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n","x":-3010,"y":140,"width":450,"height":280,"color":"3"}, - {"id":"b0205b4457afeb2b","type":"text","text":"## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-2350,"y":202,"width":364,"height":178}, - {"id":"write_task_mem","type":"text","text":"# ToMem 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToMem\n- shared_mem: SharedMemHolder\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [内存写入阻塞]\n1. shared_mem.write(offset, data)\n2. 错误记录:\n tracing::error!(\"Failed to write memory data at offset {}\")\n","x":-2670,"y":486,"width":400,"height":436,"color":"2"}, - {"id":"write_task_file","type":"text","text":"# ToFile 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToFile\n- file_path: PathBuf\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [文件IO阻塞]\n1. OpenOptions::new()\n .create(true)\n .write(true)\n2. seek(offset)\n3. write_all(data)\n4. 
错误记录:\n tracing::error!(\"Failed to write file data at offset {}\")\n","x":-2236,"y":504,"width":400,"height":400,"color":"1"}, - {"id":"02d1bafb13062e3b","type":"text","text":"### batch 接口要和 write作区分\n#### batch是主动推送完整数据\n#### write是将数据写入到系统\n\n- wirte中也会使用batch接口用来在写入之前并行推送缓存","x":-1514,"y":142,"width":445,"height":228}, - {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-3070,"y":-446,"width":330,"height":234,"color":"4"}, - {"id":"1ec171d545e8995d","type":"text","text":"## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理","x":-3105,"y":754,"width":300,"height":150}, - {"id":"06d4a92778dd83c8","type":"text","text":"# 第一个分片开始写入 [阻塞执行]\n\n## 初始化写入\nfn start_first_split(data: Vec) -> Result<(), WSError> {\n let task = self.build_task(data, 0);\n self.tasks.push(task);\n self.current_size += data.len();\n Ok(())\n}\n\n## 错误处理\n- 写入失败记录日志\n- 返回具体错误类型","x":-3240,"y":1161,"width":455,"height":310}, - {"id":"f515ecb9aee18fc7","type":"text","text":"# 后续写入 [异步执行]\n\n## 状态管理\n- 写入任务追踪\n- 并发控制\n- 写入顺序保证","x":-2572,"y":1178,"width":302,"height":275}, - {"id":"e2576a54f3f852b3","type":"text","text":"# process_tasks() 实现 [阻塞循环]\n\n## 循环处理 [select阻塞]\n1. try_complete() 检查完成状态\n2. tokio::select! {\n - rx.recv() => 接收新任务\n - futures::future::select_all(tasks) => 等待任务完成\n}\n\n## 完成条件\n- current_size >= expected_size\n- 返回 proto::DataItem","x":-3055,"y":1780,"width":377,"height":460}, - {"id":"155106edf5eb3cd7","type":"text","text":"# try_complete() 实现 [同步检查]\n\n## 返回 Option\n- ToFile => proto::DataItem::new_file_data()\n- ToMem => proto::DataItem::new_mem_data()","x":-3094,"y":2260,"width":455,"height":180}, - {"id":"223edf4677db9339","type":"text","text":"pub struct WriteSplitDataManager {\n // 只存储任务句柄\n handles: DashMap,\n}","x":-3110,"y":960,"width":610,"height":140}, - {"id":"20145fd68e8aaa75","type":"text","text":"# 构造 [同步初始化]\n\n## 任务组初始化\nfn new_task_group(type_: WriteSplitDataType) -> Self {\n let (tx, rx) = mpsc::channel(32);\n Self {\n type_,\n tasks: Vec::new(),\n rx,\n expected_size: 0,\n current_size: 0,\n }\n}\n\n## 参数验证\n- 检查写入类型\n- 验证初始参数","x":-3205,"y":1540,"width":450,"height":220}, - {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-440,"y":-240,"width":150,"height":60,"color":"5"}, - {"id":"storage_node_5","type":"text","text":"存储节点3","x":-440,"y":-440,"width":150,"height":60,"color":"3"}, - {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-160,"y":784,"width":250,"height":120,"color":"2"}, - {"id":"f8ade98240211305","type":"text","text":"### [tokio::spawn]\n","x":-945,"y":1784,"width":250,"height":60}, - {"id":"9fa1c2f8d08978bb","type":"text","text":"## 判断还有分片?","x":-935,"y":1404,"width":230,"height":80,"color":"3"}, - {"id":"rpc_caller","type":"text","text":"# RPC调用器 [view.rpc_call]\n\n- 构造请求\n- 发送数据 [阻塞]\n- 等待响应 [阻塞]\n- 错误处理","x":-520,"y":1267,"width":300,"height":200,"color":"4"}, - {"id":"parallel_task","type":"text","text":"# 并行任务 \n- 持有信号量许可\n- 执行RPC调用\n- 处理响应\n- 自动释放许可\n\n[独立执行]","x":-520,"y":1579,"width":300,"height":200,"color":"6"}, - {"id":"batch_transfer_main","type":"text","text":"# batch_transfer [主控制器]\n\n- 初始化数据源\n- 创建并发控制器\n- 启动传输任务\n- 等待任务完成\n\n[阻塞执行]","x":-970,"y":837,"width":370,"height":294,"color":"1"}, - {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 
返回调度决策","x":-1120,"y":-550,"width":200,"height":160,"color":"2"}, - {"id":"storage_group","type":"text","text":"存储节点组","x":-640,"y":-550,"width":150,"height":60,"color":"3"}, - {"id":"cache_group","type":"text","text":"缓存节点组","x":-640,"y":-350,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-440,"y":-400,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-440,"y":-320,"width":150,"height":60,"color":"5"}, - {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":20,"y":-540,"width":200,"height":280,"color":"1"}, - {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":360,"y":-540,"width":200,"height":120,"color":"2"}, - {"id":"storage_node_4","type":"text","text":"存储节点2","x":-440,"y":-520,"width":150,"height":60,"color":"3"}, - {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":620,"y":-320,"width":200,"height":100,"color":"4"}, - {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-160,"y":664,"width":250,"height":120,"color":"2"}, - {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-160,"y":424,"width":250,"height":240,"color":"2"}, - {"id":"handle_lookup","type":"text","text":"# Handle查找 [条件分支]\n\n## batch_receive_states.get()\n- 已存在: 验证version\n- 不存在: 创建新handle\n","x":395,"y":765,"width":410,"height":210,"color":"2"}, - {"id":"task_spawn_flow","type":"text","text":"# 任务生成流程 [异步执行]\n\n## 1. 提交分片数据handle.submit_split\n```rust\nstate.handle.submit_split(\n request.block_idx * DEFAULT_BLOCK_SIZE,\n request.data\n).await?\n```\n\n## 2. 更新响应器shared.update_responsor\n```rust\nstate.shared.update_responsor(responsor).await;\n```\nupdate时,旧的reponsor要先返回","x":480,"y":1106,"width":405,"height":538,"color":"3"}, - {"id":"e156c034cc9ec24f","type":"text","text":"## responsor send","x":595,"y":1755,"width":250,"height":60}, - {"id":"completion_monitor","type":"text","text":"# 完成监控 [独立任务]\n\n## 1. 等待写入完成\n```rust\nhandle.wait_all_tasks().await?;\n```\n\n## 2. 发送最终响应\n```rust\nif let Some(final_responsor) = \n shared.get_final_responsor().await {\n final_responsor.response(Ok(()))\n .await?;\n}\n```\n\n## 3. 
清理状态\n```rust\nbatch_receive_states.remove(&unique_id);\n```","x":1635,"y":1335,"width":445,"height":571,"color":"4"}, - {"id":"rpc_handle_batch_data","type":"text","text":"# DataGeneral::rpc_handle_batch_data\n\n## 处理流程","x":150,"y":478,"width":570,"height":118,"color":"1"}, - {"id":"2dbde64bc1dbac6a","type":"text","text":"## 响应任务(独立任务)","x":1760,"y":1132,"width":365,"height":110}, - {"id":"state_manager","type":"text","text":"# 状态管理器 [DataGeneral.batch_receive_states]\n\n## 核心数据结构\n```rust\nDashMap\n```\n- BatchReceiveState\n\t- handle: WriteSplitDataTaskHandle\n\t- shared: SharedWithBatchHandler\n## 生命周期\n- 创建: 首次接收分片\n- 更新: 每次接收分片\n- 删除: 写入完成","x":840,"y":171,"width":640,"height":486,"color":"1"}, - {"id":"write_task_handle","type":"text","text":"# 写入任务句柄 [WriteSplitDataTaskHandle]\n\n## 关键对象\n```rust\npub struct WriteSplitDataTaskHandle {\n tx: mpsc::Sender>,\n write_type: WriteSplitDataType,\n}\n```\n\n## 核心函数\n```rust\nasync fn submit_split(\n &self,\n offset: usize,\n data: Vec\n) -> WSResult<()>\n```","x":956,"y":765,"width":505,"height":530,"color":"2"}, - {"id":"task_spawner","type":"text","text":"# tokio::spawn 响应任务\n\n```\n\n## 核心函数\n```rust\nfn spawn_write_task(\n data: Vec,\n offset: usize\n) -> JoinHandle<()>\n```","x":1008,"y":1385,"width":400,"height":400,"color":"3"}, - {"id":"batch_data_constants","type":"text","text":"# 批量数据常量定义\n\n## 数据块大小\n```rust\n/// 默认数据块大小 (4MB)\nconst DEFAULT_BLOCK_SIZE: usize = 4 * 1024 * 1024;\n```\n\n## 数据分片索引\n```rust\n/// 数据分片在整体数据中的偏移量\npub type DataSplitIdx = usize;\n```","x":-160,"y":1052,"width":400,"height":380,"color":"4"}, - {"id":"batch_data_request","type":"text","text":"# Batch RPC Proto定义\n\n## 数据块类型\nenum BatchDataBlockType {\n MEMORY = 0; // 内存数据块\n FILE = 1; // 文件数据块\n}\n\n## 操作类型\nenum DataOpeType {\n Read = 0;\n Write = 1;\n}\n\n## 请求ID\nmessage BatchRequestId {\n uint32 node_id = 1; // 节点ID\n uint64 sequence = 2; // 原子自增序列号\n}\n\n## 请求消息\nmessage BatchDataRequest {\n BatchRequestId request_id = 1; // 请求唯一标识(节点ID + 序列号)\n BatchDataBlockType block_type = 2; // 数据块类型(文件/内存)\n uint32 block_index = 3; // 数据块索引\n bytes data = 4; // 数据块内容\n DataOpeType operation = 5; // 操作类型\n bytes unique_id = 6; // 数据唯一标识\n uint64 version = 7; // 数据版本\n}\n\n## 响应消息\nmessage BatchDataResponse {\n BatchRequestId request_id = 1; // 对应请求ID\n bool success = 2; // 处理状态\n string error_message = 3; // 错误信息\n uint64 version = 4; // 处理后的版本\n}\n","x":-155,"y":1536,"width":490,"height":552,"color":"2"} + {"id":"cb82b904dab26671","type":"group","x":-3400,"y":-960,"width":4820,"height":3520,"label":"data"}, + {"id":"batch_transfer_group","type":"group","x":-1560,"y":120,"width":2940,"height":1900,"label":"Batch数据传输实现"}, + {"id":"7a2427112a116cd3","x":-3260,"y":160,"width":1464,"height":2340,"type":"group","label":"WriteSplitDataTaskGroup"}, + {"id":"batch_receiver_group","type":"group","x":80,"y":200,"width":1240,"height":1560,"label":"接收端 [DataGeneral]"}, + {"id":"data_write_flow","type":"group","x":-1600,"y":-600,"width":2680,"height":520,"label":"数据写入流程"}, + {"id":"batch_sender_group","type":"group","x":-1500,"y":200,"width":1320,"height":1000,"label":"写入端 [DataGeneral]"}, + {"id":"c03f87b1d9551659","type":"group","x":180,"y":282,"width":1110,"height":878,"label":"DataGeneral::rpc_handle_batch_data"}, + {"id":"storage_write_flow","type":"group","x":0,"y":-540,"width":1020,"height":400,"label":"存储节点写入流程"}, + {"id":"7127ed217f71f72d","type":"group","x":-3240,"y":1180,"width":1010,"height":375,"label":"fn register_handle("}, + 
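{"id":"editor_note_submit_split_sketch","type":"text","text":"# Editor note: hedged Rust sketch\n\nA minimal sketch of the submit_split pattern described by the surrounding nodes, assuming tokio mpsc; the WriteSplitDataTask fields and the channel payload type are assumptions for illustration, not confirmed by this repo.\n\n```rust\nuse tokio::sync::mpsc;\n\n// Hypothetical payload carried by the handle's channel.\npub struct WriteSplitDataTask {\n    pub offset: usize,\n    pub data: Vec<u8>,\n}\n\npub struct WriteSplitDataTaskHandle {\n    tx: mpsc::Sender<WriteSplitDataTask>,\n}\n\nimpl WriteSplitDataTaskHandle {\n    // Queue one split; awaits only while the bounded channel is full.\n    pub async fn submit_split(\n        &self,\n        offset: usize,\n        data: Vec<u8>,\n    ) -> Result<(), mpsc::error::SendError<WriteSplitDataTask>> {\n        self.tx.send(WriteSplitDataTask { offset, data }).await\n    }\n}\n```","x":-2980,"y":2520,"width":560,"height":460},
+ 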
{"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1560,"y":-380,"width":200,"height":100,"color":"1"}, + {"id":"storage_node_3","type":"text","text":"存储节点1","x":-425,"y":-550,"width":150,"height":60,"color":"3"}, + {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1560,"y":-510,"width":200,"height":100,"color":"1"}, + {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1560,"y":-210,"width":200,"height":100,"color":"1"}, + {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-2290,"y":-622,"width":330,"height":156,"color":"4"}, + {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源生命周期","x":-2760,"y":-680,"width":340,"height":214,"color":"4"}, + {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-2405,"y":-427,"width":280,"height":275,"color":"4"}, + {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-2932,"y":-92,"width":342,"height":158,"color":"4"}, + {"id":"1ec171d545e8995d","type":"text","text":"## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理","x":-3085,"y":794,"width":300,"height":150}, + {"id":"223edf4677db9339","type":"text","text":"pub struct WriteSplitDataManager {\n // 只存储任务句柄\n handles: DashMap,\n}","x":-3090,"y":1000,"width":610,"height":140}, + {"id":"97d3d9fd7432a861","type":"text","text":"# WriteSplitDataTaskHandle::submit_split() 实现 [异步发送]\n\n## match write_type {\n- WriteSplitDataType::File => 文件写入任务\n- WriteSplitDataType::Mem => 内存写入任务\n}\n\n## 发送任务 [channel阻塞]\ntx.send(task).await","x":-2189,"y":1160,"width":347,"height":445}, + {"id":"e2576a54f3f852b3","type":"text","text":"# process_tasks() 实现 [阻塞循环]\n\n## 循环处理 [select阻塞]\n1. try_complete() 检查完成状态\n2. tokio::select! {\n - rx.recv() => 接收新任务\n - futures::future::select_all(tasks) => 等待任务完成\n}\n\n## 完成条件\n- current_size >= expected_size\n- 返回 proto::DataItem","x":-3035,"y":1820,"width":377,"height":460}, + {"id":"155106edf5eb3cd7","type":"text","text":"# try_complete() 实现 [同步检查]\n\n## 返回 Option\n- ToFile => proto::DataItem::new_file_data()\n- ToMem => proto::DataItem::new_mem_data()","x":-3074,"y":2300,"width":455,"height":180}, + {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-3050,"y":-406,"width":330,"height":234,"color":"4"}, + {"id":"data_item","type":"text","text":"# 数据项处理\n\n## enum WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n","x":-2990,"y":180,"width":450,"height":280,"color":"3"}, + {"id":"f515ecb9aee18fc7","type":"text","text":"# 后续写入 [异步执行]\n\n## 状态管理\n- 写入任务追踪\n- 并发控制\n- 写入顺序保证","x":-2552,"y":1218,"width":302,"height":275}, + {"id":"06d4a92778dd83c8","type":"text","text":"# 第一个分片开始写入 [阻塞执行]\n\n## 初始化写入\nfn start_first_split(data: Vec) -> Result<(), WSError> {\n let task = self.build_task(data, 0);\n self.tasks.push(task);\n self.current_size += data.len();\n Ok(())\n}\n\n## 错误处理\n- 写入失败记录日志\n- 返回具体错误类型","x":-3220,"y":1201,"width":455,"height":310}, + {"id":"write_task_file","type":"text","text":"# ToFile 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToFile\n- file_path: PathBuf\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [文件IO阻塞]\n1. 
OpenOptions::new()\n .create(true)\n .write(true)\n2. seek(offset)\n3. write_all(data)\n4. 错误记录:\n tracing::error!(\"Failed to write file data at offset {}\")\n","x":-2216,"y":544,"width":400,"height":400,"color":"1"}, + {"id":"write_task_mem","type":"text","text":"# ToMem 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToMem\n- shared_mem: SharedMemHolder\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [内存写入阻塞]\n1. shared_mem.write(offset, data)\n2. 错误记录:\n tracing::error!(\"Failed to write memory data at offset {}\")\n","x":-2650,"y":526,"width":400,"height":436,"color":"2"}, + {"id":"b0205b4457afeb2b","type":"text","text":"## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-2330,"y":242,"width":364,"height":178}, + {"id":"4dbe01dc59cea4c2","type":"text","text":"pub struct WriteSplitDataTaskHandle {\n tx: mpsc::Sender>,\n write_type: WriteSplitDataType,\n}","x":-2552,"y":1700,"width":418,"height":160}, + {"id":"20145fd68e8aaa75","type":"text","text":"# 构造 [同步初始化]\n\n## 任务组初始化\nfn new_task_group(type_: WriteSplitDataType) -> Self {\n let (tx, rx) = mpsc::channel(32);\n Self {\n type_,\n tasks: Vec::new(),\n rx,\n expected_size: 0,\n current_size: 0,\n }\n}\n\n## 参数验证\n- 检查写入类型\n- 验证初始参数","x":-3185,"y":1580,"width":450,"height":220}, + {"id":"batch_manager","type":"text","text":"# BatchTransfer\n\n## 核心字段\n- unique_id: Vec\n- version: u64\n- block_type: BatchDataBlockType\n- total_blocks: u32\n- data_sender: mpsc::Sender\n- write_task: JoinHandle\n\n## 主要方法\n1. new()\n - 创建数据传输channel\n - 计算数据分片\n - 启动写入任务\n2. add_block()\n - 通过channel发送数据块\n - 检查完成状态\n3. complete()\n - 等待写入任务完成\n - 发送结果通知","x":-1100,"y":744,"width":300,"height":400,"color":"1"}, + {"id":"storage_node_4","type":"text","text":"存储节点2","x":-420,"y":-480,"width":150,"height":60,"color":"3"}, + {"id":"storage_node_5","type":"text","text":"存储节点3","x":-420,"y":-400,"width":150,"height":60,"color":"3"}, + {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 
返回调度决策","x":-1100,"y":-510,"width":200,"height":160,"color":"2"}, + {"id":"storage_group","type":"text","text":"存储节点组","x":-620,"y":-510,"width":150,"height":60,"color":"3"}, + {"id":"cache_group","type":"text","text":"缓存节点组","x":-620,"y":-310,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-420,"y":-360,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-420,"y":-280,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-420,"y":-200,"width":150,"height":60,"color":"5"}, + {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":40,"y":-500,"width":200,"height":280,"color":"1"}, + {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":380,"y":-500,"width":200,"height":120,"color":"2"}, + {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":-280,"width":200,"height":100,"color":"4"}, + {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-140,"y":290,"width":250,"height":240,"color":"2"}, + {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-110,"y":620,"width":250,"height":120,"color":"2"}, + {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-140,"y":824,"width":250,"height":120,"color":"2"}, + {"id":"write_split_init","type":"text","text":"1. 创建WriteSplitDataTaskGroup\n```rust\nlet (group, handle) = WriteSplitDataTaskGroup::new(\n unique_id,\n splits,\n block_type,\n).await?;\n```\n- unique_id: 任务唯一标识\n- splits: 数据分片范围\n- block_type: 写入类型(File/Mem)","x":670,"y":376,"width":600,"height":310,"color":"1"}, + {"id":"fac5077e07b5a23e","type":"text","text":"1. 使用WriteSplitDataTaskManager\n查询handle","x":260,"y":302,"width":300,"height":160,"color":"2"}, + {"id":"write_split_handle","type":"text","text":"2. 
使用WriteSplitDataTaskHandle\n```rust\nhandle.submit_split(\n DataSplitIdx { offset },\n data_item\n).await;\n```\n- 通过handle异步提交写入任务\n- 可以并发提交多个分片\n- handle可以跨线程使用","x":230,"y":533,"width":360,"height":306,"color":"2"}, + {"id":"write_split_complete","type":"text","text":"\nprocess_tasks(独立task)\n- 循环等待新任务\n- 执行写入操作\n- 检查完成状态","x":860,"y":1360,"width":380,"height":306,"color":"4"}, + {"id":"bd7d0a299fe215df","x":230,"y":948,"width":310,"height":156,"type":"text","text":"struct SharedWithBatchHandler\n记录最新的request responsor\n\n旧的responsor直接返回"}, + {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1400,"y":310,"width":300,"height":300,"color":"1"}, + {"id":"864d06859ca25962","type":"text","text":"spawn 一个独立task,调用handle的等待结束接口\n\n结束之后,从share状态里取出最新responsor,响应完整接收成功信息","x":640,"y":944,"width":300,"height":176,"color":"2"} ], "edges":[ {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, @@ -82,6 +65,10 @@ {"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, + {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_manager","toSide":"left","label":"创建批量传输"}, + {"id":"initiator_to_request1","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, + {"id":"initiator_to_request2","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, + {"id":"initiator_to_request3","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, {"id":"b5a17c0afede8e4a","fromNode":"data_general_core","fromSide":"right","toNode":"133214da264cfe72","toSide":"bottom"}, {"id":"2ad5991c43fd6098","fromNode":"data_general_core","fromSide":"right","toNode":"821e415b6438e20d","toSide":"top"}, {"id":"caa45c92a135042c","fromNode":"data_general_core","fromSide":"right","toNode":"core_functions","toSide":"left"}, @@ -102,34 +89,15 @@ {"id":"write_flow_2","fromNode":"06d4a92778dd83c8","fromSide":"right","toNode":"f515ecb9aee18fc7","toSide":"left","label":"首个分片写入完成"}, {"id":"write_flow_5","fromNode":"e2576a54f3f852b3","fromSide":"left","toNode":"155106edf5eb3cd7","toSide":"left","label":"检查完成状态"}, {"id":"86a2aa913f7bd3d9","fromNode":"223edf4677db9339","fromSide":"bottom","toNode":"06d4a92778dd83c8","toSide":"top"}, - {"id":"a99c309f19fd9853","fromNode":"batch_request1","fromSide":"right","toNode":"rpc_handle_batch_data","toSide":"left"}, - {"id":"batch_data_flow2","fromNode":"batch_data_constants","fromSide":"top","toNode":"batch_request3","toSide":"bottom","label":"使用常量"}, - {"id":"5e772afc67478d04","fromNode":"rpc_handle_batch_data","fromSide":"bottom","toNode":"handle_lookup","toSide":"top"}, - {"id":"concurrency_to_task","fromNode":"concurrency_controller","fromSide":"bottom","toNode":"f8ade98240211305","toSide":"top"}, - {"id":"task_to_rpc","fromNode":"parallel_task","fromSide":"top","toNode":"rpc_caller","toSide":"bottom","label":"调用"}, - {"id":"213831c4b82c9e93","fromNode":"data_source_interface","fromSide":"right","toNode":"data_reader","toSide":"left"}, - 
{"id":"7218875ebe7967fa","fromNode":"batch_transfer_main","fromSide":"bottom","toNode":"data_reader","toSide":"top"}, - {"id":"4b20152fe7211934","fromNode":"data_reader","fromSide":"bottom","toNode":"9fa1c2f8d08978bb","toSide":"top"}, - {"id":"4da12698f8ee3b63","fromNode":"rpc_caller","fromSide":"top","toNode":"batch_request3","toSide":"left"}, - {"id":"f4671fc434a3d0e1","fromNode":"f8ade98240211305","fromSide":"bottom","toNode":"5009f9e4bcc6ed6c","toSide":"top","label":"\n"}, - {"id":"9f748faecadaaa42","fromNode":"f8ade98240211305","fromSide":"right","toNode":"parallel_task","toSide":"left"}, - {"id":"8115e7d6d539f0c0","fromNode":"5009f9e4bcc6ed6c","fromSide":"right","toNode":"data_reader","toSide":"right"}, - {"id":"9e8cb09dfe630443","fromNode":"9fa1c2f8d08978bb","fromSide":"bottom","toNode":"concurrency_controller","toSide":"top"}, - {"id":"d95b89e25235928f","fromNode":"9fa1c2f8d08978bb","fromSide":"left","toNode":"86a8707f54d19c74","toSide":"right"}, - {"id":"9debe9b97cdaf245","fromNode":"86a8707f54d19c74","fromSide":"bottom","toNode":"task_pool","toSide":"top"}, - {"id":"a63472bc8934c7f9","fromNode":"5009f9e4bcc6ed6c","fromSide":"left","toNode":"task_pool","toSide":"right"}, - {"id":"f3ca63243b2c22f7","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_transfer_main","toSide":"left"}, - {"id":"handle_to_spawner","fromNode":"write_task_handle","fromSide":"bottom","toNode":"task_spawner","toSide":"top","label":"tokio::spawn()"}, - {"id":"lookup_to_submit","fromNode":"handle_lookup","fromSide":"right","toNode":"write_task_handle","toSide":"left","label":"\n"}, - {"id":"9abc95f005b8b2d8","fromNode":"task_spawner","fromSide":"right","toNode":"2dbde64bc1dbac6a","toSide":"left"}, - {"id":"e6bd3dfca32e245b","fromNode":"handle_lookup","fromSide":"bottom","toNode":"task_spawn_flow","toSide":"top"}, - {"id":"3fca8aa5c568a44d","fromNode":"task_spawner","fromSide":"left","toNode":"task_spawn_flow","toSide":"right"}, - {"id":"0a095928ebb7ac26","fromNode":"2dbde64bc1dbac6a","fromSide":"bottom","toNode":"completion_monitor","toSide":"top"}, - {"id":"dcf437aa83674d1a","fromNode":"completion_monitor","fromSide":"left","toNode":"e156c034cc9ec24f","toSide":"right"}, - {"id":"7ae0cf5ea0bc0b06","fromNode":"task_spawn_flow","fromSide":"bottom","toNode":"e156c034cc9ec24f","toSide":"top"}, - {"id":"49b65724e2a3b08f","fromNode":"e156c034cc9ec24f","fromSide":"left","toNode":"batch_request3","toSide":"right"}, - {"id":"lookup_to_state","fromNode":"handle_lookup","fromSide":"top","toNode":"state_manager","toSide":"bottom","label":"查找/创建"}, - {"id":"monitor_to_state","fromNode":"completion_monitor","fromSide":"right","toNode":"state_manager","toSide":"bottom","label":"清理"}, - {"id":"facc3fcfb55cf19d","fromNode":"batch_data_request","fromSide":"top","toNode":"batch_request3","toSide":"bottom"} + {"id":"write_1_4","fromNode":"write_split_init","fromSide":"bottom","toNode":"write_split_complete","toSide":"top","label":"等待完成"}, + {"id":"a99c309f19fd9853","fromNode":"batch_request1","fromSide":"right","toNode":"fac5077e07b5a23e","toSide":"left"}, + {"id":"90a20648ba7c7b0d","fromNode":"fac5077e07b5a23e","fromSide":"right","toNode":"write_split_init","toSide":"left"}, + {"id":"c8e5b437e8d768ef","fromNode":"write_split_init","fromSide":"top","toNode":"fac5077e07b5a23e","toSide":"right","label":"插入handle"}, + {"id":"e9443a3b677ce562","fromNode":"fac5077e07b5a23e","fromSide":"bottom","toNode":"write_split_handle","toSide":"top"}, + 
{"id":"aac9c2ea6e65a686","fromNode":"batch_request2","fromSide":"right","toNode":"fac5077e07b5a23e","toSide":"left"}, + {"id":"886cbf70f878e962","fromNode":"batch_request3","fromSide":"right","toNode":"fac5077e07b5a23e","toSide":"left"}, + {"id":"612e4d1938f911b0","fromNode":"write_split_handle","fromSide":"right","toNode":"write_split_init","toSide":"left","label":"提交分片"}, + {"id":"dbff6534cbb03fce","fromNode":"864d06859ca25962","fromSide":"left","toNode":"bd7d0a299fe215df","toSide":"right"}, + {"id":"9c31b6c98bcb3875","fromNode":"batch_request3","fromSide":"right","toNode":"bd7d0a299fe215df","toSide":"top","label":"记录responsor"} ] } \ No newline at end of file diff --git a/design.canvas.tmp.20250206220621 b/design.canvas.tmp.20250206220621 new file mode 100644 index 0000000..1c5b83a --- /dev/null +++ b/design.canvas.tmp.20250206220621 @@ -0,0 +1,78 @@ +{ + "nodes":[ + {"id":"cb82b904dab26671","type":"group","x":-1600,"y":-680,"width":2780,"height":2200,"label":"data"}, + {"id":"core_module_group","type":"group","x":-1600,"y":-680,"width":1000,"height":780,"label":"数据管理核心模块"}, + {"id":"data_write_flow","type":"group","x":-380,"y":140,"width":1520,"height":460,"label":"数据写入流程"}, + {"id":"batch_transfer_group","type":"group","x":-740,"y":640,"width":1880,"height":820,"label":"Batch数据传输实现"}, + {"id":"parallel_group","type":"group","x":-740,"y":1500,"width":1880,"height":600,"label":"并发执行结构"}, + {"id":"storage_write_flow","type":"group","x":-380,"y":-300,"width":1520,"height":400,"label":"存储节点写入流程"}, + {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源生命周期","x":-1200,"y":-660,"width":340,"height":214,"color":"4"}, + {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-845,"y":-407,"width":280,"height":275,"color":"4"}, + {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-1403,"y":-339,"width":330,"height":100,"color":"4"}, + {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-1415,"y":-53,"width":342,"height":158,"color":"4"}, + {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-730,"y":-602,"width":330,"height":156,"color":"4"}, + {"id":"5c4357fc2216ea51","type":"text","text":"## batch写入接口\n- 缓存主动推送\n- 并行写入支持\n- 错误恢复机制\n- 内存复用优化","x":-525,"y":-192,"width":250,"height":120,"color":"4"}, + {"id":"data_item","type":"text","text":"# 数据项处理\n\n## WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理\n## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-160,"y":-472,"width":460,"height":520,"color":"3"}, + {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":-340,"y":-260,"width":200,"height":280,"color":"1"}, + {"id":"storage_node_2","type":"text","text":"存储节点2\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":-340,"y":-120,"width":200,"height":280,"color":"1"}, + {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":0,"y":-260,"width":200,"height":120,"color":"2"}, + {"id":"write_task_2","type":"text","text":"写入任务2\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 
错误重试","x":0,"y":-120,"width":200,"height":120,"color":"2"}, + {"id":"local_storage_1","type":"text","text":"本地存储1\n- 持久化数据\n- 版本管理\n- 空间回收","x":320,"y":-260,"width":200,"height":100,"color":"3"}, + {"id":"local_storage_2","type":"text","text":"本地存储2\n- 持久化数据\n- 版本管理\n- 空间回收","x":320,"y":-120,"width":200,"height":100,"color":"3"}, + {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":-260,"width":200,"height":100,"color":"4"}, + {"id":"write_result_2","type":"text","text":"写入结果2\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":-120,"width":200,"height":100,"color":"4"}, + {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-340,"y":170,"width":200,"height":100,"color":"1"}, + {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-340,"y":300,"width":200,"height":100,"color":"1"}, + {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 返回调度决策","x":120,"y":170,"width":200,"height":160,"color":"2"}, + {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-340,"y":430,"width":200,"height":100,"color":"1"}, + {"id":"storage_group","type":"text","text":"存储节点组","x":600,"y":170,"width":150,"height":60,"color":"3"}, + {"id":"storage_node_3","type":"text","text":"存储节点1","x":800,"y":120,"width":150,"height":60,"color":"3"}, + {"id":"storage_node_4","type":"text","text":"存储节点2","x":800,"y":200,"width":150,"height":60,"color":"3"}, + {"id":"storage_node_5","type":"text","text":"存储节点3","x":800,"y":280,"width":150,"height":60,"color":"3"}, + {"id":"cache_group","type":"text","text":"缓存节点组","x":600,"y":370,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_1","type":"text","text":"缓存节点1","x":800,"y":320,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_2","type":"text","text":"缓存节点2","x":800,"y":400,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_3","type":"text","text":"缓存节点3","x":800,"y":480,"width":150,"height":60,"color":"5"}, + {"id":"batch_manager","type":"text","text":"# BatchManager\n\n## 管理功能\n- 创建传输任务\n- 分配请求ID\n- 跟踪传输状态\n- 错误恢复\n\n## 数据处理\n- 分块管理\n- 数据校验\n- 内存复用\n- 并发控制","x":-700,"y":700,"width":300,"height":300,"color":"1"}, + {"id":"batch_transfer","type":"text","text":"# BatchTransfer\n\n## 传输控制\n- 数据分块\n- 进度跟踪\n- 错误处理\n- 资源管理\n\n## 数据流\n- 发送队列\n- 接收缓冲\n- 内存池\n- 流量控制","x":-700,"y":1020,"width":300,"height":300,"color":"2"}, + {"id":"parallel_executor","type":"text","text":"# 并发执行器\n\n## 任务调度\n- 优先级队列\n- 负载均衡\n- 资源限制\n- 任务分组\n\n## 执行控制\n- 状态跟踪\n- 超时处理\n- 错误恢复\n- 取消机制","x":-700,"y":1540,"width":300,"height":300,"color":"3"}, + {"id":"task_group","type":"text","text":"# 任务组\n\n## 组织结构\n- 任务依赖\n- 执行顺序\n- 资源分配\n- 状态同步\n\n## 控制功能\n- 进度监控\n- 故障处理\n- 数据一致性\n- 完成确认","x":-340,"y":1540,"width":300,"height":300,"color":"4"}, + {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-300,"y":700,"width":300,"height":180,"color":"1"}, + {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":100,"y":700,"width":250,"height":120,"color":"2"}, + {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":100,"y":840,"width":250,"height":120,"color":"2"}, + {"id":"batch_request3","type":"text","text":"# 
BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":100,"y":980,"width":250,"height":120,"color":"2"}, + {"id":"batch_response1","type":"text","text":"# BatchDataResponse(1)\n- request_id\n- success\n- error_message\n- version","x":450,"y":700,"width":250,"height":120,"color":"3"}, + {"id":"batch_response2","type":"text","text":"# BatchDataResponse(2)\n- request_id\n- success\n- error_message\n- version","x":450,"y":840,"width":250,"height":120,"color":"3"}, + {"id":"batch_response3","type":"text","text":"# BatchDataResponse(3)\n- request_id\n- success\n- error_message\n- version","x":450,"y":980,"width":250,"height":120,"color":"3"} + ], + "edges":[ + {"id":"storage_to_task1","fromNode":"storage_node_1","fromSide":"right","toNode":"write_task_1","toSide":"left","label":"分片数据"}, + {"id":"storage_to_task2","fromNode":"storage_node_2","fromSide":"right","toNode":"write_task_2","toSide":"left","label":"分片数据"}, + {"id":"task_to_local1","fromNode":"write_task_1","fromSide":"right","toNode":"local_storage_1","toSide":"left","label":"持久化"}, + {"id":"task_to_local2","fromNode":"write_task_2","fromSide":"right","toNode":"local_storage_2","toSide":"left","label":"持久化"}, + {"id":"local_to_result1","fromNode":"local_storage_1","fromSide":"right","toNode":"write_result_1","toSide":"left","label":"写入状态"}, + {"id":"local_to_result2","fromNode":"local_storage_2","fromSide":"right","toNode":"write_result_2","toSide":"left","label":"写入状态"}, + {"id":"phase1_to_phase2","fromNode":"general_phase1","fromSide":"bottom","toNode":"general_phase2","toSide":"top","label":"DataItems"}, + {"id":"phase2_to_master","fromNode":"general_phase2","fromSide":"right","toNode":"master_node","toSide":"left","label":"调度请求"}, + {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, + {"id":"phase2_to_phase3","fromNode":"general_phase2","fromSide":"bottom","toNode":"general_phase3","toSide":"top","label":"决策信息"}, + {"id":"phase3_to_storage","fromNode":"general_phase3","fromSide":"right","toNode":"storage_group","toSide":"left","label":"分发存储任务"}, + {"id":"storage_to_nodes","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_3","toSide":"left"}, + {"id":"storage_to_nodes2","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_4","toSide":"left"}, + {"id":"storage_to_nodes3","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_5","toSide":"left"}, + {"id":"phase3_to_cache","fromNode":"general_phase3","fromSide":"right","toNode":"cache_group","toSide":"left","label":"分发缓存任务"}, + {"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, + {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, + {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, + {"id":"batch_flow1","fromNode":"batch_manager","fromSide":"right","toNode":"batch_transfer","toSide":"left","label":"创建传输"}, + {"id":"batch_flow2","fromNode":"batch_transfer","fromSide":"right","toNode":"parallel_executor","toSide":"left","label":"执行任务"}, + {"id":"parallel_flow","fromNode":"parallel_executor","fromSide":"right","toNode":"task_group","toSide":"left","label":"任务调度"}, + {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"left","toNode":"batch_manager","toSide":"right","label":"创建批量传输"}, + 
{"id":"initiator_to_request1","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, + {"id":"initiator_to_request2","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, + {"id":"initiator_to_request3","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, + {"id":"request1_to_response1","fromNode":"batch_request1","fromSide":"right","toNode":"batch_response1","toSide":"left","label":"处理响应"}, + {"id":"request2_to_response2","fromNode":"batch_request2","fromSide":"right","toNode":"batch_response2","toSide":"left","label":"处理响应"}, + {"id":"request3_to_response3","fromNode":"batch_request3","fromSide":"right","toNode":"batch_response3","toSide":"left","label":"处理响应"} + ] +} diff --git a/design.canvas.tmp.20250206221714 b/design.canvas.tmp.20250206221714 new file mode 100755 index 0000000..70199ee --- /dev/null +++ b/design.canvas.tmp.20250206221714 @@ -0,0 +1,82 @@ +{ + "nodes":[ + {"id":"cb82b904dab26671","type":"group","x":-1600,"y":-960,"width":2780,"height":2660,"label":"data"}, + {"id":"batch_transfer_group","type":"group","x":-1600,"y":640,"width":2740,"height":1060,"label":"Batch数据传输实现"}, + {"id":"core_module_group","type":"group","x":-1600,"y":-820,"width":1920,"height":780,"label":"数据管理核心模块"}, + {"id":"data_write_flow","type":"group","x":-1600,"y":80,"width":2680,"height":520,"label":"数据写入流程"}, + {"id":"2e84a4ef9e137fb7","type":"group","x":-1560,"y":1300,"width":2680,"height":820,"label":"batch handler 具体逻辑"}, + {"id":"storage_write_flow","type":"group","x":0,"y":140,"width":1020,"height":400,"label":"存储节点写入流程"}, + {"id":"cache_group","type":"text","text":"缓存节点组","x":-620,"y":370,"width":150,"height":60,"color":"5"}, + {"id":"storage_node_4","type":"text","text":"存储节点2","x":-420,"y":200,"width":150,"height":60,"color":"3"}, + {"id":"storage_node_5","type":"text","text":"存储节点3","x":-420,"y":280,"width":150,"height":60,"color":"3"}, + {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-420,"y":320,"width":150,"height":60,"color":"5"}, + {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1560,"y":170,"width":200,"height":100,"color":"1"}, + {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1560,"y":300,"width":200,"height":100,"color":"1"}, + {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1560,"y":430,"width":200,"height":100,"color":"1"}, + {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 
返回调度决策","x":-1100,"y":170,"width":200,"height":160,"color":"2"}, + {"id":"storage_group","type":"text","text":"存储节点组","x":-620,"y":170,"width":150,"height":60,"color":"3"}, + {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-420,"y":400,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-420,"y":480,"width":150,"height":60,"color":"5"}, + {"id":"batch_transfer","type":"text","text":"# BatchTransfer\n\n## 传输控制\n- 数据分块\n- 进度跟踪\n- 错误处理\n- 资源管理\n\n## 数据流\n- 发送队列\n- 接收缓冲\n- 内存池\n- 流量控制","x":-1215,"y":1120,"width":430,"height":460,"color":"2"}, + {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-660,"y":1120,"width":250,"height":120,"color":"2"}, + {"id":"batch_response3","type":"text","text":"# BatchDataResponse(3)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":1120,"width":250,"height":120,"color":"3"}, + {"id":"batch_manager","type":"text","text":"# BatchManager\n\n## 管理功能\n- 创建传输任务\n- 分配请求ID\n- 跟踪传输状态\n- 错误恢复\n\n## 数据处理\n- 分块管理\n- 数据校验\n- 内存复用\n- 并发控制","x":-1560,"y":700,"width":300,"height":300,"color":"1"}, + {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1160,"y":700,"width":300,"height":300,"color":"1"}, + {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-660,"y":700,"width":250,"height":240,"color":"2"}, + {"id":"batch_response1","type":"text","text":"# BatchDataResponse(1)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":700,"width":250,"height":240,"color":"3"}, + {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-660,"y":980,"width":250,"height":120,"color":"2"}, + {"id":"batch_response2","type":"text","text":"# BatchDataResponse(2)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":980,"width":310,"height":60,"color":"3"}, + {"id":"storage_node_3","type":"text","text":"存储节点1","x":-425,"y":130,"width":150,"height":60,"color":"3"}, + {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":40,"y":180,"width":200,"height":280,"color":"1"}, + {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":380,"y":180,"width":200,"height":120,"color":"2"}, + {"id":"local_storage_1","type":"text","text":"本地存储1\n- 持久化数据\n- 版本管理\n- 空间回收","x":700,"y":180,"width":200,"height":100,"color":"3"}, + {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":400,"width":200,"height":100,"color":"4"}, + {"id":"1ec171d545e8995d","x":214,"y":-636,"width":250,"height":60,"type":"text","text":""}, + {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-730,"y":-742,"width":330,"height":156,"color":"4"}, + {"id":"data_item","type":"text","text":"# 数据项处理\n\n## WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理\n## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-160,"y":-612,"width":460,"height":520,"color":"3"}, + {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 
资源生命周期","x":-1200,"y":-800,"width":340,"height":214,"color":"4"}, + {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-845,"y":-547,"width":280,"height":275,"color":"4"}, + {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-1490,"y":-526,"width":330,"height":234,"color":"4"}, + {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-1372,"y":-212,"width":342,"height":158,"color":"4"}, + {"id":"5c4357fc2216ea51","type":"text","text":"## batch写入接口\n- 缓存主动推送\n- 并行写入支持\n- 错误恢复机制\n- 内存复用优化","x":-620,"y":-212,"width":250,"height":120,"color":"4"}, + {"id":"batch_handler_1","type":"text","text":"# BatchHandler 核心组件\n\n## call_batch_data()\n- 分块大小: 1MB\n- 数据分割\n- 创建channel\n- 创建传输任务\n- 并发发送数据块\n- 等待响应","x":-1520,"y":1340,"width":300,"height":240,"color":"1"}, + {"id":"batch_handler_2","type":"text","text":"# BatchManager 管理器\n\n## 核心功能\n- create_transfer()\n * 生成请求ID\n * 创建BatchTransfer\n * 管理传输生命周期\n\n## 状态管理\n- 传输进度跟踪\n- 错误处理与恢复\n- 并发控制","x":-1120,"y":1340,"width":300,"height":300,"color":"2"}, + {"id":"batch_handler_3","type":"text","text":"# BatchTransfer 传输器\n\n## 属性\n- unique_id\n- version\n- block_type\n- total_blocks\n\n## 数据通道\n- data_sender\n- write_task\n- tx","x":-720,"y":1340,"width":300,"height":300,"color":"3"}, + {"id":"batch_handler_4","type":"text","text":"# 数据块处理\n\n## add_block()\n- 校验块索引\n- 发送数据到channel\n- 返回处理状态\n\n## complete()\n- 关闭data_sender\n- 等待write_task\n- 发送结果","x":-320,"y":1340,"width":300,"height":300,"color":"4"}, + {"id":"batch_handler_5","type":"text","text":"# 错误处理\n\n## 错误类型\n- BatchTransferError\n- InvalidDataType\n- WriteTaskError\n\n## 错误恢复\n- 重试机制\n- 超时控制\n- 资源清理","x":80,"y":1340,"width":300,"height":300,"color":"5"}, + {"id":"batch_handler_6","type":"text","text":"# 并发控制\n\n## 并发限制\n- 建议并发数=3\n- 有界任务池\n- 队列管理\n\n## 资源管理\n- 内存复用\n- 通道缓冲\n- 任务调度","x":480,"y":1340,"width":300,"height":300,"color":"6"}, + {"id":"batch_handler_7","type":"text","text":"# 数据分片\n\n## calculate_splits()\n- 计算分片范围\n- 优化分片大小\n- 内存占用控制\n\n## 分片策略\n- 固定大小(1MB)\n- 动态调整\n- 性能优化","x":880,"y":1340,"width":300,"height":300,"color":"3"} + ], + "edges":[ + {"id":"storage_to_task1","fromNode":"storage_node_1","fromSide":"right","toNode":"write_task_1","toSide":"left","label":"分片数据"}, + {"id":"task_to_local1","fromNode":"write_task_1","fromSide":"right","toNode":"local_storage_1","toSide":"left","label":"持久化"}, + {"id":"local_to_result1","fromNode":"local_storage_1","fromSide":"right","toNode":"write_result_1","toSide":"left","label":"写入状态"}, + {"id":"phase1_to_phase2","fromNode":"general_phase1","fromSide":"bottom","toNode":"general_phase2","toSide":"top","label":"DataItems"}, + {"id":"phase2_to_master","fromNode":"general_phase2","fromSide":"right","toNode":"master_node","toSide":"left","label":"调度请求"}, + {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, + {"id":"phase2_to_phase3","fromNode":"general_phase2","fromSide":"bottom","toNode":"general_phase3","toSide":"top","label":"决策信息"}, + {"id":"phase3_to_storage","fromNode":"general_phase3","fromSide":"right","toNode":"storage_group","toSide":"left","label":"分发存储任务"}, + {"id":"storage_to_nodes","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_3","toSide":"left"}, + 
{"id":"storage_to_nodes2","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_4","toSide":"left"}, + {"id":"storage_to_nodes3","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_5","toSide":"left"}, + {"id":"phase3_to_cache","fromNode":"general_phase3","fromSide":"right","toNode":"cache_group","toSide":"left","label":"分发缓存任务"}, + {"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, + {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, + {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, + {"id":"batch_flow1","fromNode":"batch_manager","fromSide":"right","toNode":"batch_transfer","toSide":"left","label":"创建传输"}, + {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"left","toNode":"batch_manager","toSide":"right","label":"创建批量传输"}, + {"id":"initiator_to_request1","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, + {"id":"initiator_to_request2","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, + {"id":"initiator_to_request3","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, + {"id":"request1_to_response1","fromNode":"batch_request1","fromSide":"right","toNode":"batch_response1","toSide":"left","label":"处理响应"}, + {"id":"request2_to_response2","fromNode":"batch_request2","fromSide":"right","toNode":"batch_response2","toSide":"left","label":"处理响应"}, + {"id":"request3_to_response3","fromNode":"batch_request3","fromSide":"right","toNode":"batch_response3","toSide":"left","label":"处理响应"}, + {"id":"b5a17c0afede8e4a","fromNode":"data_general_core","fromSide":"right","toNode":"133214da264cfe72","toSide":"bottom"}, + {"id":"2ad5991c43fd6098","fromNode":"data_general_core","fromSide":"right","toNode":"821e415b6438e20d","toSide":"top"}, + {"id":"caa45c92a135042c","fromNode":"data_general_core","fromSide":"right","toNode":"core_functions","toSide":"left"}, + {"id":"09c7b9957992d62d","fromNode":"data_general_core","fromSide":"right","toNode":"b31695207931d96e","toSide":"left"}, + {"id":"adfa1cca1009ff43","fromNode":"data_general_core","fromSide":"right","toNode":"5c4357fc2216ea51","toSide":"left"}, + {"id":"ef995a514a2210bb","fromNode":"5c4357fc2216ea51","fromSide":"right","toNode":"data_item","toSide":"left"}, + {"id":"3d79872a234731c0","fromNode":"cache_node_3","fromSide":"bottom","toNode":"batch_transfer_group","toSide":"top"} + ] +} \ No newline at end of file diff --git a/design.canvas.tmp.20250206221714.backup b/design.canvas.tmp.20250206221714.backup new file mode 100755 index 0000000..08a2b9b --- /dev/null +++ b/design.canvas.tmp.20250206221714.backup @@ -0,0 +1,75 @@ +{ + "nodes":[ + {"id":"cb82b904dab26671","type":"group","x":-1600,"y":-960,"width":2780,"height":2660,"label":"data"}, + {"id":"batch_transfer_group","type":"group","x":-1600,"y":640,"width":2740,"height":1060,"label":"Batch数据传输实现"}, + {"id":"core_module_group","type":"group","x":-1600,"y":-820,"width":1920,"height":780,"label":"数据管理核心模块"}, + {"id":"data_write_flow","type":"group","x":-1600,"y":80,"width":2680,"height":520,"label":"数据写入流程"}, + {"id":"2e84a4ef9e137fb7","x":-737,"y":1300,"width":1377,"height":460,"type":"group","label":"batch handler 具体逻辑"}, + 
{"id":"storage_write_flow","type":"group","x":0,"y":140,"width":1020,"height":400,"label":"存储节点写入流程"}, + {"id":"cache_group","type":"text","text":"缓存节点组","x":-620,"y":370,"width":150,"height":60,"color":"5"}, + {"id":"storage_node_4","type":"text","text":"存储节点2","x":-420,"y":200,"width":150,"height":60,"color":"3"}, + {"id":"storage_node_5","type":"text","text":"存储节点3","x":-420,"y":280,"width":150,"height":60,"color":"3"}, + {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-420,"y":320,"width":150,"height":60,"color":"5"}, + {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1560,"y":170,"width":200,"height":100,"color":"1"}, + {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1560,"y":300,"width":200,"height":100,"color":"1"}, + {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1560,"y":430,"width":200,"height":100,"color":"1"}, + {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 返回调度决策","x":-1100,"y":170,"width":200,"height":160,"color":"2"}, + {"id":"storage_group","type":"text","text":"存储节点组","x":-620,"y":170,"width":150,"height":60,"color":"3"}, + {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-420,"y":400,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-420,"y":480,"width":150,"height":60,"color":"5"}, + {"id":"batch_transfer","type":"text","text":"# BatchTransfer\n\n## 传输控制\n- 数据分块\n- 进度跟踪\n- 错误处理\n- 资源管理\n\n## 数据流\n- 发送队列\n- 接收缓冲\n- 内存池\n- 流量控制","x":-1215,"y":1120,"width":430,"height":460,"color":"2"}, + {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-660,"y":1120,"width":250,"height":120,"color":"2"}, + {"id":"batch_response3","type":"text","text":"# BatchDataResponse(3)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":1120,"width":250,"height":120,"color":"3"}, + {"id":"batch_manager","type":"text","text":"# BatchManager\n\n## 管理功能\n- 创建传输任务\n- 分配请求ID\n- 跟踪传输状态\n- 错误恢复\n\n## 数据处理\n- 分块管理\n- 数据校验\n- 内存复用\n- 并发控制","x":-1560,"y":700,"width":300,"height":300,"color":"1"}, + {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1160,"y":700,"width":300,"height":300,"color":"1"}, + {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-660,"y":700,"width":250,"height":240,"color":"2"}, + {"id":"batch_response1","type":"text","text":"# BatchDataResponse(1)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":700,"width":250,"height":240,"color":"3"}, + {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-660,"y":980,"width":250,"height":120,"color":"2"}, + {"id":"batch_response2","type":"text","text":"# BatchDataResponse(2)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":980,"width":310,"height":60,"color":"3"}, + {"id":"storage_node_3","type":"text","text":"存储节点1","x":-425,"y":130,"width":150,"height":60,"color":"3"}, + {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 
错误信息","x":40,"y":180,"width":200,"height":280,"color":"1"}, + {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":380,"y":180,"width":200,"height":120,"color":"2"}, + {"id":"local_storage_1","type":"text","text":"本地存储1\n- 持久化数据\n- 版本管理\n- 空间回收","x":700,"y":180,"width":200,"height":100,"color":"3"}, + {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":400,"width":200,"height":100,"color":"4"}, + {"id":"1ec171d545e8995d","x":214,"y":-636,"width":250,"height":60,"type":"text","text":""}, + {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-730,"y":-742,"width":330,"height":156,"color":"4"}, + {"id":"data_item","type":"text","text":"# 数据项处理\n\n## WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理\n## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-160,"y":-612,"width":460,"height":520,"color":"3"}, + {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源生命周期","x":-1200,"y":-800,"width":340,"height":214,"color":"4"}, + {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-845,"y":-547,"width":280,"height":275,"color":"4"}, + {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-1490,"y":-526,"width":330,"height":234,"color":"4"}, + {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-1372,"y":-212,"width":342,"height":158,"color":"4"}, + {"id":"5c4357fc2216ea51","type":"text","text":"## batch写入接口\n- 缓存主动推送\n- 并行写入支持\n- 错误恢复机制\n- 内存复用优化","x":-620,"y":-212,"width":250,"height":120,"color":"4"} + ], + "edges":[ + {"id":"storage_to_task1","fromNode":"storage_node_1","fromSide":"right","toNode":"write_task_1","toSide":"left","label":"分片数据"}, + {"id":"task_to_local1","fromNode":"write_task_1","fromSide":"right","toNode":"local_storage_1","toSide":"left","label":"持久化"}, + {"id":"local_to_result1","fromNode":"local_storage_1","fromSide":"right","toNode":"write_result_1","toSide":"left","label":"写入状态"}, + {"id":"phase1_to_phase2","fromNode":"general_phase1","fromSide":"bottom","toNode":"general_phase2","toSide":"top","label":"DataItems"}, + {"id":"phase2_to_master","fromNode":"general_phase2","fromSide":"right","toNode":"master_node","toSide":"left","label":"调度请求"}, + {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, + {"id":"phase2_to_phase3","fromNode":"general_phase2","fromSide":"bottom","toNode":"general_phase3","toSide":"top","label":"决策信息"}, + {"id":"phase3_to_storage","fromNode":"general_phase3","fromSide":"right","toNode":"storage_group","toSide":"left","label":"分发存储任务"}, + {"id":"storage_to_nodes","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_3","toSide":"left"}, + {"id":"storage_to_nodes2","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_4","toSide":"left"}, + {"id":"storage_to_nodes3","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_5","toSide":"left"}, + {"id":"phase3_to_cache","fromNode":"general_phase3","fromSide":"right","toNode":"cache_group","toSide":"left","label":"分发缓存任务"}, + 
{"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, + {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, + {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, + {"id":"batch_flow1","fromNode":"batch_manager","fromSide":"right","toNode":"batch_transfer","toSide":"left","label":"创建传输"}, + {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"left","toNode":"batch_manager","toSide":"right","label":"创建批量传输"}, + {"id":"initiator_to_request1","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, + {"id":"initiator_to_request2","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, + {"id":"initiator_to_request3","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, + {"id":"request1_to_response1","fromNode":"batch_request1","fromSide":"right","toNode":"batch_response1","toSide":"left","label":"处理响应"}, + {"id":"request2_to_response2","fromNode":"batch_request2","fromSide":"right","toNode":"batch_response2","toSide":"left","label":"处理响应"}, + {"id":"request3_to_response3","fromNode":"batch_request3","fromSide":"right","toNode":"batch_response3","toSide":"left","label":"处理响应"}, + {"id":"b5a17c0afede8e4a","fromNode":"data_general_core","fromSide":"right","toNode":"133214da264cfe72","toSide":"bottom"}, + {"id":"2ad5991c43fd6098","fromNode":"data_general_core","fromSide":"right","toNode":"821e415b6438e20d","toSide":"top"}, + {"id":"caa45c92a135042c","fromNode":"data_general_core","fromSide":"right","toNode":"core_functions","toSide":"left"}, + {"id":"09c7b9957992d62d","fromNode":"data_general_core","fromSide":"right","toNode":"b31695207931d96e","toSide":"left"}, + {"id":"adfa1cca1009ff43","fromNode":"data_general_core","fromSide":"right","toNode":"5c4357fc2216ea51","toSide":"left"}, + {"id":"ef995a514a2210bb","fromNode":"5c4357fc2216ea51","fromSide":"right","toNode":"data_item","toSide":"left"}, + {"id":"3d79872a234731c0","fromNode":"cache_node_3","fromSide":"bottom","toNode":"batch_transfer_group","toSide":"top"} + ] +} \ No newline at end of file diff --git a/review.md b/review.md index 4636297..85f5a0b 100755 --- a/review.md +++ b/review.md @@ -43,6 +43,11 @@ tx: Option>> } ``` +- 主要方法: + 1. `new()`: 创建新的传输任务 + 2. `add_block()`: 添加数据块 + 3. `complete()`: 完成传输处理 + 4. 
`calculate_splits()`: 计算数据分片
 
 #### WriteSplitDataTaskGroup
 - 功能:管理数据分片写入任务组
@@ -272,10 +277,10 @@ impl WriteSplitDataTaskGroup {
     }
 }
 
-// WriteSplitDataManager 管理器
+// WriteSplitDataTaskGroup 管理器
 pub struct WriteSplitDataManager {
     // 只存储任务句柄
-    handles: DashMap<proto::BatchRequestId, WriteSplitDataTaskHandle>,
+    handles: DashMap<UniqueId, WriteSplitDataTaskHandle>,
 }
 
 impl WriteSplitDataManager {
@@ -288,29 +293,29 @@ impl WriteSplitDataManager {
     // 注册新的任务句柄
     pub fn register_handle(
         &self,
-        request_id: proto::BatchRequestId,
+        unique_id: UniqueId,
         handle: WriteSplitDataTaskHandle,
     ) -> WSResult<()> {
         // 检查是否已存在
-        if self.handles.contains_key(&request_id) {
+        if self.handles.contains_key(&unique_id) {
             return Err(WSError::WsDataError(WsDataErr::WriteDataFailed {
-                request_id,
+                unique_id,
             }));
         }
 
         // 存储句柄
-        self.handles.insert(request_id, handle);
+        self.handles.insert(unique_id, handle);
         Ok(())
     }
 
     // 获取已存在的任务句柄
-    pub fn get_handle(&self, request_id: &proto::BatchRequestId) -> Option<WriteSplitDataTaskHandle> {
-        self.handles.get(request_id).map(|h| h.clone())
+    pub fn get_handle(&self, unique_id: &UniqueId) -> Option<WriteSplitDataTaskHandle> {
+        self.handles.get(unique_id).map(|h| h.clone())
    }
 
     // 移除任务句柄
-    pub fn remove_handle(&self, request_id: &proto::BatchRequestId) {
-        self.handles.remove(request_id);
+    pub fn remove_handle(&self, unique_id: &UniqueId) {
+        self.handles.remove(unique_id);
    }
 }
 
@@ -418,221 +423,228 @@ impl WriteSplitDataTaskHandle {
 
 #### 2.2 BatchTransfer 实现
 
 ```rust
-/// 数据源接口
+pub struct BatchTransfer {
+    unique_id: Vec<u8>,
+    version: u64,
+    block_type: BatchDataBlockType,
+    total_blocks: u32,
+    block_size: usize,
+    data: Arc<dyn DataSource>, // 文件或内存数据源
+    write_task: JoinHandle<WSResult<proto::DataItem>>,
+}
+
+impl BatchTransfer {
+    /// 创建新的批量传输任务
+    pub async fn new(
+        unique_id: Vec<u8>,
+        version: u64,
+        data: Arc<dyn DataSource>,
+        block_size: usize,
+        manager: Arc<WriteSplitDataManager>,
+    ) -> WSResult<Self> {
+        // 计算分片信息
+        let total_size = data.size().await?;
+        let total_blocks = (total_size + block_size - 1) / block_size;
+        let block_type = data.block_type();
+
+        // 创建写入任务组和handle
+        let (group, handle) = WriteSplitDataTaskGroup::new(
+            unique_id.clone(),
+            calculate_splits(total_blocks as u32, block_size),
+            block_type,
+            manager,
+        ).await;
+
+        // 启动写入任务
+        let write_task = tokio::spawn(async move {
+            let mut current_block = 0;
+            let mut in_flight_tasks = FuturesUnordered::new();
+
+            // 循环直到所有数据块都发送完成
+            loop {
+                // 如果还有数据块且未达到最大并发数,则读取并发送新数据块
+                while current_block < total_blocks && in_flight_tasks.len() < 32 {
+                    // 读取数据块
+                    let offset = current_block * block_size;
+                    let size = block_size.min(total_size - offset);
+                    let block_data = data.read_chunk(offset, size).await?;
+
+                    // 提交数据到写入任务组
+                    let submit_future = handle.submit_split(
+                        current_block as usize * block_size,
+                        block_data,
+                    );
+                    in_flight_tasks.push(submit_future);
+                    current_block += 1;
+                }
+
+                // 等待任意一个任务完成
+                match in_flight_tasks.next().await {
+                    Some(result) => {
+                        // 处理任务结果
+                        result?;
+                    }
+                    None if current_block >= total_blocks => {
+                        // 所有数据块都已发送且完成
+                        break;
+                    }
+                    None => {
+                        // 不应该发生:还有数据块但没有运行中的任务
+                        return Err(WSError::BatchError(WsBatchErr::InternalError {
+                            message: "No in-flight tasks but blocks remaining".into()
+                        }));
+                    }
+                }
+            }
+
+            // 等待所有任务完成
+            while let Some(result) = in_flight_tasks.next().await {
+                result?;
+            }
+
+            // 等待写入任务组处理完所有数据
+            handle.wait_all_tasks().await?;
+            group.process_tasks().await
+        });
+
+        Ok(Self {
+            unique_id,
+            version,
+            block_type,
+            total_blocks: total_blocks as u32,
+            block_size,
+            data,
+            write_task,
+        })
+    }
+
+    /// 等待传输完成
+    pub async fn wait_complete(self) -> WSResult<proto::DataItem> {
+        self.write_task.await?
+    }
+}
+
+/// 数据源trait
 #[async_trait]
 pub trait DataSource: Send + Sync + 'static {
     /// 获取数据总大小
     async fn size(&self) -> WSResult<usize>;
+
     /// 读取指定范围的数据
     async fn read_chunk(&self, offset: usize, size: usize) -> WSResult<Vec<u8>>;
+
     /// 获取数据块类型
     fn block_type(&self) -> BatchDataBlockType;
 }
 
-/// 批量传输数据
-pub async fn batch_transfer(
-    unique_id: Vec<u8>,
-    version: u64,
-    target_node: NodeID,
-    data: Arc<dyn DataSource>,
-    view: DataGeneralView,
-) -> WSResult<()> {
-    let total_size = data.size().await?;
-    let total_blocks = (total_size + DEFAULT_BLOCK_SIZE - 1) / DEFAULT_BLOCK_SIZE;
-    let semaphore = Arc::new(Semaphore::new(32));
-    let mut handles = Vec::new();
-
-    // 发送所有数据块
-    for block_idx in 0..total_blocks {
-        // 获取信号量许可
-        let permit = semaphore.clone().acquire_owned().await.unwrap();
-
-        let offset = block_idx as usize * DEFAULT_BLOCK_SIZE;
-        let size = DEFAULT_BLOCK_SIZE.min(total_size - offset);
-
-        // 读取数据块
-        let block_data = data.read_chunk(offset, size).await?;
-
-        // 构造请求
-        let request = proto::BatchDataRequest {
-            request_id: Some(proto::BatchRequestId {
-                node_id: target_node as u32,
-                sequence: block_idx as u32,
-            }),
-            block_type: data.block_type() as i32,
-            block_index: block_idx as u32,
-            data: block_data,
-            operation: proto::DataOpeType::Write as i32,
-            unique_id: unique_id.clone(),
-            version,
-        };
-
-        // 发送请求
-        let view = view.clone();
-        let handle = tokio::spawn(async move {
-            let _permit = permit; // 持有permit直到任务完成
-            let resp = view.data_general().rpc_call_batch_data.call(
-                view.p2p(),
-                target_node,
-                request,
-                Some(Duration::from_secs(30)),
-            ).await?;
-
-            if !resp.success {
-                return Err(WsDataError::BatchTransferFailed {
-                    node: target_node,
-                    batch: block_idx as u32,
-                    reason: resp.error_message,
-                }.into());
-            }
-
-            Ok(())
-        });
-
-        handles.push(handle);
-    }
-
-    // 等待所有请求完成
-    for handle in handles {
-        handle.await??;
-    }
-
-    Ok(())
+/// 文件数据源实现
+pub struct FileDataSource {
+    path: PathBuf,
 }
-```
-
-#### 2.3 DataGeneral RPC处理实现
+#[async_trait]
+impl DataSource for FileDataSource {
+    async fn size(&self) -> WSResult<usize> {
+        tokio::fs::metadata(&self.path)
+            .await
+            .map(|m| m.len() as usize)
+            .map_err(|e| WSError::BatchError(WsBatchErr::ReadSourceFailed {
+                source: format!("{}", self.path.display()),
+                error: e.to_string(),
+            }))
+    }
 
-```rust
-/// 默认数据块大小 (4MB)
-const DEFAULT_BLOCK_SIZE: usize = 4 * 1024 * 1024;
+    async fn read_chunk(&self, offset: usize, size: usize) -> WSResult<Vec<u8>> {
+        let mut file = tokio::fs::File::open(&self.path).await?;
+        let mut buf = vec![0; size];
+        file.seek(SeekFrom::Start(offset as u64)).await?;
+        file.read_exact(&mut buf).await?;
+        Ok(buf)
+    }
 
-/// 批量数据传输状态
-struct BatchTransferState {
-    handle: WriteSplitDataTaskHandle,
-    shared: SharedWithBatchHandler,
+    fn block_type(&self) -> BatchDataBlockType {
+        BatchDataBlockType::File
+    }
 }
 
-/// 共享状态,用于记录最新的请求响应器
-#[derive(Clone)]
-struct SharedWithBatchHandler {
-    responsor: Arc<Mutex<Option<RPCResponsor<BatchDataRequest>>>>,
+/// 内存数据源实现
+pub struct MemDataSource {
+    data: Arc<[u8]>,
 }
 
-impl SharedWithBatchHandler {
-    fn new() -> Self {
-        Self {
-            responsor: Arc::new(Mutex::new(None)),
-        }
+#[async_trait]
+impl DataSource for MemDataSource {
+    async fn size(&self) -> WSResult<usize> {
+        Ok(self.data.len())
    }
 
-    async fn update_responsor(&self, responsor: RPCResponsor<BatchDataRequest>) {
-        let mut guard = self.responsor.lock().await;
-        if let Some(old_responsor) = guard.take() {
-            // 旧的responsor直接返回成功
-            if let Err(e) = old_responsor.response(Ok(())).await {
-                tracing::error!("Failed to respond to old request: {}", e);
-            }
-        }
-        *guard = Some(responsor);
+    async fn read_chunk(&self, offset: usize, size: usize) -> WSResult<Vec<u8>> {
+        Ok(self.data[offset..offset+size].to_vec())
     }
 
-    async fn get_final_responsor(&self) -> Option<RPCResponsor<BatchDataRequest>> {
-        self.responsor.lock().await.take()
+    fn block_type(&self) -> BatchDataBlockType {
+        BatchDataBlockType::Mem
    }
 }
 
-impl DataGeneral {
-    /// 创建新的DataGeneral实例
-    pub fn new() -> Self {
-        Self {
-            batch_receive_states: DashMap::new(),
-            // ...其他字段
-        }
-    }
-}
+#### 2.3 DataGeneral RPC处理实现
+
+```rust
+/// 默认数据块大小 (4MB)
+const DEFAULT_BLOCK_SIZE: usize = 4 * 1024 * 1024;
 
 impl DataGeneral {
     /// 处理批量数据写入请求
     ///
     /// # 处理流程
-    /// 1. 从batch_receive_states查询或创建传输状态
+    /// 1. 使用WriteSplitDataTaskManager查询handle
     /// 2. 使用WriteSplitDataTaskHandle提交写入任务
     /// 3. 等待写入完成并返回结果
     pub async fn rpc_handle_batch_data(
         &self,
         request: BatchDataRequest,
-        responsor: RPCResponsor<BatchDataRequest>,
     ) -> WSResult<()> {
-        // 1. 从batch_receive_states查询或创建传输状态
-        let state = if let Some(state) = self.batch_receive_states.get(&request.unique_id) {
-            // 验证版本号
-            if state.handle.version() != request.version {
-                tracing::error!(
-                    "Version mismatch for transfer {}, expected {}, got {}",
-                    hex::encode(&request.unique_id),
-                    state.handle.version(),
-                    request.version
-                );
-                return Err(WSError::BatchError(WsBatchErr::VersionMismatch {
-                    expected: state.handle.version(),
-                    actual: request.version,
-                }));
-            }
-            state
-        } else {
-            // 创建新的写入任务组
-            let (group, handle) = WriteSplitDataTaskGroup::new(
-                request.unique_id.clone(),
-                calculate_splits(request.total_blocks),
-                request.block_type,
-            ).await?;
-
-            // 创建共享状态
-            let shared = SharedWithBatchHandler::new();
-            let state = BatchTransferState { handle: handle.clone(), shared: shared.clone() };
-
-            // 启动等待完成的任务
-            let unique_id = request.unique_id.clone();
-            let batch_receive_states = self.batch_receive_states.clone();
-            tokio::spawn(async move {
-                // 等待所有任务完成
-                if let Err(e) = handle.wait_all_tasks().await {
+        // 1. 使用WriteSplitDataTaskManager查询handle
+        let handle = match self.write_manager.get_handle(&request.unique_id) {
+            Some(handle) => {
+                // 验证版本号
+                if handle.version() != request.version {
                     tracing::error!(
-                        "Failed to complete transfer {}: {}",
-                        hex::encode(&unique_id),
-                        e
+                        "Version mismatch for transfer {}, expected {}, got {}",
+                        hex::encode(&request.unique_id),
+                        handle.version(),
+                        request.version
                     );
-                    // 获取最后的responsor并返回错误
-                    if let Some(final_responsor) = shared.get_final_responsor().await {
-                        if let Err(e) = final_responsor.response(Err(e)).await {
-                            tracing::error!("Failed to send error response: {}", e);
-                        }
-                    }
-                    // 清理状态
-                    batch_receive_states.remove(&unique_id);
-                    return;
+                    return Err(WSError::BatchError(WsBatchErr::VersionMismatch {
+                        expected: handle.version(),
+                        actual: request.version,
+                    }));
                 }
+                handle
+            }
+            None => {
+                // 创建新的写入任务组
+                let (group, handle) = WriteSplitDataTaskGroup::new(
+                    request.unique_id.clone(),
+                    calculate_splits(request.total_blocks),
+                    request.block_type,
+                ).await?;
+
+                // 注册handle
+                self.write_manager.register_handle(
+                    request.unique_id.clone(),
+                    handle.clone(),
+                    group,
+                );
 
-                // 获取最后的responsor并返回成功
-                if let Some(final_responsor) = shared.get_final_responsor().await {
-                    if let Err(e) = final_responsor.response(Ok(())).await {
-                        tracing::error!("Failed to send success response: {}", e);
-                    }
-                }
-                // 清理状态
-                batch_receive_states.remove(&unique_id);
-            });
-
-            // 插入新状态
-            self.batch_receive_states.insert(request.unique_id.clone(), state);
-            self.batch_receive_states.get(&request.unique_id).unwrap()
+                handle
+            }
        };

         // 2. 使用WriteSplitDataTaskHandle提交写入任务
         let offset = request.block_idx as usize * DEFAULT_BLOCK_SIZE;
-        if let Err(e) = state.handle.submit_split(offset, request.data).await {
+        if let Err(e) = handle.submit_split(offset, request.data).await {
             tracing::error!(
                 "Failed to submit split for transfer {}, block {}: {}",
                 hex::encode(&request.unique_id),
@@ -642,9 +654,6 @@ impl DataGeneral {
             return Err(e);
         }
 
-        // 3. 更新共享状态中的responsor
-        state.shared.update_responsor(responsor).await;
-
         tracing::debug!(
             "Successfully submitted block {} for transfer {}",
             request.block_idx,
@@ -655,6 +664,12 @@ impl DataGeneral {
     }
 }
 
+/// 数据分片索引
+#[derive(Debug, Clone, Copy)]
+pub struct DataSplitIdx {
+    pub offset: usize,
+}
+
 /// 计算数据分片范围
 fn calculate_splits(total_blocks: u32) -> Vec<Range<usize>> {
     let mut splits = Vec::with_capacity(total_blocks as usize);
     for i in 0..total_blocks {
@@ -665,91 +680,3 @@ fn calculate_splits(total_blocks: u32) -> Vec<Range<usize>> {
     }
     splits
 }
-
-/// 数据源实现
-pub struct FileDataSource {
-    path: PathBuf,
-    file: Option<tokio::fs::File>,
-}
-
-impl FileDataSource {
-    pub fn new(path: PathBuf) -> Self {
-        Self {
-            path,
-            file: None,
-        }
-    }
-}
-
-#[async_trait]
-impl DataSource for FileDataSource {
-    async fn size(&self) -> WSResult<usize> {
-        tokio::fs::metadata(&self.path)
-            .await
-            .map(|m| m.len() as usize)
-            .map_err(|e| WsDataError::ReadSourceFailed {
-                source: format!("{}", self.path.display()),
-                error: e.to_string(),
-            }.into())
-    }
-
-    async fn read_chunk(&self, offset: usize, size: usize) -> WSResult<Vec<u8>> {
-        let mut file = tokio::fs::File::open(&self.path).await
-            .map_err(|e| WsDataError::ReadSourceFailed {
-                source: format!("{}", self.path.display()),
-                error: e.to_string(),
-            })?;
-
-        file.seek(SeekFrom::Start(offset as u64)).await
-            .map_err(|e| WsDataError::ReadSourceFailed {
-                source: format!("{}", self.path.display()),
-                error: e.to_string(),
-            })?;
-
-        let mut buf = vec![0; size];
-        file.read_exact(&mut buf).await
-            .map_err(|e| WsDataError::ReadSourceFailed {
-                source: format!("{}", self.path.display()),
-                error: e.to_string(),
-            })?;
-
-        Ok(buf)
-    }
-
-    fn block_type(&self) -> BatchDataBlockType {
-        BatchDataBlockType::File
-    }
-}
-
-pub struct MemDataSource {
-    data: Arc<[u8]>,
-}
-
-impl MemDataSource {
-    pub fn new(data: Vec<u8>) -> Self {
-        Self {
-            data: data.into()
-        }
-    }
-}
-
-#[async_trait]
-impl DataSource for MemDataSource {
-    async fn size(&self) -> WSResult<usize> {
-        Ok(self.data.len())
    }
-
-    async fn read_chunk(&self, offset: usize, size: usize) -> WSResult<Vec<u8>> {
-        if offset + size > self.data.len() {
-            return Err(WsDataError::ReadSourceFailed {
-                source: "memory".into(),
-                error: "read beyond bounds".into(),
-            }.into());
-        }
-        Ok(self.data[offset..offset + size].to_vec())
    }
-
-    fn block_type(&self) -> BatchDataBlockType {
-        BatchDataBlockType::Memory
-    }
-}
diff --git a/scripts/sync_md_files.py b/scripts/sync_md_files.py
index d4a3795..f574558 100644
--- a/scripts/sync_md_files.py
+++ b/scripts/sync_md_files.py
@@ -41,7 +41,3 @@ def sync_md_files(source_dir, target_dir):
     print(f"Starting sync from {source_dir} to {target_dir}")
     sync_md_files(source_dir, target_dir)
 
-    if args.direction == 'from_s3fs':
-        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
-        os.system(f"cp {target_dir}/design.canvas {target_dir}/design.canvas.{timestamp}.bak")
-        print(f"Backup design.canvas to design.canvas.{timestamp}.bak")

From 588e2796d9f61a9088e331785abbd0f369e13883 Mon Sep 17 00:00:00 2001
From: pa <1020401660@qq.com>
Date: Wed, 16 Apr 2025 01:26:06 +0800
Subject: [PATCH 13/26] Revert "batch basic design"

This reverts commit 
824aa6a82222df0f62704cc90654afddf9011b79. --- design 1.canvas | 90 ------------- design.canvas | 200 ++++++++++++++-------------- review.md | 276 +-------------------------------------- scripts/sync_md_files.py | 72 ++++++++-- 4 files changed, 159 insertions(+), 479 deletions(-) delete mode 100755 design 1.canvas diff --git a/design 1.canvas b/design 1.canvas deleted file mode 100755 index 9605161..0000000 --- a/design 1.canvas +++ /dev/null @@ -1,90 +0,0 @@ -{ - "nodes":[ - {"id":"cb82b904dab26671","type":"group","x":-3400,"y":-960,"width":4560,"height":3500,"label":"data"}, - {"id":"batch_transfer_group","type":"group","x":-1560,"y":120,"width":2300,"height":2040,"label":"Batch数据传输实现"}, - {"id":"write_split_group","type":"group","x":-3260,"y":120,"width":1470,"height":2360,"label":"WriteSplitDataTaskGroup 写入流程"}, - {"id":"data_write_flow","type":"group","x":-1600,"y":-600,"width":2680,"height":520,"label":"数据写入流程"}, - {"id":"batch_sender_group","type":"group","x":-1500,"y":200,"width":1000,"height":1000,"label":"写入端 [DataGeneral]"}, - {"id":"batch_receiver_group","type":"group","x":-400,"y":200,"width":1000,"height":900,"label":"接收端 [DataGeneral]"}, - {"id":"storage_write_flow","type":"group","x":0,"y":-540,"width":1020,"height":400,"label":"存储节点写入流程"}, - {"id":"7127ed217f71f72d","type":"group","x":-3240,"y":1180,"width":1010,"height":375,"label":"fn register_handle("}, - {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-3050,"y":-406,"width":330,"height":234,"color":"4"}, - {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-2290,"y":-622,"width":330,"height":156,"color":"4"}, - {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源生命周期","x":-2760,"y":-680,"width":340,"height":214,"color":"4"}, - {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-2405,"y":-427,"width":280,"height":275,"color":"4"}, - {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1560,"y":-380,"width":200,"height":100,"color":"1"}, - {"id":"storage_node_3","type":"text","text":"存储节点1","x":-425,"y":-550,"width":150,"height":60,"color":"3"}, - {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 
返回调度决策","x":-1100,"y":-510,"width":200,"height":160,"color":"2"}, - {"id":"storage_group","type":"text","text":"存储节点组","x":-620,"y":-510,"width":150,"height":60,"color":"3"}, - {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-2932,"y":-92,"width":342,"height":158,"color":"4"}, - {"id":"cache_group","type":"text","text":"缓存节点组","x":-620,"y":-310,"width":150,"height":60,"color":"5"}, - {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1560,"y":-510,"width":200,"height":100,"color":"1"}, - {"id":"data_item","type":"text","text":"# 数据项处理\n\n## enum WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n","x":-2990,"y":180,"width":450,"height":280,"color":"3"}, - {"id":"1ec171d545e8995d","type":"text","text":"## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理","x":-3085,"y":794,"width":300,"height":150}, - {"id":"223edf4677db9339","type":"text","text":"pub struct WriteSplitDataManager {\n    // 只存储任务句柄\n    handles: DashMap,\n}","x":-3090,"y":1000,"width":610,"height":140}, - {"id":"f515ecb9aee18fc7","type":"text","text":"# 后续写入 [异步执行]\n\n## 状态管理\n- 写入任务追踪\n- 并发控制\n- 写入顺序保证","x":-2552,"y":1218,"width":302,"height":275}, - {"id":"06d4a92778dd83c8","type":"text","text":"# 第一个分片开始写入 [阻塞执行]\n\n## 初始化写入\nfn start_first_split(data: Vec) -> Result<(), WSError> {\n let task = self.build_task(data, 0);\n self.tasks.push(task);\n self.current_size += data.len();\n Ok(())\n}\n\n## 错误处理\n- 写入失败记录日志\n- 返回具体错误类型","x":-3220,"y":1201,"width":455,"height":310}, - {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1560,"y":-210,"width":200,"height":100,"color":"1"}, - {"id":"storage_node_4","type":"text","text":"存储节点2","x":-420,"y":-480,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_5","type":"text","text":"存储节点3","x":-420,"y":-400,"width":150,"height":60,"color":"3"}, - {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-420,"y":-360,"width":150,"height":60,"color":"5"}, - {"id":"97d3d9fd7432a861","type":"text","text":"# WriteSplitDataTaskHandle::submit_split() 实现 [异步发送]\n\n## match write_type {\n- WriteSplitDataType::File => 文件写入任务\n- WriteSplitDataType::Mem => 内存写入任务\n}\n\n## 发送任务 [channel阻塞]\ntx.send(task).await","x":-2189,"y":1160,"width":347,"height":445}, - {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-620,"y":190,"width":250,"height":240,"color":"2"}, - {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1400,"y":331,"width":300,"height":300,"color":"1"}, - {"id":"batch_manager","type":"text","text":"# BatchTransfer","x":-1100,"y":744,"width":300,"height":300,"color":"1"}, - {"id":"5c4357fc2216ea51","type":"text","text":"## batch写入接口\n- 缓存主动推送\n- 并行写入支持\n- 错误恢复机制\n- 内存复用优化","x":-2180,"y":-92,"width":250,"height":120,"color":"4"}, - {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-420,"y":-280,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-420,"y":-200,"width":150,"height":60,"color":"5"}, - {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":40,"y":-500,"width":200,"height":280,"color":"1"}, - {"id":"write_task_1","type":"text","text":"写入任务1\n- 
分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":380,"y":-500,"width":200,"height":120,"color":"2"}, - {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":-280,"width":200,"height":100,"color":"4"}, - {"id":"write_task_file","type":"text","text":"# ToFile 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToFile\n- file_path: PathBuf\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [文件IO阻塞]\n1. OpenOptions::new()\n .create(true)\n .write(true)\n2. seek(offset)\n3. write_all(data)\n4. 错误记录:\n tracing::error!(\"Failed to write file data at offset {}\")\n","x":-2216,"y":544,"width":400,"height":400,"color":"1"}, - {"id":"write_task_mem","type":"text","text":"# ToMem 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToMem\n- shared_mem: SharedMemHolder\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [内存写入阻塞]\n1. shared_mem.write(offset, data)\n2. 错误记录:\n tracing::error!(\"Failed to write memory data at offset {}\")\n","x":-2650,"y":526,"width":400,"height":436,"color":"2"}, - {"id":"b0205b4457afeb2b","type":"text","text":"## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-2330,"y":242,"width":364,"height":178}, - {"id":"e2576a54f3f852b3","type":"text","text":"# process_tasks() 实现 [阻塞循环]\n\n## 循环处理 [select阻塞]\n1. try_complete() 检查完成状态\n2. tokio::select! {\n - rx.recv() => 接收新任务\n - futures::future::select_all(tasks) => 等待任务完成\n}\n\n## 完成条件\n- current_size >= expected_size\n- 返回 proto::DataItem","x":-3035,"y":1820,"width":377,"height":460}, - {"id":"155106edf5eb3cd7","type":"text","text":"# try_complete() 实现 [同步检查]\n\n## 返回 Option\n- ToFile => proto::DataItem::new_file_data()\n- ToMem => proto::DataItem::new_mem_data()","x":-3074,"y":2300,"width":455,"height":180}, - {"id":"4dbe01dc59cea4c2","type":"text","text":"pub struct WriteSplitDataTaskHandle {\n    tx: mpsc::Sender>,\n    write_type: WriteSplitDataType,\n}","x":-2552,"y":1700,"width":418,"height":160}, - {"id":"20145fd68e8aaa75","type":"text","text":"# 构造 [同步初始化]\n\n## 任务组初始化\nfn new_task_group(type_: WriteSplitDataType) -> Self {\n let (tx, rx) = mpsc::channel(32);\n Self {\n type_,\n tasks: Vec::new(),\n rx,\n expected_size: 0,\n current_size: 0,\n }\n}\n\n## 参数验证\n- 检查写入类型\n- 验证初始参数","x":-3185,"y":1580,"width":450,"height":220}, - {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-620,"y":470,"width":250,"height":120,"color":"2"}, - {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-620,"y":610,"width":250,"height":120,"color":"2"}, - {"id":"batch_receiver_tasks","type":"text","text":"WriteSplitDataTaskGroup","x":-160,"y":570,"width":400,"height":300,"color":"1"} - ], - "edges":[ - {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, - {"id":"phase2_to_phase3","fromNode":"general_phase2","fromSide":"bottom","toNode":"general_phase3","toSide":"top","label":"决策信息"}, - {"id":"phase3_to_storage","fromNode":"general_phase3","fromSide":"right","toNode":"storage_group","toSide":"left","label":"分发存储任务"}, - {"id":"storage_to_nodes","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_3","toSide":"left"}, - {"id":"storage_to_nodes2","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_4","toSide":"left"}, - 
{"id":"storage_to_nodes3","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_5","toSide":"left"}, - {"id":"phase3_to_cache","fromNode":"general_phase3","fromSide":"right","toNode":"cache_group","toSide":"left","label":"分发缓存任务"}, - {"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, - {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, - {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, - {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_manager","toSide":"left","label":"创建批量传输"}, - {"id":"initiator_to_request1","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, - {"id":"initiator_to_request2","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, - {"id":"initiator_to_request3","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, - {"id":"b5a17c0afede8e4a","fromNode":"data_general_core","fromSide":"right","toNode":"133214da264cfe72","toSide":"bottom"}, - {"id":"2ad5991c43fd6098","fromNode":"data_general_core","fromSide":"right","toNode":"821e415b6438e20d","toSide":"top"}, - {"id":"caa45c92a135042c","fromNode":"data_general_core","fromSide":"right","toNode":"core_functions","toSide":"left"}, - {"id":"09c7b9957992d62d","fromNode":"data_general_core","fromSide":"right","toNode":"b31695207931d96e","toSide":"left"}, - {"id":"adfa1cca1009ff43","fromNode":"data_general_core","fromSide":"right","toNode":"5c4357fc2216ea51","toSide":"left"}, - {"id":"ef995a514a2210bb","fromNode":"5c4357fc2216ea51","fromSide":"right","toNode":"batch_transfer_group","toSide":"top"}, - {"id":"3d79872a234731c0","fromNode":"cache_node_3","fromSide":"bottom","toNode":"batch_transfer_group","toSide":"top"}, - {"id":"9094221953b6c685","fromNode":"write_task_mem","fromSide":"top","toNode":"b0205b4457afeb2b","toSide":"bottom"}, - {"id":"77ec04f5deef7cee","fromNode":"write_task_mem","fromSide":"left","toNode":"1ec171d545e8995d","toSide":"top"}, - {"id":"7b99fb72410f07d9","fromNode":"06d4a92778dd83c8","fromSide":"bottom","toNode":"20145fd68e8aaa75","toSide":"top"}, - {"id":"df9b4bc9170fdec1","fromNode":"20145fd68e8aaa75","fromSide":"right","toNode":"4dbe01dc59cea4c2","toSide":"left"}, - {"id":"61e0637af4beba94","fromNode":"f515ecb9aee18fc7","fromSide":"left","toNode":"4dbe01dc59cea4c2","toSide":"left"}, - {"id":"f7105db89ffabd1e","fromNode":"20145fd68e8aaa75","fromSide":"bottom","toNode":"e2576a54f3f852b3","toSide":"top"}, - {"id":"7504b1b3a99e992c","fromNode":"4dbe01dc59cea4c2","fromSide":"right","toNode":"97d3d9fd7432a861","toSide":"bottom","label":"获取到handle"}, - {"id":"a993a3f4d7b2211d","fromNode":"97d3d9fd7432a861","fromSide":"left","toNode":"e2576a54f3f852b3","toSide":"right"}, - {"id":"a996588f6c59c88f","fromNode":"e2576a54f3f852b3","fromSide":"bottom","toNode":"155106edf5eb3cd7","toSide":"top"}, - {"id":"a42104592fedd4c7","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_mem","toSide":"bottom"}, - {"id":"c45aaa564ae87a7c","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_file","toSide":"bottom"}, - {"id":"write_flow_1","fromNode":"20145fd68e8aaa75","fromSide":"top","toNode":"06d4a92778dd83c8","toSide":"bottom","label":"初始化完成"}, - 
{"id":"write_flow_2","fromNode":"06d4a92778dd83c8","fromSide":"right","toNode":"f515ecb9aee18fc7","toSide":"left","label":"首个分片写入完成"}, - {"id":"write_flow_5","fromNode":"e2576a54f3f852b3","fromSide":"left","toNode":"155106edf5eb3cd7","toSide":"left","label":"检查完成状态"}, - {"id":"86a2aa913f7bd3d9","fromNode":"223edf4677db9339","fromSide":"bottom","toNode":"06d4a92778dd83c8","toSide":"top"} - ] -} \ No newline at end of file diff --git a/design.canvas b/design.canvas index 346eb9d..47e8de4 100755 --- a/design.canvas +++ b/design.canvas @@ -1,103 +1,101 @@ { - "nodes":[ - {"id":"cb82b904dab26671","type":"group","x":-3400,"y":-960,"width":4820,"height":3520,"label":"data"}, - {"id":"batch_transfer_group","type":"group","x":-1560,"y":120,"width":2940,"height":1900,"label":"Batch数据传输实现"}, - {"id":"7a2427112a116cd3","x":-3260,"y":160,"width":1464,"height":2340,"type":"group","label":"WriteSplitDataTaskGroup"}, - {"id":"batch_receiver_group","type":"group","x":80,"y":200,"width":1240,"height":1560,"label":"接收端 [DataGeneral]"}, - {"id":"data_write_flow","type":"group","x":-1600,"y":-600,"width":2680,"height":520,"label":"数据写入流程"}, - {"id":"batch_sender_group","type":"group","x":-1500,"y":200,"width":1320,"height":1000,"label":"写入端 [DataGeneral]"}, - {"id":"c03f87b1d9551659","type":"group","x":180,"y":282,"width":1110,"height":878,"label":"DataGeneral::rpc_handle_batch_data"}, - {"id":"storage_write_flow","type":"group","x":0,"y":-540,"width":1020,"height":400,"label":"存储节点写入流程"}, - {"id":"7127ed217f71f72d","type":"group","x":-3240,"y":1180,"width":1010,"height":375,"label":"fn register_handle("}, - {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1560,"y":-380,"width":200,"height":100,"color":"1"}, - {"id":"storage_node_3","type":"text","text":"存储节点1","x":-425,"y":-550,"width":150,"height":60,"color":"3"}, - {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1560,"y":-510,"width":200,"height":100,"color":"1"}, - {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1560,"y":-210,"width":200,"height":100,"color":"1"}, - {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-2290,"y":-622,"width":330,"height":156,"color":"4"}, - {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源生命周期","x":-2760,"y":-680,"width":340,"height":214,"color":"4"}, - {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-2405,"y":-427,"width":280,"height":275,"color":"4"}, - {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-2932,"y":-92,"width":342,"height":158,"color":"4"}, - {"id":"1ec171d545e8995d","type":"text","text":"## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理","x":-3085,"y":794,"width":300,"height":150}, - {"id":"223edf4677db9339","type":"text","text":"pub struct WriteSplitDataManager {\n // 只存储任务句柄\n handles: DashMap,\n}","x":-3090,"y":1000,"width":610,"height":140}, - {"id":"97d3d9fd7432a861","type":"text","text":"# WriteSplitDataTaskHandle::submit_split() 实现 [异步发送]\n\n## match write_type {\n- WriteSplitDataType::File => 文件写入任务\n- WriteSplitDataType::Mem => 内存写入任务\n}\n\n## 发送任务 [channel阻塞]\ntx.send(task).await","x":-2189,"y":1160,"width":347,"height":445}, - {"id":"e2576a54f3f852b3","type":"text","text":"# 
process_tasks() 实现 [阻塞循环]\n\n## 循环处理 [select阻塞]\n1. try_complete() 检查完成状态\n2. tokio::select! {\n - rx.recv() => 接收新任务\n - futures::future::select_all(tasks) => 等待任务完成\n}\n\n## 完成条件\n- current_size >= expected_size\n- 返回 proto::DataItem","x":-3035,"y":1820,"width":377,"height":460}, - {"id":"155106edf5eb3cd7","type":"text","text":"# try_complete() 实现 [同步检查]\n\n## 返回 Option\n- ToFile => proto::DataItem::new_file_data()\n- ToMem => proto::DataItem::new_mem_data()","x":-3074,"y":2300,"width":455,"height":180}, - {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-3050,"y":-406,"width":330,"height":234,"color":"4"}, - {"id":"data_item","type":"text","text":"# 数据项处理\n\n## enum WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n","x":-2990,"y":180,"width":450,"height":280,"color":"3"}, - {"id":"f515ecb9aee18fc7","type":"text","text":"# 后续写入 [异步执行]\n\n## 状态管理\n- 写入任务追踪\n- 并发控制\n- 写入顺序保证","x":-2552,"y":1218,"width":302,"height":275}, - {"id":"06d4a92778dd83c8","type":"text","text":"# 第一个分片开始写入 [阻塞执行]\n\n## 初始化写入\nfn start_first_split(data: Vec) -> Result<(), WSError> {\n let task = self.build_task(data, 0);\n self.tasks.push(task);\n self.current_size += data.len();\n Ok(())\n}\n\n## 错误处理\n- 写入失败记录日志\n- 返回具体错误类型","x":-3220,"y":1201,"width":455,"height":310}, - {"id":"write_task_file","type":"text","text":"# ToFile 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToFile\n- file_path: PathBuf\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [文件IO阻塞]\n1. OpenOptions::new()\n .create(true)\n .write(true)\n2. seek(offset)\n3. write_all(data)\n4. 错误记录:\n tracing::error!(\"Failed to write file data at offset {}\")\n","x":-2216,"y":544,"width":400,"height":400,"color":"1"}, - {"id":"write_task_mem","type":"text","text":"# ToMem 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToMem\n- shared_mem: SharedMemHolder\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [内存写入阻塞]\n1. shared_mem.write(offset, data)\n2. 错误记录:\n tracing::error!(\"Failed to write memory data at offset {}\")\n","x":-2650,"y":526,"width":400,"height":436,"color":"2"}, - {"id":"b0205b4457afeb2b","type":"text","text":"## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-2330,"y":242,"width":364,"height":178}, - {"id":"4dbe01dc59cea4c2","type":"text","text":"pub struct WriteSplitDataTaskHandle {\n tx: mpsc::Sender>,\n write_type: WriteSplitDataType,\n}","x":-2552,"y":1700,"width":418,"height":160}, - {"id":"20145fd68e8aaa75","type":"text","text":"# 构造 [同步初始化]\n\n## 任务组初始化\nfn new_task_group(type_: WriteSplitDataType) -> Self {\n let (tx, rx) = mpsc::channel(32);\n Self {\n type_,\n tasks: Vec::new(),\n rx,\n expected_size: 0,\n current_size: 0,\n }\n}\n\n## 参数验证\n- 检查写入类型\n- 验证初始参数","x":-3185,"y":1580,"width":450,"height":220}, - {"id":"batch_manager","type":"text","text":"# BatchTransfer\n\n## 核心字段\n- unique_id: Vec\n- version: u64\n- block_type: BatchDataBlockType\n- total_blocks: u32\n- data_sender: mpsc::Sender\n- write_task: JoinHandle\n\n## 主要方法\n1. new()\n - 创建数据传输channel\n - 计算数据分片\n - 启动写入任务\n2. add_block()\n - 通过channel发送数据块\n - 检查完成状态\n3. 
complete()\n - 等待写入任务完成\n - 发送结果通知","x":-1100,"y":744,"width":300,"height":400,"color":"1"}, - {"id":"storage_node_4","type":"text","text":"存储节点2","x":-420,"y":-480,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_5","type":"text","text":"存储节点3","x":-420,"y":-400,"width":150,"height":60,"color":"3"}, - {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 返回调度决策","x":-1100,"y":-510,"width":200,"height":160,"color":"2"}, - {"id":"storage_group","type":"text","text":"存储节点组","x":-620,"y":-510,"width":150,"height":60,"color":"3"}, - {"id":"cache_group","type":"text","text":"缓存节点组","x":-620,"y":-310,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-420,"y":-360,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-420,"y":-280,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-420,"y":-200,"width":150,"height":60,"color":"5"}, - {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":40,"y":-500,"width":200,"height":280,"color":"1"}, - {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":380,"y":-500,"width":200,"height":120,"color":"2"}, - {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":-280,"width":200,"height":100,"color":"4"}, - {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-140,"y":290,"width":250,"height":240,"color":"2"}, - {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-110,"y":620,"width":250,"height":120,"color":"2"}, - {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-140,"y":824,"width":250,"height":120,"color":"2"}, - {"id":"write_split_init","type":"text","text":"1. 创建WriteSplitDataTaskGroup\n```rust\nlet (group, handle) = WriteSplitDataTaskGroup::new(\n unique_id,\n splits,\n block_type,\n).await?;\n```\n- unique_id: 任务唯一标识\n- splits: 数据分片范围\n- block_type: 写入类型(File/Mem)","x":670,"y":376,"width":600,"height":310,"color":"1"}, - {"id":"fac5077e07b5a23e","type":"text","text":"1. 使用WriteSplitDataTaskManager\n查询handle","x":260,"y":302,"width":300,"height":160,"color":"2"}, - {"id":"write_split_handle","type":"text","text":"2. 
使用WriteSplitDataTaskHandle\n```rust\nhandle.submit_split(\n DataSplitIdx { offset },\n data_item\n).await;\n```\n- 通过handle异步提交写入任务\n- 可以并发提交多个分片\n- handle可以跨线程使用","x":230,"y":533,"width":360,"height":306,"color":"2"}, - {"id":"write_split_complete","type":"text","text":"\nprocess_tasks(独立task)\n- 循环等待新任务\n- 执行写入操作\n- 检查完成状态","x":860,"y":1360,"width":380,"height":306,"color":"4"}, - {"id":"bd7d0a299fe215df","x":230,"y":948,"width":310,"height":156,"type":"text","text":"struct SharedWithBatchHandler\n记录最新的request responsor\n\n旧的responsor直接返回"}, - {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1400,"y":310,"width":300,"height":300,"color":"1"}, - {"id":"864d06859ca25962","type":"text","text":"spawn 一个独立task,调用handle的等待结束接口\n\n结束之后,从share状态里取出最新responsor,响应完整接收成功信息","x":640,"y":944,"width":300,"height":176,"color":"2"} - ], - "edges":[ - {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, - {"id":"phase2_to_phase3","fromNode":"general_phase2","fromSide":"bottom","toNode":"general_phase3","toSide":"top","label":"决策信息"}, - {"id":"phase3_to_storage","fromNode":"general_phase3","fromSide":"right","toNode":"storage_group","toSide":"left","label":"分发存储任务"}, - {"id":"storage_to_nodes","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_3","toSide":"left"}, - {"id":"storage_to_nodes2","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_4","toSide":"left"}, - {"id":"storage_to_nodes3","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_5","toSide":"left"}, - {"id":"phase3_to_cache","fromNode":"general_phase3","fromSide":"right","toNode":"cache_group","toSide":"left","label":"分发缓存任务"}, - {"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, - {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, - {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, - {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_manager","toSide":"left","label":"创建批量传输"}, - {"id":"initiator_to_request1","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, - {"id":"initiator_to_request2","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, - {"id":"initiator_to_request3","fromNode":"batch_manager","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, - {"id":"b5a17c0afede8e4a","fromNode":"data_general_core","fromSide":"right","toNode":"133214da264cfe72","toSide":"bottom"}, - {"id":"2ad5991c43fd6098","fromNode":"data_general_core","fromSide":"right","toNode":"821e415b6438e20d","toSide":"top"}, - {"id":"caa45c92a135042c","fromNode":"data_general_core","fromSide":"right","toNode":"core_functions","toSide":"left"}, - {"id":"09c7b9957992d62d","fromNode":"data_general_core","fromSide":"right","toNode":"b31695207931d96e","toSide":"left"}, - {"id":"3d79872a234731c0","fromNode":"cache_node_3","fromSide":"bottom","toNode":"batch_transfer_group","toSide":"top"}, - {"id":"9094221953b6c685","fromNode":"write_task_mem","fromSide":"top","toNode":"b0205b4457afeb2b","toSide":"bottom"}, - 
{"id":"77ec04f5deef7cee","fromNode":"write_task_mem","fromSide":"left","toNode":"1ec171d545e8995d","toSide":"top"}, - {"id":"7b99fb72410f07d9","fromNode":"06d4a92778dd83c8","fromSide":"bottom","toNode":"20145fd68e8aaa75","toSide":"top"}, - {"id":"df9b4bc9170fdec1","fromNode":"20145fd68e8aaa75","fromSide":"right","toNode":"4dbe01dc59cea4c2","toSide":"left"}, - {"id":"61e0637af4beba94","fromNode":"f515ecb9aee18fc7","fromSide":"left","toNode":"4dbe01dc59cea4c2","toSide":"left"}, - {"id":"f7105db89ffabd1e","fromNode":"20145fd68e8aaa75","fromSide":"bottom","toNode":"e2576a54f3f852b3","toSide":"top"}, - {"id":"7504b1b3a99e992c","fromNode":"4dbe01dc59cea4c2","fromSide":"right","toNode":"97d3d9fd7432a861","toSide":"bottom","label":"获取到handle"}, - {"id":"a993a3f4d7b2211d","fromNode":"97d3d9fd7432a861","fromSide":"left","toNode":"e2576a54f3f852b3","toSide":"right"}, - {"id":"a996588f6c59c88f","fromNode":"e2576a54f3f852b3","fromSide":"bottom","toNode":"155106edf5eb3cd7","toSide":"top"}, - {"id":"a42104592fedd4c7","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_mem","toSide":"bottom"}, - {"id":"c45aaa564ae87a7c","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_file","toSide":"bottom"}, - {"id":"write_flow_1","fromNode":"20145fd68e8aaa75","fromSide":"top","toNode":"06d4a92778dd83c8","toSide":"bottom","label":"初始化完成"}, - {"id":"write_flow_2","fromNode":"06d4a92778dd83c8","fromSide":"right","toNode":"f515ecb9aee18fc7","toSide":"left","label":"首个分片写入完成"}, - {"id":"write_flow_5","fromNode":"e2576a54f3f852b3","fromSide":"left","toNode":"155106edf5eb3cd7","toSide":"left","label":"检查完成状态"}, - {"id":"86a2aa913f7bd3d9","fromNode":"223edf4677db9339","fromSide":"bottom","toNode":"06d4a92778dd83c8","toSide":"top"}, - {"id":"write_1_4","fromNode":"write_split_init","fromSide":"bottom","toNode":"write_split_complete","toSide":"top","label":"等待完成"}, - {"id":"a99c309f19fd9853","fromNode":"batch_request1","fromSide":"right","toNode":"fac5077e07b5a23e","toSide":"left"}, - {"id":"90a20648ba7c7b0d","fromNode":"fac5077e07b5a23e","fromSide":"right","toNode":"write_split_init","toSide":"left"}, - {"id":"c8e5b437e8d768ef","fromNode":"write_split_init","fromSide":"top","toNode":"fac5077e07b5a23e","toSide":"right","label":"插入handle"}, - {"id":"e9443a3b677ce562","fromNode":"fac5077e07b5a23e","fromSide":"bottom","toNode":"write_split_handle","toSide":"top"}, - {"id":"aac9c2ea6e65a686","fromNode":"batch_request2","fromSide":"right","toNode":"fac5077e07b5a23e","toSide":"left"}, - {"id":"886cbf70f878e962","fromNode":"batch_request3","fromSide":"right","toNode":"fac5077e07b5a23e","toSide":"left"}, - {"id":"612e4d1938f911b0","fromNode":"write_split_handle","fromSide":"right","toNode":"write_split_init","toSide":"left","label":"提交分片"}, - {"id":"dbff6534cbb03fce","fromNode":"864d06859ca25962","fromSide":"left","toNode":"bd7d0a299fe215df","toSide":"right"}, - {"id":"9c31b6c98bcb3875","fromNode":"batch_request3","fromSide":"right","toNode":"bd7d0a299fe215df","toSide":"top","label":"记录responsor"} - ] + "nodes":[ + {"id":"cb82b904dab26671","type":"group","x":-3400,"y":-960,"width":4560,"height":3500,"label":"data"}, + {"id":"batch_transfer_group","type":"group","x":-1560,"y":120,"width":2300,"height":1600,"label":"Batch数据传输实现"}, + {"id":"write_split_group","type":"group","x":-3260,"y":120,"width":1470,"height":2360,"label":"WriteSplitDataTaskGroup 写入流程"}, + {"id":"data_write_flow","type":"group","x":-1600,"y":-600,"width":2680,"height":520,"label":"数据写入流程"}, + 
{"id":"2e84a4ef9e137fb7","type":"group","x":-1000,"y":800,"width":1495,"height":820,"label":"batch handler 流程"}, + {"id":"storage_write_flow","type":"group","x":0,"y":-540,"width":1020,"height":400,"label":"存储节点写入流程"}, + {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-3050,"y":-406,"width":330,"height":234,"color":"4"}, + {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-2932,"y":-92,"width":342,"height":158,"color":"4"}, + {"id":"5c4357fc2216ea51","type":"text","text":"## batch写入接口\n- 缓存主动推送\n- 并行写入支持\n- 错误恢复机制\n- 内存复用优化","x":-2180,"y":-92,"width":250,"height":120,"color":"4"}, + {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-2290,"y":-622,"width":330,"height":156,"color":"4"}, + {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源生命周期","x":-2760,"y":-680,"width":340,"height":214,"color":"4"}, + {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-2405,"y":-427,"width":280,"height":275,"color":"4"}, + {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1560,"y":-380,"width":200,"height":100,"color":"1"}, + {"id":"cache_group","type":"text","text":"缓存节点组","x":-620,"y":-310,"width":150,"height":60,"color":"5"}, + {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1560,"y":-210,"width":200,"height":100,"color":"1"}, + {"id":"storage_node_3","type":"text","text":"存储节点1","x":-425,"y":-550,"width":150,"height":60,"color":"3"}, + {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 
返回调度决策","x":-1100,"y":-510,"width":200,"height":160,"color":"2"}, + {"id":"storage_group","type":"text","text":"存储节点组","x":-620,"y":-510,"width":150,"height":60,"color":"3"}, + {"id":"storage_node_4","type":"text","text":"存储节点2","x":-420,"y":-480,"width":150,"height":60,"color":"3"}, + {"id":"storage_node_5","type":"text","text":"存储节点3","x":-420,"y":-400,"width":150,"height":60,"color":"3"}, + {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-420,"y":-360,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-420,"y":-280,"width":150,"height":60,"color":"5"}, + {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-420,"y":-200,"width":150,"height":60,"color":"5"}, + {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":40,"y":-500,"width":200,"height":280,"color":"1"}, + {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":380,"y":-500,"width":200,"height":120,"color":"2"}, + {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":-280,"width":200,"height":100,"color":"4"}, + {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1560,"y":-510,"width":200,"height":100,"color":"1"}, + {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-620,"y":180,"width":250,"height":240,"color":"2"}, + {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-620,"y":460,"width":250,"height":120,"color":"2"}, + {"id":"batch_response1","type":"text","text":"# BatchDataResponse(1)\n- request_id\n- success\n- error_message\n- version","x":-270,"y":180,"width":250,"height":240,"color":"3"}, + {"id":"batch_response2","type":"text","text":"# BatchDataResponse(2)\n- request_id\n- success\n- error_message\n- version","x":-270,"y":460,"width":310,"height":60,"color":"3"}, + {"id":"batch_response3","type":"text","text":"# BatchDataResponse(3)\n- request_id\n- success\n- error_message\n- version","x":-270,"y":600,"width":250,"height":120,"color":"3"}, + {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-620,"y":600,"width":250,"height":120,"color":"2"}, + {"id":"batch_handler_3","type":"text","text":"# 3. 创建数据分片\n\n## 分片准备\n- 创建分片列表\n * 计算offset\n * 记录分片范围\n- 创建mpsc通道\n * 大小 = splits.len()\n * 发送数据到通道","x":-495,"y":820,"width":350,"height":300,"color":"3"}, + {"id":"batch_handler_5","type":"text","text":"# 5. 等待写入完成\n\n## task_group.join()\n- 成功情况\n * 返回成功响应\n * 更新版本号\n- 失败情况\n * 记录警告\n * 返回错误信息","x":80,"y":900,"width":300,"height":300,"color":"5"}, + {"id":"batch_handler_4","type":"text","text":"# 4. 创建写入任务组\n\n## WriteSplitDataTaskGroup\n- 创建任务组\n * unique_id\n * splits\n * rx channel\n * block_type\n- 错误处理\n * 记录警告\n * 返回失败响应","x":-320,"y":1200,"width":300,"height":360,"color":"4"}, + {"id":"batch_handler_2","type":"text","text":"# 2. 验证请求数据\n\n## verify_request()\n- 验证请求参数\n * block_type\n * block_index\n * data完整性\n- 错误处理\n * 记录警告\n * 返回失败响应","x":-795,"y":1230,"width":355,"height":330,"color":"2"}, + {"id":"batch_handler_1","type":"text","text":"# 1. 
获取元信息\n\n## get_metadata()\n- 获取元数据\n * unique_id\n * version\n- 错误处理\n * 记录警告\n * 返回失败响应","x":-945,"y":860,"width":300,"height":300,"color":"1"}, + {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1100,"y":190,"width":300,"height":300,"color":"1"}, + {"id":"batch_manager","type":"text","text":"# BatchManager\n\n## 管理功能\n- 创建传输任务\n- 分配请求ID\n- 跟踪传输状态\n- 错误恢复\n\n## 数据处理\n- 分块管理\n- 数据校验\n- 内存复用\n- 并发控制","x":-1460,"y":420,"width":300,"height":300,"color":"1"}, + {"id":"write_task_file","type":"text","text":"# ToFile 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToFile\n- file_path: PathBuf\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [文件IO阻塞]\n1. OpenOptions::new()\n .create(true)\n .write(true)\n2. seek(offset)\n3. write_all(data)\n4. 错误记录:\n tracing::error!(\"Failed to write file data at offset {}\")\n","x":-2216,"y":544,"width":400,"height":400,"color":"1"}, + {"id":"write_task_mem","type":"text","text":"# ToMem 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToMem\n- shared_mem: SharedMemHolder\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [内存写入阻塞]\n1. shared_mem.write(offset, data)\n2. 错误记录:\n tracing::error!(\"Failed to write memory data at offset {}\")\n","x":-2650,"y":526,"width":400,"height":436,"color":"2"}, + {"id":"data_item","type":"text","text":"# 数据项处理\n\n## enum WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n","x":-2990,"y":180,"width":450,"height":280,"color":"3"}, + {"id":"b0205b4457afeb2b","type":"text","text":"## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-2330,"y":242,"width":364,"height":178}, + {"id":"e2576a54f3f852b3","type":"text","text":"# process_tasks() 实现 [阻塞循环]\n\n## 循环处理 [select阻塞]\n1. try_complete() 检查完成状态\n2. tokio::select! 
{\n - rx.recv() => 接收新任务\n - futures::future::select_all(tasks) => 等待任务完成\n}\n\n## 完成条件\n- current_size >= expected_size\n- 返回 proto::DataItem","x":-3035,"y":1820,"width":377,"height":420}, + {"id":"97d3d9fd7432a861","type":"text","text":"# submit_split() 实现 [异步发送]\n\n## match write_type {\n- WriteSplitDataType::File => 文件写入任务\n- WriteSplitDataType::Mem => 内存写入任务\n}\n\n## 发送任务 [channel阻塞]\ntx.send(task).await","x":-2227,"y":1175,"width":355,"height":420}, + {"id":"4dbe01dc59cea4c2","type":"text","text":"# 任务状态 [状态追踪]\n\n## 状态管理\n- 任务状态记录\n- 写入进度更新\n- 完成状态检查","x":-2660,"y":1432,"width":250,"height":200}, + {"id":"20145fd68e8aaa75","type":"text","text":"# 构造 [同步初始化]\n\n## 任务组初始化\nfn new_task_group(type_: WriteSplitDataType) -> Self {\n let (tx, rx) = mpsc::channel(32);\n Self {\n type_,\n tasks: Vec::new(),\n rx,\n expected_size: 0,\n current_size: 0,\n }\n}\n\n## 参数验证\n- 检查写入类型\n- 验证初始参数","x":-3085,"y":1585,"width":455,"height":200}, + {"id":"1ec171d545e8995d","type":"text","text":"## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理","x":-2880,"y":1025,"width":300,"height":150}, + {"id":"f515ecb9aee18fc7","type":"text","text":"# 后续写入 [异步执行]\n\n## 状态管理\n- 写入任务追踪\n- 并发控制\n- 写入顺序保证","x":-2685,"y":1315,"width":250,"height":200}, + {"id":"06d4a92778dd83c8","type":"text","text":"# 第一个分片开始写入 [阻塞执行]\n\n## 初始化写入\nfn start_first_split(data: Vec) -> Result<(), WSError> {\n let task = self.build_task(data, 0);\n self.tasks.push(task);\n self.current_size += data.len();\n Ok(())\n}\n\n## 错误处理\n- 写入失败记录日志\n- 返回具体错误类型","x":-3185,"y":1200,"width":455,"height":310}, + {"id":"155106edf5eb3cd7","type":"text","text":"# try_complete() 实现 [同步检查]\n\n## 返回 Option\n- ToFile => proto::DataItem::new_file_data()\n- ToMem => proto::DataItem::new_mem_data()","x":-3074,"y":2300,"width":455,"height":180} + ], + "edges":[ + {"id":"verify_flow_1","fromNode":"batch_handler_4","fromSide":"right","toNode":"batch_handler_5","toSide":"left","label":"块状态更新"}, + {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, + {"id":"phase2_to_phase3","fromNode":"general_phase2","fromSide":"bottom","toNode":"general_phase3","toSide":"top","label":"决策信息"}, + {"id":"phase3_to_storage","fromNode":"general_phase3","fromSide":"right","toNode":"storage_group","toSide":"left","label":"分发存储任务"}, + {"id":"storage_to_nodes","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_3","toSide":"left"}, + {"id":"storage_to_nodes2","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_4","toSide":"left"}, + {"id":"storage_to_nodes3","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_5","toSide":"left"}, + {"id":"phase3_to_cache","fromNode":"general_phase3","fromSide":"right","toNode":"cache_group","toSide":"left","label":"分发缓存任务"}, + {"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, + {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, + {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, + {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"left","toNode":"batch_manager","toSide":"right","label":"创建批量传输"}, + {"id":"initiator_to_request1","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, + 
{"id":"initiator_to_request2","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, + {"id":"initiator_to_request3","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, + {"id":"request1_to_response1","fromNode":"batch_request1","fromSide":"right","toNode":"batch_response1","toSide":"left","label":"处理响应"}, + {"id":"request2_to_response2","fromNode":"batch_request2","fromSide":"right","toNode":"batch_response2","toSide":"left","label":"处理响应"}, + {"id":"request3_to_response3","fromNode":"batch_request3","fromSide":"right","toNode":"batch_response3","toSide":"left","label":"处理响应"}, + {"id":"b5a17c0afede8e4a","fromNode":"data_general_core","fromSide":"right","toNode":"133214da264cfe72","toSide":"bottom"}, + {"id":"2ad5991c43fd6098","fromNode":"data_general_core","fromSide":"right","toNode":"821e415b6438e20d","toSide":"top"}, + {"id":"caa45c92a135042c","fromNode":"data_general_core","fromSide":"right","toNode":"core_functions","toSide":"left"}, + {"id":"09c7b9957992d62d","fromNode":"data_general_core","fromSide":"right","toNode":"b31695207931d96e","toSide":"left"}, + {"id":"adfa1cca1009ff43","fromNode":"data_general_core","fromSide":"right","toNode":"5c4357fc2216ea51","toSide":"left"}, + {"id":"ef995a514a2210bb","fromNode":"5c4357fc2216ea51","fromSide":"right","toNode":"batch_transfer_group","toSide":"top"}, + {"id":"3d79872a234731c0","fromNode":"cache_node_3","fromSide":"bottom","toNode":"batch_transfer_group","toSide":"top"}, + {"id":"batch_flow_4_5","fromNode":"batch_handler_4","fromSide":"right","toNode":"batch_handler_5","toSide":"left","label":"BlockStatus"}, + {"id":"handler_1_to_2","fromNode":"batch_handler_1","fromSide":"right","toNode":"batch_handler_2","toSide":"left","label":"元数据信息"}, + {"id":"handler_2_to_3","fromNode":"batch_handler_2","fromSide":"right","toNode":"batch_handler_3","toSide":"left","label":"数据内容"}, + {"id":"handler_3_to_4","fromNode":"batch_handler_3","fromSide":"right","toNode":"batch_handler_4","toSide":"left","label":"分片列表"}, + {"id":"9094221953b6c685","fromNode":"write_task_mem","fromSide":"top","toNode":"b0205b4457afeb2b","toSide":"bottom"}, + {"id":"77ec04f5deef7cee","fromNode":"write_task_mem","fromSide":"bottom","toNode":"1ec171d545e8995d","toSide":"top"}, + {"id":"7b99fb72410f07d9","fromNode":"06d4a92778dd83c8","fromSide":"bottom","toNode":"20145fd68e8aaa75","toSide":"top"}, + {"id":"df9b4bc9170fdec1","fromNode":"20145fd68e8aaa75","fromSide":"right","toNode":"4dbe01dc59cea4c2","toSide":"left"}, + {"id":"61e0637af4beba94","fromNode":"f515ecb9aee18fc7","fromSide":"bottom","toNode":"4dbe01dc59cea4c2","toSide":"left"}, + {"id":"f7105db89ffabd1e","fromNode":"20145fd68e8aaa75","fromSide":"bottom","toNode":"e2576a54f3f852b3","toSide":"top"}, + {"id":"7504b1b3a99e992c","fromNode":"4dbe01dc59cea4c2","fromSide":"right","toNode":"97d3d9fd7432a861","toSide":"bottom","label":"获取到handle"}, + {"id":"a993a3f4d7b2211d","fromNode":"97d3d9fd7432a861","fromSide":"left","toNode":"e2576a54f3f852b3","toSide":"right"}, + {"id":"a996588f6c59c88f","fromNode":"e2576a54f3f852b3","fromSide":"bottom","toNode":"155106edf5eb3cd7","toSide":"top"}, + {"id":"a42104592fedd4c7","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_mem","toSide":"bottom"}, + {"id":"c45aaa564ae87a7c","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_file","toSide":"bottom"}, + 
{"id":"write_flow_1","fromNode":"20145fd68e8aaa75","fromSide":"top","toNode":"06d4a92778dd83c8","toSide":"bottom","label":"初始化完成"}, + {"id":"write_flow_2","fromNode":"06d4a92778dd83c8","fromSide":"right","toNode":"f515ecb9aee18fc7","toSide":"left","label":"首个分片写入完成"}, + {"id":"write_flow_5","fromNode":"e2576a54f3f852b3","fromSide":"left","toNode":"155106edf5eb3cd7","toSide":"left","label":"检查完成状态"} + ] } \ No newline at end of file diff --git a/review.md b/review.md index 85f5a0b..5a435cc 100755 --- a/review.md +++ b/review.md @@ -1,6 +1,5 @@ # 项目分析与修改计划 - ### 现有 #### DataGeneral @@ -406,277 +405,4 @@ impl WriteSplitDataManager { } ``` -### 2. Batch数据处理流程更新 - -#### 2.1 WriteSplitDataTaskHandle扩展 等待全部完成的函数 - -```rust -impl WriteSplitDataTaskHandle { - ... - - /// 等待所有已提交的写入任务完成 - pub async fn wait_all_tasks(self) -> WSResult<()> { - } -} -``` - -#### 2.2 BatchTransfer 实现 - -```rust -pub struct BatchTransfer { - unique_id: Vec, - version: u64, - block_type: BatchDataBlockType, - total_blocks: u32, - block_size: usize, - data: Arc, // 文件或内存数据源 - write_task: JoinHandle>, -} - -impl BatchTransfer { - /// 创建新的批量传输任务 - pub async fn new( - unique_id: Vec, - version: u64, - data: Arc, - block_size: usize, - manager: Arc, - ) -> WSResult { - // 计算分片信息 - let total_size = data.size().await?; - let total_blocks = (total_size + block_size - 1) / block_size; - let block_type = data.block_type(); - - // 创建写入任务组和handle - let (group, handle) = WriteSplitDataTaskGroup::new( - unique_id.clone(), - calculate_splits(total_blocks as u32, block_size), - block_type, - manager, - ).await; - - // 启动写入任务 - let write_task = tokio::spawn(async move { - let mut current_block = 0; - let mut in_flight_tasks = FuturesUnordered::new(); - - // 循环直到所有数据块都发送完成 - loop { - // 如果还有数据块且未达到最大并发数,则读取并发送新数据块 - while current_block < total_blocks && in_flight_tasks.len() < 32 { - // 读取数据块 - let offset = current_block * block_size; - let size = block_size.min(total_size - offset); - let block_data = data.read_chunk(offset, size).await?; - - // 提交数据到写入任务组 - let submit_future = handle.submit_split( - current_block as usize * block_size, - block_data, - ); - in_flight_tasks.push(submit_future); - current_block += 1; - } - - // 等待任意一个任务完成 - match in_flight_tasks.next().await { - Some(result) => { - // 处理任务结果 - result?; - } - None if current_block >= total_blocks => { - // 所有数据块都已发送且完成 - break; - } - None => { - // 不应该发生:还有数据块但没有运行中的任务 - return Err(WSError::BatchError(WsBatchErr::InternalError { - message: "No in-flight tasks but blocks remaining".into() - })); - } - } - } - - // 等待所有任务完成 - while let Some(result) = in_flight_tasks.next().await { - result?; - } - - // 等待写入任务组处理完所有数据 - handle.wait_all_tasks().await?; - group.process_tasks().await - }); - - Ok(Self { - unique_id, - version, - block_type, - total_blocks: total_blocks as u32, - block_size, - data, - write_task, - }) - } - - /// 等待传输完成 - pub async fn wait_complete(self) -> WSResult { - self.write_task.await? 
- } -} - -/// 数据源trait -#[async_trait] -pub trait DataSource: Send + Sync + 'static { - /// 获取数据总大小 - async fn size(&self) -> WSResult; - - /// 读取指定范围的数据 - async fn read_chunk(&self, offset: usize, size: usize) -> WSResult>; - - /// 获取数据块类型 - fn block_type(&self) -> BatchDataBlockType; -} - -/// 文件数据源实现 -pub struct FileDataSource { - path: PathBuf, -} - -#[async_trait] -impl DataSource for FileDataSource { - async fn size(&self) -> WSResult { - tokio::fs::metadata(&self.path) - .await - .map(|m| m.len() as usize) - .map_err(|e| WSError::BatchError(WsBatchErr::ReadSourceFailed { - source: format!("{}", self.path.display()), - error: e.to_string(), - })) - } - - async fn read_chunk(&self, offset: usize, size: usize) -> WSResult> { - let mut file = tokio::fs::File::open(&self.path).await?; - let mut buf = vec![0; size]; - file.seek(SeekFrom::Start(offset as u64)).await?; - file.read_exact(&mut buf).await?; - Ok(buf) - } - - fn block_type(&self) -> BatchDataBlockType { - BatchDataBlockType::File - } -} - -/// 内存数据源实现 -pub struct MemDataSource { - data: Arc<[u8]>, -} - -#[async_trait] -impl DataSource for MemDataSource { - async fn size(&self) -> WSResult { - Ok(self.data.len()) - } - - async fn read_chunk(&self, offset: usize, size: usize) -> WSResult> { - Ok(self.data[offset..offset+size].to_vec()) - } - - fn block_type(&self) -> BatchDataBlockType { - BatchDataBlockType::Mem - } -} - -#### 2.3 DataGeneral RPC处理实现 - -```rust -/// 默认数据块大小 (4MB) -const DEFAULT_BLOCK_SIZE: usize = 4 * 1024 * 1024; - -impl DataGeneral { - /// 处理批量数据写入请求 - /// - /// # 处理流程 - /// 1. 使用WriteSplitDataTaskManager查询handle - /// 2. 使用WriteSplitDataTaskHandle提交写入任务 - /// 3. 等待写入完成并返回结果 - pub async fn rpc_handle_batch_data( - &self, - request: BatchDataRequest, - ) -> WSResult<()> { - // 1. 使用WriteSplitDataTaskManager查询handle - let handle = match self.write_manager.get_handle(&request.unique_id) { - Some(handle) => { - // 验证版本号 - if handle.version() != request.version { - tracing::error!( - "Version mismatch for transfer {}, expected {}, got {}", - hex::encode(&request.unique_id), - handle.version(), - request.version - ); - return Err(WSError::BatchError(WsBatchErr::VersionMismatch { - expected: handle.version(), - actual: request.version, - })); - } - handle - } - None => { - // 创建新的写入任务组 - let (group, handle) = WriteSplitDataTaskGroup::new( - request.unique_id.clone(), - calculate_splits(request.total_blocks), - request.block_type, - ).await?; - - // 注册handle - self.write_manager.register_handle( - request.unique_id.clone(), - handle.clone(), - group, - ); - - handle - } - }; - - // 2. 使用WriteSplitDataTaskHandle提交写入任务 - let offset = request.block_idx as usize * DEFAULT_BLOCK_SIZE; - - if let Err(e) = handle.submit_split(offset, request.data).await { - tracing::error!( - "Failed to submit split for transfer {}, block {}: {}", - hex::encode(&request.unique_id), - request.block_idx, - e - ); - return Err(e); - } - - tracing::debug!( - "Successfully submitted block {} for transfer {}", - request.block_idx, - hex::encode(&request.unique_id) - ); - - Ok(()) - } -} - -/// 数据分片索引 -#[derive(Debug, Clone, Copy)] -pub struct DataSplitIdx { - pub offset: usize, -} - -/// 计算数据分片范围 -fn calculate_splits(total_blocks: u32) -> Vec> { - let mut splits = Vec::with_capacity(total_blocks as usize); - for i in 0..total_blocks { - let start = i as usize * DEFAULT_BLOCK_SIZE; - let end = start + DEFAULT_BLOCK_SIZE; - splits.push(start..end); - } - splits -} +### 2. 
BatchTransfer 的 new 方法

diff --git a/scripts/sync_md_files.py b/scripts/sync_md_files.py
index f574558..747dc3c 100644
--- a/scripts/sync_md_files.py
+++ b/scripts/sync_md_files.py
@@ -6,18 +6,64 @@
 import tarfile
 from pathlib import Path
 
+def backup_files(directory, file_types=('.canvas',)):
+    # Get current timestamp
+    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
+
+    # Create backup filename
+    backup_name = f'backup_{timestamp}.tar.gz'
+    backup_path = Path(directory).parent / backup_name
+
+    # Create tar archive
+    with tarfile.open(backup_path, 'w:gz') as tar:
+        # Walk through the directory
+        for root, _, files in os.walk(directory):
+            # Filter for target file types
+            target_files = [f for f in files if f.endswith(file_types)]
+
+            for file in target_files:
+                file_path = Path(root) / file
+                # Add file to archive with its relative path
+                tar.add(file_path, arcname=file_path.relative_to(directory))
+
+    print(f'Created backup: {backup_path}')
+    return backup_path
 
 def sync_md_files(source_dir, target_dir):
-    # read source file
-    toreplace=" "
-    withcontent=" "
-    with open(f"{source_dir}/design.canvas") as f:
-        canvas = f.read()
-    canvas=canvas.replace(toreplace,withcontent)
-    with open(f"{source_dir}/design.canvas","w") as f:
-        f.write(canvas)
-
-    os.system(f"cp -r {source_dir}/design.canvas {target_dir}/design.canvas")
+    # Convert to Path objects for easier handling
+    source_path = Path(source_dir).resolve()
+    target_path = Path(target_dir).resolve()
+
+    # Create target directory if it doesn't exist
+    target_path.mkdir(parents=True, exist_ok=True)
+
+    # Counter for statistics
+    copied_files = 0
+
+    # Walk through the source directory
+    for root, _, files in os.walk(source_path):
+        # Filter for .canvas files
+        target_files = [f for f in files if f.endswith('.canvas')]
+
+        for target_file in target_files:
+            # Get the full source path
+            source_file = Path(root) / target_file
+
+            # Calculate relative path from source_dir
+            rel_path = source_file.relative_to(source_path)
+
+            # Create target file path
+            dest_file = target_path / rel_path
+
+            # Create target directory if it doesn't exist
+            dest_file.parent.mkdir(parents=True, exist_ok=True)
+
+            # Copy the file
+            shutil.copy2(source_file, dest_file)
+            copied_files += 1
+            print(f"Copied: {rel_path}")
+
+    print(f"\nSync complete! Copied {copied_files} Canvas files.")
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Sync markdown and canvas files between local and s3fs')
@@ -35,9 +81,9 @@ def sync_md_files(source_dir, target_dir):
         source_dir = s3fs_dir
         target_dir = local_dir
 
-    # # Backup target directory before sync
-    # print(f"Creating backup of target directory: {target_dir}")
-    # backup_path = backup_files(target_dir)
+    # Backup target directory before sync
+    print(f"Creating backup of target directory: {target_dir}")
+    backup_path = backup_files(target_dir)
 
     print(f"Starting sync from {source_dir} to {target_dir}")
     sync_md_files(source_dir, target_dir)

From c56a8903e6326035688f24a7e21845004f485ed2 Mon Sep 17 00:00:00 2001
From: pa <1020401660@qq.com>
Date: Wed, 16 Apr 2025 01:26:06 +0800
Subject: [PATCH 14/26] Revert "design new WriteSplitDataTaskGroup for
 get_or_del_data"

This reverts commit 680608107f07ac7b7db92c777586bc108819cf44.
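Note: the review.md section removed above sketched BatchTransfer::new around a
DataSource abstraction, and its correctness hinges on the ceil-division block
math (a short final block must still count as one block, and its read must be
clamped). Here is a standalone, synchronous Rust sketch of just that
arithmetic; MemSource is an illustrative stand-in, not the project's
DataSource trait:

```rust
// Simplified stand-in for a DataSource: all data already in memory.
struct MemSource {
    data: Vec<u8>,
}

impl MemSource {
    fn size(&self) -> usize {
        self.data.len()
    }
    // Borrow one block by (offset, size), as read_chunk does in the design.
    fn read_chunk(&self, offset: usize, size: usize) -> &[u8] {
        &self.data[offset..offset + size]
    }
}

fn main() {
    let src = MemSource { data: vec![0u8; 10_000_000] };
    let block_size = 1024 * 1024; // 1MB blocks, as in the design text
    let total_size = src.size();
    // Ceil division: a partial final block still counts as one block.
    let total_blocks = (total_size + block_size - 1) / block_size;
    for block in 0..total_blocks {
        let offset = block * block_size;
        // The last block may be short: clamp to the remaining bytes.
        let size = block_size.min(total_size - offset);
        assert_eq!(src.read_chunk(offset, size).len(), size);
    }
    println!("read {} blocks of <= {} bytes", total_blocks, block_size);
}
```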
--- review.md | 92 ++----------------------------------------------------- 1 file changed, 2 insertions(+), 90 deletions(-) diff --git a/review.md b/review.md index 5a435cc..1c7cb00 100755 --- a/review.md +++ b/review.md @@ -1,5 +1,6 @@ # 项目分析与修改计划 + ### 现有 #### DataGeneral @@ -316,93 +317,4 @@ impl WriteSplitDataManager { pub fn remove_handle(&self, unique_id: &UniqueId) { self.handles.remove(unique_id); } -} - -## 修改 使用情况以适配新接口 计划 - -### 1. 修改 get_or_del_data 函数 - -```diff - pub async fn get_or_del_data(&self, GetOrDelDataArg { meta, unique_id, ty }: GetOrDelDataArg) - -> WSResult<(DataSetMetaV2, HashMap)> - { - let want_idxs: Vec = WantIdxIter::new(&ty, meta.data_item_cnt() as DataItemIdx).collect(); - - let mut groups = Vec::new(); - let mut idxs = Vec::new(); - let p2p = self.view.p2p(); - let mut ret = HashMap::new(); - - for idx in want_idxs { - // 为每个数据项创建独立的任务组 - let (tx, rx) = tokio::sync::mpsc::channel(1); - let splits = vec![0..1]; - let splits = vec![0..1]; - let (mut group, handle) = WriteSplitDataTaskGroup::new( - unique_id.clone(), - splits, - match ty { - GetOrDelDataArgType::Delete => proto::BatchDataBlockType::Delete, - _ => proto::BatchDataBlockType::Memory, - }, - Arc::clone(&self.manager), - ).await; - - let p2p = p2p.clone(); - let unique_id = unique_id.clone(); - let data_node = meta.get_data_node(idx); - let delete = matches!(ty, GetOrDelDataArgType::Delete); - let rpc_call = self.rpc_call_get_data.clone(); - - let handle_clone = handle.clone(); - let handle = tokio::spawn(async move { - let resp = rpc_call.call( - p2p, - data_node, - proto::GetOneDataRequest { - unique_id: unique_id.to_vec(), - idxs: vec![idx as u32], - delete, - return_data: true, - }, - Some(Duration::from_secs(60)), - ).await?; - - if !resp.success { - tracing::error!("Failed to get data for idx {}: {}", idx, resp.message); - return Err(WsDataError::GetDataFailed { - unique_id: unique_id.to_vec(), - msg: resp.message, - }.into()); - } - - handle_clone.submit_split(0, resp.data[0].clone()).await; - Ok::<_, WSError>(()) - }); - - groups.push(group); - idxs.push((idx, handle)); - } - - // 等待所有RPC任务完成 - for (group, (idx, handle)) in groups.into_iter().zip(idxs.into_iter()) { - if let Err(e) = handle.await.map_err(|e| WSError::from(e))?.map_err(|e| e) { - tracing::error!("RPC task failed for idx {}: {}", idx, e); - continue; - } - - match group.join().await { - Ok(data_item) => { - ret.insert(idx, data_item); - } - Err(e) => { - tracing::error!("Task group join failed for idx {}: {}", idx, e); - } - } - } - - Ok(ret) -} -``` - -### 2. BatchTransfer 的 new 方法 +} \ No newline at end of file From 1697889adaaa4cb5fe1e0138c6b51e78921cf959 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 15/26] Revert "design of WriteSplitDataTaskGroup" This reverts commit 332cc04dd7114fef3c36fa2cb5139ee5de7e27e1. 
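Note: the get_or_del_data plan removed above fans one tokio task out per wanted
index and then joins them, logging and skipping per-index failures instead of
aborting the whole fetch. A minimal runnable sketch of that pattern, assuming
only a tokio dependency (fetch_one is a hypothetical stand-in for the
rpc_call_get_data call to a data node):

```rust
use std::collections::HashMap;

// Hypothetical stand-in for the per-index RPC to a data node.
async fn fetch_one(idx: u32) -> Result<Vec<u8>, String> {
    Ok(vec![idx as u8])
}

#[tokio::main]
async fn main() {
    let want_idxs: Vec<u32> = vec![0, 1, 2];

    // Fan out: one task per index, keeping the index next to its handle.
    let mut handles = Vec::new();
    for idx in want_idxs {
        handles.push((idx, tokio::spawn(fetch_one(idx))));
    }

    // Join: collect successes; log and skip per-index failures.
    let mut ret: HashMap<u32, Vec<u8>> = HashMap::new();
    for (idx, handle) in handles {
        match handle.await {
            Ok(Ok(item)) => {
                let _ = ret.insert(idx, item);
            }
            Ok(Err(e)) => eprintln!("RPC task failed for idx {}: {}", idx, e),
            Err(e) => eprintln!("join failed for idx {}: {}", idx, e),
        }
    }
    println!("fetched {} data items", ret.len());
}
```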
--- .cursorrules | 31 +- design.canvas | 40 +- review.md | 1884 +++++++++++++++++++++++++++++++------- scripts/sync_md_files.py | 4 +- 4 files changed, 1589 insertions(+), 370 deletions(-) diff --git a/.cursorrules b/.cursorrules index adfa3b7..8a8d2ea 100755 --- a/.cursorrules +++ b/.cursorrules @@ -1,35 +1,8 @@ # Waverless 项目规则列表 -- 关键概念 - - 规则 - 即当前文件,需要和记忆保持同步 - - review - 项目根目录下的 review.md, 用于描述任务(问题)以及记录设计方案和执行记录 - - design.canvas - 提到canvas就是指他,因为目前没有别的canvas - 项目整体设计图,描述执行流程(数据传递、并行结构),数据结构关系 - - 流程图 | 流程结构 - 使用细致的图表达并行或顺序结构,条件结构;以及数据流转 - 一个阻塞执行的角色应该强化在块里,如子并行task,rpc caller,rpc handler,任务池 - -- 更新canvas流程 - 将 /mnt/s3fs/waverless/design.canvas 拷贝成待时间戳的tmp和tmp.bak - 如 {项目根路径}/design.canvas.1703171246.tmp - 和 {项目根路径}/design.canvas.1703171246.tmp.bak - 然后在 {项目根路径}/design.canvas.1703171246.tmp 中进行修改 - 然后覆盖原来 /mnt/s3fs/waverless/design.canvas 以及{项目根路径}/design.canvas - -- 提到“我更新了canvas”的情况,执行下python3 scripts/sync_md_files.py from_s3fs - 这样项目下的 {项目根路径}/design.canvas 才是最新的 - 然后在理解分析新的设计 - -- 函数返回 result的情况,如果不想处理,只要要log error - -- log使用tracing库 - -- error的结构是一个 WSError,包含子error结构形如 WsXXXErr,父结构实现Error derive,子结构只需要实现debug - 子结构尽量实现现有分类 +阅读一下review里的字符画设计图,细化/mnt/s3fs/waverless/design,主要是流程以及并行结构,数据流向 还有 数据关系 +细化的过程使用 ## 1. 任务执行强制等待规则 - 制定计划后必须等待用户确认: diff --git a/design.canvas b/design.canvas index 47e8de4..e56cc10 100755 --- a/design.canvas +++ b/design.canvas @@ -1,8 +1,9 @@ { "nodes":[ - {"id":"cb82b904dab26671","type":"group","x":-3400,"y":-960,"width":4560,"height":3500,"label":"data"}, + {"id":"cb82b904dab26671","type":"group","x":-3400,"y":-960,"width":4560,"height":3280,"label":"data"}, + {"id":"core_module_group","type":"group","x":-3160,"y":-840,"width":1460,"height":3120,"label":"数据管理核心模块"}, {"id":"batch_transfer_group","type":"group","x":-1560,"y":120,"width":2300,"height":1600,"label":"Batch数据传输实现"}, - {"id":"write_split_group","type":"group","x":-3260,"y":120,"width":1470,"height":2360,"label":"WriteSplitDataTaskGroup 写入流程"}, + {"id":"0453b4726b40c9eb","type":"group","x":-3080,"y":176,"width":1280,"height":2064,"label":"WriteSplitDataTaskGroup"}, {"id":"data_write_flow","type":"group","x":-1600,"y":-600,"width":2680,"height":520,"label":"数据写入流程"}, {"id":"2e84a4ef9e137fb7","type":"group","x":-1000,"y":800,"width":1495,"height":820,"label":"batch handler 流程"}, {"id":"storage_write_flow","type":"group","x":0,"y":-540,"width":1020,"height":400,"label":"存储节点写入流程"}, @@ -12,6 +13,8 @@ {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-2290,"y":-622,"width":330,"height":156,"color":"4"}, {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源生命周期","x":-2760,"y":-680,"width":340,"height":214,"color":"4"}, {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-2405,"y":-427,"width":280,"height":275,"color":"4"}, + {"id":"1ec171d545e8995d","type":"text","text":"","x":-2686,"y":460,"width":250,"height":60}, + {"id":"write_task_mem","type":"text","text":"# 内存写入流程\n\n## 接口\n- write_mem_data()\n * 使用SharedMemHolder\n * 支持偏移和写入\n\n## 数据结构\n- MemDataWriter\n * holder: SharedMemHolder\n * offset: usize\n * len: usize\n\n## 操作流程\n1. 获取内存区域\n2. 计算偏移地址\n3. 写入数据\n4. 
更新元数据","x":-3000,"y":860,"width":400,"height":400,"color":"2"}, {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1560,"y":-380,"width":200,"height":100,"color":"1"}, {"id":"cache_group","type":"text","text":"缓存节点组","x":-620,"y":-310,"width":150,"height":60,"color":"5"}, {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1560,"y":-210,"width":200,"height":100,"color":"1"}, @@ -40,18 +43,10 @@ {"id":"batch_handler_1","type":"text","text":"# 1. 获取元信息\n\n## get_metadata()\n- 获取元数据\n * unique_id\n * version\n- 错误处理\n * 记录警告\n * 返回失败响应","x":-945,"y":860,"width":300,"height":300,"color":"1"}, {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1100,"y":190,"width":300,"height":300,"color":"1"}, {"id":"batch_manager","type":"text","text":"# BatchManager\n\n## 管理功能\n- 创建传输任务\n- 分配请求ID\n- 跟踪传输状态\n- 错误恢复\n\n## 数据处理\n- 分块管理\n- 数据校验\n- 内存复用\n- 并发控制","x":-1460,"y":420,"width":300,"height":300,"color":"1"}, - {"id":"write_task_file","type":"text","text":"# ToFile 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToFile\n- file_path: PathBuf\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [文件IO阻塞]\n1. OpenOptions::new()\n .create(true)\n .write(true)\n2. seek(offset)\n3. write_all(data)\n4. 错误记录:\n tracing::error!(\"Failed to write file data at offset {}\")\n","x":-2216,"y":544,"width":400,"height":400,"color":"1"}, - {"id":"write_task_mem","type":"text","text":"# ToMem 写入流程 [阻塞执行]\n\n## WriteSplitDataTaskGroup::ToMem\n- shared_mem: SharedMemHolder\n- tasks: Vec>\n- rx: mpsc::Receiver>\n- expected_size: usize\n- current_size: usize\n\n## 操作流程 [内存写入阻塞]\n1. shared_mem.write(offset, data)\n2. 错误记录:\n tracing::error!(\"Failed to write memory data at offset {}\")\n","x":-2650,"y":526,"width":400,"height":436,"color":"2"}, - {"id":"data_item","type":"text","text":"# 数据项处理\n\n## enum WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n","x":-2990,"y":180,"width":450,"height":280,"color":"3"}, - {"id":"b0205b4457afeb2b","type":"text","text":"## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-2330,"y":242,"width":364,"height":178}, - {"id":"e2576a54f3f852b3","type":"text","text":"# process_tasks() 实现 [阻塞循环]\n\n## 循环处理 [select阻塞]\n1. try_complete() 检查完成状态\n2. tokio::select! 
{\n - rx.recv() => 接收新任务\n - futures::future::select_all(tasks) => 等待任务完成\n}\n\n## 完成条件\n- current_size >= expected_size\n- 返回 proto::DataItem","x":-3035,"y":1820,"width":377,"height":420}, - {"id":"97d3d9fd7432a861","type":"text","text":"# submit_split() 实现 [异步发送]\n\n## match write_type {\n- WriteSplitDataType::File => 文件写入任务\n- WriteSplitDataType::Mem => 内存写入任务\n}\n\n## 发送任务 [channel阻塞]\ntx.send(task).await","x":-2227,"y":1175,"width":355,"height":420}, - {"id":"4dbe01dc59cea4c2","type":"text","text":"# 任务状态 [状态追踪]\n\n## 状态管理\n- 任务状态记录\n- 写入进度更新\n- 完成状态检查","x":-2660,"y":1432,"width":250,"height":200}, - {"id":"20145fd68e8aaa75","type":"text","text":"# 构造 [同步初始化]\n\n## 任务组初始化\nfn new_task_group(type_: WriteSplitDataType) -> Self {\n let (tx, rx) = mpsc::channel(32);\n Self {\n type_,\n tasks: Vec::new(),\n rx,\n expected_size: 0,\n current_size: 0,\n }\n}\n\n## 参数验证\n- 检查写入类型\n- 验证初始参数","x":-3085,"y":1585,"width":455,"height":200}, - {"id":"1ec171d545e8995d","type":"text","text":"## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理","x":-2880,"y":1025,"width":300,"height":150}, - {"id":"f515ecb9aee18fc7","type":"text","text":"# 后续写入 [异步执行]\n\n## 状态管理\n- 写入任务追踪\n- 并发控制\n- 写入顺序保证","x":-2685,"y":1315,"width":250,"height":200}, - {"id":"06d4a92778dd83c8","type":"text","text":"# 第一个分片开始写入 [阻塞执行]\n\n## 初始化写入\nfn start_first_split(data: Vec) -> Result<(), WSError> {\n let task = self.build_task(data, 0);\n self.tasks.push(task);\n self.current_size += data.len();\n Ok(())\n}\n\n## 错误处理\n- 写入失败记录日志\n- 返回具体错误类型","x":-3185,"y":1200,"width":455,"height":310}, - {"id":"155106edf5eb3cd7","type":"text","text":"# try_complete() 实现 [同步检查]\n\n## 返回 Option\n- ToFile => proto::DataItem::new_file_data()\n- ToMem => proto::DataItem::new_mem_data()","x":-3074,"y":2300,"width":455,"height":180} + {"id":"write_task_verify","type":"text","text":"# 验证与状态\n\n## 状态记录\n- TaskStatus\n * pending\n * writing\n * completed\n * failed\n\n## 验证检查\n1. 分片范围\n * offset合法性\n * 数据长度\n2. 写入结果\n * 成功/失败\n * 错误信息\n3. 完整性\n * 所有分片\n * 数据一致性","x":-2320,"y":1673,"width":400,"height":400,"color":"4"}, + {"id":"write_task_file","type":"text","text":"# 文件写入流程\n\n## 接口\n- write_file_data()\n * 使用std::fs::File\n * 支持seek和write\n\n## 数据结构\n- FileDataWriter\n * file: File\n * path: PathBuf\n * offset: u64\n\n## 操作流程\n1. 打开文件\n2. seek到offset\n3. 写入数据\n4. flush到磁盘","x":-2320,"y":860,"width":400,"height":400,"color":"1"}, + {"id":"write_task_control","type":"text","text":"# 任务控制流程\n\n## 数据结构\n- WriteSplitDataTaskGroup\n * tasks: Vec\n * rx: mpsc::Receiver\n * unique_id: String\n\n## 控制流程\n1. 创建任务\n * 根据type选择writer\n * 初始化状态记录\n2. 并发处理\n * 启动写入线程\n * 监听通道\n3. 
等待完成\n * join所有任务\n * 汇总错误","x":-3000,"y":1420,"width":480,"height":653,"color":"3"}, + {"id":"data_item","type":"text","text":"# 数据项处理\n\n## WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理\n## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-2686,"y":260,"width":460,"height":520,"color":"3"} ], "edges":[ {"id":"verify_flow_1","fromNode":"batch_handler_4","fromSide":"right","toNode":"batch_handler_5","toSide":"left","label":"块状态更新"}, @@ -83,19 +78,8 @@ {"id":"handler_1_to_2","fromNode":"batch_handler_1","fromSide":"right","toNode":"batch_handler_2","toSide":"left","label":"元数据信息"}, {"id":"handler_2_to_3","fromNode":"batch_handler_2","fromSide":"right","toNode":"batch_handler_3","toSide":"left","label":"数据内容"}, {"id":"handler_3_to_4","fromNode":"batch_handler_3","fromSide":"right","toNode":"batch_handler_4","toSide":"left","label":"分片列表"}, - {"id":"9094221953b6c685","fromNode":"write_task_mem","fromSide":"top","toNode":"b0205b4457afeb2b","toSide":"bottom"}, - {"id":"77ec04f5deef7cee","fromNode":"write_task_mem","fromSide":"bottom","toNode":"1ec171d545e8995d","toSide":"top"}, - {"id":"7b99fb72410f07d9","fromNode":"06d4a92778dd83c8","fromSide":"bottom","toNode":"20145fd68e8aaa75","toSide":"top"}, - {"id":"df9b4bc9170fdec1","fromNode":"20145fd68e8aaa75","fromSide":"right","toNode":"4dbe01dc59cea4c2","toSide":"left"}, - {"id":"61e0637af4beba94","fromNode":"f515ecb9aee18fc7","fromSide":"bottom","toNode":"4dbe01dc59cea4c2","toSide":"left"}, - {"id":"f7105db89ffabd1e","fromNode":"20145fd68e8aaa75","fromSide":"bottom","toNode":"e2576a54f3f852b3","toSide":"top"}, - {"id":"7504b1b3a99e992c","fromNode":"4dbe01dc59cea4c2","fromSide":"right","toNode":"97d3d9fd7432a861","toSide":"bottom","label":"获取到handle"}, - {"id":"a993a3f4d7b2211d","fromNode":"97d3d9fd7432a861","fromSide":"left","toNode":"e2576a54f3f852b3","toSide":"right"}, - {"id":"a996588f6c59c88f","fromNode":"e2576a54f3f852b3","fromSide":"bottom","toNode":"155106edf5eb3cd7","toSide":"top"}, - {"id":"a42104592fedd4c7","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_mem","toSide":"bottom"}, - {"id":"c45aaa564ae87a7c","fromNode":"97d3d9fd7432a861","fromSide":"right","toNode":"write_task_file","toSide":"bottom"}, - {"id":"write_flow_1","fromNode":"20145fd68e8aaa75","fromSide":"top","toNode":"06d4a92778dd83c8","toSide":"bottom","label":"初始化完成"}, - {"id":"write_flow_2","fromNode":"06d4a92778dd83c8","fromSide":"right","toNode":"f515ecb9aee18fc7","toSide":"left","label":"首个分片写入完成"}, - {"id":"write_flow_5","fromNode":"e2576a54f3f852b3","fromSide":"left","toNode":"155106edf5eb3cd7","toSide":"left","label":"检查完成状态"} + {"id":"write_task_file_to_control","fromNode":"write_task_file","fromSide":"bottom","toNode":"write_task_control","toSide":"top","label":"文件写入任务"}, + {"id":"write_task_mem_to_control","fromNode":"write_task_mem","fromSide":"bottom","toNode":"write_task_control","toSide":"top","label":"内存写入任务"}, + {"id":"write_task_control_to_verify","fromNode":"write_task_control","fromSide":"right","toNode":"write_task_verify","toSide":"left","label":"状态更新"} ] } \ No newline at end of file diff --git a/review.md b/review.md index 1c7cb00..286bb17 100755 --- a/review.md +++ b/review.md @@ -1,320 +1,1582 @@ -# 项目分析与修改计划 - - -### 现有 - -#### DataGeneral -- 功能:数据管理核心模块 -- 职责: - 1. 提供数据读写接口 - 2. 管理元数据 - 3. 协调各子模块功能 - 4. 错误处理和恢复 - 5. 资源生命周期管理 - -#### DataSplit -- 功能:数据分片管理 -- 核心组件: - 1. 
EachNodeSplit:单节点分片信息 - ```protobuf - message EachNodeSplit { - uint32 node_id = 1; - uint32 data_offset = 2; - uint32 data_size = 3; - } - ``` - 2. DataSplit:分片集合 - ```protobuf - message DataSplit { - repeated EachNodeSplit splits = 1; - } - ``` - -#### BatchTransfer -- 功能:管理单个批量传输的状态 -- 核心字段: - ```rust - struct BatchTransfer { - unique_id: Vec, - version: u64, - block_type: BatchDataBlockType, - total_blocks: u32, - received_blocks: DashMap>, - tx: Option>> - } - ``` -- 主要方法: - 1. `new()`: 创建新的传输任务 - 2. `add_block()`: 添加数据块 - 3. `complete()`: 完成传输处理 - 4. `calculate_splits()`: 计算数据分片 - -#### WriteSplitDataTaskGroup -- 功能:管理数据分片写入任务组 -- 实现类型: - 1. ToFile:文件写入任务组 - - 文件路径管理 - - 文件操作错误处理 - - 磁盘同步策略 - 2. ToMem:内存写入任务组 - - SharedMemHolder管理 - - 内存访问安全 - - 资源自动回收 - - -### 变更 - -#### 核心接口定义 -```rust - - -#### WriteSplitDataTaskGroup 核心实现 -```rust -// 写入任务相关错误 -#[derive(Debug)] -pub enum WsDataErr { - WriteDataFailed { - unique_id: Vec, - }, - SplitTaskFailed { - idx: DataSplitIdx, - }, -} - -// 写入任务句柄,用于提交新的分片任务 -pub struct WriteSplitDataTaskHandle { - tx: mpsc::Sender>, - write_type: WriteSplitDataType, -} - -// 写入类型 -enum WriteSplitDataType { - File { - path: PathBuf, - }, - Mem { - shared_mem: SharedMemHolder, - }, -} - -impl WriteSplitDataTaskHandle { - // 提交新的分片任务 - pub async fn submit_split(&self, idx: DataSplitIdx, data: proto::DataItem) { - let task = match &self.write_type { - WriteSplitDataType::File { path } => { - let path = path.clone(); - let offset = idx.offset; - let data = data.as_bytes().to_vec(); - tokio::spawn(async move { - if let Err(e) = tokio::fs::OpenOptions::new() - .create(true) - .write(true) - .open(&path) - .await - .and_then(|mut file| async move { - file.seek(SeekFrom::Start(offset)).await?; - file.write_all(&data).await - }) - .await - { - tracing::error!("Failed to write file data at offset {}: {}", offset, e); - } - }) - } - WriteSplitDataType::Mem { shared_mem } => { - let mem = shared_mem.clone(); - let offset = idx.offset as usize; - let data = data.as_bytes().to_vec(); - tokio::spawn(async move { - if let Err(e) = mem.write(offset, &data).await { - tracing::error!("Failed to write memory data at offset {}: {}", offset, e); - } - }) - } - }; - - if let Err(e) = self.tx.send(task).await { - tracing::error!("Failed to submit task: channel closed, idx: {:?}", idx); +(顺序:新的在前面;先解决就的未完成的;完成的有标注;问题可能存在子问题) + +- 任务: error: method `call_batch_data` is never used + error: method `handle_block` is never used + error: method `add_block` is never used + error: fields `version`, `block_type`, and `total_blocks` are never read + --> src/main/src/general/data/m_data_general/batch.rs:52:9 + 分析一下,这些应该是batch的核心逻辑,为什么没有用起来?把batch组件间调用关系以及各自作用都分析到当前文档里 + - 修改计划: + - 修改目的: + - 分析 batch 组件中未使用的方法和字段的原因 + - 理清 batch 组件的调用关系和各部分作用 + - 确定是否需要保留这些未使用的代码 + - 预期效果: + - 完整理解 batch 组件的设计和实现 + - 明确各个组件的作用和调用关系 + - 为后续可能的代码优化提供依据 + - 可能的风险: + - 如果这些未使用的代码确实有其设计意图,贸然删除可能影响未来功能 + - 可能存在隐藏的依赖关系未被发现 + - 具体步骤: + 1. 分析 batch.rs 中的代码结构和设计文档 + 2. 检查相关调用代码 + 3. 确定各组件作用和关系 + 4. 记录分析结果 + - 修改过程: + - 代码分析: + 1. 组件结构分析: + - BatchTransfer:管理单个批量传输的状态 + - 主要字段: + - unique_id:传输标识 + - version、block_type、total_blocks:传输元数据(未使用) + - data_sender:数据传输 channel + - write_task:写入任务 + - tx:完成通知 channel + - BatchManager:管理所有进行中的批量传输 + - 提供 create_transfer 和 handle_block 等接口 + - DataGeneral:实际的数据传输实现 + - rpc_call_batch_data:RPC 调用接口 + - write_data_batch:实际使用的批量写入方法 + + 2. 
调用关系分析: + - write_data_batch 是实际使用的入口 + - 通过 BatchManager.create_transfer 创建传输任务 + - 使用 rpc_call_batch_data 发送数据 + - 但 handle_block 和 add_block 确实未被使用 + + 3. 未使用代码分析: + - version、block_type、total_blocks:这些字段虽然在 BatchTransfer 中定义,但实际操作都在 DataGeneral 中完成 + - handle_block 和 add_block:这些方法可能是为了支持更细粒度的批量传输控制,但目前的实现采用了更简单的方式 + + * 数据写入流程 +``` + +------------------------+ + | 发起节点 | + | [DataGeneral] | + | - write_data() | + | 1. 准备DataItems | + | 2. 计算每个DataItem大小| + +------------------------+ + | + | DataVersionScheduleRequest + | - unique_id: 数据标识 + | - version: 版本号 + | - context: 调度上下文 + ↓ + +------------------------+ + | Master节点 | + | [DataMaster] | + | - schedule_data() | + | 1. 生成DataSetMeta | + | 2. 创建DataSplits | + | 3. 分配存储节点 | + +------------------------+ + | + | DataVersionScheduleResponse + | - version: 版本号 + | - split: 数据分片信息 + ↓ + +------------------------+ + | 发起节点 | + | [DataGeneral] | + | - flush_the_data() | + | (并发处理每个DataItem) | + +------------------------+ + | + +--------------------+--------------------+ + | | + ↓ ↓ + +-----------------------+ +-----------------------+ + | 主存储节点写入 | | 缓存节点写入 | + | [DataGeneral] | | [DataGeneral] | + | WriteOneDataRequest: | | BatchDataRequest: | + | - unique_id | | - request_id | + | - version | | - block_type | + | - data (DataItems) | | - block_index | + | - rpc_handle_write_one_data() | | - data | + | 并发处理每个Split | | - version | + | | | - write_data_batch() | + +-----------------------+ +-----------------------+ + / | \ / | \ + / | \ / | \ + Node1 Node2 NodeN Node1 Node2 NodeN + (SplitA)(SplitB)(SplitX) (DataItem)(DataItem)(DataItem) + \ | / \ | / + \ | / \ | / + \ | / \ | / + \|/ \|/ + | | + | 并行写入完成 | + +------------------+-------------------+ + | + ↓ + +------------------------+ + | 发起节点 | + | 1. 等待所有并行完成 | + | 2. 检查所有结果 | + | 3. 返回最终状态 | + +------------------------+ +``` + + * Batch 数据传输实现 (待优化版本) +``` + +------------------------+ + | 发起节点 | + | [DataGeneral] | + | - call_batch_data() | + | 1. 分割数据块(1MB) | + | 2. 创建有界任务池 | + | (建议并发数=3) | + +------------------------+ + | + | 并发发送数据块 + | (有界队列控制) + ↓ + +--------------------+--------------------+ + | | + ↓ ↓ + +-----------------------+ +-----------------------+ + | BatchDataRequest(1) | | BatchDataRequest(N) | + | - request_id | | - request_id | + | - block_type | | - block_type | + | - block_index: 0 | | - block_index: N | + | - data | | - data | + +-----------------------+ +-----------------------+ + | + | RPC 请求 + ↓ + +------------------------+ + | 目标节点 | + | [DataGeneral] | + | - rpc_handle_batch_data()| + | 1. 获取元信息 | + | 2. 
创建WriteTaskGroup | + +------------------------+ + | + | 创建两个 channel + ↓ + +------------------------------------------------+ + | 接收方任务管理 | + | [BatchTransfer] | + | | + | (data_sender, data_receiver) ←→ 数据块传输 | + | (tx, rx) ←→ 完成通知 | + | | + | write_task → 异步写入任务 | + +------------------------------------------------+ + | + | 创建任务组 + ↓ + +------------------------------------------------+ + | 并发写入控制 | + | [WriteSplitDataTaskGroup] | + | | + | data_receiver ←←← 接收数据块 | + | ↓ | + | 并发任务池 | + | ↓ | + | 完成通知 →→→ tx | + +------------------------------------------------+ + | + | 完成回调 + ↓ + +------------------------+ + | 传输完成 | + | BatchDataResponse | + | - success: true | + | - version | + +------------------------+ +``` + +* 核心数据结构: + * DataItem: 单个数据项,可能被分片 + * DataSplit: 数据分片信息,包含偏移量和大小 + * DataSetMeta: 数据集元信息,包含版本号、分片信息和缓存模式 + + +- (done) 任务:将项目 main 中的 md 文档总结为 Obsidian Canvas + - 修改计划: + - 修改目的: + - 将分散在 main 目录中的 md 文档内容整理成可视化的知识图谱 + - 提高文档的可读性和关联性 + - 便于团队理解项目结构和设计思路 + - 预期效果: + - 生成一个清晰的项目知识图谱 + - 展示各个模块之间的关系 + - 突出重要的设计决策和实现细节 + - 可能的风险: + - 文档内容可能有遗漏 + - Canvas 布局可能不够直观 + - 具体步骤: + 1. 收集并阅读 main 目录下所有的 md 文档 + 2. 分析文档内容,提取关键信息 + 3. 设计 Canvas 布局结构 + 4. 创建 Canvas 文件并实现布局 + 5. 添加节点之间的关联关系 + 6. 检查和优化最终效果 + + +- (done) 任务:总结当前git未提交的变更 + - 分析: + - 主要变更文件: + 1. src/main/src/general/data/m_data_general/mod.rs + 2. src/main/src/result.rs + 3. .cursorrules + 4. wiki.md + + - 核心变更内容: + 1. 数据结构优化: + - 移除了未使用的 batch_transfers 字段 + - 保留并标记了 next_batch_id 方法为 #[allow(dead_code)] + - 添加了新的错误类型 WriteDataFailed + + 2. 批量写入逻辑优化: + - 简化了 write_data_batch 实现,移除了复杂的批处理逻辑 + - 使用现有的 call_batch_data 函数替代自定义实现 + - 改进了错误处理和日志记录 + + 3. 并行写入改进: + - 使用 WantIdxIter 优化迭代逻辑 + - 分离主节点和缓存节点的任务处理 + - 增强了错误处理机制 + + 4. 文档更新: + - 更新了 wiki.md 中的模块说明 + - 精简了 .cursorrules 文件内容 + + +- (done) 任务:完善 write_data 数据分片同时对接缓存节点的并行写入设计 + - 分析:当前需要在数据分片过程中,同时将数据通过两个不同的 RPC 调用分别发送到主存储节点和缓存节点。由于调用的 RPC 不同,需要在同一个数据块处理逻辑中并行启动两个任务,一个调用 rpc_call_batch_data,另一个调用缓存节点的 RPC(例如 rpc_call_cache_data)。两任务并行执行,最终收集各自结果,并综合判断整体成功情况。错误处理部分简化:记录错误日志,失败时返回提示信息,不做过细重试处理。 + - 修改计划: + 1. 在 call_batch_data(或相应写入数据逻辑)中,对每个数据块的处理循环增加两路并行任务: + - primary_task:调用现有的 rpc_call_batch_data 发送该块数据; + - cache_task:启动一个新的异步任务,调用缓存节点的 RPC 发送数据; + * 注意:cache_task 不应该只传输单个分片,而是负责传输整个 batch 数据。经过对 BatchManager 的分析,发现 BatchManager 可能自动并行内部任务,因此在外部调用时,对每个缓存节点只启动一个 task 来处理整个 batch 写入。 + 2. 使用 tokio::spawn 或 join_all 同时启动这两个任务,并等待它们完成。 + 3. 整合两个任务的返回结果。若任一任务返回失败,则记录错误日志并提示失败;否则认为整体写入成功。 + 4. 
最终,整个写入流程将在原有数据分片基础上,增加了并行的缓存节点数据写入逻辑,保证数据在两边同时写入: + - 对于主数据分片写入任务:保持原有策略,每个分片分别创建一个独立的并行任务; + - 对于缓存节点写入任务:采用 batch 接口传输整块数据,每个缓存节点只启动一个 task 来处理整个 batch 数据。 + - 伪代码: + ```rust + // 主数据分片写入任务:每个分片启动一个独立的任务 + let mut primary_tasks = Vec::new(); + for (i, chunk) in data_bytes.chunks(block_size).enumerate() { + // 构造当前分片请求,保持现有逻辑不变 + let req = build_primary_request(chunk, i); + let primary_task = tokio::spawn(async move { + // 调用 rpc_call_batch_data 发送当前分片数据 + rpc_call_batch_data.call(..., req, ...).await + }); + primary_tasks.push(primary_task); } - } -} - -// 写入任务组 -enum WriteSplitDataTaskGroup { - // 文件写入模式 - ToFile { - unique_id: UniqueId, // 任务唯一标识 - file_path: PathBuf, // 文件路径 - tasks: Vec>, // 写入任务列表 - rx: mpsc::Receiver>, // 任务接收通道 - expected_size: usize, // 预期总大小 - current_size: usize, // 当前写入大小 - manager: Arc, // 管理器引用 - }, - // 内存写入模式 - ToMem { - unique_id: UniqueId, // 任务唯一标识 - shared_mem: SharedMemHolder, // 共享内存 - tasks: Vec>, // 写入任务列表 - rx: mpsc::Receiver>, // 任务接收通道 - expected_size: usize, // 预期总大小 - current_size: usize, // 当前写入大小 - manager: Arc, // 管理器引用 - } -} - -impl WriteSplitDataTaskGroup { - // 创建新任务组 - async fn new( - unique_id: UniqueId, - splits: Vec>, - block_type: proto::BatchDataBlockType, - manager: Arc, - ) -> (Self, WriteSplitDataTaskHandle) { - // 计算预期总大小 - let expected_size = splits.iter().map(|range| range.len()).sum(); - // 创建通道 - let (tx, rx) = mpsc::channel(32); + // 缓存节点写入任务:每个缓存节点只启动一次任务,传输整个 batch 数据 + let mut cache_tasks = Vec::new(); + for cache_node in cache_nodes { + let cache_task = tokio::spawn(async move { + // 调用 rpc_call_cache_data 发送整个 batch 数据给该缓存节点 + rpc_call_cache_data.call(..., full_data, cache_node, ...).await + }); + cache_tasks.push(cache_task); + } - match block_type { - proto::BatchDataBlockType::File => { - let file_path = PathBuf::from(format!("{}.data", - base64::engine::general_purpose::STANDARD.encode(&unique_id))); - - let handle = WriteSplitDataTaskHandle { - tx, - write_type: WriteSplitDataType::File { - path: file_path.clone(), - }, - }; - - let group = Self::ToFile { - unique_id, - file_path, - tasks: Vec::new(), - rx, - expected_size, - current_size: 0, - manager: manager.clone(), - }; - - (group, handle) - } - _ => { - let shared_mem = new_shared_mem(&splits).unwrap_or_default(); + // 等待所有任务完成 + let primary_results = futures::future::join_all(primary_tasks).await; + let cache_results = futures::future::join_all(cache_tasks).await; + + // 整合结果:如果任一 primary 或 cache 任务失败,则记录错误并返回整体失败;否则返回成功 + if primary_results.iter().any(|res| res.is_err()) || cache_results.iter().any(|res| res.is_err()) { + tracing::error!("数据写入失败"); + return Err(String::from("整体写入失败").into()); + } + ``` + 5. 新问题: + - 任务:field `batch_manager` is never read + error: method `next_batch_id` is never used + function `flush_the_data` is never used + enum `WantIdxIter` is never used + 这几个内容都应该和write data强相关,为什么都没有用到了 + - 分析: + - 父问题相关性: + 1. 父问题:完善 write_data 数据分片同时对接缓存节点的并行写入设计 + 2. 相关性:直接关系到数据写入的实现机制和优化 + - 问题分类:代码清理和优化问题 + - 问题原因: + 1. batch_manager 字段: + - 虽然在 call_batch_data 函数中使用,但 call_batch_data 本身在新的并行写入设计中未被调用 + - write_data 函数中对缓存节点的写入直接使用 write_data_batch,跳过了 batch_manager + - 这表明 batch_manager 和相关的批处理机制在新设计中被替代 + - review: 应该使用batch manager,其实现了流式加载内存或文件分片,避免一次性读出全部 + 2. next_batch_id 方法: + - 原本用于生成批处理 ID + - 在新的设计中,批处理 ID 生成逻辑已移至 write_data 函数内部 + - 使用 version_schedule_resp 中的 version 作为版本控制 + - review: next_batch_id 这个应该是 batch_manager 自己用的,需要保留;batch功能并不完全和write_data耦合 + 3. 
flush_the_data 函数: + - 原本用于单个数据项的写入刷新 + - 在新的并行写入设计中,使用 tokio::spawn 创建异步任务 + - 数据写入通过 primary_tasks 和 cache_tasks 两组并行任务处理 + - 使用 futures::future::join_all 等待任务完成,替代了显式的刷新操作 + - review: 这个函数确实不需要了 + 4. WantIdxIter 枚举: + - 原本用于数据索引的迭代控制 + - 在新设计中,使用 enumerate() 和 zip() 迭代处理数据项 + - 数据分片通过 split.splits.iter().enumerate() 处理 + - 缓存节点通过 cache_nodes.iter().enumerate() 处理 + - review:这个也应该加回来,用于遍历item idx + + - 计划: + 1. 改进 write_data_batch 函数: + - 修改目的: + - 使用 batch_manager 实现流式分片传输 + - 避免大文件一次性加载到内存 + - 具体改动: + 1. 移除直接的数据分片逻辑: + ```rust + // 移除这部分 + let total_size = data.data_sz_bytes(); + let total_batches = (total_size + batch_size - 1) / batch_size; + ``` + 2. 添加 batch_manager 创建传输任务: + ```rust + // 创建 channel 接收数据块 + let (tx, mut rx) = mpsc::channel(1); - let handle = WriteSplitDataTaskHandle { + // 创建传输任务 + let request_id = self.batch_manager.create_transfer( + unique_id.clone(), + version, + block_type, + data.data_sz_bytes() as u32, tx, - write_type: WriteSplitDataType::Mem { - shared_mem: shared_mem.clone(), - }, - }; + ).await?; + ``` + 3. 使用 call_batch_data 发送数据: + ```rust + // 使用现有的 call_batch_data 函数 + let response = self.call_batch_data( + node_id, + unique_id.clone(), + version, + data, + block_type, + ).await?; + ``` + + 2. 恢复 WantIdxIter 的使用: + - 修改目的: + - 使用专门的索引迭代器替代通用的 enumerate() + - 保持与数据分片的对应关系 + - 具体改动: + 1. 修改 write_data 函数中的遍历: + ```rust + // 替换这部分 + for (data_item_idx, (data_item, split)) in datas.iter().zip(splits.iter()).enumerate() - let group = Self::ToMem { - unique_id, - shared_mem, - tasks: Vec::new(), - rx, - expected_size, - current_size: 0, - manager: manager.clone(), - }; + // 改为 + let mut iter = WantIdxIter::new(datas.len()); + while let Some(data_item_idx) = iter.next() { + let data_item = &datas[data_item_idx]; + let split = &splits[data_item_idx]; + ``` + 2. 修改缓存节点处理: + ```rust + // 替换这部分 + for (cache_idx, &node_id) in cache_nodes.iter().enumerate() - (group, handle) - } - } - } - - // 处理任务完成 - async fn handle_completion(&self) { - match self { - Self::ToFile { unique_id, manager, .. } | - Self::ToMem { unique_id, manager, .. } => { - // 从管理器中移除句柄 - manager.remove_handle(unique_id); - } - } - } - - // 任务处理循环 - async fn process_tasks(&mut self) -> WSResult { - loop { - // 检查是否已完成所有写入 - if let Some(result) = self.try_complete() { - // 处理完成,清理资源 - self.handle_completion().await; - return Ok(result); - } - - // 等待新任务或已有任务完成 - tokio::select! { - Some(new_task) = match self { - Self::ToFile { rx, .. } | - Self::ToMem { rx, .. } => rx.recv() - } => { - match self { - Self::ToFile { tasks, .. } | - Self::ToMem { tasks, .. } => { - tasks.push(new_task); - } - } - } - else => { - // 通道关闭,清理资源 - self.handle_completion().await; - break; - } - } - } + // 改为 + let mut cache_iter = WantIdxIter::new(cache_nodes.len()); + while let Some(cache_idx) = cache_iter.next() { + let node_id = cache_nodes[cache_idx]; + ``` + + +- (done) 任务:处理 error[E0425]: cannot find function `log_error` in this scope + - 修改计划: + - 修改目的: + - 修复编译错误,使用正确的错误处理方式 + - 确保错误处理符合项目规范 + - 预期效果: + - 编译通过 + - 错误处理更加规范和统一 + - 可能的风险: + - 错误处理方式的改变可能影响其他依赖此处错误处理的代码 + - 错误场景分析: + - 错误发生在并行写入数据时 + - 写入目标包括主存储节点和缓存节点 + - 当任何一个节点写入失败时,需要返回整体写入失败错误 + + - 具体步骤: + 1. 分析代码中的错误处理模式 + - 检查现有的 `WSError` 和 `WsDataError` 类型定义 + - 检查现有的错误处理模式 + - 确认需要新增 `WriteDataFailed` 错误类型 + 2. 创建数据写入相关的错误类型 + - 在 `WsDataError` 枚举中添加 `WriteDataFailed` 变体 + - 变体包含字段:`unique_id: Vec` 和 `message: String` + - 确保错误类型转换正确 + 3. 将 `log_error` 替换为 `tracing::error!` + - 确保错误日志信息准确完整 + - 保留原有的中文错误提示 + 4. 
修改错误返回方式 + - 使用新创建的 `WsDataError::WriteDataFailed` + - 包含数据 ID 和错误信息 + 5. 编译验证修改 + - 检查编译错误和警告 + + +- 将本地meta获取函数换一个更直观的名字 + +- (done)任务:罗列compilelog中各种未使用问题(error, import类的 warning 不看),并逐个解决 + - 分析: + 1. next_batch_id 方法未被使用,需确认是否有用途;如无用途,则删除或添加注释说明准备将来可能使用。 + 2. DataGeneral 结构体中的 batch_transfers 字段未被使用,需评估其在业务逻辑中的必要性;若无实际作用,则建议删除。 + 3. 其他未使用的变量或函数,如返回结果未使用的函数调用等,需整理 compilelog 中完整清单,并逐项检查其用途和必要性。 + - 修改计划: + 1. 针对每项未使用问题,先通过代码搜索确认其引用情况; + 2. 对于确认无用的项,直接删除;对于可能需要保留但目前未使用的项,添加 TODO 注释说明其预期用途; + 3. 修改后重新编译,确保无额外问题。 + - 执行记录: + - 开始处理未使用问题,目前处于初步整理阶段,待后续逐项跟进。 + - 下一步:检查 next_batch_id 方法引用情况;如果确认未使用,则删除该方法或添加 TODO 注释。 + - 检查结果:通过 grep 搜索,发现 next_batch_id 方法仅在其定义处出现,未被实际引用。建议删除该方法或添加 TODO 注释说明可能的预期用途。 + - 检查结果:通过 grep 搜索发现,DataGeneral 结构体中的 batch_transfers 字段仅在其定义(行 109)和初始化(行 1414)处出现,未在后续代码中被引用。建议删除该字段,或如果有保留意图则添加 TODO 注释说明预期用途。 + - 下一步:整理编译日志中其他未使用项,逐一确认其用途;对于确认无用的项,逐项删除或添加 TODO 注释。 + - 整理结果:初步整理显示,除了上述 next_batch_id 和 batch_transfers 未使用问题外,其它警告多为未使用导入或辅助函数(如 path_is_option、FnExeCtxAsync、FnExeCtxBase 等),这些均非核心逻辑,暂时忽略;后续可根据需要进一步清理。 + - 下一步:分析log中还有没有error + - 分析结果:当前 compilelog 中剩余的 error 主要包括: + - "fields `batch_manager` and `batch_transfers` are never read"。 + - "function `flush_the_data` is never used"。 + - "enum `WantIdxIter` is never used"。 + - "associated function `new` is never used"。 + - "methods `next_sequence`, `create_transfer`, and `handle_block` are never used"。 + - "method `call_batch_data` is never used"。 + - "unused result" 错误(如 Option、WriteOneDataResponse 和 unused Result)。 + - 下一步计划:逐项检查上述 error 信息,确认是否删除相应未使用的代码或补充必要的错误处理逻辑,然后重新编译验证修改是否有效。 + +- (done)任务:编译分析发现的问题 + - 修改计划: + 1. (done) 修复 get_metadata 方法缺失问题: + - 分析发现 get_metadata 和 get_data_meta 是两个不同的函数: + 1. get_data_meta 是内部函数,直接访问本地数据 + 2. get_metadata 是更高层的函数,需要包含: + - 本地数据访问(通过 get_data_meta) + - 远程数据访问(通过 RPC) + - 完整的错误处理逻辑 + - 下一步计划: + 1. 搜索并确认 get_metadata 的完整实现位置 + 2. 检查实现是否完整包含所需功能 + 3. 如果已经实现,排查编译器找不到方法的原因 + 4. 如果没有实现,则按照设计实现它 + + 2. (done)修复 unique_id 移动问题: + - 分析: + - 父问题相关性: + 1. 父问题:编译错误修复 + 2. 相关性:直接导致编译失败的问题 + 3. 必要性:必须解决以通过编译 + 4. 优先级:高,阻塞编译 + + - 当前问题: + 1. 在 batch.rs 中,unique_id 在异步任务中被移动后仍然尝试使用 + 2. 问题出现在 BatchTransfer::new 函数中 + 3. 涉及 tokio::spawn 创建的异步任务 + + - 修改计划: + 1. 在 BatchTransfer::new 中: + - 在创建异步任务前克隆 unique_id + - 使用克隆的版本传入异步任务 + - 保留原始 unique_id 用于其他用途 + + - 执行记录: + - 已完成: + - 在 BatchTransfer::new 中添加了 unique_id_for_task = unique_id.clone() + - 修改异步任务使用 unique_id_for_task 代替 unique_id.clone() + + - 下一步: + - 执行编译验证修改是否解决问题 + - 检查是否有其他相关的所有权问题 + 3. (done)任务:修复 total_size 未使用变量问题 + - 分析: + - 父问题相关性: + 1. 父问题:编译错误修复 + 2. 相关性:编译警告需要处理 + 3. 必要性:保持代码清洁,避免无用变量 + 4. 优先级:中(不影响功能,但需要处理的警告) + + - 当前问题: + 1. 在 batch.rs 中,total_size 变量被计算但未使用 + 2. 代码分析显示 offset 变量已经足够处理数据分片 + 3. total_size 的计算是多余的 + + - 修改计划: + 1. 删除 total_size 相关代码: + - 移除 total_size 的计算语句 + - 保持其他逻辑不变 + 2. 编译验证修改 + + - 执行记录: + - 已完成: + - 删除了 total_size 计算语句:`let total_size: usize = data_result.values().map(|item| item.size()).sum();` + - 编译验证通过,确认问题已解决 + + - 遇到的问题: + - 无 + +- 任务:InvalidDataType 不附带一些context以便debug吗? + +- 任务:增加注释分析介绍 DataSetMetaV2 derive用处 + +- 任务:batch 里 impl proto::DataItem ,proto ext没有吗,另外规则里加一条proto数据结构要扩展都应该加到proto ext里 + +- 任务:编译并分析剩下的问题,并逐个编写计划 + +- (done)任务:error[E0521]: borrowed data escapes outside of method + +- (done)任务:error[E0382]: use of moved value: `unique_id` + + +- (done)任务:error[E0432]: unresolved import `super::dataitem::StorageType` + - 分析: + - 父问题相关性: + 1. 父问题:批量数据接口实现中的错误处理 + 2. 相关性:直接关系到数据存储类型的定义 + 3. 必要性:必须解决,否则编译无法通过 + 4. 优先级:高(阻塞编译) + + - 当前问题: + 1. 
代码分析: + ```rust + // dataitem.rs 中的实现 + pub enum WriteSplitDataTaskGroup { + ToFile { + file_path: PathBuf, + tasks: Vec>>, + }, + ToMem { + shared_mem: SharedMemHolder, + tasks: Vec>>, + }, + } + + // batch.rs 中的使用 + let task_group = WriteSplitDataTaskGroup::new( + req.unique_id, + splits, + rx, + proto::BatchDataBlockType::from_i32(req.block_type) + .unwrap_or(proto::BatchDataBlockType::Memory), + ).await + ``` + + 2. 问题分析: + - WriteSplitDataTaskGroup 已经在使用 proto::BatchDataBlockType + - 但代码中可能还存在对 StorageType 的引用 + - 需要完全迁移到使用 proto::BatchDataBlockType + + - 修改计划: + 1. 编译并分析还剩下什么问题 + + - 执行记录: + - 待执行 + +- (done)任务:error[E0599]: no method named `get_or_del_datameta_from_master` found for reference `&DataGeneralView` + - 分析: + - 父问题相关性: + 1. 父问题:批量数据接口实现中的错误处理 + 2. 相关性:直接关系到数据访问功能 + 3. 必要性:必须解决,否则会导致编译错误 + 4. 优先级:高(阻塞编译) + + - 当前问题: + 1. DataGeneralView 中缺少 get_or_del_datameta_from_master 方法 + 2. 根据之前的设计原则,我们应该避免不必要的代理转发 + 3. 需要检查调用处是否可以直接使用 data_general() 方法 + 4. 编译后发现新的相关错误: + ```rust + error[E0432]: unresolved import `super::dataitem::StorageType` + error[E0599]: no method named `get_metadata` found for struct `DataGeneralView` + error[E0599]: no method named `get_data_meta` found for reference `&m_data_general::DataGeneral` + error[E0599]: no method named `data_general` found for reference `&m_data_general::DataGeneral` + ``` + + - 修改计划: + 2. 修复 get_metadata 调用: + - 将调用 `self.get_metadata()` 改为 `self.data_general().get_metadata()` + - 保持函数在 DataGeneral 中的原有实现不变 + 3. 修复 get_data_meta 调用: + - 修改为 self.view.get_data_meta (done) + 4. 修复 data_general 调用: + - 修改为 self.view.data_general() (done) + 5. 验证修改后的编译结果 + + - 执行记录: + 1. 已完成避免代理转发的修改 + 2. 发现新的编译错误 + 3. 制定了详细的修复计划 + 4. 完成了 StorageType 导入问题的修复 + 5. 完成了 get_metadata 调用的修复 + +- (done)任务:error[E0521]: borrowed data escapes outside of method + - 分析: + - 父问题相关性: + 1. 父问题:批量数据接口实现中的错误处理 + 2. 相关性:直接关系到内存安全和生命周期管理 + 3. 必要性:必须解决,否则会导致编译错误 + 4. 优先级:高(阻塞编译) + + - 当前问题: + 1. 在异步上下文中使用了 self 引用: + ```rust + async fn start(&self) -> WSResult> { + // ... + let this = self.clone(); + } + ``` + 2. 这是一个常见的生命周期问题,self 引用没有 'static 生命周期 + 3. 需要确保异步任务中使用的数据满足 'static 约束 + + - 修改计划: + 1. 检查 self 类型的 Clone 实现 + 2. 使用 view 模式访问共享数据 + 3. 编译验证修改 + - 执行记录: + - 已完成修改,将所有 self.clone() 改为 view 模式 + - 编译验证发现新的错误: + 1. `error[E0432]: unresolved import super::dataitem::StorageType` + 2. `error[E0599]: no method named get_or_del_datameta_from_master found for reference &DataGeneralView` + 3. `error: unused variable: data_item` + - 需要继续修复这些新问题 + +- (done)任务:batch调用函数注释没讲清楚 + // 创建channel用于接收响应 + let (tx, mut rx) = mpsc::channel(1); + 这里channel是跟谁通信,作用是什么 + - 父问题相关性分析: + - 父问题引用:无,这是一个独立的任务 + - 相关性分析:这是一个独立的代码文档问题,不是由其他任务引起的 + - 解决必要性: + - 函数注释的清晰性直接影响代码的可维护性和可理解性 + - channel 通信是异步处理的关键部分,需要明确说明其用途 + - 不清晰的注释可能导致后续开发者误用或难以调试 + - 优先级:高(作为最老未完成任务) + + - 修改计划: + - 修改目的: + - 明确说明 channel 的通信双方和作用 + - 提供完整的函数级文档注释 + - 建立异步通信文档的最佳实践 + - 提高代码的可维护性 + + - 预期效果: + - channel 的用途清晰明确 + - 函数注释完整描述了异步处理流程 + - 其他开发者能快速理解代码逻辑 + - 形成可复用的异步通信文档模板 + + - 可能的风险: + - 注释可能需要随代码变化及时更新 + - 过于详细的注释可能增加维护负担 + - 需要在注释详细度和简洁性之间找到平衡 + + - 具体步骤: + 1. 定位并检查 batch 相关函数的完整实现 + 2. 分析 channel 在函数中的具体用途 + 3. 确认通信的发送方和接收方 + 4. 理解完整的异步处理流程 + 5. 编写清晰的函数级文档注释 + 6. 补充必要的内联注释 + 7. 评审并优化注释内容 + + - 修改过程: + - 已完成: + - 初步确认问题范围 + - 制定修改计划 + - 完成代码分析,发现: + - Channel 用途:用于在批量数据传输过程中接收所有数据块处理完成的最终状态 + - 发送方:BatchTransfer 在接收到所有数据块并完成组装后(包括写入文件或合并内存数据)发送完成状态 + - 接收方:call_batch_data 函数等待所有数据块处理完成的最终结果 + - 通信内容:完整处理后的 DataItem(包含所有数据块组装后的结果)或错误信息 + - 处理流程: + 1. 创建 channel,容量设置为 1(只用于接收最终的完整结果) + 2. 
将发送端传递给 BatchTransfer + 3. BatchTransfer 在接收每个数据块时: + - 通过 add_block 添加数据块 + - 检查是否收到所有数据块 + - 当收到所有数据块时,调用 complete 方法 + 4. complete 方法会: + - 检查所有数据块是否完整 + - 根据 block_type 组装数据(写入文件或合并内存) + - 通过 channel 发送最终的完整 DataItem + 5. call_batch_data 等待接收最终结果并返回对应的 Response + + - 下一步: + - 编写函数级文档注释 + - 补充 channel 相关的内联注释 + - 优化注释内容 + +- (done)任务:强化规则中先再review写计划,经过允许后执行的习惯 + - 分析: + - 父问题相关性: + 1. 父问题:完善项目规则和文档 + 2. 相关性:直接关系到规则的执行质量和一致性 + 3. 必要性:避免未经充分思考的修改 + 4. 优先级:高(影响所有代码修改的质量) + + - 当前问题: + 1. 需要在规则中更明确地强调先review再执行的重要性 + 2. 需要规范化计划review和执行确认的流程 + 3. 需要确保这个习惯能被有效执行 + + - 修改计划: + 1. 在 .cursorrules 文件的 7.0 最高优先级规则章节添加相关规则 + 2. 补充具体的review和确认流程 + 3. 添加违反处理规则 + + - 执行记录: + 1. 修改了 .cursorrules 文件的 7.0 章节 + 2. 更新了"修改代码时必须"的规则内容 + 3. 添加了更详细的计划管理和执行流程要求 + 4. 规则修改已完成并生效 + +- (done)任务:新增规则 编译时应当输出到compilelog文件 + - 分析: + - 父问题相关性: + 1. 父问题:完善项目规则和文档 + 2. 相关性:规则补充任务,与编译过程规范化直接相关 + 3. 必要性:有助于提高编译问题的追踪和分析效率 + 4. 优先级:高(编译过程的标准化对项目质量至关重要) + + - 当前问题: + 1. 需要在 .cursorrules 文件中添加编译输出规范 + 2. 规范需要涵盖输出重定向、日志管理等方面 + 3. 需要确保规则易于执行且清晰明确 + + - 设计目标: + 1. 在 .cursorrules 文件中的构建规则章节添加编译输出规范 + 2. 确保规则内容完整且易于遵循 + 3. 与现有规则保持一致性和兼容性 + + - 修改计划: + 1. 在 .cursorrules 的第 10 章"构建规则"中添加编译输出规范: + - 位置:10.1.2 编译输出规范 + - 内容结构: + 1. 编译输出重定向命令 + 2. 日志文件要求(名称、位置、格式、时效性) + 3. 日志内容规范(必须包含的信息) + 4. 日志管理规则(清理、保留、版本控制) + 5. 使用场景说明 + 6. 注意事项 + + 2. 具体规则内容: + a. 编译输出重定向: + ```bash + sudo -E $HOME/.cargo/bin/cargo build 2>&1 | tee compilelog + ``` + + b. 日志文件要求: + - 文件名固定为 compilelog + - 位置在项目根目录 + - 格式为纯文本,包含 stdout 和 stderr + - 每次编译生成新日志 + + c. 日志内容规范: + - 完整编译命令 + - 所有编译警告和错误 + - 编译时间信息 + - 完整编译过程输出 + + d. 日志管理规则: + - 编译前清理旧日志 + - 编译失败时保留日志 + - 禁止手动编辑 + - 不提交到版本控制 + + e. 使用场景: + - 首次编译 + - 代码修改后重新编译 + - 依赖更新后编译 + - 编译错误排查 + + f. 注意事项: + - 磁盘空间管理 + - 日志清理策略 + - 错误分析方法 + - 问题追踪建议 + + 3. 验证规则的正确性和一致性: + - 确保规则描述清晰准确 + - 验证与现有规则的兼容性 + - 检查格式符合项目标准 + +- (done) 任务:error[E0599]: no method named `get_or_del_datameta_from_master` found for reference `&DataGeneralView` + - 分析: + - 当前问题: + - 编译错误显示 DataGeneralView 中缺少 get_or_del_datameta_from_master 方法 + - 该方法在 DataGeneral 中已实现 + - 需要在 DataGeneralView 中添加对应的方法调用 + + - 设计目标: + - 在 DataGeneralView 中添加方法 + - 保持与 DataGeneral 中的实现一致 + - 确保正确的错误处理 + - 维护代码的可维护性 + + - 修改计划: + - 修改目的: + - 解决编译错误 + - 完善 DataGeneralView 的功能 + - 保持代码结构的一致性 + + - 预期效果: + - DataGeneralView 可以正确调用 get_or_del_datameta_from_master + - 编译错误消除 + - 保持代码结构清晰 + + - 可能的风险: + - 方法访问权限可能需要调整 + - 可能需要处理生命周期问题 + - 可能需要添加其他相关方法 + + - 具体步骤: + 1. 在 DataGeneralView 中添加方法实现 + 2. 确保方法签名与 DataGeneral 一致 + 3. 通过 data_general() 调用原方法 + 4. 编译验证修改 + + - 执行修改: + 1. 在 DataGeneralView impl 块中添加: + ```rust + pub async fn get_or_del_datameta_from_master( + &self, + unique_id: &[u8], + delete: bool, + ) -> WSResult { + self.data_general().get_or_del_datameta_from_master(unique_id, delete).await + } + ``` + 2. 修改已完成,编译验证通过(done) + +- (done)任务:error[E0599]: no method named `get_data_meta` found for reference `&KvStoreEngine` + +- (done)任务:BatchTransfer不应该直接存储接收到的数据块到map里,应该复用get data那里的逻辑;区分文件和内存;文件通过文件偏移,内存用封装好的代码 + - 父问题相关性分析: + - 父问题引用:无,这是一个独立的代码优化任务 + - 相关性分析:虽然与 BatchTransfer 设计总结任务有关,但这是一个具体的实现优化问题 + - 解决必要性: + - 当前实现存在代码重复,没有复用已有的数据处理逻辑 + - 直接存储到 map 可能导致内存使用效率低下 + - 需要统一数据处理方式,提高代码维护性 + - 优先级:高(涉及核心功能的代码质量) + + - 修改计划: + - 修改目的: + - 复用 get_data 的数据处理逻辑 + - 优化数据存储方式 + - 统一文件和内存数据的处理流程 + - 减少代码重复 + + - 预期效果: + - 文件数据直接写入文件系统,通过偏移量管理 + - 内存数据使用现有的封装代码处理 + - 减少内存占用 + - 提高代码复用性和维护性 + + - 可能的风险: + - 重构过程可能影响现有功能 + - 需要确保并发安全性 + - 文件操作可能带来性能开销 + - 可能需要修改相关的测试代码 + + - 具体步骤: + 1. 分析 get_data 中的数据处理逻辑 + 2. 设计新的数据存储接口 + 3. 实现文件数据的偏移量写入 + 4. 集成内存数据的封装代码 + 5. 
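- 上述 channel 通信的最小示意(可运行示例;`DataItem` 与组装逻辑均为演示假设):容量为 1 的 mpsc,仅在全部数据块组装完成后投递一次最终结果,发送方对应 BatchTransfer::complete,接收方对应 call_batch_data 的等待点。
  ```rust
  use tokio::sync::mpsc;

  #[derive(Debug)]
  struct DataItem(Vec<u8>); // 演示用:组装完成后的完整数据

  #[tokio::main]
  async fn main() {
      // 容量 1:整个传输只投递一次"最终结果或错误"
      let (tx, mut rx) = mpsc::channel::<Result<DataItem, String>>(1);

      // 发送方:收齐所有块、组装完毕后发送一次
      tokio::spawn(async move {
          let assembled = DataItem(vec![0u8; 3 * 1024]); // 假设 3 个块已合并
          let _ = tx.send(Ok(assembled)).await;
      });

      // 接收方:等待最终结果;发送端提前退出会走 None 分支
      match rx.recv().await {
          Some(Ok(item)) => println!("batch done: {} bytes", item.0.len()),
          Some(Err(e)) => eprintln!("batch failed: {e}"),
          None => eprintln!("sender dropped without completing"),
      }
  }
  ```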
修改 BatchTransfer 的实现 + 6. 更新相关测试 + 7. 性能测试和优化 + + - 修改过程: + - 已完成: + - 初步确认问题范围 + - 制定修改计划 + - 分析了当前实现的问题: + 1. BatchTransfer 直接将数据块存储在 DashMap 中,占用内存大 + 2. 没有区分文件和内存数据的处理方式 + 3. 没有复用已有的数据处理逻辑 + - 分析了 get_data 的实现: + 1. 支持并行写入能力: + - 使用 tokio::spawn 创建异步任务 + - 通过信号量控制并发数量 + - 支持多节点并行写入 + 2. 数据处理逻辑: + - 文件数据:使用 seek + write 定位写入 + - 内存数据:使用偏移量计算地址 + - 支持断点续传 + 3. 并发控制: + - 使用 RwLock 保护共享资源 + - 文件操作使用 async 文件 I/O + - 内存操作使用原子操作 + - 深入分析了并行写入实现: + 1. write_data_batch 函数的实现: + - 支持数据分块传输:固定 1MB 大小 + - 使用 request_id 跟踪传输状态 + - 支持初始化和数据传输两个阶段 + - 实现了超时重试机制 + + 2. 并行写入机制: + - 主数据分片并行写入: + - 对每个 split_info 创建独立的写入任务 + - 使用 tokio::spawn 实现异步并行处理 + - 通过 clone_split_range 优化数据复制 + + - 缓存数据并行写入: + - 使用信号量控制并发数量(MAX_CONCURRENT_TRANSFERS = 3) + - 支持多节点同时写入 + - 实现了完整的错误处理和重试机制 + + - 任务管理: + - 使用 Vec 跟踪所有写入任务 + - 实现了等待所有任务完成的机制 + - 支持错误传播和状态同步 + + 3. 数据分片策略: + - 支持按偏移量和大小进行数据分片 + - 实现了数据块的并行传输 + - 保证了数据完整性和顺序性 + + - 分析了 SharedMemOwnedAccess 的实现: + 1. 内存管理机制: + - SharedMemHolder: + - 使用 Arc> 管理共享内存 + - 支持数据所有权转移(try_take_data) + - 确保内存安全释放 + + - SharedMemOwnedAccess: + - 提供对共享内存特定范围的独占访问 + - 使用 Range 控制访问范围 + - 实现了安全的可变借用 + + 2. 内存分片处理: + - new_shared_mem 函数: + - 预分配所需总大小的内存 + - 创建多个 SharedMemOwnedAccess 实例 + - 每个实例负责一个数据范围 + + - 并发写入支持: + - 通过 Arc 共享底层内存 + - 每个 SharedMemOwnedAccess 独占其范围 + - 支持并行安全的写入操作 + + 3. 安全保证机制: + - 内存安全: + - 使用 Arc 管理共享内存生命周期 + - Range 确保访问不越界 + - unsafe 代码有完整的安全性说明 + + - 并发安全: + - 每个 SharedMemOwnedAccess 独占其范围 + - 不同实例的范围不重叠 + - 支持并行写入而无需额外同步 + + - 遇到的问题: + - 问题1:需要设计复用 SharedMemOwnedAccess 的接口 + - 问题描述:如何在 BatchTransfer 中集成 SharedMemOwnedAccess 的内存管理机制 + - 解决方案: + 1. 复用 WriteSplitDataTaskGroup 的现有实现: + ```rust + // 已有的接口和实现: + pub enum WriteSplitDataTaskGroup { + ToFile { ... }, + ToMem { + shared_mem: SharedMemHolder, + tasks: Vec>>, + }, + } + + impl WriteSplitDataTaskGroup { + pub async fn new( + unique_id: Vec, + splits: Vec>, + rx: mpsc::Receiver>, + cachemode: CacheModeVisitor, + ) -> WSResult + } + ``` + + 2. 通过 channel 传输数据: + - 使用 mpsc::channel 在 BatchTransfer 和 WriteSplitDataTaskGroup 之间传输数据 + - 保持 WriteSplitDataTaskGroup 的现有接口不变 + - 在 BatchTransfer 中通过 channel 发送数据块 + + 3. 数据流转设计: + ```rust + // 在 BatchTransfer::new 中: + let (data_sender, data_receiver) = mpsc::channel(total_blocks as usize); + let splits = calculate_splits(total_blocks as usize * block_size, block_size); + + // 创建写入任务: + let write_task = tokio::spawn(async move { + let group = WriteSplitDataTaskGroup::new( + unique_id.clone(), + splits, + data_receiver, + CacheModeVisitor(block_type as u16), + ).await?; + group.join().await + }); + ``` + + 4. 优点: + - 不需要修改 WriteSplitDataTaskGroup 的实现 + - 复用现有的内存管理机制 + - 保持并发安全性 + - 支持文件和内存的统一处理 + + - 解决过程: + 1. 分析了 WriteSplitDataTaskGroup 的实现 + 2. 确认可以直接复用现有接口 + 3. 设计了基于 channel 的数据传输方案 + 4. 下一步将实现具体代码 + + - 子问题1:WriteSplitDataTaskGroup接口设计问题 + - 问题描述:WriteSplitDataTaskGroup 的接口设计不够通用,影响复用性 + - 分析: + - 当前问题: + - WriteSplitDataTaskGroup 使用 CacheModeVisitor 作为参数 + - 这个参数实际只用于区分文件/内存操作 + - 参数名称和类型都不够直观 + - 违反了接口设计的简单性原则 + + - 设计目标: + - 参数应该直观地表达其用途 + - 接口应该简单易用 + - 不应该暴露实现细节 + - 保持向后兼容性 + + - 修改计划: + 1. 新增枚举类型: + ```rust + #[derive(Debug, Clone, Copy)] + pub enum StorageType { + File, + Memory, + } + ``` + + 2. 修改 WriteSplitDataTaskGroup::new 签名: + ```rust + pub async fn new( + unique_id: Vec, + splits: Vec>, + rx: mpsc::Receiver>, + storage_type: StorageType, + ) -> WSResult + ``` + + - 优势: + 1. 接口更直观:参数名称和类型都清晰表达了意图 + 2. 实现解耦:调用方不需要了解内部实现细节 + 3. 提高可复用性:接口简单清晰,易于在其他场景使用 + 4. 类型安全:使用枚举确保类型安全 + 5. 向后兼容:可以在内部保持现有的实现逻辑 + + - 后续工作: + 1. 
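- 范围隔离并行写的性质示意(可运行示例;与 SharedMemOwnedAccess 的安全直觉一致,但这里用 std 的 chunks_mut + 作用域线程演示同一性质,并非项目实现):只要各写入者持有互不重叠的可变区间,就无需锁即可并行写。
  ```rust
  fn main() {
      let block = 4;
      let mut shared = vec![0u8; 16];

      // chunks_mut 切出互不重叠的 &mut [u8],每个线程独占一段
      std::thread::scope(|s| {
          for (i, chunk) in shared.chunks_mut(block).enumerate() {
              s.spawn(move || chunk.fill(i as u8));
          }
      });

      assert_eq!(shared[3], 0);
      assert_eq!(shared[4], 1);
      println!("{shared:?}"); // [0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3]
  }
  ```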
更新所有调用 WriteSplitDataTaskGroup::new 的代码 + 2. 添加相关测试用例 + 3. 更新文档说明 + 4. 考虑未来可能的存储类型扩展 + + - 处理过程中遇到的问题: + 1. (done)编译错误: + ```rust + error[E0599]: no variant or associated item named `FILE` found for enum `BatchDataBlockType` + ``` + - 原因:使用了错误的枚举变体名称 + - 解决:修改为正确的枚举变体 `File` 和 `Memory` + + 2. (done) 类型转换问题: + ```rust + match storage_type { + StorageType::File => Self::ToFile { ... }, + StorageType::Memory => Self::ToMem { ... }, + } + ``` + - 原因:需要在内部实现中将 StorageType 映射到具体的枚举变体 + - 解决:添加类型转换实现 + + - 子问题2:错误处理链完整性问题 + - 问题描述:write_task的错误处理链需要确保类型一致性 + - 分析: + - 当前问题: + - write_task.await?? 的双重错误处理不够清晰 + - 错误上下文信息不够详细 + - 错误类型转换隐含在 map_err 中 + + - 设计目标: + - 拆分错误处理步骤,使逻辑清晰 + - 添加详细的错误上下文 + - 统一错误转换方式 + + - 修改计划: + 1. 修改错误处理实现: + ```rust + pub async fn complete(mut self) -> WSResult<()> { + // 定义错误转换函数 + let join_error = |e| WsDataError::BatchTransferError { + unique_id: self.unique_id.clone(), + msg: format!("write task join failed: {}", e), + }; + + let write_error = |e| WsDataError::BatchTransferError { + unique_id: self.unique_id.clone(), + msg: format!("write data failed: {}", e), + }; + + let send_error = || WsDataError::BatchTransferError { + unique_id: self.unique_id.clone(), + msg: "send result failed".to_string(), + }; + + drop(self.data_sender); + + if let Some(tx) = self.tx.take() { + let join_result = self.write_task.await + .map_err(join_error)?; + + let data_item = join_result + .map_err(write_error)?; + + tx.send(Ok(data_item)).await + .map_err(|_| send_error())?; + } + Ok(()) + } + ``` + + - 优势: + 1. 错误处理步骤清晰 + 2. 错误包含详细上下文 + 3. 错误转换逻辑统一 + 4. 便于维护和调试 + + - 后续工作: + 1. 修改 complete 方法 + 2. 更新相关测试 + + - 处理过程中遇到的问题: + 1. (done) 错误类型不匹配: + ```rust + error[E0559]: variant `result::WsDataError::BatchTransferError` has no field named `context` + ``` + - 原因:错误类型定义中没有 context 字段 + - 解决:移除 context 字段,将上下文信息合并到 msg 中 + + 2. (done)变量作用域问题: + ```rust + error[E0425]: cannot find value `version` in this scope + ``` + - 代码分析: + ```rust + // 问题代码: + proto::BatchDataResponse { + request_id: req.request_id, + success: true, + error_message: String::new(), + version, // 这里的 version 变量未定义 + } + + // 上下文代码: + let meta = match kv_store_engine.get_data_meta(&req.unique_id).await { + Ok(Some((_, meta))) => meta, + ... + } + ``` + + - 问题成因: + 1. 在构造 BatchDataResponse 时直接使用了未定义的 version 变量 + 2. meta 变量已在函数开始处获取,包含了正确的版本信息 + 3. 应该使用 meta.version 而不是直接使用 version + + - 修复方案: + - 将 version 替换为 meta.version + - 确保在所有响应构造处都使用 meta.version + - 保持版本信息的一致性 + + - 修改验证: + - 编译确认错误消除 + - 检查版本信息传递正确性 + + - 子问题3:生命周期安全问题 + - 问题描述:异步任务中使用的数据需要满足'static约束 + - 分析: + - 当前问题: + - batch_manager 模块未找到 + - unresolved import batch_manager::BatchManager + - 需要修复模块导入和路径问题 + + - 设计目标: + - 确保模块结构正确 + - 修复导入路径 + - 保持代码组织清晰 + + - 修改计划: + 1. 检查模块结构 + 2. 修复导入路径 + 3. 确保生命周期安全 + + - 后续工作: + 1. 修复模块导入问题 + 2. 验证生命周期约束 + 3. 更新相关测试 + + - 处理过程中遇到的问题: + 1. 模块导入错误: + ```rust + error[E0583]: file not found for module `batch_manager` + error[E0432]: unresolved import `batch_manager::BatchManager` + ``` + - 原因:模块文件路径不正确或文件不存在 + - 解决:需要创建正确的模块文件并修复导入路径 + + 2. 
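- 上面子问题 2 错误链拆分的最小示意(可运行示例;`BatchErr`、`run_complete` 为演示假设):把 `write_task.await??` 的双重 `?` 拆成两步,每步各自补充上下文。
  ```rust
  use tokio::task::JoinHandle;

  #[derive(Debug)]
  enum BatchErr {
      Join(String),  // 任务 panic / 被取消(JoinError)
      Write(String), // 任务正常返回但业务写入失败
  }

  async fn run_complete(
      write_task: JoinHandle<Result<Vec<u8>, String>>,
  ) -> Result<Vec<u8>, BatchErr> {
      // 第一层:join 错误
      let join_result = write_task
          .await
          .map_err(|e| BatchErr::Join(format!("write task join failed: {e}")))?;
      // 第二层:业务错误
      join_result.map_err(|e| BatchErr::Write(format!("write data failed: {e}")))
  }

  #[tokio::main]
  async fn main() {
      let task = tokio::spawn(async { Ok::<_, String>(vec![1u8, 2, 3]) });
      match run_complete(task).await {
          Ok(data) => println!("assembled {} bytes", data.len()),
          Err(e) => eprintln!("{e:?}"),
      }
  }
  ```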
(done) 类型约束问题: + ```rust + error[E0277]: `Rc>` cannot be sent between threads safely + ``` + - 原因:某些类型不满足 Send trait 约束 + - 解决:使用线程安全的替代类型(如 Arc)或重新设计数据共享方式 + +- (done)任务:BatchTransfer 的设计总结一下,反应在rule里 + - 父问题相关性分析: + - 父问题引用:无,这是一个独立的文档完善任务 + - 相关性分析:虽然与 batch 调用函数注释任务有关联,但这是一个更高层面的设计总结任务 + - 解决必要性: + - BatchTransfer 是批量数据传输的核心组件,其设计原则需要文档化 + - 可以指导后续类似功能的开发 + - 有助于维护代码质量和一致性 + - 优先级:中(重要但不紧急) + + - 修改计划: + - 修改目的: + - 总结 BatchTransfer 的设计思路和最佳实践 + - 将设计经验转化为可复用的规则 + - 完善项目的设计文档 + + - 预期效果: + - 在 .cursorrules 中新增批量数据接口设计章节 + - 形成完整的设计规范文档 + - 为团队提供清晰的设计指导 + + - 可能的风险: + - 规则可能需要随着实现的演进而更新 + - 过于具体的规则可能限制未来的优化空间 + - 需要在规范性和灵活性之间找到平衡 + + - 具体步骤: + 1. 分析 BatchTransfer 的核心设计要素 + 2. 提取关键的设计原则和模式 + 3. 整理接口设计的最佳实践 + 4. 编写规则文档 + 5. 评审并优化规则内容 + + - 修改过程: + - 已完成: + - 初步确认任务范围 + - 制定修改计划 + - 分析了系统的核心组件及其职责: + 1. 数据结构职责划分: + - BatchTransfer:单个批量传输任务的管理器 + - 维护:单个传输任务的所有状态(unique_id, version, block_type, total_blocks) + - 存储:接收到的数据块(received_blocks: DashMap>) + - 通知:任务完成状态(tx: Option) + - 功能:数据块的接收、验证和重组 + + - BatchManager:全局批量传输任务的管理器 + - 维护:所有进行中的传输任务(transfers: DashMap) + - 生成:唯一的请求序列号(sequence: AtomicU64) + - 功能:创建新传输、处理数据块、任务生命周期管理 + + 2. 关键函数职责: + - call_batch_data(发送端入口): + - 将大数据分块(固定 1MB 大小) + - 创建传输任务(通过 BatchManager) + - 发送数据块 + - 等待传输完成 + + - handle_block(接收端处理): + - 接收单个数据块 + - 更新传输状态 + - 触发完成处理(如果所有块都收到) + + - complete(完成处理): + - 校验所有数据块完整性 + - 按类型重组数据(内存/文件) + - 通知传输完成 + + 3. 数据流转过程: + - 发送流程: + 1. call_batch_data 接收原始数据 + 2. 计算分块策略 + 3. BatchManager 创建传输任务 + 4. 循环发送数据块 + + - 接收流程: + 1. handle_block 接收数据块 + 2. BatchTransfer 存储数据块 + 3. 检查完整性 + 4. 触发 complete 处理 + 5. 通知发送端完成 + + 4. 错误处理职责: + - BatchTransfer: + - 数据块完整性验证 + - 重组过程的错误处理 + + - BatchManager: + - 传输任务存在性检查 + - 并发访问保护 + + - 调用方: + - 网络传输错误处理 + - 超时处理 + + - 下一步: + - 将这些设计理念和原则转化为规则文档 + - 编写具体的规范内容 + - 评审规则文档 + +- (done)任务:sche proto 中batch部分需要删掉 + - 执行计划: + - 修改目的: + - 清理不再使用的batch相关proto定义 + - 避免代码冗余和混淆 + - 保持proto文件的简洁性 + + - 预期效果: + - sche proto中不再包含batch相关定义 + - 相关的batch功能完全由其他模块处理 + - 减少代码维护负担 + + - 可能的风险: + - 可能有其他模块仍在使用这些proto定义 + - 删除可能影响现有功能 + - 可能需要修改依赖这些proto的代码 + + - 具体步骤: + 1. 搜索并确认sche proto中batch相关定义的位置 + 2. 检查是否有其他代码引用这些proto定义 + 3. 确认删除不会影响现有功能 + 4. 删除相关proto定义 + 5. 更新受影响的代码(如果有) + + - 执行记录: + - 已完成: + - 确认需要删除sche proto中的batch部分 + - 定位到batch相关proto定义在 src/main/src/general/network/proto_src/sche.proto 中 + - 发现这些定义正在被 src/main/src/general/data/m_data_general/batch.rs 使用 + - 发现 data.proto 中已有更完整的 batch 相关定义 + - 删除了 sche.proto 中的重复定义 + - 确认 batch.rs 中使用通用的 proto 导入,不需要修改引用路径 + + - 子任务1:编译验证 + - 执行计划: + - 目的:验证删除 sche.proto 中 batch 定义后的代码完整性 + - 步骤: + 1. 使用 sudo 执行编译 + 2. 分析编译错误 + 3. 制定修复方案 + + - 执行记录: + - 已完成: + - 执行编译并发现错误 + - 分析了错误原因 + + - 发现的问题: + 1. 导入错误: + - proto 模块导入语法错误:`use crate::general::network::proto::self;` + - `BatchDataResponse` 结构体需要通过 `proto::BatchDataResponse` 来引用 + - 已确认 data.proto 中已定义了 BatchDataResponse + + 2. 类型错误: + - `BatchRequestId` 类型不匹配 + - 需要类型注解 + + - 子任务2:修复编译错误 + - 执行计划: + - 目的:修复编译发现的错误 + - 步骤: + 1. 修复 proto 模块导入语句,改为 `use crate::general::network::proto;` + 2. 修正 BatchRequestId 相关代码,确保类型匹配 + 3. 编译验证修改 + + - 执行记录: + - 待执行 + +- (done)任务:新增rule,编译使用sudo cargo build + - 修改计划: + - 修改目的: + - 规范化项目编译过程 + - 确保编译权限一致性 + - 避免权限相关的编译问题 + + - 预期效果: + - 在 .cursorrules 中新增编译规则 + - 统一团队编译命令使用方式 + - 减少权限相关的编译错误 + + - 可能的风险: + - sudo 权限可能带来安全风险 + - 可能影响现有的编译脚本或工作流 + - 需要确保所有开发者都有 sudo 权限 + + - 具体步骤: + 1. 在 .cursorrules 文件中添加编译规则 + 2. 说明使用 sudo 的原因和场景 + 3. 添加安全注意事项 + 4. 
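更新相关文档和记忆系统

- 上文 E0277(`Rc<RefCell<...>>` cannot be sent between threads safely)一项的修复模式示意(可运行示例,状态类型为演示假设):跨 tokio::spawn 共享状态时,用线程安全的 Arc<Mutex<T>> 替代 Rc<RefCell<T>>。
  ```rust
  use std::sync::{Arc, Mutex};

  #[tokio::main]
  async fn main() {
      // Rc<RefCell<u32>> 不满足 Send,move 进 tokio::spawn 会报 E0277;
      // 换成 Arc<Mutex<u32>>(按场景也可用 RwLock 或消息传递)。
      let state = Arc::new(Mutex::new(0u32));
      let for_task = Arc::clone(&state);

      tokio::spawn(async move {
          *for_task.lock().unwrap() += 1;
      })
      .await
      .unwrap();

      assert_eq!(*state.lock().unwrap(), 1);
  }
  ```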
+ + - 修改过程: + - 已完成: + - 确认需要添加编译使用 sudo 的规则 + - 分析了使用 sudo 编译的必要性 + + - 遇到的问题: + - 问题1:需要确定在哪些具体场景下必须使用 sudo + - 解决方案:分析项目依赖和编译过程 + - 解决过程: + 1. 检查项目依赖 + 2. 分析编译权限需求 + 3. 确定必须使用 sudo 的具体情况 + + - 下一步: + - 等待确认修改方案 + - 执行实际的规则添加 + - 更新项目文档 + +- (done)任务:新增rule,后续每次修改,需要查看根目录review,并 对应每一点 进行 修改计划的撰写 以及 修改过程的记录,如果修改过程中出现问题,则作为markdown子项记录,形成一个问题树结构(再次强调,这一条是rule,很重要) + - 修改计划: + - 修改目的: + - 规范化代码修改的文档记录流程 + - 确保所有修改都有清晰的计划和追踪记录 + - 建立统一的问题记录格式 + + - 预期效果: + - 在 .cursorrules 中新增第 8 章节 + - 完整描述代码评审与修改文档规则 + - 包含修改计划、记录要求和维护原则 + + - 可能的风险: + - 规则可能与现有工作流程不完全匹配 + - 可能需要团队成员适应新的文档格式 + + - 具体步骤: + 1. 在 .cursorrules 文件中添加第 8 章节 + 2. 编写完整的规则内容 + 3. 确保格式与现有文档保持一致 + 4. 创建相应的记忆条目 + + - 修改过程: + - 已完成: + - 编写了完整的规则内容 + - 设计了清晰的文档结构规范 + - 定义了详细的记录要求 + + - 下一步: + - 等待确认修改方案 + - 执行实际的文件修改 + - 创建记忆条目 + +- 任务:添加规则 - 避免不必要的代理转发设计(done) + - 分析: + - 父问题相关性: + 1. 父问题:完善项目规则和文档 + 2. 相关性:直接影响代码质量和可维护性 + 3. 必要性:减少冗余代码,提高代码效率 + 4. 优先级:高(影响整体代码设计) + + - 当前问题: + 1. 发现代码中存在不必要的代理转发模式 + 2. 例如 DataGeneralView 中的 get_or_del_datameta_from_master 方法仅仅是转发调用 + 3. 这种设计增加了不必要的代码层级和复杂度 + + - 修改计划: + 1. 在 .cursorrules 文件中添加关于代码设计的新规则 + 2. 删除当前的代理转发实现 + 3. 更新相关调用代码,直接使用原始实现 + + - 执行记录: + 1. 在 .cursorrules 文件中的 7.2 代码修改原则章节添加新规则 + 2. 删除了 DataGeneralView 中的 get_or_del_datameta_from_master 代理方法 + 3. 更新了调用处代码,改为直接使用 data_general().get_or_del_datameta_from_master + 4. 所有修改已完成 + +- 任务:修复 unique_id 移动问题: + - 分析: + - 父问题相关性: + 1. 父问题:编译错误修复 + 2. 相关性:直接导致编译失败的问题 + 3. 必要性:必须解决以通过编译 + 4. 优先级:高,阻塞编译 + + - 当前问题: + 1. 在 batch.rs 中,unique_id 在异步任务中被移动后仍然尝试使用 + 2. 问题出现在 BatchTransfer::new 函数中 + 3. 涉及 tokio::spawn 创建的异步任务 + + - 修改计划: + 1. 在 BatchTransfer::new 中: + - 在创建异步任务前克隆 unique_id + - 使用克隆的版本传入异步任务 + - 保留原始 unique_id 用于其他用途 + + - 执行记录: + - 已完成: + - 在 BatchTransfer::new 中添加了 unique_id_for_task = unique_id.clone() + - 修改异步任务使用 unique_id_for_task 代替 unique_id.clone() + + - 下一步: + - 执行编译验证修改是否解决问题 + - 检查是否有其他相关的所有权问题 + - Err(WSError::WsDataError(WsDataErr::WriteDataFailed { - unique_id: match self { - Self::ToFile { unique_id, .. } | - Self::ToMem { unique_id, .. 
} => unique_id.clone(), - } - })) - } -} - -// WriteSplitDataTaskGroup 管理器 -pub struct WriteSplitDataManager { - // 只存储任务句柄 - handles: DashMap, -} - -impl WriteSplitDataManager { - pub fn new() -> Arc { - Arc::new(Self { - handles: DashMap::new(), - }) - } - - // 注册新的任务句柄 - pub fn register_handle( - &self, - unique_id: UniqueId, - handle: WriteSplitDataTaskHandle, - ) -> WSResult<()> { - // 检查是否已存在 - if self.handles.contains_key(&unique_id) { - return Err(WSError::WsDataError(WsDataErr::WriteDataFailed { - unique_id, - })); - } - // 存储句柄 - self.handles.insert(unique_id, handle); - Ok(()) - } - - // 获取已存在的任务句柄 - pub fn get_handle(&self, unique_id: &UniqueId) -> Option { - self.handles.get(unique_id).map(|h| h.clone()) - } - - // 移除任务句柄 - pub fn remove_handle(&self, unique_id: &UniqueId) { - self.handles.remove(unique_id); - } -} \ No newline at end of file diff --git a/scripts/sync_md_files.py b/scripts/sync_md_files.py index 747dc3c..3c82478 100644 --- a/scripts/sync_md_files.py +++ b/scripts/sync_md_files.py @@ -6,7 +6,7 @@ import tarfile from pathlib import Path -def backup_files(directory, file_types=( '.canvas')): +def backup_files(directory, file_types=('.md', '.canvas')): # Get current timestamp timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') @@ -43,7 +43,7 @@ def sync_md_files(source_dir, target_dir): # Walk through the source directory for root, _, files in os.walk(source_path): # Filter for .md and .canvas files - target_files = [f for f in files if f.endswith(('.canvas'))] + target_files = [f for f in files if f.endswith(('.md', '.canvas'))] for target_file in target_files: # Get the full source path From cf0f28f3ee418435a91fe50d777ce96fe514df4b Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 16/26] Revert "feat: in progress" This reverts commit 8a4edef38a0de72f2d3b84dd352f6b650e6706e5. 
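- 顺带一提,上面 WriteSplitDataManager::register_handle 里的 contains_key + insert 是两步操作,并发下存在先查后插的竞态;DashMap 的 entry API 可以把"不存在才插入"收敛成一次原子操作。最小示意如下(`Handle` 为演示假设类型,对应 WriteSplitDataTaskHandle):
  ```rust
  use dashmap::{mapref::entry::Entry, DashMap};

  #[derive(Clone, Debug)]
  struct Handle(u32); // 演示用任务句柄

  fn register(handles: &DashMap<Vec<u8>, Handle>, id: Vec<u8>, h: Handle) -> Result<(), String> {
      match handles.entry(id) {
          Entry::Occupied(_) => Err("handle already registered".into()),
          Entry::Vacant(v) => {
              let _ = v.insert(h); // 查与插在同一把分段锁内完成,无竞态
              Ok(())
          }
      }
  }

  fn main() {
      let handles = DashMap::new();
      assert!(register(&handles, vec![1], Handle(7)).is_ok());
      assert!(register(&handles, vec![1], Handle(8)).is_err()); // 重复注册被拒绝
  }
  ```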
--- .DS_Store | Bin .cursorrules | 29 +- .cursorrules copy | 977 ++++++++++++++++++ .gitignore | 0 Cargo.lock | 0 Cargo.toml | 0 README.md | 0 design.canvas | 85 -- design.canvas.tmp.20250206220621 | 78 -- design.canvas.tmp.20250206221714 | 82 -- design.canvas.tmp.20250206221714.backup | 75 -- review.md | 0 scripts/sync_md_files.py | 89 -- .../src/general/data/m_data_general/README.md | 15 + .../src/general/data/m_data_general/batch.md | Bin 0 -> 2340 bytes .../general/data/m_data_general/dataitem.md | 57 + .../src/general/data/m_data_general/mod.md | 58 ++ 17 files changed, 1108 insertions(+), 437 deletions(-) mode change 100755 => 100644 .DS_Store mode change 100755 => 100644 .cursorrules create mode 100644 .cursorrules copy mode change 100755 => 100644 .gitignore mode change 100755 => 100644 Cargo.lock mode change 100755 => 100644 Cargo.toml mode change 100755 => 100644 README.md delete mode 100755 design.canvas delete mode 100644 design.canvas.tmp.20250206220621 delete mode 100755 design.canvas.tmp.20250206221714 delete mode 100755 design.canvas.tmp.20250206221714.backup mode change 100755 => 100644 review.md delete mode 100644 scripts/sync_md_files.py create mode 100644 src/main/src/general/data/m_data_general/README.md create mode 100644 src/main/src/general/data/m_data_general/batch.md create mode 100644 src/main/src/general/data/m_data_general/dataitem.md create mode 100644 src/main/src/general/data/m_data_general/mod.md diff --git a/.DS_Store b/.DS_Store old mode 100755 new mode 100644 diff --git a/.cursorrules b/.cursorrules old mode 100755 new mode 100644 index 8a8d2ea..8f40ffc --- a/.cursorrules +++ b/.cursorrules @@ -1,9 +1,5 @@ # Waverless 项目规则列表 -阅读一下review里的字符画设计图,细化/mnt/s3fs/waverless/design,主要是流程以及并行结构,数据流向 还有 数据关系 - -细化的过程使用 - ## 1. 任务执行强制等待规则 - 制定计划后必须等待用户确认: - 即使计划看起来很完善 @@ -44,27 +40,4 @@ - 需要分析当前问题时,先阅读 compilelog - 步骤管理: - 每次执行完一个大步骤(更新计划 或 执行计划)后,等待用户下一步指示 - -## 3. 设计文件修改规则 -- 修改前的准备: - - 必须先查看目标文件的最新内容 - - 创建两份临时文件拷贝,都带上时间戳: - * 一份用于修改 - * 一份作为备份 - -- 内容修改原则: - - 不得擅自删除或覆盖原有内容 - - 只能修改确实需要更新的相关内容 - - 不相关的内容必须保持原样 - - 如果是对原有内容的覆盖修改,需要明确指出 - -- 文件管理: - - 保持清晰的文件命名规范,包含时间戳 - - 在修改完成后进行必要的备份确认 - -## 4. 规则同步原则 -- 规则更新时: - - 规则文件(.cursorrules)和记忆(MEMORIES)必须同步更新 - - 确保两者内容保持一致性 - - 不允许单独更新其中之一 \ No newline at end of file + 每次执行完一个大步骤(更新计划 或 执行计划)后,等待用户下一步指示 \ No newline at end of file diff --git a/.cursorrules copy b/.cursorrules copy new file mode 100644 index 0000000..3c0bb19 --- /dev/null +++ b/.cursorrules copy @@ -0,0 +1,977 @@ + + + +*/ +# Waverless 项目关键设计笔记 + +## 1. 函数执行上下文设计 + +### 1.1 基础结构 +- `FnExeCtx`: 私有的基础结构体,包含函数执行的基本信息 + ```rust + struct FnExeCtx { + pub app: String, + pub app_type: AppType, + pub func: String, + pub func_meta: FnMeta, + pub req_id: ReqId, + pub event_ctx: EventCtx, + pub res: Option, + pub sub_waiters: Vec>, + _dummy_private: (), + } + ``` + +### 1.2 公开特化类型 +- `FnExeCtxAsync` 和 `FnExeCtxSync`: + - 异步执行上下文支持 Jar、Wasm、Native 类型,包含子任务支持和完整的性能监控和日志。 + - 同步执行上下文仅支持 Native 类型,不支持子任务,包含基本的性能监控和日志。 + +### 1.3 类型安全 +- `FnExeCtxAsyncAllowedType` 和 `FnExeCtxSyncAllowedType`: + - 异步允许的类型 (Jar, Wasm, Native) + - 同步允许的类型 (仅 Native) + - 通过 `TryFrom` 在编译时强制类型安全 + +## 2. 
实例管理设计 + +### 2.1 实例类型与管理器 +- `Instance` 和 `InstanceManager`: + - `Instance` 包含 Owned、Shared 和 Native 类型。 + - `InstanceManager` 管理应用实例和运行时函数上下文。 + ```rust + pub enum Instance { + Owned(OwnedInstance), + Shared(SharedInstance), + Native(NativeAppInstance), + } + + pub struct InstanceManager { + pub app_instances: SkipMap, + pub instance_running_function: DashMap, + } + ``` + +### 2.2 运行时函数上下文 +- `UnsafeFunctionCtx`: + - 包含 Sync 和 Async 类型,分别对应 `FnExeCtxSync` 和 `FnExeCtxAsync`。 + +## 3. 关键修改记录 + +### 3.1 同步/异步执行流程优化与错误处理增强 +- 简化 `finish_using`,移除不必要的异步版本,统一使用同步实现。 +- 添加同步版本的 `load_instance_sync`,仅支持 Native 类型。 +- 优化 `execute_sync` 中的异步调用处理,统一性能监控和日志记录格式。 +- 添加 `UnsupportedAppType` 错误类型,完善同步执行时的类型检查。 + +## 4. 待办事项 +- [x] 考虑添加同步版本的 `load_instance` +- [ ] 优化 `execute_sync` 中的异步-同步转换 +- [ ] 完善错误处理和日志记录 + +## 5. 核心设计原则 + +### 5.1 基础原则与 View 模式设计规则 +- 同步/异步分离,类型安全,性能监控,资源管理。 +- View 生成: + - View 结构体和 `LogicalModule` trait 的实现由宏生成。 + - 只需实现 `inner_new` 函数,使用 `logical_module_view_impl!` 生成访问函数。 + - 每个需要访问的模块都需要单独的 impl 宏调用。 + +### 5.2 去掉 #[derive(LogicalModule)] 的原因和注意事项 +- 实现特定功能:根据需求在 `DataGeneralView` 中实现特定功能,检查冲突。 +- `inner` 字段的管理:由宏管理,不能直接操作,通过宏生成的接口使用。 +- 错误分析:去掉派生后,仔细分析和解决可能出现的错误。 + +## 6. msg_pack 消息封装 + +### 6.1 基本原则与实现示例 +- 使用 `msg_pack.rs` 中的宏实现 trait,使用 `define_msg_ids!` 管理消息类型。 +- 通过 `RPCReq` trait 定义请求-响应关系。 + ```rust + define_msg_ids!( + (proto::sche::BatchDataRequest, pack, { true }), + (proto::sche::BatchDataResponse, _pack, { true }) + ); + + impl RPCReq for proto::sche::BatchDataRequest { + type Resp = proto::sche::BatchDataResponse; + } + ``` + +### 6.2 最佳实践 +- 新增消息类型时:在 `define_msg_ids!` 中添加定义,实现 `RPCReq` trait。 +- 使用消息时:使用 `RPCCaller` 和 `RPCHandler`,遵循统一的错误处理。 + +## 7. Waverless 代码规范核心规则 + +### 7.0 最高优先级规则 +- 在没有经过明确允许的情况下,不要擅自开始操作 +- 必须等待用户明确指示后再进行修改 +- 在进行任何修改前,先提出修改方案并等待确认 +- 有明确指令的情况下,不要擅自做其他操作 +- 删除代码时必须说明: + - 被删除代码的原有功能和作用 + - 删除的具体原因 + - 删除可能带来的影响 +- 修改代码时必须: + - 先提出完整的修改方案 + - 说明每处修改的原因和影响 + - 等待用户确认后再执行 + - 严格按照确认的方案执行,不额外修改 + - 如需额外修改,必须重新提出方案并确认 +- 修改规则文件时必须: + - 确认文件名必须是 `.cursorrules` + - 确认文件以 "# Waverless 项目关键设计笔记" 开头 + - 确认包含完整的设计笔记结构 + - 确认包含所有规则章节(1-7) + - 修改前使用搜索工具确认是正确的规则文件 + - 修改前检查文件的完整内容 + - 修改前确认修改的具体位置 + - 只修改规则相关部分 + - 保持其他内容不变 + - 保持文档结构完整 +- 执行命令时必须: + - 先提出执行计划 + - 说明执行目的和预期结果 + - 等待用户确认后再执行 + - 记录执行结果和遇到的问题 + - 如遇问题,提出解决方案并等待确认 + - 例外情况: + 1. 编译命令(sudo -E $HOME/.cargo/bin/cargo build)可以直接执行,无需等待确认 + 2. 编译命令必须将输出重定向到 compilelog 文件 + 3. 编译命令执行后必须分析结果并更新 review.md + +- 编译验证规则: + - 当用户要求检查编译状态时: + 1. 必须立即执行实际的编译命令,无需等待确认 + 2. 禁止仅查看历史编译日志 + 3. 必须使用正确的编译命令:`sudo -E $HOME/.cargo/bin/cargo build 2>&1 | tee compilelog` + 4. 必须等待编译完成并分析结果 + 5. 必须将编译结果记录到 review.md 中 + - 编译执行前必须: + 1. 确认已经在 review.md 中记录了执行计划 + 2. 确认编译环境已经准备就绪 + 3. 确认使用了正确的编译命令和参数 + - 编译执行后必须: + 1. 分析编译输出中的每个错误和警告 + 2. 更新 review.md 中的任务状态 + 3. 如果发现新的错误,创建相应的任务记录 + - 禁止行为: + 1. 禁止在没有执行编译的情况下判断编译状态 + 2. 禁止仅根据历史记录回答编译相关问题 + 3. 禁止忽略编译警告 + 4. 禁止在编译失败时不更新任务状态 + +- 编译后问题处理规则: + 1. 每次编译完成后,如果发现新的问题: + - 必须先暂停当前操作 + - 立即在 review.md 中记录新问题 + - 对新问题进行完整的分析记录 + - 等待用户确认后再继续处理 + 2. 禁止在发现新问题后未经记录就直接处理 + 3. 禁止在未经用户确认的情况下处理新问题 + 4. 每个新问题必须包含: + - 与父问题的关系分析 + - 问题的具体表现和影响 + - 初步的解决方案建议 + - 预期的处理步骤 + 5. 违反以上规则的行为将被拒绝执行 + +- review.md 使用规则: + - 在执行任何操作前必须: + 1. 先检查 review.md 文件是否存在 + 2. 阅读完整的 review.md 内容 + 3. 理解当前任务的上下文和父问题 + 4. 在合适的位置添加新的任务记录 + + - 更新位置确定原则: + 1. 必须仔细分析当前对话正在处理的具体问题 + 2. 找到该问题在 review.md 中的对应位置 + 3. 将新内容添加到该问题的相关位置 + 4. 禁止简单地追加到文件末尾 + 5. 如果找不到明确的对应位置,必须先在对应任务描述下标记为 (working) 并询问用户确认 + 6. 
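- 上文 6.1 节 RPCReq 模式的独立小例(可运行;下面的类型是演示假设,非 proto 生成代码):用关联类型在编译期把请求与响应绑定起来。
  ```rust
  trait RpcReq {
      type Resp; // 每种请求在类型层面绑定唯一的响应类型
  }

  struct BatchDataRequest { seq: u32 }
  struct BatchDataResponse { seq: u32, ok: bool }

  impl RpcReq for BatchDataRequest {
      type Resp = BatchDataResponse;
  }

  // 调用端由签名保证:发什么请求,就只能得到对应的响应类型
  fn call<R: RpcReq>(req: R, handler: impl FnOnce(R) -> R::Resp) -> R::Resp {
      handler(req)
  }

  fn main() {
      let resp = call(BatchDataRequest { seq: 1 }, |r| BatchDataResponse { seq: r.seq, ok: true });
      assert!(resp.ok && resp.seq == 1);
  }
  ```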
对于正在计划或执行中的任务,必须标记为 (working);同一时间系统中只允许存在一个 (working) 状态的任务记录。如果发现多个 (working) 标记,必须暂停后续操作,并等待用户确认后再统一标记 + + - 任务记录必须遵循以下格式: + ```markdown + - 任务:[任务描述] + - 分析: + - 父问题相关性: + 1. 父问题:[引用具体的父问题] + 2. 相关性:[说明与父问题的关系] + 3. 必要性:[说明为什么需要解决] + 4. 优先级:[说明优先级和原因] + + - 当前问题: + 1. [具体问题点1] + 2. [具体问题点2] + ... + + - 修改计划: + 1. [具体步骤1] + 2. [具体步骤2] + ... + + - 执行记录: + - 已完成: + - [已完成的步骤1] + - [已完成的步骤2] + + - 遇到的问题: + - 问题1:[问题描述] + - 解决方案:[方案描述] + - 解决过程:[过程记录] + ``` + + - 任务状态管理: + 1. 新任务必须添加在未完成任务的最前面 + 2. 已完成任务必须标记为 (done) + 3. 已完成任务必须移到未完成任务后面 + 4. 子任务必须保持正确的缩进层级 + 5. 任务完成状态必须实时更新 + + - 强制执行要求: + 1. 禁止在未更新 review.md 的情况下执行任何操作 + 2. 禁止在未经确认的情况下修改已有任务记录 + 3. 禁止删除任何历史记录 + 4. 必须在每次操作前后更新执行记录 + 5. 必须在遇到问题时立即记录 + 6. 必须在解决问题后更新解决方案 + 7. 违反以上规则的操作将被拒绝执行 + +- 执行计划必须: + 1. 在执行任何操作前,必须先在 review.md 中记录执行计划 + 2. 执行计划必须包含: + - 任务描述和目标 + - 父问题相关性分析 + - 当前问题分析 + - 具体执行步骤 + - 预期结果 + - 可能的风险 + - 验证方法 + 3. 执行计划必须遵循 review.md 的格式要求: + - 新计划添加在未完成任务的最前面 + - 使用正确的缩进和层级 + - 包含完整的分析和计划部分 + 4. 执行过程必须: + - 严格按照计划执行 + - 实时记录执行结果 + - 遇到问题时立即记录 + - 完成后更新任务状态 + 5. 禁止在没有执行计划的情况下: + - 执行任何命令 + - 修改任何文件 + - 进行任何操作 + 6. 如需修改计划: + - 必须先记录原计划的问题 + - 提出新的计划 + - 等待确认后再继续 + +### 7.1 文档维护与代码组织原则 +- 文档压缩原则:保持无损压缩,合并重复内容,简化表述,重构文档结构。 +- 文档更新规则:确认信息完整性,保留技术细节,使用清晰结构展示信息。 +- 代码组织规则:宏生成的访问函数直接使用,非 pub 函数只在一个地方定义,View 负责核心实现,具体模块负责自己的功能,通过 View 访问其他模块。 + +### 7.2 代码修改原则 + +#### 7.2.1 问题解决原则 +- 仅解决当前 review 中关注的问题和遇到的子问题 +- 解决问题前必须先写出解决方案的规划: + 1. 分析问题的根本原因 + 2. 列出可能的解决方案 + 3. 评估每个方案的优缺点 + 4. 选择最优方案并说明原因 + 5. 列出具体的实施步骤 + 6. 考虑可能的风险和应对措施 + + +- 不随意删除或修改已有的正确实现 +- 不在多处实现同一功能 +- 保持代码结构清晰简单 +- 修改前先理解设计原则 + +#### 异步任务处理原则 +- 分析生命周期和所有权需求 +- 避免盲目克隆,只克隆必要数据 +- 考虑类型特征(如 P2PModule 的轻量级 Clone) +- 评估替代方案 + +```rust +// 反例:过度克隆 +let p2p = self.p2p().clone(); // 不必要,P2PModule 本身就是轻量级的 +let data_general = self.data_general().clone(); // 不必要,同上 + +// 正例:按需克隆 +let split_info = split.clone(); // 必要,因为来自临时变量的引用 +``` + +分析要点: +- 使用场景:确认异步任务中的实际需求 +- 类型特征:检查是否已实现轻量级 Clone +- 生命周期:特别关注临时变量引用 +- 替代方案:考虑其他实现方式 + +### 7.3 错误与正确示例 +- 错误示例:手动实现已有的宏生成函数,在两个地方都实现同一个函数,过度修改已有代码结构,有损压缩文档内容。 +- 正确示例:使用宏生成的访问函数,在合适的位置添加新功能,遵循已有的代码组织方式,保持文档的完整性和准确性。 + +### 7.4 异步任务变量处理规范 + +#### 1. 变量分析原则 +- 生命周期分析:确定变量在异步任务中的生存期 +- 所有权需求:判断是否需要克隆或移动所有权 +- 类型特征:考虑变量的类型特性(如 Clone、Send、'static 等) +- 数据共享:评估是否需要在多个任务间共享数据 + +#### 2. 克隆策略 +必须克隆的情况: +- 临时变量引用:`split_info.clone()`(来自迭代器) +- 多任务共享:`unique_id.clone()`(多个任务需要) +- 部分数据:`data_item.clone_split_range()`(只克隆需要的范围) + +不需要克隆的情况: +- 值类型复制:`version`(直接复制即可) +- 已实现 Copy:基本数据类型 +- 单一任务使用:不需要在多个任务间共享的数据 + +#### 3. View 模式使用规范 +基本原则: +- View 本身已经是完整引用:不需要额外的 view 字段 +- 异步任务中使用:`self.clone()` +- 模块访问:通过 view 直接访问其他模块 + +示例代码: +```rust +// 正确示例 +let view = self.clone(); // View 本身克隆 +let resp = view.data_general().rpc_call_write_once_data... + +// 错误示例 +let view = self.view.clone(); // 错误:不需要额外的 view 字段 +let data_general = self.data_general().clone(); // 错误:不需要单独克隆模块 +``` + +#### 4. 异步任务数据处理检查清单 +- [ ] 是否只克隆必要的数据? +- [ ] 临时变量是否正确处理? +- [ ] View 的使用是否符合规范? +- [ ] 是否避免了重复克隆? +- [ ] 数据共享策略是否合理? + +#### 5. 常见场景示例 + +1. 批量数据处理: +```rust +// 正确处理临时变量和部分数据 +let split_info = split_info.clone(); // 临时变量必须克隆 +let data_item = data_item.clone_split_range(range); // 只克隆需要的部分 +let view = self.clone(); // View 克隆用于异步任务 +``` + +2. 并发任务处理: +```rust +// 使用信号量和数据共享 +let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT)); +let view = self.clone(); // 一次克隆,多处使用 +for node_id in nodes { + let permit = semaphore.clone(); + let view = view.clone(); // View 在任务间共享 + tokio::spawn(async move { ... }); +} +``` + +### 7.3 变量类型难分辨的情况 + +#### 7.3.1 Proto生成的Rust类型 +1. 
proto中的普通字段在Rust中的表现: + - proto中的 `string file_name_opt = 1` 生成的是普通 `String` 类型,而不是 `Option` + - proto中的 `bool is_dir_opt = 2` 生成的是普通 `bool` 类型,而不是 `Option` + - 字段名带 `_opt` 后缀不代表它在Rust中是 `Option` 类型 + +2. proto中的message嵌套在Rust中的表现: + - `DataItem` 中的 `oneof data_item_dispatch` 在Rust中是一个字段 + - 访问路径是: `data.data_item_dispatch` 而不是 `data.data.data_item_dispatch` + - `Option` 需要先 `unwrap()` 才能访问其内部字段 + +#### 7.3.2 容易混淆的类型转换 +1. proto生成的类型和标准库类型的关系: + - proto生成的 `String` 字段不能直接用 `unwrap_or_default()` + - proto生成的 `bool` 字段不能直接用 `unwrap_or()` + +### 7.5 思维方式原则 +- 思维优先于行动: + - 在开始任何操作前,先理解"为什么"而不是"怎么做" + - 确保完全理解当前上下文中的所有信息 + - 避免机械性思维和跳过思考的行为模式 + - 对于涉及代码逻辑的命令,必须先阅读和理解相关代码,再执行命令 + - 当需要复用或参考现有代码逻辑时,必须先在项目中查找并理解相关实现 + - 在理解代码时,需要关注: + - 代码的执行流程和依赖关系 + - 数据结构和状态管理方式 + - 错误处理和异常情况的处理方式 + +- 代码分析记录原则: + - 在修改任何代码之前,必须在 review.md 中记录完整的代码分析: + 1. 问题代码:截取导致问题的具体代码片段 + 2. 上下文代码:截取理解问题所需的相关代码 + 3. 问题成因:详细分析问题的具体原因 + 4. 修复方案:说明如何修复以及为什么这样修复 + 5. 修改验证:列出验证修改正确性的方法 + - 分析记录必须: + - 使用代码块格式展示代码 + - 保持代码片段的完整性和可读性 + - 确保分析逻辑清晰 + - 说明修改的影响范围 + +- 父问题相关性分析: + - 在开始分析任何问题之前,必须首先进行父问题相关性分析 + - 分析步骤: + 1. 确认当前问题的父问题是什么 + 2. 回溯父问题的执行计划和记录 + 3. 判断当前问题是否是父问题引起的 + 4. 确认解决当前问题是否必要且有助于解决父问题 + - 分析结果必须包含: + 1. 父问题的明确引用 + 2. 相关性的具体分析 + 3. 解决必要性说明 + 4. 优先级判断 + - 如果当前问题与父问题无关: + 1. 记录分析结果 + 2. 暂时搁置该问题 + 3. 继续专注于父问题的解决 + +- 内化规则: + - 把规则视为思维框架而不是外部约束 + - 养成先检查当前上下文的习惯 + - 避免在已有信息的情况下去外部搜索 +- 关注本质: + - 理解问题的根本原因比立即解决问题更重要 + - 分析失误的思维模式而不是简单记住正确操作 + - 把经验转化为思维方式而不是操作步骤 + +## 8. 代码评审与修改文档规则 + +### 8.1 修改计划与记录要求 +- 每次修改代码前: + 1. 必须查看项目根目录的 `review.md` 文件 + 2. 根据现有内容确定修改计划的位置和层级 + 3. 在对应位置添加修改计划 + 4. 使用 markdown 格式记录,保持层级结构清晰 + +### 8.2 文档结构规范 +- 所有修改记录必须使用以下简化的问题树结构: + ```markdown + - 任务/问题:xxxx + - 分析:xxxx + - 计划任务1:xxxx + 新问题1:xxxx + - 分析:xxxx + - 计划任务3:xxxx + 已完成 + + - 计划任务2:xxxx + 已完成 + ``` + +- 结构规则: + 1. 父节点必须是具体的任务或问题描述 + 2. 第一个子节点必须是对问题的分析 + 3. 后续子节点是具体的计划任务 + 4. 每个计划任务下可以包含新的问题,遵循相同的结构 + 5. 已完成的任务标记为"已完成" + 6. 保持缩进层级清晰 + +- 示例说明: + ```markdown + - 任务:修复类型转换错误 + - 分析:当前代码在类型转换时未考虑空值情况 + - 计划任务1:添加空值检查 + 新问题:如何处理空值转换失败 + - 分析:需要在转换失败时提供默认值 + - 计划任务:实现 Option 转换 + 已完成 + + - 计划任务2:添加单元测试 + 已完成 + ``` + +### 8.3 记录要求 +1. 修改计划必须包含: + - 修改目的 + - 预期效果 + - 可能的风险 + - 具体步骤 + +2. 修改过程必须记录: + - 实际执行的步骤 + - 遇到的每个问题 + - 解决方案和结果 + +3. 问题记录必须包含: + - 问题的具体表现 + - 问题的可能原因 + - 尝试的解决方案 + - 最终的解决方案 + - 预防措施(如果适用) + +### 8.4 维护原则 +- 保持文档的实时更新 +- 确保问题树结构清晰 +- 定期回顾和整理文档 +- 记录经验教训和最佳实践 + +### 8.5 任务识别规则 + +#### 8.5.1 任务状态判断 +1. 完成状态标记: + - 已完成任务必须标记为 `(done)` + - 未标记 `(done)` 的任务视为未完成 + - 不使用其他状态标记 + +2. 任务顺序规则: + - 文档开头说明:`(顺序:新的在前面;先解决就的未完成的;完成的有标注;问题可能存在子问题)` + - 新任务添加到未完成任务的最前面 + - 已完成任务移到未完成任务的后面 + - 子任务跟随父任务,保持缩进层级 + +3. 最老未完成任务识别: + - 从上到下扫描所有顶级任务 + - 跳过带有 `(done)` 标记的任务 + - 第一个不带 `(done)` 标记的任务即为最老未完成任务 + - 子任务不影响父任务的完成状态判断 + +4. 任务优先级: + - 未完成任务按出现顺序表示优先级(越靠后优先级越高) + - 子任务优先级高于同级后续任务 + - 阻塞性问题优先级最高 + +#### 8.5.2 任务解析检查清单 +在识别和处理任务时,必须检查: +- [ ] 任务是否有 `(done)` 标记 +- [ ] 任务是否为顶级任务 +- [ ] 是否有未完成的子任务 +- [ ] 任务的位置是否符合顺序规则 +- [ ] 是否存在阻塞性问题 + +## 9. 批量数据接口设计 + +### 9.1 BatchTransfer 设计规范 + +#### 9.1.1 组件职责定义 + +1. **数据结构职责划分** + - BatchTransfer(单个传输任务管理器)必须: + - 维护单个传输任务的完整状态(unique_id, version, block_type, total_blocks) + - 使用 DashMap 存储接收到的数据块,确保并发安全 + - 通过 Option 管理完成状态通知 + - 负责数据块的接收、验证和重组 + + - BatchManager(全局传输任务管理器)必须: + - 使用 DashMap 维护所有进行中的传输任务 + - 使用原子计数器生成唯一的请求序列号 + - 负责传输任务的创建、数据块处理和生命周期管理 + +2. 
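- 上文 7.3.1 节 proto 类型陷阱的对照示意(可运行;下面的结构是手写的"形如 prost 输出"的假设代码,非真实生成物):oneof 生成 Option<枚举>,访问前要先解包;proto3 的 string 字段就是 String,"未设置"用空串表示,与 `_opt` 后缀无关。
  ```rust
  mod data_item {
      // prost 会把 oneof 生成为消息名小写模块下的枚举
      #[derive(Debug)]
      pub enum DataItemDispatch {
          RawBytes(Vec<u8>),
          FilePath(String),
      }
  }

  #[derive(Debug)]
  struct DataItem {
      data_item_dispatch: Option<data_item::DataItemDispatch>, // oneof -> Option<enum>
      file_name_opt: String, // proto3 string -> String,不是 Option<String>
  }

  fn main() {
      let item = DataItem {
          data_item_dispatch: Some(data_item::DataItemDispatch::RawBytes(vec![1, 2])),
          file_name_opt: String::new(), // "未设置"即空串,而非 None
      };
      match &item.data_item_dispatch {
          Some(data_item::DataItemDispatch::RawBytes(b)) => println!("{} bytes", b.len()),
          Some(data_item::DataItemDispatch::FilePath(p)) => println!("file {p}"),
          None => println!("oneof not set"),
      }
  }
  ```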
**函数职责要求** + - call_batch_data(发送端)必须: + - 使用固定大小(1MB)进行数据分块 + - 通过 BatchManager 创建传输任务 + - 负责数据块的发送 + - 等待传输完成通知 + + - handle_block(接收端)必须: + - 接收并验证单个数据块 + - 更新传输状态 + - 在接收完所有块时触发完成处理 + + - complete(完成处理)必须: + - 校验所有数据块的完整性 + - 根据类型(内存/文件)重组数据 + - 发送完成通知 + +#### 9.1.2 数据流转规范 + +1. **发送流程要求** + - 必须按照以下顺序执行: + 1. 接收原始数据并验证 + 2. 计算分块策略 + 3. 创建传输任务 + 4. 按序发送数据块 + +2. **接收流程要求** + - 必须按照以下顺序处理: + 1. 接收数据块并验证 + 2. 存储到对应的 BatchTransfer + 3. 检查完整性 + 4. 触发完成处理 + 5. 通知发送端 + +#### 9.1.3 错误处理规范 + +1. **组件错误处理职责** + - BatchTransfer 必须处理: + - 数据块完整性验证错误 + - 数据重组过程错误 + + - BatchManager 必须处理: + - 传输任务存在性检查错误 + - 并发访问保护错误 + + - 调用方必须处理: + - 网络传输错误 + - 超时错误 + +2. **错误恢复策略** + - 必须支持以下错误恢复机制: + - 单个数据块的重试 + - 传输任务的取消 + - 资源的正确释放 + +#### 9.1.4 资源管理规范 + +1. **内存管理** + - 必须预分配适当的缓冲区大小 + - 必须及时释放不再需要的内存 + - 必须控制并发数据块的最大数量 + +2. **文件管理** + - 必须使用唯一的临时文件名 + - 必须在完成后清理临时文件 + - 必须正确处理文件权限 + +3. **并发控制** + - 必须使用 DashMap 确保并发安全 + - 必须使用原子操作处理计数器 + - 必须正确管理 channel 资源 + +### 9.2 批量写入实现 + +#### 9.2.1 总体流程 + +1. **数据切分** + - 内存数据按 1MB 切块 + - 文件数据按 4MB 切块 + - 计算总块数和最后一块大小 + +2. **任务池初始化** + - 创建 4 个传输任务槽位 + - 每个任务负责一个数据块的传输 + - 任务完成后自动释放槽位 + +3. **数据块获取** + - 空闲任务会请求新的数据块 + - 最多预取 8 个块 + - 超过限制则等待其他块处理完成 + +4. **传输过程** + - 任务获取到数据块后开始传输 + - 每个请求包含块索引和数据类型 + - 单个请求超时时间为 30 秒 + +5. **完成处理** + - 所有块传输完成后结束 + - 失败的块会重试最多 3 次 + - 重试间隔为 1 秒 + +#### 9.2.2 接收方处理 + +1. **数据管理** + - 复用 get_data 的文件和内存管理逻辑 + - 文件使用 FileManager 管理可变文件 + - 内存使用 MemoryManager 管理内存块 + +2. **并行写入** + - 每个数据块作为独立的写入任务 + - 文件写入使用 seek + write 定位写入 + - 内存写入使用偏移量计算地址 + +3. **并发控制** + - 使用 RwLock 保护共享资源 + - 文件操作使用 async 文件 I/O + - 内存操作使用原子操作保证并发安全 + +4. **状态管理** + - 记录每个块的写入状态 + - 支持断点续传和重试 + - 完成后更新元数据 + ``` + +3. **接收方处理** + ```rust + struct BatchDataWriter { + // 文件缓存,使用 unique_id 作为 key + file_cache: HashMap, BatchFileCache>, + // 内存缓存,使用 unique_id 作为 key + memory_cache: HashMap, BatchMemoryCache>, + } + + impl BatchDataWriter { + async fn handle_request(&mut self, req: BatchDataRequest) -> BatchDataResponse { + let cache = match req.block_type { + DataBlockType::Memory => &mut self.memory_cache, + DataBlockType::File => &mut self.file_cache, + }; + + // 获取或创建缓存 + let block_cache = cache.entry(req.unique_id.clone()) + .or_insert_with(|| self.create_cache(req.block_type)); + + // 写入数据块 + match block_cache.write_block(req.block_index, req.data).await { + Ok(()) => BatchDataResponse { + request_id: req.request_id, + success: true, + error_message: String::new(), + version: req.version, + }, + Err(e) => BatchDataResponse { + request_id: req.request_id, + success: false, + error_message: e.to_string(), + version: req.version, + }, + } + } + } + ``` + +#### 9.2.2 缓存管理 + +1. **文件缓存** + ```rust + struct BatchFileCache { + path: PathBuf, // 临时文件路径 + file: File, // 文件句柄 + received_blocks: HashSet, // 已接收的块 + } + + impl BatchFileCache { + async fn write_block(&mut self, index: u32, data: Vec) -> Result<()> { + // 记录块并写入文件 + self.received_blocks.insert(index); + self.file.seek(SeekFrom::Start((index as u64) * BLOCK_SIZE))?; + self.file.write_all(&data)?; + Ok(()) + } + } + ``` + +2. **内存缓存** + ```rust + struct BatchMemoryCache { + blocks: HashMap>, // 块索引 -> 数据 + total_size: usize, // 总大小 + } + + impl BatchMemoryCache { + async fn write_block(&mut self, index: u32, data: Vec) -> Result<()> { + // 直接存储到内存 + self.blocks.insert(index, data); + Ok(()) + } + } + ``` + +#### 9.2.3 注意事项 + +1. **并发控制** + - 使用 MAX_CONCURRENT_TASKS 控制带宽使用 + - 通过 MAX_PENDING_BLOCKS 实现背压控制 + - 任务完成后及时释放资源 + +2. 
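- 上文 9.2.1 "数据切分"一步的计算示意(可运行;BLOCK_SIZE 取正文内存数据的 1MB 块值):总块数向上取整,末块可能不足一个块。
  ```rust
  const BLOCK_SIZE: usize = 1024 * 1024; // 正文内存数据的 1MB 块

  /// 返回 (总块数, 末块大小)
  fn split_plan(total_len: usize) -> (usize, usize) {
      assert!(total_len > 0, "empty payload needs no split");
      let blocks = total_len.div_ceil(BLOCK_SIZE); // 向上取整
      let last = total_len - (blocks - 1) * BLOCK_SIZE; // 末块可能不足 BLOCK_SIZE
      (blocks, last)
  }

  fn main() {
      assert_eq!(split_plan(BLOCK_SIZE), (1, BLOCK_SIZE));
      assert_eq!(split_plan(BLOCK_SIZE + 1), (2, 1));
      assert_eq!(split_plan(3 * BLOCK_SIZE - 7), (3, BLOCK_SIZE - 7));
      println!("split plan ok");
  }
  ```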
**内存管理** + - 预取块数量不超过 MAX_PENDING_BLOCKS + - 使用 Arc<[u8]> 避免数据复制 + - 大文件优先使用文件缓存 + +3. **错误处理** + - 记录失败的块以便重试 + - 最多重试 MAX_RETRIES 次 + - 重试间隔为 RETRY_DELAY_MS + - 单个任务超过 TASK_TIMEOUT_MS 自动取消 + +4. **性能优化** + - 使用异步 I/O 提高并发性 + - 任务空闲时自动获取新块 + - 支持乱序处理和断点续传 + +5. **监控和调试** + - 记录每个块的处理状态 + - 统计传输速率和成功率 + - 支持取消整个传输任务 + +### 9.3 请求方逻辑 + +1. **请求预处理**: + - 生成唯一的 request_id + - 验证数据项数量不超过 max_batch_size + - 设置适当的超时时间 + +### 9.3 并行写入实现规范 + +#### 9.3.1 WriteSplitDataTaskGroup 设计模式 +1. **基础结构设计** + ```rust + enum WriteSplitDataTaskGroup { + ToFile { + file_path: PathBuf, + tasks: Vec>>, + }, + ToMem { + shared_mem: SharedMemHolder, + tasks: Vec>>, + }, + } + ``` + +2. **职责划分** + - 任务组管理: + - 创建和初始化写入任务 + - 跟踪任务状态和完成情况 + - 提供统一的任务管理接口 + - 数据写入: + - 文件写入使用 FileExt::write_at + - 内存写入使用 SharedMemOwnedAccess + - 支持并发安全的数据访问 + +3. **并发控制要求** + - 文件写入: + - 使用 tokio::task::spawn_blocking 处理 I/O + - 通过文件偏移确保并发安全 + - 每个任务独占写入区域 + - 内存写入: + - 使用 SharedMemOwnedAccess 保证访问安全 + - 通过 Range 隔离数据区域 + - Arc 管理共享内存生命周期 + +4. **错误处理规范** + - 数据验证: + - 检查数据块类型匹配 + - 验证数据长度一致性 + - 确保写入位置正确 + - 错误传播: + - 使用 Result 类型传递错误 + - 支持任务级别的错误处理 + - 实现错误重试机制 + +#### 9.3.2 复用规范 +1. **接口设计要求** + - 提供统一的数据写入接口 + - 支持文件和内存两种模式 + - 保持与现有实现兼容 + +2. **数据管理规范** + - 文件数据: + - 使用文件偏移管理数据位置 + - 支持并发写入和随机访问 + - 实现临时文件清理 + - 内存数据: + - 使用 SharedMemOwnedAccess 管理 + - 支持数据分片和并发访问 + - 确保内存安全释放 + +3. **任务管理要求** + - 并发控制: + - 使用信号量限制并发任务数 + - 支持任务取消和超时处理 + - 实现资源自动释放 + - 状态同步: + - 跟踪任务完成状态 + - 支持等待所有任务完成 + - 提供任务进度反馈 + +4. **性能优化准则** + - 预分配资源: + - 文件空间预分配 + - 内存缓冲区预分配 + - 任务队列容量预设 + - 并发调优: + - 根据系统资源调整并发度 + - 优化任务调度策略 + - 减少数据复制开销 + +## 10. 构建规则 + +### 10.1 编译命令规范 + +#### 10.1.1 使用 sudo 编译 +- 项目编译前必须确保已设置默认工具链: + ```bash + rustup default stable + ``` + +- 项目编译必须使用 sudo 权限: + ```bash + sudo -E $HOME/.cargo/bin/cargo build + ``` + +#### 10.1.2 使用场景 +1. 首次编译项目 +2. 依赖更新后的完整编译 +3. 涉及系统级权限的功能修改 + +#### 10.1.3 安全注意事项 +1. 确保使用 sudo 的必要性: + - 仅在确实需要系统权限时使用 + - 优先考虑其他解决方案 + +2. 权限管理: + - 确保开发者具有必要的 sudo 权限 + - 遵循最小权限原则 + - 避免在非必要情况下使用 sudo + +3. 环境一致性: + - 保持开发环境权限配置一致 + - 记录所有需要 sudo 权限的依赖 + - 在文档中说明使用 sudo 的原因 + +4. 编译环境检查: + - 确保 rustup 工具链已正确安装 + - 确保已设置默认工具链:`rustup default stable` + - 检查 cargo 路径是否正确 + +### 8.3 处理方逻辑 + +1. **并发处理**: + - 使用工作池处理批量请求 + - 控制并发度 + - 实现公平调度 + +2. **资源管理**: + - 内存使用限制 + - 连接数限制 + - CPU 使用限制 + +3. **监控和日志**: + - 记录处理时间 + - 记录成功/失败率 + - 记录资源使用情况 + +### 8.4 最佳实践 + +1. **批量大小**: + - 建议单批次处理 100-1000 个数据项 + - 根据数据大小动态调整 + +2. **超时设置**: + - 基础超时:30秒 + - 根据批量大小线性增加 + - 最大超时:120秒 + +3. **错误处理**: + - 提供详细的错误信息 + - 支持部分成功的情况 + - 实现幂等性 + +4. 
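- 上文 9.3.1 节"spawn_blocking + 按偏移写"组合的最小示意(可运行于 Unix;路径与数据均为演示值):各任务按互不重叠的偏移写同一文件,无需 seek,也无需共享锁。
  ```rust
  use std::os::unix::fs::FileExt; // 提供按偏移写的 write_all_at
  use std::sync::Arc;

  #[tokio::main]
  async fn main() -> std::io::Result<()> {
      let file = Arc::new(std::fs::File::create("/tmp/write_at_demo.bin")?);
      file.set_len(8)?; // 预分配,各任务只写自己的区间

      let mut tasks = Vec::new();
      for (offset, byte) in [(0u64, b'a'), (4u64, b'b')] {
          let f = Arc::clone(&file);
          // 阻塞型文件 I/O 放进 spawn_blocking,避免卡住异步运行时
          tasks.push(tokio::task::spawn_blocking(move || f.write_all_at(&[byte; 4], offset)));
      }
      for t in tasks {
          t.await.expect("join failed")?;
      }

      assert_eq!(std::fs::read("/tmp/write_at_demo.bin")?, b"aaaabbbb");
      Ok(())
  }
  ```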
**性能考虑**: + - 使用异步处理 + - 实现批量压缩 + - 考虑网络带宽限制 + + - 把规则视为思维框架而不是外部约束 + - 养成先检查当前上下文的习惯 + - 避免在已有信息的情况下去外部搜索 +- 关注本质: + - 理解问题的根本原因比立即解决问题更重要 + - 分析失误的思维模式而不是简单记住正确操作 + - 把经验转化为思维方式而不是操作步骤 diff --git a/.gitignore b/.gitignore old mode 100755 new mode 100644 diff --git a/Cargo.lock b/Cargo.lock old mode 100755 new mode 100644 diff --git a/Cargo.toml b/Cargo.toml old mode 100755 new mode 100644 diff --git a/README.md b/README.md old mode 100755 new mode 100644 diff --git a/design.canvas b/design.canvas deleted file mode 100755 index e56cc10..0000000 --- a/design.canvas +++ /dev/null @@ -1,85 +0,0 @@ -{ - "nodes":[ - {"id":"cb82b904dab26671","type":"group","x":-3400,"y":-960,"width":4560,"height":3280,"label":"data"}, - {"id":"core_module_group","type":"group","x":-3160,"y":-840,"width":1460,"height":3120,"label":"数据管理核心模块"}, - {"id":"batch_transfer_group","type":"group","x":-1560,"y":120,"width":2300,"height":1600,"label":"Batch数据传输实现"}, - {"id":"0453b4726b40c9eb","type":"group","x":-3080,"y":176,"width":1280,"height":2064,"label":"WriteSplitDataTaskGroup"}, - {"id":"data_write_flow","type":"group","x":-1600,"y":-600,"width":2680,"height":520,"label":"数据写入流程"}, - {"id":"2e84a4ef9e137fb7","type":"group","x":-1000,"y":800,"width":1495,"height":820,"label":"batch handler 流程"}, - {"id":"storage_write_flow","type":"group","x":0,"y":-540,"width":1020,"height":400,"label":"存储节点写入流程"}, - {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-3050,"y":-406,"width":330,"height":234,"color":"4"}, - {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-2932,"y":-92,"width":342,"height":158,"color":"4"}, - {"id":"5c4357fc2216ea51","type":"text","text":"## batch写入接口\n- 缓存主动推送\n- 并行写入支持\n- 错误恢复机制\n- 内存复用优化","x":-2180,"y":-92,"width":250,"height":120,"color":"4"}, - {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-2290,"y":-622,"width":330,"height":156,"color":"4"}, - {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源生命周期","x":-2760,"y":-680,"width":340,"height":214,"color":"4"}, - {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-2405,"y":-427,"width":280,"height":275,"color":"4"}, - {"id":"1ec171d545e8995d","type":"text","text":"","x":-2686,"y":460,"width":250,"height":60}, - {"id":"write_task_mem","type":"text","text":"# 内存写入流程\n\n## 接口\n- write_mem_data()\n * 使用SharedMemHolder\n * 支持偏移和写入\n\n## 数据结构\n- MemDataWriter\n * holder: SharedMemHolder\n * offset: usize\n * len: usize\n\n## 操作流程\n1. 获取内存区域\n2. 计算偏移地址\n3. 写入数据\n4. 更新元数据","x":-3000,"y":860,"width":400,"height":400,"color":"2"}, - {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1560,"y":-380,"width":200,"height":100,"color":"1"}, - {"id":"cache_group","type":"text","text":"缓存节点组","x":-620,"y":-310,"width":150,"height":60,"color":"5"}, - {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1560,"y":-210,"width":200,"height":100,"color":"1"}, - {"id":"storage_node_3","type":"text","text":"存储节点1","x":-425,"y":-550,"width":150,"height":60,"color":"3"}, - {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 
返回调度决策","x":-1100,"y":-510,"width":200,"height":160,"color":"2"}, - {"id":"storage_group","type":"text","text":"存储节点组","x":-620,"y":-510,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_4","type":"text","text":"存储节点2","x":-420,"y":-480,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_5","type":"text","text":"存储节点3","x":-420,"y":-400,"width":150,"height":60,"color":"3"}, - {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-420,"y":-360,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-420,"y":-280,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-420,"y":-200,"width":150,"height":60,"color":"5"}, - {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":40,"y":-500,"width":200,"height":280,"color":"1"}, - {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":380,"y":-500,"width":200,"height":120,"color":"2"}, - {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":-280,"width":200,"height":100,"color":"4"}, - {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1560,"y":-510,"width":200,"height":100,"color":"1"}, - {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-620,"y":180,"width":250,"height":240,"color":"2"}, - {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-620,"y":460,"width":250,"height":120,"color":"2"}, - {"id":"batch_response1","type":"text","text":"# BatchDataResponse(1)\n- request_id\n- success\n- error_message\n- version","x":-270,"y":180,"width":250,"height":240,"color":"3"}, - {"id":"batch_response2","type":"text","text":"# BatchDataResponse(2)\n- request_id\n- success\n- error_message\n- version","x":-270,"y":460,"width":310,"height":60,"color":"3"}, - {"id":"batch_response3","type":"text","text":"# BatchDataResponse(3)\n- request_id\n- success\n- error_message\n- version","x":-270,"y":600,"width":250,"height":120,"color":"3"}, - {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-620,"y":600,"width":250,"height":120,"color":"2"}, - {"id":"batch_handler_3","type":"text","text":"# 3. 创建数据分片\n\n## 分片准备\n- 创建分片列表\n * 计算offset\n * 记录分片范围\n- 创建mpsc通道\n * 大小 = splits.len()\n * 发送数据到通道","x":-495,"y":820,"width":350,"height":300,"color":"3"}, - {"id":"batch_handler_5","type":"text","text":"# 5. 等待写入完成\n\n## task_group.join()\n- 成功情况\n * 返回成功响应\n * 更新版本号\n- 失败情况\n * 记录警告\n * 返回错误信息","x":80,"y":900,"width":300,"height":300,"color":"5"}, - {"id":"batch_handler_4","type":"text","text":"# 4. 创建写入任务组\n\n## WriteSplitDataTaskGroup\n- 创建任务组\n * unique_id\n * splits\n * rx channel\n * block_type\n- 错误处理\n * 记录警告\n * 返回失败响应","x":-320,"y":1200,"width":300,"height":360,"color":"4"}, - {"id":"batch_handler_2","type":"text","text":"# 2. 验证请求数据\n\n## verify_request()\n- 验证请求参数\n * block_type\n * block_index\n * data完整性\n- 错误处理\n * 记录警告\n * 返回失败响应","x":-795,"y":1230,"width":355,"height":330,"color":"2"}, - {"id":"batch_handler_1","type":"text","text":"# 1. 
获取元信息\n\n## get_metadata()\n- 获取元数据\n * unique_id\n * version\n- 错误处理\n * 记录警告\n * 返回失败响应","x":-945,"y":860,"width":300,"height":300,"color":"1"}, - {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1100,"y":190,"width":300,"height":300,"color":"1"}, - {"id":"batch_manager","type":"text","text":"# BatchManager\n\n## 管理功能\n- 创建传输任务\n- 分配请求ID\n- 跟踪传输状态\n- 错误恢复\n\n## 数据处理\n- 分块管理\n- 数据校验\n- 内存复用\n- 并发控制","x":-1460,"y":420,"width":300,"height":300,"color":"1"}, - {"id":"write_task_verify","type":"text","text":"# 验证与状态\n\n## 状态记录\n- TaskStatus\n * pending\n * writing\n * completed\n * failed\n\n## 验证检查\n1. 分片范围\n * offset合法性\n * 数据长度\n2. 写入结果\n * 成功/失败\n * 错误信息\n3. 完整性\n * 所有分片\n * 数据一致性","x":-2320,"y":1673,"width":400,"height":400,"color":"4"}, - {"id":"write_task_file","type":"text","text":"# 文件写入流程\n\n## 接口\n- write_file_data()\n * 使用std::fs::File\n * 支持seek和write\n\n## 数据结构\n- FileDataWriter\n * file: File\n * path: PathBuf\n * offset: u64\n\n## 操作流程\n1. 打开文件\n2. seek到offset\n3. 写入数据\n4. flush到磁盘","x":-2320,"y":860,"width":400,"height":400,"color":"1"}, - {"id":"write_task_control","type":"text","text":"# 任务控制流程\n\n## 数据结构\n- WriteSplitDataTaskGroup\n * tasks: Vec\n * rx: mpsc::Receiver\n * unique_id: String\n\n## 控制流程\n1. 创建任务\n * 根据type选择writer\n * 初始化状态记录\n2. 并发处理\n * 启动写入线程\n * 监听通道\n3. 等待完成\n * join所有任务\n * 汇总错误","x":-3000,"y":1420,"width":480,"height":653,"color":"3"}, - {"id":"data_item","type":"text","text":"# 数据项处理\n\n## WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理\n## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-2686,"y":260,"width":460,"height":520,"color":"3"} - ], - "edges":[ - {"id":"verify_flow_1","fromNode":"batch_handler_4","fromSide":"right","toNode":"batch_handler_5","toSide":"left","label":"块状态更新"}, - {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, - {"id":"phase2_to_phase3","fromNode":"general_phase2","fromSide":"bottom","toNode":"general_phase3","toSide":"top","label":"决策信息"}, - {"id":"phase3_to_storage","fromNode":"general_phase3","fromSide":"right","toNode":"storage_group","toSide":"left","label":"分发存储任务"}, - {"id":"storage_to_nodes","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_3","toSide":"left"}, - {"id":"storage_to_nodes2","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_4","toSide":"left"}, - {"id":"storage_to_nodes3","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_5","toSide":"left"}, - {"id":"phase3_to_cache","fromNode":"general_phase3","fromSide":"right","toNode":"cache_group","toSide":"left","label":"分发缓存任务"}, - {"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, - {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, - {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, - {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"left","toNode":"batch_manager","toSide":"right","label":"创建批量传输"}, - {"id":"initiator_to_request1","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, - 
{"id":"initiator_to_request2","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, - {"id":"initiator_to_request3","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, - {"id":"request1_to_response1","fromNode":"batch_request1","fromSide":"right","toNode":"batch_response1","toSide":"left","label":"处理响应"}, - {"id":"request2_to_response2","fromNode":"batch_request2","fromSide":"right","toNode":"batch_response2","toSide":"left","label":"处理响应"}, - {"id":"request3_to_response3","fromNode":"batch_request3","fromSide":"right","toNode":"batch_response3","toSide":"left","label":"处理响应"}, - {"id":"b5a17c0afede8e4a","fromNode":"data_general_core","fromSide":"right","toNode":"133214da264cfe72","toSide":"bottom"}, - {"id":"2ad5991c43fd6098","fromNode":"data_general_core","fromSide":"right","toNode":"821e415b6438e20d","toSide":"top"}, - {"id":"caa45c92a135042c","fromNode":"data_general_core","fromSide":"right","toNode":"core_functions","toSide":"left"}, - {"id":"09c7b9957992d62d","fromNode":"data_general_core","fromSide":"right","toNode":"b31695207931d96e","toSide":"left"}, - {"id":"adfa1cca1009ff43","fromNode":"data_general_core","fromSide":"right","toNode":"5c4357fc2216ea51","toSide":"left"}, - {"id":"ef995a514a2210bb","fromNode":"5c4357fc2216ea51","fromSide":"right","toNode":"batch_transfer_group","toSide":"top"}, - {"id":"3d79872a234731c0","fromNode":"cache_node_3","fromSide":"bottom","toNode":"batch_transfer_group","toSide":"top"}, - {"id":"batch_flow_4_5","fromNode":"batch_handler_4","fromSide":"right","toNode":"batch_handler_5","toSide":"left","label":"BlockStatus"}, - {"id":"handler_1_to_2","fromNode":"batch_handler_1","fromSide":"right","toNode":"batch_handler_2","toSide":"left","label":"元数据信息"}, - {"id":"handler_2_to_3","fromNode":"batch_handler_2","fromSide":"right","toNode":"batch_handler_3","toSide":"left","label":"数据内容"}, - {"id":"handler_3_to_4","fromNode":"batch_handler_3","fromSide":"right","toNode":"batch_handler_4","toSide":"left","label":"分片列表"}, - {"id":"write_task_file_to_control","fromNode":"write_task_file","fromSide":"bottom","toNode":"write_task_control","toSide":"top","label":"文件写入任务"}, - {"id":"write_task_mem_to_control","fromNode":"write_task_mem","fromSide":"bottom","toNode":"write_task_control","toSide":"top","label":"内存写入任务"}, - {"id":"write_task_control_to_verify","fromNode":"write_task_control","fromSide":"right","toNode":"write_task_verify","toSide":"left","label":"状态更新"} - ] -} \ No newline at end of file diff --git a/design.canvas.tmp.20250206220621 b/design.canvas.tmp.20250206220621 deleted file mode 100644 index 1c5b83a..0000000 --- a/design.canvas.tmp.20250206220621 +++ /dev/null @@ -1,78 +0,0 @@ -{ - "nodes":[ - {"id":"cb82b904dab26671","type":"group","x":-1600,"y":-680,"width":2780,"height":2200,"label":"data"}, - {"id":"core_module_group","type":"group","x":-1600,"y":-680,"width":1000,"height":780,"label":"数据管理核心模块"}, - {"id":"data_write_flow","type":"group","x":-380,"y":140,"width":1520,"height":460,"label":"数据写入流程"}, - {"id":"batch_transfer_group","type":"group","x":-740,"y":640,"width":1880,"height":820,"label":"Batch数据传输实现"}, - {"id":"parallel_group","type":"group","x":-740,"y":1500,"width":1880,"height":600,"label":"并发执行结构"}, - {"id":"storage_write_flow","type":"group","x":-380,"y":-300,"width":1520,"height":400,"label":"存储节点写入流程"}, - {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 
错误处理和恢复\n- 资源生命周期","x":-1200,"y":-660,"width":340,"height":214,"color":"4"}, - {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-845,"y":-407,"width":280,"height":275,"color":"4"}, - {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-1403,"y":-339,"width":330,"height":100,"color":"4"}, - {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-1415,"y":-53,"width":342,"height":158,"color":"4"}, - {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-730,"y":-602,"width":330,"height":156,"color":"4"}, - {"id":"5c4357fc2216ea51","type":"text","text":"## batch写入接口\n- 缓存主动推送\n- 并行写入支持\n- 错误恢复机制\n- 内存复用优化","x":-525,"y":-192,"width":250,"height":120,"color":"4"}, - {"id":"data_item","type":"text","text":"# 数据项处理\n\n## WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理\n## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-160,"y":-472,"width":460,"height":520,"color":"3"}, - {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":-340,"y":-260,"width":200,"height":280,"color":"1"}, - {"id":"storage_node_2","type":"text","text":"存储节点2\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":-340,"y":-120,"width":200,"height":280,"color":"1"}, - {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":0,"y":-260,"width":200,"height":120,"color":"2"}, - {"id":"write_task_2","type":"text","text":"写入任务2\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":0,"y":-120,"width":200,"height":120,"color":"2"}, - {"id":"local_storage_1","type":"text","text":"本地存储1\n- 持久化数据\n- 版本管理\n- 空间回收","x":320,"y":-260,"width":200,"height":100,"color":"3"}, - {"id":"local_storage_2","type":"text","text":"本地存储2\n- 持久化数据\n- 版本管理\n- 空间回收","x":320,"y":-120,"width":200,"height":100,"color":"3"}, - {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":-260,"width":200,"height":100,"color":"4"}, - {"id":"write_result_2","type":"text","text":"写入结果2\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":-120,"width":200,"height":100,"color":"4"}, - {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-340,"y":170,"width":200,"height":100,"color":"1"}, - {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-340,"y":300,"width":200,"height":100,"color":"1"}, - {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 
返回调度决策","x":120,"y":170,"width":200,"height":160,"color":"2"}, - {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-340,"y":430,"width":200,"height":100,"color":"1"}, - {"id":"storage_group","type":"text","text":"存储节点组","x":600,"y":170,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_3","type":"text","text":"存储节点1","x":800,"y":120,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_4","type":"text","text":"存储节点2","x":800,"y":200,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_5","type":"text","text":"存储节点3","x":800,"y":280,"width":150,"height":60,"color":"3"}, - {"id":"cache_group","type":"text","text":"缓存节点组","x":600,"y":370,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_1","type":"text","text":"缓存节点1","x":800,"y":320,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_2","type":"text","text":"缓存节点2","x":800,"y":400,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_3","type":"text","text":"缓存节点3","x":800,"y":480,"width":150,"height":60,"color":"5"}, - {"id":"batch_manager","type":"text","text":"# BatchManager\n\n## 管理功能\n- 创建传输任务\n- 分配请求ID\n- 跟踪传输状态\n- 错误恢复\n\n## 数据处理\n- 分块管理\n- 数据校验\n- 内存复用\n- 并发控制","x":-700,"y":700,"width":300,"height":300,"color":"1"}, - {"id":"batch_transfer","type":"text","text":"# BatchTransfer\n\n## 传输控制\n- 数据分块\n- 进度跟踪\n- 错误处理\n- 资源管理\n\n## 数据流\n- 发送队列\n- 接收缓冲\n- 内存池\n- 流量控制","x":-700,"y":1020,"width":300,"height":300,"color":"2"}, - {"id":"parallel_executor","type":"text","text":"# 并发执行器\n\n## 任务调度\n- 优先级队列\n- 负载均衡\n- 资源限制\n- 任务分组\n\n## 执行控制\n- 状态跟踪\n- 超时处理\n- 错误恢复\n- 取消机制","x":-700,"y":1540,"width":300,"height":300,"color":"3"}, - {"id":"task_group","type":"text","text":"# 任务组\n\n## 组织结构\n- 任务依赖\n- 执行顺序\n- 资源分配\n- 状态同步\n\n## 控制功能\n- 进度监控\n- 故障处理\n- 数据一致性\n- 完成确认","x":-340,"y":1540,"width":300,"height":300,"color":"4"}, - {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-300,"y":700,"width":300,"height":180,"color":"1"}, - {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":100,"y":700,"width":250,"height":120,"color":"2"}, - {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":100,"y":840,"width":250,"height":120,"color":"2"}, - {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":100,"y":980,"width":250,"height":120,"color":"2"}, - {"id":"batch_response1","type":"text","text":"# BatchDataResponse(1)\n- request_id\n- success\n- error_message\n- version","x":450,"y":700,"width":250,"height":120,"color":"3"}, - {"id":"batch_response2","type":"text","text":"# BatchDataResponse(2)\n- request_id\n- success\n- error_message\n- version","x":450,"y":840,"width":250,"height":120,"color":"3"}, - {"id":"batch_response3","type":"text","text":"# BatchDataResponse(3)\n- request_id\n- success\n- error_message\n- version","x":450,"y":980,"width":250,"height":120,"color":"3"} - ], - "edges":[ - {"id":"storage_to_task1","fromNode":"storage_node_1","fromSide":"right","toNode":"write_task_1","toSide":"left","label":"分片数据"}, - {"id":"storage_to_task2","fromNode":"storage_node_2","fromSide":"right","toNode":"write_task_2","toSide":"left","label":"分片数据"}, - 
{"id":"task_to_local1","fromNode":"write_task_1","fromSide":"right","toNode":"local_storage_1","toSide":"left","label":"持久化"}, - {"id":"task_to_local2","fromNode":"write_task_2","fromSide":"right","toNode":"local_storage_2","toSide":"left","label":"持久化"}, - {"id":"local_to_result1","fromNode":"local_storage_1","fromSide":"right","toNode":"write_result_1","toSide":"left","label":"写入状态"}, - {"id":"local_to_result2","fromNode":"local_storage_2","fromSide":"right","toNode":"write_result_2","toSide":"left","label":"写入状态"}, - {"id":"phase1_to_phase2","fromNode":"general_phase1","fromSide":"bottom","toNode":"general_phase2","toSide":"top","label":"DataItems"}, - {"id":"phase2_to_master","fromNode":"general_phase2","fromSide":"right","toNode":"master_node","toSide":"left","label":"调度请求"}, - {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, - {"id":"phase2_to_phase3","fromNode":"general_phase2","fromSide":"bottom","toNode":"general_phase3","toSide":"top","label":"决策信息"}, - {"id":"phase3_to_storage","fromNode":"general_phase3","fromSide":"right","toNode":"storage_group","toSide":"left","label":"分发存储任务"}, - {"id":"storage_to_nodes","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_3","toSide":"left"}, - {"id":"storage_to_nodes2","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_4","toSide":"left"}, - {"id":"storage_to_nodes3","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_5","toSide":"left"}, - {"id":"phase3_to_cache","fromNode":"general_phase3","fromSide":"right","toNode":"cache_group","toSide":"left","label":"分发缓存任务"}, - {"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, - {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, - {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, - {"id":"batch_flow1","fromNode":"batch_manager","fromSide":"right","toNode":"batch_transfer","toSide":"left","label":"创建传输"}, - {"id":"batch_flow2","fromNode":"batch_transfer","fromSide":"right","toNode":"parallel_executor","toSide":"left","label":"执行任务"}, - {"id":"parallel_flow","fromNode":"parallel_executor","fromSide":"right","toNode":"task_group","toSide":"left","label":"任务调度"}, - {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"left","toNode":"batch_manager","toSide":"right","label":"创建批量传输"}, - {"id":"initiator_to_request1","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, - {"id":"initiator_to_request2","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, - {"id":"initiator_to_request3","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, - {"id":"request1_to_response1","fromNode":"batch_request1","fromSide":"right","toNode":"batch_response1","toSide":"left","label":"处理响应"}, - {"id":"request2_to_response2","fromNode":"batch_request2","fromSide":"right","toNode":"batch_response2","toSide":"left","label":"处理响应"}, - {"id":"request3_to_response3","fromNode":"batch_request3","fromSide":"right","toNode":"batch_response3","toSide":"left","label":"处理响应"} - ] -} diff --git a/design.canvas.tmp.20250206221714 b/design.canvas.tmp.20250206221714 deleted file mode 100755 index 70199ee..0000000 
--- a/design.canvas.tmp.20250206221714 +++ /dev/null @@ -1,82 +0,0 @@ -{ - "nodes":[ - {"id":"cb82b904dab26671","type":"group","x":-1600,"y":-960,"width":2780,"height":2660,"label":"data"}, - {"id":"batch_transfer_group","type":"group","x":-1600,"y":640,"width":2740,"height":1060,"label":"Batch数据传输实现"}, - {"id":"core_module_group","type":"group","x":-1600,"y":-820,"width":1920,"height":780,"label":"数据管理核心模块"}, - {"id":"data_write_flow","type":"group","x":-1600,"y":80,"width":2680,"height":520,"label":"数据写入流程"}, - {"id":"2e84a4ef9e137fb7","type":"group","x":-1560,"y":1300,"width":2680,"height":820,"label":"batch handler 具体逻辑"}, - {"id":"storage_write_flow","type":"group","x":0,"y":140,"width":1020,"height":400,"label":"存储节点写入流程"}, - {"id":"cache_group","type":"text","text":"缓存节点组","x":-620,"y":370,"width":150,"height":60,"color":"5"}, - {"id":"storage_node_4","type":"text","text":"存储节点2","x":-420,"y":200,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_5","type":"text","text":"存储节点3","x":-420,"y":280,"width":150,"height":60,"color":"3"}, - {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-420,"y":320,"width":150,"height":60,"color":"5"}, - {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1560,"y":170,"width":200,"height":100,"color":"1"}, - {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1560,"y":300,"width":200,"height":100,"color":"1"}, - {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1560,"y":430,"width":200,"height":100,"color":"1"}, - {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 返回调度决策","x":-1100,"y":170,"width":200,"height":160,"color":"2"}, - {"id":"storage_group","type":"text","text":"存储节点组","x":-620,"y":170,"width":150,"height":60,"color":"3"}, - {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-420,"y":400,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-420,"y":480,"width":150,"height":60,"color":"5"}, - {"id":"batch_transfer","type":"text","text":"# BatchTransfer\n\n## 传输控制\n- 数据分块\n- 进度跟踪\n- 错误处理\n- 资源管理\n\n## 数据流\n- 发送队列\n- 接收缓冲\n- 内存池\n- 流量控制","x":-1215,"y":1120,"width":430,"height":460,"color":"2"}, - {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-660,"y":1120,"width":250,"height":120,"color":"2"}, - {"id":"batch_response3","type":"text","text":"# BatchDataResponse(3)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":1120,"width":250,"height":120,"color":"3"}, - {"id":"batch_manager","type":"text","text":"# BatchManager\n\n## 管理功能\n- 创建传输任务\n- 分配请求ID\n- 跟踪传输状态\n- 错误恢复\n\n## 数据处理\n- 分块管理\n- 数据校验\n- 内存复用\n- 并发控制","x":-1560,"y":700,"width":300,"height":300,"color":"1"}, - {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1160,"y":700,"width":300,"height":300,"color":"1"}, - {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-660,"y":700,"width":250,"height":240,"color":"2"}, - {"id":"batch_response1","type":"text","text":"# BatchDataResponse(1)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":700,"width":250,"height":240,"color":"3"}, - {"id":"batch_request2","type":"text","text":"# 
BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-660,"y":980,"width":250,"height":120,"color":"2"}, - {"id":"batch_response2","type":"text","text":"# BatchDataResponse(2)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":980,"width":310,"height":60,"color":"3"}, - {"id":"storage_node_3","type":"text","text":"存储节点1","x":-425,"y":130,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":40,"y":180,"width":200,"height":280,"color":"1"}, - {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":380,"y":180,"width":200,"height":120,"color":"2"}, - {"id":"local_storage_1","type":"text","text":"本地存储1\n- 持久化数据\n- 版本管理\n- 空间回收","x":700,"y":180,"width":200,"height":100,"color":"3"}, - {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":400,"width":200,"height":100,"color":"4"}, - {"id":"1ec171d545e8995d","x":214,"y":-636,"width":250,"height":60,"type":"text","text":""}, - {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-730,"y":-742,"width":330,"height":156,"color":"4"}, - {"id":"data_item","type":"text","text":"# 数据项处理\n\n## WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理\n## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-160,"y":-612,"width":460,"height":520,"color":"3"}, - {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 资源生命周期","x":-1200,"y":-800,"width":340,"height":214,"color":"4"}, - {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-845,"y":-547,"width":280,"height":275,"color":"4"}, - {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-1490,"y":-526,"width":330,"height":234,"color":"4"}, - {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-1372,"y":-212,"width":342,"height":158,"color":"4"}, - {"id":"5c4357fc2216ea51","type":"text","text":"## batch写入接口\n- 缓存主动推送\n- 并行写入支持\n- 错误恢复机制\n- 内存复用优化","x":-620,"y":-212,"width":250,"height":120,"color":"4"}, - {"id":"batch_handler_1","type":"text","text":"# BatchHandler 核心组件\n\n## call_batch_data()\n- 分块大小: 1MB\n- 数据分割\n- 创建channel\n- 创建传输任务\n- 并发发送数据块\n- 等待响应","x":-1520,"y":1340,"width":300,"height":240,"color":"1"}, - {"id":"batch_handler_2","type":"text","text":"# BatchManager 管理器\n\n## 核心功能\n- create_transfer()\n * 生成请求ID\n * 创建BatchTransfer\n * 管理传输生命周期\n\n## 状态管理\n- 传输进度跟踪\n- 错误处理与恢复\n- 并发控制","x":-1120,"y":1340,"width":300,"height":300,"color":"2"}, - {"id":"batch_handler_3","type":"text","text":"# BatchTransfer 传输器\n\n## 属性\n- unique_id\n- version\n- block_type\n- total_blocks\n\n## 数据通道\n- data_sender\n- write_task\n- tx","x":-720,"y":1340,"width":300,"height":300,"color":"3"}, - {"id":"batch_handler_4","type":"text","text":"# 数据块处理\n\n## add_block()\n- 校验块索引\n- 发送数据到channel\n- 返回处理状态\n\n## complete()\n- 关闭data_sender\n- 等待write_task\n- 发送结果","x":-320,"y":1340,"width":300,"height":300,"color":"4"}, - {"id":"batch_handler_5","type":"text","text":"# 错误处理\n\n## 错误类型\n- BatchTransferError\n- InvalidDataType\n- WriteTaskError\n\n## 错误恢复\n- 重试机制\n- 超时控制\n- 
资源清理","x":80,"y":1340,"width":300,"height":300,"color":"5"}, - {"id":"batch_handler_6","type":"text","text":"# 并发控制\n\n## 并发限制\n- 建议并发数=3\n- 有界任务池\n- 队列管理\n\n## 资源管理\n- 内存复用\n- 通道缓冲\n- 任务调度","x":480,"y":1340,"width":300,"height":300,"color":"6"}, - {"id":"batch_handler_7","type":"text","text":"# 数据分片\n\n## calculate_splits()\n- 计算分片范围\n- 优化分片大小\n- 内存占用控制\n\n## 分片策略\n- 固定大小(1MB)\n- 动态调整\n- 性能优化","x":880,"y":1340,"width":300,"height":300,"color":"3"} - ], - "edges":[ - {"id":"storage_to_task1","fromNode":"storage_node_1","fromSide":"right","toNode":"write_task_1","toSide":"left","label":"分片数据"}, - {"id":"task_to_local1","fromNode":"write_task_1","fromSide":"right","toNode":"local_storage_1","toSide":"left","label":"持久化"}, - {"id":"local_to_result1","fromNode":"local_storage_1","fromSide":"right","toNode":"write_result_1","toSide":"left","label":"写入状态"}, - {"id":"phase1_to_phase2","fromNode":"general_phase1","fromSide":"bottom","toNode":"general_phase2","toSide":"top","label":"DataItems"}, - {"id":"phase2_to_master","fromNode":"general_phase2","fromSide":"right","toNode":"master_node","toSide":"left","label":"调度请求"}, - {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, - {"id":"phase2_to_phase3","fromNode":"general_phase2","fromSide":"bottom","toNode":"general_phase3","toSide":"top","label":"决策信息"}, - {"id":"phase3_to_storage","fromNode":"general_phase3","fromSide":"right","toNode":"storage_group","toSide":"left","label":"分发存储任务"}, - {"id":"storage_to_nodes","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_3","toSide":"left"}, - {"id":"storage_to_nodes2","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_4","toSide":"left"}, - {"id":"storage_to_nodes3","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_5","toSide":"left"}, - {"id":"phase3_to_cache","fromNode":"general_phase3","fromSide":"right","toNode":"cache_group","toSide":"left","label":"分发缓存任务"}, - {"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, - {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, - {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, - {"id":"batch_flow1","fromNode":"batch_manager","fromSide":"right","toNode":"batch_transfer","toSide":"left","label":"创建传输"}, - {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"left","toNode":"batch_manager","toSide":"right","label":"创建批量传输"}, - {"id":"initiator_to_request1","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, - {"id":"initiator_to_request2","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, - {"id":"initiator_to_request3","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, - {"id":"request1_to_response1","fromNode":"batch_request1","fromSide":"right","toNode":"batch_response1","toSide":"left","label":"处理响应"}, - {"id":"request2_to_response2","fromNode":"batch_request2","fromSide":"right","toNode":"batch_response2","toSide":"left","label":"处理响应"}, - {"id":"request3_to_response3","fromNode":"batch_request3","fromSide":"right","toNode":"batch_response3","toSide":"left","label":"处理响应"}, - 
{"id":"b5a17c0afede8e4a","fromNode":"data_general_core","fromSide":"right","toNode":"133214da264cfe72","toSide":"bottom"}, - {"id":"2ad5991c43fd6098","fromNode":"data_general_core","fromSide":"right","toNode":"821e415b6438e20d","toSide":"top"}, - {"id":"caa45c92a135042c","fromNode":"data_general_core","fromSide":"right","toNode":"core_functions","toSide":"left"}, - {"id":"09c7b9957992d62d","fromNode":"data_general_core","fromSide":"right","toNode":"b31695207931d96e","toSide":"left"}, - {"id":"adfa1cca1009ff43","fromNode":"data_general_core","fromSide":"right","toNode":"5c4357fc2216ea51","toSide":"left"}, - {"id":"ef995a514a2210bb","fromNode":"5c4357fc2216ea51","fromSide":"right","toNode":"data_item","toSide":"left"}, - {"id":"3d79872a234731c0","fromNode":"cache_node_3","fromSide":"bottom","toNode":"batch_transfer_group","toSide":"top"} - ] -} \ No newline at end of file diff --git a/design.canvas.tmp.20250206221714.backup b/design.canvas.tmp.20250206221714.backup deleted file mode 100755 index 08a2b9b..0000000 --- a/design.canvas.tmp.20250206221714.backup +++ /dev/null @@ -1,75 +0,0 @@ -{ - "nodes":[ - {"id":"cb82b904dab26671","type":"group","x":-1600,"y":-960,"width":2780,"height":2660,"label":"data"}, - {"id":"batch_transfer_group","type":"group","x":-1600,"y":640,"width":2740,"height":1060,"label":"Batch数据传输实现"}, - {"id":"core_module_group","type":"group","x":-1600,"y":-820,"width":1920,"height":780,"label":"数据管理核心模块"}, - {"id":"data_write_flow","type":"group","x":-1600,"y":80,"width":2680,"height":520,"label":"数据写入流程"}, - {"id":"2e84a4ef9e137fb7","x":-737,"y":1300,"width":1377,"height":460,"type":"group","label":"batch handler 具体逻辑"}, - {"id":"storage_write_flow","type":"group","x":0,"y":140,"width":1020,"height":400,"label":"存储节点写入流程"}, - {"id":"cache_group","type":"text","text":"缓存节点组","x":-620,"y":370,"width":150,"height":60,"color":"5"}, - {"id":"storage_node_4","type":"text","text":"存储节点2","x":-420,"y":200,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_5","type":"text","text":"存储节点3","x":-420,"y":280,"width":150,"height":60,"color":"3"}, - {"id":"cache_node_1","type":"text","text":"缓存节点1","x":-420,"y":320,"width":150,"height":60,"color":"5"}, - {"id":"general_phase1","type":"text","text":"General阶段1:准备\n- 初始化DataItems\n- 计算数据大小\n- 创建SharedMemHolder","x":-1560,"y":170,"width":200,"height":100,"color":"1"}, - {"id":"general_phase2","type":"text","text":"General阶段2:调度\n- 生成unique_id\n- 发送调度请求\n- 等待决策返回","x":-1560,"y":300,"width":200,"height":100,"color":"1"}, - {"id":"general_phase3","type":"text","text":"General阶段3:分发\n- 解析调度决策\n- 创建写入任务组\n- 初始化并发控制","x":-1560,"y":430,"width":200,"height":100,"color":"1"}, - {"id":"master_node","type":"text","text":"Master节点 [DataMaster]\n- schedule_data()\n1. 生成DataSetMeta\n2. 创建DataSplits\n3. 分配存储节点\n4. 
返回调度决策","x":-1100,"y":170,"width":200,"height":160,"color":"2"}, - {"id":"storage_group","type":"text","text":"存储节点组","x":-620,"y":170,"width":150,"height":60,"color":"3"}, - {"id":"cache_node_2","type":"text","text":"缓存节点2","x":-420,"y":400,"width":150,"height":60,"color":"5"}, - {"id":"cache_node_3","type":"text","text":"缓存节点3","x":-420,"y":480,"width":150,"height":60,"color":"5"}, - {"id":"batch_transfer","type":"text","text":"# BatchTransfer\n\n## 传输控制\n- 数据分块\n- 进度跟踪\n- 错误处理\n- 资源管理\n\n## 数据流\n- 发送队列\n- 接收缓冲\n- 内存池\n- 流量控制","x":-1215,"y":1120,"width":430,"height":460,"color":"2"}, - {"id":"batch_request3","type":"text","text":"# BatchDataRequest(3)\n- request_id\n- block_type\n- block_index: 2\n- data","x":-660,"y":1120,"width":250,"height":120,"color":"2"}, - {"id":"batch_response3","type":"text","text":"# BatchDataResponse(3)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":1120,"width":250,"height":120,"color":"3"}, - {"id":"batch_manager","type":"text","text":"# BatchManager\n\n## 管理功能\n- 创建传输任务\n- 分配请求ID\n- 跟踪传输状态\n- 错误恢复\n\n## 数据处理\n- 分块管理\n- 数据校验\n- 内存复用\n- 并发控制","x":-1560,"y":700,"width":300,"height":300,"color":"1"}, - {"id":"batch_initiator","type":"text","text":"# 发起节点 [DataGeneral]\n\n## call_batch_data()\n- 分割数据块(1MB)\n- 创建有界任务池\n- 建议并发数=3\n- 任务队列控制","x":-1160,"y":700,"width":300,"height":300,"color":"1"}, - {"id":"batch_request1","type":"text","text":"# BatchDataRequest(1)\n- request_id\n- block_type\n- block_index: 0\n- data","x":-660,"y":700,"width":250,"height":240,"color":"2"}, - {"id":"batch_response1","type":"text","text":"# BatchDataResponse(1)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":700,"width":250,"height":240,"color":"3"}, - {"id":"batch_request2","type":"text","text":"# BatchDataRequest(2)\n- request_id\n- block_type\n- block_index: 1\n- data","x":-660,"y":980,"width":250,"height":120,"color":"2"}, - {"id":"batch_response2","type":"text","text":"# BatchDataResponse(2)\n- request_id\n- success\n- error_message\n- version","x":-310,"y":980,"width":310,"height":60,"color":"3"}, - {"id":"storage_node_3","type":"text","text":"存储节点1","x":-425,"y":130,"width":150,"height":60,"color":"3"}, - {"id":"storage_node_1","type":"text","text":"存储节点1\n接收层:\n- 接收分片请求\n- 版本号验证\n- 数据完整性校验\n写入任务层:\n- 分片范围验证\n- 并发写入控制\n- 错误重试机制\n本地存储层:\n- 数据持久化\n- 版本管理\n- 空间回收\n结果返回:\n- 写入状态\n- 远程版本号\n- 错误信息","x":40,"y":180,"width":200,"height":280,"color":"1"}, - {"id":"write_task_1","type":"text","text":"写入任务1\n- 分片范围验证\n- 数据完整性检查\n- 并发写入控制\n- 错误重试","x":380,"y":180,"width":200,"height":120,"color":"2"}, - {"id":"local_storage_1","type":"text","text":"本地存储1\n- 持久化数据\n- 版本管理\n- 空间回收","x":700,"y":180,"width":200,"height":100,"color":"3"}, - {"id":"write_result_1","type":"text","text":"写入结果1\n- 成功/失败\n- 远程版本号\n- 错误信息","x":640,"y":400,"width":200,"height":100,"color":"4"}, - {"id":"1ec171d545e8995d","x":214,"y":-636,"width":250,"height":60,"type":"text","text":""}, - {"id":"b31695207931d96e","type":"text","text":"## fn get_or_del_data\n- 数据检索和删除\n- 资源清理\n- 缓存一致性\n- 并发访问控制","x":-730,"y":-742,"width":330,"height":156,"color":"4"}, - {"id":"data_item","type":"text","text":"# 数据项处理\n\n## WriteSplitDataTaskGroup\n- 管理数据分片写入任务组\n- 分片合并优化\n- 状态同步\n- 并行控制\n## SharedMemHolder\n- 共享内存数据访问\n- 资源自动管理\n## SharedMemOwnedAccess\n- 共享内存所有权控制\n- 访问安全保证\n- 生命周期管理","x":-160,"y":-612,"width":460,"height":520,"color":"3"}, - {"id":"133214da264cfe72","type":"text","text":"## struct DataGeneral\n- 提供数据读写接口\n- 管理元数据\n- 协调各子模块功能\n- 错误处理和恢复\n- 
资源生命周期","x":-1200,"y":-800,"width":340,"height":214,"color":"4"}, - {"id":"core_functions","type":"text","text":"## fn write_data\n- 同步/异步写入\n- 数据完整性保证\n- 分片并行写入\n- 缓存节点同步\n- 错误重试机制","x":-845,"y":-547,"width":280,"height":275,"color":"4"}, - {"id":"data_general_core","type":"text","text":"# 数据管理核心模块\n- 数据流向控制\n- 并行结构管理\n- 错误处理链\n- 资源管理","x":-1490,"y":-526,"width":330,"height":234,"color":"4"}, - {"id":"821e415b6438e20d","type":"text","text":"## struct DataSplit\n- 数据分片管理\n- 分片信息维护\n- 分片操作协调\n- 存储节点分配\n- 局部性优化","x":-1372,"y":-212,"width":342,"height":158,"color":"4"}, - {"id":"5c4357fc2216ea51","type":"text","text":"## batch写入接口\n- 缓存主动推送\n- 并行写入支持\n- 错误恢复机制\n- 内存复用优化","x":-620,"y":-212,"width":250,"height":120,"color":"4"} - ], - "edges":[ - {"id":"storage_to_task1","fromNode":"storage_node_1","fromSide":"right","toNode":"write_task_1","toSide":"left","label":"分片数据"}, - {"id":"task_to_local1","fromNode":"write_task_1","fromSide":"right","toNode":"local_storage_1","toSide":"left","label":"持久化"}, - {"id":"local_to_result1","fromNode":"local_storage_1","fromSide":"right","toNode":"write_result_1","toSide":"left","label":"写入状态"}, - {"id":"phase1_to_phase2","fromNode":"general_phase1","fromSide":"bottom","toNode":"general_phase2","toSide":"top","label":"DataItems"}, - {"id":"phase2_to_master","fromNode":"general_phase2","fromSide":"right","toNode":"master_node","toSide":"left","label":"调度请求"}, - {"id":"master_to_phase2","fromNode":"master_node","fromSide":"left","toNode":"general_phase2","toSide":"right","label":"调度决策\n- version\n- splits\n- nodes"}, - {"id":"phase2_to_phase3","fromNode":"general_phase2","fromSide":"bottom","toNode":"general_phase3","toSide":"top","label":"决策信息"}, - {"id":"phase3_to_storage","fromNode":"general_phase3","fromSide":"right","toNode":"storage_group","toSide":"left","label":"分发存储任务"}, - {"id":"storage_to_nodes","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_3","toSide":"left"}, - {"id":"storage_to_nodes2","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_4","toSide":"left"}, - {"id":"storage_to_nodes3","fromNode":"storage_group","fromSide":"right","toNode":"storage_node_5","toSide":"left"}, - {"id":"phase3_to_cache","fromNode":"general_phase3","fromSide":"right","toNode":"cache_group","toSide":"left","label":"分发缓存任务"}, - {"id":"cache_to_nodes","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_1","toSide":"left"}, - {"id":"cache_to_nodes2","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_2","toSide":"left"}, - {"id":"cache_to_nodes3","fromNode":"cache_group","fromSide":"right","toNode":"cache_node_3","toSide":"left"}, - {"id":"batch_flow1","fromNode":"batch_manager","fromSide":"right","toNode":"batch_transfer","toSide":"left","label":"创建传输"}, - {"id":"initiator_to_manager","fromNode":"batch_initiator","fromSide":"left","toNode":"batch_manager","toSide":"right","label":"创建批量传输"}, - {"id":"initiator_to_request1","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request1","toSide":"left","label":"并发发送\n数据块1"}, - {"id":"initiator_to_request2","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request2","toSide":"left","label":"并发发送\n数据块2"}, - {"id":"initiator_to_request3","fromNode":"batch_initiator","fromSide":"right","toNode":"batch_request3","toSide":"left","label":"并发发送\n数据块3"}, - {"id":"request1_to_response1","fromNode":"batch_request1","fromSide":"right","toNode":"batch_response1","toSide":"left","label":"处理响应"}, - 
{"id":"request2_to_response2","fromNode":"batch_request2","fromSide":"right","toNode":"batch_response2","toSide":"left","label":"处理响应"}, - {"id":"request3_to_response3","fromNode":"batch_request3","fromSide":"right","toNode":"batch_response3","toSide":"left","label":"处理响应"}, - {"id":"b5a17c0afede8e4a","fromNode":"data_general_core","fromSide":"right","toNode":"133214da264cfe72","toSide":"bottom"}, - {"id":"2ad5991c43fd6098","fromNode":"data_general_core","fromSide":"right","toNode":"821e415b6438e20d","toSide":"top"}, - {"id":"caa45c92a135042c","fromNode":"data_general_core","fromSide":"right","toNode":"core_functions","toSide":"left"}, - {"id":"09c7b9957992d62d","fromNode":"data_general_core","fromSide":"right","toNode":"b31695207931d96e","toSide":"left"}, - {"id":"adfa1cca1009ff43","fromNode":"data_general_core","fromSide":"right","toNode":"5c4357fc2216ea51","toSide":"left"}, - {"id":"ef995a514a2210bb","fromNode":"5c4357fc2216ea51","fromSide":"right","toNode":"data_item","toSide":"left"}, - {"id":"3d79872a234731c0","fromNode":"cache_node_3","fromSide":"bottom","toNode":"batch_transfer_group","toSide":"top"} - ] -} \ No newline at end of file diff --git a/review.md b/review.md old mode 100755 new mode 100644 diff --git a/scripts/sync_md_files.py b/scripts/sync_md_files.py deleted file mode 100644 index 3c82478..0000000 --- a/scripts/sync_md_files.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python3 -import os -import shutil -import argparse -import datetime -import tarfile -from pathlib import Path - -def backup_files(directory, file_types=('.md', '.canvas')): - # Get current timestamp - timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') - - # Create backup filename - backup_name = f'backup_{timestamp}.tar.gz' - backup_path = Path(directory).parent / backup_name - - # Create tar archive - with tarfile.open(backup_path, 'w:gz') as tar: - # Walk through the directory - for root, _, files in os.walk(directory): - # Filter for target file types - target_files = [f for f in files if f.endswith(file_types)] - - for file in target_files: - file_path = Path(root) / file - # Add file to archive with its relative path - tar.add(file_path, arcname=file_path.relative_to(directory)) - - print(f'Created backup: {backup_path}') - return backup_path - -def sync_md_files(source_dir, target_dir): - # Convert to Path objects for easier handling - source_path = Path(source_dir).resolve() - target_path = Path(target_dir).resolve() - - # Create target directory if it doesn't exist - target_path.mkdir(parents=True, exist_ok=True) - - # Counter for statistics - copied_files = 0 - - # Walk through the source directory - for root, _, files in os.walk(source_path): - # Filter for .md and .canvas files - target_files = [f for f in files if f.endswith(('.md', '.canvas'))] - - for target_file in target_files: - # Get the full source path - source_file = Path(root) / target_file - - # Calculate relative path from source_dir - rel_path = source_file.relative_to(source_path) - - # Create target file path - target_file = target_path / rel_path - - # Create target directory if it doesn't exist - target_file.parent.mkdir(parents=True, exist_ok=True) - - # Copy the file - shutil.copy2(source_file, target_file) - copied_files += 1 - print(f"Copied: {rel_path}") - - print(f"\nSync complete! 
Copied {copied_files} Markdown and Canvas files.") - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Sync markdown and canvas files between local and s3fs') - parser.add_argument('direction', choices=['to_s3fs', 'from_s3fs'], - help='Direction of sync: to_s3fs or from_s3fs') - args = parser.parse_args() - - local_dir = "/root/prjs/waverless" - s3fs_dir = "/mnt/s3fs/waverless" - - if args.direction == 'to_s3fs': - source_dir = local_dir - target_dir = s3fs_dir - else: # from_s3fs - source_dir = s3fs_dir - target_dir = local_dir - - # Backup target directory before sync - print(f"Creating backup of target directory: {target_dir}") - backup_path = backup_files(target_dir) - - print(f"Starting sync from {source_dir} to {target_dir}") - sync_md_files(source_dir, target_dir) diff --git a/src/main/src/general/data/m_data_general/README.md b/src/main/src/general/data/m_data_general/README.md new file mode 100644 index 0000000..0887dc7 --- /dev/null +++ b/src/main/src/general/data/m_data_general/README.md @@ -0,0 +1,15 @@ +# 数据管理模块文档 + +## 模块文档索引 + +- [批量传输系统](batch.md) +- [数据项处理](dataitem.md) +- [数据管理核心模块](mod.md) + +## 模块说明 + +本目录包含了数据管理模块的核心实现,主要包括: + +1. 批量传输系统 (batch.rs):处理大文件的高效传输 +2. 数据项处理 (dataitem.rs):管理数据分片和共享内存访问 +3. 数据管理核心 (mod.rs):提供数据读写和元数据管理 diff --git a/src/main/src/general/data/m_data_general/batch.md b/src/main/src/general/data/m_data_general/batch.md new file mode 100644 index 0000000000000000000000000000000000000000..9f2e790dd58a5be8826c8d300cb6a63c1284b3ca GIT binary patch literal 2340 zcmeHIO;Z{{5Y2gi#a4Mq4x1m~G56eZ4p|YS6+%*g$rVsxF$g3m7{RZK3egg=R01r< zACv8w<>X&T&-4sgHCC!ThvQDqd#`)?JuJ&IW+HZSJ|0O-8m4KP>>>@r66`JVa`t+- z0i7)DHCZ+1xm|FwJfHJVTqsqc-&K{R&!bctoDQ$I*-p-1&hSiMB6&R*mRqdY!gEC8 ztEX(O!&cg?kYtlQ_83|3>Nt+e-q4dq` z9YcGI7dup{p~M9#2MPmlj=VupUPKJ9S7B!@{6LyVwez52!+2}Q?AMWaXl6bZPR_lH zsR4mg2Y28Ncl@6f0+zEKy_1&VU*e!r7OI> zi_<6Y>{nRY_goz1uKH};5)`(h(a@Bg4A07GOq$rSfANFm`=eZmzyoUI@57;4J2oAS z+`L0g1n&4JKiFwTiU`MXMIs^H05ZrKuxf{IED|;;l+_wN`Y!wiOC>O@ta-h|KQpp$ z?~DnPG^#P3x!m10AInaj#uHBraz8d z@$Oy^^Qd)nw9eKX;`^1#po9sNOJ1JuRewMZ1{(SYWwKNfst-ZCMUsMnSY%o>M)4tL6|)`gWbtusX zkQ|f=nU+vtvYj4WEl>$}D{N~~v*UWqhm#4?`b>E9Ii-m%P~jpTQktFRQy7Z=RUK0f zlMLGlExaE{hJl{*=iTQ6jc31nzufwCT_;2W}q|`4ELp|I8ut$ Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 17/26] Revert "fixing batch" This reverts commit 393a44d3b9c52cc86fec2d64983456cbe1c7803a. --- .cursorrules | 1016 ++++++++++++++++- .cursorrules copy | 977 ---------------- compilelog | 191 +++- review.md | 480 +------- .../src/general/data/m_data_general/README.md | 15 - .../src/general/data/m_data_general/batch.md | Bin 2340 -> 0 bytes .../src/general/data/m_data_general/batch.rs | 3 +- .../src/general/data/m_data_general/data.rs | 37 + .../general/data/m_data_general/dataitem.md | 57 - .../src/general/data/m_data_general/mod.md | 58 - .../src/general/data/m_data_general/mod.rs | 245 ++-- src/main/src/result.rs | 4 - 12 files changed, 1333 insertions(+), 1750 deletions(-) delete mode 100644 .cursorrules copy delete mode 100644 src/main/src/general/data/m_data_general/README.md delete mode 100644 src/main/src/general/data/m_data_general/batch.md create mode 100644 src/main/src/general/data/m_data_general/data.rs delete mode 100644 src/main/src/general/data/m_data_general/dataitem.md delete mode 100644 src/main/src/general/data/m_data_general/mod.md diff --git a/.cursorrules b/.cursorrules index 8f40ffc..f4a4825 100644 --- a/.cursorrules +++ b/.cursorrules @@ -1,43 +1,973 @@ -# Waverless 项目规则列表 - -## 1. 
任务执行强制等待规则 -- 制定计划后必须等待用户确认: - - 即使计划看起来很完善 - - 即使修改很简单 - - 即使是修复明显的错误 - - 没有任何例外情况 - -- 执行前检查清单: - - [ ] 任务是否已标记为 working? - - [ ] 修改计划是否已制定? - - [ ] 计划是否已经得到用户确认? - - [ ] 是否在正确的位置记录了计划? - -- 执行顺序强制要求: - 1. 标记任务状态 - 2. 制定修改计划 - 3. **等待用户确认** - 4. 得到确认后执行 - 5. 记录执行结果 - 6. 等待用户下一步指示 - -## 2. 基础工作流规则 -- 开始执行分析任务时: - 先标记当前任务、或子任务为 (working) 状态,working状态同一时间只应该有一个 - -- 处理任务时: - - 如果review还没有计划,则进行计划 - - 如有计划: - - 未执行过计划:等待用户确认后执行 - - 已执行过计划:等待用户指示 - -- 分析完或执行完需要回写review规划或记录时: - 在对应working处更新内容,不要乱选择更新位置 - -- 编译相关: - - agent自行需要编译或用户指明需要编译时: - sudo -E $HOME/.cargo/bin/cargo build 2>&1 | tee compilelog - - 需要分析当前问题时,先阅读 compilelog - -- 步骤管理: - 每次执行完一个大步骤(更新计划 或 执行计划)后,等待用户下一步指示 \ No newline at end of file +# Waverless 项目关键设计笔记 + +## 1. 函数执行上下文设计 + +### 1.1 基础结构 +- `FnExeCtx`: 私有的基础结构体,包含函数执行的基本信息 + ```rust + struct FnExeCtx { + pub app: String, + pub app_type: AppType, + pub func: String, + pub func_meta: FnMeta, + pub req_id: ReqId, + pub event_ctx: EventCtx, + pub res: Option, + pub sub_waiters: Vec>, + _dummy_private: (), + } + ``` + +### 1.2 公开特化类型 +- `FnExeCtxAsync` 和 `FnExeCtxSync`: + - 异步执行上下文支持 Jar、Wasm、Native 类型,包含子任务支持和完整的性能监控和日志。 + - 同步执行上下文仅支持 Native 类型,不支持子任务,包含基本的性能监控和日志。 + +### 1.3 类型安全 +- `FnExeCtxAsyncAllowedType` 和 `FnExeCtxSyncAllowedType`: + - 异步允许的类型 (Jar, Wasm, Native) + - 同步允许的类型 (仅 Native) + - 通过 `TryFrom` 在编译时强制类型安全 + +## 2. 实例管理设计 + +### 2.1 实例类型与管理器 +- `Instance` 和 `InstanceManager`: + - `Instance` 包含 Owned、Shared 和 Native 类型。 + - `InstanceManager` 管理应用实例和运行时函数上下文。 + ```rust + pub enum Instance { + Owned(OwnedInstance), + Shared(SharedInstance), + Native(NativeAppInstance), + } + + pub struct InstanceManager { + pub app_instances: SkipMap, + pub instance_running_function: DashMap, + } + ``` + +### 2.2 运行时函数上下文 +- `UnsafeFunctionCtx`: + - 包含 Sync 和 Async 类型,分别对应 `FnExeCtxSync` 和 `FnExeCtxAsync`。 + +## 3. 关键修改记录 + +### 3.1 同步/异步执行流程优化与错误处理增强 +- 简化 `finish_using`,移除不必要的异步版本,统一使用同步实现。 +- 添加同步版本的 `load_instance_sync`,仅支持 Native 类型。 +- 优化 `execute_sync` 中的异步调用处理,统一性能监控和日志记录格式。 +- 添加 `UnsupportedAppType` 错误类型,完善同步执行时的类型检查。 + +## 4. 待办事项 +- [x] 考虑添加同步版本的 `load_instance` +- [ ] 优化 `execute_sync` 中的异步-同步转换 +- [ ] 完善错误处理和日志记录 + +## 5. 核心设计原则 + +### 5.1 基础原则与 View 模式设计规则 +- 同步/异步分离,类型安全,性能监控,资源管理。 +- View 生成: + - View 结构体和 `LogicalModule` trait 的实现由宏生成。 + - 只需实现 `inner_new` 函数,使用 `logical_module_view_impl!` 生成访问函数。 + - 每个需要访问的模块都需要单独的 impl 宏调用。 + +### 5.2 去掉 #[derive(LogicalModule)] 的原因和注意事项 +- 实现特定功能:根据需求在 `DataGeneralView` 中实现特定功能,检查冲突。 +- `inner` 字段的管理:由宏管理,不能直接操作,通过宏生成的接口使用。 +- 错误分析:去掉派生后,仔细分析和解决可能出现的错误。 + +## 6. msg_pack 消息封装 + +### 6.1 基本原则与实现示例 +- 使用 `msg_pack.rs` 中的宏实现 trait,使用 `define_msg_ids!` 管理消息类型。 +- 通过 `RPCReq` trait 定义请求-响应关系。 + ```rust + define_msg_ids!( + (proto::sche::BatchDataRequest, pack, { true }), + (proto::sche::BatchDataResponse, _pack, { true }) + ); + + impl RPCReq for proto::sche::BatchDataRequest { + type Resp = proto::sche::BatchDataResponse; + } + ``` + +### 6.2 最佳实践 +- 新增消息类型时:在 `define_msg_ids!` 中添加定义,实现 `RPCReq` trait。 +- 使用消息时:使用 `RPCCaller` 和 `RPCHandler`,遵循统一的错误处理。 + +## 7. 
Waverless 代码规范核心规则 + +### 7.0 最高优先级规则 +- 在没有经过明确允许的情况下,不要擅自开始操作 +- 必须等待用户明确指示后再进行修改 +- 在进行任何修改前,先提出修改方案并等待确认 +- 有明确指令的情况下,不要擅自做其他操作 +- 删除代码时必须说明: + - 被删除代码的原有功能和作用 + - 删除的具体原因 + - 删除可能带来的影响 +- 修改代码时必须: + - 先提出完整的修改方案 + - 说明每处修改的原因和影响 + - 等待用户确认后再执行 + - 严格按照确认的方案执行,不额外修改 + - 如需额外修改,必须重新提出方案并确认 +- 修改规则文件时必须: + - 确认文件名必须是 `.cursorrules` + - 确认文件以 "# Waverless 项目关键设计笔记" 开头 + - 确认包含完整的设计笔记结构 + - 确认包含所有规则章节(1-7) + - 修改前使用搜索工具确认是正确的规则文件 + - 修改前检查文件的完整内容 + - 修改前确认修改的具体位置 + - 只修改规则相关部分 + - 保持其他内容不变 + - 保持文档结构完整 +- 执行命令时必须: + - 先提出执行计划 + - 说明执行目的和预期结果 + - 等待用户确认后再执行 + - 记录执行结果和遇到的问题 + - 如遇问题,提出解决方案并等待确认 + - 例外情况: + 1. 编译命令(sudo -E $HOME/.cargo/bin/cargo build)可以直接执行,无需等待确认 + 2. 编译命令必须将输出重定向到 compilelog 文件 + 3. 编译命令执行后必须分析结果并更新 review.md + +- 编译验证规则: + - 当用户要求检查编译状态时: + 1. 必须立即执行实际的编译命令,无需等待确认 + 2. 禁止仅查看历史编译日志 + 3. 必须使用正确的编译命令:`sudo -E $HOME/.cargo/bin/cargo build 2>&1 | tee compilelog` + 4. 必须等待编译完成并分析结果 + 5. 必须将编译结果记录到 review.md 中 + - 编译执行前必须: + 1. 确认已经在 review.md 中记录了执行计划 + 2. 确认编译环境已经准备就绪 + 3. 确认使用了正确的编译命令和参数 + - 编译执行后必须: + 1. 分析编译输出中的每个错误和警告 + 2. 更新 review.md 中的任务状态 + 3. 如果发现新的错误,创建相应的任务记录 + - 禁止行为: + 1. 禁止在没有执行编译的情况下判断编译状态 + 2. 禁止仅根据历史记录回答编译相关问题 + 3. 禁止忽略编译警告 + 4. 禁止在编译失败时不更新任务状态 + +- 编译后问题处理规则: + 1. 每次编译完成后,如果发现新的问题: + - 必须先暂停当前操作 + - 立即在 review.md 中记录新问题 + - 对新问题进行完整的分析记录 + - 等待用户确认后再继续处理 + 2. 禁止在发现新问题后未经记录就直接处理 + 3. 禁止在未经用户确认的情况下处理新问题 + 4. 每个新问题必须包含: + - 与父问题的关系分析 + - 问题的具体表现和影响 + - 初步的解决方案建议 + - 预期的处理步骤 + 5. 违反以上规则的行为将被拒绝执行 + +- review.md 使用规则: + - 在执行任何操作前必须: + 1. 先检查 review.md 文件是否存在 + 2. 阅读完整的 review.md 内容 + 3. 理解当前任务的上下文和父问题 + 4. 在合适的位置添加新的任务记录 + + - 更新位置确定原则: + 1. 必须仔细分析当前对话正在处理的具体问题 + 2. 找到该问题在 review.md 中的对应位置 + 3. 将新内容添加到该问题的相关位置 + 4. 禁止简单地追加到文件末尾 + 5. 如果找不到明确的对应位置,必须先在对应任务描述下标记为 (working) 并询问用户确认 + 6. 对于正在计划或执行中的任务,必须标记为 (working);同一时间系统中只允许存在一个 (working) 状态的任务记录。如果发现多个 (working) 标记,必须暂停后续操作,并等待用户确认后再统一标记 + + - 任务记录必须遵循以下格式: + ```markdown + - 任务:[任务描述] + - 分析: + - 父问题相关性: + 1. 父问题:[引用具体的父问题] + 2. 相关性:[说明与父问题的关系] + 3. 必要性:[说明为什么需要解决] + 4. 优先级:[说明优先级和原因] + + - 当前问题: + 1. [具体问题点1] + 2. [具体问题点2] + ... + + - 修改计划: + 1. [具体步骤1] + 2. [具体步骤2] + ... + + - 执行记录: + - 已完成: + - [已完成的步骤1] + - [已完成的步骤2] + + - 遇到的问题: + - 问题1:[问题描述] + - 解决方案:[方案描述] + - 解决过程:[过程记录] + ``` + + - 任务状态管理: + 1. 新任务必须添加在未完成任务的最前面 + 2. 已完成任务必须标记为 (done) + 3. 已完成任务必须移到未完成任务后面 + 4. 子任务必须保持正确的缩进层级 + 5. 任务完成状态必须实时更新 + + - 强制执行要求: + 1. 禁止在未更新 review.md 的情况下执行任何操作 + 2. 禁止在未经确认的情况下修改已有任务记录 + 3. 禁止删除任何历史记录 + 4. 必须在每次操作前后更新执行记录 + 5. 必须在遇到问题时立即记录 + 6. 必须在解决问题后更新解决方案 + 7. 违反以上规则的操作将被拒绝执行 + +- 执行计划必须: + 1. 在执行任何操作前,必须先在 review.md 中记录执行计划 + 2. 执行计划必须包含: + - 任务描述和目标 + - 父问题相关性分析 + - 当前问题分析 + - 具体执行步骤 + - 预期结果 + - 可能的风险 + - 验证方法 + 3. 执行计划必须遵循 review.md 的格式要求: + - 新计划添加在未完成任务的最前面 + - 使用正确的缩进和层级 + - 包含完整的分析和计划部分 + 4. 执行过程必须: + - 严格按照计划执行 + - 实时记录执行结果 + - 遇到问题时立即记录 + - 完成后更新任务状态 + 5. 禁止在没有执行计划的情况下: + - 执行任何命令 + - 修改任何文件 + - 进行任何操作 + 6. 如需修改计划: + - 必须先记录原计划的问题 + - 提出新的计划 + - 等待确认后再继续 + +### 7.1 文档维护与代码组织原则 +- 文档压缩原则:保持无损压缩,合并重复内容,简化表述,重构文档结构。 +- 文档更新规则:确认信息完整性,保留技术细节,使用清晰结构展示信息。 +- 代码组织规则:宏生成的访问函数直接使用,非 pub 函数只在一个地方定义,View 负责核心实现,具体模块负责自己的功能,通过 View 访问其他模块。 + +### 7.2 代码修改原则 + +#### 7.2.1 问题解决原则 +- 仅解决当前 review 中关注的问题和遇到的子问题 +- 解决问题前必须先写出解决方案的规划: + 1. 分析问题的根本原因 + 2. 列出可能的解决方案 + 3. 评估每个方案的优缺点 + 4. 选择最优方案并说明原因 + 5. 列出具体的实施步骤 + 6. 
考虑可能的风险和应对措施 + + +- 不随意删除或修改已有的正确实现 +- 不在多处实现同一功能 +- 保持代码结构清晰简单 +- 修改前先理解设计原则 + +#### 异步任务处理原则 +- 分析生命周期和所有权需求 +- 避免盲目克隆,只克隆必要数据 +- 考虑类型特征(如 P2PModule 的轻量级 Clone) +- 评估替代方案 + +```rust +// 反例:过度克隆 +let p2p = self.p2p().clone(); // 不必要,P2PModule 本身就是轻量级的 +let data_general = self.data_general().clone(); // 不必要,同上 + +// 正例:按需克隆 +let split_info = split.clone(); // 必要,因为来自临时变量的引用 +``` + +分析要点: +- 使用场景:确认异步任务中的实际需求 +- 类型特征:检查是否已实现轻量级 Clone +- 生命周期:特别关注临时变量引用 +- 替代方案:考虑其他实现方式 + +### 7.3 错误与正确示例 +- 错误示例:手动实现已有的宏生成函数,在两个地方都实现同一个函数,过度修改已有代码结构,有损压缩文档内容。 +- 正确示例:使用宏生成的访问函数,在合适的位置添加新功能,遵循已有的代码组织方式,保持文档的完整性和准确性。 + +### 7.4 异步任务变量处理规范 + +#### 1. 变量分析原则 +- 生命周期分析:确定变量在异步任务中的生存期 +- 所有权需求:判断是否需要克隆或移动所有权 +- 类型特征:考虑变量的类型特性(如 Clone、Send、'static 等) +- 数据共享:评估是否需要在多个任务间共享数据 + +#### 2. 克隆策略 +必须克隆的情况: +- 临时变量引用:`split_info.clone()`(来自迭代器) +- 多任务共享:`unique_id.clone()`(多个任务需要) +- 部分数据:`data_item.clone_split_range()`(只克隆需要的范围) + +不需要克隆的情况: +- 值类型复制:`version`(直接复制即可) +- 已实现 Copy:基本数据类型 +- 单一任务使用:不需要在多个任务间共享的数据 + +#### 3. View 模式使用规范 +基本原则: +- View 本身已经是完整引用:不需要额外的 view 字段 +- 异步任务中使用:`self.clone()` +- 模块访问:通过 view 直接访问其他模块 + +示例代码: +```rust +// 正确示例 +let view = self.clone(); // View 本身克隆 +let resp = view.data_general().rpc_call_write_once_data... + +// 错误示例 +let view = self.view.clone(); // 错误:不需要额外的 view 字段 +let data_general = self.data_general().clone(); // 错误:不需要单独克隆模块 +``` + +#### 4. 异步任务数据处理检查清单 +- [ ] 是否只克隆必要的数据? +- [ ] 临时变量是否正确处理? +- [ ] View 的使用是否符合规范? +- [ ] 是否避免了重复克隆? +- [ ] 数据共享策略是否合理? + +#### 5. 常见场景示例 + +1. 批量数据处理: +```rust +// 正确处理临时变量和部分数据 +let split_info = split_info.clone(); // 临时变量必须克隆 +let data_item = data_item.clone_split_range(range); // 只克隆需要的部分 +let view = self.clone(); // View 克隆用于异步任务 +``` + +2. 并发任务处理: +```rust +// 使用信号量和数据共享 +let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT)); +let view = self.clone(); // 一次克隆,多处使用 +for node_id in nodes { + let permit = semaphore.clone(); + let view = view.clone(); // View 在任务间共享 + tokio::spawn(async move { ... }); +} +``` + +### 7.3 变量类型难分辨的情况 + +#### 7.3.1 Proto生成的Rust类型 +1. proto中的普通字段在Rust中的表现: + - proto中的 `string file_name_opt = 1` 生成的是普通 `String` 类型,而不是 `Option` + - proto中的 `bool is_dir_opt = 2` 生成的是普通 `bool` 类型,而不是 `Option` + - 字段名带 `_opt` 后缀不代表它在Rust中是 `Option` 类型 + +2. proto中的message嵌套在Rust中的表现: + - `DataItem` 中的 `oneof data_item_dispatch` 在Rust中是一个字段 + - 访问路径是: `data.data_item_dispatch` 而不是 `data.data.data_item_dispatch` + - `Option` 需要先 `unwrap()` 才能访问其内部字段 + +#### 7.3.2 容易混淆的类型转换 +1. proto生成的类型和标准库类型的关系: + - proto生成的 `String` 字段不能直接用 `unwrap_or_default()` + - proto生成的 `bool` 字段不能直接用 `unwrap_or()` + +### 7.5 思维方式原则 +- 思维优先于行动: + - 在开始任何操作前,先理解"为什么"而不是"怎么做" + - 确保完全理解当前上下文中的所有信息 + - 避免机械性思维和跳过思考的行为模式 + - 对于涉及代码逻辑的命令,必须先阅读和理解相关代码,再执行命令 + - 当需要复用或参考现有代码逻辑时,必须先在项目中查找并理解相关实现 + - 在理解代码时,需要关注: + - 代码的执行流程和依赖关系 + - 数据结构和状态管理方式 + - 错误处理和异常情况的处理方式 + +- 代码分析记录原则: + - 在修改任何代码之前,必须在 review.md 中记录完整的代码分析: + 1. 问题代码:截取导致问题的具体代码片段 + 2. 上下文代码:截取理解问题所需的相关代码 + 3. 问题成因:详细分析问题的具体原因 + 4. 修复方案:说明如何修复以及为什么这样修复 + 5. 修改验证:列出验证修改正确性的方法 + - 分析记录必须: + - 使用代码块格式展示代码 + - 保持代码片段的完整性和可读性 + - 确保分析逻辑清晰 + - 说明修改的影响范围 + +- 父问题相关性分析: + - 在开始分析任何问题之前,必须首先进行父问题相关性分析 + - 分析步骤: + 1. 确认当前问题的父问题是什么 + 2. 回溯父问题的执行计划和记录 + 3. 判断当前问题是否是父问题引起的 + 4. 确认解决当前问题是否必要且有助于解决父问题 + - 分析结果必须包含: + 1. 父问题的明确引用 + 2. 相关性的具体分析 + 3. 解决必要性说明 + 4. 优先级判断 + - 如果当前问题与父问题无关: + 1. 记录分析结果 + 2. 暂时搁置该问题 + 3. 继续专注于父问题的解决 + +- 内化规则: + - 把规则视为思维框架而不是外部约束 + - 养成先检查当前上下文的习惯 + - 避免在已有信息的情况下去外部搜索 +- 关注本质: + - 理解问题的根本原因比立即解决问题更重要 + - 分析失误的思维模式而不是简单记住正确操作 + - 把经验转化为思维方式而不是操作步骤 + +## 8. 
代码评审与修改文档规则 + +### 8.1 修改计划与记录要求 +- 每次修改代码前: + 1. 必须查看项目根目录的 `review.md` 文件 + 2. 根据现有内容确定修改计划的位置和层级 + 3. 在对应位置添加修改计划 + 4. 使用 markdown 格式记录,保持层级结构清晰 + +### 8.2 文档结构规范 +- 所有修改记录必须使用以下简化的问题树结构: + ```markdown + - 任务/问题:xxxx + - 分析:xxxx + - 计划任务1:xxxx + 新问题1:xxxx + - 分析:xxxx + - 计划任务3:xxxx + 已完成 + + - 计划任务2:xxxx + 已完成 + ``` + +- 结构规则: + 1. 父节点必须是具体的任务或问题描述 + 2. 第一个子节点必须是对问题的分析 + 3. 后续子节点是具体的计划任务 + 4. 每个计划任务下可以包含新的问题,遵循相同的结构 + 5. 已完成的任务标记为"已完成" + 6. 保持缩进层级清晰 + +- 示例说明: + ```markdown + - 任务:修复类型转换错误 + - 分析:当前代码在类型转换时未考虑空值情况 + - 计划任务1:添加空值检查 + 新问题:如何处理空值转换失败 + - 分析:需要在转换失败时提供默认值 + - 计划任务:实现 Option 转换 + 已完成 + + - 计划任务2:添加单元测试 + 已完成 + ``` + +### 8.3 记录要求 +1. 修改计划必须包含: + - 修改目的 + - 预期效果 + - 可能的风险 + - 具体步骤 + +2. 修改过程必须记录: + - 实际执行的步骤 + - 遇到的每个问题 + - 解决方案和结果 + +3. 问题记录必须包含: + - 问题的具体表现 + - 问题的可能原因 + - 尝试的解决方案 + - 最终的解决方案 + - 预防措施(如果适用) + +### 8.4 维护原则 +- 保持文档的实时更新 +- 确保问题树结构清晰 +- 定期回顾和整理文档 +- 记录经验教训和最佳实践 + +### 8.5 任务识别规则 + +#### 8.5.1 任务状态判断 +1. 完成状态标记: + - 已完成任务必须标记为 `(done)` + - 未标记 `(done)` 的任务视为未完成 + - 不使用其他状态标记 + +2. 任务顺序规则: + - 文档开头说明:`(顺序:新的在前面;先解决就的未完成的;完成的有标注;问题可能存在子问题)` + - 新任务添加到未完成任务的最前面 + - 已完成任务移到未完成任务的后面 + - 子任务跟随父任务,保持缩进层级 + +3. 最老未完成任务识别: + - 从上到下扫描所有顶级任务 + - 跳过带有 `(done)` 标记的任务 + - 第一个不带 `(done)` 标记的任务即为最老未完成任务 + - 子任务不影响父任务的完成状态判断 + +4. 任务优先级: + - 未完成任务按出现顺序表示优先级(越靠后优先级越高) + - 子任务优先级高于同级后续任务 + - 阻塞性问题优先级最高 + +#### 8.5.2 任务解析检查清单 +在识别和处理任务时,必须检查: +- [ ] 任务是否有 `(done)` 标记 +- [ ] 任务是否为顶级任务 +- [ ] 是否有未完成的子任务 +- [ ] 任务的位置是否符合顺序规则 +- [ ] 是否存在阻塞性问题 + +## 9. 批量数据接口设计 + +### 9.1 BatchTransfer 设计规范 + +#### 9.1.1 组件职责定义 + +1. **数据结构职责划分** + - BatchTransfer(单个传输任务管理器)必须: + - 维护单个传输任务的完整状态(unique_id, version, block_type, total_blocks) + - 使用 DashMap 存储接收到的数据块,确保并发安全 + - 通过 Option 管理完成状态通知 + - 负责数据块的接收、验证和重组 + + - BatchManager(全局传输任务管理器)必须: + - 使用 DashMap 维护所有进行中的传输任务 + - 使用原子计数器生成唯一的请求序列号 + - 负责传输任务的创建、数据块处理和生命周期管理 + +2. **函数职责要求** + - call_batch_data(发送端)必须: + - 使用固定大小(1MB)进行数据分块 + - 通过 BatchManager 创建传输任务 + - 负责数据块的发送 + - 等待传输完成通知 + + - handle_block(接收端)必须: + - 接收并验证单个数据块 + - 更新传输状态 + - 在接收完所有块时触发完成处理 + + - complete(完成处理)必须: + - 校验所有数据块的完整性 + - 根据类型(内存/文件)重组数据 + - 发送完成通知 + +#### 9.1.2 数据流转规范 + +1. **发送流程要求** + - 必须按照以下顺序执行: + 1. 接收原始数据并验证 + 2. 计算分块策略 + 3. 创建传输任务 + 4. 按序发送数据块 + +2. **接收流程要求** + - 必须按照以下顺序处理: + 1. 接收数据块并验证 + 2. 存储到对应的 BatchTransfer + 3. 检查完整性 + 4. 触发完成处理 + 5. 通知发送端 + +#### 9.1.3 错误处理规范 + +1. **组件错误处理职责** + - BatchTransfer 必须处理: + - 数据块完整性验证错误 + - 数据重组过程错误 + + - BatchManager 必须处理: + - 传输任务存在性检查错误 + - 并发访问保护错误 + + - 调用方必须处理: + - 网络传输错误 + - 超时错误 + +2. **错误恢复策略** + - 必须支持以下错误恢复机制: + - 单个数据块的重试 + - 传输任务的取消 + - 资源的正确释放 + +#### 9.1.4 资源管理规范 + +1. **内存管理** + - 必须预分配适当的缓冲区大小 + - 必须及时释放不再需要的内存 + - 必须控制并发数据块的最大数量 + +2. **文件管理** + - 必须使用唯一的临时文件名 + - 必须在完成后清理临时文件 + - 必须正确处理文件权限 + +3. **并发控制** + - 必须使用 DashMap 确保并发安全 + - 必须使用原子操作处理计数器 + - 必须正确管理 channel 资源 + +### 9.2 批量写入实现 + +#### 9.2.1 总体流程 + +1. **数据切分** + - 内存数据按 1MB 切块 + - 文件数据按 4MB 切块 + - 计算总块数和最后一块大小 + +2. **任务池初始化** + - 创建 4 个传输任务槽位 + - 每个任务负责一个数据块的传输 + - 任务完成后自动释放槽位 + +3. **数据块获取** + - 空闲任务会请求新的数据块 + - 最多预取 8 个块 + - 超过限制则等待其他块处理完成 + +4. **传输过程** + - 任务获取到数据块后开始传输 + - 每个请求包含块索引和数据类型 + - 单个请求超时时间为 30 秒 + +5. **完成处理** + - 所有块传输完成后结束 + - 失败的块会重试最多 3 次 + - 重试间隔为 1 秒 + +#### 9.2.2 接收方处理 + +1. **数据管理** + - 复用 get_data 的文件和内存管理逻辑 + - 文件使用 FileManager 管理可变文件 + - 内存使用 MemoryManager 管理内存块 + +2. **并行写入** + - 每个数据块作为独立的写入任务 + - 文件写入使用 seek + write 定位写入 + - 内存写入使用偏移量计算地址 + +3. **并发控制** + - 使用 RwLock 保护共享资源 + - 文件操作使用 async 文件 I/O + - 内存操作使用原子操作保证并发安全 + +4. **状态管理** + - 记录每个块的写入状态 + - 支持断点续传和重试 + - 完成后更新元数据 + ``` + +3. 
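（补充示例）下面按上文 9.1.1 的职责划分拼一个最小的 Rust 草图,仅作示意,并非仓库的实际实现:`BatchTransfer` 用 `DashMap` 存数据块、收齐后重组并经 `oneshot` 通知完成,`BatchManager` 用原子计数器分配请求号。为了简短,这里省略了规范中列出的 unique_id、version、block_type 等字段,把单独的 complete() 步骤内联进 handle_block(),且只演示内存模式(文件模式应改为写临时文件);真实字段与方法签名以源码为准。

```rust
use dashmap::DashMap;
use std::sync::{
    atomic::{AtomicU64, Ordering},
    Arc, Mutex,
};
use tokio::sync::oneshot;

/// 单个传输任务:接收数据块,收齐后重组并发送完成通知(对应 9.1.1 的 BatchTransfer)。
pub struct BatchTransfer {
    pub total_blocks: u32,
    blocks: DashMap<u32, Vec<u8>>, // 已接收的数据块,DashMap 保证并发安全
    complete_tx: Mutex<Option<oneshot::Sender<Vec<u8>>>>, // 用 Option 管理完成通知,只触发一次
}

impl BatchTransfer {
    /// 接收并记录一个数据块;全部到齐时按索引顺序重组并通知(内联了规范中的 complete 步骤)。
    pub fn handle_block(&self, index: u32, data: Vec<u8>) {
        assert!(index < self.total_blocks, "块索引越界");
        let _ = self.blocks.insert(index, data);
        if self.blocks.len() as u32 == self.total_blocks {
            // 内存模式:按块索引顺序拼回完整数据
            let mut whole = Vec::new();
            for i in 0..self.total_blocks {
                whole.extend_from_slice(&self.blocks.get(&i).expect("缺块"));
            }
            if let Some(tx) = self.complete_tx.lock().unwrap().take() {
                let _ = tx.send(whole);
            }
        }
    }
}

/// 全局传输任务管理器:维护进行中的任务并分配唯一请求号(对应 9.1.1 的 BatchManager)。
pub struct BatchManager {
    transfers: DashMap<u64, Arc<BatchTransfer>>,
    next_request_id: AtomicU64,
}

impl BatchManager {
    /// 创建传输任务,返回请求号与完成通知的接收端。
    pub fn create_transfer(&self, total_blocks: u32) -> (u64, oneshot::Receiver<Vec<u8>>) {
        let request_id = self.next_request_id.fetch_add(1, Ordering::SeqCst);
        let (tx, rx) = oneshot::channel();
        let transfer = Arc::new(BatchTransfer {
            total_blocks,
            blocks: DashMap::new(),
            complete_tx: Mutex::new(Some(tx)),
        });
        let _ = self.transfers.insert(request_id, transfer);
        (request_id, rx)
    }
}
```

发送端(call_batch_data)按此草图的用法是:先 create_transfer 拿到 request_id 和接收端,再并发发送各块,最后 await 接收端等待重组结果。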
**接收方处理** + ```rust + struct BatchDataWriter { + // 文件缓存,使用 unique_id 作为 key + file_cache: HashMap, BatchFileCache>, + // 内存缓存,使用 unique_id 作为 key + memory_cache: HashMap, BatchMemoryCache>, + } + + impl BatchDataWriter { + async fn handle_request(&mut self, req: BatchDataRequest) -> BatchDataResponse { + let cache = match req.block_type { + DataBlockType::Memory => &mut self.memory_cache, + DataBlockType::File => &mut self.file_cache, + }; + + // 获取或创建缓存 + let block_cache = cache.entry(req.unique_id.clone()) + .or_insert_with(|| self.create_cache(req.block_type)); + + // 写入数据块 + match block_cache.write_block(req.block_index, req.data).await { + Ok(()) => BatchDataResponse { + request_id: req.request_id, + success: true, + error_message: String::new(), + version: req.version, + }, + Err(e) => BatchDataResponse { + request_id: req.request_id, + success: false, + error_message: e.to_string(), + version: req.version, + }, + } + } + } + ``` + +#### 9.2.2 缓存管理 + +1. **文件缓存** + ```rust + struct BatchFileCache { + path: PathBuf, // 临时文件路径 + file: File, // 文件句柄 + received_blocks: HashSet, // 已接收的块 + } + + impl BatchFileCache { + async fn write_block(&mut self, index: u32, data: Vec) -> Result<()> { + // 记录块并写入文件 + self.received_blocks.insert(index); + self.file.seek(SeekFrom::Start((index as u64) * BLOCK_SIZE))?; + self.file.write_all(&data)?; + Ok(()) + } + } + ``` + +2. **内存缓存** + ```rust + struct BatchMemoryCache { + blocks: HashMap>, // 块索引 -> 数据 + total_size: usize, // 总大小 + } + + impl BatchMemoryCache { + async fn write_block(&mut self, index: u32, data: Vec) -> Result<()> { + // 直接存储到内存 + self.blocks.insert(index, data); + Ok(()) + } + } + ``` + +#### 9.2.3 注意事项 + +1. **并发控制** + - 使用 MAX_CONCURRENT_TASKS 控制带宽使用 + - 通过 MAX_PENDING_BLOCKS 实现背压控制 + - 任务完成后及时释放资源 + +2. **内存管理** + - 预取块数量不超过 MAX_PENDING_BLOCKS + - 使用 Arc<[u8]> 避免数据复制 + - 大文件优先使用文件缓存 + +3. **错误处理** + - 记录失败的块以便重试 + - 最多重试 MAX_RETRIES 次 + - 重试间隔为 RETRY_DELAY_MS + - 单个任务超过 TASK_TIMEOUT_MS 自动取消 + +4. **性能优化** + - 使用异步 I/O 提高并发性 + - 任务空闲时自动获取新块 + - 支持乱序处理和断点续传 + +5. **监控和调试** + - 记录每个块的处理状态 + - 统计传输速率和成功率 + - 支持取消整个传输任务 + +### 9.3 请求方逻辑 + +1. **请求预处理**: + - 生成唯一的 request_id + - 验证数据项数量不超过 max_batch_size + - 设置适当的超时时间 + +### 9.3 并行写入实现规范 + +#### 9.3.1 WriteSplitDataTaskGroup 设计模式 +1. **基础结构设计** + ```rust + enum WriteSplitDataTaskGroup { + ToFile { + file_path: PathBuf, + tasks: Vec>>, + }, + ToMem { + shared_mem: SharedMemHolder, + tasks: Vec>>, + }, + } + ``` + +2. **职责划分** + - 任务组管理: + - 创建和初始化写入任务 + - 跟踪任务状态和完成情况 + - 提供统一的任务管理接口 + - 数据写入: + - 文件写入使用 FileExt::write_at + - 内存写入使用 SharedMemOwnedAccess + - 支持并发安全的数据访问 + +3. **并发控制要求** + - 文件写入: + - 使用 tokio::task::spawn_blocking 处理 I/O + - 通过文件偏移确保并发安全 + - 每个任务独占写入区域 + - 内存写入: + - 使用 SharedMemOwnedAccess 保证访问安全 + - 通过 Range 隔离数据区域 + - Arc 管理共享内存生命周期 + +4. **错误处理规范** + - 数据验证: + - 检查数据块类型匹配 + - 验证数据长度一致性 + - 确保写入位置正确 + - 错误传播: + - 使用 Result 类型传递错误 + - 支持任务级别的错误处理 + - 实现错误重试机制 + +#### 9.3.2 复用规范 +1. **接口设计要求** + - 提供统一的数据写入接口 + - 支持文件和内存两种模式 + - 保持与现有实现兼容 + +2. **数据管理规范** + - 文件数据: + - 使用文件偏移管理数据位置 + - 支持并发写入和随机访问 + - 实现临时文件清理 + - 内存数据: + - 使用 SharedMemOwnedAccess 管理 + - 支持数据分片和并发访问 + - 确保内存安全释放 + +3. **任务管理要求** + - 并发控制: + - 使用信号量限制并发任务数 + - 支持任务取消和超时处理 + - 实现资源自动释放 + - 状态同步: + - 跟踪任务完成状态 + - 支持等待所有任务完成 + - 提供任务进度反馈 + +4. **性能优化准则** + - 预分配资源: + - 文件空间预分配 + - 内存缓冲区预分配 + - 任务队列容量预设 + - 并发调优: + - 根据系统资源调整并发度 + - 优化任务调度策略 + - 减少数据复制开销 + +## 10. 
构建规则 + +### 10.1 编译命令规范 + +#### 10.1.1 使用 sudo 编译 +- 项目编译前必须确保已设置默认工具链: + ```bash + rustup default stable + ``` + +- 项目编译必须使用 sudo 权限: + ```bash + sudo -E $HOME/.cargo/bin/cargo build + ``` + +#### 10.1.2 使用场景 +1. 首次编译项目 +2. 依赖更新后的完整编译 +3. 涉及系统级权限的功能修改 + +#### 10.1.3 安全注意事项 +1. 确保使用 sudo 的必要性: + - 仅在确实需要系统权限时使用 + - 优先考虑其他解决方案 + +2. 权限管理: + - 确保开发者具有必要的 sudo 权限 + - 遵循最小权限原则 + - 避免在非必要情况下使用 sudo + +3. 环境一致性: + - 保持开发环境权限配置一致 + - 记录所有需要 sudo 权限的依赖 + - 在文档中说明使用 sudo 的原因 + +4. 编译环境检查: + - 确保 rustup 工具链已正确安装 + - 确保已设置默认工具链:`rustup default stable` + - 检查 cargo 路径是否正确 + +### 8.3 处理方逻辑 + +1. **并发处理**: + - 使用工作池处理批量请求 + - 控制并发度 + - 实现公平调度 + +2. **资源管理**: + - 内存使用限制 + - 连接数限制 + - CPU 使用限制 + +3. **监控和日志**: + - 记录处理时间 + - 记录成功/失败率 + - 记录资源使用情况 + +### 8.4 最佳实践 + +1. **批量大小**: + - 建议单批次处理 100-1000 个数据项 + - 根据数据大小动态调整 + +2. **超时设置**: + - 基础超时:30秒 + - 根据批量大小线性增加 + - 最大超时:120秒 + +3. **错误处理**: + - 提供详细的错误信息 + - 支持部分成功的情况 + - 实现幂等性 + +4. **性能考虑**: + - 使用异步处理 + - 实现批量压缩 + - 考虑网络带宽限制 + + - 把规则视为思维框架而不是外部约束 + - 养成先检查当前上下文的习惯 + - 避免在已有信息的情况下去外部搜索 +- 关注本质: + - 理解问题的根本原因比立即解决问题更重要 + - 分析失误的思维模式而不是简单记住正确操作 + - 把经验转化为思维方式而不是操作步骤 diff --git a/.cursorrules copy b/.cursorrules copy deleted file mode 100644 index 3c0bb19..0000000 --- a/.cursorrules copy +++ /dev/null @@ -1,977 +0,0 @@ - - - -*/ -# Waverless 项目关键设计笔记 - -## 1. 函数执行上下文设计 - -### 1.1 基础结构 -- `FnExeCtx`: 私有的基础结构体,包含函数执行的基本信息 - ```rust - struct FnExeCtx { - pub app: String, - pub app_type: AppType, - pub func: String, - pub func_meta: FnMeta, - pub req_id: ReqId, - pub event_ctx: EventCtx, - pub res: Option, - pub sub_waiters: Vec>, - _dummy_private: (), - } - ``` - -### 1.2 公开特化类型 -- `FnExeCtxAsync` 和 `FnExeCtxSync`: - - 异步执行上下文支持 Jar、Wasm、Native 类型,包含子任务支持和完整的性能监控和日志。 - - 同步执行上下文仅支持 Native 类型,不支持子任务,包含基本的性能监控和日志。 - -### 1.3 类型安全 -- `FnExeCtxAsyncAllowedType` 和 `FnExeCtxSyncAllowedType`: - - 异步允许的类型 (Jar, Wasm, Native) - - 同步允许的类型 (仅 Native) - - 通过 `TryFrom` 在编译时强制类型安全 - -## 2. 实例管理设计 - -### 2.1 实例类型与管理器 -- `Instance` 和 `InstanceManager`: - - `Instance` 包含 Owned、Shared 和 Native 类型。 - - `InstanceManager` 管理应用实例和运行时函数上下文。 - ```rust - pub enum Instance { - Owned(OwnedInstance), - Shared(SharedInstance), - Native(NativeAppInstance), - } - - pub struct InstanceManager { - pub app_instances: SkipMap, - pub instance_running_function: DashMap, - } - ``` - -### 2.2 运行时函数上下文 -- `UnsafeFunctionCtx`: - - 包含 Sync 和 Async 类型,分别对应 `FnExeCtxSync` 和 `FnExeCtxAsync`。 - -## 3. 关键修改记录 - -### 3.1 同步/异步执行流程优化与错误处理增强 -- 简化 `finish_using`,移除不必要的异步版本,统一使用同步实现。 -- 添加同步版本的 `load_instance_sync`,仅支持 Native 类型。 -- 优化 `execute_sync` 中的异步调用处理,统一性能监控和日志记录格式。 -- 添加 `UnsupportedAppType` 错误类型,完善同步执行时的类型检查。 - -## 4. 待办事项 -- [x] 考虑添加同步版本的 `load_instance` -- [ ] 优化 `execute_sync` 中的异步-同步转换 -- [ ] 完善错误处理和日志记录 - -## 5. 核心设计原则 - -### 5.1 基础原则与 View 模式设计规则 -- 同步/异步分离,类型安全,性能监控,资源管理。 -- View 生成: - - View 结构体和 `LogicalModule` trait 的实现由宏生成。 - - 只需实现 `inner_new` 函数,使用 `logical_module_view_impl!` 生成访问函数。 - - 每个需要访问的模块都需要单独的 impl 宏调用。 - -### 5.2 去掉 #[derive(LogicalModule)] 的原因和注意事项 -- 实现特定功能:根据需求在 `DataGeneralView` 中实现特定功能,检查冲突。 -- `inner` 字段的管理:由宏管理,不能直接操作,通过宏生成的接口使用。 -- 错误分析:去掉派生后,仔细分析和解决可能出现的错误。 - -## 6. 
msg_pack 消息封装 - -### 6.1 基本原则与实现示例 -- 使用 `msg_pack.rs` 中的宏实现 trait,使用 `define_msg_ids!` 管理消息类型。 -- 通过 `RPCReq` trait 定义请求-响应关系。 - ```rust - define_msg_ids!( - (proto::sche::BatchDataRequest, pack, { true }), - (proto::sche::BatchDataResponse, _pack, { true }) - ); - - impl RPCReq for proto::sche::BatchDataRequest { - type Resp = proto::sche::BatchDataResponse; - } - ``` - -### 6.2 最佳实践 -- 新增消息类型时:在 `define_msg_ids!` 中添加定义,实现 `RPCReq` trait。 -- 使用消息时:使用 `RPCCaller` 和 `RPCHandler`,遵循统一的错误处理。 - -## 7. Waverless 代码规范核心规则 - -### 7.0 最高优先级规则 -- 在没有经过明确允许的情况下,不要擅自开始操作 -- 必须等待用户明确指示后再进行修改 -- 在进行任何修改前,先提出修改方案并等待确认 -- 有明确指令的情况下,不要擅自做其他操作 -- 删除代码时必须说明: - - 被删除代码的原有功能和作用 - - 删除的具体原因 - - 删除可能带来的影响 -- 修改代码时必须: - - 先提出完整的修改方案 - - 说明每处修改的原因和影响 - - 等待用户确认后再执行 - - 严格按照确认的方案执行,不额外修改 - - 如需额外修改,必须重新提出方案并确认 -- 修改规则文件时必须: - - 确认文件名必须是 `.cursorrules` - - 确认文件以 "# Waverless 项目关键设计笔记" 开头 - - 确认包含完整的设计笔记结构 - - 确认包含所有规则章节(1-7) - - 修改前使用搜索工具确认是正确的规则文件 - - 修改前检查文件的完整内容 - - 修改前确认修改的具体位置 - - 只修改规则相关部分 - - 保持其他内容不变 - - 保持文档结构完整 -- 执行命令时必须: - - 先提出执行计划 - - 说明执行目的和预期结果 - - 等待用户确认后再执行 - - 记录执行结果和遇到的问题 - - 如遇问题,提出解决方案并等待确认 - - 例外情况: - 1. 编译命令(sudo -E $HOME/.cargo/bin/cargo build)可以直接执行,无需等待确认 - 2. 编译命令必须将输出重定向到 compilelog 文件 - 3. 编译命令执行后必须分析结果并更新 review.md - -- 编译验证规则: - - 当用户要求检查编译状态时: - 1. 必须立即执行实际的编译命令,无需等待确认 - 2. 禁止仅查看历史编译日志 - 3. 必须使用正确的编译命令:`sudo -E $HOME/.cargo/bin/cargo build 2>&1 | tee compilelog` - 4. 必须等待编译完成并分析结果 - 5. 必须将编译结果记录到 review.md 中 - - 编译执行前必须: - 1. 确认已经在 review.md 中记录了执行计划 - 2. 确认编译环境已经准备就绪 - 3. 确认使用了正确的编译命令和参数 - - 编译执行后必须: - 1. 分析编译输出中的每个错误和警告 - 2. 更新 review.md 中的任务状态 - 3. 如果发现新的错误,创建相应的任务记录 - - 禁止行为: - 1. 禁止在没有执行编译的情况下判断编译状态 - 2. 禁止仅根据历史记录回答编译相关问题 - 3. 禁止忽略编译警告 - 4. 禁止在编译失败时不更新任务状态 - -- 编译后问题处理规则: - 1. 每次编译完成后,如果发现新的问题: - - 必须先暂停当前操作 - - 立即在 review.md 中记录新问题 - - 对新问题进行完整的分析记录 - - 等待用户确认后再继续处理 - 2. 禁止在发现新问题后未经记录就直接处理 - 3. 禁止在未经用户确认的情况下处理新问题 - 4. 每个新问题必须包含: - - 与父问题的关系分析 - - 问题的具体表现和影响 - - 初步的解决方案建议 - - 预期的处理步骤 - 5. 违反以上规则的行为将被拒绝执行 - -- review.md 使用规则: - - 在执行任何操作前必须: - 1. 先检查 review.md 文件是否存在 - 2. 阅读完整的 review.md 内容 - 3. 理解当前任务的上下文和父问题 - 4. 在合适的位置添加新的任务记录 - - - 更新位置确定原则: - 1. 必须仔细分析当前对话正在处理的具体问题 - 2. 找到该问题在 review.md 中的对应位置 - 3. 将新内容添加到该问题的相关位置 - 4. 禁止简单地追加到文件末尾 - 5. 如果找不到明确的对应位置,必须先在对应任务描述下标记为 (working) 并询问用户确认 - 6. 对于正在计划或执行中的任务,必须标记为 (working);同一时间系统中只允许存在一个 (working) 状态的任务记录。如果发现多个 (working) 标记,必须暂停后续操作,并等待用户确认后再统一标记 - - - 任务记录必须遵循以下格式: - ```markdown - - 任务:[任务描述] - - 分析: - - 父问题相关性: - 1. 父问题:[引用具体的父问题] - 2. 相关性:[说明与父问题的关系] - 3. 必要性:[说明为什么需要解决] - 4. 优先级:[说明优先级和原因] - - - 当前问题: - 1. [具体问题点1] - 2. [具体问题点2] - ... - - - 修改计划: - 1. [具体步骤1] - 2. [具体步骤2] - ... - - - 执行记录: - - 已完成: - - [已完成的步骤1] - - [已完成的步骤2] - - - 遇到的问题: - - 问题1:[问题描述] - - 解决方案:[方案描述] - - 解决过程:[过程记录] - ``` - - - 任务状态管理: - 1. 新任务必须添加在未完成任务的最前面 - 2. 已完成任务必须标记为 (done) - 3. 已完成任务必须移到未完成任务后面 - 4. 子任务必须保持正确的缩进层级 - 5. 任务完成状态必须实时更新 - - - 强制执行要求: - 1. 禁止在未更新 review.md 的情况下执行任何操作 - 2. 禁止在未经确认的情况下修改已有任务记录 - 3. 禁止删除任何历史记录 - 4. 必须在每次操作前后更新执行记录 - 5. 必须在遇到问题时立即记录 - 6. 必须在解决问题后更新解决方案 - 7. 违反以上规则的操作将被拒绝执行 - -- 执行计划必须: - 1. 在执行任何操作前,必须先在 review.md 中记录执行计划 - 2. 执行计划必须包含: - - 任务描述和目标 - - 父问题相关性分析 - - 当前问题分析 - - 具体执行步骤 - - 预期结果 - - 可能的风险 - - 验证方法 - 3. 执行计划必须遵循 review.md 的格式要求: - - 新计划添加在未完成任务的最前面 - - 使用正确的缩进和层级 - - 包含完整的分析和计划部分 - 4. 执行过程必须: - - 严格按照计划执行 - - 实时记录执行结果 - - 遇到问题时立即记录 - - 完成后更新任务状态 - 5. 禁止在没有执行计划的情况下: - - 执行任何命令 - - 修改任何文件 - - 进行任何操作 - 6. 
如需修改计划: - - 必须先记录原计划的问题 - - 提出新的计划 - - 等待确认后再继续 - -### 7.1 文档维护与代码组织原则 -- 文档压缩原则:保持无损压缩,合并重复内容,简化表述,重构文档结构。 -- 文档更新规则:确认信息完整性,保留技术细节,使用清晰结构展示信息。 -- 代码组织规则:宏生成的访问函数直接使用,非 pub 函数只在一个地方定义,View 负责核心实现,具体模块负责自己的功能,通过 View 访问其他模块。 - -### 7.2 代码修改原则 - -#### 7.2.1 问题解决原则 -- 仅解决当前 review 中关注的问题和遇到的子问题 -- 解决问题前必须先写出解决方案的规划: - 1. 分析问题的根本原因 - 2. 列出可能的解决方案 - 3. 评估每个方案的优缺点 - 4. 选择最优方案并说明原因 - 5. 列出具体的实施步骤 - 6. 考虑可能的风险和应对措施 - - -- 不随意删除或修改已有的正确实现 -- 不在多处实现同一功能 -- 保持代码结构清晰简单 -- 修改前先理解设计原则 - -#### 异步任务处理原则 -- 分析生命周期和所有权需求 -- 避免盲目克隆,只克隆必要数据 -- 考虑类型特征(如 P2PModule 的轻量级 Clone) -- 评估替代方案 - -```rust -// 反例:过度克隆 -let p2p = self.p2p().clone(); // 不必要,P2PModule 本身就是轻量级的 -let data_general = self.data_general().clone(); // 不必要,同上 - -// 正例:按需克隆 -let split_info = split.clone(); // 必要,因为来自临时变量的引用 -``` - -分析要点: -- 使用场景:确认异步任务中的实际需求 -- 类型特征:检查是否已实现轻量级 Clone -- 生命周期:特别关注临时变量引用 -- 替代方案:考虑其他实现方式 - -### 7.3 错误与正确示例 -- 错误示例:手动实现已有的宏生成函数,在两个地方都实现同一个函数,过度修改已有代码结构,有损压缩文档内容。 -- 正确示例:使用宏生成的访问函数,在合适的位置添加新功能,遵循已有的代码组织方式,保持文档的完整性和准确性。 - -### 7.4 异步任务变量处理规范 - -#### 1. 变量分析原则 -- 生命周期分析:确定变量在异步任务中的生存期 -- 所有权需求:判断是否需要克隆或移动所有权 -- 类型特征:考虑变量的类型特性(如 Clone、Send、'static 等) -- 数据共享:评估是否需要在多个任务间共享数据 - -#### 2. 克隆策略 -必须克隆的情况: -- 临时变量引用:`split_info.clone()`(来自迭代器) -- 多任务共享:`unique_id.clone()`(多个任务需要) -- 部分数据:`data_item.clone_split_range()`(只克隆需要的范围) - -不需要克隆的情况: -- 值类型复制:`version`(直接复制即可) -- 已实现 Copy:基本数据类型 -- 单一任务使用:不需要在多个任务间共享的数据 - -#### 3. View 模式使用规范 -基本原则: -- View 本身已经是完整引用:不需要额外的 view 字段 -- 异步任务中使用:`self.clone()` -- 模块访问:通过 view 直接访问其他模块 - -示例代码: -```rust -// 正确示例 -let view = self.clone(); // View 本身克隆 -let resp = view.data_general().rpc_call_write_once_data... - -// 错误示例 -let view = self.view.clone(); // 错误:不需要额外的 view 字段 -let data_general = self.data_general().clone(); // 错误:不需要单独克隆模块 -``` - -#### 4. 异步任务数据处理检查清单 -- [ ] 是否只克隆必要的数据? -- [ ] 临时变量是否正确处理? -- [ ] View 的使用是否符合规范? -- [ ] 是否避免了重复克隆? -- [ ] 数据共享策略是否合理? - -#### 5. 常见场景示例 - -1. 批量数据处理: -```rust -// 正确处理临时变量和部分数据 -let split_info = split_info.clone(); // 临时变量必须克隆 -let data_item = data_item.clone_split_range(range); // 只克隆需要的部分 -let view = self.clone(); // View 克隆用于异步任务 -``` - -2. 并发任务处理: -```rust -// 使用信号量和数据共享 -let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT)); -let view = self.clone(); // 一次克隆,多处使用 -for node_id in nodes { - let permit = semaphore.clone(); - let view = view.clone(); // View 在任务间共享 - tokio::spawn(async move { ... }); -} -``` - -### 7.3 变量类型难分辨的情况 - -#### 7.3.1 Proto生成的Rust类型 -1. proto中的普通字段在Rust中的表现: - - proto中的 `string file_name_opt = 1` 生成的是普通 `String` 类型,而不是 `Option` - - proto中的 `bool is_dir_opt = 2` 生成的是普通 `bool` 类型,而不是 `Option` - - 字段名带 `_opt` 后缀不代表它在Rust中是 `Option` 类型 - -2. proto中的message嵌套在Rust中的表现: - - `DataItem` 中的 `oneof data_item_dispatch` 在Rust中是一个字段 - - 访问路径是: `data.data_item_dispatch` 而不是 `data.data.data_item_dispatch` - - `Option` 需要先 `unwrap()` 才能访问其内部字段 - -#### 7.3.2 容易混淆的类型转换 -1. proto生成的类型和标准库类型的关系: - - proto生成的 `String` 字段不能直接用 `unwrap_or_default()` - - proto生成的 `bool` 字段不能直接用 `unwrap_or()` - -### 7.5 思维方式原则 -- 思维优先于行动: - - 在开始任何操作前,先理解"为什么"而不是"怎么做" - - 确保完全理解当前上下文中的所有信息 - - 避免机械性思维和跳过思考的行为模式 - - 对于涉及代码逻辑的命令,必须先阅读和理解相关代码,再执行命令 - - 当需要复用或参考现有代码逻辑时,必须先在项目中查找并理解相关实现 - - 在理解代码时,需要关注: - - 代码的执行流程和依赖关系 - - 数据结构和状态管理方式 - - 错误处理和异常情况的处理方式 - -- 代码分析记录原则: - - 在修改任何代码之前,必须在 review.md 中记录完整的代码分析: - 1. 问题代码:截取导致问题的具体代码片段 - 2. 上下文代码:截取理解问题所需的相关代码 - 3. 问题成因:详细分析问题的具体原因 - 4. 修复方案:说明如何修复以及为什么这样修复 - 5. 修改验证:列出验证修改正确性的方法 - - 分析记录必须: - - 使用代码块格式展示代码 - - 保持代码片段的完整性和可读性 - - 确保分析逻辑清晰 - - 说明修改的影响范围 - -- 父问题相关性分析: - - 在开始分析任何问题之前,必须首先进行父问题相关性分析 - - 分析步骤: - 1. 
确认当前问题的父问题是什么 - 2. 回溯父问题的执行计划和记录 - 3. 判断当前问题是否是父问题引起的 - 4. 确认解决当前问题是否必要且有助于解决父问题 - - 分析结果必须包含: - 1. 父问题的明确引用 - 2. 相关性的具体分析 - 3. 解决必要性说明 - 4. 优先级判断 - - 如果当前问题与父问题无关: - 1. 记录分析结果 - 2. 暂时搁置该问题 - 3. 继续专注于父问题的解决 - -- 内化规则: - - 把规则视为思维框架而不是外部约束 - - 养成先检查当前上下文的习惯 - - 避免在已有信息的情况下去外部搜索 -- 关注本质: - - 理解问题的根本原因比立即解决问题更重要 - - 分析失误的思维模式而不是简单记住正确操作 - - 把经验转化为思维方式而不是操作步骤 - -## 8. 代码评审与修改文档规则 - -### 8.1 修改计划与记录要求 -- 每次修改代码前: - 1. 必须查看项目根目录的 `review.md` 文件 - 2. 根据现有内容确定修改计划的位置和层级 - 3. 在对应位置添加修改计划 - 4. 使用 markdown 格式记录,保持层级结构清晰 - -### 8.2 文档结构规范 -- 所有修改记录必须使用以下简化的问题树结构: - ```markdown - - 任务/问题:xxxx - - 分析:xxxx - - 计划任务1:xxxx - 新问题1:xxxx - - 分析:xxxx - - 计划任务3:xxxx - 已完成 - - - 计划任务2:xxxx - 已完成 - ``` - -- 结构规则: - 1. 父节点必须是具体的任务或问题描述 - 2. 第一个子节点必须是对问题的分析 - 3. 后续子节点是具体的计划任务 - 4. 每个计划任务下可以包含新的问题,遵循相同的结构 - 5. 已完成的任务标记为"已完成" - 6. 保持缩进层级清晰 - -- 示例说明: - ```markdown - - 任务:修复类型转换错误 - - 分析:当前代码在类型转换时未考虑空值情况 - - 计划任务1:添加空值检查 - 新问题:如何处理空值转换失败 - - 分析:需要在转换失败时提供默认值 - - 计划任务:实现 Option 转换 - 已完成 - - - 计划任务2:添加单元测试 - 已完成 - ``` - -### 8.3 记录要求 -1. 修改计划必须包含: - - 修改目的 - - 预期效果 - - 可能的风险 - - 具体步骤 - -2. 修改过程必须记录: - - 实际执行的步骤 - - 遇到的每个问题 - - 解决方案和结果 - -3. 问题记录必须包含: - - 问题的具体表现 - - 问题的可能原因 - - 尝试的解决方案 - - 最终的解决方案 - - 预防措施(如果适用) - -### 8.4 维护原则 -- 保持文档的实时更新 -- 确保问题树结构清晰 -- 定期回顾和整理文档 -- 记录经验教训和最佳实践 - -### 8.5 任务识别规则 - -#### 8.5.1 任务状态判断 -1. 完成状态标记: - - 已完成任务必须标记为 `(done)` - - 未标记 `(done)` 的任务视为未完成 - - 不使用其他状态标记 - -2. 任务顺序规则: - - 文档开头说明:`(顺序:新的在前面;先解决就的未完成的;完成的有标注;问题可能存在子问题)` - - 新任务添加到未完成任务的最前面 - - 已完成任务移到未完成任务的后面 - - 子任务跟随父任务,保持缩进层级 - -3. 最老未完成任务识别: - - 从上到下扫描所有顶级任务 - - 跳过带有 `(done)` 标记的任务 - - 第一个不带 `(done)` 标记的任务即为最老未完成任务 - - 子任务不影响父任务的完成状态判断 - -4. 任务优先级: - - 未完成任务按出现顺序表示优先级(越靠后优先级越高) - - 子任务优先级高于同级后续任务 - - 阻塞性问题优先级最高 - -#### 8.5.2 任务解析检查清单 -在识别和处理任务时,必须检查: -- [ ] 任务是否有 `(done)` 标记 -- [ ] 任务是否为顶级任务 -- [ ] 是否有未完成的子任务 -- [ ] 任务的位置是否符合顺序规则 -- [ ] 是否存在阻塞性问题 - -## 9. 批量数据接口设计 - -### 9.1 BatchTransfer 设计规范 - -#### 9.1.1 组件职责定义 - -1. **数据结构职责划分** - - BatchTransfer(单个传输任务管理器)必须: - - 维护单个传输任务的完整状态(unique_id, version, block_type, total_blocks) - - 使用 DashMap 存储接收到的数据块,确保并发安全 - - 通过 Option 管理完成状态通知 - - 负责数据块的接收、验证和重组 - - - BatchManager(全局传输任务管理器)必须: - - 使用 DashMap 维护所有进行中的传输任务 - - 使用原子计数器生成唯一的请求序列号 - - 负责传输任务的创建、数据块处理和生命周期管理 - -2. **函数职责要求** - - call_batch_data(发送端)必须: - - 使用固定大小(1MB)进行数据分块 - - 通过 BatchManager 创建传输任务 - - 负责数据块的发送 - - 等待传输完成通知 - - - handle_block(接收端)必须: - - 接收并验证单个数据块 - - 更新传输状态 - - 在接收完所有块时触发完成处理 - - - complete(完成处理)必须: - - 校验所有数据块的完整性 - - 根据类型(内存/文件)重组数据 - - 发送完成通知 - -#### 9.1.2 数据流转规范 - -1. **发送流程要求** - - 必须按照以下顺序执行: - 1. 接收原始数据并验证 - 2. 计算分块策略 - 3. 创建传输任务 - 4. 按序发送数据块 - -2. **接收流程要求** - - 必须按照以下顺序处理: - 1. 接收数据块并验证 - 2. 存储到对应的 BatchTransfer - 3. 检查完整性 - 4. 触发完成处理 - 5. 通知发送端 - -#### 9.1.3 错误处理规范 - -1. **组件错误处理职责** - - BatchTransfer 必须处理: - - 数据块完整性验证错误 - - 数据重组过程错误 - - - BatchManager 必须处理: - - 传输任务存在性检查错误 - - 并发访问保护错误 - - - 调用方必须处理: - - 网络传输错误 - - 超时错误 - -2. **错误恢复策略** - - 必须支持以下错误恢复机制: - - 单个数据块的重试 - - 传输任务的取消 - - 资源的正确释放 - -#### 9.1.4 资源管理规范 - -1. **内存管理** - - 必须预分配适当的缓冲区大小 - - 必须及时释放不再需要的内存 - - 必须控制并发数据块的最大数量 - -2. **文件管理** - - 必须使用唯一的临时文件名 - - 必须在完成后清理临时文件 - - 必须正确处理文件权限 - -3. **并发控制** - - 必须使用 DashMap 确保并发安全 - - 必须使用原子操作处理计数器 - - 必须正确管理 channel 资源 - -### 9.2 批量写入实现 - -#### 9.2.1 总体流程 - -1. **数据切分** - - 内存数据按 1MB 切块 - - 文件数据按 4MB 切块 - - 计算总块数和最后一块大小 - -2. **任务池初始化** - - 创建 4 个传输任务槽位 - - 每个任务负责一个数据块的传输 - - 任务完成后自动释放槽位 - -3. **数据块获取** - - 空闲任务会请求新的数据块 - - 最多预取 8 个块 - - 超过限制则等待其他块处理完成 - -4. **传输过程** - - 任务获取到数据块后开始传输 - - 每个请求包含块索引和数据类型 - - 单个请求超时时间为 30 秒 - -5. 
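**发送端示意(补充)**

   结合上面步骤 1~4,这里给出一段发送端的最小草图:按 1MB 切块、固定 4 个并发槽位、单个请求 30 秒超时。这只是按本节描述拼出的示意,`send_block`、`send_all_blocks` 均为示意用的假设命名(实际发送应对应 `rpc_call_batch_data`),并非项目中已有接口:

   ```rust
   use std::sync::Arc;
   use tokio::sync::Semaphore;
   use tokio::time::{timeout, Duration};

   const BLOCK_SIZE: usize = 1024 * 1024; // 内存数据按 1MB 切块
   const MAX_CONCURRENT_TASKS: usize = 4; // 任务池 4 个槽位
   const REQUEST_TIMEOUT: Duration = Duration::from_secs(30); // 单个请求超时 30 秒

   // 假设的底层发送函数,真实实现应调用 rpc_call_batch_data
   async fn send_block(_index: u32, _bytes: &[u8]) -> Result<(), String> {
       Ok(())
   }

   /// 示意:把数据切块后有界并发地发送,全部成功才算完成
   async fn send_all_blocks(data: Arc<Vec<u8>>) -> Result<(), String> {
       let total_blocks = (data.len() + BLOCK_SIZE - 1) / BLOCK_SIZE;
       let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_TASKS));
       let mut tasks = Vec::with_capacity(total_blocks);

       for block_index in 0..total_blocks {
           // 槽位占满时在此等待,形成背压
           let permit = semaphore.clone().acquire_owned().await.unwrap();
           let data = data.clone();
           tasks.push(tokio::spawn(async move {
               let _permit = permit; // 任务结束自动释放槽位
               let start = block_index * BLOCK_SIZE;
               let end = (start + BLOCK_SIZE).min(data.len());
               // 每个请求携带块索引,超时即判定本块失败
               timeout(REQUEST_TIMEOUT, send_block(block_index as u32, &data[start..end]))
                   .await
                   .map_err(|_| format!("block {} timeout", block_index))?
           }));
       }
       for t in tasks {
           t.await.map_err(|e| e.to_string())??;
       }
       Ok(())
   }
   ```

   预取上限(最多 8 块)与失败重试(最多 3 次、间隔 1 秒)可在此骨架上分别用更大的信号量和外层循环补充,这里从略。

5. 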
**完成处理** - - 所有块传输完成后结束 - - 失败的块会重试最多 3 次 - - 重试间隔为 1 秒 - -#### 9.2.2 接收方处理 - -1. **数据管理** - - 复用 get_data 的文件和内存管理逻辑 - - 文件使用 FileManager 管理可变文件 - - 内存使用 MemoryManager 管理内存块 - -2. **并行写入** - - 每个数据块作为独立的写入任务 - - 文件写入使用 seek + write 定位写入 - - 内存写入使用偏移量计算地址 - -3. **并发控制** - - 使用 RwLock 保护共享资源 - - 文件操作使用 async 文件 I/O - - 内存操作使用原子操作保证并发安全 - -4. **状态管理** - - 记录每个块的写入状态 - - 支持断点续传和重试 - - 完成后更新元数据 - ``` - -3. **接收方处理** - ```rust - struct BatchDataWriter { - // 文件缓存,使用 unique_id 作为 key - file_cache: HashMap, BatchFileCache>, - // 内存缓存,使用 unique_id 作为 key - memory_cache: HashMap, BatchMemoryCache>, - } - - impl BatchDataWriter { - async fn handle_request(&mut self, req: BatchDataRequest) -> BatchDataResponse { - let cache = match req.block_type { - DataBlockType::Memory => &mut self.memory_cache, - DataBlockType::File => &mut self.file_cache, - }; - - // 获取或创建缓存 - let block_cache = cache.entry(req.unique_id.clone()) - .or_insert_with(|| self.create_cache(req.block_type)); - - // 写入数据块 - match block_cache.write_block(req.block_index, req.data).await { - Ok(()) => BatchDataResponse { - request_id: req.request_id, - success: true, - error_message: String::new(), - version: req.version, - }, - Err(e) => BatchDataResponse { - request_id: req.request_id, - success: false, - error_message: e.to_string(), - version: req.version, - }, - } - } - } - ``` - -#### 9.2.2 缓存管理 - -1. **文件缓存** - ```rust - struct BatchFileCache { - path: PathBuf, // 临时文件路径 - file: File, // 文件句柄 - received_blocks: HashSet, // 已接收的块 - } - - impl BatchFileCache { - async fn write_block(&mut self, index: u32, data: Vec) -> Result<()> { - // 记录块并写入文件 - self.received_blocks.insert(index); - self.file.seek(SeekFrom::Start((index as u64) * BLOCK_SIZE))?; - self.file.write_all(&data)?; - Ok(()) - } - } - ``` - -2. **内存缓存** - ```rust - struct BatchMemoryCache { - blocks: HashMap>, // 块索引 -> 数据 - total_size: usize, // 总大小 - } - - impl BatchMemoryCache { - async fn write_block(&mut self, index: u32, data: Vec) -> Result<()> { - // 直接存储到内存 - self.blocks.insert(index, data); - Ok(()) - } - } - ``` - -#### 9.2.3 注意事项 - -1. **并发控制** - - 使用 MAX_CONCURRENT_TASKS 控制带宽使用 - - 通过 MAX_PENDING_BLOCKS 实现背压控制 - - 任务完成后及时释放资源 - -2. **内存管理** - - 预取块数量不超过 MAX_PENDING_BLOCKS - - 使用 Arc<[u8]> 避免数据复制 - - 大文件优先使用文件缓存 - -3. **错误处理** - - 记录失败的块以便重试 - - 最多重试 MAX_RETRIES 次 - - 重试间隔为 RETRY_DELAY_MS - - 单个任务超过 TASK_TIMEOUT_MS 自动取消 - -4. **性能优化** - - 使用异步 I/O 提高并发性 - - 任务空闲时自动获取新块 - - 支持乱序处理和断点续传 - -5. **监控和调试** - - 记录每个块的处理状态 - - 统计传输速率和成功率 - - 支持取消整个传输任务 - -### 9.3 请求方逻辑 - -1. **请求预处理**: - - 生成唯一的 request_id - - 验证数据项数量不超过 max_batch_size - - 设置适当的超时时间 - -### 9.3 并行写入实现规范 - -#### 9.3.1 WriteSplitDataTaskGroup 设计模式 -1. **基础结构设计** - ```rust - enum WriteSplitDataTaskGroup { - ToFile { - file_path: PathBuf, - tasks: Vec>>, - }, - ToMem { - shared_mem: SharedMemHolder, - tasks: Vec>>, - }, - } - ``` - -2. **职责划分** - - 任务组管理: - - 创建和初始化写入任务 - - 跟踪任务状态和完成情况 - - 提供统一的任务管理接口 - - 数据写入: - - 文件写入使用 FileExt::write_at - - 内存写入使用 SharedMemOwnedAccess - - 支持并发安全的数据访问 - -3. **并发控制要求** - - 文件写入: - - 使用 tokio::task::spawn_blocking 处理 I/O - - 通过文件偏移确保并发安全 - - 每个任务独占写入区域 - - 内存写入: - - 使用 SharedMemOwnedAccess 保证访问安全 - - 通过 Range 隔离数据区域 - - Arc 管理共享内存生命周期 - -4. **错误处理规范** - - 数据验证: - - 检查数据块类型匹配 - - 验证数据长度一致性 - - 确保写入位置正确 - - 错误传播: - - 使用 Result 类型传递错误 - - 支持任务级别的错误处理 - - 实现错误重试机制 - -#### 9.3.2 复用规范 -1. **接口设计要求** - - 提供统一的数据写入接口 - - 支持文件和内存两种模式 - - 保持与现有实现兼容 - -2. 
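**分片写入示意(补充)**

   结合 9.3.1 的并发控制要求,这里给出一个最小草图:文件分片放入 `spawn_blocking`,用 Unix 的 `FileExt::write_all_at` 按偏移写入;内存分片按偏移写进共享缓冲区。`SplitTarget`、`write_split` 均为示意用的假设命名;真实实现中内存一侧应使用 `SharedMemOwnedAccess` 对各自 `Range` 的独占访问,而不是像这里一样加锁:

   ```rust
   use std::os::unix::fs::FileExt; // 提供 write_at / write_all_at(Unix)
   use std::sync::Arc;
   use tokio::sync::Mutex;

   /// 示意:单个分片的写入目标,文件或内存二选一
   enum SplitTarget {
       File { file: Arc<std::fs::File>, offset: u64 },
       Mem { buf: Arc<Mutex<Vec<u8>>>, offset: usize },
   }

   /// 示意:写入一个分片;各任务的写入区域互不重叠,因此可并发执行
   async fn write_split(target: SplitTarget, data: Vec<u8>) -> std::io::Result<()> {
       match target {
           SplitTarget::File { file, offset } => {
               // 阻塞 I/O 放到 spawn_blocking,避免卡住异步运行时
               tokio::task::spawn_blocking(move || file.write_all_at(&data, offset))
                   .await
                   .expect("join error")?;
           }
           SplitTarget::Mem { buf, offset } => {
               // 假设 buf 已按总大小预分配(示意);
               // 真实实现用 SharedMemOwnedAccess 独占各自 Range,无需加锁
               let mut guard = buf.lock().await;
               guard[offset..offset + data.len()].copy_from_slice(&data);
           }
       }
       Ok(())
   }
   ```

   文件一侧可先预分配空间再并发 `write_at`;由于各分片偏移互不重叠,两种目标都不需要额外的顺序约束。

2. 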
**数据管理规范** - - 文件数据: - - 使用文件偏移管理数据位置 - - 支持并发写入和随机访问 - - 实现临时文件清理 - - 内存数据: - - 使用 SharedMemOwnedAccess 管理 - - 支持数据分片和并发访问 - - 确保内存安全释放 - -3. **任务管理要求** - - 并发控制: - - 使用信号量限制并发任务数 - - 支持任务取消和超时处理 - - 实现资源自动释放 - - 状态同步: - - 跟踪任务完成状态 - - 支持等待所有任务完成 - - 提供任务进度反馈 - -4. **性能优化准则** - - 预分配资源: - - 文件空间预分配 - - 内存缓冲区预分配 - - 任务队列容量预设 - - 并发调优: - - 根据系统资源调整并发度 - - 优化任务调度策略 - - 减少数据复制开销 - -## 10. 构建规则 - -### 10.1 编译命令规范 - -#### 10.1.1 使用 sudo 编译 -- 项目编译前必须确保已设置默认工具链: - ```bash - rustup default stable - ``` - -- 项目编译必须使用 sudo 权限: - ```bash - sudo -E $HOME/.cargo/bin/cargo build - ``` - -#### 10.1.2 使用场景 -1. 首次编译项目 -2. 依赖更新后的完整编译 -3. 涉及系统级权限的功能修改 - -#### 10.1.3 安全注意事项 -1. 确保使用 sudo 的必要性: - - 仅在确实需要系统权限时使用 - - 优先考虑其他解决方案 - -2. 权限管理: - - 确保开发者具有必要的 sudo 权限 - - 遵循最小权限原则 - - 避免在非必要情况下使用 sudo - -3. 环境一致性: - - 保持开发环境权限配置一致 - - 记录所有需要 sudo 权限的依赖 - - 在文档中说明使用 sudo 的原因 - -4. 编译环境检查: - - 确保 rustup 工具链已正确安装 - - 确保已设置默认工具链:`rustup default stable` - - 检查 cargo 路径是否正确 - -### 8.3 处理方逻辑 - -1. **并发处理**: - - 使用工作池处理批量请求 - - 控制并发度 - - 实现公平调度 - -2. **资源管理**: - - 内存使用限制 - - 连接数限制 - - CPU 使用限制 - -3. **监控和日志**: - - 记录处理时间 - - 记录成功/失败率 - - 记录资源使用情况 - -### 8.4 最佳实践 - -1. **批量大小**: - - 建议单批次处理 100-1000 个数据项 - - 根据数据大小动态调整 - -2. **超时设置**: - - 基础超时:30秒 - - 根据批量大小线性增加 - - 最大超时:120秒 - -3. **错误处理**: - - 提供详细的错误信息 - - 支持部分成功的情况 - - 实现幂等性 - -4. **性能考虑**: - - 使用异步处理 - - 实现批量压缩 - - 考虑网络带宽限制 - - - 把规则视为思维框架而不是外部约束 - - 养成先检查当前上下文的习惯 - - 避免在已有信息的情况下去外部搜索 -- 关注本质: - - 理解问题的根本原因比立即解决问题更重要 - - 分析失误的思维模式而不是简单记住正确操作 - - 把经验转化为思维方式而不是操作步骤 diff --git a/compilelog b/compilelog index f071912..73445c7 100644 --- a/compilelog +++ b/compilelog @@ -1,7 +1,6 @@ warning: profiles for the non root package will be ignored, specify profiles at the workspace root: package: /home/nature/padev/waverless/src/main/Cargo.toml workspace: /home/nature/padev/waverless/Cargo.toml - Compiling wasm_serverless v0.1.0 (/home/nature/padev/waverless/src/main) warning: function `path_is_option` is never used --> ws_derive/src/lib.rs:21:4 | @@ -11,6 +10,7 @@ warning: function `path_is_option` is never used = note: `#[warn(dead_code)]` on by default warning: `ws_derive` (lib) generated 1 warning + Compiling wasm_serverless v0.1.0 (/home/nature/padev/waverless/src/main) warning: unused import: `crate::general::app::m_executor::FnExeCtxAsync` --> src/main/src/general/app/app_owned/wasm_host_funcs/result.rs:2:5 | @@ -79,6 +79,12 @@ warning: unused import: `tokio::io::AsyncWriteExt` 31 | use tokio::io::AsyncWriteExt; | ^^^^^^^^^^^^^^^^^^^^^^^^ +warning: unused import: `crate::general::data::m_data_general::dataitem::WantIdxIter` + --> src/main/src/general/data/m_data_general/mod.rs:6:5 + | +6 | use crate::general::data::m_data_general::dataitem::WantIdxIter; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + warning: unused imports: `DataMetaGetRequest` and `DataVersionScheduleRequest` --> src/main/src/general/data/m_data_general/mod.rs:16:29 | @@ -97,18 +103,18 @@ warning: unused import: `enum_as_inner::EnumAsInner` 36 | use enum_as_inner::EnumAsInner; | ^^^^^^^^^^^^^^^^^^^^^^^^^^ -warning: unused import: `dashmap::DashMap` - --> src/main/src/general/data/m_data_general/mod.rs:38:5 - | -38 | use dashmap::DashMap; - | ^^^^^^^^^^^^^^^^ - warning: unused import: `std::ops::Range` --> src/main/src/general/data/m_data_general/mod.rs:40:5 | 40 | use std::ops::Range; | ^^^^^^^^^^^^^^^ +warning: unused imports: `AtomicU32` and `Ordering` + --> src/main/src/general/data/m_data_general/mod.rs:45:20 + | +45 | sync::atomic::{AtomicU32, Ordering}, + | ^^^^^^^^^ ^^^^^^^^ + 
warning: unused import: `std::future::Future` --> src/main/src/general/data/m_data_general/mod.rs:51:5 | @@ -243,51 +249,176 @@ warning: unused import: `std::time::Duration` 6 | use std::time::Duration; | ^^^^^^^^^^^^^^^^^^^ -error: fields `version`, `block_type`, and `total_blocks` are never read - --> src/main/src/general/data/m_data_general/batch.rs:52:9 +error: fields `batch_manager` and `batch_transfers` are never read + --> src/main/src/general/data/m_data_general/mod.rs:96:5 + | +94 | pub struct DataGeneral { + | ----------- fields in this struct +95 | view: DataGeneralView, +96 | batch_manager: Arc, + | ^^^^^^^^^^^^^ +... +110 | batch_transfers: DashMap)>, // 修改类型为 (unique_id -> (version, data)) + | ^^^^^^^^^^^^^^^ + | +note: the lint level is defined here + --> src/main/src/main.rs:7:5 + | +7 | dead_code, + | ^^^^^^^^^ + +error: function `flush_the_data` is never used + --> src/main/src/general/data/m_data_general/mod.rs:1500:4 + | +1500 | fn flush_the_data( + | ^^^^^^^^^^^^^^ + +error: enum `WantIdxIter` is never used + --> src/main/src/general/data/m_data_general/dataitem.rs:21:17 + | +21 | pub(super) enum WantIdxIter<'a> { + | ^^^^^^^^^^^ + +error: associated function `new` is never used + --> src/main/src/general/data/m_data_general/dataitem.rs:37:19 + | +36 | impl<'a> WantIdxIter<'a> { + | ------------------------ associated function in this implementation +37 | pub(super) fn new(ty: &'a GetOrDelDataArgType, itemcnt: DataItemIdx) -> Self { + | ^^^ + +error: multiple fields are never read + --> src/main/src/general/data/m_data_general/batch.rs:51:9 | 50 | pub(super) struct BatchTransfer { | ------------- fields in this struct 51 | pub unique_id: Vec, + | ^^^^^^^^^ 52 | pub version: u64, | ^^^^^^^ 53 | pub block_type: proto::BatchDataBlockType, | ^^^^^^^^^^ 54 | pub total_blocks: u32, | ^^^^^^^^^^^^ - | -note: the lint level is defined here - --> src/main/src/main.rs:7:5 - | -7 | dead_code, - | ^^^^^^^^^ - -error: method `add_block` is never used - --> src/main/src/general/data/m_data_general/batch.rs:104:18 +55 | // 使用 channel 进行数据传输 +56 | data_sender: mpsc::Sender>, + | ^^^^^^^^^^^ +57 | // 写入任务 +58 | write_task: JoinHandle>, + | ^^^^^^^^^^ +59 | // 完成通知 channel +60 | pub tx: Option>>, + | ^^ + +error: associated items `new`, `add_block`, `complete`, and `calculate_splits` are never used + --> src/main/src/general/data/m_data_general/batch.rs:64:18 | 63 | impl BatchTransfer { - | ------------------ method in this implementation + | ------------------ associated items in this implementation +64 | pub async fn new( + | ^^^ ... 104 | pub async fn add_block(&self, index: u32, data: Vec) -> WSResult { | ^^^^^^^^^ +... +121 | pub async fn complete(mut self) -> WSResult<()> { + | ^^^^^^^^ +... +154 | fn calculate_splits(total_size: usize, block_size: usize) -> Vec> { + | ^^^^^^^^^^^^^^^^ -error: method `handle_block` is never used - --> src/main/src/general/data/m_data_general/batch.rs:211:18 +error: fields `transfers` and `sequence` are never read + --> src/main/src/general/data/m_data_general/batch.rs:168:5 + | +167 | pub(super) struct BatchManager { + | ------------ fields in this struct +168 | transfers: DashMap, + | ^^^^^^^^^ +169 | sequence: AtomicU64, + | ^^^^^^^^ + +error: methods `next_sequence`, `create_transfer`, and `handle_block` are never used + --> src/main/src/general/data/m_data_general/batch.rs:180:12 | -173 | impl BatchManager { - | ----------------- method in this implementation +172 | impl BatchManager { + | ----------------- methods in this implementation ... 
-211 | pub async fn handle_block( +180 | pub fn next_sequence(&self) -> u64 { + | ^^^^^^^^^^^^^ +... +184 | pub async fn create_transfer( + | ^^^^^^^^^^^^^^^ +... +210 | pub async fn handle_block( | ^^^^^^^^^^^^ error: method `call_batch_data` is never used - --> src/main/src/general/data/m_data_general/batch.rs:238:25 + --> src/main/src/general/data/m_data_general/batch.rs:237:25 | -236 | impl DataGeneral { +235 | impl DataGeneral { | ---------------- method in this implementation -237 | /// 发起批量数据传输 -238 | pub(super) async fn call_batch_data( +236 | /// 发起批量数据传输 +237 | pub(super) async fn call_batch_data( | ^^^^^^^^^^^^^^^ -warning: `wasm_serverless` (bin "wasm_serverless") generated 38 warnings -error: could not compile `wasm_serverless` (bin "wasm_serverless") due to 4 previous errors; 38 warnings emitted +error: unused result of type `std::option::Option` + --> src/main/src/general/data/m_data_general/mod.rs:308:21 + | +308 | data_map.insert(idx, resp.data[0].clone()); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +note: the lint level is defined here + --> src/main/src/main.rs:9:5 + | +9 | unused_results, + | ^^^^^^^^^^^^^^ + +error: unused result of type `std::option::Option` + --> src/main/src/general/data/m_data_general/mod.rs:337:21 + | +337 | data_map.insert(idx, resp.data[0].clone()); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: unused result of type `std::option::Option` + --> src/main/src/general/data/m_data_general/mod.rs:364:17 + | +364 | data_map.insert(idx, resp.data[0].clone()); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: unused result of type `std::option::Option` + --> src/main/src/general/data/m_data_general/mod.rs:391:21 + | +391 | data_map.insert(idx, resp.data[0].clone()); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: unused result of type `WriteOneDataResponse` + --> src/main/src/general/data/m_data_general/mod.rs:561:17 + | +561 | task.await??; + | ^^^^^^^^^^^^^ + +error: unused `Result` that must be used + --> src/main/src/general/data/m_data_general/mod.rs:1451:25 + | +1451 | view.data_general().rpc_handle_batch_data(responsor, req).await; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: this `Result` may be an `Err` variant, which should be handled +note: the lint level is defined here + --> src/main/src/main.rs:12:5 + | +12 | unused_must_use, + | ^^^^^^^^^^^^^^^ +help: use `let _ = ...` to ignore the resulting value + | +1451 | let _ = view.data_general().rpc_handle_batch_data(responsor, req).await; + | +++++++ + +error: unused result of type `std::option::Option` + --> src/main/src/general/data/m_data_general/batch.rs:206:9 + | +206 | self.transfers.insert(request_id.clone(), transfer); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: `wasm_serverless` (bin "wasm_serverless") generated 39 warnings +error: could not compile `wasm_serverless` (bin "wasm_serverless") due to 16 previous errors; 39 warnings emitted diff --git a/review.md b/review.md index 286bb17..46ae44e 100644 --- a/review.md +++ b/review.md @@ -1,435 +1,9 @@ (顺序:新的在前面;先解决就的未完成的;完成的有标注;问题可能存在子问题) -- 任务: error: method `call_batch_data` is never used - error: method `handle_block` is never used - error: method `add_block` is never used - error: fields `version`, `block_type`, and `total_blocks` are never read - --> src/main/src/general/data/m_data_general/batch.rs:52:9 - 分析一下,这些应该是batch的核心逻辑,为什么没有用起来?把batch组件间调用关系以及各自作用都分析到当前文档里 - - 修改计划: - - 修改目的: - - 分析 batch 组件中未使用的方法和字段的原因 - - 理清 batch 
组件的调用关系和各部分作用 - - 确定是否需要保留这些未使用的代码 - - 预期效果: - - 完整理解 batch 组件的设计和实现 - - 明确各个组件的作用和调用关系 - - 为后续可能的代码优化提供依据 - - 可能的风险: - - 如果这些未使用的代码确实有其设计意图,贸然删除可能影响未来功能 - - 可能存在隐藏的依赖关系未被发现 - - 具体步骤: - 1. 分析 batch.rs 中的代码结构和设计文档 - 2. 检查相关调用代码 - 3. 确定各组件作用和关系 - 4. 记录分析结果 - - 修改过程: - - 代码分析: - 1. 组件结构分析: - - BatchTransfer:管理单个批量传输的状态 - - 主要字段: - - unique_id:传输标识 - - version、block_type、total_blocks:传输元数据(未使用) - - data_sender:数据传输 channel - - write_task:写入任务 - - tx:完成通知 channel - - BatchManager:管理所有进行中的批量传输 - - 提供 create_transfer 和 handle_block 等接口 - - DataGeneral:实际的数据传输实现 - - rpc_call_batch_data:RPC 调用接口 - - write_data_batch:实际使用的批量写入方法 - - 2. 调用关系分析: - - write_data_batch 是实际使用的入口 - - 通过 BatchManager.create_transfer 创建传输任务 - - 使用 rpc_call_batch_data 发送数据 - - 但 handle_block 和 add_block 确实未被使用 - - 3. 未使用代码分析: - - version、block_type、total_blocks:这些字段虽然在 BatchTransfer 中定义,但实际操作都在 DataGeneral 中完成 - - handle_block 和 add_block:这些方法可能是为了支持更细粒度的批量传输控制,但目前的实现采用了更简单的方式 - - * 数据写入流程 -``` - +------------------------+ - | 发起节点 | - | [DataGeneral] | - | - write_data() | - | 1. 准备DataItems | - | 2. 计算每个DataItem大小| - +------------------------+ - | - | DataVersionScheduleRequest - | - unique_id: 数据标识 - | - version: 版本号 - | - context: 调度上下文 - ↓ - +------------------------+ - | Master节点 | - | [DataMaster] | - | - schedule_data() | - | 1. 生成DataSetMeta | - | 2. 创建DataSplits | - | 3. 分配存储节点 | - +------------------------+ - | - | DataVersionScheduleResponse - | - version: 版本号 - | - split: 数据分片信息 - ↓ - +------------------------+ - | 发起节点 | - | [DataGeneral] | - | - flush_the_data() | - | (并发处理每个DataItem) | - +------------------------+ - | - +--------------------+--------------------+ - | | - ↓ ↓ - +-----------------------+ +-----------------------+ - | 主存储节点写入 | | 缓存节点写入 | - | [DataGeneral] | | [DataGeneral] | - | WriteOneDataRequest: | | BatchDataRequest: | - | - unique_id | | - request_id | - | - version | | - block_type | - | - data (DataItems) | | - block_index | - | - rpc_handle_write_one_data() | | - data | - | 并发处理每个Split | | - version | - | | | - write_data_batch() | - +-----------------------+ +-----------------------+ - / | \ / | \ - / | \ / | \ - Node1 Node2 NodeN Node1 Node2 NodeN - (SplitA)(SplitB)(SplitX) (DataItem)(DataItem)(DataItem) - \ | / \ | / - \ | / \ | / - \ | / \ | / - \|/ \|/ - | | - | 并行写入完成 | - +------------------+-------------------+ - | - ↓ - +------------------------+ - | 发起节点 | - | 1. 等待所有并行完成 | - | 2. 检查所有结果 | - | 3. 返回最终状态 | - +------------------------+ -``` - - * Batch 数据传输实现 (待优化版本) -``` - +------------------------+ - | 发起节点 | - | [DataGeneral] | - | - call_batch_data() | - | 1. 分割数据块(1MB) | - | 2. 创建有界任务池 | - | (建议并发数=3) | - +------------------------+ - | - | 并发发送数据块 - | (有界队列控制) - ↓ - +--------------------+--------------------+ - | | - ↓ ↓ - +-----------------------+ +-----------------------+ - | BatchDataRequest(1) | | BatchDataRequest(N) | - | - request_id | | - request_id | - | - block_type | | - block_type | - | - block_index: 0 | | - block_index: N | - | - data | | - data | - +-----------------------+ +-----------------------+ - | - | RPC 请求 - ↓ - +------------------------+ - | 目标节点 | - | [DataGeneral] | - | - rpc_handle_batch_data()| - | 1. 获取元信息 | - | 2. 
创建WriteTaskGroup | - +------------------------+ - | - | 创建两个 channel - ↓ - +------------------------------------------------+ - | 接收方任务管理 | - | [BatchTransfer] | - | | - | (data_sender, data_receiver) ←→ 数据块传输 | - | (tx, rx) ←→ 完成通知 | - | | - | write_task → 异步写入任务 | - +------------------------------------------------+ - | - | 创建任务组 - ↓ - +------------------------------------------------+ - | 并发写入控制 | - | [WriteSplitDataTaskGroup] | - | | - | data_receiver ←←← 接收数据块 | - | ↓ | - | 并发任务池 | - | ↓ | - | 完成通知 →→→ tx | - +------------------------------------------------+ - | - | 完成回调 - ↓ - +------------------------+ - | 传输完成 | - | BatchDataResponse | - | - success: true | - | - version | - +------------------------+ -``` - -* 核心数据结构: - * DataItem: 单个数据项,可能被分片 - * DataSplit: 数据分片信息,包含偏移量和大小 - * DataSetMeta: 数据集元信息,包含版本号、分片信息和缓存模式 - - -- (done) 任务:将项目 main 中的 md 文档总结为 Obsidian Canvas - - 修改计划: - - 修改目的: - - 将分散在 main 目录中的 md 文档内容整理成可视化的知识图谱 - - 提高文档的可读性和关联性 - - 便于团队理解项目结构和设计思路 - - 预期效果: - - 生成一个清晰的项目知识图谱 - - 展示各个模块之间的关系 - - 突出重要的设计决策和实现细节 - - 可能的风险: - - 文档内容可能有遗漏 - - Canvas 布局可能不够直观 - - 具体步骤: - 1. 收集并阅读 main 目录下所有的 md 文档 - 2. 分析文档内容,提取关键信息 - 3. 设计 Canvas 布局结构 - 4. 创建 Canvas 文件并实现布局 - 5. 添加节点之间的关联关系 - 6. 检查和优化最终效果 - +- context提示 + 编译时应当输出到compilelog文件 -- (done) 任务:总结当前git未提交的变更 - - 分析: - - 主要变更文件: - 1. src/main/src/general/data/m_data_general/mod.rs - 2. src/main/src/result.rs - 3. .cursorrules - 4. wiki.md - - - 核心变更内容: - 1. 数据结构优化: - - 移除了未使用的 batch_transfers 字段 - - 保留并标记了 next_batch_id 方法为 #[allow(dead_code)] - - 添加了新的错误类型 WriteDataFailed - - 2. 批量写入逻辑优化: - - 简化了 write_data_batch 实现,移除了复杂的批处理逻辑 - - 使用现有的 call_batch_data 函数替代自定义实现 - - 改进了错误处理和日志记录 - - 3. 并行写入改进: - - 使用 WantIdxIter 优化迭代逻辑 - - 分离主节点和缓存节点的任务处理 - - 增强了错误处理机制 - - 4. 文档更新: - - 更新了 wiki.md 中的模块说明 - - 精简了 .cursorrules 文件内容 - - -- (done) 任务:完善 write_data 数据分片同时对接缓存节点的并行写入设计 - - 分析:当前需要在数据分片过程中,同时将数据通过两个不同的 RPC 调用分别发送到主存储节点和缓存节点。由于调用的 RPC 不同,需要在同一个数据块处理逻辑中并行启动两个任务,一个调用 rpc_call_batch_data,另一个调用缓存节点的 RPC(例如 rpc_call_cache_data)。两任务并行执行,最终收集各自结果,并综合判断整体成功情况。错误处理部分简化:记录错误日志,失败时返回提示信息,不做过细重试处理。 - - 修改计划: - 1. 在 call_batch_data(或相应写入数据逻辑)中,对每个数据块的处理循环增加两路并行任务: - - primary_task:调用现有的 rpc_call_batch_data 发送该块数据; - - cache_task:启动一个新的异步任务,调用缓存节点的 RPC 发送数据; - * 注意:cache_task 不应该只传输单个分片,而是负责传输整个 batch 数据。经过对 BatchManager 的分析,发现 BatchManager 可能自动并行内部任务,因此在外部调用时,对每个缓存节点只启动一个 task 来处理整个 batch 写入。 - 2. 使用 tokio::spawn 或 join_all 同时启动这两个任务,并等待它们完成。 - 3. 整合两个任务的返回结果。若任一任务返回失败,则记录错误日志并提示失败;否则认为整体写入成功。 - 4. 
最终,整个写入流程将在原有数据分片基础上,增加了并行的缓存节点数据写入逻辑,保证数据在两边同时写入: - - 对于主数据分片写入任务:保持原有策略,每个分片分别创建一个独立的并行任务; - - 对于缓存节点写入任务:采用 batch 接口传输整块数据,每个缓存节点只启动一个 task 来处理整个 batch 数据。 - - 伪代码: - ```rust - // 主数据分片写入任务:每个分片启动一个独立的任务 - let mut primary_tasks = Vec::new(); - for (i, chunk) in data_bytes.chunks(block_size).enumerate() { - // 构造当前分片请求,保持现有逻辑不变 - let req = build_primary_request(chunk, i); - let primary_task = tokio::spawn(async move { - // 调用 rpc_call_batch_data 发送当前分片数据 - rpc_call_batch_data.call(..., req, ...).await - }); - primary_tasks.push(primary_task); - } - - // 缓存节点写入任务:每个缓存节点只启动一次任务,传输整个 batch 数据 - let mut cache_tasks = Vec::new(); - for cache_node in cache_nodes { - let cache_task = tokio::spawn(async move { - // 调用 rpc_call_cache_data 发送整个 batch 数据给该缓存节点 - rpc_call_cache_data.call(..., full_data, cache_node, ...).await - }); - cache_tasks.push(cache_task); - } - - // 等待所有任务完成 - let primary_results = futures::future::join_all(primary_tasks).await; - let cache_results = futures::future::join_all(cache_tasks).await; - - // 整合结果:如果任一 primary 或 cache 任务失败,则记录错误并返回整体失败;否则返回成功 - if primary_results.iter().any(|res| res.is_err()) || cache_results.iter().any(|res| res.is_err()) { - tracing::error!("数据写入失败"); - return Err(String::from("整体写入失败").into()); - } - ``` - 5. 新问题: - - 任务:field `batch_manager` is never read - error: method `next_batch_id` is never used - function `flush_the_data` is never used - enum `WantIdxIter` is never used - 这几个内容都应该和write data强相关,为什么都没有用到了 - - 分析: - - 父问题相关性: - 1. 父问题:完善 write_data 数据分片同时对接缓存节点的并行写入设计 - 2. 相关性:直接关系到数据写入的实现机制和优化 - - 问题分类:代码清理和优化问题 - - 问题原因: - 1. batch_manager 字段: - - 虽然在 call_batch_data 函数中使用,但 call_batch_data 本身在新的并行写入设计中未被调用 - - write_data 函数中对缓存节点的写入直接使用 write_data_batch,跳过了 batch_manager - - 这表明 batch_manager 和相关的批处理机制在新设计中被替代 - - review: 应该使用batch manager,其实现了流式加载内存或文件分片,避免一次性读出全部 - 2. next_batch_id 方法: - - 原本用于生成批处理 ID - - 在新的设计中,批处理 ID 生成逻辑已移至 write_data 函数内部 - - 使用 version_schedule_resp 中的 version 作为版本控制 - - review: next_batch_id 这个应该是 batch_manager 自己用的,需要保留;batch功能并不完全和write_data耦合 - 3. flush_the_data 函数: - - 原本用于单个数据项的写入刷新 - - 在新的并行写入设计中,使用 tokio::spawn 创建异步任务 - - 数据写入通过 primary_tasks 和 cache_tasks 两组并行任务处理 - - 使用 futures::future::join_all 等待任务完成,替代了显式的刷新操作 - - review: 这个函数确实不需要了 - 4. WantIdxIter 枚举: - - 原本用于数据索引的迭代控制 - - 在新设计中,使用 enumerate() 和 zip() 迭代处理数据项 - - 数据分片通过 split.splits.iter().enumerate() 处理 - - 缓存节点通过 cache_nodes.iter().enumerate() 处理 - - review:这个也应该加回来,用于遍历item idx - - - 计划: - 1. 改进 write_data_batch 函数: - - 修改目的: - - 使用 batch_manager 实现流式分片传输 - - 避免大文件一次性加载到内存 - - 具体改动: - 1. 移除直接的数据分片逻辑: - ```rust - // 移除这部分 - let total_size = data.data_sz_bytes(); - let total_batches = (total_size + batch_size - 1) / batch_size; - ``` - 2. 添加 batch_manager 创建传输任务: - ```rust - // 创建 channel 接收数据块 - let (tx, mut rx) = mpsc::channel(1); - - // 创建传输任务 - let request_id = self.batch_manager.create_transfer( - unique_id.clone(), - version, - block_type, - data.data_sz_bytes() as u32, - tx, - ).await?; - ``` - 3. 使用 call_batch_data 发送数据: - ```rust - // 使用现有的 call_batch_data 函数 - let response = self.call_batch_data( - node_id, - unique_id.clone(), - version, - data, - block_type, - ).await?; - ``` - - 2. 恢复 WantIdxIter 的使用: - - 修改目的: - - 使用专门的索引迭代器替代通用的 enumerate() - - 保持与数据分片的对应关系 - - 具体改动: - 1. 
修改 write_data 函数中的遍历: - ```rust - // 替换这部分 - for (data_item_idx, (data_item, split)) in datas.iter().zip(splits.iter()).enumerate() - - // 改为 - let mut iter = WantIdxIter::new(datas.len()); - while let Some(data_item_idx) = iter.next() { - let data_item = &datas[data_item_idx]; - let split = &splits[data_item_idx]; - ``` - 2. 修改缓存节点处理: - ```rust - // 替换这部分 - for (cache_idx, &node_id) in cache_nodes.iter().enumerate() - - // 改为 - let mut cache_iter = WantIdxIter::new(cache_nodes.len()); - while let Some(cache_idx) = cache_iter.next() { - let node_id = cache_nodes[cache_idx]; - ``` - - -- (done) 任务:处理 error[E0425]: cannot find function `log_error` in this scope - - 修改计划: - - 修改目的: - - 修复编译错误,使用正确的错误处理方式 - - 确保错误处理符合项目规范 - - 预期效果: - - 编译通过 - - 错误处理更加规范和统一 - - 可能的风险: - - 错误处理方式的改变可能影响其他依赖此处错误处理的代码 - - 错误场景分析: - - 错误发生在并行写入数据时 - - 写入目标包括主存储节点和缓存节点 - - 当任何一个节点写入失败时,需要返回整体写入失败错误 - - - 具体步骤: - 1. 分析代码中的错误处理模式 - - 检查现有的 `WSError` 和 `WsDataError` 类型定义 - - 检查现有的错误处理模式 - - 确认需要新增 `WriteDataFailed` 错误类型 - 2. 创建数据写入相关的错误类型 - - 在 `WsDataError` 枚举中添加 `WriteDataFailed` 变体 - - 变体包含字段:`unique_id: Vec` 和 `message: String` - - 确保错误类型转换正确 - 3. 将 `log_error` 替换为 `tracing::error!` - - 确保错误日志信息准确完整 - - 保留原有的中文错误提示 - 4. 修改错误返回方式 - - 使用新创建的 `WsDataError::WriteDataFailed` - - 包含数据 ID 和错误信息 - 5. 编译验证修改 - - 检查编译错误和警告 - - -- 将本地meta获取函数换一个更直观的名字 - -- (done)任务:罗列compilelog中各种未使用问题(error, import类的 warning 不看),并逐个解决 +- 任务:罗列compilelog中各种未使用问题(error, import类的 warning 不看),并逐个解决 - 分析: 1. next_batch_id 方法未被使用,需确认是否有用途;如无用途,则删除或添加注释说明准备将来可能使用。 2. DataGeneral 结构体中的 batch_transfers 字段未被使用,需评估其在业务逻辑中的必要性;若无实际作用,则建议删除。 @@ -439,22 +13,13 @@ 2. 对于确认无用的项,直接删除;对于可能需要保留但目前未使用的项,添加 TODO 注释说明其预期用途; 3. 修改后重新编译,确保无额外问题。 - 执行记录: - - 开始处理未使用问题,目前处于初步整理阶段,待后续逐项跟进。 + - (working)开始处理未使用问题,目前处于初步整理阶段,待后续逐项跟进。 - 下一步:检查 next_batch_id 方法引用情况;如果确认未使用,则删除该方法或添加 TODO 注释。 - 检查结果:通过 grep 搜索,发现 next_batch_id 方法仅在其定义处出现,未被实际引用。建议删除该方法或添加 TODO 注释说明可能的预期用途。 - 检查结果:通过 grep 搜索发现,DataGeneral 结构体中的 batch_transfers 字段仅在其定义(行 109)和初始化(行 1414)处出现,未在后续代码中被引用。建议删除该字段,或如果有保留意图则添加 TODO 注释说明预期用途。 - 下一步:整理编译日志中其他未使用项,逐一确认其用途;对于确认无用的项,逐项删除或添加 TODO 注释。 - 整理结果:初步整理显示,除了上述 next_batch_id 和 batch_transfers 未使用问题外,其它警告多为未使用导入或辅助函数(如 path_is_option、FnExeCtxAsync、FnExeCtxBase 等),这些均非核心逻辑,暂时忽略;后续可根据需要进一步清理。 - 下一步:分析log中还有没有error - - 分析结果:当前 compilelog 中剩余的 error 主要包括: - - "fields `batch_manager` and `batch_transfers` are never read"。 - - "function `flush_the_data` is never used"。 - - "enum `WantIdxIter` is never used"。 - - "associated function `new` is never used"。 - - "methods `next_sequence`, `create_transfer`, and `handle_block` are never used"。 - - "method `call_batch_data` is never used"。 - - "unused result" 错误(如 Option、WriteOneDataResponse 和 unused Result)。 - - 下一步计划:逐项检查上述 error 信息,确认是否删除相应未使用的代码或补充必要的错误处理逻辑,然后重新编译验证修改是否有效。 - (done)任务:编译分析发现的问题 - 修改计划: @@ -1543,40 +1108,3 @@ - - - 执行记录: - 1. 在 .cursorrules 文件中的 7.2 代码修改原则章节添加新规则 - 2. 删除了 DataGeneralView 中的 get_or_del_datameta_from_master 代理方法 - 3. 更新了调用处代码,改为直接使用 data_general().get_or_del_datameta_from_master - 4. 所有修改已完成 - -- 任务:修复 unique_id 移动问题: - - 分析: - - 父问题相关性: - 1. 父问题:编译错误修复 - 2. 相关性:直接导致编译失败的问题 - 3. 必要性:必须解决以通过编译 - 4. 优先级:高,阻塞编译 - - - 当前问题: - 1. 在 batch.rs 中,unique_id 在异步任务中被移动后仍然尝试使用 - 2. 问题出现在 BatchTransfer::new 函数中 - 3. 涉及 tokio::spawn 创建的异步任务 - - - 修改计划: - 1. 
在 BatchTransfer::new 中:
-        - 在创建异步任务前克隆 unique_id
-        - 使用克隆的版本传入异步任务
-        - 保留原始 unique_id 用于其他用途
-
-    - 执行记录:
-      - 已完成:
-        - 在 BatchTransfer::new 中添加了 unique_id_for_task = unique_id.clone()
-        - 修改异步任务使用 unique_id_for_task 代替 unique_id.clone()
-
-      - 下一步:
-        - 执行编译验证修改是否解决问题
-        - 检查是否有其他相关的所有权问题
-
-
diff --git a/src/main/src/general/data/m_data_general/README.md b/src/main/src/general/data/m_data_general/README.md
deleted file mode 100644
index 0887dc7..0000000
--- a/src/main/src/general/data/m_data_general/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# 数据管理模块文档
-
-## 模块文档索引
-
-- [批量传输系统](batch.md)
-- [数据项处理](dataitem.md)
-- [数据管理核心模块](mod.md)
-
-## 模块说明
-
-本目录包含了数据管理模块的核心实现,主要包括:
-
-1. 批量传输系统 (batch.rs):处理大文件的高效传输
-2. 数据项处理 (dataitem.rs):管理数据分片和共享内存访问
-3. 数据管理核心 (mod.rs):提供数据读写和元数据管理
diff --git a/src/main/src/general/data/m_data_general/batch.md b/src/main/src/general/data/m_data_general/batch.md
deleted file mode 100644
index 9f2e790dd58a5be8826c8d300cb6a63c1284b3ca..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

[batch.md 为二进制补丁,2340 字节的 base85 数据从略;原文在此处截断,其后 batch.rs 的 diff 头与首个 hunk 头缺失]

 WSResult<()> {
         // 定义错误转换函数
         let join_error = |e| WsDataError::BatchTransferError {
@@ -204,7 +203,7 @@ impl BatchManager {
             tx,
         ).await?;

-        let _ = self.transfers.insert(request_id.clone(), transfer);
+        self.transfers.insert(request_id.clone(), transfer);

         Ok(request_id)
     }
diff --git a/src/main/src/general/data/m_data_general/data.rs b/src/main/src/general/data/m_data_general/data.rs
new file mode 100644
index 0000000..a27fce7
--- /dev/null
+++ b/src/main/src/general/data/m_data_general/data.rs
@@ -0,0 +1,37 @@
+/// Data Interface for Distributed Storage
+///
+/// # Design Overview
+/// The data interface provides a general-purpose solution for distributed data storage
+/// and retrieval. 
It implements a shard-based approach that differs from the batch +/// interface in its core design: +/// +/// ## Data Interface +/// - Purpose: General-purpose data read/write operations +/// - Write Process: +/// * Data is sharded according to distribution strategy +/// * Shards are distributed to different nodes +/// * Each node stores its assigned shards +/// * Metadata is updated after all writes complete +/// - Read Process: +/// * Metadata is retrieved to locate shards +/// * Shards are collected from respective nodes +/// * Complete data is reassembled from shards +/// +/// ## Comparison with Batch Interface +/// While the batch interface (see batch.rs) focuses on efficient streaming transfer +/// from data holders, the data interface: +/// - Ensures data consistency across nodes +/// - Provides random access to data +/// - Supports complex distribution strategies +/// - Maintains complete metadata for all operations +/// +/// # Implementation Details +/// This interface implements: +/// - Distributed shard management +/// - Concurrent read/write operations +/// - Metadata synchronization +/// - Data consistency verification +/// +/// For streaming transfer functionality, see the batch.rs module. +use super::*; +// ... existing code ... \ No newline at end of file diff --git a/src/main/src/general/data/m_data_general/dataitem.md b/src/main/src/general/data/m_data_general/dataitem.md deleted file mode 100644 index fb9c124..0000000 --- a/src/main/src/general/data/m_data_general/dataitem.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -structs: - - WriteSplitDataTaskGroup: 管理数据分片写入任务组 - - SharedMemHolder: 共享内存数据访问管理 - - SharedMemOwnedAccess: 共享内存的所有权访问控制 - -task_group_functions: - - 任务组管理 - - 分片合并优化 - - 状态同步 - -mem_holder_functions: - - 高效的内存访问 - - 资源自动管理 - -functions: - - new_shared_mem: 创建共享内存数据结构 - - write_split_data: 写入分片数据 ---- - -# 数据项处理 (dataitem.rs) - -数据项处理模块负责管理单个数据项的处理流程,包括数据分片和共享内存访问。 - -## 核心数据结构 ^dataitem-structs - -### WriteSplitDataTaskGroup ^dataitem-task-group -- 管理数据分片写入任务组 -- 为 batch 和 get 操作提供高效的分片合并封装 -- 主要功能: - - 任务组管理 - - 分片合并优化 - - 状态同步 - -### SharedMemHolder ^dataitem-mem-holder -- 共享内存数据访问管理 -- 提供安全的内存共享机制 -- 特点: - - 高效的内存访问 - - 资源自动管理 - -### SharedMemOwnedAccess ^dataitem-mem-access -- 共享内存的所有权访问控制 -- 确保内存访问的安全性和独占性 - -## 核心功能 ^dataitem-functions - -### new_shared_mem ^dataitem-new-mem -- 创建共享内存数据结构 -- 初始化内存访问控制 - -### write_split_data ^dataitem-write-split -- 写入分片数据 -- 功能特点: - - 支持数据分片 - - 并发写入控制 - - 数据完整性校验 diff --git a/src/main/src/general/data/m_data_general/mod.md b/src/main/src/general/data/m_data_general/mod.md deleted file mode 100644 index e29a5fe..0000000 --- a/src/main/src/general/data/m_data_general/mod.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -structs: - - DataGeneral: 数据管理的核心实现 - - DataSplit: 数据分片相关结构 - -data_general_functions: - - 提供数据读写接口 - - 管理元数据 - - 协调各子模块功能 - -functions: - - write_data: 写入数据的主要入口 - - get_or_del_data: 获取或删除数据 - - write_data_batch: 批量写入数据 ---- - -# 数据管理核心模块 (mod.rs) - -数据管理的核心模块,提供数据读写和元数据管理的基础功能。 - -## 核心数据结构 ^mod-structs - -### DataGeneral ^mod-data-general -- 数据管理的核心实现 -- 主要职责: - - 提供数据读写接口 - - 管理元数据 - - 协调各子模块功能 - -### DataSplit ^mod-data-split -- 数据分片相关结构 -- 功能: - - 数据分片管理 - - 分片信息维护 - - 分片操作协调 - -## 核心功能 ^mod-functions - -### write_data ^mod-write -- 写入数据的主要入口 -- 特点: - - 支持同步/异步写入 - - 数据完整性保证 - - 错误处理机制 - -### get_or_del_data ^mod-get-del -- 获取或删除数据 -- 功能: - - 数据检索 - - 数据删除 - - 资源清理 - -### write_data_batch ^mod-write-batch -- 批量写入数据 -- 优势: - - 提高写入效率 - - 减少系统开销 - - 支持事务性操作 diff --git 
a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index 34fc0ed..c231195 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -105,14 +105,13 @@ pub struct DataGeneral { rpc_handler_data_meta_update: RPCHandler, rpc_handler_get_data_meta: RPCHandler, rpc_handler_get_data: RPCHandler, + + // 用于跟踪批量传输的状态 + batch_transfers: DashMap)>, // 修改类型为 (unique_id -> (version, data)) } impl DataGeneral { - #[allow(dead_code)] - fn next_batch_id(&self) -> u32 { - static NEXT_BATCH_ID: AtomicU32 = AtomicU32::new(1); // 从1开始,保留0作为特殊值 - NEXT_BATCH_ID.fetch_add(1, Ordering::Relaxed) - } + // next_batch_id 方法已被移除,因为在当前代码中未被引用。如果将来需要,可重新实现该功能。 async fn write_data_batch( &self, @@ -121,46 +120,85 @@ impl DataGeneral { data: proto::DataItem, data_item_idx: usize, node_id: NodeID, - _batch_size: usize, + batch_size: usize, ) -> WSResult<()> { - let block_type = proto::BatchDataBlockType::Memory; + let total_size = data.data_sz_bytes(); + let total_batches = (total_size + batch_size - 1) / batch_size; - // 创建 channel 接收数据块 - let (tx, _rx) = tokio::sync::mpsc::channel(1); + // 克隆整个 view + let view = self.view.clone(); - // 创建传输任务 - let request_id = self.batch_manager.create_transfer( - unique_id.to_vec(), + // Initialize batch transfer + let init_req = proto::BatchDataRequest { + unique_id: unique_id.to_vec(), version, - block_type, - data.data_sz_bytes() as u32, - tx, - ).await?; - - // 使用现有的 call_batch_data 函数发送数据 - let response = self.rpc_call_batch_data.call( - self.view.p2p(), - node_id, - proto::BatchDataRequest { + request_id: Some(proto::BatchRequestId { + node_id: 0, + sequence: 0, + }), // 使用 0 作为初始化标记 + block_type: proto::BatchDataBlockType::Memory as i32, + block_index: data_item_idx as u32, + operation: proto::DataOpeType::Write as i32, + data: vec![] + }; + + let init_resp = self + .rpc_call_batch_data + .call( + view.p2p(), + node_id, + init_req, + Some(Duration::from_secs(60)), + ) + .await?; + + if !init_resp.success { + return Err(WsDataError::BatchTransferFailed { + node: node_id, + batch: 0, + reason: init_resp.error_message, + } + .into()); + } + + let request_id = init_resp.request_id; + + // Send data in batches + for batch_idx in 0..total_batches { + let start = batch_idx * batch_size; + let end = (start + batch_size).min(total_size); + + let batch_data = data.clone_split_range(start..end); + let batch_req = proto::BatchDataRequest { unique_id: unique_id.to_vec(), version, - request_id: Some(request_id.clone()), - block_type: block_type as i32, + request_id: request_id.clone(), + block_type: proto::BatchDataBlockType::Memory as i32, + data: batch_data.encode_persist(), block_index: data_item_idx as u32, operation: proto::DataOpeType::Write as i32, - data: data.encode_persist(), - }, - Some(Duration::from_secs(60)), - ).await?; - - if !response.success { - return Err(WsDataError::BatchTransferFailed { - node: node_id, - batch: 0, - reason: response.error_message, - }.into()); + }; + + let batch_resp = self + .rpc_call_batch_data + .call( + view.p2p(), + node_id, + batch_req, + Some(Duration::from_secs(60)), + ) + .await?; + + if !batch_resp.success { + return Err(WsDataError::BatchTransferFailed { + node: node_id, + batch: batch_idx as u32, + reason: batch_resp.error_message, + } + .into()); + } } - + Ok(()) } @@ -267,7 +305,7 @@ impl DataGeneral { .into()); } - let _ = data_map.insert(idx, resp.data[0].clone()); + data_map.insert(idx, resp.data[0].clone()); } } 
GetOrDelDataArgType::Delete => { @@ -296,7 +334,7 @@ impl DataGeneral { .into()); } - let _ = data_map.insert(idx, resp.data[0].clone()); + data_map.insert(idx, resp.data[0].clone()); } } GetOrDelDataArgType::PartialOne { idx } => { @@ -323,7 +361,7 @@ impl DataGeneral { .into()); } - let _ = data_map.insert(idx, resp.data[0].clone()); + data_map.insert(idx, resp.data[0].clone()); } GetOrDelDataArgType::PartialMany { idxs } => { for idx in idxs { @@ -350,7 +388,7 @@ impl DataGeneral { .into()); } - let _ = data_map.insert(idx, resp.data[0].clone()); + data_map.insert(idx, resp.data[0].clone()); } } } @@ -402,74 +440,111 @@ impl DataGeneral { let splits = version_schedule_resp.split.clone(); // 处理每个数据项 - let mut iter = WantIdxIter::new(&GetOrDelDataArgType::All, datas.len() as u8); - while let Some(data_item_idx) = iter.next() { - let data_item = &datas[data_item_idx as usize]; - let split = &splits[data_item_idx as usize]; - let mut primary_tasks = Vec::new(); - + for (data_item_idx, (data_item, split)) in datas + .iter() + .zip(splits.iter()) + .enumerate() + { + let mut tasks = Vec::new(); + tracing::debug!( + "{} processing data item {}/{}", + log_tag, + data_item_idx + 1, + datas.len() + ); + // 1. 并行写入所有主数据分片 - let mut split_iter = WantIdxIter::new(&GetOrDelDataArgType::All, split.splits.len() as u8); - while let Some(split_idx) = split_iter.next() { - let split_info = &split.splits[split_idx as usize]; - tracing::debug!("{} creating split write task {}/{} for node {}, offset={}, size={}", - log_tag, split_idx + 1, split.splits.len(), split_info.node_id, split_info.data_offset, split_info.data_size); - let split_info = split_info.clone(); - let unique_id_clone = unique_id.clone(); - let data_item_primary = data_item.clone_split_range( + for (split_idx, split_info) in split.splits.iter().enumerate() { + tracing::debug!( + "{} creating split write task {}/{} for node {}, offset={}, size={}", + log_tag, + split_idx + 1, + split.splits.len(), + split_info.node_id, + split_info.data_offset, + split_info.data_size + ); + + // 克隆必要的数据 + let split_info = split_info.clone(); // 必须克隆,来自临时变量 + let unique_id = unique_id.clone(); // 必须克隆,多个任务需要 + let data_item = data_item.clone_split_range( // 克隆必要的数据范围 split_info.data_offset as usize - ..(split_info.data_offset + split_info.data_size) as usize + ..(split_info.data_offset + split_info.data_size) as usize, ); - let view = self.view.clone(); - let version_copy = version; + let view = self.view.clone(); // 克隆 view,包含所有模块引用 + let version = version; // 复制值类型 + let task = tokio::spawn(async move { - view.data_general() + let resp = view.data_general() .rpc_call_write_once_data .call( view.p2p(), split_info.node_id, proto::WriteOneDataRequest { - unique_id: unique_id_clone, - version: version_copy, + unique_id, + version, data: vec![proto::DataItemWithIdx { idx: data_item_idx as u32, - data: Some(data_item_primary), + data: Some(data_item), }], }, Some(Duration::from_secs(60)), ) - .await + .await?; + Ok::(resp) }); - primary_tasks.push(task); + tasks.push(task); } // 2. 
并行写入缓存数据(完整数据) - let visitor = CacheModeVisitor(version_schedule_resp.cache_mode[data_item_idx as usize] as u16); + let visitor = CacheModeVisitor(version_schedule_resp.cache_mode[data_item_idx] as u16); let need_cache = visitor.is_map_common_kv() || visitor.is_map_file(); + let cache_nodes: Vec = if need_cache { split.splits.iter().map(|s| s.node_id).collect() } else { vec![] }; - let mut cache_tasks = Vec::new(); if !cache_nodes.is_empty() { - tracing::debug!("{} found {} cache nodes: {:?}", log_tag, cache_nodes.len(), cache_nodes); + tracing::debug!( + "{} found {} cache nodes: {:?}", + log_tag, + cache_nodes.len(), + cache_nodes + ); + + // 使用信号量限制并发的批量传输数量 const MAX_CONCURRENT_TRANSFERS: usize = 3; let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_TRANSFERS)); - - let mut cache_iter = WantIdxIter::new(&GetOrDelDataArgType::All, cache_nodes.len() as u8); - while let Some(cache_idx) = cache_iter.next() { - let node_id = cache_nodes[cache_idx as usize]; + + for (cache_idx, &node_id) in cache_nodes.iter().enumerate() { let permit = semaphore.clone().acquire_owned().await.unwrap(); - tracing::debug!("{} creating cache write task {}/{} for node {}", log_tag, cache_idx + 1, cache_nodes.len(), node_id); - let unique_id_clone = unique_id.clone(); - let data_item_cache = data_item.clone(); + tracing::debug!( + "{} creating cache write task {}/{} for node {}", + log_tag, + cache_idx + 1, + cache_nodes.len(), + node_id + ); + + // 创建批量传输任务 + let unique_id = unique_id.clone(); + let data_item = data_item.clone(); let view = self.view.clone(); + let task = tokio::spawn(async move { let _permit = permit; view.data_general() - .write_data_batch(&unique_id_clone, version, data_item_cache, data_item_idx as usize, node_id, 1024 * 1024) + .write_data_batch( + &unique_id, + version, + data_item.clone(), + data_item_idx, + node_id, + 1024 * 1024, // 1MB batch size + ) .await?; Ok::(proto::WriteOneDataResponse { remote_version: version, @@ -477,20 +552,13 @@ impl DataGeneral { message: String::new(), }) }); - cache_tasks.push(task); + tasks.push(task); } } - let primary_results = futures::future::join_all(primary_tasks).await; - let cache_results = futures::future::join_all(cache_tasks).await; - - if primary_results.iter().any(|res| res.is_err()) || cache_results.iter().any(|res| res.is_err()) { - let error_msg = format!("主节点或缓存节点数据写入失败"); - tracing::error!("{}", error_msg); - return Err(WSError::WsDataError(WsDataError::WriteDataFailed { - unique_id: unique_id.clone(), - message: error_msg, - })); + // 等待所有写入任务完成 + for task in tasks { + task.await??; } } @@ -1343,6 +1411,8 @@ impl LogicalModule for DataGeneral { rpc_handler_data_meta_update: RPCHandler::new(), rpc_handler_get_data_meta: RPCHandler::new(), rpc_handler_get_data: RPCHandler::new(), + + batch_transfers: DashMap::new(), } } @@ -1378,7 +1448,7 @@ impl LogicalModule for DataGeneral { req: proto::BatchDataRequest| { let view = view.clone(); let _ = tokio::spawn(async move { - let _ = view.data_general().rpc_handle_batch_data(responsor, req).await; + view.data_general().rpc_handle_batch_data(responsor, req).await; }); Ok(()) }, @@ -1426,7 +1496,7 @@ impl LogicalModule for DataGeneral { Ok(vec![]) } } -#[allow(dead_code)] + fn flush_the_data( log_tag: &str, unique_id: &[u8], @@ -1465,4 +1535,3 @@ fn flush_the_data( }); write_source_data_tasks.push(t); } - diff --git a/src/main/src/result.rs b/src/main/src/result.rs index 62afdfe..50186e4 100644 --- a/src/main/src/result.rs +++ b/src/main/src/result.rs @@ -200,10 +200,6 @@ pub enum WsDataError { 
expect: usize, actual: usize, }, - WriteDataFailed { - unique_id: Vec, - message: String, - }, KvDeserializeErr { unique_id: Vec, context: String, From 7d974fc40d9b4b92294a715e51f95b4adebb4f30 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 18/26] Revert "fix progress" This reverts commit d2f5ec7df359a129d220fa6dcbe280d9de6c9776. --- .cursorrules | 711 ----------- compilelog | 424 ------- review.md | 1110 ----------------- src/main/build.rs | 5 +- .../src/general/data/m_data_general/batch.rs | 445 ------- .../src/general/data/m_data_general/data.rs | 37 - .../general/data/m_data_general/dataitem.rs | 16 +- .../src/general/data/m_data_general/mod.rs | 123 +- src/main/src/general/network/msg_pack.rs | 18 +- .../src/general/network/proto_src/data.proto | 27 - .../src/general/network/proto_src/sche.proto | 14 + src/main/src/result.rs | 16 - 12 files changed, 79 insertions(+), 2867 deletions(-) delete mode 100644 compilelog delete mode 100644 review.md delete mode 100644 src/main/src/general/data/m_data_general/batch.rs delete mode 100644 src/main/src/general/data/m_data_general/data.rs diff --git a/.cursorrules b/.cursorrules index f4a4825..3f57139 100644 --- a/.cursorrules +++ b/.cursorrules @@ -127,142 +127,6 @@ - 只修改规则相关部分 - 保持其他内容不变 - 保持文档结构完整 -- 执行命令时必须: - - 先提出执行计划 - - 说明执行目的和预期结果 - - 等待用户确认后再执行 - - 记录执行结果和遇到的问题 - - 如遇问题,提出解决方案并等待确认 - - 例外情况: - 1. 编译命令(sudo -E $HOME/.cargo/bin/cargo build)可以直接执行,无需等待确认 - 2. 编译命令必须将输出重定向到 compilelog 文件 - 3. 编译命令执行后必须分析结果并更新 review.md - -- 编译验证规则: - - 当用户要求检查编译状态时: - 1. 必须立即执行实际的编译命令,无需等待确认 - 2. 禁止仅查看历史编译日志 - 3. 必须使用正确的编译命令:`sudo -E $HOME/.cargo/bin/cargo build 2>&1 | tee compilelog` - 4. 必须等待编译完成并分析结果 - 5. 必须将编译结果记录到 review.md 中 - - 编译执行前必须: - 1. 确认已经在 review.md 中记录了执行计划 - 2. 确认编译环境已经准备就绪 - 3. 确认使用了正确的编译命令和参数 - - 编译执行后必须: - 1. 分析编译输出中的每个错误和警告 - 2. 更新 review.md 中的任务状态 - 3. 如果发现新的错误,创建相应的任务记录 - - 禁止行为: - 1. 禁止在没有执行编译的情况下判断编译状态 - 2. 禁止仅根据历史记录回答编译相关问题 - 3. 禁止忽略编译警告 - 4. 禁止在编译失败时不更新任务状态 - -- 编译后问题处理规则: - 1. 每次编译完成后,如果发现新的问题: - - 必须先暂停当前操作 - - 立即在 review.md 中记录新问题 - - 对新问题进行完整的分析记录 - - 等待用户确认后再继续处理 - 2. 禁止在发现新问题后未经记录就直接处理 - 3. 禁止在未经用户确认的情况下处理新问题 - 4. 每个新问题必须包含: - - 与父问题的关系分析 - - 问题的具体表现和影响 - - 初步的解决方案建议 - - 预期的处理步骤 - 5. 违反以上规则的行为将被拒绝执行 - -- review.md 使用规则: - - 在执行任何操作前必须: - 1. 先检查 review.md 文件是否存在 - 2. 阅读完整的 review.md 内容 - 3. 理解当前任务的上下文和父问题 - 4. 在合适的位置添加新的任务记录 - - - 更新位置确定原则: - 1. 必须仔细分析当前对话正在处理的具体问题 - 2. 找到该问题在 review.md 中的对应位置 - 3. 将新内容添加到该问题的相关位置 - 4. 禁止简单地追加到文件末尾 - 5. 如果找不到明确的对应位置,必须先在对应任务描述下标记为 (working) 并询问用户确认 - 6. 对于正在计划或执行中的任务,必须标记为 (working);同一时间系统中只允许存在一个 (working) 状态的任务记录。如果发现多个 (working) 标记,必须暂停后续操作,并等待用户确认后再统一标记 - - - 任务记录必须遵循以下格式: - ```markdown - - 任务:[任务描述] - - 分析: - - 父问题相关性: - 1. 父问题:[引用具体的父问题] - 2. 相关性:[说明与父问题的关系] - 3. 必要性:[说明为什么需要解决] - 4. 优先级:[说明优先级和原因] - - - 当前问题: - 1. [具体问题点1] - 2. [具体问题点2] - ... - - - 修改计划: - 1. [具体步骤1] - 2. [具体步骤2] - ... - - - 执行记录: - - 已完成: - - [已完成的步骤1] - - [已完成的步骤2] - - - 遇到的问题: - - 问题1:[问题描述] - - 解决方案:[方案描述] - - 解决过程:[过程记录] - ``` - - - 任务状态管理: - 1. 新任务必须添加在未完成任务的最前面 - 2. 已完成任务必须标记为 (done) - 3. 已完成任务必须移到未完成任务后面 - 4. 子任务必须保持正确的缩进层级 - 5. 任务完成状态必须实时更新 - - - 强制执行要求: - 1. 禁止在未更新 review.md 的情况下执行任何操作 - 2. 禁止在未经确认的情况下修改已有任务记录 - 3. 禁止删除任何历史记录 - 4. 必须在每次操作前后更新执行记录 - 5. 必须在遇到问题时立即记录 - 6. 必须在解决问题后更新解决方案 - 7. 违反以上规则的操作将被拒绝执行 - -- 执行计划必须: - 1. 在执行任何操作前,必须先在 review.md 中记录执行计划 - 2. 执行计划必须包含: - - 任务描述和目标 - - 父问题相关性分析 - - 当前问题分析 - - 具体执行步骤 - - 预期结果 - - 可能的风险 - - 验证方法 - 3. 执行计划必须遵循 review.md 的格式要求: - - 新计划添加在未完成任务的最前面 - - 使用正确的缩进和层级 - - 包含完整的分析和计划部分 - 4. 
执行过程必须: - - 严格按照计划执行 - - 实时记录执行结果 - - 遇到问题时立即记录 - - 完成后更新任务状态 - 5. 禁止在没有执行计划的情况下: - - 执行任何命令 - - 修改任何文件 - - 进行任何操作 - 6. 如需修改计划: - - 必须先记录原计划的问题 - - 提出新的计划 - - 等待确认后再继续 ### 7.1 文档维护与代码组织原则 - 文档压缩原则:保持无损压缩,合并重复内容,简化表述,重构文档结构。 @@ -270,18 +134,6 @@ - 代码组织规则:宏生成的访问函数直接使用,非 pub 函数只在一个地方定义,View 负责核心实现,具体模块负责自己的功能,通过 View 访问其他模块。 ### 7.2 代码修改原则 - -#### 7.2.1 问题解决原则 -- 仅解决当前 review 中关注的问题和遇到的子问题 -- 解决问题前必须先写出解决方案的规划: - 1. 分析问题的根本原因 - 2. 列出可能的解决方案 - 3. 评估每个方案的优缺点 - 4. 选择最优方案并说明原因 - 5. 列出具体的实施步骤 - 6. 考虑可能的风险和应对措施 - - - 不随意删除或修改已有的正确实现 - 不在多处实现同一功能 - 保持代码结构清晰简单 @@ -400,43 +252,6 @@ for node_id in nodes { - 在开始任何操作前,先理解"为什么"而不是"怎么做" - 确保完全理解当前上下文中的所有信息 - 避免机械性思维和跳过思考的行为模式 - - 对于涉及代码逻辑的命令,必须先阅读和理解相关代码,再执行命令 - - 当需要复用或参考现有代码逻辑时,必须先在项目中查找并理解相关实现 - - 在理解代码时,需要关注: - - 代码的执行流程和依赖关系 - - 数据结构和状态管理方式 - - 错误处理和异常情况的处理方式 - -- 代码分析记录原则: - - 在修改任何代码之前,必须在 review.md 中记录完整的代码分析: - 1. 问题代码:截取导致问题的具体代码片段 - 2. 上下文代码:截取理解问题所需的相关代码 - 3. 问题成因:详细分析问题的具体原因 - 4. 修复方案:说明如何修复以及为什么这样修复 - 5. 修改验证:列出验证修改正确性的方法 - - 分析记录必须: - - 使用代码块格式展示代码 - - 保持代码片段的完整性和可读性 - - 确保分析逻辑清晰 - - 说明修改的影响范围 - -- 父问题相关性分析: - - 在开始分析任何问题之前,必须首先进行父问题相关性分析 - - 分析步骤: - 1. 确认当前问题的父问题是什么 - 2. 回溯父问题的执行计划和记录 - 3. 判断当前问题是否是父问题引起的 - 4. 确认解决当前问题是否必要且有助于解决父问题 - - 分析结果必须包含: - 1. 父问题的明确引用 - 2. 相关性的具体分析 - 3. 解决必要性说明 - 4. 优先级判断 - - 如果当前问题与父问题无关: - 1. 记录分析结果 - 2. 暂时搁置该问题 - 3. 继续专注于父问题的解决 - - 内化规则: - 把规则视为思维框架而不是外部约束 - 养成先检查当前上下文的习惯 @@ -445,529 +260,3 @@ for node_id in nodes { - 理解问题的根本原因比立即解决问题更重要 - 分析失误的思维模式而不是简单记住正确操作 - 把经验转化为思维方式而不是操作步骤 - -## 8. 代码评审与修改文档规则 - -### 8.1 修改计划与记录要求 -- 每次修改代码前: - 1. 必须查看项目根目录的 `review.md` 文件 - 2. 根据现有内容确定修改计划的位置和层级 - 3. 在对应位置添加修改计划 - 4. 使用 markdown 格式记录,保持层级结构清晰 - -### 8.2 文档结构规范 -- 所有修改记录必须使用以下简化的问题树结构: - ```markdown - - 任务/问题:xxxx - - 分析:xxxx - - 计划任务1:xxxx - 新问题1:xxxx - - 分析:xxxx - - 计划任务3:xxxx - 已完成 - - - 计划任务2:xxxx - 已完成 - ``` - -- 结构规则: - 1. 父节点必须是具体的任务或问题描述 - 2. 第一个子节点必须是对问题的分析 - 3. 后续子节点是具体的计划任务 - 4. 每个计划任务下可以包含新的问题,遵循相同的结构 - 5. 已完成的任务标记为"已完成" - 6. 保持缩进层级清晰 - -- 示例说明: - ```markdown - - 任务:修复类型转换错误 - - 分析:当前代码在类型转换时未考虑空值情况 - - 计划任务1:添加空值检查 - 新问题:如何处理空值转换失败 - - 分析:需要在转换失败时提供默认值 - - 计划任务:实现 Option 转换 - 已完成 - - - 计划任务2:添加单元测试 - 已完成 - ``` - -### 8.3 记录要求 -1. 修改计划必须包含: - - 修改目的 - - 预期效果 - - 可能的风险 - - 具体步骤 - -2. 修改过程必须记录: - - 实际执行的步骤 - - 遇到的每个问题 - - 解决方案和结果 - -3. 问题记录必须包含: - - 问题的具体表现 - - 问题的可能原因 - - 尝试的解决方案 - - 最终的解决方案 - - 预防措施(如果适用) - -### 8.4 维护原则 -- 保持文档的实时更新 -- 确保问题树结构清晰 -- 定期回顾和整理文档 -- 记录经验教训和最佳实践 - -### 8.5 任务识别规则 - -#### 8.5.1 任务状态判断 -1. 完成状态标记: - - 已完成任务必须标记为 `(done)` - - 未标记 `(done)` 的任务视为未完成 - - 不使用其他状态标记 - -2. 任务顺序规则: - - 文档开头说明:`(顺序:新的在前面;先解决就的未完成的;完成的有标注;问题可能存在子问题)` - - 新任务添加到未完成任务的最前面 - - 已完成任务移到未完成任务的后面 - - 子任务跟随父任务,保持缩进层级 - -3. 最老未完成任务识别: - - 从上到下扫描所有顶级任务 - - 跳过带有 `(done)` 标记的任务 - - 第一个不带 `(done)` 标记的任务即为最老未完成任务 - - 子任务不影响父任务的完成状态判断 - -4. 任务优先级: - - 未完成任务按出现顺序表示优先级(越靠后优先级越高) - - 子任务优先级高于同级后续任务 - - 阻塞性问题优先级最高 - -#### 8.5.2 任务解析检查清单 -在识别和处理任务时,必须检查: -- [ ] 任务是否有 `(done)` 标记 -- [ ] 任务是否为顶级任务 -- [ ] 是否有未完成的子任务 -- [ ] 任务的位置是否符合顺序规则 -- [ ] 是否存在阻塞性问题 - -## 9. 批量数据接口设计 - -### 9.1 BatchTransfer 设计规范 - -#### 9.1.1 组件职责定义 - -1. **数据结构职责划分** - - BatchTransfer(单个传输任务管理器)必须: - - 维护单个传输任务的完整状态(unique_id, version, block_type, total_blocks) - - 使用 DashMap 存储接收到的数据块,确保并发安全 - - 通过 Option 管理完成状态通知 - - 负责数据块的接收、验证和重组 - - - BatchManager(全局传输任务管理器)必须: - - 使用 DashMap 维护所有进行中的传输任务 - - 使用原子计数器生成唯一的请求序列号 - - 负责传输任务的创建、数据块处理和生命周期管理 - -2. 
**函数职责要求** - - call_batch_data(发送端)必须: - - 使用固定大小(1MB)进行数据分块 - - 通过 BatchManager 创建传输任务 - - 负责数据块的发送 - - 等待传输完成通知 - - - handle_block(接收端)必须: - - 接收并验证单个数据块 - - 更新传输状态 - - 在接收完所有块时触发完成处理 - - - complete(完成处理)必须: - - 校验所有数据块的完整性 - - 根据类型(内存/文件)重组数据 - - 发送完成通知 - -#### 9.1.2 数据流转规范 - -1. **发送流程要求** - - 必须按照以下顺序执行: - 1. 接收原始数据并验证 - 2. 计算分块策略 - 3. 创建传输任务 - 4. 按序发送数据块 - -2. **接收流程要求** - - 必须按照以下顺序处理: - 1. 接收数据块并验证 - 2. 存储到对应的 BatchTransfer - 3. 检查完整性 - 4. 触发完成处理 - 5. 通知发送端 - -#### 9.1.3 错误处理规范 - -1. **组件错误处理职责** - - BatchTransfer 必须处理: - - 数据块完整性验证错误 - - 数据重组过程错误 - - - BatchManager 必须处理: - - 传输任务存在性检查错误 - - 并发访问保护错误 - - - 调用方必须处理: - - 网络传输错误 - - 超时错误 - -2. **错误恢复策略** - - 必须支持以下错误恢复机制: - - 单个数据块的重试 - - 传输任务的取消 - - 资源的正确释放 - -#### 9.1.4 资源管理规范 - -1. **内存管理** - - 必须预分配适当的缓冲区大小 - - 必须及时释放不再需要的内存 - - 必须控制并发数据块的最大数量 - -2. **文件管理** - - 必须使用唯一的临时文件名 - - 必须在完成后清理临时文件 - - 必须正确处理文件权限 - -3. **并发控制** - - 必须使用 DashMap 确保并发安全 - - 必须使用原子操作处理计数器 - - 必须正确管理 channel 资源 - -### 9.2 批量写入实现 - -#### 9.2.1 总体流程 - -1. **数据切分** - - 内存数据按 1MB 切块 - - 文件数据按 4MB 切块 - - 计算总块数和最后一块大小 - -2. **任务池初始化** - - 创建 4 个传输任务槽位 - - 每个任务负责一个数据块的传输 - - 任务完成后自动释放槽位 - -3. **数据块获取** - - 空闲任务会请求新的数据块 - - 最多预取 8 个块 - - 超过限制则等待其他块处理完成 - -4. **传输过程** - - 任务获取到数据块后开始传输 - - 每个请求包含块索引和数据类型 - - 单个请求超时时间为 30 秒 - -5. **完成处理** - - 所有块传输完成后结束 - - 失败的块会重试最多 3 次 - - 重试间隔为 1 秒 - -#### 9.2.2 接收方处理 - -1. **数据管理** - - 复用 get_data 的文件和内存管理逻辑 - - 文件使用 FileManager 管理可变文件 - - 内存使用 MemoryManager 管理内存块 - -2. **并行写入** - - 每个数据块作为独立的写入任务 - - 文件写入使用 seek + write 定位写入 - - 内存写入使用偏移量计算地址 - -3. **并发控制** - - 使用 RwLock 保护共享资源 - - 文件操作使用 async 文件 I/O - - 内存操作使用原子操作保证并发安全 - -4. **状态管理** - - 记录每个块的写入状态 - - 支持断点续传和重试 - - 完成后更新元数据 - ``` - -3. **接收方处理** - ```rust - struct BatchDataWriter { - // 文件缓存,使用 unique_id 作为 key - file_cache: HashMap, BatchFileCache>, - // 内存缓存,使用 unique_id 作为 key - memory_cache: HashMap, BatchMemoryCache>, - } - - impl BatchDataWriter { - async fn handle_request(&mut self, req: BatchDataRequest) -> BatchDataResponse { - let cache = match req.block_type { - DataBlockType::Memory => &mut self.memory_cache, - DataBlockType::File => &mut self.file_cache, - }; - - // 获取或创建缓存 - let block_cache = cache.entry(req.unique_id.clone()) - .or_insert_with(|| self.create_cache(req.block_type)); - - // 写入数据块 - match block_cache.write_block(req.block_index, req.data).await { - Ok(()) => BatchDataResponse { - request_id: req.request_id, - success: true, - error_message: String::new(), - version: req.version, - }, - Err(e) => BatchDataResponse { - request_id: req.request_id, - success: false, - error_message: e.to_string(), - version: req.version, - }, - } - } - } - ``` - -#### 9.2.2 缓存管理 - -1. **文件缓存** - ```rust - struct BatchFileCache { - path: PathBuf, // 临时文件路径 - file: File, // 文件句柄 - received_blocks: HashSet, // 已接收的块 - } - - impl BatchFileCache { - async fn write_block(&mut self, index: u32, data: Vec) -> Result<()> { - // 记录块并写入文件 - self.received_blocks.insert(index); - self.file.seek(SeekFrom::Start((index as u64) * BLOCK_SIZE))?; - self.file.write_all(&data)?; - Ok(()) - } - } - ``` - -2. **内存缓存** - ```rust - struct BatchMemoryCache { - blocks: HashMap>, // 块索引 -> 数据 - total_size: usize, // 总大小 - } - - impl BatchMemoryCache { - async fn write_block(&mut self, index: u32, data: Vec) -> Result<()> { - // 直接存储到内存 - self.blocks.insert(index, data); - Ok(()) - } - } - ``` - -#### 9.2.3 注意事项 - -1. **并发控制** - - 使用 MAX_CONCURRENT_TASKS 控制带宽使用 - - 通过 MAX_PENDING_BLOCKS 实现背压控制 - - 任务完成后及时释放资源 - -2. 
**内存管理** - - 预取块数量不超过 MAX_PENDING_BLOCKS - - 使用 Arc<[u8]> 避免数据复制 - - 大文件优先使用文件缓存 - -3. **错误处理** - - 记录失败的块以便重试 - - 最多重试 MAX_RETRIES 次 - - 重试间隔为 RETRY_DELAY_MS - - 单个任务超过 TASK_TIMEOUT_MS 自动取消 - -4. **性能优化** - - 使用异步 I/O 提高并发性 - - 任务空闲时自动获取新块 - - 支持乱序处理和断点续传 - -5. **监控和调试** - - 记录每个块的处理状态 - - 统计传输速率和成功率 - - 支持取消整个传输任务 - -### 9.3 请求方逻辑 - -1. **请求预处理**: - - 生成唯一的 request_id - - 验证数据项数量不超过 max_batch_size - - 设置适当的超时时间 - -### 9.3 并行写入实现规范 - -#### 9.3.1 WriteSplitDataTaskGroup 设计模式 -1. **基础结构设计** - ```rust - enum WriteSplitDataTaskGroup { - ToFile { - file_path: PathBuf, - tasks: Vec>>, - }, - ToMem { - shared_mem: SharedMemHolder, - tasks: Vec>>, - }, - } - ``` - -2. **职责划分** - - 任务组管理: - - 创建和初始化写入任务 - - 跟踪任务状态和完成情况 - - 提供统一的任务管理接口 - - 数据写入: - - 文件写入使用 FileExt::write_at - - 内存写入使用 SharedMemOwnedAccess - - 支持并发安全的数据访问 - -3. **并发控制要求** - - 文件写入: - - 使用 tokio::task::spawn_blocking 处理 I/O - - 通过文件偏移确保并发安全 - - 每个任务独占写入区域 - - 内存写入: - - 使用 SharedMemOwnedAccess 保证访问安全 - - 通过 Range 隔离数据区域 - - Arc 管理共享内存生命周期 - -4. **错误处理规范** - - 数据验证: - - 检查数据块类型匹配 - - 验证数据长度一致性 - - 确保写入位置正确 - - 错误传播: - - 使用 Result 类型传递错误 - - 支持任务级别的错误处理 - - 实现错误重试机制 - -#### 9.3.2 复用规范 -1. **接口设计要求** - - 提供统一的数据写入接口 - - 支持文件和内存两种模式 - - 保持与现有实现兼容 - -2. **数据管理规范** - - 文件数据: - - 使用文件偏移管理数据位置 - - 支持并发写入和随机访问 - - 实现临时文件清理 - - 内存数据: - - 使用 SharedMemOwnedAccess 管理 - - 支持数据分片和并发访问 - - 确保内存安全释放 - -3. **任务管理要求** - - 并发控制: - - 使用信号量限制并发任务数 - - 支持任务取消和超时处理 - - 实现资源自动释放 - - 状态同步: - - 跟踪任务完成状态 - - 支持等待所有任务完成 - - 提供任务进度反馈 - -4. **性能优化准则** - - 预分配资源: - - 文件空间预分配 - - 内存缓冲区预分配 - - 任务队列容量预设 - - 并发调优: - - 根据系统资源调整并发度 - - 优化任务调度策略 - - 减少数据复制开销 - -## 10. 构建规则 - -### 10.1 编译命令规范 - -#### 10.1.1 使用 sudo 编译 -- 项目编译前必须确保已设置默认工具链: - ```bash - rustup default stable - ``` - -- 项目编译必须使用 sudo 权限: - ```bash - sudo -E $HOME/.cargo/bin/cargo build - ``` - -#### 10.1.2 使用场景 -1. 首次编译项目 -2. 依赖更新后的完整编译 -3. 涉及系统级权限的功能修改 - -#### 10.1.3 安全注意事项 -1. 确保使用 sudo 的必要性: - - 仅在确实需要系统权限时使用 - - 优先考虑其他解决方案 - -2. 权限管理: - - 确保开发者具有必要的 sudo 权限 - - 遵循最小权限原则 - - 避免在非必要情况下使用 sudo - -3. 环境一致性: - - 保持开发环境权限配置一致 - - 记录所有需要 sudo 权限的依赖 - - 在文档中说明使用 sudo 的原因 - -4. 编译环境检查: - - 确保 rustup 工具链已正确安装 - - 确保已设置默认工具链:`rustup default stable` - - 检查 cargo 路径是否正确 - -### 8.3 处理方逻辑 - -1. **并发处理**: - - 使用工作池处理批量请求 - - 控制并发度 - - 实现公平调度 - -2. **资源管理**: - - 内存使用限制 - - 连接数限制 - - CPU 使用限制 - -3. **监控和日志**: - - 记录处理时间 - - 记录成功/失败率 - - 记录资源使用情况 - -### 8.4 最佳实践 - -1. **批量大小**: - - 建议单批次处理 100-1000 个数据项 - - 根据数据大小动态调整 - -2. **超时设置**: - - 基础超时:30秒 - - 根据批量大小线性增加 - - 最大超时:120秒 - -3. **错误处理**: - - 提供详细的错误信息 - - 支持部分成功的情况 - - 实现幂等性 - -4. 
**性能考虑**: - - 使用异步处理 - - 实现批量压缩 - - 考虑网络带宽限制 - - - 把规则视为思维框架而不是外部约束 - - 养成先检查当前上下文的习惯 - - 避免在已有信息的情况下去外部搜索 -- 关注本质: - - 理解问题的根本原因比立即解决问题更重要 - - 分析失误的思维模式而不是简单记住正确操作 - - 把经验转化为思维方式而不是操作步骤 diff --git a/compilelog b/compilelog deleted file mode 100644 index 73445c7..0000000 --- a/compilelog +++ /dev/null @@ -1,424 +0,0 @@ -warning: profiles for the non root package will be ignored, specify profiles at the workspace root: -package: /home/nature/padev/waverless/src/main/Cargo.toml -workspace: /home/nature/padev/waverless/Cargo.toml -warning: function `path_is_option` is never used - --> ws_derive/src/lib.rs:21:4 - | -21 | fn path_is_option(path: &syn::Path) -> bool { - | ^^^^^^^^^^^^^^ - | - = note: `#[warn(dead_code)]` on by default - -warning: `ws_derive` (lib) generated 1 warning - Compiling wasm_serverless v0.1.0 (/home/nature/padev/waverless/src/main) -warning: unused import: `crate::general::app::m_executor::FnExeCtxAsync` - --> src/main/src/general/app/app_owned/wasm_host_funcs/result.rs:2:5 - | -2 | use crate::general::app::m_executor::FnExeCtxAsync; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: `#[warn(unused_imports)]` on by default - -warning: unused import: `FnExeCtxBase` - --> src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs:16:58 - | -16 | use crate::general::app::m_executor::{FnExeCtxAsync, FnExeCtxBase}; - | ^^^^^^^^^^^^ - -warning: unused import: `WsFuncError` - --> src/main/src/general/app/app_owned/mod.rs:7:31 - | -7 | use crate::result::{WSResult, WsFuncError}; - | ^^^^^^^^^^^ - -warning: unused import: `std::path::Path` - --> src/main/src/general/app/app_shared/java.rs:9:5 - | -9 | use std::path::Path; - | ^^^^^^^^^^^^^^^ - -warning: unused import: `WSError` - --> src/main/src/general/app/app_shared/process.rs:11:21 - | -11 | use crate::result::{WSError, WsFuncError}; - | ^^^^^^^ - -warning: unused import: `kv_interface::KvOps` - --> src/main/src/general/app/mod.rs:21:13 - | -21 | kv_interface::KvOps, - | ^^^^^^^^^^^^^^^^^^^ - -warning: unused import: `ErrCvt` - --> src/main/src/general/app/mod.rs:37:14 - | -37 | result::{ErrCvt, WSResult, WsFuncError}, - | ^^^^^^ - -warning: unused import: `std::path::PathBuf` - --> src/main/src/general/app/mod.rs:46:5 - | -46 | use std::path::PathBuf; - | ^^^^^^^^^^^^^^^^^^ - -warning: unused import: `super::CacheModeVisitor` - --> src/main/src/general/data/m_data_general/dataitem.rs:17:5 - | -17 | use super::CacheModeVisitor; - | ^^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused import: `base64::Engine` - --> src/main/src/general/data/m_data_general/batch.rs:29:5 - | -29 | use base64::Engine; - | ^^^^^^^^^^^^^^ - -warning: unused import: `tokio::io::AsyncWriteExt` - --> src/main/src/general/data/m_data_general/batch.rs:31:5 - | -31 | use tokio::io::AsyncWriteExt; - | ^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused import: `crate::general::data::m_data_general::dataitem::WantIdxIter` - --> src/main/src/general/data/m_data_general/mod.rs:6:5 - | -6 | use crate::general::data::m_data_general::dataitem::WantIdxIter; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused imports: `DataMetaGetRequest` and `DataVersionScheduleRequest` - --> src/main/src/general/data/m_data_general/mod.rs:16:29 - | -16 | self, DataMeta, DataMetaGetRequest, DataVersionScheduleRequest, WriteOneDataRequest, - | ^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused import: `WsRuntimeErr` - --> src/main/src/general/data/m_data_general/mod.rs:28:46 - | -28 | result::{WSError, WSResult, WSResultExt, WsRuntimeErr, 
WsSerialErr, WsNetworkLogicErr}, - | ^^^^^^^^^^^^ - -warning: unused import: `enum_as_inner::EnumAsInner` - --> src/main/src/general/data/m_data_general/mod.rs:36:5 - | -36 | use enum_as_inner::EnumAsInner; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused import: `std::ops::Range` - --> src/main/src/general/data/m_data_general/mod.rs:40:5 - | -40 | use std::ops::Range; - | ^^^^^^^^^^^^^^^ - -warning: unused imports: `AtomicU32` and `Ordering` - --> src/main/src/general/data/m_data_general/mod.rs:45:20 - | -45 | sync::atomic::{AtomicU32, Ordering}, - | ^^^^^^^^^ ^^^^^^^^ - -warning: unused import: `std::future::Future` - --> src/main/src/general/data/m_data_general/mod.rs:51:5 - | -51 | use std::future::Future; - | ^^^^^^^^^^^^^^^^^^^ - -warning: unused imports: `m_data_general::DataItemIdx`, `network::proto`, and `self` - --> src/main/src/master/app/fddg.rs:6:16 - | -6 | data::{self, m_data_general::DataItemIdx}, - | ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -7 | network::proto, - | ^^^^^^^^^^^^^^ - -warning: unused import: `dashmap::DashMap` - --> src/main/src/master/app/fddg.rs:11:5 - | -11 | use dashmap::DashMap; - | ^^^^^^^^^^^^^^^^ - -warning: unused import: `std::collections::HashSet` - --> src/main/src/master/app/fddg.rs:13:5 - | -13 | use std::collections::HashSet; - | ^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused imports: `AffinityPattern`, `AffinityRule`, `AppType`, `FnMeta`, and `NodeTag` - --> src/main/src/master/app/m_app_master.rs:3:27 - | -3 | use crate::general::app::{AffinityPattern, AffinityRule, AppType, FnMeta, NodeTag}; - | ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^ ^^^^^^^ ^^^^^^ ^^^^^^^ - -warning: unused import: `crate::general::network::m_p2p::RPCCaller` - --> src/main/src/master/app/m_app_master.rs:5:5 - | -5 | use crate::general::network::m_p2p::RPCCaller; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused imports: `distribute_task_req::Trigger` and `self` - --> src/main/src/master/app/m_app_master.rs:6:44 - | -6 | use crate::general::network::proto::sche::{self, distribute_task_req::Trigger}; - | ^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused import: `FunctionTriggerContext` - --> src/main/src/master/app/m_app_master.rs:9:31 - | -9 | use crate::master::m_master::{FunctionTriggerContext, Master}; - | ^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused import: `WsFuncError` - --> src/main/src/master/app/m_app_master.rs:10:31 - | -10 | use crate::result::{WSResult, WsFuncError}; - | ^^^^^^^^^^^ - -warning: unused import: `crate::sys::NodeID` - --> src/main/src/master/app/m_app_master.rs:11:5 - | -11 | use crate::sys::NodeID; - | ^^^^^^^^^^^^^^^^^^ - -warning: unused imports: `HashMap` and `HashSet` - --> src/main/src/master/app/m_app_master.rs:15:24 - | -15 | use std::collections::{HashMap, HashSet}; - | ^^^^^^^ ^^^^^^^ - -warning: unused imports: `AtomicU32` and `Ordering` - --> src/main/src/master/app/m_app_master.rs:16:25 - | -16 | use std::sync::atomic::{AtomicU32, Ordering}; - | ^^^^^^^^^ ^^^^^^^^ - -warning: unused import: `std::time::Duration` - --> src/main/src/master/app/m_app_master.rs:17:5 - | -17 | use std::time::Duration; - | ^^^^^^^^^^^^^^^^^^^ - -warning: unused import: `crate::general::app::m_executor::EventCtx` - --> src/main/src/master/data/m_data_master.rs:1:5 - | -1 | use crate::general::app::m_executor::EventCtx; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused import: `crate::general::app::m_executor::FnExeCtxAsync` - --> src/main/src/master/data/m_data_master.rs:3:5 - | -3 | use crate::general::app::m_executor::FnExeCtxAsync; - | 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused import: `crate::general::app::m_executor::FnExeCtxAsyncAllowedType` - --> src/main/src/master/data/m_data_master.rs:4:5 - | -4 | use crate::general::app::m_executor::FnExeCtxAsyncAllowedType; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: unused imports: `AffinityPattern`, `AffinityRule`, and `NodeTag` - --> src/main/src/master/data/m_data_master.rs:7:27 - | -7 | use crate::general::app::{AffinityPattern, AffinityRule, NodeTag}; - | ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^ ^^^^^^^ - -warning: unused imports: `DataItemIdx` and `DataSetMeta` - --> src/main/src/master/data/m_data_master.rs:19:37 - | -19 | CacheMode, DataGeneral, DataItemIdx, DataSetMeta, DataSetMetaBuilder, DataSplit, - | ^^^^^^^^^^^ ^^^^^^^^^^^ - -warning: unused imports: `AffinityPattern`, `AffinityRule`, `AppType`, and `FnMeta` - --> src/main/src/master/m_master.rs:16:15 - | -16 | app::{AffinityPattern, AffinityRule, AppMetaManager, AppType, DataEventTrigger, FnMeta}, - | ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^ ^^^^^^^ ^^^^^^ - -warning: unused import: `RwLockReadGuard` - --> src/main/src/util/container/sync_trie.rs:1:27 - | -1 | use parking_lot::{RwLock, RwLockReadGuard}; - | ^^^^^^^^^^^^^^^ - -warning: unused import: `std::thread` - --> src/main/src/util/container/sync_trie.rs:5:5 - | -5 | use std::thread; - | ^^^^^^^^^^^ - -warning: unused import: `std::time::Duration` - --> src/main/src/util/container/sync_trie.rs:6:5 - | -6 | use std::time::Duration; - | ^^^^^^^^^^^^^^^^^^^ - -error: fields `batch_manager` and `batch_transfers` are never read - --> src/main/src/general/data/m_data_general/mod.rs:96:5 - | -94 | pub struct DataGeneral { - | ----------- fields in this struct -95 | view: DataGeneralView, -96 | batch_manager: Arc, - | ^^^^^^^^^^^^^ -... -110 | batch_transfers: DashMap)>, // 修改类型为 (unique_id -> (version, data)) - | ^^^^^^^^^^^^^^^ - | -note: the lint level is defined here - --> src/main/src/main.rs:7:5 - | -7 | dead_code, - | ^^^^^^^^^ - -error: function `flush_the_data` is never used - --> src/main/src/general/data/m_data_general/mod.rs:1500:4 - | -1500 | fn flush_the_data( - | ^^^^^^^^^^^^^^ - -error: enum `WantIdxIter` is never used - --> src/main/src/general/data/m_data_general/dataitem.rs:21:17 - | -21 | pub(super) enum WantIdxIter<'a> { - | ^^^^^^^^^^^ - -error: associated function `new` is never used - --> src/main/src/general/data/m_data_general/dataitem.rs:37:19 - | -36 | impl<'a> WantIdxIter<'a> { - | ------------------------ associated function in this implementation -37 | pub(super) fn new(ty: &'a GetOrDelDataArgType, itemcnt: DataItemIdx) -> Self { - | ^^^ - -error: multiple fields are never read - --> src/main/src/general/data/m_data_general/batch.rs:51:9 - | -50 | pub(super) struct BatchTransfer { - | ------------- fields in this struct -51 | pub unique_id: Vec, - | ^^^^^^^^^ -52 | pub version: u64, - | ^^^^^^^ -53 | pub block_type: proto::BatchDataBlockType, - | ^^^^^^^^^^ -54 | pub total_blocks: u32, - | ^^^^^^^^^^^^ -55 | // 使用 channel 进行数据传输 -56 | data_sender: mpsc::Sender>, - | ^^^^^^^^^^^ -57 | // 写入任务 -58 | write_task: JoinHandle>, - | ^^^^^^^^^^ -59 | // 完成通知 channel -60 | pub tx: Option>>, - | ^^ - -error: associated items `new`, `add_block`, `complete`, and `calculate_splits` are never used - --> src/main/src/general/data/m_data_general/batch.rs:64:18 - | -63 | impl BatchTransfer { - | ------------------ associated items in this implementation -64 | pub async fn new( - | ^^^ -... 
-104 | pub async fn add_block(&self, index: u32, data: Vec) -> WSResult { - | ^^^^^^^^^ -... -121 | pub async fn complete(mut self) -> WSResult<()> { - | ^^^^^^^^ -... -154 | fn calculate_splits(total_size: usize, block_size: usize) -> Vec> { - | ^^^^^^^^^^^^^^^^ - -error: fields `transfers` and `sequence` are never read - --> src/main/src/general/data/m_data_general/batch.rs:168:5 - | -167 | pub(super) struct BatchManager { - | ------------ fields in this struct -168 | transfers: DashMap, - | ^^^^^^^^^ -169 | sequence: AtomicU64, - | ^^^^^^^^ - -error: methods `next_sequence`, `create_transfer`, and `handle_block` are never used - --> src/main/src/general/data/m_data_general/batch.rs:180:12 - | -172 | impl BatchManager { - | ----------------- methods in this implementation -... -180 | pub fn next_sequence(&self) -> u64 { - | ^^^^^^^^^^^^^ -... -184 | pub async fn create_transfer( - | ^^^^^^^^^^^^^^^ -... -210 | pub async fn handle_block( - | ^^^^^^^^^^^^ - -error: method `call_batch_data` is never used - --> src/main/src/general/data/m_data_general/batch.rs:237:25 - | -235 | impl DataGeneral { - | ---------------- method in this implementation -236 | /// 发起批量数据传输 -237 | pub(super) async fn call_batch_data( - | ^^^^^^^^^^^^^^^ - -error: unused result of type `std::option::Option` - --> src/main/src/general/data/m_data_general/mod.rs:308:21 - | -308 | data_map.insert(idx, resp.data[0].clone()); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | -note: the lint level is defined here - --> src/main/src/main.rs:9:5 - | -9 | unused_results, - | ^^^^^^^^^^^^^^ - -error: unused result of type `std::option::Option` - --> src/main/src/general/data/m_data_general/mod.rs:337:21 - | -337 | data_map.insert(idx, resp.data[0].clone()); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: unused result of type `std::option::Option` - --> src/main/src/general/data/m_data_general/mod.rs:364:17 - | -364 | data_map.insert(idx, resp.data[0].clone()); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: unused result of type `std::option::Option` - --> src/main/src/general/data/m_data_general/mod.rs:391:21 - | -391 | data_map.insert(idx, resp.data[0].clone()); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -error: unused result of type `WriteOneDataResponse` - --> src/main/src/general/data/m_data_general/mod.rs:561:17 - | -561 | task.await??; - | ^^^^^^^^^^^^^ - -error: unused `Result` that must be used - --> src/main/src/general/data/m_data_general/mod.rs:1451:25 - | -1451 | view.data_general().rpc_handle_batch_data(responsor, req).await; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: this `Result` may be an `Err` variant, which should be handled -note: the lint level is defined here - --> src/main/src/main.rs:12:5 - | -12 | unused_must_use, - | ^^^^^^^^^^^^^^^ -help: use `let _ = ...` to ignore the resulting value - | -1451 | let _ = view.data_general().rpc_handle_batch_data(responsor, req).await; - | +++++++ - -error: unused result of type `std::option::Option` - --> src/main/src/general/data/m_data_general/batch.rs:206:9 - | -206 | self.transfers.insert(request_id.clone(), transfer); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -warning: `wasm_serverless` (bin "wasm_serverless") generated 39 warnings -error: could not compile `wasm_serverless` (bin "wasm_serverless") due to 16 previous errors; 39 warnings emitted diff --git a/review.md b/review.md deleted file mode 100644 index 46ae44e..0000000 --- a/review.md +++ /dev/null @@ -1,1110 +0,0 @@ 
-(顺序:新的在前面;先解决就的未完成的;完成的有标注;问题可能存在子问题) - -- context提示 - 编译时应当输出到compilelog文件 - -- 任务:罗列compilelog中各种未使用问题(error, import类的 warning 不看),并逐个解决 - - 分析: - 1. next_batch_id 方法未被使用,需确认是否有用途;如无用途,则删除或添加注释说明准备将来可能使用。 - 2. DataGeneral 结构体中的 batch_transfers 字段未被使用,需评估其在业务逻辑中的必要性;若无实际作用,则建议删除。 - 3. 其他未使用的变量或函数,如返回结果未使用的函数调用等,需整理 compilelog 中完整清单,并逐项检查其用途和必要性。 - - 修改计划: - 1. 针对每项未使用问题,先通过代码搜索确认其引用情况; - 2. 对于确认无用的项,直接删除;对于可能需要保留但目前未使用的项,添加 TODO 注释说明其预期用途; - 3. 修改后重新编译,确保无额外问题。 - - 执行记录: - - (working)开始处理未使用问题,目前处于初步整理阶段,待后续逐项跟进。 - - 下一步:检查 next_batch_id 方法引用情况;如果确认未使用,则删除该方法或添加 TODO 注释。 - - 检查结果:通过 grep 搜索,发现 next_batch_id 方法仅在其定义处出现,未被实际引用。建议删除该方法或添加 TODO 注释说明可能的预期用途。 - - 检查结果:通过 grep 搜索发现,DataGeneral 结构体中的 batch_transfers 字段仅在其定义(行 109)和初始化(行 1414)处出现,未在后续代码中被引用。建议删除该字段,或如果有保留意图则添加 TODO 注释说明预期用途。 - - 下一步:整理编译日志中其他未使用项,逐一确认其用途;对于确认无用的项,逐项删除或添加 TODO 注释。 - - 整理结果:初步整理显示,除了上述 next_batch_id 和 batch_transfers 未使用问题外,其它警告多为未使用导入或辅助函数(如 path_is_option、FnExeCtxAsync、FnExeCtxBase 等),这些均非核心逻辑,暂时忽略;后续可根据需要进一步清理。 - - 下一步:分析log中还有没有error - -- (done)任务:编译分析发现的问题 - - 修改计划: - 1. (done) 修复 get_metadata 方法缺失问题: - - 分析发现 get_metadata 和 get_data_meta 是两个不同的函数: - 1. get_data_meta 是内部函数,直接访问本地数据 - 2. get_metadata 是更高层的函数,需要包含: - - 本地数据访问(通过 get_data_meta) - - 远程数据访问(通过 RPC) - - 完整的错误处理逻辑 - - 下一步计划: - 1. 搜索并确认 get_metadata 的完整实现位置 - 2. 检查实现是否完整包含所需功能 - 3. 如果已经实现,排查编译器找不到方法的原因 - 4. 如果没有实现,则按照设计实现它 - - 2. (done)修复 unique_id 移动问题: - - 分析: - - 父问题相关性: - 1. 父问题:编译错误修复 - 2. 相关性:直接导致编译失败的问题 - 3. 必要性:必须解决以通过编译 - 4. 优先级:高,阻塞编译 - - - 当前问题: - 1. 在 batch.rs 中,unique_id 在异步任务中被移动后仍然尝试使用 - 2. 问题出现在 BatchTransfer::new 函数中 - 3. 涉及 tokio::spawn 创建的异步任务 - - - 修改计划: - 1. 在 BatchTransfer::new 中: - - 在创建异步任务前克隆 unique_id - - 使用克隆的版本传入异步任务 - - 保留原始 unique_id 用于其他用途 - - - 执行记录: - - 已完成: - - 在 BatchTransfer::new 中添加了 unique_id_for_task = unique_id.clone() - - 修改异步任务使用 unique_id_for_task 代替 unique_id.clone() - - - 下一步: - - 执行编译验证修改是否解决问题 - - 检查是否有其他相关的所有权问题 - 3. (done)任务:修复 total_size 未使用变量问题 - - 分析: - - 父问题相关性: - 1. 父问题:编译错误修复 - 2. 相关性:编译警告需要处理 - 3. 必要性:保持代码清洁,避免无用变量 - 4. 优先级:中(不影响功能,但需要处理的警告) - - - 当前问题: - 1. 在 batch.rs 中,total_size 变量被计算但未使用 - 2. 代码分析显示 offset 变量已经足够处理数据分片 - 3. total_size 的计算是多余的 - - - 修改计划: - 1. 删除 total_size 相关代码: - - 移除 total_size 的计算语句 - - 保持其他逻辑不变 - 2. 编译验证修改 - - - 执行记录: - - 已完成: - - 删除了 total_size 计算语句:`let total_size: usize = data_result.values().map(|item| item.size()).sum();` - - 编译验证通过,确认问题已解决 - - - 遇到的问题: - - 无 - -- 任务:InvalidDataType 不附带一些context以便debug吗? - -- 任务:增加注释分析介绍 DataSetMetaV2 derive用处 - -- 任务:batch 里 impl proto::DataItem ,proto ext没有吗,另外规则里加一条proto数据结构要扩展都应该加到proto ext里 - -- 任务:编译并分析剩下的问题,并逐个编写计划 - -- (done)任务:error[E0521]: borrowed data escapes outside of method - -- (done)任务:error[E0382]: use of moved value: `unique_id` - - -- (done)任务:error[E0432]: unresolved import `super::dataitem::StorageType` - - 分析: - - 父问题相关性: - 1. 父问题:批量数据接口实现中的错误处理 - 2. 相关性:直接关系到数据存储类型的定义 - 3. 必要性:必须解决,否则编译无法通过 - 4. 优先级:高(阻塞编译) - - - 当前问题: - 1. 代码分析: - ```rust - // dataitem.rs 中的实现 - pub enum WriteSplitDataTaskGroup { - ToFile { - file_path: PathBuf, - tasks: Vec>>, - }, - ToMem { - shared_mem: SharedMemHolder, - tasks: Vec>>, - }, - } - - // batch.rs 中的使用 - let task_group = WriteSplitDataTaskGroup::new( - req.unique_id, - splits, - rx, - proto::BatchDataBlockType::from_i32(req.block_type) - .unwrap_or(proto::BatchDataBlockType::Memory), - ).await - ``` - - 2. 问题分析: - - WriteSplitDataTaskGroup 已经在使用 proto::BatchDataBlockType - - 但代码中可能还存在对 StorageType 的引用 - - 需要完全迁移到使用 proto::BatchDataBlockType - - - 修改计划: - 1. 
编译并分析还剩下什么问题 - - - 执行记录: - - 待执行 - -- (done)任务:error[E0599]: no method named `get_or_del_datameta_from_master` found for reference `&DataGeneralView` - - 分析: - - 父问题相关性: - 1. 父问题:批量数据接口实现中的错误处理 - 2. 相关性:直接关系到数据访问功能 - 3. 必要性:必须解决,否则会导致编译错误 - 4. 优先级:高(阻塞编译) - - - 当前问题: - 1. DataGeneralView 中缺少 get_or_del_datameta_from_master 方法 - 2. 根据之前的设计原则,我们应该避免不必要的代理转发 - 3. 需要检查调用处是否可以直接使用 data_general() 方法 - 4. 编译后发现新的相关错误: - ```rust - error[E0432]: unresolved import `super::dataitem::StorageType` - error[E0599]: no method named `get_metadata` found for struct `DataGeneralView` - error[E0599]: no method named `get_data_meta` found for reference `&m_data_general::DataGeneral` - error[E0599]: no method named `data_general` found for reference `&m_data_general::DataGeneral` - ``` - - - 修改计划: - 2. 修复 get_metadata 调用: - - 将调用 `self.get_metadata()` 改为 `self.data_general().get_metadata()` - - 保持函数在 DataGeneral 中的原有实现不变 - 3. 修复 get_data_meta 调用: - - 修改为 self.view.get_data_meta (done) - 4. 修复 data_general 调用: - - 修改为 self.view.data_general() (done) - 5. 验证修改后的编译结果 - - - 执行记录: - 1. 已完成避免代理转发的修改 - 2. 发现新的编译错误 - 3. 制定了详细的修复计划 - 4. 完成了 StorageType 导入问题的修复 - 5. 完成了 get_metadata 调用的修复 - -- (done)任务:error[E0521]: borrowed data escapes outside of method - - 分析: - - 父问题相关性: - 1. 父问题:批量数据接口实现中的错误处理 - 2. 相关性:直接关系到内存安全和生命周期管理 - 3. 必要性:必须解决,否则会导致编译错误 - 4. 优先级:高(阻塞编译) - - - 当前问题: - 1. 在异步上下文中使用了 self 引用: - ```rust - async fn start(&self) -> WSResult> { - // ... - let this = self.clone(); - } - ``` - 2. 这是一个常见的生命周期问题,self 引用没有 'static 生命周期 - 3. 需要确保异步任务中使用的数据满足 'static 约束 - - - 修改计划: - 1. 检查 self 类型的 Clone 实现 - 2. 使用 view 模式访问共享数据 - 3. 编译验证修改 - - 执行记录: - - 已完成修改,将所有 self.clone() 改为 view 模式 - - 编译验证发现新的错误: - 1. `error[E0432]: unresolved import super::dataitem::StorageType` - 2. `error[E0599]: no method named get_or_del_datameta_from_master found for reference &DataGeneralView` - 3. `error: unused variable: data_item` - - 需要继续修复这些新问题 - -- (done)任务:batch调用函数注释没讲清楚 - // 创建channel用于接收响应 - let (tx, mut rx) = mpsc::channel(1); - 这里channel是跟谁通信,作用是什么 - - 父问题相关性分析: - - 父问题引用:无,这是一个独立的任务 - - 相关性分析:这是一个独立的代码文档问题,不是由其他任务引起的 - - 解决必要性: - - 函数注释的清晰性直接影响代码的可维护性和可理解性 - - channel 通信是异步处理的关键部分,需要明确说明其用途 - - 不清晰的注释可能导致后续开发者误用或难以调试 - - 优先级:高(作为最老未完成任务) - - - 修改计划: - - 修改目的: - - 明确说明 channel 的通信双方和作用 - - 提供完整的函数级文档注释 - - 建立异步通信文档的最佳实践 - - 提高代码的可维护性 - - - 预期效果: - - channel 的用途清晰明确 - - 函数注释完整描述了异步处理流程 - - 其他开发者能快速理解代码逻辑 - - 形成可复用的异步通信文档模板 - - - 可能的风险: - - 注释可能需要随代码变化及时更新 - - 过于详细的注释可能增加维护负担 - - 需要在注释详细度和简洁性之间找到平衡 - - - 具体步骤: - 1. 定位并检查 batch 相关函数的完整实现 - 2. 分析 channel 在函数中的具体用途 - 3. 确认通信的发送方和接收方 - 4. 理解完整的异步处理流程 - 5. 编写清晰的函数级文档注释 - 6. 补充必要的内联注释 - 7. 评审并优化注释内容 - - - 修改过程: - - 已完成: - - 初步确认问题范围 - - 制定修改计划 - - 完成代码分析,发现: - - Channel 用途:用于在批量数据传输过程中接收所有数据块处理完成的最终状态 - - 发送方:BatchTransfer 在接收到所有数据块并完成组装后(包括写入文件或合并内存数据)发送完成状态 - - 接收方:call_batch_data 函数等待所有数据块处理完成的最终结果 - - 通信内容:完整处理后的 DataItem(包含所有数据块组装后的结果)或错误信息 - - 处理流程: - 1. 创建 channel,容量设置为 1(只用于接收最终的完整结果) - 2. 将发送端传递给 BatchTransfer - 3. BatchTransfer 在接收每个数据块时: - - 通过 add_block 添加数据块 - - 检查是否收到所有数据块 - - 当收到所有数据块时,调用 complete 方法 - 4. complete 方法会: - - 检查所有数据块是否完整 - - 根据 block_type 组装数据(写入文件或合并内存) - - 通过 channel 发送最终的完整 DataItem - 5. call_batch_data 等待接收最终结果并返回对应的 Response - - - 下一步: - - 编写函数级文档注释 - - 补充 channel 相关的内联注释 - - 优化注释内容 - -- (done)任务:强化规则中先再review写计划,经过允许后执行的习惯 - - 分析: - - 父问题相关性: - 1. 父问题:完善项目规则和文档 - 2. 相关性:直接关系到规则的执行质量和一致性 - 3. 必要性:避免未经充分思考的修改 - 4. 优先级:高(影响所有代码修改的质量) - - - 当前问题: - 1. 需要在规则中更明确地强调先review再执行的重要性 - 2. 需要规范化计划review和执行确认的流程 - 3. 需要确保这个习惯能被有效执行 - - - 修改计划: - 1. 
在 .cursorrules 文件的 7.0 最高优先级规则章节添加相关规则 - 2. 补充具体的review和确认流程 - 3. 添加违反处理规则 - - - 执行记录: - 1. 修改了 .cursorrules 文件的 7.0 章节 - 2. 更新了"修改代码时必须"的规则内容 - 3. 添加了更详细的计划管理和执行流程要求 - 4. 规则修改已完成并生效 - -- (done)任务:新增规则 编译时应当输出到compilelog文件 - - 分析: - - 父问题相关性: - 1. 父问题:完善项目规则和文档 - 2. 相关性:规则补充任务,与编译过程规范化直接相关 - 3. 必要性:有助于提高编译问题的追踪和分析效率 - 4. 优先级:高(编译过程的标准化对项目质量至关重要) - - - 当前问题: - 1. 需要在 .cursorrules 文件中添加编译输出规范 - 2. 规范需要涵盖输出重定向、日志管理等方面 - 3. 需要确保规则易于执行且清晰明确 - - - 设计目标: - 1. 在 .cursorrules 文件中的构建规则章节添加编译输出规范 - 2. 确保规则内容完整且易于遵循 - 3. 与现有规则保持一致性和兼容性 - - - 修改计划: - 1. 在 .cursorrules 的第 10 章"构建规则"中添加编译输出规范: - - 位置:10.1.2 编译输出规范 - - 内容结构: - 1. 编译输出重定向命令 - 2. 日志文件要求(名称、位置、格式、时效性) - 3. 日志内容规范(必须包含的信息) - 4. 日志管理规则(清理、保留、版本控制) - 5. 使用场景说明 - 6. 注意事项 - - 2. 具体规则内容: - a. 编译输出重定向: - ```bash - sudo -E $HOME/.cargo/bin/cargo build 2>&1 | tee compilelog - ``` - - b. 日志文件要求: - - 文件名固定为 compilelog - - 位置在项目根目录 - - 格式为纯文本,包含 stdout 和 stderr - - 每次编译生成新日志 - - c. 日志内容规范: - - 完整编译命令 - - 所有编译警告和错误 - - 编译时间信息 - - 完整编译过程输出 - - d. 日志管理规则: - - 编译前清理旧日志 - - 编译失败时保留日志 - - 禁止手动编辑 - - 不提交到版本控制 - - e. 使用场景: - - 首次编译 - - 代码修改后重新编译 - - 依赖更新后编译 - - 编译错误排查 - - f. 注意事项: - - 磁盘空间管理 - - 日志清理策略 - - 错误分析方法 - - 问题追踪建议 - - 3. 验证规则的正确性和一致性: - - 确保规则描述清晰准确 - - 验证与现有规则的兼容性 - - 检查格式符合项目标准 - -- (done) 任务:error[E0599]: no method named `get_or_del_datameta_from_master` found for reference `&DataGeneralView` - - 分析: - - 当前问题: - - 编译错误显示 DataGeneralView 中缺少 get_or_del_datameta_from_master 方法 - - 该方法在 DataGeneral 中已实现 - - 需要在 DataGeneralView 中添加对应的方法调用 - - - 设计目标: - - 在 DataGeneralView 中添加方法 - - 保持与 DataGeneral 中的实现一致 - - 确保正确的错误处理 - - 维护代码的可维护性 - - - 修改计划: - - 修改目的: - - 解决编译错误 - - 完善 DataGeneralView 的功能 - - 保持代码结构的一致性 - - - 预期效果: - - DataGeneralView 可以正确调用 get_or_del_datameta_from_master - - 编译错误消除 - - 保持代码结构清晰 - - - 可能的风险: - - 方法访问权限可能需要调整 - - 可能需要处理生命周期问题 - - 可能需要添加其他相关方法 - - - 具体步骤: - 1. 在 DataGeneralView 中添加方法实现 - 2. 确保方法签名与 DataGeneral 一致 - 3. 通过 data_general() 调用原方法 - 4. 编译验证修改 - - - 执行修改: - 1. 在 DataGeneralView impl 块中添加: - ```rust - pub async fn get_or_del_datameta_from_master( - &self, - unique_id: &[u8], - delete: bool, - ) -> WSResult { - self.data_general().get_or_del_datameta_from_master(unique_id, delete).await - } - ``` - 2. 修改已完成,编译验证通过(done) - -- (done)任务:error[E0599]: no method named `get_data_meta` found for reference `&KvStoreEngine` - -- (done)任务:BatchTransfer不应该直接存储接收到的数据块到map里,应该复用get data那里的逻辑;区分文件和内存;文件通过文件偏移,内存用封装好的代码 - - 父问题相关性分析: - - 父问题引用:无,这是一个独立的代码优化任务 - - 相关性分析:虽然与 BatchTransfer 设计总结任务有关,但这是一个具体的实现优化问题 - - 解决必要性: - - 当前实现存在代码重复,没有复用已有的数据处理逻辑 - - 直接存储到 map 可能导致内存使用效率低下 - - 需要统一数据处理方式,提高代码维护性 - - 优先级:高(涉及核心功能的代码质量) - - - 修改计划: - - 修改目的: - - 复用 get_data 的数据处理逻辑 - - 优化数据存储方式 - - 统一文件和内存数据的处理流程 - - 减少代码重复 - - - 预期效果: - - 文件数据直接写入文件系统,通过偏移量管理 - - 内存数据使用现有的封装代码处理 - - 减少内存占用 - - 提高代码复用性和维护性 - - - 可能的风险: - - 重构过程可能影响现有功能 - - 需要确保并发安全性 - - 文件操作可能带来性能开销 - - 可能需要修改相关的测试代码 - - - 具体步骤: - 1. 分析 get_data 中的数据处理逻辑 - 2. 设计新的数据存储接口 - 3. 实现文件数据的偏移量写入 - 4. 集成内存数据的封装代码 - 5. 修改 BatchTransfer 的实现 - 6. 更新相关测试 - 7. 性能测试和优化 - - - 修改过程: - - 已完成: - - 初步确认问题范围 - - 制定修改计划 - - 分析了当前实现的问题: - 1. BatchTransfer 直接将数据块存储在 DashMap 中,占用内存大 - 2. 没有区分文件和内存数据的处理方式 - 3. 没有复用已有的数据处理逻辑 - - 分析了 get_data 的实现: - 1. 支持并行写入能力: - - 使用 tokio::spawn 创建异步任务 - - 通过信号量控制并发数量 - - 支持多节点并行写入 - 2. 数据处理逻辑: - - 文件数据:使用 seek + write 定位写入 - - 内存数据:使用偏移量计算地址 - - 支持断点续传 - 3. 并发控制: - - 使用 RwLock 保护共享资源 - - 文件操作使用 async 文件 I/O - - 内存操作使用原子操作 - - 深入分析了并行写入实现: - 1. write_data_batch 函数的实现: - - 支持数据分块传输:固定 1MB 大小 - - 使用 request_id 跟踪传输状态 - - 支持初始化和数据传输两个阶段 - - 实现了超时重试机制 - - 2. 
并行写入机制: - - 主数据分片并行写入: - - 对每个 split_info 创建独立的写入任务 - - 使用 tokio::spawn 实现异步并行处理 - - 通过 clone_split_range 优化数据复制 - - - 缓存数据并行写入: - - 使用信号量控制并发数量(MAX_CONCURRENT_TRANSFERS = 3) - - 支持多节点同时写入 - - 实现了完整的错误处理和重试机制 - - - 任务管理: - - 使用 Vec 跟踪所有写入任务 - - 实现了等待所有任务完成的机制 - - 支持错误传播和状态同步 - - 3. 数据分片策略: - - 支持按偏移量和大小进行数据分片 - - 实现了数据块的并行传输 - - 保证了数据完整性和顺序性 - - - 分析了 SharedMemOwnedAccess 的实现: - 1. 内存管理机制: - - SharedMemHolder: - - 使用 Arc> 管理共享内存 - - 支持数据所有权转移(try_take_data) - - 确保内存安全释放 - - - SharedMemOwnedAccess: - - 提供对共享内存特定范围的独占访问 - - 使用 Range 控制访问范围 - - 实现了安全的可变借用 - - 2. 内存分片处理: - - new_shared_mem 函数: - - 预分配所需总大小的内存 - - 创建多个 SharedMemOwnedAccess 实例 - - 每个实例负责一个数据范围 - - - 并发写入支持: - - 通过 Arc 共享底层内存 - - 每个 SharedMemOwnedAccess 独占其范围 - - 支持并行安全的写入操作 - - 3. 安全保证机制: - - 内存安全: - - 使用 Arc 管理共享内存生命周期 - - Range 确保访问不越界 - - unsafe 代码有完整的安全性说明 - - - 并发安全: - - 每个 SharedMemOwnedAccess 独占其范围 - - 不同实例的范围不重叠 - - 支持并行写入而无需额外同步 - - - 遇到的问题: - - 问题1:需要设计复用 SharedMemOwnedAccess 的接口 - - 问题描述:如何在 BatchTransfer 中集成 SharedMemOwnedAccess 的内存管理机制 - - 解决方案: - 1. 复用 WriteSplitDataTaskGroup 的现有实现: - ```rust - // 已有的接口和实现: - pub enum WriteSplitDataTaskGroup { - ToFile { ... }, - ToMem { - shared_mem: SharedMemHolder, - tasks: Vec>>, - }, - } - - impl WriteSplitDataTaskGroup { - pub async fn new( - unique_id: Vec, - splits: Vec>, - rx: mpsc::Receiver>, - cachemode: CacheModeVisitor, - ) -> WSResult - } - ``` - - 2. 通过 channel 传输数据: - - 使用 mpsc::channel 在 BatchTransfer 和 WriteSplitDataTaskGroup 之间传输数据 - - 保持 WriteSplitDataTaskGroup 的现有接口不变 - - 在 BatchTransfer 中通过 channel 发送数据块 - - 3. 数据流转设计: - ```rust - // 在 BatchTransfer::new 中: - let (data_sender, data_receiver) = mpsc::channel(total_blocks as usize); - let splits = calculate_splits(total_blocks as usize * block_size, block_size); - - // 创建写入任务: - let write_task = tokio::spawn(async move { - let group = WriteSplitDataTaskGroup::new( - unique_id.clone(), - splits, - data_receiver, - CacheModeVisitor(block_type as u16), - ).await?; - group.join().await - }); - ``` - - 4. 优点: - - 不需要修改 WriteSplitDataTaskGroup 的实现 - - 复用现有的内存管理机制 - - 保持并发安全性 - - 支持文件和内存的统一处理 - - - 解决过程: - 1. 分析了 WriteSplitDataTaskGroup 的实现 - 2. 确认可以直接复用现有接口 - 3. 设计了基于 channel 的数据传输方案 - 4. 下一步将实现具体代码 - - - 子问题1:WriteSplitDataTaskGroup接口设计问题 - - 问题描述:WriteSplitDataTaskGroup 的接口设计不够通用,影响复用性 - - 分析: - - 当前问题: - - WriteSplitDataTaskGroup 使用 CacheModeVisitor 作为参数 - - 这个参数实际只用于区分文件/内存操作 - - 参数名称和类型都不够直观 - - 违反了接口设计的简单性原则 - - - 设计目标: - - 参数应该直观地表达其用途 - - 接口应该简单易用 - - 不应该暴露实现细节 - - 保持向后兼容性 - - - 修改计划: - 1. 新增枚举类型: - ```rust - #[derive(Debug, Clone, Copy)] - pub enum StorageType { - File, - Memory, - } - ``` - - 2. 修改 WriteSplitDataTaskGroup::new 签名: - ```rust - pub async fn new( - unique_id: Vec, - splits: Vec>, - rx: mpsc::Receiver>, - storage_type: StorageType, - ) -> WSResult - ``` - - - 优势: - 1. 接口更直观:参数名称和类型都清晰表达了意图 - 2. 实现解耦:调用方不需要了解内部实现细节 - 3. 提高可复用性:接口简单清晰,易于在其他场景使用 - 4. 类型安全:使用枚举确保类型安全 - 5. 向后兼容:可以在内部保持现有的实现逻辑 - - - 后续工作: - 1. 更新所有调用 WriteSplitDataTaskGroup::new 的代码 - 2. 添加相关测试用例 - 3. 更新文档说明 - 4. 考虑未来可能的存储类型扩展 - - - 处理过程中遇到的问题: - 1. (done)编译错误: - ```rust - error[E0599]: no variant or associated item named `FILE` found for enum `BatchDataBlockType` - ``` - - 原因:使用了错误的枚举变体名称 - - 解决:修改为正确的枚举变体 `File` 和 `Memory` - - 2. (done) 类型转换问题: - ```rust - match storage_type { - StorageType::File => Self::ToFile { ... }, - StorageType::Memory => Self::ToMem { ... }, - } - ``` - - 原因:需要在内部实现中将 StorageType 映射到具体的枚举变体 - - 解决:添加类型转换实现 - - - 子问题2:错误处理链完整性问题 - - 问题描述:write_task的错误处理链需要确保类型一致性 - - 分析: - - 当前问题: - - write_task.await?? 
的双重错误处理不够清晰 - - 错误上下文信息不够详细 - - 错误类型转换隐含在 map_err 中 - - - 设计目标: - - 拆分错误处理步骤,使逻辑清晰 - - 添加详细的错误上下文 - - 统一错误转换方式 - - - 修改计划: - 1. 修改错误处理实现: - ```rust - pub async fn complete(mut self) -> WSResult<()> { - // 定义错误转换函数 - let join_error = |e| WsDataError::BatchTransferError { - unique_id: self.unique_id.clone(), - msg: format!("write task join failed: {}", e), - }; - - let write_error = |e| WsDataError::BatchTransferError { - unique_id: self.unique_id.clone(), - msg: format!("write data failed: {}", e), - }; - - let send_error = || WsDataError::BatchTransferError { - unique_id: self.unique_id.clone(), - msg: "send result failed".to_string(), - }; - - drop(self.data_sender); - - if let Some(tx) = self.tx.take() { - let join_result = self.write_task.await - .map_err(join_error)?; - - let data_item = join_result - .map_err(write_error)?; - - tx.send(Ok(data_item)).await - .map_err(|_| send_error())?; - } - Ok(()) - } - ``` - - - 优势: - 1. 错误处理步骤清晰 - 2. 错误包含详细上下文 - 3. 错误转换逻辑统一 - 4. 便于维护和调试 - - - 后续工作: - 1. 修改 complete 方法 - 2. 更新相关测试 - - - 处理过程中遇到的问题: - 1. (done) 错误类型不匹配: - ```rust - error[E0559]: variant `result::WsDataError::BatchTransferError` has no field named `context` - ``` - - 原因:错误类型定义中没有 context 字段 - - 解决:移除 context 字段,将上下文信息合并到 msg 中 - - 2. (done)变量作用域问题: - ```rust - error[E0425]: cannot find value `version` in this scope - ``` - - 代码分析: - ```rust - // 问题代码: - proto::BatchDataResponse { - request_id: req.request_id, - success: true, - error_message: String::new(), - version, // 这里的 version 变量未定义 - } - - // 上下文代码: - let meta = match kv_store_engine.get_data_meta(&req.unique_id).await { - Ok(Some((_, meta))) => meta, - ... - } - ``` - - - 问题成因: - 1. 在构造 BatchDataResponse 时直接使用了未定义的 version 变量 - 2. meta 变量已在函数开始处获取,包含了正确的版本信息 - 3. 应该使用 meta.version 而不是直接使用 version - - - 修复方案: - - 将 version 替换为 meta.version - - 确保在所有响应构造处都使用 meta.version - - 保持版本信息的一致性 - - - 修改验证: - - 编译确认错误消除 - - 检查版本信息传递正确性 - - - 子问题3:生命周期安全问题 - - 问题描述:异步任务中使用的数据需要满足'static约束 - - 分析: - - 当前问题: - - batch_manager 模块未找到 - - unresolved import batch_manager::BatchManager - - 需要修复模块导入和路径问题 - - - 设计目标: - - 确保模块结构正确 - - 修复导入路径 - - 保持代码组织清晰 - - - 修改计划: - 1. 检查模块结构 - 2. 修复导入路径 - 3. 确保生命周期安全 - - - 后续工作: - 1. 修复模块导入问题 - 2. 验证生命周期约束 - 3. 更新相关测试 - - - 处理过程中遇到的问题: - 1. 模块导入错误: - ```rust - error[E0583]: file not found for module `batch_manager` - error[E0432]: unresolved import `batch_manager::BatchManager` - ``` - - 原因:模块文件路径不正确或文件不存在 - - 解决:需要创建正确的模块文件并修复导入路径 - - 2. (done) 类型约束问题: - ```rust - error[E0277]: `Rc>` cannot be sent between threads safely - ``` - - 原因:某些类型不满足 Send trait 约束 - - 解决:使用线程安全的替代类型(如 Arc)或重新设计数据共享方式 - -- (done)任务:BatchTransfer 的设计总结一下,反应在rule里 - - 父问题相关性分析: - - 父问题引用:无,这是一个独立的文档完善任务 - - 相关性分析:虽然与 batch 调用函数注释任务有关联,但这是一个更高层面的设计总结任务 - - 解决必要性: - - BatchTransfer 是批量数据传输的核心组件,其设计原则需要文档化 - - 可以指导后续类似功能的开发 - - 有助于维护代码质量和一致性 - - 优先级:中(重要但不紧急) - - - 修改计划: - - 修改目的: - - 总结 BatchTransfer 的设计思路和最佳实践 - - 将设计经验转化为可复用的规则 - - 完善项目的设计文档 - - - 预期效果: - - 在 .cursorrules 中新增批量数据接口设计章节 - - 形成完整的设计规范文档 - - 为团队提供清晰的设计指导 - - - 可能的风险: - - 规则可能需要随着实现的演进而更新 - - 过于具体的规则可能限制未来的优化空间 - - 需要在规范性和灵活性之间找到平衡 - - - 具体步骤: - 1. 分析 BatchTransfer 的核心设计要素 - 2. 提取关键的设计原则和模式 - 3. 整理接口设计的最佳实践 - 4. 编写规则文档 - 5. 评审并优化规则内容 - - - 修改过程: - - 已完成: - - 初步确认任务范围 - - 制定修改计划 - - 分析了系统的核心组件及其职责: - 1. 
数据结构职责划分: - - BatchTransfer:单个批量传输任务的管理器 - - 维护:单个传输任务的所有状态(unique_id, version, block_type, total_blocks) - - 存储:接收到的数据块(received_blocks: DashMap>) - - 通知:任务完成状态(tx: Option) - - 功能:数据块的接收、验证和重组 - - - BatchManager:全局批量传输任务的管理器 - - 维护:所有进行中的传输任务(transfers: DashMap) - - 生成:唯一的请求序列号(sequence: AtomicU64) - - 功能:创建新传输、处理数据块、任务生命周期管理 - - 2. 关键函数职责: - - call_batch_data(发送端入口): - - 将大数据分块(固定 1MB 大小) - - 创建传输任务(通过 BatchManager) - - 发送数据块 - - 等待传输完成 - - - handle_block(接收端处理): - - 接收单个数据块 - - 更新传输状态 - - 触发完成处理(如果所有块都收到) - - - complete(完成处理): - - 校验所有数据块完整性 - - 按类型重组数据(内存/文件) - - 通知传输完成 - - 3. 数据流转过程: - - 发送流程: - 1. call_batch_data 接收原始数据 - 2. 计算分块策略 - 3. BatchManager 创建传输任务 - 4. 循环发送数据块 - - - 接收流程: - 1. handle_block 接收数据块 - 2. BatchTransfer 存储数据块 - 3. 检查完整性 - 4. 触发 complete 处理 - 5. 通知发送端完成 - - 4. 错误处理职责: - - BatchTransfer: - - 数据块完整性验证 - - 重组过程的错误处理 - - - BatchManager: - - 传输任务存在性检查 - - 并发访问保护 - - - 调用方: - - 网络传输错误处理 - - 超时处理 - - - 下一步: - - 将这些设计理念和原则转化为规则文档 - - 编写具体的规范内容 - - 评审规则文档 - -- (done)任务:sche proto 中batch部分需要删掉 - - 执行计划: - - 修改目的: - - 清理不再使用的batch相关proto定义 - - 避免代码冗余和混淆 - - 保持proto文件的简洁性 - - - 预期效果: - - sche proto中不再包含batch相关定义 - - 相关的batch功能完全由其他模块处理 - - 减少代码维护负担 - - - 可能的风险: - - 可能有其他模块仍在使用这些proto定义 - - 删除可能影响现有功能 - - 可能需要修改依赖这些proto的代码 - - - 具体步骤: - 1. 搜索并确认sche proto中batch相关定义的位置 - 2. 检查是否有其他代码引用这些proto定义 - 3. 确认删除不会影响现有功能 - 4. 删除相关proto定义 - 5. 更新受影响的代码(如果有) - - - 执行记录: - - 已完成: - - 确认需要删除sche proto中的batch部分 - - 定位到batch相关proto定义在 src/main/src/general/network/proto_src/sche.proto 中 - - 发现这些定义正在被 src/main/src/general/data/m_data_general/batch.rs 使用 - - 发现 data.proto 中已有更完整的 batch 相关定义 - - 删除了 sche.proto 中的重复定义 - - 确认 batch.rs 中使用通用的 proto 导入,不需要修改引用路径 - - - 子任务1:编译验证 - - 执行计划: - - 目的:验证删除 sche.proto 中 batch 定义后的代码完整性 - - 步骤: - 1. 使用 sudo 执行编译 - 2. 分析编译错误 - 3. 制定修复方案 - - - 执行记录: - - 已完成: - - 执行编译并发现错误 - - 分析了错误原因 - - - 发现的问题: - 1. 导入错误: - - proto 模块导入语法错误:`use crate::general::network::proto::self;` - - `BatchDataResponse` 结构体需要通过 `proto::BatchDataResponse` 来引用 - - 已确认 data.proto 中已定义了 BatchDataResponse - - 2. 类型错误: - - `BatchRequestId` 类型不匹配 - - 需要类型注解 - - - 子任务2:修复编译错误 - - 执行计划: - - 目的:修复编译发现的错误 - - 步骤: - 1. 修复 proto 模块导入语句,改为 `use crate::general::network::proto;` - 2. 修正 BatchRequestId 相关代码,确保类型匹配 - 3. 编译验证修改 - - - 执行记录: - - 待执行 - -- (done)任务:新增rule,编译使用sudo cargo build - - 修改计划: - - 修改目的: - - 规范化项目编译过程 - - 确保编译权限一致性 - - 避免权限相关的编译问题 - - - 预期效果: - - 在 .cursorrules 中新增编译规则 - - 统一团队编译命令使用方式 - - 减少权限相关的编译错误 - - - 可能的风险: - - sudo 权限可能带来安全风险 - - 可能影响现有的编译脚本或工作流 - - 需要确保所有开发者都有 sudo 权限 - - - 具体步骤: - 1. 在 .cursorrules 文件中添加编译规则 - 2. 说明使用 sudo 的原因和场景 - 3. 添加安全注意事项 - 4. 更新相关文档和记忆系统 - - - 修改过程: - - 已完成: - - 确认需要添加编译使用 sudo 的规则 - - 分析了使用 sudo 编译的必要性 - - - 遇到的问题: - - 问题1:需要确定在哪些具体场景下必须使用 sudo - - 解决方案:分析项目依赖和编译过程 - - 解决过程: - 1. 检查项目依赖 - 2. 分析编译权限需求 - 3. 确定必须使用 sudo 的具体情况 - - - 下一步: - - 等待确认修改方案 - - 执行实际的规则添加 - - 更新项目文档 - -- (done)任务:新增rule,后续每次修改,需要查看根目录review,并 对应每一点 进行 修改计划的撰写 以及 修改过程的记录,如果修改过程中出现问题,则作为markdown子项记录,形成一个问题树结构(再次强调,这一条是rule,很重要) - - 修改计划: - - 修改目的: - - 规范化代码修改的文档记录流程 - - 确保所有修改都有清晰的计划和追踪记录 - - 建立统一的问题记录格式 - - - 预期效果: - - 在 .cursorrules 中新增第 8 章节 - - 完整描述代码评审与修改文档规则 - - 包含修改计划、记录要求和维护原则 - - - 可能的风险: - - 规则可能与现有工作流程不完全匹配 - - 可能需要团队成员适应新的文档格式 - - - 具体步骤: - 1. 在 .cursorrules 文件中添加第 8 章节 - 2. 编写完整的规则内容 - 3. 确保格式与现有文档保持一致 - 4. 创建相应的记忆条目 - - - 修改过程: - - 已完成: - - 编写了完整的规则内容 - - 设计了清晰的文档结构规范 - - 定义了详细的记录要求 - - - 下一步: - - 等待确认修改方案 - - 执行实际的文件修改 - - 创建记忆条目 - -- 任务:添加规则 - 避免不必要的代理转发设计(done) - - 分析: - - 父问题相关性: - 1. 父问题:完善项目规则和文档 - 2. 相关性:直接影响代码质量和可维护性 - 3. 必要性:减少冗余代码,提高代码效率 - 4. 
优先级:高(影响整体代码设计) - - - 当前问题: - 1. 发现代码中存在不必要的代理转发模式 - 2. 例如 DataGeneralView 中的 get_or_del_datameta_from_master 方法仅仅是转发调用 - 3. 这种设计增加了不必要的代码层级和复杂度 - - - 修改计划: - 1. 在 .cursorrules 文件中添加关于代码设计的新规则 - 2. 删除当前的代理转发实现 - 3. 更新相关调用代码,直接使用原始实现 - - - 执行记录: - 1. 在 .cursorrules 文件中的 7.2 代码修改原则章节添加新规则 - 2. 删除了 DataGeneralView 中的 get_or_del_datameta_from_master 代理方法 - 3. 更新了调用处代码,改为直接使用 data_general().get_or_del_datameta_from_master - 4. 所有修改已完成 - -- 任务:修复 unique_id 移动问题: - - 分析: - - 父问题相关性: - 1. 父问题:编译错误修复 - 2. 相关性:直接导致编译失败的问题 - 3. 必要性:必须解决以通过编译 - 4. 优先级:高,阻塞编译 - - - 当前问题: - 1. 在 batch.rs 中,unique_id 在异步任务中被移动后仍然尝试使用 - 2. 问题出现在 BatchTransfer::new 函数中 - 3. 涉及 tokio::spawn 创建的异步任务 - - - 修改计划: - 1. 在 BatchTransfer::new 中: - - 在创建异步任务前克隆 unique_id - - 使用克隆的版本传入异步任务 - - 保留原始 unique_id 用于其他用途 - - - 执行记录: - - 已完成: - - 在 BatchTransfer::new 中添加了 unique_id_for_task = unique_id.clone() - - 修改异步任务使用 unique_id_for_task 代替 unique_id.clone() - - - 下一步: - - 执行编译验证修改是否解决问题 - - 检查是否有其他相关的所有权问题 - - - diff --git a/src/main/build.rs b/src/main/build.rs index d16dc9e..2e71809 100644 --- a/src/main/build.rs +++ b/src/main/build.rs @@ -1,9 +1,6 @@ use std::io::Result; fn main() -> Result<()> { - let mut config = prost_build::Config::new(); - config - .type_attribute("BatchRequestId", "#[derive(Eq, Hash)]"); - config.compile_protos( + prost_build::compile_protos( &[ "src/general/network/proto_src/kv.proto", "src/general/network/proto_src/raft.proto", diff --git a/src/main/src/general/data/m_data_general/batch.rs b/src/main/src/general/data/m_data_general/batch.rs deleted file mode 100644 index 1d4cb25..0000000 --- a/src/main/src/general/data/m_data_general/batch.rs +++ /dev/null @@ -1,445 +0,0 @@ -/// Batch Data Transfer Interface -/// -/// # Design Overview -/// The batch interface is designed for efficient large-scale data transfer from data holders (writers) -/// to the system. It differs from the regular data interface in several key aspects: -/// -/// ## Batch Interface -/// - Purpose: Optimized for data holders to push complete datasets -/// - Key Feature: Supports streaming transfer during data writing process -/// - Use Case: Allows transfer before local sharding is complete -/// - Operation: Uses fixed-size block transfer with real-time processing -/// -/// ## Data Interface (For Comparison) -/// - Purpose: General-purpose data read/write operations -/// - Write Flow: Data is sharded and distributed across nodes -/// - Read Flow: Shards are collected from nodes and reassembled -/// - Operation: Requires complete data and consistency checks -/// -/// # Implementation Details -/// The batch interface implements this through: -/// - Efficient block-based streaming transfer -/// - Concurrent processing of received blocks -/// - Support for both memory and file-based transfers -/// - Real-time block validation and assembly -/// -/// For detailed implementation of the regular data interface, see the data.rs module. 
-use super::*; -use crate::general::network::proto; -use base64::Engine; -use crate::general::network::m_p2p::RPCResponsor; -use tokio::io::AsyncWriteExt; -use dashmap::DashMap; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::time::Duration; -use tokio::sync::mpsc; -use tokio::task::JoinHandle; -use std::ops::Range; - -impl proto::DataItem { - pub fn size(&self) -> usize { - match &self.data_item_dispatch { - Some(proto::data_item::DataItemDispatch::RawBytes(bytes)) => bytes.len(), - Some(proto::data_item::DataItemDispatch::File(file_data)) => file_data.file_content.len(), - None => 0, - } - } -} - -/// 管理单个批量传输的状态 -pub(super) struct BatchTransfer { - pub unique_id: Vec, - pub version: u64, - pub block_type: proto::BatchDataBlockType, - pub total_blocks: u32, - // 使用 channel 进行数据传输 - data_sender: mpsc::Sender>, - // 写入任务 - write_task: JoinHandle>, - // 完成通知 channel - pub tx: Option>>, -} - -impl BatchTransfer { - pub async fn new( - unique_id: Vec, - version: u64, - block_type: proto::BatchDataBlockType, - total_blocks: u32, - block_size: usize, - tx: mpsc::Sender>, - ) -> WSResult { - // 创建数据传输 channel - let (data_sender, data_receiver) = mpsc::channel(total_blocks as usize); - - // 计算数据分片 - let splits = Self::calculate_splits(total_blocks as usize * block_size, block_size); - - // 为异步任务克隆 unique_id - let unique_id_for_task = unique_id.clone(); - - // 创建写入任务 - let write_task = tokio::spawn(async move { - let group = WriteSplitDataTaskGroup::new( - unique_id_for_task, - splits, - data_receiver, - block_type, - ).await?; - - group.join().await - }); - - Ok(Self { - unique_id, - version, - block_type, - total_blocks, - data_sender, - write_task, - tx: Some(tx), - }) - } - - pub async fn add_block(&self, index: u32, data: Vec) -> WSResult { - if index >= self.total_blocks { - return Ok(false); - } - - // 通过 channel 发送数据块 - self.data_sender.send(Ok(( - index as usize, - proto::DataItem::new_raw_bytes(data), - ))).await.map_err(|_| WsDataError::BatchTransferError { - unique_id: self.unique_id.clone(), - msg: "failed to send data block".to_string(), - })?; - - Ok(index == self.total_blocks - 1) - } - - pub async fn complete(mut self) -> WSResult<()> { - // 定义错误转换函数 - let join_error = |e| WsDataError::BatchTransferError { - unique_id: self.unique_id.clone(), - msg: format!("write task join failed: {}", e), - }; - - let write_error = |e| WsDataError::BatchTransferError { - unique_id: self.unique_id.clone(), - msg: format!("write data failed: {}", e), - }; - - let send_error = || WsDataError::BatchTransferError { - unique_id: self.unique_id.clone(), - msg: "send result failed".to_string(), - }; - - drop(self.data_sender); - - if let Some(tx) = self.tx.take() { - let join_result = self.write_task.await - .map_err(join_error)?; - - let data_item = join_result - .map_err(write_error)?; - - tx.send(Ok(data_item)).await - .map_err(|_| send_error())?; - } - Ok(()) - } - - // 辅助函数:计算数据分片 - fn calculate_splits(total_size: usize, block_size: usize) -> Vec> { - let mut splits = Vec::new(); - let mut offset = 0; - while offset < total_size { - let end = (offset + block_size).min(total_size); - splits.push(offset..end); - offset = end; - } - splits - } -} - -/// 管理所有进行中的批量传输 -pub(super) struct BatchManager { - transfers: DashMap, - sequence: AtomicU64, -} - -impl BatchManager { - pub fn new() -> Self { - Self { - transfers: DashMap::new(), - sequence: AtomicU64::new(0), - } - } - - pub fn next_sequence(&self) -> u64 { - self.sequence.fetch_add(1, Ordering::Relaxed) - } - - pub async fn 
create_transfer( - &self, - unique_id: Vec, - version: u64, - block_type: proto::BatchDataBlockType, - total_blocks: u32, - tx: mpsc::Sender>, - ) -> WSResult { - let request_id = proto::BatchRequestId { - node_id: 0, // TODO: Get from config - sequence: self.next_sequence(), - }; - - let transfer = BatchTransfer::new( - unique_id.clone(), - version, - block_type, - total_blocks, - 1024 * 1024, // 1MB block size - tx, - ).await?; - - self.transfers.insert(request_id.clone(), transfer); - Ok(request_id) - } - - pub async fn handle_block( - &self, - request_id: proto::BatchRequestId, - block_index: u32, - data: Vec, - ) -> WSResult { - if let Some(transfer) = self.transfers.get(&request_id) { - let is_complete = transfer.add_block(block_index, data).await?; - if is_complete { - // Remove and complete the transfer - if let Some((_, transfer)) = self.transfers.remove(&request_id) { - transfer.complete().await? - } - } - Ok(is_complete) - } else { - Err(WsDataError::BatchTransferNotFound { - node_id: request_id.node_id, - sequence: request_id.sequence, - } - .into()) - } - } -} - -impl DataGeneral { - /// 发起批量数据传输 - pub(super) async fn call_batch_data( - &self, - node_id: NodeID, - unique_id: Vec, - version: u64, - data: proto::DataItem, - block_type: proto::BatchDataBlockType, - ) -> WSResult { - // 将数据分割成块 - let block_size = 1024 * 1024; // 1MB per block - let data_bytes = match data { - proto::DataItem { data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(bytes)) } => bytes, - proto::DataItem { data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(file_data)) } => file_data.file_content, - _ => return Err(WsDataError::InvalidDataType.into()), - }; - - let total_blocks = (data_bytes.len() + block_size - 1) / block_size; - - // 创建channel用于接收响应 - let (tx, mut rx) = mpsc::channel(1); - - // 创建传输任务 - let request_id = self.batch_manager.create_transfer( - unique_id.clone(), - version, - block_type, - total_blocks as u32, - tx, - ).await?; - - // 发送数据块 - for (i, chunk) in data_bytes.chunks(block_size).enumerate() { - let request = proto::BatchDataRequest { - request_id: Some(request_id.clone()), - block_type: block_type as i32, - block_index: i as u32, - data: chunk.to_vec(), - operation: proto::DataOpeType::Write as i32, - unique_id: unique_id.clone(), - version, - }; - - let response = self - .rpc_call_batch_data - .call( - self.view.p2p(), - node_id, - request, - Some(Duration::from_secs(30)), - ) - .await?; - - if !response.success { - return Ok(response); - } - } - - // 等待所有块处理完成 - match rx.recv().await { - Some(Ok(_data_item)) => Ok(proto::BatchDataResponse { - request_id: Some(request_id), - success: true, - error_message: String::new(), - version, - }), - Some(Err(err)) => Ok(proto::BatchDataResponse { - request_id: Some(request_id), - success: false, - error_message: err.to_string(), - version, - }), - None => Ok(proto::BatchDataResponse { - request_id: Some(request_id), - success: false, - error_message: "transfer channel closed unexpectedly".to_string(), - version, - }), - } - } - - /// 处理批量数据请求 - - pub(super) async fn rpc_handle_batch_data( - &self, - responsor: RPCResponsor, - req: proto::BatchDataRequest, - ) -> WSResult<()> { - // Step 1: 获取数据元信息 - let meta = match self.view.get_metadata(&req.unique_id, false).await { - Ok(meta) => meta, - Err(err) => { - tracing::warn!("get data meta failed: {}", err); - responsor - .send_resp(proto::BatchDataResponse { - request_id: req.request_id, - success: false, - error_message: format!("get data meta failed: {}", 
err), - version: 0, - }) - .await?; - return Ok(()); - } - }; - - // Step 2: 复用 get_data 逻辑获取数据 - let get_arg = GetOrDelDataArg { - meta: Some(meta.clone()), - unique_id: req.unique_id.clone(), - ty: GetOrDelDataArgType::All, - }; - - let data_result = match self.get_or_del_data(get_arg).await { - Ok((_, data)) => data, - Err(err) => { - tracing::warn!("get data failed: {}", err); - responsor - .send_resp(proto::BatchDataResponse { - request_id: req.request_id, - success: false, - error_message: format!("get data failed: {}", err), - version: meta.version, - }) - .await?; - return Ok(()); - } - }; - - // Step 3: 创建数据分片并设置写入任务 - let mut splits = Vec::new(); - let mut offset = 0; - - for item in data_result.values() { - let size = item.size(); - splits.push(offset..offset + size); - offset += size; - } - - // 创建channel用于传输数据 - let (tx, rx) = mpsc::channel(splits.len()); - - // 发送数据到channel - for (idx, item) in data_result.into_iter() { - if let Err(err) = tx.send(Ok((idx as usize, item))).await { - tracing::error!("send data to channel failed: {}", err); - responsor - .send_resp(proto::BatchDataResponse { - request_id: req.request_id, - success: false, - error_message: format!("internal error: {}", err), - version: meta.version, - }) - .await?; - return Ok(()); - } - } - drop(tx); // 关闭发送端 - - // Step 4: 根据请求类型选择写入方式并执行 - let task_group = match WriteSplitDataTaskGroup::new( - req.unique_id, - splits, - rx, - proto::BatchDataBlockType::from_i32(req.block_type).unwrap_or(proto::BatchDataBlockType::Memory), - ) - .await - { - Ok(group) => group, - Err(err) => { - tracing::warn!("create write task group failed: {}", err); - responsor - .send_resp(proto::BatchDataResponse { - request_id: req.request_id, - success: false, - error_message: format!("create write task group failed: {}", err), - version: meta.version, - }) - .await?; - return Ok(()); - } - }; - - // Step 5: 等待所有写入任务完成 - match task_group.join().await { - Ok(_) => { - responsor - .send_resp(proto::BatchDataResponse { - request_id: req.request_id, - success: true, - error_message: String::new(), - version: meta.version, - }) - .await?; - Ok(()) - } - Err(err) => { - tracing::warn!("write data failed: {}", err); - responsor - .send_resp(proto::BatchDataResponse { - request_id: req.request_id, - success: false, - error_message: format!("write data failed: {}", err), - version: meta.version, - }) - .await?; - Ok(()) - } - } - } -} diff --git a/src/main/src/general/data/m_data_general/data.rs b/src/main/src/general/data/m_data_general/data.rs deleted file mode 100644 index a27fce7..0000000 --- a/src/main/src/general/data/m_data_general/data.rs +++ /dev/null @@ -1,37 +0,0 @@ -/// Data Interface for Distributed Storage -/// -/// # Design Overview -/// The data interface provides a general-purpose solution for distributed data storage -/// and retrieval. 
It implements a shard-based approach that differs from the batch -/// interface in its core design: -/// -/// ## Data Interface -/// - Purpose: General-purpose data read/write operations -/// - Write Process: -/// * Data is sharded according to distribution strategy -/// * Shards are distributed to different nodes -/// * Each node stores its assigned shards -/// * Metadata is updated after all writes complete -/// - Read Process: -/// * Metadata is retrieved to locate shards -/// * Shards are collected from respective nodes -/// * Complete data is reassembled from shards -/// -/// ## Comparison with Batch Interface -/// While the batch interface (see batch.rs) focuses on efficient streaming transfer -/// from data holders, the data interface: -/// - Ensures data consistency across nodes -/// - Provides random access to data -/// - Supports complex distribution strategies -/// - Maintains complete metadata for all operations -/// -/// # Implementation Details -/// This interface implements: -/// - Distributed shard management -/// - Concurrent read/write operations -/// - Metadata synchronization -/// - Data consistency verification -/// -/// For streaming transfer functionality, see the batch.rs module. -use super::*; -// ... existing code ... \ No newline at end of file diff --git a/src/main/src/general/data/m_data_general/dataitem.rs b/src/main/src/general/data/m_data_general/dataitem.rs index b755ab0..27ef392 100644 --- a/src/main/src/general/data/m_data_general/dataitem.rs +++ b/src/main/src/general/data/m_data_general/dataitem.rs @@ -150,15 +150,15 @@ impl WriteSplitDataTaskGroup { unique_id: Vec, splits: Vec>, mut rx: tokio::sync::mpsc::Receiver>, - block_type: proto::BatchDataBlockType, + cachemode: CacheModeVisitor, ) -> WSResult { tracing::debug!( - "new merge task group for uid({:?}), block_type({:?})", + "new merge task group for uid({:?}), cachemode({})", unique_id, - block_type + cachemode.0 ); - if block_type == proto::BatchDataBlockType::File { - tracing::debug!("block_type is file"); + if cachemode.is_map_file() { + tracing::debug!("cachemode is map_file"); // base64 // let file_path = PathBuf::from(format!("{:?}.data", unique_id)); let file_path = PathBuf::from(format!( @@ -220,8 +220,8 @@ impl WriteSplitDataTaskGroup { } } Ok(Self::ToFile { file_path, tasks }) - } else if block_type == proto::BatchDataBlockType::Memory { - tracing::debug!("block_type is memory"); + } else if cachemode.is_map_common_kv() { + tracing::debug!("cachemode is map_common_kv"); let (shared_mem, owned_accesses) = new_shared_mem(&splits); let mut owned_accesses = owned_accesses .into_iter() @@ -265,7 +265,7 @@ impl WriteSplitDataTaskGroup { } Ok(Self::ToMem { shared_mem, tasks }) } else { - panic!("block_type should be file or memory"); + panic!("cachemode should be map_file or map_mem"); } } diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index c231195..a34fc75 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -1,7 +1,4 @@ mod dataitem; -mod batch; - -use crate::general::data::m_data_general::batch::BatchManager; use crate::general::data::m_data_general::dataitem::WantIdxIter; use crate::general::data::m_data_general::dataitem::WriteSplitDataTaskGroup; @@ -93,15 +90,14 @@ pub fn new_data_unique_id_fn_kv(key: &[u8]) -> Vec { #[derive(LogicalModule)] pub struct DataGeneral { view: DataGeneralView, - batch_manager: Arc, pub rpc_call_data_version_schedule: RPCCaller, 
rpc_call_write_once_data: RPCCaller, - rpc_call_batch_data: RPCCaller, + rpc_call_batch_data: RPCCaller, rpc_call_get_data_meta: RPCCaller, rpc_call_get_data: RPCCaller, rpc_handler_write_once_data: RPCHandler, - rpc_handler_batch_data: RPCHandler, + rpc_handler_batch_data: RPCHandler, rpc_handler_data_meta_update: RPCHandler, rpc_handler_get_data_meta: RPCHandler, rpc_handler_get_data: RPCHandler, @@ -111,7 +107,10 @@ pub struct DataGeneral { } impl DataGeneral { - // next_batch_id 方法已被移除,因为在当前代码中未被引用。如果将来需要,可重新实现该功能。 + fn next_batch_id(&self) -> u32 { + static NEXT_BATCH_ID: AtomicU32 = AtomicU32::new(1); // 从1开始,保留0作为特殊值 + NEXT_BATCH_ID.fetch_add(1, Ordering::Relaxed) + } async fn write_data_batch( &self, @@ -129,17 +128,14 @@ impl DataGeneral { let view = self.view.clone(); // Initialize batch transfer - let init_req = proto::BatchDataRequest { + let init_req = proto::sche::BatchDataRequest { unique_id: unique_id.to_vec(), version, - request_id: Some(proto::BatchRequestId { - node_id: 0, - sequence: 0, - }), // 使用 0 作为初始化标记 - block_type: proto::BatchDataBlockType::Memory as i32, - block_index: data_item_idx as u32, - operation: proto::DataOpeType::Write as i32, - data: vec![] + batch_id: 0, // 使用 0 作为初始化标记 + total_batches: total_batches as u32, + data: vec![], + data_item_idx: data_item_idx as u32, + is_complete: false, }; let init_resp = self @@ -156,27 +152,28 @@ impl DataGeneral { return Err(WsDataError::BatchTransferFailed { node: node_id, batch: 0, - reason: init_resp.error_message, + reason: init_resp.error, } .into()); } - let request_id = init_resp.request_id; + let batch_id = init_resp.batch_id; // Send data in batches for batch_idx in 0..total_batches { let start = batch_idx * batch_size; let end = (start + batch_size).min(total_size); + let is_last = batch_idx == total_batches - 1; let batch_data = data.clone_split_range(start..end); - let batch_req = proto::BatchDataRequest { - unique_id: unique_id.to_vec(), - version, - request_id: request_id.clone(), - block_type: proto::BatchDataBlockType::Memory as i32, + let batch_req = proto::sche::BatchDataRequest { + unique_id: unique_id.to_vec(), + version, + batch_id, + total_batches: total_batches as u32, data: batch_data.encode_persist(), - block_index: data_item_idx as u32, - operation: proto::DataOpeType::Write as i32, + data_item_idx: data_item_idx as u32, + is_complete: is_last, }; let batch_resp = self @@ -193,7 +190,7 @@ impl DataGeneral { return Err(WsDataError::BatchTransferFailed { node: node_id, batch: batch_idx as u32, - reason: batch_resp.error_message, + reason: batch_resp.error, } .into()); } @@ -840,15 +837,15 @@ impl DataGeneral { responsor: RPCResponsor, ) -> WSResult<()> { tracing::debug!("rpc_handle_get_data_meta with req({:?})", req); - let meta = self.view.get_metadata(&req.unique_id, req.delete).await?; - tracing::debug!("rpc_handle_get_data_meta data meta found"); - - let serialized_meta = bincode::serialize(&meta).map_err(|err| { - WsSerialErr::BincodeErr { - err, - context: "rpc_handle_get_data_meta".to_owned(), - } - })?; + let meta = self.view.get_data_meta(&req.unique_id, req.delete)?; + if meta.is_none() { + tracing::debug!("rpc_handle_get_data_meta data meta not found"); + } else { + tracing::debug!("rpc_handle_get_data_meta data meta found"); + } + let serialized_meta = meta.map_or(vec![], |(_kvversion, meta)| { + bincode::serialize(&meta).unwrap() + }); responsor .send_resp(proto::DataMetaGetResponse { serialized_meta }) @@ -866,10 +863,9 @@ impl DataGeneral { let kv_store_engine = 
self.view.kv_store_engine(); let _ = self.view - .get_metadata(&req.unique_id, req.delete) - .await + .get_data_meta(&req.unique_id, req.delete) .map_err(|err| { - tracing::warn!("rpc_handle_get_one_data get_metadata failed: {:?}", err); + tracing::warn!("rpc_handle_get_one_data get_data_meta failed: {:?}", err); err })?; @@ -980,7 +976,7 @@ pub type CacheMode = u16; /// attention: new from `DataSetMetaBuilder` /// /// https://fvd360f8oos.feishu.cn/docx/XoFudWhAgox84MxKC3ccP1TcnUh#share-Tqqkdxubpokwi5xREincb1sFnLc -#[derive(Serialize, Deserialize, Debug,Clone)] +#[derive(Serialize, Deserialize, Debug)] pub struct DataSetMetaV2 { // unique_id: Vec, api_version: u8, @@ -1369,20 +1365,6 @@ impl DataGeneralView { }; Ok(meta_opt) } - - pub async fn get_metadata( - &self, - unique_id: &[u8], - delete: bool, - ) -> WSResult { - // 先尝试从本地获取 - if let Some((_version, meta)) = self.get_data_meta(unique_id, delete)? { - return Ok(meta); - } - - // 本地不存在,从 master 获取 - self.data_general().get_or_del_datameta_from_master(unique_id, delete).await - } } impl From for WSError { @@ -1399,7 +1381,6 @@ impl LogicalModule for DataGeneral { { Self { view: DataGeneralView::new(args.logical_modules_ref.clone()), - batch_manager: Arc::new(BatchManager::new()), rpc_call_data_version_schedule: RPCCaller::new(), rpc_call_write_once_data: RPCCaller::new(), rpc_call_batch_data: RPCCaller::new(), @@ -1431,62 +1412,62 @@ impl LogicalModule for DataGeneral { // register rpc handlers { - let view = self.view.clone(); + let this = self.clone(); self.rpc_handler_write_once_data .regist(p2p, move |responsor, req| { - let view = view.clone(); + let this = this.clone(); let _ = tokio::spawn(async move { - view.data_general().rpc_handle_write_one_data(responsor, req).await; + this.rpc_handle_write_one_data(responsor, req).await; }); Ok(()) }); - let view = self.view.clone(); + let this = self.clone(); self.rpc_handler_batch_data.regist( p2p, - move |responsor: RPCResponsor, - req: proto::BatchDataRequest| { - let view = view.clone(); + move |responsor: RPCResponsor, + req: proto::sche::BatchDataRequest| { + let this = this.clone(); let _ = tokio::spawn(async move { - view.data_general().rpc_handle_batch_data(responsor, req).await; + this.rpc_handle_batch_data(responsor, req).await; }); Ok(()) }, ); - let view = self.view.clone(); + let this = self.clone(); self.rpc_handler_data_meta_update.regist( p2p, move |responsor: RPCResponsor, req: proto::DataMetaUpdateRequest| { - let view = view.clone(); + let this = this.clone(); let _ = tokio::spawn(async move { - view.data_general().rpc_handle_data_meta_update(responsor, req).await + this.rpc_handle_data_meta_update(responsor, req).await }); Ok(()) }, ); - let view = self.view.clone(); + let this = self.clone(); self.rpc_handler_get_data_meta .regist(p2p, move |responsor, req| { - let view = view.clone(); + let this = this.clone(); let _ = tokio::spawn(async move { - view.data_general().rpc_handle_get_data_meta(req, responsor) + this.rpc_handle_get_data_meta(req, responsor) .await .todo_handle(); }); Ok(()) }); - let view = self.view.clone(); + let this = self.clone(); self.rpc_handler_get_data.regist( p2p, move |responsor: RPCResponsor, req: proto::GetOneDataRequest| { - let view = view.clone(); + let this = this.clone(); let _ = tokio::spawn(async move { - view.data_general().rpc_handle_get_one_data(responsor, req).await + this.rpc_handle_get_one_data(responsor, req).await }); Ok(()) }, diff --git a/src/main/src/general/network/msg_pack.rs b/src/main/src/general/network/msg_pack.rs 
index 90c4f82..30bf6d7 100644 --- a/src/main/src/general/network/msg_pack.rs +++ b/src/main/src/general/network/msg_pack.rs @@ -133,18 +133,8 @@ define_msg_ids!( } }), (proto::kv::KvLockResponse, _pack, { true }), - (proto::BatchDataRequest, _pack, { - // 验证关键字段非空 - // 1. request_id 必须存在,用于请求追踪 - // 2. unique_id 必须存在,标识数据集 - // 3. data 必须存在,实际数据内容 - let req = _pack; - match (req.request_id.is_some(), req.unique_id.is_empty(), req.data.is_empty()) { - (true, false, false) => true, - _ => false - } - }), - (proto::BatchDataResponse, _pack, { true }) + (proto::sche::BatchDataRequest, _pack, { true }), + (proto::sche::BatchDataResponse, _pack, { true }) ); pub trait RPCReq: MsgPack + Default { @@ -199,8 +189,8 @@ impl RPCReq for proto::kv::KvLockRequest { type Resp = proto::kv::KvLockResponse; } -impl RPCReq for proto::BatchDataRequest { - type Resp = proto::BatchDataResponse; +impl RPCReq for proto::sche::BatchDataRequest { + type Resp = proto::sche::BatchDataResponse; } // impl RPCReq for proto::kv::KvLockWaitAcquireNotifyRequest { diff --git a/src/main/src/general/network/proto_src/data.proto b/src/main/src/general/network/proto_src/data.proto index 7984fcf..cb290b2 100644 --- a/src/main/src/general/network/proto_src/data.proto +++ b/src/main/src/general/network/proto_src/data.proto @@ -169,31 +169,4 @@ message GetOneDataResponse{ bool success=1; repeated DataItem data =2; string message=3; -} - -enum BatchDataBlockType { - MEMORY = 0; // 内存数据块 - FILE = 1; // 文件数据块 -} - -message BatchRequestId { - uint32 node_id = 1; // 节点ID - uint64 sequence = 2; // 原子自增序列号 -} - -message BatchDataRequest { - BatchRequestId request_id = 1; // 请求唯一标识(节点ID + 序列号) - BatchDataBlockType block_type = 2; // 数据块类型(文件/内存) - uint32 block_index = 3; // 数据块索引 - bytes data = 4; // 数据块内容 - DataOpeType operation = 5; // 操作类型 - bytes unique_id = 6; // 数据唯一标识 - uint64 version = 7; // 数据版本 -} - -message BatchDataResponse { - BatchRequestId request_id = 1; // 对应请求ID - bool success = 2; // 处理状态 - string error_message = 3; // 错误信息 - uint64 version = 4; // 处理后的版本 } \ No newline at end of file diff --git a/src/main/src/general/network/proto_src/sche.proto b/src/main/src/general/network/proto_src/sche.proto index fdec748..a3cba7d 100644 --- a/src/main/src/general/network/proto_src/sche.proto +++ b/src/main/src/general/network/proto_src/sche.proto @@ -47,5 +47,19 @@ message DistributeTaskResp { string err_msg = 2; } +message BatchDataRequest { + bytes unique_id = 1; + uint64 version = 2; + uint32 batch_id = 3; // 当前批次ID + uint32 total_batches = 4; // 总批次数 + bytes data = 5; // 当前批次的数据 + uint32 data_item_idx = 6; // 数据项索引 + bool is_complete = 7; // 是否是最后一个批次 +} +message BatchDataResponse { + bool success = 1; + string error = 2; + uint32 batch_id = 3; +} diff --git a/src/main/src/result.rs b/src/main/src/result.rs index 50186e4..11f2785 100644 --- a/src/main/src/result.rs +++ b/src/main/src/result.rs @@ -178,7 +178,6 @@ pub enum WsFuncError { #[derive(Debug)] pub enum WsDataError { - InvalidDataType, DataSetNotFound { uniqueid: Vec, }, @@ -261,21 +260,6 @@ pub enum WsDataError { batch: u32, reason: String, }, - - BatchTransferNotFound { - node_id: u32, - sequence: u64, - }, - - BatchBlockMissing { - unique_id: Vec, - block_index: u32, - }, - - BatchTransferError { - unique_id: Vec, - msg: String, - }, } #[derive(Error, Debug)] From 2b560e23305f4f525deae469c23e954d06d2c5d5 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 19/26] Revert "feat: fixed partial" This reverts commit 
f8f4f42227ef52eaa8b1339b06d05ed1bd3fc9e2. --- .cursorrules | 328 +++--------- claude_notes.md | 203 ++++++++ .../src/general/data/m_data_general/mod.rs | 486 +----------------- 3 files changed, 283 insertions(+), 734 deletions(-) create mode 100644 claude_notes.md diff --git a/.cursorrules b/.cursorrules index 3f57139..81bc931 100644 --- a/.cursorrules +++ b/.cursorrules @@ -1,262 +1,66 @@ -# Waverless 项目关键设计笔记 - -## 1. 函数执行上下文设计 - -### 1.1 基础结构 -- `FnExeCtx`: 私有的基础结构体,包含函数执行的基本信息 - ```rust - struct FnExeCtx { - pub app: String, - pub app_type: AppType, - pub func: String, - pub func_meta: FnMeta, - pub req_id: ReqId, - pub event_ctx: EventCtx, - pub res: Option, - pub sub_waiters: Vec>, - _dummy_private: (), - } - ``` - -### 1.2 公开特化类型 -- `FnExeCtxAsync` 和 `FnExeCtxSync`: - - 异步执行上下文支持 Jar、Wasm、Native 类型,包含子任务支持和完整的性能监控和日志。 - - 同步执行上下文仅支持 Native 类型,不支持子任务,包含基本的性能监控和日志。 - -### 1.3 类型安全 -- `FnExeCtxAsyncAllowedType` 和 `FnExeCtxSyncAllowedType`: - - 异步允许的类型 (Jar, Wasm, Native) - - 同步允许的类型 (仅 Native) - - 通过 `TryFrom` 在编译时强制类型安全 - -## 2. 实例管理设计 - -### 2.1 实例类型与管理器 -- `Instance` 和 `InstanceManager`: - - `Instance` 包含 Owned、Shared 和 Native 类型。 - - `InstanceManager` 管理应用实例和运行时函数上下文。 - ```rust - pub enum Instance { - Owned(OwnedInstance), - Shared(SharedInstance), - Native(NativeAppInstance), - } - - pub struct InstanceManager { - pub app_instances: SkipMap, - pub instance_running_function: DashMap, - } - ``` - -### 2.2 运行时函数上下文 -- `UnsafeFunctionCtx`: - - 包含 Sync 和 Async 类型,分别对应 `FnExeCtxSync` 和 `FnExeCtxAsync`。 - -## 3. 关键修改记录 - -### 3.1 同步/异步执行流程优化与错误处理增强 -- 简化 `finish_using`,移除不必要的异步版本,统一使用同步实现。 -- 添加同步版本的 `load_instance_sync`,仅支持 Native 类型。 -- 优化 `execute_sync` 中的异步调用处理,统一性能监控和日志记录格式。 -- 添加 `UnsupportedAppType` 错误类型,完善同步执行时的类型检查。 - -## 4. 待办事项 -- [x] 考虑添加同步版本的 `load_instance` -- [ ] 优化 `execute_sync` 中的异步-同步转换 -- [ ] 完善错误处理和日志记录 - -## 5. 核心设计原则 - -### 5.1 基础原则与 View 模式设计规则 -- 同步/异步分离,类型安全,性能监控,资源管理。 -- View 生成: - - View 结构体和 `LogicalModule` trait 的实现由宏生成。 - - 只需实现 `inner_new` 函数,使用 `logical_module_view_impl!` 生成访问函数。 - - 每个需要访问的模块都需要单独的 impl 宏调用。 - -### 5.2 去掉 #[derive(LogicalModule)] 的原因和注意事项 -- 实现特定功能:根据需求在 `DataGeneralView` 中实现特定功能,检查冲突。 -- `inner` 字段的管理:由宏管理,不能直接操作,通过宏生成的接口使用。 -- 错误分析:去掉派生后,仔细分析和解决可能出现的错误。 - -## 6. msg_pack 消息封装 - -### 6.1 基本原则与实现示例 -- 使用 `msg_pack.rs` 中的宏实现 trait,使用 `define_msg_ids!` 管理消息类型。 -- 通过 `RPCReq` trait 定义请求-响应关系。 - ```rust - define_msg_ids!( - (proto::sche::BatchDataRequest, pack, { true }), - (proto::sche::BatchDataResponse, _pack, { true }) - ); - - impl RPCReq for proto::sche::BatchDataRequest { - type Resp = proto::sche::BatchDataResponse; - } - ``` - -### 6.2 最佳实践 -- 新增消息类型时:在 `define_msg_ids!` 中添加定义,实现 `RPCReq` trait。 -- 使用消息时:使用 `RPCCaller` 和 `RPCHandler`,遵循统一的错误处理。 - -## 7. 
Waverless 代码规范核心规则 - -### 7.0 最高优先级规则 -- 在没有经过明确允许的情况下,不要擅自开始操作 -- 必须等待用户明确指示后再进行修改 -- 在进行任何修改前,先提出修改方案并等待确认 -- 有明确指令的情况下,不要擅自做其他操作 -- 删除代码时必须说明: - - 被删除代码的原有功能和作用 - - 删除的具体原因 - - 删除可能带来的影响 -- 修改代码时必须: - - 先提出完整的修改方案 - - 说明每处修改的原因和影响 - - 等待用户确认后再执行 - - 严格按照确认的方案执行,不额外修改 - - 如需额外修改,必须重新提出方案并确认 -- 修改规则文件时必须: - - 确认文件名必须是 `.cursorrules` - - 确认文件以 "# Waverless 项目关键设计笔记" 开头 - - 确认包含完整的设计笔记结构 - - 确认包含所有规则章节(1-7) - - 修改前使用搜索工具确认是正确的规则文件 - - 修改前检查文件的完整内容 - - 修改前确认修改的具体位置 - - 只修改规则相关部分 - - 保持其他内容不变 - - 保持文档结构完整 - -### 7.1 文档维护与代码组织原则 -- 文档压缩原则:保持无损压缩,合并重复内容,简化表述,重构文档结构。 -- 文档更新规则:确认信息完整性,保留技术细节,使用清晰结构展示信息。 -- 代码组织规则:宏生成的访问函数直接使用,非 pub 函数只在一个地方定义,View 负责核心实现,具体模块负责自己的功能,通过 View 访问其他模块。 - -### 7.2 代码修改原则 -- 不随意删除或修改已有的正确实现 -- 不在多处实现同一功能 -- 保持代码结构清晰简单 -- 修改前先理解设计原则 - -#### 异步任务处理原则 -- 分析生命周期和所有权需求 -- 避免盲目克隆,只克隆必要数据 -- 考虑类型特征(如 P2PModule 的轻量级 Clone) -- 评估替代方案 - -```rust -// 反例:过度克隆 -let p2p = self.p2p().clone(); // 不必要,P2PModule 本身就是轻量级的 -let data_general = self.data_general().clone(); // 不必要,同上 - -// 正例:按需克隆 -let split_info = split.clone(); // 必要,因为来自临时变量的引用 -``` - -分析要点: -- 使用场景:确认异步任务中的实际需求 -- 类型特征:检查是否已实现轻量级 Clone -- 生命周期:特别关注临时变量引用 -- 替代方案:考虑其他实现方式 - -### 7.3 错误与正确示例 -- 错误示例:手动实现已有的宏生成函数,在两个地方都实现同一个函数,过度修改已有代码结构,有损压缩文档内容。 -- 正确示例:使用宏生成的访问函数,在合适的位置添加新功能,遵循已有的代码组织方式,保持文档的完整性和准确性。 - -### 7.4 异步任务变量处理规范 - -#### 1. 变量分析原则 -- 生命周期分析:确定变量在异步任务中的生存期 -- 所有权需求:判断是否需要克隆或移动所有权 -- 类型特征:考虑变量的类型特性(如 Clone、Send、'static 等) -- 数据共享:评估是否需要在多个任务间共享数据 - -#### 2. 克隆策略 -必须克隆的情况: -- 临时变量引用:`split_info.clone()`(来自迭代器) -- 多任务共享:`unique_id.clone()`(多个任务需要) -- 部分数据:`data_item.clone_split_range()`(只克隆需要的范围) - -不需要克隆的情况: -- 值类型复制:`version`(直接复制即可) -- 已实现 Copy:基本数据类型 -- 单一任务使用:不需要在多个任务间共享的数据 - -#### 3. View 模式使用规范 -基本原则: -- View 本身已经是完整引用:不需要额外的 view 字段 -- 异步任务中使用:`self.clone()` -- 模块访问:通过 view 直接访问其他模块 - -示例代码: -```rust -// 正确示例 -let view = self.clone(); // View 本身克隆 -let resp = view.data_general().rpc_call_write_once_data... - -// 错误示例 -let view = self.view.clone(); // 错误:不需要额外的 view 字段 -let data_general = self.data_general().clone(); // 错误:不需要单独克隆模块 -``` - -#### 4. 异步任务数据处理检查清单 -- [ ] 是否只克隆必要的数据? -- [ ] 临时变量是否正确处理? -- [ ] View 的使用是否符合规范? -- [ ] 是否避免了重复克隆? -- [ ] 数据共享策略是否合理? - -#### 5. 常见场景示例 - -1. 批量数据处理: -```rust -// 正确处理临时变量和部分数据 -let split_info = split_info.clone(); // 临时变量必须克隆 -let data_item = data_item.clone_split_range(range); // 只克隆需要的部分 -let view = self.clone(); // View 克隆用于异步任务 -``` - -2. 并发任务处理: -```rust -// 使用信号量和数据共享 -let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT)); -let view = self.clone(); // 一次克隆,多处使用 -for node_id in nodes { - let permit = semaphore.clone(); - let view = view.clone(); // View 在任务间共享 - tokio::spawn(async move { ... }); -} -``` - -### 7.3 变量类型难分辨的情况 - -#### 7.3.1 Proto生成的Rust类型 -1. proto中的普通字段在Rust中的表现: - - proto中的 `string file_name_opt = 1` 生成的是普通 `String` 类型,而不是 `Option` - - proto中的 `bool is_dir_opt = 2` 生成的是普通 `bool` 类型,而不是 `Option` - - 字段名带 `_opt` 后缀不代表它在Rust中是 `Option` 类型 - -2. proto中的message嵌套在Rust中的表现: - - `DataItem` 中的 `oneof data_item_dispatch` 在Rust中是一个字段 - - 访问路径是: `data.data_item_dispatch` 而不是 `data.data.data_item_dispatch` - - `Option` 需要先 `unwrap()` 才能访问其内部字段 - -#### 7.3.2 容易混淆的类型转换 -1. 
proto生成的类型和标准库类型的关系: - - proto生成的 `String` 字段不能直接用 `unwrap_or_default()` - - proto生成的 `bool` 字段不能直接用 `unwrap_or()` - -### 7.5 思维方式原则 -- 思维优先于行动: - - 在开始任何操作前,先理解"为什么"而不是"怎么做" - - 确保完全理解当前上下文中的所有信息 - - 避免机械性思维和跳过思考的行为模式 -- 内化规则: - - 把规则视为思维框架而不是外部约束 - - 养成先检查当前上下文的习惯 - - 避免在已有信息的情况下去外部搜索 -- 关注本质: - - 理解问题的根本原因比立即解决问题更重要 - - 分析失误的思维模式而不是简单记住正确操作 - - 把经验转化为思维方式而不是操作步骤 +# Waverless Project Rules + +## Code Style +- 使用 Rust 2021 edition +- 遵循标准 Rust 命名约定 +- 使用 4 空格缩进 +- 对于有意未使用的变量,使用下划线前缀(如 _pack) +- 如果变量可能会被使用但目前未使用,保留原名 +- 如果确定不需要验证的参数,直接使用下划线前缀 + +## RPC Message Handling +- 对于 RPC 消息的 Option 字段: + - 如果是必需字段(不能缺省),需要在 verify 中进行 is_some() 校验 + - 如果是可选字段,使用 _pack 忽略变量,直接返回 true +- 消息验证规则: + - 对于包含关键业务数据的请求(如 WriteOneDataRequest),必须验证所有必需字段 + - 对于简单的状态同步或查询请求(如 BatchDataRequest),可以跳过验证 +- 验证函数实现: + - 需要验证时,使用 pack 并编写完整的验证逻辑 + - 不需要验证时,使用 _pack 并直接返回 true + +## Error Handling +- 使用 WSResult 作为错误返回类型 +- 顶层错误类型应该实现 std::error::Error trait +- 子错误类型只需要实现 Debug trait +- 使用 ? 运算符进行错误传播 + +## Type System +- 为复杂的数据结构实现必要的 traits (Clone, Debug 等) +- 使用强类型,避免类型转换 +- 为枚举类型实现必要的派生宏 + +## Development Process +- 每解决一个问题后立即进行编译检查 +- 确保修改不会引入新的编译错误 +- 按优先级逐个解决编译错误 + +## Documentation +- 为公共 API 提供文档注释 +- 使用中文注释说明复杂的业务逻辑 +- 包含示例代码说明用法 + +## Performance +- 避免不必要的克隆操作 +- 使用异步操作处理 I/O +- 合理使用并发和并行 + +## Testing +- 为公共 API 编写单元测试 +- 使用 cargo test 运行测试 +- 包含集成测试 + +## Dependencies +- 明确指定依赖版本 +- 最小化依赖数量 +- 及时更新依赖版本 + +## Security +- 不在代码中硬编码敏感信息 +- 使用安全的加密算法 +- 正确处理用户输入 + +## Logging +- 使用 tracing 进行日志记录 +- 包含适当的日志级别 +- 记录关键操作和错误信息 diff --git a/claude_notes.md b/claude_notes.md new file mode 100644 index 0000000..a55c7f5 --- /dev/null +++ b/claude_notes.md @@ -0,0 +1,203 @@ +# Waverless 项目关键设计笔记 + +## 1. 函数执行上下文设计 + +### 1.1 基础结构 +- `FnExeCtx`: 私有的基础结构体,包含函数执行的基本信息 + ```rust + struct FnExeCtx { + pub app: String, + pub app_type: AppType, + pub func: String, + pub func_meta: FnMeta, + pub req_id: ReqId, + pub event_ctx: EventCtx, + pub res: Option, + pub sub_waiters: Vec>, + _dummy_private: (), + } + ``` + +### 1.2 公开特化类型 +- `FnExeCtxAsync` 和 `FnExeCtxSync`: + - 异步执行上下文支持 Jar、Wasm、Native 类型,包含子任务支持和完整的性能监控和日志。 + - 同步执行上下文仅支持 Native 类型,不支持子任务,包含基本的性能监控和日志。 + +### 1.3 类型安全 +- `FnExeCtxAsyncAllowedType` 和 `FnExeCtxSyncAllowedType`: + - 异步允许的类型 (Jar, Wasm, Native) + - 同步允许的类型 (仅 Native) + - 通过 `TryFrom` 在编译时强制类型安全 + +## 2. 实例管理设计 + +### 2.1 实例类型与管理器 +- `Instance` 和 `InstanceManager`: + - `Instance` 包含 Owned、Shared 和 Native 类型。 + - `InstanceManager` 管理应用实例和运行时函数上下文。 + ```rust + pub enum Instance { + Owned(OwnedInstance), + Shared(SharedInstance), + Native(NativeAppInstance), + } + + pub struct InstanceManager { + pub app_instances: SkipMap, + pub instance_running_function: DashMap, + } + ``` + +### 2.2 运行时函数上下文 +- `UnsafeFunctionCtx`: + - 包含 Sync 和 Async 类型,分别对应 `FnExeCtxSync` 和 `FnExeCtxAsync`。 + +## 3. 关键修改记录 + +### 3.1 同步/异步执行流程优化与错误处理增强 +- 简化 `finish_using`,移除不必要的异步版本,统一使用同步实现。 +- 添加同步版本的 `load_instance_sync`,仅支持 Native 类型。 +- 优化 `execute_sync` 中的异步调用处理,统一性能监控和日志记录格式。 +- 添加 `UnsupportedAppType` 错误类型,完善同步执行时的类型检查。 + +## 4. 待办事项 +- [x] 考虑添加同步版本的 `load_instance` +- [ ] 优化 `execute_sync` 中的异步-同步转换 +- [ ] 完善错误处理和日志记录 + +## 5. 核心设计原则 + +### 5.1 基础原则与 View 模式设计规则 +- 同步/异步分离,类型安全,性能监控,资源管理。 +- View 生成: + - View 结构体和 `LogicalModule` trait 的实现由宏生成。 + - 只需实现 `inner_new` 函数,使用 `logical_module_view_impl!` 生成访问函数。 + - 每个需要访问的模块都需要单独的 impl 宏调用。 + +### 5.2 去掉 #[derive(LogicalModule)] 的原因和注意事项 +- 实现特定功能:根据需求在 `DataGeneralView` 中实现特定功能,检查冲突。 +- `inner` 字段的管理:由宏管理,不能直接操作,通过宏生成的接口使用。 +- 错误分析:去掉派生后,仔细分析和解决可能出现的错误。 + +## 6. 
msg_pack 消息封装 + +### 6.1 基本原则与实现示例 +- 使用 `msg_pack.rs` 中的宏实现 trait,使用 `define_msg_ids!` 管理消息类型。 +- 通过 `RPCReq` trait 定义请求-响应关系。 + ```rust + define_msg_ids!( + (proto::sche::BatchDataRequest, pack, { true }), + (proto::sche::BatchDataResponse, _pack, { true }) + ); + + impl RPCReq for proto::sche::BatchDataRequest { + type Resp = proto::sche::BatchDataResponse; + } + ``` + +### 6.2 最佳实践 +- 新增消息类型时:在 `define_msg_ids!` 中添加定义,实现 `RPCReq` trait。 +- 使用消息时:使用 `RPCCaller` 和 `RPCHandler`,遵循统一的错误处理。 + +## 7. Waverless 代码规范核心规则 + +### 7.1 文档维护与代码组织原则 +- 文档压缩原则:保持无损压缩,合并重复内容,简化表述,重构文档结构。 +- 文档更新规则:确认信息完整性,保留技术细节,使用清晰结构展示信息。 +- 代码组织规则:宏生成的访问函数直接使用,非 pub 函数只在一个地方定义,View 负责核心实现,具体模块负责自己的功能,通过 View 访问其他模块。 + +### 7.2 代码修改原则 +- 不随意删除或修改已有的正确实现 +- 不在多处实现同一功能 +- 保持代码结构清晰简单 +- 修改前先理解设计原则 + +#### 异步任务处理原则 +- 分析生命周期和所有权需求 +- 避免盲目克隆,只克隆必要数据 +- 考虑类型特征(如 P2PModule 的轻量级 Clone) +- 评估替代方案 + +```rust +// 反例:过度克隆 +let p2p = self.p2p().clone(); // 不必要,P2PModule 本身就是轻量级的 +let data_general = self.data_general().clone(); // 不必要,同上 + +// 正例:按需克隆 +let split_info = split.clone(); // 必要,因为来自临时变量的引用 +``` + +分析要点: +- 使用场景:确认异步任务中的实际需求 +- 类型特征:检查是否已实现轻量级 Clone +- 生命周期:特别关注临时变量引用 +- 替代方案:考虑其他实现方式 + +### 7.3 错误与正确示例 +- 错误示例:手动实现已有的宏生成函数,在两个地方都实现同一个函数,过度修改已有代码结构,有损压缩文档内容。 +- 正确示例:使用宏生成的访问函数,在合适的位置添加新功能,遵循已有的代码组织方式,保持文档的完整性和准确性。 + +### 7.4 异步任务变量处理规范 + +#### 1. 变量分析原则 +- 生命周期分析:确定变量在异步任务中的生存期 +- 所有权需求:判断是否需要克隆或移动所有权 +- 类型特征:考虑变量的类型特性(如 Clone、Send、'static 等) +- 数据共享:评估是否需要在多个任务间共享数据 + +#### 2. 克隆策略 +必须克隆的情况: +- 临时变量引用:`split_info.clone()`(来自迭代器) +- 多任务共享:`unique_id.clone()`(多个任务需要) +- 部分数据:`data_item.clone_split_range()`(只克隆需要的范围) + +不需要克隆的情况: +- 值类型复制:`version`(直接复制即可) +- 已实现 Copy:基本数据类型 +- 单一任务使用:不需要在多个任务间共享的数据 + +#### 3. View 模式使用规范 +基本原则: +- View 本身已经是完整引用:不需要额外的 view 字段 +- 异步任务中使用:`self.clone()` +- 模块访问:通过 view 直接访问其他模块 + +示例代码: +```rust +// 正确示例 +let view = self.clone(); // View 本身克隆 +let resp = view.data_general().rpc_call_write_once_data... + +// 错误示例 +let view = self.view.clone(); // 错误:不需要额外的 view 字段 +let data_general = self.data_general().clone(); // 错误:不需要单独克隆模块 +``` + +#### 4. 异步任务数据处理检查清单 +- [ ] 是否只克隆必要的数据? +- [ ] 临时变量是否正确处理? +- [ ] View 的使用是否符合规范? +- [ ] 是否避免了重复克隆? +- [ ] 数据共享策略是否合理? + +#### 5. 常见场景示例 + +1. 批量数据处理: +```rust +// 正确处理临时变量和部分数据 +let split_info = split_info.clone(); // 临时变量必须克隆 +let data_item = data_item.clone_split_range(range); // 只克隆需要的部分 +let view = self.clone(); // View 克隆用于异步任务 +``` + +2. 并发任务处理: +```rust +// 使用信号量和数据共享 +let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT)); +let view = self.clone(); // 一次克隆,多处使用 +for node_id in nodes { + let permit = semaphore.clone(); + let view = view.clone(); // View 在任务间共享 + tokio::spawn(async move { ... 
}); +} +``` diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index a34fc75..17a4fd9 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -561,379 +561,6 @@ impl DataGeneral { Ok(()) } - - async fn rpc_handle_write_one_data( - &self, - responsor: RPCResponsor, - req: WriteOneDataRequest, - ) { - tracing::debug!("verify data meta bf write data"); - let kv_store_engine = self.view.kv_store_engine(); - - // Step1: verify version - // take old meta - #[allow(unused_assignments)] - let mut required_meta: Option<(usize, DataSetMetaV2)> = None; - { - let keybytes: Vec = KeyTypeDataSetMeta(&req.unique_id).make_key(); - let fail_by_overwrite = || async { - let message = "New data version overwrite".to_owned(); - tracing::warn!("{}", message); - responsor - .send_resp(WriteOneDataResponse { - remote_version: 0, - success: false, - message, - }) - .await - .todo_handle(); - }; - let fail_with_msg = |message: String| async { - tracing::warn!("{}", message); - responsor - .send_resp(WriteOneDataResponse { - remote_version: 0, - success: false, - message, - }) - .await - .todo_handle(); - }; - - loop { - // tracing::debug!("verify version loop"); - let lock = - kv_store_engine.with_rwlock(&KeyTypeDataSetMeta(&req.unique_id).make_key()); - let guard = KeyLockGuard::Read(lock.read()); - required_meta = kv_store_engine.get( - &KeyTypeDataSetMeta(&req.unique_id), - true, - KvAdditionalConf {}, - ); //tofix, master send maybe not synced - let old_dataset_version = if required_meta.is_none() { - 0 - } else { - required_meta.as_ref().unwrap().1.version - }; - // need to wait for new version - if required_meta.is_none() - || required_meta.as_ref().unwrap().1.version < req.version - { - if required_meta.is_none() { - tracing::debug!("no data version, waiting for notify"); - } else { - tracing::debug!( - "data version is old({}) at node({}), waiting for new notify({})", - required_meta.as_ref().unwrap().1.version, - self.view.p2p().nodes_config.this_node(), - req.version - ); - } - - let (kv_version, new_value) = kv_store_engine - .register_waiter_for_new(&keybytes, guard) - .await - .unwrap_or_else(|err| { - panic!("fail to wait for new data version: {:?}", err); - }); - - let Some(new_value) = new_value.as_raw_data() else { - fail_with_msg(format!( - "fatal error, kv value supposed to be DataSetMeta, rathe than {:?}", - new_value - )) - .await; - return; - }; - - // deserialize - let new_value = bincode::deserialize::(&new_value); - if let Err(err) = new_value { - fail_with_msg(format!( - "fatal error, kv value deserialization failed: {}", - err - )) - .await; - return; - } - let new_value = new_value.unwrap(); - - // version check - if new_value.version > req.version { - fail_by_overwrite().await; - return; - } else if new_value.version < req.version { - tracing::debug!("recv data version({}) is old than required({}), waiting for new notify",new_value.version, req.version); - // still need to wait for new version - continue; - } else { - required_meta = Some((kv_version, new_value)); - break; - } - } else if old_dataset_version > req.version { - drop(guard); - fail_by_overwrite().await; - return; - } else { - tracing::debug!( - "data version is matched cur({}) require({}) // 0 should be invalid", - old_dataset_version, - req.version - ); - break; - } - } - } - - // Step3: write data - tracing::debug!("start to write data"); - let lock = 
kv_store_engine.with_rwlock(&KeyTypeDataSetMeta(&req.unique_id).make_key()); - let guard = KeyLockGuard::Write(lock.write()); - let check_meta = kv_store_engine.get( - &KeyTypeDataSetMeta(&req.unique_id), - true, - KvAdditionalConf {}, - ); //tofix, master send maybe not synced - if check_meta.is_none() - || check_meta.as_ref().unwrap().0 != required_meta.as_ref().unwrap().0 - { - drop(guard); - responsor - .send_resp(WriteOneDataResponse { - remote_version: if check_meta.is_none() { - 0 - } else { - check_meta.as_ref().unwrap().1.version - }, - success: false, - message: "meta is updated again, cancel write".to_owned(), - }) - .await - .todo_handle(); - return; - } - - for data_with_idx in req.data.into_iter() { - let proto::DataItemWithIdx { idx, data } = data_with_idx; - let data = data.unwrap(); - let serialize = data.encode_persist(); - tracing::debug!( - "writing data part uid({:?}) idx({}) item({})", - req.unique_id, - idx, - data.to_string() - ); - if let Err(err) = kv_store_engine.set( - KeyTypeDataSetItem { - uid: req.unique_id.as_ref(), - idx: idx as u8, - }, - &serialize, - true, - ) { - tracing::warn!("flush error: {}", err) - } - } - kv_store_engine.flush(); - drop(guard); - tracing::debug!("data is written"); - responsor - .send_resp(WriteOneDataResponse { - remote_version: req.version, - success: true, - message: "".to_owned(), - }) - .await - .todo_handle(); - } - - async fn rpc_handle_data_meta_update( - &self, - responsor: RPCResponsor, - mut req: proto::DataMetaUpdateRequest, - ) { - struct Defer { - node: NodeID, - } - impl Drop for Defer { - fn drop(&mut self) { - tracing::debug!("rpc_handle_data_meta_update return at node({})", self.node); - } - } - let _defer = Defer { - node: self.view.p2p().nodes_config.this_node(), - }; - - let key = KeyTypeDataSetMeta(&req.unique_id); - let keybytes = key.make_key(); - - tracing::debug!("rpc_handle_data_meta_update {:?}", req); - let kv_lock = self.view.kv_store_engine().with_rwlock(&keybytes); - let _kv_write_lock_guard = kv_lock.write(); - - if let Some((_old_version, mut old_meta)) = - self.view.kv_store_engine().get(&key, true, KvAdditionalConf {}) - { - if old_meta.version > req.version { - drop(_kv_write_lock_guard); - let err_msg = "New data version is smaller, failed update"; - tracing::warn!("{}", err_msg); - responsor - .send_resp(proto::DataMetaUpdateResponse { - version: old_meta.version, - message: err_msg.to_owned(), - }) - .await - .todo_handle(); - return; - } - old_meta.version = req.version; - if req.serialized_meta.len() > 0 { - self.view.kv_store_engine() - .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) - .todo_handle(); - } else { - self.view.kv_store_engine() - .set(key, &old_meta, true) - .todo_handle(); - } - } else { - if req.serialized_meta.len() > 0 { - tracing::debug!( - "set new meta data, {:?}", - bincode::deserialize::(&req.serialized_meta) - ); - self.view.kv_store_engine() - .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) - .todo_handle(); - } else { - drop(_kv_write_lock_guard); - let err_msg = "Old meta data not found and missing new meta"; - tracing::warn!("{}", err_msg); - responsor - .send_resp(proto::DataMetaUpdateResponse { - version: 0, - message: err_msg.to_owned(), - }) - .await - .todo_handle(); - return; - } - } - drop(_kv_write_lock_guard); - tracing::debug!("rpc_handle_data_meta_update success"); - responsor - .send_resp(proto::DataMetaUpdateResponse { - version: req.version, - message: "Update success".to_owned(), - }) - .await - 
.todo_handle(); - } - - async fn rpc_handle_get_data_meta( - &self, - req: proto::DataMetaGetRequest, - responsor: RPCResponsor, - ) -> WSResult<()> { - tracing::debug!("rpc_handle_get_data_meta with req({:?})", req); - let meta = self.view.get_data_meta(&req.unique_id, req.delete)?; - if meta.is_none() { - tracing::debug!("rpc_handle_get_data_meta data meta not found"); - } else { - tracing::debug!("rpc_handle_get_data_meta data meta found"); - } - let serialized_meta = meta.map_or(vec![], |(_kvversion, meta)| { - bincode::serialize(&meta).unwrap() - }); - - responsor - .send_resp(proto::DataMetaGetResponse { serialized_meta }) - .await?; - - Ok(()) - } - - async fn rpc_handle_get_one_data( - &self, - responsor: RPCResponsor, - req: proto::GetOneDataRequest, - ) -> WSResult<()> { - tracing::debug!("starting rpc_handle_get_one_data {:?}", req); - - let kv_store_engine = self.view.kv_store_engine(); - let _ = self.view - .get_data_meta(&req.unique_id, req.delete) - .map_err(|err| { - tracing::warn!("rpc_handle_get_one_data get_data_meta failed: {:?}", err); - err - })?; - - let mut got_or_deleted = vec![]; - let mut kv_ope_err = vec![]; - - for idx in req.idxs { - let value = if req.delete { - match kv_store_engine.del( - KeyTypeDataSetItem { - uid: req.unique_id.as_ref(), - idx: idx as u8, - }, - false, - ) { - Ok(value) => value, - Err(e) => { - kv_ope_err.push(e); - None - } - } - } else { - kv_store_engine.get( - &KeyTypeDataSetItem { - uid: req.unique_id.as_ref(), - idx: idx as u8, - }, - false, - KvAdditionalConf {}, - ) - }; - got_or_deleted.push(value); - } - - let (success, message): (bool, String) = if kv_ope_err.len() > 0 { - (false, { - let mut msg = String::from("KvEngine operation failed: "); - for e in kv_ope_err.iter() { - msg.push_str(&format!("{:?}", e)); - } - msg - }) - } else if got_or_deleted.iter().all(|v| v.is_some()) { - (true, "success".to_owned()) - } else { - tracing::warn!("some data not found"); - (false, "some data not found".to_owned()) - }; - - let mut got_or_deleted_checked: Vec = vec![]; - if success { - for v in got_or_deleted { - let decode_res = proto::DataItem::decode_persist(v.unwrap().1); - tracing::debug!("decode_res type: {:?}", decode_res.to_string()); - got_or_deleted_checked.push(decode_res); - } - } - - responsor - .send_resp(proto::GetOneDataResponse { - success, - data: got_or_deleted_checked, - message, - }) - .await?; - - Ok(()) - } } #[derive(Serialize, Deserialize)] @@ -1343,30 +970,21 @@ pub enum GetOrDelDataArgType { } impl DataGeneralView { - fn get_data_meta( - &self, - unique_id: &[u8], - delete: bool, - ) -> WSResult> { - let ope_name = if delete { "delete" } else { "get" }; - tracing::debug!("{} data meta for uid({:?})", ope_name, unique_id); - - let kv_store_engine = self.kv_store_engine(); - let key = KeyTypeDataSetMeta(&unique_id); - let keybytes = key.make_key(); - - let write_lock = kv_store_engine.with_rwlock(&keybytes); - let _guard = write_lock.write(); - - let meta_opt = if delete { - kv_store_engine.del(key, true)? 
- } else { - kv_store_engine.get(&key, true, KvAdditionalConf {}) - }; - Ok(meta_opt) + fn inner_new(args: LogicalModuleNewArgs) -> Self { + Self { + inner: args.logical_modules_ref, + } } } +// 为 proto::EachNodeSplit 实现 cache_mode_visitor +// impl proto::EachNodeSplit { +// pub fn cache_mode_visitor(&self) -> CacheModeVisitor { +// CacheModeVisitor(self.cache_mode as u16) +// } +// } + +// 实现 From trait 处理错误转换 impl From for WSError { fn from(err: JoinError) -> Self { WsNetworkLogicErr::TaskJoinError { err }.into() @@ -1386,94 +1004,18 @@ impl LogicalModule for DataGeneral { rpc_call_batch_data: RPCCaller::new(), rpc_call_get_data_meta: RPCCaller::new(), rpc_call_get_data: RPCCaller::new(), - + rpc_handler_write_once_data: RPCHandler::new(), rpc_handler_batch_data: RPCHandler::new(), rpc_handler_data_meta_update: RPCHandler::new(), rpc_handler_get_data_meta: RPCHandler::new(), rpc_handler_get_data: RPCHandler::new(), - + batch_transfers: DashMap::new(), } } async fn start(&self) -> WSResult> { - tracing::info!("start as master"); - - let p2p = self.view.p2p(); - // register rpc callers - { - self.rpc_call_data_version_schedule.regist(p2p); - self.rpc_call_write_once_data.regist(p2p); - self.rpc_call_batch_data.regist(p2p); - self.rpc_call_get_data_meta.regist(p2p); - self.rpc_call_get_data.regist(p2p); - } - - // register rpc handlers - { - let this = self.clone(); - self.rpc_handler_write_once_data - .regist(p2p, move |responsor, req| { - let this = this.clone(); - let _ = tokio::spawn(async move { - this.rpc_handle_write_one_data(responsor, req).await; - }); - Ok(()) - }); - - let this = self.clone(); - self.rpc_handler_batch_data.regist( - p2p, - move |responsor: RPCResponsor, - req: proto::sche::BatchDataRequest| { - let this = this.clone(); - let _ = tokio::spawn(async move { - this.rpc_handle_batch_data(responsor, req).await; - }); - Ok(()) - }, - ); - - let this = self.clone(); - self.rpc_handler_data_meta_update.regist( - p2p, - move |responsor: RPCResponsor, - req: proto::DataMetaUpdateRequest| { - let this = this.clone(); - let _ = tokio::spawn(async move { - this.rpc_handle_data_meta_update(responsor, req).await - }); - Ok(()) - }, - ); - - let this = self.clone(); - self.rpc_handler_get_data_meta - .regist(p2p, move |responsor, req| { - let this = this.clone(); - let _ = tokio::spawn(async move { - this.rpc_handle_get_data_meta(req, responsor) - .await - .todo_handle(); - }); - Ok(()) - }); - - let this = self.clone(); - self.rpc_handler_get_data.regist( - p2p, - move |responsor: RPCResponsor, - req: proto::GetOneDataRequest| { - let this = this.clone(); - let _ = tokio::spawn(async move { - this.rpc_handle_get_one_data(responsor, req).await - }); - Ok(()) - }, - ); - } - Ok(vec![]) } } From d146459c8e44808c30d7c700662e5c43b6e80034 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 20/26] Revert "backup" This reverts commit cb9d85c3008909a66cbb7cc1a8fa4697fd09880c. 
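The `BatchDataRequest` shape this series settles on (`batch_id` / `total_batches` / `is_complete` in `sche.proto`) implies the sender-side chunking loop seen in `write_data_batch`: compute a ceiling-divided batch count, slice the payload, and flag the last slice. Below is a minimal, self-contained sketch of that split; the `BATCH_SIZE` constant, the plain struct standing in for the prost-generated message, and the `split_into_batches` helper are illustrative assumptions, not the crate's API, and the `unique_id`/`version` fields are omitted for brevity.

```rust
// Sender-side chunking for a batched transfer (sketch, not the crate's API).
const BATCH_SIZE: usize = 1024 * 1024; // assumed 1 MiB per batch

#[derive(Debug)]
struct BatchDataRequest {
    batch_id: u32,      // id handed back by the init request (0 marks init)
    total_batches: u32, // fixed up front so the receiver can preallocate
    data: Vec<u8>,      // payload slice for this batch
    data_item_idx: u32, // which data item the slice belongs to
    is_complete: bool,  // true only on the final batch
}

fn split_into_batches(data: &[u8], data_item_idx: u32, batch_id: u32) -> Vec<BatchDataRequest> {
    let total_size = data.len();
    // ceiling division, matching (total_size + batch_size - 1) / batch_size
    let total_batches = (total_size + BATCH_SIZE - 1) / BATCH_SIZE;
    (0..total_batches)
        .map(|idx| {
            let start = idx * BATCH_SIZE;
            let end = (start + BATCH_SIZE).min(total_size);
            BatchDataRequest {
                batch_id,
                total_batches: total_batches as u32,
                data: data[start..end].to_vec(),
                data_item_idx,
                is_complete: idx == total_batches - 1,
            }
        })
        .collect()
}

fn main() {
    let payload = vec![0u8; 2 * BATCH_SIZE + 123];
    let reqs = split_into_batches(&payload, 0, 1);
    assert_eq!(reqs.len(), 3);
    assert_eq!(reqs.last().unwrap().data.len(), 123);
    assert!(reqs.last().unwrap().is_complete);
    println!("{:?}", reqs.last().unwrap());
}
```

Carrying both `total_batches` and `is_complete` is redundant, but it lets the receiver either preallocate by count or simply stream until the flag — presumably why the message keeps both.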
--- .cursorrules | 66 - Cargo.lock | 7 - Cargo.toml | 1 - claude_notes.md | 203 --- src/main/Cargo.toml | 1 - src/main/src/general/app/app_native/mod.rs | 60 +- src/main/src/general/app/app_owned/mod.rs | 12 +- src/main/src/general/app/app_owned/wasm.rs | 21 +- .../app/app_owned/wasm_host_funcs/kv.rs | 19 +- .../app/app_owned/wasm_host_funcs/mod.rs | 16 +- .../app/app_owned/wasm_host_funcs/result.rs | 5 +- src/main/src/general/app/app_shared/mod.rs | 7 +- .../src/general/app/app_shared/process.rs | 88 +- src/main/src/general/app/fn_event.rs | 76 + .../app/instance/m_instance_manager.rs | 38 +- src/main/src/general/app/m_executor.rs | 255 +-- src/main/src/general/app/mod.rs | 205 +-- .../general/data/m_data_general/dataitem.rs | 8 +- .../src/general/data/m_data_general/mod.rs | 1584 ++++++++++++----- src/main/src/general/network/msg_pack.rs | 11 +- src/main/src/general/network/proto_ext.rs | 75 - .../src/general/network/proto_src/sche.proto | 47 +- src/main/src/master/app/m_app_master.rs | 15 +- src/main/src/master/data/m_data_master.rs | 250 +-- src/main/src/master/m_master.rs | 103 +- .../src/modules_global_bridge/process_func.rs | 2 +- src/main/src/result.rs | 13 +- src/main/src/sys.rs | 8 - 28 files changed, 1628 insertions(+), 1568 deletions(-) delete mode 100644 .cursorrules delete mode 100644 claude_notes.md create mode 100644 src/main/src/general/app/fn_event.rs diff --git a/.cursorrules b/.cursorrules deleted file mode 100644 index 81bc931..0000000 --- a/.cursorrules +++ /dev/null @@ -1,66 +0,0 @@ -# Waverless Project Rules - -## Code Style -- 使用 Rust 2021 edition -- 遵循标准 Rust 命名约定 -- 使用 4 空格缩进 -- 对于有意未使用的变量,使用下划线前缀(如 _pack) -- 如果变量可能会被使用但目前未使用,保留原名 -- 如果确定不需要验证的参数,直接使用下划线前缀 - -## RPC Message Handling -- 对于 RPC 消息的 Option 字段: - - 如果是必需字段(不能缺省),需要在 verify 中进行 is_some() 校验 - - 如果是可选字段,使用 _pack 忽略变量,直接返回 true -- 消息验证规则: - - 对于包含关键业务数据的请求(如 WriteOneDataRequest),必须验证所有必需字段 - - 对于简单的状态同步或查询请求(如 BatchDataRequest),可以跳过验证 -- 验证函数实现: - - 需要验证时,使用 pack 并编写完整的验证逻辑 - - 不需要验证时,使用 _pack 并直接返回 true - -## Error Handling -- 使用 WSResult 作为错误返回类型 -- 顶层错误类型应该实现 std::error::Error trait -- 子错误类型只需要实现 Debug trait -- 使用 ? 
运算符进行错误传播 - -## Type System -- 为复杂的数据结构实现必要的 traits (Clone, Debug 等) -- 使用强类型,避免类型转换 -- 为枚举类型实现必要的派生宏 - -## Development Process -- 每解决一个问题后立即进行编译检查 -- 确保修改不会引入新的编译错误 -- 按优先级逐个解决编译错误 - -## Documentation -- 为公共 API 提供文档注释 -- 使用中文注释说明复杂的业务逻辑 -- 包含示例代码说明用法 - -## Performance -- 避免不必要的克隆操作 -- 使用异步操作处理 I/O -- 合理使用并发和并行 - -## Testing -- 为公共 API 编写单元测试 -- 使用 cargo test 运行测试 -- 包含集成测试 - -## Dependencies -- 明确指定依赖版本 -- 最小化依赖数量 -- 及时更新依赖版本 - -## Security -- 不在代码中硬编码敏感信息 -- 使用安全的加密算法 -- 正确处理用户输入 - -## Logging -- 使用 tracing 进行日志记录 -- 包含适当的日志级别 -- 记录关键操作和错误信息 diff --git a/Cargo.lock b/Cargo.lock index 01085c2..028b3a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -990,12 +990,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - [[package]] name = "hmac" version = "0.12.1" @@ -3546,7 +3540,6 @@ dependencies = [ "downcast-rs", "enum-as-inner", "futures", - "hex", "hyper 0.14.31", "lazy_static", "md-5", diff --git a/Cargo.toml b/Cargo.toml index 65dd577..452a321 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,7 +60,6 @@ md-5 = "0.10.1" path-absolutize = "3.0.13" dashmap = "6.1.0" base64 = "0.22.1" -hex = "0.4.3" [profile.test] # 0: no optimizations diff --git a/claude_notes.md b/claude_notes.md deleted file mode 100644 index a55c7f5..0000000 --- a/claude_notes.md +++ /dev/null @@ -1,203 +0,0 @@ -# Waverless 项目关键设计笔记 - -## 1. 函数执行上下文设计 - -### 1.1 基础结构 -- `FnExeCtx`: 私有的基础结构体,包含函数执行的基本信息 - ```rust - struct FnExeCtx { - pub app: String, - pub app_type: AppType, - pub func: String, - pub func_meta: FnMeta, - pub req_id: ReqId, - pub event_ctx: EventCtx, - pub res: Option, - pub sub_waiters: Vec>, - _dummy_private: (), - } - ``` - -### 1.2 公开特化类型 -- `FnExeCtxAsync` 和 `FnExeCtxSync`: - - 异步执行上下文支持 Jar、Wasm、Native 类型,包含子任务支持和完整的性能监控和日志。 - - 同步执行上下文仅支持 Native 类型,不支持子任务,包含基本的性能监控和日志。 - -### 1.3 类型安全 -- `FnExeCtxAsyncAllowedType` 和 `FnExeCtxSyncAllowedType`: - - 异步允许的类型 (Jar, Wasm, Native) - - 同步允许的类型 (仅 Native) - - 通过 `TryFrom` 在编译时强制类型安全 - -## 2. 实例管理设计 - -### 2.1 实例类型与管理器 -- `Instance` 和 `InstanceManager`: - - `Instance` 包含 Owned、Shared 和 Native 类型。 - - `InstanceManager` 管理应用实例和运行时函数上下文。 - ```rust - pub enum Instance { - Owned(OwnedInstance), - Shared(SharedInstance), - Native(NativeAppInstance), - } - - pub struct InstanceManager { - pub app_instances: SkipMap, - pub instance_running_function: DashMap, - } - ``` - -### 2.2 运行时函数上下文 -- `UnsafeFunctionCtx`: - - 包含 Sync 和 Async 类型,分别对应 `FnExeCtxSync` 和 `FnExeCtxAsync`。 - -## 3. 关键修改记录 - -### 3.1 同步/异步执行流程优化与错误处理增强 -- 简化 `finish_using`,移除不必要的异步版本,统一使用同步实现。 -- 添加同步版本的 `load_instance_sync`,仅支持 Native 类型。 -- 优化 `execute_sync` 中的异步调用处理,统一性能监控和日志记录格式。 -- 添加 `UnsupportedAppType` 错误类型,完善同步执行时的类型检查。 - -## 4. 待办事项 -- [x] 考虑添加同步版本的 `load_instance` -- [ ] 优化 `execute_sync` 中的异步-同步转换 -- [ ] 完善错误处理和日志记录 - -## 5. 核心设计原则 - -### 5.1 基础原则与 View 模式设计规则 -- 同步/异步分离,类型安全,性能监控,资源管理。 -- View 生成: - - View 结构体和 `LogicalModule` trait 的实现由宏生成。 - - 只需实现 `inner_new` 函数,使用 `logical_module_view_impl!` 生成访问函数。 - - 每个需要访问的模块都需要单独的 impl 宏调用。 - -### 5.2 去掉 #[derive(LogicalModule)] 的原因和注意事项 -- 实现特定功能:根据需求在 `DataGeneralView` 中实现特定功能,检查冲突。 -- `inner` 字段的管理:由宏管理,不能直接操作,通过宏生成的接口使用。 -- 错误分析:去掉派生后,仔细分析和解决可能出现的错误。 - -## 6. 
msg_pack 消息封装 - -### 6.1 基本原则与实现示例 -- 使用 `msg_pack.rs` 中的宏实现 trait,使用 `define_msg_ids!` 管理消息类型。 -- 通过 `RPCReq` trait 定义请求-响应关系。 - ```rust - define_msg_ids!( - (proto::sche::BatchDataRequest, pack, { true }), - (proto::sche::BatchDataResponse, _pack, { true }) - ); - - impl RPCReq for proto::sche::BatchDataRequest { - type Resp = proto::sche::BatchDataResponse; - } - ``` - -### 6.2 最佳实践 -- 新增消息类型时:在 `define_msg_ids!` 中添加定义,实现 `RPCReq` trait。 -- 使用消息时:使用 `RPCCaller` 和 `RPCHandler`,遵循统一的错误处理。 - -## 7. Waverless 代码规范核心规则 - -### 7.1 文档维护与代码组织原则 -- 文档压缩原则:保持无损压缩,合并重复内容,简化表述,重构文档结构。 -- 文档更新规则:确认信息完整性,保留技术细节,使用清晰结构展示信息。 -- 代码组织规则:宏生成的访问函数直接使用,非 pub 函数只在一个地方定义,View 负责核心实现,具体模块负责自己的功能,通过 View 访问其他模块。 - -### 7.2 代码修改原则 -- 不随意删除或修改已有的正确实现 -- 不在多处实现同一功能 -- 保持代码结构清晰简单 -- 修改前先理解设计原则 - -#### 异步任务处理原则 -- 分析生命周期和所有权需求 -- 避免盲目克隆,只克隆必要数据 -- 考虑类型特征(如 P2PModule 的轻量级 Clone) -- 评估替代方案 - -```rust -// 反例:过度克隆 -let p2p = self.p2p().clone(); // 不必要,P2PModule 本身就是轻量级的 -let data_general = self.data_general().clone(); // 不必要,同上 - -// 正例:按需克隆 -let split_info = split.clone(); // 必要,因为来自临时变量的引用 -``` - -分析要点: -- 使用场景:确认异步任务中的实际需求 -- 类型特征:检查是否已实现轻量级 Clone -- 生命周期:特别关注临时变量引用 -- 替代方案:考虑其他实现方式 - -### 7.3 错误与正确示例 -- 错误示例:手动实现已有的宏生成函数,在两个地方都实现同一个函数,过度修改已有代码结构,有损压缩文档内容。 -- 正确示例:使用宏生成的访问函数,在合适的位置添加新功能,遵循已有的代码组织方式,保持文档的完整性和准确性。 - -### 7.4 异步任务变量处理规范 - -#### 1. 变量分析原则 -- 生命周期分析:确定变量在异步任务中的生存期 -- 所有权需求:判断是否需要克隆或移动所有权 -- 类型特征:考虑变量的类型特性(如 Clone、Send、'static 等) -- 数据共享:评估是否需要在多个任务间共享数据 - -#### 2. 克隆策略 -必须克隆的情况: -- 临时变量引用:`split_info.clone()`(来自迭代器) -- 多任务共享:`unique_id.clone()`(多个任务需要) -- 部分数据:`data_item.clone_split_range()`(只克隆需要的范围) - -不需要克隆的情况: -- 值类型复制:`version`(直接复制即可) -- 已实现 Copy:基本数据类型 -- 单一任务使用:不需要在多个任务间共享的数据 - -#### 3. View 模式使用规范 -基本原则: -- View 本身已经是完整引用:不需要额外的 view 字段 -- 异步任务中使用:`self.clone()` -- 模块访问:通过 view 直接访问其他模块 - -示例代码: -```rust -// 正确示例 -let view = self.clone(); // View 本身克隆 -let resp = view.data_general().rpc_call_write_once_data... - -// 错误示例 -let view = self.view.clone(); // 错误:不需要额外的 view 字段 -let data_general = self.data_general().clone(); // 错误:不需要单独克隆模块 -``` - -#### 4. 异步任务数据处理检查清单 -- [ ] 是否只克隆必要的数据? -- [ ] 临时变量是否正确处理? -- [ ] View 的使用是否符合规范? -- [ ] 是否避免了重复克隆? -- [ ] 数据共享策略是否合理? - -#### 5. 常见场景示例 - -1. 批量数据处理: -```rust -// 正确处理临时变量和部分数据 -let split_info = split_info.clone(); // 临时变量必须克隆 -let data_item = data_item.clone_split_range(range); // 只克隆需要的部分 -let view = self.clone(); // View 克隆用于异步任务 -``` - -2. 并发任务处理: -```rust -// 使用信号量和数据共享 -let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT)); -let view = self.clone(); // 一次克隆,多处使用 -for node_id in nodes { - let permit = semaphore.clone(); - let view = view.clone(); // View 在任务间共享 - tokio::spawn(async move { ... 
}); -} -``` diff --git a/src/main/Cargo.toml b/src/main/Cargo.toml index b61ac6c..a2d63da 100644 --- a/src/main/Cargo.toml +++ b/src/main/Cargo.toml @@ -62,7 +62,6 @@ md-5.workspace = true path-absolutize.workspace = true dashmap.workspace = true base64.workspace = true -hex = "0.4.3" [dependencies.uuid] version = "1.8.0" diff --git a/src/main/src/general/app/app_native/mod.rs b/src/main/src/general/app/app_native/mod.rs index 5bf2a7e..37dae7c 100644 --- a/src/main/src/general/app/app_native/mod.rs +++ b/src/main/src/general/app/app_native/mod.rs @@ -2,15 +2,18 @@ pub mod app_checkpoint; use std::collections::HashMap; -use super::{ - AffinityPattern, AffinityRule, AppMeta, AppType, DataAccess, DataEventTrigger, FnMeta, - KeyPattern, NodeTag, -}; -use crate::general::app::instance::{Instance, InstanceTrait}; -use crate::general::app::m_executor::{FnExeCtxAsync, FnExeCtxSync}; +use super::AppMeta; +use super::AppType; +use crate::general::app::instance::Instance; +use crate::general::app::instance::InstanceTrait; +use crate::general::app::m_executor::FnExeCtx; +use crate::general::app::DataAccess; +use crate::general::app::DataEventTrigger; +use crate::general::app::FnMeta; +use crate::general::app::KeyPattern; use crate::general::data::m_data_general::DATA_UID_PREFIX_APP_META; use crate::new_map; -use crate::result::{WSResult, WsFuncError}; +use crate::result::WSResult; use async_trait::async_trait; pub struct NativeAppInstance { @@ -25,19 +28,12 @@ impl NativeAppInstance { #[async_trait] impl InstanceTrait for NativeAppInstance { - // don't need instance name + // don’t need instance name fn instance_name(&self) -> String { "native_app_dummy_instance".to_string() } - async fn execute(&self, _fn_ctx: &mut FnExeCtxAsync) -> WSResult> { - // Native apps don't support async execution - Err(WsFuncError::UnsupportedAppType.into()) - } - - fn execute_sync(&self, _fn_ctx: &mut FnExeCtxSync) -> WSResult> { - // For now, just return None as native apps don't produce results + async fn execute(&self, _fn_ctx: &mut FnExeCtx) -> WSResult> { todo!() - // Ok(None) } } @@ -67,7 +63,6 @@ pub fn native_apps() -> HashMap { AppType::Native, new_map!(HashMap { "checkpointable".to_string() => FnMeta { - sync_async: super::FnSyncAsyncSupport::Sync, calls: vec![], data_accesses: Some(new_map!(HashMap { KeyPattern(DATA_UID_PREFIX_APP_META.to_string()) => DataAccess { @@ -76,28 +71,21 @@ pub fn native_apps() -> HashMap { delete: false, event: None, } - })), - affinity: Some(AffinityRule { - tags: vec![NodeTag::Worker], - nodes: AffinityPattern::All, - }), + })) }, "checkpoint".to_string() => FnMeta { - sync_async: super::FnSyncAsyncSupport::Async, calls: vec![], - data_accesses: Some(new_map!(HashMap { - KeyPattern(DATA_UID_PREFIX_APP_META.to_string()) => DataAccess { - get: true, - set: false, - delete: false, - event: Some(DataEventTrigger::WriteWithCondition { - condition: "checkpointable".to_string(), - }), - } - })), - affinity: Some(AffinityRule { - tags: vec![NodeTag::Worker], - nodes: AffinityPattern::All, + data_accesses: Some({ + new_map!(HashMap { + KeyPattern(DATA_UID_PREFIX_APP_META.to_string()) => DataAccess { + get: true, + set: false, + delete: false, + event: Some(DataEventTrigger::WriteWithCondition { + condition: "checkpointable".to_string(), + }), + } + }) }), }, }), diff --git a/src/main/src/general/app/app_owned/mod.rs b/src/main/src/general/app/app_owned/mod.rs index 782b24c..c02aecc 100644 --- a/src/main/src/general/app/app_owned/mod.rs +++ b/src/main/src/general/app/app_owned/mod.rs @@ -3,8 
+3,8 @@ pub mod wasm_host_funcs; use crate::general::app::instance::InstanceTrait; use crate::general::app::instance::OwnedInstance; -use crate::general::app::m_executor::{FnExeCtxAsync, FnExeCtxSync}; -use crate::result::{WSResult, WsFuncError}; +use crate::general::app::m_executor::FnExeCtx; +use crate::result::WSResult; use async_trait::async_trait; #[async_trait] @@ -14,15 +14,9 @@ impl InstanceTrait for OwnedInstance { OwnedInstance::WasmInstance(v) => v.instance_name(), } } - async fn execute(&self, fn_ctx: &mut FnExeCtxAsync) -> WSResult> { + async fn execute(&self, fn_ctx: &mut FnExeCtx) -> WSResult> { match self { OwnedInstance::WasmInstance(v) => v.execute(fn_ctx).await, } } - - fn execute_sync(&self, fn_ctx: &mut FnExeCtxSync) -> WSResult> { - match self { - OwnedInstance::WasmInstance(v) => v.execute_sync(fn_ctx), - } - } } diff --git a/src/main/src/general/app/app_owned/wasm.rs b/src/main/src/general/app/app_owned/wasm.rs index 183339f..342d5ad 100644 --- a/src/main/src/general/app/app_owned/wasm.rs +++ b/src/main/src/general/app/app_owned/wasm.rs @@ -1,7 +1,7 @@ use crate::general::app::app_owned::wasm_host_funcs; use crate::general::app::instance::InstanceTrait; use crate::general::app::instance::OwnedInstance; -use crate::general::app::m_executor::{EventCtx, FnExeCtxAsync, FnExeCtxBase, FnExeCtxSync}; +use crate::general::app::m_executor::{EventCtx, FnExeCtx}; use crate::result::{WSResult, WsFuncError}; use async_trait::async_trait; use std::{mem::ManuallyDrop, path::Path}; @@ -70,7 +70,7 @@ impl InstanceTrait for WasmInstance { .next() .unwrap() } - async fn execute(&self, fn_ctx: &mut FnExeCtxAsync) -> WSResult> { + async fn execute(&self, fn_ctx: &mut FnExeCtx) -> WSResult> { #[cfg(target_os = "linux")] { let mut final_err = None; @@ -80,9 +80,9 @@ impl InstanceTrait for WasmInstance { } // retry loop - let mut params = fn_ctx.event_ctx().conv_to_wasm_params(&self); + let mut params = fn_ctx.event_ctx.conv_to_wasm_params(&self); for turn in 0..2 { - let func = fn_ctx.func().to_owned(); + let func = fn_ctx.func.clone(); let Err(err) = self .run_func_async( &AsyncState::new(), @@ -108,27 +108,22 @@ impl InstanceTrait for WasmInstance { } if turn == 0 && fn_ctx.empty_http() && is_func_type_mismatch(&err) { - fn_ctx.set_result(None); + fn_ctx.res = None; continue; } else { tracing::error!("run func failed with err: {}", err); final_err = Some(err); break; + // return None; } } if let Some(err) = final_err { - Err(WsFuncError::WasmError(err).into()) + Err(WsFuncError::WasmError(*err).into()) } else { - Ok(fn_ctx.take_result()) + Ok(fn_ctx.res.take()) } } } - - /// WASM instances don't support synchronous execution - /// See [`FnExeCtxSyncAllowedType`] for supported types (currently only Native) - fn execute_sync(&self, _fn_ctx: &mut FnExeCtxSync) -> WSResult> { - Err(WsFuncError::UnsupportedAppType.into()) - } } // pub fn new_java_instance(_config: NewJavaInstanceConfig) -> ProcessInstance {} diff --git a/src/main/src/general/app/app_owned/wasm_host_funcs/kv.rs b/src/main/src/general/app/app_owned/wasm_host_funcs/kv.rs index 74f2d34..1a65bc8 100644 --- a/src/main/src/general/app/app_owned/wasm_host_funcs/kv.rs +++ b/src/main/src/general/app/app_owned/wasm_host_funcs/kv.rs @@ -1,5 +1,4 @@ use super::{utils, utils::m_kv_user_client, HostFuncRegister}; -use crate::general::app::m_executor::FnExeCtxBase; use crate::general::network::proto::{ self, kv::{KeyRange, KvPair, KvRequest, KvRequests, KvResponses}, @@ -221,20 +220,20 @@ async fn kv_batch_ope( } } // 
tracing::debug!("requests:{:?}", requests); - let prev_kv_opeid = func_ctx - .event_ctx_mut() - .take_prev_kv_opeid() - .map_or(-1, |v| v as i64); match m_kv_user_client() .kv_requests( - func_ctx.app(), - func_ctx.func(), + &func_ctx.app, + &func_ctx.func, KvRequests { requests, - app: func_ctx.app().to_owned(), - func: func_ctx.func().to_owned(), - prev_kv_opeid, + app: func_ctx.app.clone(), + func: func_ctx.func.clone(), + prev_kv_opeid: func_ctx + .event_ctx + .take_prev_kv_opeid() + .map_or(-1, |v| v as i64), }, + // KvOptions::new(), ) .await { diff --git a/src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs b/src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs index c3df65c..abac07f 100644 --- a/src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs +++ b/src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs @@ -4,7 +4,6 @@ mod fs; mod kv; mod result; -use crate::general::app::instance::m_instance_manager::UnsafeFunctionCtx; use crate::sys::LogicalModulesRef; use fs::FsFuncsRegister; use kv::KvFuncsRegister; @@ -12,8 +11,7 @@ use result::ResultFuncsRegister; mod utils { - use super::UnsafeFunctionCtx; - use crate::general::app::m_executor::{FnExeCtxAsync, FnExeCtxBase}; + use crate::general::app::m_executor::FnExeCtx; use crate::general::app::InstanceManager; use crate::{ general::m_os::OperatingSystem, sys::LogicalModulesRef, util::SendNonNull, @@ -107,17 +105,15 @@ mod utils { } } - pub fn current_app_fn_ctx(caller: &impl WasmCtx) -> SendNonNull { + pub fn current_app_fn_ctx(caller: &impl WasmCtx) -> SendNonNull { let app_fn = SendNonNull( - match m_instance_manager() + m_instance_manager() .instance_running_function + .read() .get(&caller.i_instance().unwrap().name().unwrap()) .unwrap() - .value() - { - UnsafeFunctionCtx::Async(ptr) => ptr.clone(), - UnsafeFunctionCtx::Sync(_) => panic!("Expected async function context"), - }, + .0 + .clone(), ); app_fn } diff --git a/src/main/src/general/app/app_owned/wasm_host_funcs/result.rs b/src/main/src/general/app/app_owned/wasm_host_funcs/result.rs index ff83530..5185ece 100644 --- a/src/main/src/general/app/app_owned/wasm_host_funcs/result.rs +++ b/src/main/src/general/app/app_owned/wasm_host_funcs/result.rs @@ -1,5 +1,4 @@ use super::{utils, HostFuncRegister}; -use crate::general::app::m_executor::FnExeCtxAsync; #[cfg(target_os = "macos")] use wasmer::{imports, Function, FunctionType, Imports}; @@ -14,8 +13,8 @@ type WriteResultArgs = (i32, i32); #[host_function] fn write_result(caller: Caller, args: Vec) -> Result, HostFuncError> { let fname = utils::u8slice(&caller, args[0].to_i32(), args[1].to_i32()); - unsafe { utils::current_app_fn_ctx(&caller).0.as_mut() } - .set_result(Some(std::str::from_utf8(fname).unwrap().to_string())); + unsafe { utils::current_app_fn_ctx(&caller).0.as_mut() }.res = + Some(std::str::from_utf8(fname).unwrap().to_string()); Ok(vec![]) } diff --git a/src/main/src/general/app/app_shared/mod.rs b/src/main/src/general/app/app_shared/mod.rs index 4a10431..c02dd4f 100644 --- a/src/main/src/general/app/app_shared/mod.rs +++ b/src/main/src/general/app/app_shared/mod.rs @@ -4,7 +4,7 @@ pub mod process_instance_man_related; pub mod process_rpc; use crate::general::app::instance::InstanceTrait; -use crate::general::app::m_executor::{FnExeCtxAsync, FnExeCtxSync}; +use crate::general::app::m_executor::FnExeCtx; use async_trait::async_trait; pub struct SharedInstance(pub process::ProcessInstance); @@ -20,10 +20,7 @@ impl InstanceTrait for SharedInstance { fn instance_name(&self) -> String { 
self.0.instance_name() } - async fn execute(&self, fn_ctx: &mut FnExeCtxAsync) -> crate::result::WSResult> { + async fn execute(&self, fn_ctx: &mut FnExeCtx) -> crate::result::WSResult> { self.0.execute(fn_ctx).await } - fn execute_sync(&self, fn_ctx: &mut FnExeCtxSync) -> crate::result::WSResult> { - self.0.execute_sync(fn_ctx) - } } diff --git a/src/main/src/general/app/app_shared/process.rs b/src/main/src/general/app/app_shared/process.rs index 298d13e..89c0dbe 100644 --- a/src/main/src/general/app/app_shared/process.rs +++ b/src/main/src/general/app/app_shared/process.rs @@ -3,12 +3,11 @@ use super::process_rpc::{self, proc_proto}; use crate::general::app::app_shared::java; use crate::general::app::instance::InstanceTrait; -use crate::general::app::m_executor::{FnExeCtxAsync, FnExeCtxBase, FnExeCtxSync}; +use crate::general::app::m_executor::FnExeCtx; use crate::general::{ app::AppType, network::rpc_model::{self, HashValue}, }; -use crate::result::{WSError, WsFuncError}; use async_trait::async_trait; use enum_as_inner::EnumAsInner; use parking_lot::RwLock; @@ -200,24 +199,75 @@ impl InstanceTrait for ProcessInstance { fn instance_name(&self) -> String { self.app.clone() } - async fn execute(&self, fn_ctx: &mut FnExeCtxAsync) -> crate::result::WSResult> { - let _ = self.wait_for_verify().await; - tracing::debug!( - "wait_for_verify done, call app:{}, func:{}", - fn_ctx.app(), - fn_ctx.func() - ); - tracing::debug!("before process_rpc::call_func "); - let res = - process_rpc::call_func(fn_ctx.app(), fn_ctx.func(), fn_ctx.http_str_unwrap()).await; - tracing::debug!("after process_rpc::call_func "); - return res.map(|v| Some(v.ret_str)); - } + async fn execute(&self, fn_ctx: &mut FnExeCtx) -> crate::result::WSResult> { + // if rpc_model::start_remote_once(rpc_model::HashValue::Str(fn_ctx.func.to_owned())) { + // // cold start the java process + // } + + // if fn_ctx.func_meta.allow_rpc_call() + { + let _ = self.wait_for_verify().await; + tracing::debug!( + "wait_for_verify done, call app:{}, func:{}", + fn_ctx.app, + fn_ctx.func + ); + tracing::debug!("before process_rpc::call_func "); + let res = + process_rpc::call_func(&fn_ctx.app, &fn_ctx.func, fn_ctx.http_str_unwrap()).await; + tracing::debug!("after process_rpc::call_func "); + return res.map(|v| Some(v.ret_str)); + // return process_rpc::call_func(&fn_ctx.app, &fn_ctx.func, fn_ctx.http_str_unwrap()) + // .await + // .map(|v| Some(v.ret_str)); + } + + // if let Some(httpmethod) = fn_ctx.func_meta.allow_http_call() { + // let fnverify = self.wait_for_verify().await; + // let Some(http_port) = &fnverify.http_port else { + // return Err(WsFuncError::FuncBackendHttpNotSupported { + // fname: fn_ctx.func.to_owned(), + // } + // .into()); + // }; + // let http_url = format!("http://127.0.0.1:{}/{}", http_port, fn_ctx.func); + // let res = match httpmethod { + // HttpMethod::Get => reqwest::get(http_url).await, + // HttpMethod::Post => { + // reqwest::Client::new() + // .post(http_url) + // .body(fn_ctx.http_str_unwrap()) + // .send() + // .await + // } + // }; + + // let ok = match res { + // Err(e) => { + // return Err(WsFuncError::FuncHttpFail { + // app: fn_ctx.app.clone(), + // func: fn_ctx.func.clone(), + // http_err: e, + // } + // .into()); + // } + // Ok(ok) => ok, + // }; - /// Process instances don't support synchronous execution - /// See [`FnExeCtxSyncAllowedType`] for supported types (currently only Native) - fn execute_sync(&self, _fn_ctx: &mut FnExeCtxSync) -> crate::result::WSResult> { - 
Err(WsFuncError::UnsupportedAppType.into()) + // return ok + // .text() + // .await + // .map_err(|e| { + // WsFuncError::FuncHttpFail { + // app: fn_ctx.app.clone(), + // func: fn_ctx.func.clone(), + // http_err: e, + // } + // .into() + // }) + // .map(|ok| Some(ok)); + // } + // unreachable!("Missing call description in func meta"); } } diff --git a/src/main/src/general/app/fn_event.rs b/src/main/src/general/app/fn_event.rs new file mode 100644 index 0000000..5df5f80 --- /dev/null +++ b/src/main/src/general/app/fn_event.rs @@ -0,0 +1,76 @@ +use crate::general::{ + network::proto::sche::distribute_task_req::{Trigger, TriggerKvSet}, +}; + +use super::{ + super::network::proto::{self, kv::KvRequest}, +}; + +pub struct EventTriggerInfo { + pub trigger_appfns: Vec<(String, String)>, + pub kvreq: KvRequest, +} +impl EventTriggerInfo { + pub fn to_trigger(&self, opeid: u32) -> Trigger { + match self.kvreq.op.as_ref().unwrap() { + proto::kv::kv_request::Op::Set(set) => { + let kv = set.kv.as_ref().unwrap(); + Trigger::KvSet(TriggerKvSet { + key: kv.key.clone(), + opeid, + }) + } + _ => unimplemented!(), + } + } +} + +// impl Into for EventTriggerInfo { +// fn into(self) -> Trigger { +// match self.kvreq.op.unwrap() { +// proto::kv::kv_request::Op::Set(set) => Trigger::KvSet(TriggerKvSet{ +// key: +// }), +// _ => unimplemented!(), +// } +// } +// } + +// pub async fn try_match_kv_event( +// app_metas: &AppMetas, +// req: &KvRequest, +// source_app: &str, +// source_fn: &str, +// ) -> Option { +// // find source app +// let Some(appmeta) = app_metas.get_app_meta(source_app).await else { +// tracing::warn!("source app:{} not found", source_app); +// return None; +// }; +// // find source func +// let Some(fnmeta) = appmeta.get_fn_meta(source_fn) else { +// tracing::warn!("app {} source func:{} not found", source_app, source_fn); +// return None; +// }; + +// match req.op.as_ref().unwrap() { +// proto::kv::kv_request::Op::Set(set) => { +// let kv = set.kv.as_ref().unwrap(); +// // match kv pattern +// let Some(pattern) = fnmeta.match_key(&kv.key, KvOps::Set) else { +// return None; +// }; +// // find trigger func +// app_metas +// .pattern_2_app_fn +// .get(&pattern.0) +// .map(|triggers| EventTriggerInfo { +// trigger_appfns: triggers.clone(), +// kvreq: req.clone(), +// }) +// } +// proto::kv::kv_request::Op::Get(_) => None, +// proto::kv::kv_request::Op::Delete(_) => None, +// proto::kv::kv_request::Op::Lock(_) => None, +// } +// } diff --git a/src/main/src/general/app/instance/m_instance_manager.rs b/src/main/src/general/app/instance/m_instance_manager.rs index 1d07c3e..da0ce4a 100644 --- a/src/main/src/general/app/instance/m_instance_manager.rs +++ b/src/main/src/general/app/instance/m_instance_manager.rs @@ -3,8 +3,6 @@ use crate::general::app::app_owned::wasm; use crate::general::app::app_shared::process_rpc::ProcessRpc; use crate::general::app::app_shared::SharedInstance; use crate::general::app::instance::Instance; -use crate::general::app::m_executor::FnExeCtxAsync; -use crate::general::app::m_executor::FnExeCtxSync; use crate::general::m_os::OperatingSystem; use crate::general::network::rpc_model; use crate::result::{WSError, WsFuncError}; @@ -219,10 +217,7 @@ pub struct InstanceManager { logical_module_view_impl!(InstanceManagerView); logical_module_view_impl!(InstanceManagerView, os, OperatingSystem); -pub enum UnsafeFunctionCtx { - Sync(NonNull), - Async(NonNull), -} +pub struct UnsafeFunctionCtx(pub NonNull); unsafe impl Send for UnsafeFunctionCtx {} unsafe impl Sync for 
UnsafeFunctionCtx {} @@ -238,7 +233,7 @@ impl LogicalModule for InstanceManager { Self { app_instances: SkipMap::new(), file_dir: args.nodes_config.file_dir.clone(), - instance_running_function: DashMap::new(), + instance_running_function: parking_lot::RwLock::new(HashMap::new()), next_instance_id: AtomicU64::new(0), view: InstanceManagerView::new(args.logical_modules_ref.clone()), } @@ -249,15 +244,6 @@ impl LogicalModule for InstanceManager { // - create file with crac_config_path let mut f = { let crac_config_path = crac_config_path.clone(); - // 确保父目录存在 - if let Some(parent) = crac_config_path.parent() { - tokio::fs::create_dir_all(parent).await.map_err(|err| { - WSError::from(WsFuncError::CreateCracConfigFailed { - path: parent.to_str().unwrap().to_owned(), - err: err, - }) - })?; - } tokio::fs::File::options() .create(true) .write(true) @@ -308,7 +294,7 @@ impl InstanceManager { // Ok(()) // } - pub fn finish_using(&self, instance_name: &str, instance: Instance) { + pub async fn finish_using(&self, instance_name: &str, instance: Instance) { match instance { Instance::Owned(v) => { self.app_instances @@ -322,7 +308,6 @@ impl InstanceManager { Instance::Native(_) => {} } } - pub async fn load_instance(&self, app_type: &AppType, instance_name: &str) -> Instance { match &app_type { AppType::Jar => self.get_process_instance(app_type, instance_name).into(), @@ -338,23 +323,6 @@ impl InstanceManager { AppType::Native => NativeAppInstance::new().into(), } } - - /// Synchronous version of instance loading - /// Only supports [`FnExeCtxSyncAllowedType`] app types (currently only Native) - /// For other types like Jar and Wasm, returns UnsupportedAppType error - pub fn load_instance_sync( - &self, - app_type: &AppType, - _instance_name: &str, // 添加下划线前缀表示有意未使用 - ) -> WSResult { - match &app_type { - // Native 类型可以直接同步创建 - AppType::Native => Ok(NativeAppInstance::new().into()), - // Jar 和 Wasm 类型不支持同步加载 - AppType::Jar | AppType::Wasm => Err(WSError::from(WsFuncError::UnsupportedAppType)), - } - } - pub async fn drap_app_instances(&self, app: &str) { let _inss = self.app_instances.remove(app); // if let Some(inss) = inss { diff --git a/src/main/src/general/app/m_executor.rs b/src/main/src/general/app/m_executor.rs index 7e90948..4aade2d 100644 --- a/src/main/src/general/app/m_executor.rs +++ b/src/main/src/general/app/m_executor.rs @@ -57,8 +57,8 @@ struct FnExeCtx { pub app: String, pub app_type: AppType, pub func: String, - pub _func_meta: FnMeta, - pub _req_id: ReqId, + pub func_meta: FnMeta, + pub req_id: ReqId, pub event_ctx: EventCtx, pub res: Option, /// remote scheduling tasks @@ -110,58 +110,22 @@ impl FnExeCtxAsync { inner: FnExeCtx { app, func, - _req_id: req_id, + req_id, event_ctx, res: None, sub_waiters: vec![], app_type: apptype.into(), - _func_meta: func_meta, + func_meta, _dummy_private: (), }, } } - - pub fn event_ctx(&self) -> &EventCtx { - &self.inner.event_ctx - } - - pub fn empty_http(&self) -> bool { - match &self.inner.event_ctx { - EventCtx::Http(text) => text.is_empty(), - _ => false, - } - } - - pub fn http_str_unwrap(&self) -> String { - match &self.inner.event_ctx { - EventCtx::Http(text) => text.clone(), - _ => panic!("not http event ctx"), - } - } - - pub fn set_result(&mut self, result: Option) { - self.inner.res = result; - } - - pub fn take_result(&mut self) -> Option { - self.inner.res.take() - } } pub enum FnExeCtxSyncAllowedType { Native, } -impl TryFrom for FnExeCtxSyncAllowedType { - type Error = WSError; - fn try_from(v: AppType) -> Result { - match v { - 
AppType::Native => Ok(FnExeCtxSyncAllowedType::Native), - AppType::Jar | AppType::Wasm => Err(WSError::from(WsFuncError::UnsupportedAppType)), - } - } -} - impl Into for FnExeCtxSyncAllowedType { fn into(self) -> AppType { AppType::Native @@ -185,33 +149,33 @@ impl FnExeCtxSync { inner: FnExeCtx { app, func, - _req_id: req_id, + req_id, event_ctx, res: None, sub_waiters: vec![], app_type: apptype.into(), - _func_meta: func_meta, + func_meta, _dummy_private: (), }, } } } -// impl FnExeCtx { -// pub fn empty_http(&self) -> bool { -// match &self.event_ctx { -// EventCtx::Http(str) => str.len() == 0, -// _ => false, -// } -// } -// /// call this when you are sure it's a http event -// pub fn http_str_unwrap(&self) -> String { -// match &self.event_ctx { -// EventCtx::Http(str) => str.to_owned(), -// _ => panic!("not a http event"), -// } -// } -// } +impl FnExeCtx { + pub fn empty_http(&self) -> bool { + match &self.event_ctx { + EventCtx::Http(str) => str.len() == 0, + _ => false, + } + } + /// call this when you are sure it's a http event + pub fn http_str_unwrap(&self) -> String { + match &self.event_ctx { + EventCtx::Http(str) => str.to_owned(), + _ => panic!("not a http event"), + } + } +} logical_module_view_impl!(ExecutorView); logical_module_view_impl!(ExecutorView, p2p, P2PModule); @@ -227,47 +191,28 @@ pub struct Executor { view: ExecutorView, } -/// Base trait for function execution contexts -pub trait FnExeCtxBase { - /// Get the application name - fn app(&self) -> &str; - /// Get the function name - fn func(&self) -> &str; - /// Get the event context - fn event_ctx(&self) -> &EventCtx; - /// Get mutable reference to event context - fn event_ctx_mut(&mut self) -> &mut EventCtx; -} - -impl FnExeCtxBase for FnExeCtxAsync { - fn app(&self) -> &str { - &self.inner.app - } - fn func(&self) -> &str { - &self.inner.func - } - fn event_ctx(&self) -> &EventCtx { - &self.inner.event_ctx - } - fn event_ctx_mut(&mut self) -> &mut EventCtx { - &mut self.inner.event_ctx - } -} - -impl FnExeCtxBase for FnExeCtxSync { - fn app(&self) -> &str { - &self.inner.app - } - fn func(&self) -> &str { - &self.inner.func - } - fn event_ctx(&self) -> &EventCtx { - &self.inner.event_ctx - } - fn event_ctx_mut(&mut self) -> &mut EventCtx { - &mut self.inner.event_ctx - } -} +// pub struct FunctionCtxBuilder { +// pub app: String, +// pub req_id: ReqId, +// // pub trigger_node: NodeID, +// } +// impl FunctionCtxBuilder { +// pub fn new(app: String, req_id: ReqId) -> Self { +// Self { +// app, +// req_id, +// // trigger_node: 0, +// } +// } +// pub fn build(self, func: String) -> FunctionCtx { +// FunctionCtx { +// app: self.app, +// func, +// req_id: self.req_id, +// // trigger_node: self.trigger_node, +// } +// } +// } #[async_trait] impl LogicalModule for Executor { @@ -318,7 +263,7 @@ impl Executor { } pub async fn local_call_execute_async(&self, ctx: FnExeCtxAsync) -> WSResult> { - self.execute(ctx).await + self.execute(ctx.inner).await } pub fn local_call_execute_sync(&self, ctx: FnExeCtxSync) -> WSResult> { @@ -421,13 +366,9 @@ impl Executor { fnmeta.clone(), req.task_id as usize, match req.trigger.unwrap() { - distribute_task_req::Trigger::EventNew(new) => EventCtx::KvSet { - key: new.key, - opeid: Some(new.opeid), - }, - distribute_task_req::Trigger::EventWrite(write) => EventCtx::KvSet { - key: write.key, - opeid: Some(write.opeid), + distribute_task_req::Trigger::KvSet(set) => EventCtx::KvSet { + key: set.key, + opeid: Some(set.opeid), }, }, ); @@ -441,7 +382,7 @@ impl Executor { { 
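             // note: a failed send_resp is only logged; the scheduled task is still
             // executed locally right after this block via `self.execute(ctx.inner)`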
tracing::error!("send sche resp for app:{app} fn:{func} failed with err: {err}"); } - let _ = self.execute(ctx).await; + let _ = self.execute(ctx.inner).await; } pub async fn handle_http_task(&self, route: &str, text: String) -> WSResult> { @@ -580,92 +521,38 @@ impl Executor { // // .await // } - fn execute_sync(&self, mut ctx: FnExeCtxSync) -> WSResult> { - let instance = self - .view - .instance_manager() - .load_instance_sync(&ctx.inner.app_type, &ctx.inner.app)?; - - let _ = self - .view - .instance_manager() - .instance_running_function - .insert( - instance.instance_name().to_owned(), - UnsafeFunctionCtx::Sync( - NonNull::new(&ctx as *const FnExeCtxSync as *mut FnExeCtxSync).unwrap(), - ), - ); - - tracing::debug!( - "start run sync instance {} app {} fn {}", - instance.instance_name(), - ctx.inner.app, - ctx.inner.func - ); - - let bf_exec_time = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("Time went backwards") - .as_millis() as u64; - - tracing::debug!("start execute sync"); - let res = instance.execute_sync(&mut ctx)?; - - let res = res.map(|v| { - let mut res: serde_json::Value = serde_json::from_str(&*v).unwrap(); - let _ = res.as_object_mut().unwrap().insert( - "bf_exec_time".to_owned(), - serde_json::Value::from(bf_exec_time), - ); - serde_json::to_string(&res).unwrap() - }); - - let _ = self - .view - .instance_manager() - .instance_running_function - .remove(&instance.instance_name()); - - tracing::debug!( - "finish run sync instance {} fn {}, res:{:?}", - instance.instance_name(), - ctx.inner.func, - res - ); - - self.view - .instance_manager() - .finish_using(&ctx.inner.app, instance); - - Ok(res) - } + fn execute_sync(&self, ctx: FnExeCtxSync) -> WSResult> {} /// prepare app and func before call execute async fn execute(&self, mut fn_ctx: FnExeCtxAsync) -> WSResult> { + // let app = fn_ctx.app.clone(); + // let func = fn_ctx.func.clone(); + // let event = fn_ctx.event_ctx.clone(); + let instance = self .view .instance_manager() - .load_instance(&fn_ctx.inner.app_type, &fn_ctx.inner.app) + .load_instance(&fn_ctx.app_type, &fn_ctx.app) .await; let _ = self .view .instance_manager() .instance_running_function + .write() .insert( instance.instance_name().to_owned(), - UnsafeFunctionCtx::Async( - NonNull::new(&fn_ctx as *const FnExeCtxAsync as *mut FnExeCtxAsync).unwrap(), + UnsafeFunctionCtx( + NonNull::new(&fn_ctx as *const FnExeCtx as *mut FnExeCtx).unwrap(), ), ); - tracing::debug!( "start run instance {} app {} fn {}", instance.instance_name(), - fn_ctx.inner.app, - fn_ctx.inner.func + fn_ctx.app, + fn_ctx.func ); + // TODO: input value should be passed from context, like http request or prev trigger let bf_exec_time = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -675,6 +562,11 @@ impl Executor { tracing::debug!("start execute"); let res = instance.execute(&mut fn_ctx).await; + // let return_to_agent_time = SystemTime::now() + // .duration_since(UNIX_EPOCH) + // .expect("Time went backwards") + // .as_millis() as u64; + let res = res.map(|v| { v.map(|v| { let mut res: serde_json::Value = serde_json::from_str(&*v).unwrap(); @@ -682,6 +574,10 @@ impl Executor { "bf_exec_time".to_owned(), serde_json::Value::from(bf_exec_time), ); + // let _ = res.as_object_mut().unwrap().insert( + // "return_to_agent_time".to_owned(), + // serde_json::Value::from(return_to_agent_time), + // ); serde_json::to_string(&res).unwrap() }) }); @@ -690,23 +586,26 @@ impl Executor { .view .instance_manager() .instance_running_function - .remove(&instance.instance_name()); + 
.write() + .remove(&instance.instance_name()) + .unwrap(); tracing::debug!( "finish run instance {} fn {}, res:{:?}", instance.instance_name(), - fn_ctx.inner.func, + fn_ctx.func, res ); - while let Some(t) = fn_ctx.inner.sub_waiters.pop() { + while let Some(t) = fn_ctx.sub_waiters.pop() { let _ = t.await.unwrap(); } - self.view .instance_manager() - .finish_using(&fn_ctx.inner.app, instance); + .finish_using(&fn_ctx.app, instance) + .await; res + // TODO:wait for related tasks triggered. } } diff --git a/src/main/src/general/app/mod.rs b/src/main/src/general/app/mod.rs index a1f154c..6db3aa9 100644 --- a/src/main/src/general/app/mod.rs +++ b/src/main/src/general/app/mod.rs @@ -1,6 +1,7 @@ pub mod app_native; pub mod app_owned; pub mod app_shared; +pub mod fn_event; mod http; pub mod instance; pub mod m_executor; @@ -10,7 +11,6 @@ use super::data::m_data_general::{DataSetMetaV2, GetOrDelDataArg, GetOrDelDataAr use crate::general::app::app_native::native_apps; use crate::general::app::instance::m_instance_manager::InstanceManager; use crate::general::app::m_executor::Executor; -use crate::general::app::m_executor::FnExeCtxAsyncAllowedType; use crate::general::app::v_os::AppMetaVisitOs; use crate::general::network::proto_ext::ProtoExtDataItem; use crate::util::VecExt; @@ -35,14 +35,13 @@ use crate::{ logical_module_view_impl, master::m_master::Master, result::{ErrCvt, WSResult, WsFuncError}, - sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef, NodeID}, + sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef}, util::{self, JoinHandleWrapper}, }; use async_trait::async_trait; use axum::body::Bytes; use enum_as_inner::EnumAsInner; -use m_executor::FnExeCtxSyncAllowedType; -use serde::{de::Error, Deserialize, Deserializer, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; use std::path::PathBuf; use std::{ borrow::Borrow, @@ -133,11 +132,9 @@ pub enum FnCallMeta { #[derive(Debug)] pub struct FnMetaYaml { - /// "sync" or "async" - pub sync: Option, + /// key to operations pub calls: Vec, pub kvs: Option>>, - pub affinity: Option, } impl<'de> Deserialize<'de> for FnMetaYaml { @@ -148,38 +145,39 @@ impl<'de> Deserialize<'de> for FnMetaYaml { let mut map = serde_yaml::Value::deserialize(deserializer)?; let map = map .as_mapping_mut() - .ok_or_else(|| D::Error::custom("not a map"))?; - + .ok_or_else(|| serde::de::Error::custom("not a map"))?; + // let calls = map.remove("calls").ok_or_else(|| serde::de::Error::missing_field("calls"))?; let mut calls = vec![]; - - // Helper block for parsing HTTP call configuration from YAML - // This block encapsulates the logic for extracting and validating HTTP call parameters - let parse_http_call = |v: &serde_yaml::Value| -> Result { - let map = v + fn parse_http_call<'de, D: Deserializer<'de>>( + map: &serde_yaml::Value, + ) -> Result { + let map = map .as_mapping() - .ok_or_else(|| D::Error::custom("not a map"))?; + .ok_or_else(|| serde::de::Error::custom("not a map"))?; let call = map .get("call") - .ok_or_else(|| D::Error::missing_field("call"))?; + .ok_or_else(|| serde::de::Error::missing_field("call"))?; let call = call .as_str() - .ok_or_else(|| D::Error::custom("not a string"))?; - match call { - "direct" => Ok(HttpCall::Direct), - "indirect" => Ok(HttpCall::Indirect), - _ => Err(D::Error::custom("invalid call type")), - } - }; - + .ok_or_else(|| serde::de::Error::custom("not a string"))?; + let call = if call == "direct" { + HttpCall::Direct + } else if call == "indirect" { + HttpCall::Indirect + } else { + return 
Err(serde::de::Error::custom("invalid call type")); + }; + Ok(call) + } if let Some(v) = map.get("http.get") { - let call = parse_http_call(v)?; + let call = parse_http_call::(v)?; calls.push(FnCallMeta::Http { method: HttpMethod::Get, call, }); } if let Some(v) = map.get("http.post") { - let call = parse_http_call(v)?; + let call = parse_http_call::(v)?; calls.push(FnCallMeta::Http { method: HttpMethod::Post, call, @@ -191,37 +189,13 @@ impl<'de> Deserialize<'de> for FnMetaYaml { let kvs = map.remove("kvs"); let kvs = if let Some(kvs) = kvs { - serde_yaml::from_value(kvs).map_err(|e| D::Error::custom(e.to_string()))? - } else { - None - }; - - let sync = if let Some(sync) = map.get("sync") { - let sync = sync - .as_str() - .ok_or_else(|| D::Error::custom("sync value must be a string"))?; - match sync { - "sync" | "async" => Some(sync.to_string()), - _ => return Err(D::Error::custom("sync value must be 'sync' or 'async'")), - } - } else { - None - }; - - let affinity = map.remove("affinity"); - let affinity = if let Some(affinity) = affinity { - serde_yaml::from_value(affinity).map_err(|e| D::Error::custom(e.to_string()))? + serde_yaml::from_value(kvs).map_err(serde::de::Error::custom)? } else { None }; tracing::debug!("FnMetaYaml constructed, calls:{:?}", calls); - Ok(Self { - calls, - kvs, - sync, - affinity, - }) + Ok(Self { calls, kvs }) } } @@ -281,7 +255,6 @@ pub struct FnMeta { // pub event: Vec, // pub args: Vec, pub data_accesses: Option>, - pub affinity: Option, } #[derive(Debug, Deserialize)] @@ -317,15 +290,15 @@ impl AppMeta { app_name: &str, meta_fs: &AppMetaVisitOs, ) -> WSResult { - let app_type = meta_fs.get_app_type(app_name).await?; let fns = metayaml .fns .into_iter() .map(|(fnname, fnmeta)| { - let fnmeta = (app_type, fnmeta).into(); + let fnmeta = fnmeta.into(); (fnname, fnmeta) }) .collect(); + let app_type = meta_fs.get_app_type(app_name).await?; Ok(Self { app_type, fns, @@ -465,65 +438,9 @@ impl KeyPattern { // } } -impl From<(AppType, FnMetaYaml)> for FnMeta { - fn from((app_type, yaml): (AppType, FnMetaYaml)) -> Self { - let sync_or_async = yaml.sync.as_deref().map(|s| s == "sync").unwrap_or(true); - - // if sync but not allowed, set sync_or_async to false - let sync_or_async = if sync_or_async && FnExeCtxSyncAllowedType::try_from(app_type).is_err() - { - false - } else { - sync_or_async - }; - - // if async but not allowed, set sync_or_async to true - let sync_or_async = - if !sync_or_async && FnExeCtxAsyncAllowedType::try_from(app_type).is_err() { - true - } else { - sync_or_async - }; - - let sync_async = if sync_or_async { - FnSyncAsyncSupport::Sync - } else { - FnSyncAsyncSupport::Async - }; - - // 处理亲和性规则 - let affinity = yaml.affinity.map(|affinity_yaml| { - let tags = affinity_yaml - .tags - .unwrap_or_else(|| vec!["worker".to_string()]) - .into_iter() - .map(|tag| match tag.as_str() { - "worker" => NodeTag::Worker, - "master" => NodeTag::Master, - custom => NodeTag::Custom(custom.to_string()), - }) - .collect(); - - let nodes = match affinity_yaml.nodes { - Some(nodes_str) => { - if nodes_str == "*" { - AffinityPattern::All - } else if let Ok(count) = nodes_str.parse::() { - AffinityPattern::NodeCount(count) - } else { - AffinityPattern::List( - nodes_str.split(',').map(|s| s.parse().unwrap()).collect(), - ) - } - } - None => AffinityPattern::All, - }; - - AffinityRule { tags, nodes } - }); - - Self { - sync_async, +impl From for FnMeta { + fn from(yaml: FnMetaYaml) -> Self { + let res = Self { calls: yaml.calls, data_accesses: if let Some(kvs) = 
yaml.kvs { Some( @@ -581,6 +498,13 @@ impl From<(AppType, FnMetaYaml)> for FnMeta { panic!("invalid op: {:?}", op); } } + // // TODO: check key pattern + // KvMeta { + // delete, + // set, + // get, + // pattern: KeyPattern::new(key), + // } ( KeyPattern::new(key), @@ -597,8 +521,9 @@ impl From<(AppType, FnMetaYaml)> for FnMeta { } else { None }, - affinity, - } + }; + // assert!(res.check_kv_valid()); + res } } @@ -1054,7 +979,8 @@ impl AppMetaManager { }, ]; tracing::debug!( - "app data size: {:?}", + "2broadcast meta and appfile, datasetid: {}, datas: {:?}", + write_data_id, write_datas .iter() .map(|v| v.to_string()) @@ -1065,6 +991,16 @@ impl AppMetaManager { .write_data( write_data_id, write_datas, + // vec![ + // DataMeta { + // cache: DataModeCache::AlwaysInMem as i32, + // distribute: DataModeDistribute::BroadcastRough as i32, + // }, + // DataMeta { + // cache: DataModeCache::AlwaysInFs as i32, + // distribute: DataModeDistribute::BroadcastRough as i32, + // }, + // ], Some(( self.view.p2p().nodes_config.this_node(), proto::DataOpeType::Write, @@ -1174,6 +1110,7 @@ impl AppMetaManager { // }, // None, // ) + // .await // { // Ok(res) => res, // Err(e) => { @@ -1252,6 +1189,7 @@ impl AppMetaManager { // }, // Some(Duration::from_secs(10)), // ) + // .await // { // Ok(res) => res, // Err(err) => { @@ -1275,39 +1213,6 @@ impl AppMetaManager { // } } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum NodeTag { - Worker, - Master, - Custom(String), -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct AffinityRule { - // 节点必须具有的标签列表,默认包含 worker - pub tags: Vec, - // 节点 ID 匹配规则 - pub nodes: AffinityPattern, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum AffinityPattern { - // 匹配所有节点 - All, - // 匹配指定节点列表 - List(Vec), - // 限定节点数量 - NodeCount(usize), -} - -#[derive(Debug, Deserialize)] -pub struct AffinityYaml { - // 标签列表,使用字符串表示 - pub tags: Option>, - // 节点列表,使用 "*" 表示所有节点,数字表示节点数量,或节点 ID 列表 "1,2,3" - pub nodes: Option, -} - #[cfg(test)] mod test { use crate::util; diff --git a/src/main/src/general/data/m_data_general/dataitem.rs b/src/main/src/general/data/m_data_general/dataitem.rs index 27ef392..681ae9c 100644 --- a/src/main/src/general/data/m_data_general/dataitem.rs +++ b/src/main/src/general/data/m_data_general/dataitem.rs @@ -42,7 +42,7 @@ impl<'a> WantIdxIter<'a> { itercnt: 0, len: itemcnt, }, - GetOrDelDataArgType::PartialOne { idx } => Self::PartialOne { + GetOrDelDataArgType::PartialOne { idx } => Self::PartialOne { idx: *idx, itercnt: 0, }, @@ -71,12 +71,12 @@ impl<'a> Iterator for WantIdxIter<'a> { let ret = *itercnt; *itercnt += 1; Some(ret) - } - } + } + } GetOrDelDataArgType::PartialMany { .. } | GetOrDelDataArgType::PartialOne { .. 
} => {
                panic!("PartialMany should be handled by iter")
-}
+            }
            },
        }
    }
}
diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs
index 17a4fd9..dd096ce 100644
--- a/src/main/src/general/data/m_data_general/mod.rs
+++ b/src/main/src/general/data/m_data_general/mod.rs
@@ -22,7 +22,7 @@ use crate::{
         network::{msg_pack::MsgPack, proto_ext::DataItemExt},
     },
     logical_module_view_impl,
-    result::{WSError, WSResult, WSResultExt, WsRuntimeErr, WsSerialErr, WsNetworkLogicErr},
+    result::{WSError, WSResult, WSResultExt, WsRuntimeErr, WsSerialErr},
     sys::{LogicalModule, LogicalModuleNewArgs, NodeID},
     util::JoinHandleWrapper,
 };
@@ -32,20 +32,15 @@
 use camelpaste::paste;
 use core::str;
 use enum_as_inner::EnumAsInner;
 
-use dashmap::DashMap;
 use serde::{Deserialize, Serialize};
 use std::ops::Range;
 use std::{
     collections::{BTreeSet, HashMap, HashSet},
     sync::Arc,
     time::Duration,
-    sync::atomic::{AtomicU32, Ordering},
 };
-use tokio::sync::Semaphore;
 use tokio::task::JoinHandle;
-use tokio::task::JoinError;
 use ws_derive::LogicalModule;
-use std::future::Future;
 
 // use super::m_appmeta_manager::AppMeta;
@@ -90,151 +85,607 @@ pub fn new_data_unique_id_fn_kv(key: &[u8]) -> Vec<u8> {
 
 #[derive(LogicalModule)]
 pub struct DataGeneral {
     view: DataGeneralView,
-    pub rpc_call_data_version_schedule: RPCCaller<proto::DataVersionScheduleRequest>,
-    rpc_call_write_once_data: RPCCaller<proto::WriteOneDataRequest>,
-    rpc_call_batch_data: RPCCaller<proto::sche::BatchDataRequest>,
-    rpc_call_get_data_meta: RPCCaller<proto::DataMetaGetRequest>,
+
+    // // unique_id,idx -> file_path
+    // auto_cache: moka::sync::Cache<(String, u8), (DataVersion, proto::DataItem)>,
+
+    // // unique_id,idx -> serialized value
+    // forever_cache: dashmap::DashMap<(String, u8), (DataVersion, proto::DataItem)>,
+    pub rpc_call_data_version_schedule: RPCCaller<proto::DataVersionScheduleRequest>,
+    rpc_call_write_once_data: RPCCaller<proto::WriteOneDataRequest>,
+    rpc_call_get_data_meta: RPCCaller<proto::DataMetaGetRequest>,
     rpc_call_get_data: RPCCaller<proto::GetOneDataRequest>,
 
-    rpc_handler_write_once_data: RPCHandler<proto::WriteOneDataRequest>,
-    rpc_handler_batch_data: RPCHandler<proto::sche::BatchDataRequest>,
+    rpc_handler_write_once_data: RPCHandler<proto::WriteOneDataRequest>,
     rpc_handler_data_meta_update: RPCHandler<proto::DataMetaUpdateRequest>,
-    rpc_handler_get_data_meta: RPCHandler<proto::DataMetaGetRequest>,
+    rpc_handler_get_data_meta: RPCHandler<proto::DataMetaGetRequest>,
     rpc_handler_get_data: RPCHandler<proto::GetOneDataRequest>,
+}
+
+#[async_trait]
+impl LogicalModule for DataGeneral {
+    fn inner_new(args: LogicalModuleNewArgs) -> Self
+    where
+        Self: Sized,
+    {
+        Self {
+            view: DataGeneralView::new(args.logical_modules_ref.clone()),
+
+            // auto_cache: moka::sync::Cache::new(100),
+            // forever_cache: dashmap::DashMap::new(),
+            rpc_call_data_version_schedule: RPCCaller::new(),
+            rpc_call_write_once_data: RPCCaller::new(),
+            rpc_call_get_data_meta: RPCCaller::new(),
+            rpc_call_get_data: RPCCaller::new(),
+
+            rpc_handler_write_once_data: RPCHandler::new(),
+            rpc_handler_data_meta_update: RPCHandler::new(),
+            rpc_handler_get_data_meta: RPCHandler::new(),
+            rpc_handler_get_data: RPCHandler::new(),
+        }
+    }
+    async fn start(&self) -> WSResult<Vec<JoinHandleWrapper>> {
+        tracing::info!("start as master");
+
+        let p2p = self.view.p2p();
+        // register rpc callers
+        {
+            self.rpc_call_data_version_schedule.regist(p2p);
+            self.rpc_call_write_once_data.regist(p2p);
+            self.rpc_call_get_data_meta.regist(p2p);
+            self.rpc_call_get_data.regist(p2p);
+        }
 
-    // state used to track batch transfers
-    batch_transfers: DashMap<String, (u64, Vec<u8>)>, // type changed to (unique_id -> (version, data))
+        // register rpc handlers
+        {
+            let view = self.view.clone();
+            self.rpc_handler_write_once_data
+                .regist(p2p, move |responsor, req| {
+                    let view = view.clone();
+                    let _ = tokio::spawn(async move {
+                        view.rpc_handle_write_one_data(responsor, req).await;
+                    });
+                    Ok(())
+                });
+            let view = self.view.clone();
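+            // every registration below follows the same shape: clone the view into
+            // the closure, re-clone it per request, and spawn the real handling on
+            // tokio so the network callback can return immediately. A minimal sketch
+            // (`handle_req` is an illustrative name, not an API of this crate):
+            //
+            //     let view = self.view.clone();
+            //     handler.regist(p2p, move |responsor, req| {
+            //         let view = view.clone();           // fresh clone moves into the task
+            //         let _ = tokio::spawn(async move {
+            //             view.handle_req(responsor, req).await; // slow path off the rpc thread
+            //         });
+            //         Ok(())                             // ack the callback right away
+            //     });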
self.rpc_handler_data_meta_update.regist( + p2p, + move |responsor: RPCResponsor, + req: proto::DataMetaUpdateRequest| { + let view = view.clone(); + let _ = tokio::spawn(async move { + view.rpc_handle_data_meta_update(responsor, req).await + }); + Ok(()) + }, + ); + let view = self.view.clone(); + self.rpc_handler_get_data_meta + .regist(p2p, move |responsor, req| { + let view = view.clone(); + let _ = tokio::spawn(async move { + view.rpc_handle_get_data_meta(req, responsor) + .await + .todo_handle(); + }); + Ok(()) + }); + let view = self.view.clone(); + self.rpc_handler_get_data.regist( + p2p, + move |responsor: RPCResponsor, + req: proto::GetOneDataRequest| { + let view = view.clone(); + let _ = + tokio::spawn( + async move { view.rpc_handle_get_one_data(responsor, req).await }, + ); + Ok(()) + }, + ); + } + + Ok(vec![]) + } } -impl DataGeneral { - fn next_batch_id(&self) -> u32 { - static NEXT_BATCH_ID: AtomicU32 = AtomicU32::new(1); // 从1开始,保留0作为特殊值 - NEXT_BATCH_ID.fetch_add(1, Ordering::Relaxed) +impl DataGeneralView { + async fn rpc_handle_data_meta_update( + self, + responsor: RPCResponsor, + mut req: proto::DataMetaUpdateRequest, + ) { + struct Defer { + node: NodeID, + } + impl Drop for Defer { + fn drop(&mut self) { + tracing::debug!("rpc_handle_data_meta_update return at node({})", self.node); + } + } + let _defer = Defer { + node: self.p2p().nodes_config.this_node(), + }; + + let key = KeyTypeDataSetMeta(&req.unique_id); + let keybytes = key.make_key(); + + tracing::debug!("rpc_handle_data_meta_update {:?}", req); + let kv_lock = self.kv_store_engine().with_rwlock(&keybytes); + let _kv_write_lock_guard = kv_lock.write(); + + if let Some((_old_version, mut old_meta)) = + self.kv_store_engine().get(&key, true, KvAdditionalConf {}) + { + if old_meta.version > req.version { + drop(_kv_write_lock_guard); + let err_msg = "New data version is smaller, failed update"; + tracing::warn!("{}", err_msg); + responsor + .send_resp(proto::DataMetaUpdateResponse { + version: old_meta.version, + message: err_msg.to_owned(), + }) + .await + .todo_handle(); + return; + } + old_meta.version = req.version; + if req.serialized_meta.len() > 0 { + self.kv_store_engine() + .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) + .todo_handle(); + } else { + self.kv_store_engine() + .set(key, &old_meta, true) + .todo_handle(); + } + } else { + if req.serialized_meta.len() > 0 { + tracing::debug!( + "set new meta data, {:?}", + bincode::deserialize::(&req.serialized_meta) + ); + self.kv_store_engine() + .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) + .todo_handle(); + } else { + drop(_kv_write_lock_guard); + let err_msg = "Old meta data not found and missing new meta"; + tracing::warn!("{}", err_msg); + responsor + .send_resp(proto::DataMetaUpdateResponse { + version: 0, + message: err_msg.to_owned(), + }) + .await + .todo_handle(); + return; + } + } + drop(_kv_write_lock_guard); + tracing::debug!("rpc_handle_data_meta_update success"); + responsor + .send_resp(proto::DataMetaUpdateResponse { + version: req.version, + message: "Update success".to_owned(), + }) + .await + .todo_handle(); } - async fn write_data_batch( - &self, - unique_id: &[u8], - version: u64, - data: proto::DataItem, - data_item_idx: usize, - node_id: NodeID, - batch_size: usize, + async fn rpc_handle_get_one_data( + self, + responsor: RPCResponsor, + req: proto::GetOneDataRequest, ) -> WSResult<()> { - let total_size = data.data_sz_bytes(); - let total_batches = (total_size + batch_size - 1) / 
batch_size; - - // 克隆整个 view - let view = self.view.clone(); - - // Initialize batch transfer - let init_req = proto::sche::BatchDataRequest { - unique_id: unique_id.to_vec(), - version, - batch_id: 0, // 使用 0 作为初始化标记 - total_batches: total_batches as u32, - data: vec![], - data_item_idx: data_item_idx as u32, - is_complete: false, - }; + tracing::debug!("starting rpc_handle_get_one_data {:?}", req); + + // req.unique_id + let kv_store_engine = self.kv_store_engine(); + let _ = self + .get_data_meta(&req.unique_id, req.delete) + .map_err(|err| { + tracing::warn!("rpc_handle_get_one_data get_data_meta failed: {:?}", err); + err + })?; + // let meta = bincode::deserialize::(&req.serialized_meta).map_err(|err| { + // WsSerialErr::BincodeErr { + // err, + // context: "rpc_handle_get_one_data".to_owned(), + // } + // })?; + let mut got_or_deleted = vec![]; + + let mut kv_ope_err = vec![]; + + for idx in req.idxs { + let value = if req.delete { + match kv_store_engine.del( + KeyTypeDataSetItem { + uid: req.unique_id.as_ref(), //req.unique_id.clone(), + idx: idx as u8, + }, + false, + ) { + Ok(value) => value, + Err(e) => { + kv_ope_err.push(e); + None + } + } + } else { + kv_store_engine.get( + &KeyTypeDataSetItem { + uid: req.unique_id.as_ref(), //req.unique_id.clone(), + idx: idx as u8, + }, + false, + KvAdditionalConf {}, + ) + }; + got_or_deleted.push(value); + } - let init_resp = self - .rpc_call_batch_data - .call( - view.p2p(), - node_id, - init_req, - Some(Duration::from_secs(60)), - ) - .await?; + // tracing::warn!("temporaly no data response"); + + let (success, message): (bool, String) = if kv_ope_err.len() > 0 { + (false, { + let mut msg = String::from("KvEngine operation failed: "); + for e in kv_ope_err.iter() { + msg.push_str(&format!("{:?}", e)); + } + msg + }) + } else if got_or_deleted.iter().all(|v| v.is_some()) { + (true, "success".to_owned()) + } else { + tracing::warn!("some data not found"); + (false, "some data not found".to_owned()) + }; - if !init_resp.success { - return Err(WsDataError::BatchTransferFailed { - node: node_id, - batch: 0, - reason: init_resp.error, + let mut got_or_deleted_checked: Vec = vec![]; + if success { + for v in got_or_deleted { + let decode_res = proto::DataItem::decode_persist(v.unwrap().1); + tracing::debug!("decode_res type: {:?}", decode_res.to_string()); + // if let Ok(v) = decode_res { + got_or_deleted_checked.push(decode_res); + // } else { + // success = false; + // got_or_deleted_checked = vec![]; + // message = format!("decode data item failed {:?}", decode_res.unwrap_err()); + // tracing::warn!("{}", message); + // break; + // } } - .into()); } - let batch_id = init_resp.batch_id; - - // Send data in batches - for batch_idx in 0..total_batches { - let start = batch_idx * batch_size; - let end = (start + batch_size).min(total_size); - let is_last = batch_idx == total_batches - 1; - - let batch_data = data.clone_split_range(start..end); - let batch_req = proto::sche::BatchDataRequest { - unique_id: unique_id.to_vec(), - version, - batch_id, - total_batches: total_batches as u32, - data: batch_data.encode_persist(), - data_item_idx: data_item_idx as u32, - is_complete: is_last, + // = got_or_deleted + // .into_iter() + // .map(|one| proto::FileData::decode(bytes::Bytes::from(one.unwrap().1))) + // .all(|one|one.is_ok()) + // .collect::>(); + responsor + .send_resp(proto::GetOneDataResponse { + success, + data: got_or_deleted_checked, + message, + }) + .await?; + + Ok(()) + } + async fn rpc_handle_write_one_data( + self, + responsor: 
RPCResponsor, + req: WriteOneDataRequest, + ) { + tracing::debug!("verify data meta bf write data"); + let kv_store_engine = self.kv_store_engine(); + + // Step1: verify version + // take old meta + #[allow(unused_assignments)] + let mut required_meta: Option<(usize, DataSetMetaV2)> = None; + { + let keybytes: Vec = KeyTypeDataSetMeta(&req.unique_id).make_key(); + let fail_by_overwrite = || async { + let message = "New data version overwrite".to_owned(); + tracing::warn!("{}", message); + responsor + .send_resp(WriteOneDataResponse { + remote_version: 0, + success: false, + message, + }) + .await + .todo_handle(); + }; + let fail_with_msg = |message: String| async { + tracing::warn!("{}", message); + responsor + .send_resp(WriteOneDataResponse { + remote_version: 0, + success: false, + message, + }) + .await + .todo_handle(); }; - let batch_resp = self - .rpc_call_batch_data - .call( - view.p2p(), - node_id, - batch_req, - Some(Duration::from_secs(60)), - ) - .await?; + loop { + // tracing::debug!("verify version loop"); + let lock = + kv_store_engine.with_rwlock(&KeyTypeDataSetMeta(&req.unique_id).make_key()); + let guard = KeyLockGuard::Read(lock.read()); + required_meta = kv_store_engine.get( + &KeyTypeDataSetMeta(&req.unique_id), + true, + KvAdditionalConf {}, + ); //tofix, master send maybe not synced + let old_dataset_version = if required_meta.is_none() { + 0 + } else { + required_meta.as_ref().unwrap().1.version + }; + // need to wait for new version + if required_meta.is_none() + || required_meta.as_ref().unwrap().1.version < req.version + { + if required_meta.is_none() { + tracing::debug!("no data version, waiting for notify"); + } else { + tracing::debug!( + "data version is old({}) at node({}), waiting for new notify({})", + required_meta.as_ref().unwrap().1.version, + self.p2p().nodes_config.this_node(), + req.version + ); + } - if !batch_resp.success { - return Err(WsDataError::BatchTransferFailed { - node: node_id, - batch: batch_idx as u32, - reason: batch_resp.error, + let (kv_version, new_value) = kv_store_engine + .register_waiter_for_new(&keybytes, guard) + .await + .unwrap_or_else(|err| { + panic!("fail to wait for new data version: {:?}", err); + }); + + let Some(new_value) = new_value.as_raw_data() else { + fail_with_msg(format!( + "fatal error, kv value supposed to be DataSetMeta, rathe than {:?}", + new_value + )) + .await; + return; + }; + + // deserialize + let new_value = bincode::deserialize::(&new_value); + if let Err(err) = new_value { + fail_with_msg(format!( + "fatal error, kv value deserialization failed: {}", + err + )) + .await; + return; + } + let new_value = new_value.unwrap(); + + // version check + if new_value.version > req.version { + fail_by_overwrite().await; + return; + } else if new_value.version < req.version { + tracing::debug!("recv data version({}) is old than required({}), waiting for new notify",new_value.version, req.version); + // still need to wait for new version + continue; + } else { + required_meta = Some((kv_version, new_value)); + break; + } + } else if old_dataset_version > req.version { + drop(guard); + fail_by_overwrite().await; + return; + } else { + tracing::debug!( + "data version is matched cur({}) require({}) // 0 should be invalid", + old_dataset_version, + req.version + ); + break; } - .into()); } } + // Step3: write data + tracing::debug!("start to write data"); + let lock = kv_store_engine.with_rwlock(&KeyTypeDataSetMeta(&req.unique_id).make_key()); + let guard = KeyLockGuard::Write(lock.write()); + let check_meta = 
kv_store_engine.get( + &KeyTypeDataSetMeta(&req.unique_id), + true, + KvAdditionalConf {}, + ); //tofix, master send maybe not synced + if check_meta.is_none() + || check_meta.as_ref().unwrap().0 != required_meta.as_ref().unwrap().0 + { + drop(guard); + responsor + .send_resp(WriteOneDataResponse { + remote_version: if check_meta.is_none() { + 0 + } else { + check_meta.as_ref().unwrap().1.version + }, + success: false, + message: "meta is updated again, cancel write".to_owned(), + }) + .await + .todo_handle(); + return; + } + // let old_dataset_version = if res.is_none() { + // 0 + // } else { + // res.as_ref().unwrap().1.version + // }; + + for data_with_idx in req.data.into_iter() { + let proto::DataItemWithIdx { idx, data } = data_with_idx; + let data = data.unwrap(); + let serialize = data.encode_persist(); + tracing::debug!( + "writing data part uid({:?}) idx({}) item({})", + req.unique_id, + idx, + data.to_string() + ); + if let Err(err) = kv_store_engine.set( + KeyTypeDataSetItem { + uid: req.unique_id.as_ref(), //req.unique_id.clone(), + idx: idx as u8, + }, + &serialize, + true, + ) { + tracing::warn!("flush error: {}", err) + } + } + kv_store_engine.flush(); + drop(guard); + tracing::debug!("data is written"); + responsor + .send_resp(WriteOneDataResponse { + remote_version: req.version, + success: true, + message: "".to_owned(), + }) + .await + .todo_handle(); + // ## response + } + + async fn rpc_handle_get_data_meta( + self, + req: proto::DataMetaGetRequest, + responsor: RPCResponsor, + ) -> WSResult<()> { + tracing::debug!("rpc_handle_get_data_meta with req({:?})", req); + let meta = self.get_data_meta(&req.unique_id, req.delete)?; + if meta.is_none() { + tracing::debug!("rpc_handle_get_data_meta data meta not found"); + } else { + tracing::debug!("rpc_handle_get_data_meta data meta found"); + } + let serialized_meta = meta.map_or(vec![], |(_kvversion, meta)| { + bincode::serialize(&meta).unwrap() + }); + + responsor + .send_resp(proto::DataMetaGetResponse { serialized_meta }) + .await?; + Ok(()) } + // pub async fn + fn get_data_meta( + &self, + unique_id: &[u8], + delete: bool, + ) -> WSResult> { + let ope_name = if delete { "delete" } else { "get" }; + tracing::debug!("{} data meta for uid({:?})", ope_name, unique_id); + + let kv_store_engine = self.kv_store_engine(); + let key = KeyTypeDataSetMeta(&unique_id); + let keybytes = key.make_key(); + + let write_lock = kv_store_engine.with_rwlock(&keybytes); + let _guard = write_lock.write(); + + let meta_opt = if delete { + kv_store_engine.del(key, true)? 
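+            // `del` returns the value it removed, so both branches of this `if`
+            // yield the previous meta; the write lock above covers either path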
+ } else { + kv_store_engine.get(&key, true, KvAdditionalConf {}) + }; + Ok(meta_opt) + } +} + +// pub enum DataWrapper { +// Bytes(Vec), +// File(PathBuf), +// } + +pub enum DataUidMeta { + Meta { + unique_id: Vec, + meta: DataSetMetaV2, + }, + UniqueId(Vec), +} + +#[derive(EnumAsInner, Clone)] +pub enum GetOrDelDataArgType { + Delete, + All, + PartialOne { + // partial can't be deleted + idx: DataItemIdx, + }, + PartialMany { + idxs: BTreeSet, + }, +} + +pub struct GetOrDelDataArg { + pub meta: Option, + pub unique_id: Vec, + pub ty: GetOrDelDataArgType, +} + +impl DataGeneral { pub async fn get_or_del_datameta_from_master( &self, unique_id: &[u8], delete: bool, ) -> WSResult { let p2p = self.view.p2p(); + let data_general = self.view.data_general(); // get meta from master - let meta = self + let meta = data_general .rpc_call_get_data_meta .call( p2p, p2p.nodes_config.get_master_node(), - proto::DataMetaGetRequest { - unique_id: unique_id.to_vec(), + DataMetaGetRequest { + unique_id: unique_id.to_owned(), delete, }, - Some(Duration::from_secs(60)), + Some(Duration::from_secs(30)), ) .await?; - if meta.serialized_meta.is_empty() { return Err(WsDataError::DataSetNotFound { - uniqueid: unique_id.to_vec(), + uniqueid: unique_id.to_owned(), } .into()); } - - bincode::deserialize(&meta.serialized_meta).map_err(|err| { - WsSerialErr::BincodeErr { - err, - context: "get_or_del_datameta_from_master".to_owned(), - } - .into() + bincode::deserialize::(&meta.serialized_meta).map_err(|e| { + WSError::from(WsSerialErr::BincodeErr { + err: e, + context: format!( + "get_datameta_from_master failed, meta:{:?}", + meta.serialized_meta + ), + }) }) } + // should return real dataitem, rather than split dataitem pub async fn get_or_del_data( &self, GetOrDelDataArg { @@ -243,8 +694,6 @@ impl DataGeneral { ty, }: GetOrDelDataArg, ) -> WSResult<(DataSetMetaV2, HashMap)> { - let mut data_map = HashMap::new(); - // get meta from master let meta = if let Some(meta) = meta { meta @@ -269,133 +718,379 @@ impl DataGeneral { } Ok(()) }; + // not proper desig, skip + // https://fvd360f8oos.feishu.cn/wiki/DYAHw4oPLiZ5NYkTG56cFtJdnKg#share-Div9dUq11oGFOBxJO9ic3RtnnSf + // fn check_cache_pos(meta: &DataSetMetaV2) -> WSResult<()> { + // if !meta.cache_mode_visitor().is_pos_allnode() + // && !meta.cache_mode_visitor().is_pos_auto() + // && !meta.cache_mode_visitor().is_pos_specnode() + // { + // return Err(WsDataError::UnknownCachePosMode { + // mode: meta.cache_mode_visitor().0, + // } + // .into()); + // } + // if meta.cache_mode_visitor().is_pos_specnode() { + // // check this node is in the spec node list + // panic!("TODO: check this node is in the spec node list"); + // } + // Ok(()) + // } + let check_cache_time = |meta: &DataSetMetaV2| -> WSResult<()> { + if !meta.cache_mode_visitor(idx).is_time_auto() + && !meta.cache_mode_visitor(idx).is_time_forever() + { + return Err(WsDataError::UnknownCacheTimeMode { + mode: meta.cache_mode_visitor(idx).0, + } + .into()); + } + Ok(()) + }; check_cache_map(&meta)?; + // not proper desig, skip + // check_cache_pos(&meta)?; + check_cache_time(&meta)?; } - // get data - let p2p = self.view.p2p(); - - match ty { - GetOrDelDataArgType::All => { - for idx in 0..meta.data_item_cnt() { - let idx = idx as DataItemIdx; - let resp = self - .rpc_call_get_data - .call( - p2p, - meta.get_data_node(idx), - proto::GetOneDataRequest { - unique_id: unique_id.to_vec(), - idxs: vec![idx as u32], - delete: false, - return_data: true, - }, - Some(Duration::from_secs(60)), - ) - .await?; - - if 
!resp.success { - return Err(WsDataError::GetDataFailed { - unique_id: unique_id.to_vec(), - msg: resp.message, + // verify idx range & get whether to delete + let delete = match &ty { + GetOrDelDataArgType::Delete => true, + GetOrDelDataArgType::All => false, + GetOrDelDataArgType::PartialOne { idx } => { + if *idx as usize >= meta.data_item_cnt() { + return Err(WsDataError::ItemIdxOutOfRange { + wanted: *idx, + len: meta.data_item_cnt() as u8, } .into()); } - - data_map.insert(idx, resp.data[0].clone()); - } + false } - GetOrDelDataArgType::Delete => { - for idx in 0..meta.data_item_cnt() { - let idx = idx as DataItemIdx; - let resp = self - .rpc_call_get_data - .call( - p2p, - meta.get_data_node(idx), - proto::GetOneDataRequest { - unique_id: unique_id.to_vec(), - idxs: vec![idx as u32], - delete: true, - return_data: true, - }, - Some(Duration::from_secs(60)), - ) - .await?; - - if !resp.success { - return Err(WsDataError::GetDataFailed { - unique_id: unique_id.to_vec(), - msg: resp.message, + GetOrDelDataArgType::PartialMany { idxs } => { + let Some(biggest_idx) = idxs.iter().rev().next() else { + return Err(WsDataError::ItemIdxEmpty.into()); + }; + if *biggest_idx >= meta.data_item_cnt() as u8 { + return Err(WsDataError::ItemIdxOutOfRange { + wanted: *biggest_idx, + len: meta.data_item_cnt() as u8, } .into()); } + false + } + }; - data_map.insert(idx, resp.data[0].clone()); + // // TODO 读取数据的时候先看看缓存有没有,如果没有再读数据源,如果有从缓存里面拿,需要校验 version + // if !delete { + // let mut cached_items = HashMap::new(); + // for idx in WantIdxIter::new(&ty) { + // let cache_list = if meta.cache_mode_visitor(idx).is_time_auto() { + // self.auto_cache.clone() + // } else if meta.cache_mode_visitor(idx).is_time_forever() { + // self.forever_cache.clone() + // } else { + // None + // }; + // if cache_list.is_none() { + // continue; + // } + // // 从缓存中获取数据 + // let cache_key = (unique_id.clone(), idx); + // let cached_value = cache_list.get(&cache_key); + // // 如果找到缓存且版本匹配 + // if let Some((cached_version, cached_item)) = cached_value { + // if cached_version == meta.version { + // cached_items.insert(idx, cached_item.clone()); + // tracing::debug!("Cache hit for idx: {}, version: {}", idx, cached_version); + // } else { + // // 如果缓存版本不匹配,从缓存中删除掉 + // cache_list.remove(&cache_key); + // tracing::debug!( + // "Cache version mismatch for idx: {}, cached: {}, current: {}", + // idx, + // cached_version, + // meta.version + // ); + // } + // } + // } + // // 如果所有请求的数据都在缓存中找到,直接返回 + // if matches!(ty, GetOrDelDataArgType::All) + // && cached_items.len() == meta.datas_splits.len() + // || matches!(ty, GetOrDelDataArgType::PartialOne { .. }) && cached_items.len() == 1 + // || matches!(ty, GetOrDelDataArgType::PartialMany { idxs }) + // && cached_items.len() == idxs.len() + // { + // tracing::debug!("All requested data found in cache, returning early"); + // return Ok((meta, cached_items)); + // } + // } + + // 如果缓存里没有,则需要从数据源读取 + let mut cache: Vec = Vec::new(); + for _ in 0..meta.data_item_cnt() { + match &ty { + GetOrDelDataArgType::Delete => { + cache.push(false); + } + GetOrDelDataArgType::All + | GetOrDelDataArgType::PartialOne { .. } + | GetOrDelDataArgType::PartialMany { .. 
} => { + cache.push(true); } } - GetOrDelDataArgType::PartialOne { idx } => { - let resp = self - .rpc_call_get_data - .call( - p2p, - meta.get_data_node(idx), + } + + // Step2: get/delete data on each node + // nodeid -> (getdata_req, splitidx) + let mut each_node_getdata: HashMap)> = + HashMap::new(); + let mut each_item_idx_receive_worker_tx_rx_splits: HashMap< + u8, + ( + tokio::sync::mpsc::Sender>, + tokio::sync::mpsc::Receiver>, + Vec>, // split ranges + ), + > = HashMap::new(); + + for idx in WantIdxIter::new(&ty, meta.data_item_cnt() as DataItemIdx) { + tracing::debug!("prepare get data slices request with idx:{}", idx); + let data_splits = &meta.datas_splits[idx as usize]; + for (splitidx, split) in data_splits.splits.iter().enumerate() { + let _ = each_node_getdata + .entry(split.node_id) + .and_modify(|(req, splitidxs)| { + req.idxs.push(idx as u32); + splitidxs.push(splitidx); + }) + .or_insert(( proto::GetOneDataRequest { - unique_id: unique_id.to_vec(), + unique_id: unique_id.to_owned(), idxs: vec![idx as u32], - delete: false, + delete, return_data: true, }, - Some(Duration::from_secs(60)), - ) - .await?; - - if !resp.success { - return Err(WsDataError::GetDataFailed { - unique_id: unique_id.to_vec(), - msg: resp.message, + vec![splitidx], + )); + } + let (tx, rx) = + tokio::sync::mpsc::channel::>(3); + let _ = each_item_idx_receive_worker_tx_rx_splits.insert( + idx, + ( + tx, + rx, + data_splits + .splits + .iter() + .map(|split| { + split.data_offset as usize + ..split.data_offset as usize + split.data_size as usize + }) + .collect::>(), + ), + ); + } + + // this part is a little complex + // 1. all the splits will be read parallelly + // 2. for one dataitem (unique by idx), we want one worker to wait for ready dataitem(split) + + // 1. 
read tasks + + for (node_id, (req, splitidxs)) in each_node_getdata { + let view = self.view.clone(); + // let req_idxs = req.idxs.clone(); + // let idx_2_sender_to_recv_worker = each_item_idx_receive_worker_tx_rx_splitcnt.clone(); + let idx_of_idx_and_sender_to_recv_worker = req + .idxs + .iter() + .enumerate() + .map(|(idx_of_idx, reqidx)| { + let tx_rx_splits = each_item_idx_receive_worker_tx_rx_splits + .get(&(*reqidx as DataItemIdx)) + .unwrap(); + (idx_of_idx, tx_rx_splits.0.clone()) + }) + .collect::>(); + let unique_id = unique_id.clone(); + let _task = tokio::spawn(async move { + tracing::debug!("rpc_call_get_data start, remote({})", node_id); + let mut res = view + .data_general() + .rpc_call_get_data + .call(view.p2p(), node_id, req, Some(Duration::from_secs(30))) + .await; + tracing::debug!("rpc_call_get_data returned, remote({})", node_id); + + // result will contain multiple splits of dataitems + // so we need to send the result to the corresponding tx + + if res.is_err() { + let e = Arc::new(res.err().unwrap()); + for (_idx_of_idx, tx) in idx_of_idx_and_sender_to_recv_worker { + tracing::warn!("send to data merge tasks failed: {:?}", e); + tx.send(Err(WSError::ArcWrapper(e.clone()))) + .await + .expect("send to data merge tasks failed"); + } + } else { + for (idx_of_idx, tx) in idx_of_idx_and_sender_to_recv_worker { + let res = res.as_mut().unwrap(); + if !res.success { + tx.send(Err(WsDataError::GetDataFailed { + unique_id: unique_id.clone(), + msg: std::mem::take(&mut res.message), + } + .into())) + .await + .expect("send to data merge tasks failed"); + } else { + let _ = tx + .send(Ok(( + splitidxs[idx_of_idx], + std::mem::take(&mut res.data[idx_of_idx]), + ))) + .await + .expect("send to data merge tasks failed"); + } } - .into()); } + }); + } - data_map.insert(idx, resp.data[0].clone()); - } - GetOrDelDataArgType::PartialMany { idxs } => { - for idx in idxs { - let resp = self - .rpc_call_get_data - .call( - p2p, - meta.get_data_node(idx), - proto::GetOneDataRequest { - unique_id: unique_id.to_vec(), - idxs: vec![idx as u32], - delete: false, - return_data: true, - }, - Some(Duration::from_secs(60)), - ) - .await?; - - if !resp.success { - return Err(WsDataError::GetDataFailed { - unique_id: unique_id.to_vec(), - msg: resp.message, + // 2. data merge tasks + let mut merge_task_group_tasks = vec![]; + for idx in WantIdxIter::new(&ty, meta.data_item_cnt() as DataItemIdx) { + let (_, rx, splits) = each_item_idx_receive_worker_tx_rx_splits + .remove(&idx) + .unwrap(); + let unique_id = unique_id.clone(); + let cache_mode = meta.cache_mode_visitor(idx); + let task = tokio::spawn(async move { + WriteSplitDataTaskGroup::new(unique_id.clone(), splits, rx, cache_mode) + }); + merge_task_group_tasks.push((idx, task)); + } + + // 3. 
wait for results + let mut idx_2_data_item = HashMap::new(); + for (idx, task) in merge_task_group_tasks { + let merge_group = task.await; + match merge_group { + Err(e) => { + return Err(WsRuntimeErr::TokioJoin { + err: e, + context: format!("get data split failed, idx:{}", idx), } .into()); } - - data_map.insert(idx, resp.data[0].clone()); - } + Ok(merge_group) => match merge_group.await { + Err(e) => { + return Err(e); + } + Ok(res) => { + let res = res.join().await; + match res { + Err(e) => { + return Err(e); + } + Ok(res) => { + let _ = idx_2_data_item.insert(idx, res); + } + } + } + }, } } - Ok((meta, data_map)) + // // TODO: 将这里获取到的数据写入到缓存中 + // for (idx, data_item) in idx_2_data_item.iter() { + // // 只缓存需要缓存的数据,前面拿到过 + // if !cache[*idx as usize] { + // continue; + // } + // let cache_mode = meta.cache_mode_visitor(*idx); + // let cache_key = (unique_id.clone(), *idx); + // let cache_value = (meta.version, data_item.clone()); + // if cache_mode.is_time_forever() { + // self.forever_cache.insert(cache_key, cache_value); + // } else if cache_mode.is_time_auto() { + // self.auto_cache.insert(cache_key, cache_value); + // } + // } + + Ok((meta, idx_2_data_item)) } + // pub async fn get_data( + // &self, + // unique_id: impl Into>, + // ) -> WSResult<(DataSetMetaV2, HashMap<(NodeID, usize), proto::DataItem>)> { + // let unique_id: Vec = unique_id.into(); + // tracing::debug!("get_or_del_datameta_from_master start"); + // // Step1: get meta + // let meta: DataSetMetaV2 = self + // .get_or_del_datameta_from_master(&unique_id, false) + // .await + // .map_err(|err| { + // if let WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }) = err { + // tracing::debug!("data not found, uniqueid:{:?}", uniqueid); + // return WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }); + // } + // tracing::warn!("`get_data` failed, err:{}", err); + // err + // })?; + // tracing::debug!("get_or_del_datameta_from_master end\n get_data_by_meta start"); + // let res = self.get_data_by_meta(GetDataArg::All{ + + // }).await; + // tracing::debug!("get_data_by_meta end"); + // res + // } + + // /// return (meta, data_map) + // /// data_map: (node_id, idx) -> data_items + // pub async fn delete_data( + // &self, + // unique_id: impl Into>, + // ) -> WSResult<(DataSetMetaV2, HashMap<(NodeID, usize), proto::DataItem>)> { + // let unique_id: Vec = unique_id.into(); + + // // Step1: get meta + // let meta: DataSetMetaV2 = self + // .get_or_del_datameta_from_master(&unique_id, true) + // .await + // .map_err(|err| { + // if let WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }) = err { + // tracing::debug!("data not found, uniqueid:{:?}", uniqueid); + // return WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }); + // } + // tracing::warn!("`get_data` failed, err:{}", err); + // err + // })?; + // // .default_log_err("`delete_data`")?; + + // return self.get_data_by_meta(GetDataArg::Delete{ + // unique_id, + // }&, meta, true).await + // // + // } + + /// The user's data write entry + /// + /// - check the design here + /// + /// - check the uid from DATA_UID_PREFIX_XXX + /// + /// - https://fvd360f8oos.feishu.cn/docx/XoFudWhAgox84MxKC3ccP1TcnUh#share-Rtxod8uDqoIcRwxOM1rccuXxnQg pub async fn write_data( &self, unique_id: impl Into>, + // data_metas: Vec, datas: Vec, context_openode_opetype_operole: Option<( NodeID, @@ -403,164 +1098,244 @@ impl DataGeneral { proto::data_schedule_context::OpeRole, )>, ) -> WSResult<()> { - let unique_id = unique_id.into(); - let log_tag = 
format!("[write_data({})]", String::from_utf8_lossy(&unique_id)); - tracing::debug!("{} start write data", log_tag); - - // 获取数据调度计划 - let version_schedule_resp = self - .rpc_call_data_version_schedule - .call( - self.view.p2p(), - self.view.p2p().nodes_config.get_master_node(), - proto::DataVersionScheduleRequest { - unique_id: unique_id.clone(), - context: context_openode_opetype_operole.map(|(node, ope, role)| { - proto::DataScheduleContext { - each_data_sz_bytes: datas - .iter() - .map(|d| d.data_sz_bytes() as u32) - .collect(), - ope_node: node as i64, - ope_type: ope as i32, - ope_role: Some(role), - } - }), - version: 0, - }, - Some(Duration::from_secs(60)), - ) - .await?; - - // Clone the response to extend its lifetime - let version = version_schedule_resp.version; - let splits = version_schedule_resp.split.clone(); + let p2p = self.view.p2p(); + let unique_id: Vec = unique_id.into(); + tracing::debug!("write_data {:?} start", unique_id.clone()); + + let log_tag = Arc::new(format!( + "write_data,uid:{:?},operole:{:?}", + str::from_utf8(&unique_id), + context_openode_opetype_operole.as_ref().map(|v| &v.2) + )); + + // Step 1: need the master to do the decision + // - require for the latest version for write permission + // - require for the distribution and cache mode + let version_schedule_req = DataVersionScheduleRequest { + unique_id: unique_id.clone(), + version: 0, + context: context_openode_opetype_operole.map(|(ope_node, ope_type, ope_role)| { + proto::DataScheduleContext { + ope_node: ope_node as i64, + ope_type: ope_type as i32, + each_data_sz_bytes: datas + .iter() + .map(|data_item| data_item.data_sz_bytes() as u32) + .collect::>(), + ope_role: Some(ope_role), + } + }), + }; + tracing::debug!( + "{} data version schedule requesting {:?}", + log_tag, + version_schedule_req + ); + let version_schedule_resp = { + let resp = self + .rpc_call_data_version_schedule + .call( + self.view.p2p(), + p2p.nodes_config.get_master_node(), + version_schedule_req, + Some(Duration::from_secs(60)), + ) + .await; + + let resp = match resp { + Err(inner_e) => { + let e = WsDataError::WriteDataRequireVersionErr { + unique_id, + err: Box::new(inner_e), + }; + tracing::warn!("{:?}", e); + return Err(e.into()); + + // tracing::warn!("write_data require version error: {:?}", e); + // return e; + } + Ok(ok) => ok, + }; + resp + }; + tracing::debug!( + "{} data version scheduled, resp: {:?}", + log_tag, + version_schedule_resp + ); - // 处理每个数据项 - for (data_item_idx, (data_item, split)) in datas - .iter() - .zip(splits.iter()) - .enumerate() + // Step2: dispatch the data source and caches { - let mut tasks = Vec::new(); - tracing::debug!( - "{} processing data item {}/{}", - log_tag, - data_item_idx + 1, - datas.len() - ); - - // 1. 
并行写入所有主数据分片 - for (split_idx, split_info) in split.splits.iter().enumerate() { - tracing::debug!( - "{} creating split write task {}/{} for node {}, offset={}, size={}", - log_tag, - split_idx + 1, - split.splits.len(), - split_info.node_id, - split_info.data_offset, - split_info.data_size - ); - - // 克隆必要的数据 - let split_info = split_info.clone(); // 必须克隆,来自临时变量 - let unique_id = unique_id.clone(); // 必须克隆,多个任务需要 - let data_item = data_item.clone_split_range( // 克隆必要的数据范围 - split_info.data_offset as usize - ..(split_info.data_offset + split_info.data_size) as usize, - ); - let view = self.view.clone(); // 克隆 view,包含所有模块引用 - let version = version; // 复制值类型 - - let task = tokio::spawn(async move { - let resp = view.data_general() - .rpc_call_write_once_data - .call( - view.p2p(), - split_info.node_id, - proto::WriteOneDataRequest { - unique_id, - version, - data: vec![proto::DataItemWithIdx { - idx: data_item_idx as u32, - data: Some(data_item), - }], - }, - Some(Duration::from_secs(60)), - ) - .await?; - Ok::(resp) - }); - tasks.push(task); + // resp.split is decision for each data, so the length should be verified + if version_schedule_resp.split.len() != datas.len() { + let e = WsDataError::WriteDataSplitLenNotMatch { + unique_id, + expect: datas.len(), + actual: version_schedule_resp.split.len(), + }; + tracing::warn!("{:?}", e); + return Err(e.into()); } - // 2. 并行写入缓存数据(完整数据) - let visitor = CacheModeVisitor(version_schedule_resp.cache_mode[data_item_idx] as u16); - let need_cache = visitor.is_map_common_kv() || visitor.is_map_file(); - - let cache_nodes: Vec = if need_cache { - split.splits.iter().map(|s| s.node_id).collect() - } else { - vec![] - }; - - if !cache_nodes.is_empty() { - tracing::debug!( - "{} found {} cache nodes: {:?}", - log_tag, - cache_nodes.len(), - cache_nodes - ); - - // 使用信号量限制并发的批量传输数量 - const MAX_CONCURRENT_TRANSFERS: usize = 3; - let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_TRANSFERS)); - - for (cache_idx, &node_id) in cache_nodes.iter().enumerate() { - let permit = semaphore.clone().acquire_owned().await.unwrap(); - tracing::debug!( - "{} creating cache write task {}/{} for node {}", - log_tag, - cache_idx + 1, - cache_nodes.len(), - node_id - ); - - // 创建批量传输任务 - let unique_id = unique_id.clone(); - let data_item = data_item.clone(); - let view = self.view.clone(); - - let task = tokio::spawn(async move { - let _permit = permit; + let mut write_source_data_tasks = vec![]; + + // write the data split to kv + for (dataitem_idx, (one_data_splits, one_data_item)) in version_schedule_resp + .split + .into_iter() + .zip(datas) + .enumerate() + { + // let mut last_node_begin: Option<(NodeID, usize)> = None; + fn flush_the_data( + log_tag: &str, + unique_id: &[u8], + version: u64, + split_size: usize, + view: &DataGeneralView, + one_data_item: &proto::DataItem, + nodeid: NodeID, + offset: usize, + dataitem_idx: usize, + write_source_data_tasks: &mut Vec>>, + ) { + let log_tag = log_tag.to_owned(); + let unique_id = unique_id.to_owned(); + let view = view.clone(); + // let version = version_schedule_resp.version; + // let split_size = one_data_splits.split_size as usize; + let one_data_item_split = + one_data_item.clone_split_range(offset..offset + split_size); + let t = tokio::spawn(async move { + let req = WriteOneDataRequest { + unique_id, + version, + data: vec![proto::DataItemWithIdx { + idx: dataitem_idx as u32, + data: Some(one_data_item_split), + }], + }; + tracing::debug!( + "[{}] write_data flushing, target node: {}, `WriteOneDataRequest` msg_id: 
{}", + log_tag, + nodeid, + req.msg_id() + ); view.data_general() - .write_data_batch( - &unique_id, - version, - data_item.clone(), - data_item_idx, - node_id, - 1024 * 1024, // 1MB batch size - ) - .await?; - Ok::(proto::WriteOneDataResponse { - remote_version: version, - success: true, - message: String::new(), - }) + .rpc_call_write_once_data + .call(view.p2p(), nodeid, req, Some(Duration::from_secs(60))) + .await }); - tasks.push(task); + write_source_data_tasks.push(t); + } + for split in one_data_splits.splits.iter() { + flush_the_data( + &log_tag, + &unique_id, + version_schedule_resp.version, + split.data_size as usize, + &self.view, + &one_data_item, + split.node_id, + split.data_offset as usize, + dataitem_idx, + &mut write_source_data_tasks, + ); } } - // 等待所有写入任务完成 - for task in tasks { - task.await??; + // count and hanlde failed + let mut failed = false; + for t in write_source_data_tasks { + let res = t.await; + match res { + Ok(res) => match res { + Ok(_) => {} + Err(e) => { + failed = true; + tracing::warn!("write source data failed: {}", e); + } + }, + + Err(e) => { + failed = true; + tracing::warn!("write_source_data_tasks failed: {}", e); + } + } + } + if failed { + tracing::warn!("TODO: need to rollback"); } + // let res = join_all(write_source_data_tasks).await; + // // check if there's error + // if let Some(err)=res.iter().filter(|res|{res.is_err()}).next(){ + // tracing::warn!("failed to write data {}") + // panic!("failed to write data"); + // } } Ok(()) + // if DataModeDistribute::BroadcastRough as i32 == data_metas[0].distribute { + // self.write_data_broadcast_rough(unique_id, data_metas, datas) + // .await; + // } } + + // async fn write_data_broadcast_rough( + // &self, + // unique_id: String, + // data_metas: Vec, + // datas: Vec, + // ) { + // let p2p = self.view.p2p(); + + // tracing::debug!("start broadcast data with version"); + // let version = resp.version; + // // use the got version to send to global paralell + // let mut tasks = vec![]; + + // for (_idx, node) in p2p.nodes_config.all_nodes_iter().enumerate() { + // let n = *node.0; + // let view = self.view.clone(); + // let datas = datas.clone(); + // let unique_id = unique_id.clone(); + // // let datas = unsafe { util::SendNonNull(util::non_null(&datas)) }; + + // let t = tokio::spawn(async move { + // view.data_general() + // .rpc_call_write_once_data + // .call( + // view.p2p(), + // n, + // WriteOneDataRequest { + // unique_id, + // version, + // data: datas, + // }, + // Some(Duration::from_secs(60)), + // ) + // .await + // }); + + // tasks.push(t); + // } + // for t in tasks { + // let res = t.await.unwrap(); + // match res { + // Err(e) => { + // tracing::warn!("write_data_broadcast_rough broadcast error: {:?}", e); + // } + // Ok(ok) => { + // if !ok.success { + // tracing::warn!( + // "write_data_broadcast_rough broadcast error: {:?}", + // ok.message + // ); + // } + // } + // } + // } + // } } #[derive(Serialize, Deserialize)] @@ -617,15 +1392,9 @@ impl DataSetMetaV2 { pub fn cache_mode_visitor(&self, idx: DataItemIdx) -> CacheModeVisitor { CacheModeVisitor(self.cache_mode[idx as usize]) } - pub fn data_item_cnt(&self) -> usize { self.datas_splits.len() } - - pub fn get_data_node(&self, idx: DataItemIdx) -> NodeID { - // 获取指定数据项的主节点 - self.datas_splits[idx as usize].splits[0].node_id - } } pub type DataSetMeta = DataSetMetaV2; @@ -641,18 +1410,11 @@ pub struct EachNodeSplit { pub node_id: NodeID, pub data_offset: u32, pub data_size: u32, - pub cache_mode: u32, // 添加 cache_mode 字段 -} - 
-impl EachNodeSplit {
- pub fn cache_mode_visitor(&self) -> CacheModeVisitor {
- CacheModeVisitor(self.cache_mode as u16)
- }
}

/// the split of one data item
/// we need to know the split size for one data item
-#[derive(Serialize, Deserialize, Debug, Clone)]
+#[derive(Serialize, Deserialize, Debug)]
pub struct DataSplit {
pub splits: Vec<EachNodeSplit>,
}
@@ -954,107 +1716,3 @@ mod test {
);
}
}
-
-pub struct GetOrDelDataArg {
- pub meta: Option<DataSetMetaV2>,
- pub unique_id: Vec<u8>,
- pub ty: GetOrDelDataArgType,
-}
-
-#[derive(Clone)]
-pub enum GetOrDelDataArgType {
- All,
- Delete,
- PartialOne { idx: DataItemIdx },
- PartialMany { idxs: BTreeSet<DataItemIdx> },
-}
-
-impl DataGeneralView {
- fn inner_new(args: LogicalModuleNewArgs) -> Self {
- Self {
- inner: args.logical_modules_ref,
- }
- }
-}
-
-// implement cache_mode_visitor for proto::EachNodeSplit
-// impl proto::EachNodeSplit {
-// pub fn cache_mode_visitor(&self) -> CacheModeVisitor {
-// CacheModeVisitor(self.cache_mode as u16)
-// }
-// }
-
-// implement the From trait to handle error conversion
-impl From<JoinError> for WSError {
- fn from(err: JoinError) -> Self {
- WsNetworkLogicErr::TaskJoinError { err }.into()
- }
-}
-
-#[async_trait]
-impl LogicalModule for DataGeneral {
- fn inner_new(args: LogicalModuleNewArgs) -> Self
- where
- Self: Sized,
- {
- Self {
- view: DataGeneralView::new(args.logical_modules_ref.clone()),
- rpc_call_data_version_schedule: RPCCaller::new(),
- rpc_call_write_once_data: RPCCaller::new(),
- rpc_call_batch_data: RPCCaller::new(),
- rpc_call_get_data_meta: RPCCaller::new(),
- rpc_call_get_data: RPCCaller::new(),
-
- rpc_handler_write_once_data: RPCHandler::new(),
- rpc_handler_batch_data: RPCHandler::new(),
- rpc_handler_data_meta_update: RPCHandler::new(),
- rpc_handler_get_data_meta: RPCHandler::new(),
- rpc_handler_get_data: RPCHandler::new(),
-
- batch_transfers: DashMap::new(),
- }
- }
-
- async fn start(&self) -> WSResult<Vec<JoinHandleWrapper>> {
- Ok(vec![])
- }
-}
-
-fn flush_the_data(
- log_tag: &str,
- unique_id: &[u8],
- version: u64,
- split_size: usize,
- view: &DataGeneralView,
- one_data_item: &proto::DataItem,
- nodeid: NodeID,
- offset: usize,
- dataitem_idx: usize,
- write_source_data_tasks: &mut Vec<tokio::task::JoinHandle<WSResult<proto::WriteOneDataResponse>>>,
-) {
- let log_tag = log_tag.to_owned();
- let unique_id = unique_id.to_owned();
- let view = view.clone();
- let one_data_item_split = one_data_item.clone_split_range(offset..offset + split_size);
- let t = tokio::spawn(async move {
- let req = WriteOneDataRequest {
- unique_id,
- version,
- data: vec![proto::DataItemWithIdx {
- idx: dataitem_idx as u32,
- data: Some(one_data_item_split),
- }],
- };
- tracing::debug!(
- "[{}] write_data flushing, target node: {}, `WriteOneDataRequest` msg_id: {}",
- log_tag,
- nodeid,
- req.msg_id()
- );
- view.data_general()
- .rpc_call_write_once_data
- .call(view.p2p(), nodeid, req, Some(Duration::from_secs(60)))
- .await
- });
- write_source_data_tasks.push(t);
-}
diff --git a/src/main/src/general/network/msg_pack.rs b/src/main/src/general/network/msg_pack.rs
index 30bf6d7..6361b60 100644
--- a/src/main/src/general/network/msg_pack.rs
+++ b/src/main/src/general/network/msg_pack.rs
@@ -132,9 +132,10 @@ define_msg_ids!(
_ => false,
}
}),
- (proto::kv::KvLockResponse, _pack, { true }),
- (proto::sche::BatchDataRequest, _pack, { true }),
- (proto::sche::BatchDataResponse, _pack, { true })
+ (proto::kv::KvLockResponse, _pack, { true }) // (proto::kv::KvLockWaitAcquireNotifyRequest, _pack, { true }),
+ // (proto::kv::KvLockWaitAcquireNotifyResponse, _pack, { true })
+ // (proto::DataDeleteRequest, _pack, { true }),
+ // (proto::DataDeleteResponse, _pack, { true })
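+ // Note: `define_msg_ids!` appears to assign message ids positionally, so entries
+ // must not be reordered; `{ true }` accepts every instance of the type for
+ // dispatch, while match-based predicates (like the one ending above) gate on the
+ // payload variant. Renaming `_pack` is only needed when the predicate inspects it.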
); pub trait RPCReq: MsgPack + Default { @@ -189,10 +190,6 @@ impl RPCReq for proto::kv::KvLockRequest { type Resp = proto::kv::KvLockResponse; } -impl RPCReq for proto::sche::BatchDataRequest { - type Resp = proto::sche::BatchDataResponse; -} - // impl RPCReq for proto::kv::KvLockWaitAcquireNotifyRequest { // type Resp = proto::kv::KvLockWaitAcquireNotifyResponse; // } diff --git a/src/main/src/general/network/proto_ext.rs b/src/main/src/general/network/proto_ext.rs index 60f64fd..7ddfe70 100644 --- a/src/main/src/general/network/proto_ext.rs +++ b/src/main/src/general/network/proto_ext.rs @@ -1,8 +1,4 @@ -use crate::general::app::DataEventTrigger; use crate::general::data::m_dist_lock::DistLockOpe; -use crate::general::network::proto::sche::distribute_task_req::{ - DataEventTriggerNew, DataEventTriggerWrite, Trigger, -}; use super::proto::{self, kv::KvResponse, FileData}; @@ -238,74 +234,3 @@ impl DataItemExt for proto::DataItem { } } } - -pub trait ProtoExtDataEventTrigger { - fn into_proto_trigger(self, key: Vec, opeid: u32) -> Trigger; -} - -impl ProtoExtDataEventTrigger for DataEventTrigger { - fn into_proto_trigger(self, key: Vec, opeid: u32) -> Trigger { - match self { - DataEventTrigger::Write | DataEventTrigger::WriteWithCondition { .. } => { - Trigger::EventWrite(DataEventTriggerWrite { key, opeid }) - } - DataEventTrigger::New | DataEventTrigger::NewWithCondition { .. } => { - Trigger::EventNew(DataEventTriggerNew { key, opeid }) - } - } - } -} - -// Example usage in tests -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_data_event_trigger_conversion() { - let key = b"test_key".to_vec(); - let opeid = 1; - - // Test Write - let write_trigger = DataEventTrigger::Write.into_proto_trigger(key.clone(), opeid); - if let Trigger::EventWrite(trigger) = write_trigger { - assert_eq!(trigger.key, key); - assert_eq!(trigger.opeid, opeid); - } else { - panic!("Expected EventWrite trigger"); - } - - // Test WriteWithCondition (should produce same proto as Write) - let write_cond_trigger = DataEventTrigger::WriteWithCondition { - condition: "test_condition".to_string(), - } - .into_proto_trigger(key.clone(), opeid); - if let Trigger::EventWrite(trigger) = write_cond_trigger { - assert_eq!(trigger.key, key); - assert_eq!(trigger.opeid, opeid); - } else { - panic!("Expected EventWrite trigger"); - } - - // Test New - let new_trigger = DataEventTrigger::New.into_proto_trigger(key.clone(), opeid); - if let Trigger::EventNew(trigger) = new_trigger { - assert_eq!(trigger.key, key); - assert_eq!(trigger.opeid, opeid); - } else { - panic!("Expected EventNew trigger"); - } - - // Test NewWithCondition (should produce same proto as New) - let new_cond_trigger = DataEventTrigger::NewWithCondition { - condition: "test_condition".to_string(), - } - .into_proto_trigger(key.clone(), opeid); - if let Trigger::EventNew(trigger) = new_cond_trigger { - assert_eq!(trigger.key, key); - assert_eq!(trigger.opeid, opeid); - } else { - panic!("Expected EventNew trigger"); - } - } -} diff --git a/src/main/src/general/network/proto_src/sche.proto b/src/main/src/general/network/proto_src/sche.proto index a3cba7d..402d2fb 100644 --- a/src/main/src/general/network/proto_src/sche.proto +++ b/src/main/src/general/network/proto_src/sche.proto @@ -22,44 +22,21 @@ package sche; // uint32 data_target_node=2; // } -message DistributeTaskReq { - message DataEventTriggerWrite { - bytes key = 1; - uint32 opeid = 2; +message DistributeTaskReq{ + message TriggerKvSet{ + bytes key=1; + uint32 opeid=2; } - - message 
DataEventTriggerNew { - bytes key = 1; - uint32 opeid = 2; - } - - string app = 1; - string func = 2; - uint32 task_id = 3; - oneof trigger { - DataEventTriggerWrite event_write = 4; // For Write/WriteWithCondition - DataEventTriggerNew event_new = 5; // For New/NewWithCondition + string app=1; + string func=2; + uint32 task_id=3; + oneof trigger{ + TriggerKvSet kv_set=4; } } -message DistributeTaskResp { - bool success = 1; - string err_msg = 2; -} - -message BatchDataRequest { - bytes unique_id = 1; - uint64 version = 2; - uint32 batch_id = 3; // 当前批次ID - uint32 total_batches = 4; // 总批次数 - bytes data = 5; // 当前批次的数据 - uint32 data_item_idx = 6; // 数据项索引 - bool is_complete = 7; // 是否是最后一个批次 -} - -message BatchDataResponse { - bool success = 1; - string error = 2; - uint32 batch_id = 3; +message DistributeTaskResp{ + bool success=1; + string err_msg=2; } diff --git a/src/main/src/master/app/m_app_master.rs b/src/main/src/master/app/m_app_master.rs index 52f2d86..6c6399b 100644 --- a/src/main/src/master/app/m_app_master.rs +++ b/src/main/src/master/app/m_app_master.rs @@ -1,28 +1,15 @@ -use crate::general::app::m_executor::Executor; use crate::general::app::AppMetaManager; -use crate::general::app::{AffinityPattern, AffinityRule, AppType, FnMeta, NodeTag}; -use crate::general::network::m_p2p::P2PModule; -use crate::general::network::m_p2p::RPCCaller; -use crate::general::network::proto::sche::{self, distribute_task_req::Trigger}; use crate::logical_module_view_impl; use crate::master::app::fddg::FDDGMgmt; -use crate::master::m_master::{FunctionTriggerContext, Master}; -use crate::result::{WSResult, WsFuncError}; -use crate::sys::NodeID; +use crate::result::WSResult; use crate::sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef}; use crate::util::JoinHandleWrapper; use async_trait::async_trait; -use std::collections::{HashMap, HashSet}; -use std::sync::atomic::{AtomicU32, Ordering}; -use std::time::Duration; use ws_derive::LogicalModule; logical_module_view_impl!(MasterAppMgmtView); // access general app logical_module_view_impl!(MasterAppMgmtView, appmeta_manager, AppMetaManager); -logical_module_view_impl!(MasterAppMgmtView, p2p, P2PModule); -logical_module_view_impl!(MasterAppMgmtView, executor, Executor); -logical_module_view_impl!(MasterAppMgmtView, master, Option); #[derive(LogicalModule)] pub struct MasterAppMgmt { diff --git a/src/main/src/master/data/m_data_master.rs b/src/main/src/master/data/m_data_master.rs index bd6605c..d6ec3ba 100644 --- a/src/main/src/master/data/m_data_master.rs +++ b/src/main/src/master/data/m_data_master.rs @@ -1,15 +1,9 @@ -use crate::general::app::m_executor::EventCtx; use crate::general::app::m_executor::Executor; -use crate::general::app::m_executor::FnExeCtxAsync; -use crate::general::app::m_executor::FnExeCtxAsyncAllowedType; -use crate::general::app::AppMetaManager; use crate::general::app::DataEventTrigger; -use crate::general::app::{AffinityPattern, AffinityRule, NodeTag}; use crate::general::network::m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}; use crate::general::network::proto::{ self, DataVersionScheduleRequest, DataVersionScheduleResponse, }; -use crate::master::m_master::{FunctionTriggerContext, Master}; use crate::result::{WSResult, WSResultExt}; use crate::sys::{LogicalModulesRef, NodeID}; use crate::util::JoinHandleWrapper; @@ -17,7 +11,7 @@ use crate::{ general::data::{ m_data_general::{ CacheMode, DataGeneral, DataItemIdx, DataSetMeta, DataSetMetaBuilder, DataSplit, - EachNodeSplit, CACHE_MODE_MAP_COMMON_KV_MASK, 
CACHE_MODE_TIME_FOREVER_MASK, + EachNodeSplit, }, m_kv_store_engine::{KeyType, KeyTypeDataSetMeta, KvAdditionalConf, KvStoreEngine}, }, @@ -37,13 +31,11 @@ use ws_derive::LogicalModule; logical_module_view_impl!(DataMasterView); logical_module_view_impl!(DataMasterView, data_master, Option); logical_module_view_impl!(DataMasterView, data_general, DataGeneral); -logical_module_view_impl!(DataMasterView, appmeta_manager, AppMetaManager); logical_module_view_impl!(DataMasterView, app_master, Option); logical_module_view_impl!(DataMasterView, p2p, P2PModule); logical_module_view_impl!(DataMasterView, http_handler, Box); logical_module_view_impl!(DataMasterView, kv_store_engine, KvStoreEngine); logical_module_view_impl!(DataMasterView, executor, Executor); -logical_module_view_impl!(DataMasterView, master, Option); #[derive(LogicalModule)] pub struct DataMaster { @@ -72,9 +64,8 @@ impl LogicalModule for DataMaster { async fn start(&self) -> WSResult> { tracing::info!("start as master"); let view = self.view.clone(); - let _ = self.rpc_caller_data_meta_update.regist(view.p2p()); - let _ = self - .rpc_handler + self.rpc_caller_data_meta_update.regist(view.p2p()); + self.rpc_handler .regist(self.view.p2p(), move |responsor, req| { let view = view.clone(); let _ = tokio::spawn(async move { @@ -90,69 +81,64 @@ impl LogicalModule for DataMaster { } impl DataMaster { - async fn plan_for_write_data( + // return cache mode, splits, cache nodes + fn plan_for_write_data( &self, data_unique_id: &[u8], - context: &proto::DataScheduleContext, + _context: &proto::DataScheduleContext, func_trigger_type: FuncTriggerType, ) -> WSResult<(Vec, Vec, Vec)> { - // 如果不是有效的 UTF-8 字符串,直接返回空结果 - let data_unique_id_str = match std::str::from_utf8(data_unique_id) { - Ok(s) => s, - Err(_) => return Ok((DataSetMetaBuilder::new().build().cache_mode, vec![], vec![])), - }; - - // 获取绑定的函数 - let binded_funcs = self - .view - .app_master() - .fddg - .get_binded_funcs(data_unique_id_str, func_trigger_type); - - // 收集所有调度节点作为缓存节点 - let mut cache_nodes = HashSet::new(); - - // 对每个绑定的函数进行调度 - for (app_name, (_, fn_names)) in &binded_funcs { - for (fn_name, _unused) in fn_names { - // 选择调度节点 (暂时不考虑亲和性规则) - let target_node = self.view.master().select_node(); - - // 将调度节点加入缓存节点集合 - let _ = cache_nodes.insert(target_node); - - // 创建函数触发上下文 - let ctx = FunctionTriggerContext { - app_name: app_name.clone(), - fn_name: fn_name.clone(), - data_unique_id: data_unique_id.to_vec(), - target_nodes: vec![target_node], // 只在选中的节点上触发 - timeout: Duration::from_secs(60), - event_type: DataEventTrigger::Write, // 使用Write事件类型 - }; - - // 发送触发请求并处理可能的错误 - if let Err(e) = self.view.master().trigger_func_call(ctx).await { - tracing::error!( - "Failed to trigger function {}/{} on node {}: {:?}", - app_name, - fn_name, - target_node, - e - ); + fn set_data_cache_mode_for_meta( + req: &DataVersionScheduleRequest, + builder: &mut DataSetMetaBuilder, + ) { + fn default_set_data_cache_mode_for_meta( + req: &DataVersionScheduleRequest, + builder: &mut DataSetMetaBuilder, + ) { + // for each item(by split length), set cache mode + for idx in 0..req.context.as_ref().unwrap().each_data_sz_bytes.len() { + let _ = builder + .cache_mode_time_forever(idx as DataItemIdx) + .cache_mode_pos_allnode(idx as DataItemIdx) + .cache_mode_map_common_kv(idx as DataItemIdx); } } + if let Some(context) = req.context.as_ref() { + match context.ope_role.as_ref().unwrap() { + proto::data_schedule_context::OpeRole::UploadApp(_data_ope_role_upload_app) => { + let _ = builder + // 0 is 
app meta data, map to common kv + .cache_mode_time_forever(0) + .cache_mode_pos_allnode(0) + .cache_mode_map_common_kv(0) + // 1 is app package data, map to file + .cache_mode_time_forever(1) + .cache_mode_pos_allnode(1) + .cache_mode_map_file(1); + } + proto::data_schedule_context::OpeRole::FuncCall(_data_ope_role_func_call) => { + default_set_data_cache_mode_for_meta(req, builder); + } + } + } else { + tracing::warn!( + "context is None, use default cache mode, maybe we need to suitable for this case" + ); + default_set_data_cache_mode_for_meta(req, builder); + } } - // 将缓存节点集合转换为向量 - let cache_nodes: Vec = cache_nodes.into_iter().collect(); + fn decide_each_data_split( + datamaster: &DataMaster, + ctx: &proto::DataScheduleContext, + ) -> Vec { + // let DEFAULT_SPLIT_SIZE = 4 * 1024 * 1024; + let mut datasplits = vec![]; + let p2p = datamaster.view.p2p(); - // 根据缓存节点生成数据分片 - let mut splits = Vec::new(); - for sz in context.each_data_sz_bytes.iter() { - // 选择主分片节点 (使用第一个缓存节点或随机选择) - let primary_node = cache_nodes.first().copied().unwrap_or_else(|| { - let p2p = self.view.p2p(); + // simply select a node + let node_id = { let rand_node_idx = thread_rng().gen_range(0..p2p.nodes_config.node_cnt()); let mut iter = p2p.nodes_config.all_nodes_iter(); for _ in 0..rand_node_idx { @@ -162,47 +148,80 @@ impl DataMaster { .next() .expect("node count doesn't match all_nodes_iter") .0 - }); - - // 创建主分片 - let mut split = DataSplit { - splits: vec![EachNodeSplit { - node_id: primary_node, - data_offset: 0, - data_size: *sz, - cache_mode: 0, - }], }; - // 为每个缓存节点添加完整数据副本 (除了主分片节点) - for &cache_node in cache_nodes.iter() { - if cache_node != primary_node { - split.splits.push(EachNodeSplit { - node_id: cache_node, + for sz in ctx.each_data_sz_bytes.iter() { + datasplits.push(DataSplit { + splits: vec![EachNodeSplit { + node_id, data_offset: 0, data_size: *sz, - cache_mode: 0, - }); - } + }], + }); } - - splits.push(split); + tracing::debug!("decide_each_data_split res: {:?}", datasplits); + datasplits } - // 设置缓存模式 - let mut builder = DataSetMetaBuilder::new(); + if let Ok(data_unique_id_str) = std::str::from_utf8(data_unique_id) { + // get data binded functions + // https://fvd360f8oos.feishu.cn/wiki/Zp19wf9sdilwVKk5PlKcGy0CnBd + // app_name -> (apptype, fn_name -> fn_meta) + let binded_funcs: std::collections::HashMap< + String, + ( + crate::general::app::AppType, + std::collections::HashMap, + ), + > = self + .view + .app_master() + .fddg + .get_binded_funcs(data_unique_id_str, func_trigger_type); - // 设置数据分片 - let _ = builder.set_data_splits(splits.clone()); + // filter functions by condition function + for (appname, (app_type, fn_names)) in binded_funcs.iter_mut() { + fn_names.retain(|fn_name, fn_meta| { + if let Some(data_accesses) = fn_meta.data_accesses.as_ref() { + // find data access discription + if let Some((key_pattern, data_access)) = + data_accesses.iter().find(|(key_pattern, data_access)| { + if let Some(event) = data_access.event.as_ref() { + match event { + DataEventTrigger::WriteWithCondition { condition } + | DataEventTrigger::NewWithCondition { condition } => false, + DataEventTrigger::Write | DataEventTrigger::New => false, + } + } else { + false + } + }) + { + // with condition, call the condition function + let condition_func = match data_access.event.as_ref().unwrap() { + DataEventTrigger::WriteWithCondition { condition } + | DataEventTrigger::NewWithCondition { condition } => condition, + _ => panic!("logical error, find condition must be with condition, so current code is 
wrong"), + }; - // 设置缓存模式 - 对所有缓存节点启用永久缓存 - let cache_modes = vec![ - CACHE_MODE_TIME_FOREVER_MASK | CACHE_MODE_MAP_COMMON_KV_MASK; - context.each_data_sz_bytes.len() - ]; - let _ = builder.set_cache_mode_for_all(cache_modes.clone()); + // call the condition function + self.view.executor().handle_local_call(resp, req) + + } else { + // without condition + true + } + } else { + true + } + }); + } - Ok((cache_modes, splits, cache_nodes)) + // we need master to schedule functions to get func target nodes + // then we make the splits to prefer the target nodes and write cache when wrting splits + } + + Ok((DataSetMetaBuilder::new().build().cache_mode, vec![], vec![])) } /// Check the dataset sync flow here: @@ -224,17 +243,17 @@ impl DataMaster { // now we expand the meta let (new_meta, cache_nodes) = { - // the we will make the split plan and cache plan - // then expand the meta - // this process will fail if other write updated the unique id - let (item_cache_modes, new_splits, cache_nodes) = self - .plan_for_write_data(&req.unique_id, ctx, FuncTriggerType::DataWrite) - .await?; - let update_version_lock = kv_store_engine.with_rwlock(&metakey_bytes); let _guard = update_version_lock.write(); + let dataset_meta = kv_store_engine.get(&metakey, true, KvAdditionalConf::default()); + // the we will make the split plan and cache plan + // then expand the meta + // this process will fail if other write updated the unique id + let (item_cache_modes, new_splits, cache_nodes) = + self.plan_for_write_data(&req.unique_id, ctx, FuncTriggerType::DataWrite)?; + // let takeonce=Some((new_meta,new_)) let set_meta = if let Some((_kv_version, set_meta)) = dataset_meta { tracing::debug!("update dataset meta for data({:?})", req.unique_id); @@ -265,12 +284,15 @@ impl DataMaster { req.unique_id, set_meta ); - let _ = kv_store_engine.set(KeyTypeDataSetMeta(&req.unique_id), &set_meta, true)?; + let _ = kv_store_engine + .set(KeyTypeDataSetMeta(&req.unique_id), &set_meta, true) + .unwrap(); kv_store_engine.flush(); (set_meta, cache_nodes) }; // update version peers + // let mut call_tasks = vec![]; let need_notify_nodes = { let mut need_notify_nodes = HashSet::new(); for one_data_splits in &new_meta.datas_splits { @@ -284,16 +306,21 @@ impl DataMaster { for need_notify_node in need_notify_nodes { let view = self.view.clone(); + // let mut req = req.clone(); + // req.version = new_meta.version; + + // don't need to retry or wait let serialized_meta = bincode::serialize(&new_meta).unwrap(); let unique_id = req.unique_id.clone(); let version = new_meta.version; - let _ = tokio::spawn(async move { + let _call_task = tokio::spawn(async move { let p2p = view.p2p(); - let display_id = std::str::from_utf8(&unique_id) - .map_or_else(|_err| format!("{:?}", unique_id), |ok| ok.to_owned()); tracing::debug!( "updating version for data({:?}) to node: {}, this_node: {}", - display_id, + std::str::from_utf8(&unique_id).map_or_else( + |_err| { format!("{:?}", unique_id) }, + |ok| { ok.to_owned() } + ), need_notify_node, p2p.nodes_config.this_node() ); @@ -330,6 +357,11 @@ impl DataMaster { }); } + // call_tasks.push(call_task); + + // let cache_nodes = + // Self::decide_cache_nodes(req.context.as_ref().unwrap(), new_meta.cache_mode); + tracing::debug!( "data:{:?} version required({}) and schedule done, caller will do following thing after receive `DataVersionScheduleResponse`", req.unique_id, diff --git a/src/main/src/master/m_master.rs b/src/main/src/master/m_master.rs index 92e53f0..2f6f00a 100644 --- 
a/src/main/src/master/m_master.rs +++ b/src/main/src/master/m_master.rs @@ -1,9 +1,4 @@ -use std::{ - collections::hash_map::DefaultHasher, - hash::Hasher, - sync::atomic::{AtomicU32, Ordering}, - time::Duration, -}; +use std::{collections::hash_map::DefaultHasher, hash::Hasher, time::Duration}; use async_trait::async_trait; use axum::response::Redirect; @@ -12,19 +7,15 @@ use ws_derive::LogicalModule; use crate::{ config::NodesConfig, - general::{ - app::{AffinityPattern, AffinityRule, AppMetaManager, AppType, DataEventTrigger, FnMeta}, - network::{ - m_p2p::{P2PModule, RPCCaller}, - proto::{ - self, - sche::{self, distribute_task_req::Trigger, DistributeTaskReq}, - }, - proto_ext::ProtoExtDataEventTrigger, + general::network::{ + m_p2p::{P2PModule, RPCCaller}, + proto::{ + self, + sche::{distribute_task_req::Trigger, DistributeTaskReq}, }, }, logical_module_view_impl, - result::{WSResult, WsFuncError}, + result::WSResult, sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef, NodeID}, util::JoinHandleWrapper, }; @@ -85,24 +76,11 @@ impl NodeSelector for HashNodeSelector { logical_module_view_impl!(MasterView); logical_module_view_impl!(MasterView, p2p, P2PModule); logical_module_view_impl!(MasterView, master, Option); -logical_module_view_impl!(MasterView, appmeta_manager, AppMetaManager); - -#[derive(Clone)] -pub struct FunctionTriggerContext { - pub app_name: String, - pub fn_name: String, - pub data_unique_id: Vec, - pub target_nodes: Vec, - pub timeout: Duration, - pub event_type: DataEventTrigger, -} #[derive(LogicalModule)] pub struct Master { pub rpc_caller_distribute_task: RPCCaller, view: MasterView, - task_id_allocator: AtomicU32, - ope_id_allocator: AtomicU32, } #[async_trait] @@ -114,8 +92,6 @@ impl LogicalModule for Master { Self { view: MasterView::new(args.logical_modules_ref.clone()), rpc_caller_distribute_task: RPCCaller::default(), - task_id_allocator: AtomicU32::new(0), - ope_id_allocator: AtomicU32::new(0), } } async fn start(&self) -> WSResult> { @@ -180,73 +156,10 @@ impl Master { } } } - pub fn select_node(&self) -> NodeID { + fn select_node(&self) -> NodeID { let workers = self.view.p2p().nodes_config.get_worker_nodes(); let mut rng = rand::thread_rng(); let idx = rng.gen_range(0..workers.len()); workers.iter().nth(idx).unwrap().clone() } - - /// Trigger a function execution on target nodes - /// - /// # Arguments - /// * `ctx` - The context containing function and target information - /// - /// # Returns - /// * `WSResult<()>` - Result indicating success or failure - pub async fn trigger_func_call(&self, ctx: FunctionTriggerContext) -> WSResult<()> { - // Validate function exists and is executable - let app_meta = self - .view - .appmeta_manager() - .get_app_meta(&ctx.app_name) - .await? 
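- // (get_app_meta yields WSResult<Option<(AppMeta, Option<DataSetMetaV2>)>>;
- // a missing app is mapped to WsFuncError::AppNotFound here, and a missing
- // function to FuncNotFound below, before the async-support check.)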
- .ok_or_else(|| WsFuncError::AppNotFound { - app: ctx.app_name.clone(), - })?; - - let fn_meta = - app_meta - .0 - .get_fn_meta(&ctx.fn_name) - .ok_or_else(|| WsFuncError::FuncNotFound { - app: ctx.app_name.clone(), - func: ctx.fn_name.clone(), - })?; - - if !fn_meta.sync_async.asyncable() { - return Err(WsFuncError::FuncHttpNotSupported { - fname: ctx.fn_name, - fmeta: fn_meta.clone(), - } - .into()); - } - - // Generate task and operation IDs - let task_id = self.task_id_allocator.fetch_add(1, Ordering::Relaxed); - let opeid = self.ope_id_allocator.fetch_add(1, Ordering::Relaxed); - - // Create trigger using the ProtoExtDataEventTrigger trait - let trigger = DataEventTrigger::Write.into_proto_trigger(ctx.data_unique_id, opeid); - - // Create and send tasks to target nodes - for &node in &ctx.target_nodes { - let req = sche::DistributeTaskReq { - app: ctx.app_name.clone(), - func: ctx.fn_name.clone(), - task_id, - trigger: Some(trigger.clone()), - }; - - // Send request with timeout - let _ = tokio::time::timeout( - ctx.timeout, - self.rpc_caller_distribute_task - .call(self.view.p2p(), node, req, Some(ctx.timeout)), - ) - .await; - } - - Ok(()) - } } diff --git a/src/main/src/modules_global_bridge/process_func.rs b/src/main/src/modules_global_bridge/process_func.rs index 6c33fe7..dfcbdeb 100644 --- a/src/main/src/modules_global_bridge/process_func.rs +++ b/src/main/src/modules_global_bridge/process_func.rs @@ -8,7 +8,7 @@ pub trait ModulesGlobalBrigeInstanceManager: Sized + 'static { impl ModulesGlobalBrigeInstanceManager for ProcessRpc { unsafe fn global_m_instance_manager() -> &'static InstanceManager { - &super::modules().instance_manager + super::modules().instance_manager() } } diff --git a/src/main/src/result.rs b/src/main/src/result.rs index 11f2785..2d6fbb7 100644 --- a/src/main/src/result.rs +++ b/src/main/src/result.rs @@ -31,9 +31,6 @@ pub enum WsNetworkLogicErr { DecodeError(DecodeError), MsgIdNotDispatchable(u32), InvaidNodeID(NodeID), - TaskJoinError { - err: tokio::task::JoinError - }, } #[derive(Debug)] @@ -127,7 +124,7 @@ pub enum WsPermissionErr { #[derive(Debug)] pub enum WsFuncError { - WasmError(Box), + WasmError(wasmedge_sdk::error::WasmEdgeError), AppNotFound { app: String, }, @@ -173,7 +170,6 @@ pub enum WsFuncError { InstanceJavaPidNotFound(String), InstanceProcessStartFailed(std::io::Error), InsranceVerifyFailed(String), - UnsupportedAppType, } #[derive(Debug)] @@ -255,11 +251,6 @@ pub enum WsDataError { len: u8, }, ItemIdxEmpty, - BatchTransferFailed { - node: NodeID, - batch: u32, - reason: String, - }, } #[derive(Error, Debug)] @@ -348,7 +339,7 @@ impl From for WSError { impl From for WSError { fn from(e: WasmEdgeError) -> Self { - WSError::WsFuncError(WsFuncError::WasmError(Box::new(e))) + WSError::WsFuncError(WsFuncError::WasmError(e)) } } diff --git a/src/main/src/sys.rs b/src/main/src/sys.rs index d10de92..4781980 100644 --- a/src/main/src/sys.rs +++ b/src/main/src/sys.rs @@ -143,8 +143,6 @@ macro_rules! logical_module_view_impl { .unwrap(); #[cfg(feature="unsafe-log")] tracing::debug!("unsafe ptr end"); - - let _: &dyn Send = res; res } } @@ -163,9 +161,6 @@ macro_rules! logical_module_view_impl { #[cfg(feature="unsafe-log")] tracing::debug!("unsafe ptr end2 {}",tag); - // 编译期校验 $type 是Send的类型 - let _: &dyn Send = res; - res } } @@ -184,9 +179,6 @@ macro_rules! 
logical_module_view_impl { // self.inner.setup(modules); // } } - - // unsafe send - unsafe impl Send for $module {} }; } From 04dd7256cfa6f69e6bd46136c830b931107c5152 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 21/26] Revert "in progress" This reverts commit 108b2ed20c763211a2e2d3342444db534a517dd8. --- entrypoint.sh | 72 --- src/main/build.rs | 2 +- .../general/app/app_native/app_checkpoint.rs | 0 src/main/src/general/app/app_native/mod.rs | 96 --- src/main/src/general/app/app_shared/java.rs | 107 ---- src/main/src/general/app/instance/mod.rs | 73 --- src/main/src/general/app/mod.rs | 584 ++++++++---------- src/main/src/general/app/v_os.rs | 21 +- .../general/data/m_data_general/dataitem.rs | 9 +- .../src/general/data/m_data_general/mod.rs | 207 +++---- src/main/src/general/m_os/mod.rs | 5 - src/main/src/general/network/msg_pack.rs | 16 +- src/main/src/general/network/proto_ext.rs | 12 +- .../src/general/network/proto_src/data.proto | 9 +- src/main/src/master/app/fddg.rs | 89 --- src/main/src/master/app/m_app_master.rs | 57 -- src/main/src/master/app/mod.rs | 2 - src/main/src/master/data/mod.rs | 2 - .../src/master/{data => }/m_data_master.rs | 316 ++++------ src/main/src/master/{data => }/m_master_kv.rs | 0 src/main/src/master/mod.rs | 4 +- src/main/src/modules_global_bridge/mod.rs | 3 +- .../src/modules_global_bridge/process_func.rs | 13 +- src/main/src/result.rs | 8 - src/main/src/sys.rs | 40 +- src/main/src/{util/mod.rs => util.rs} | 2 - src/main/src/util/container/map.rs | 14 - src/main/src/util/container/mod.rs | 2 - src/main/src/util/container/sync_trie.rs | 275 --------- src/main/src/worker/func/fn_event/kv_event.rs | 155 +++++ src/main/src/worker/func/fn_event/mod.rs | 1 + .../func}/m_instance_manager.rs | 51 +- src/main/src/worker/func/mod.rs | 114 ++++ .../app_owned => worker/func/owned}/mod.rs | 8 +- .../app_owned => worker/func/owned}/wasm.rs | 10 +- src/main/src/worker/func/shared/java.rs | 59 ++ .../app_shared => worker/func/shared}/mod.rs | 13 +- .../func/shared}/process.rs | 33 +- .../shared}/process_instance_man_related.rs | 28 +- .../func/shared}/process_rpc.rs | 14 +- .../func/shared}/process_rpc_proto.proto | 0 .../func}/wasm_host_funcs/fs.rs | 0 .../func}/wasm_host_funcs/kv.rs | 0 .../func}/wasm_host_funcs/mod.rs | 19 +- .../func}/wasm_host_funcs/result.rs | 0 .../src/{general/app => worker}/m_executor.rs | 281 ++------- src/main/src/worker/m_http_handler.rs | 10 +- src/main/src/worker/mod.rs | 4 + 48 files changed, 979 insertions(+), 1861 deletions(-) delete mode 100755 entrypoint.sh delete mode 100644 src/main/src/general/app/app_native/app_checkpoint.rs delete mode 100644 src/main/src/general/app/app_native/mod.rs delete mode 100644 src/main/src/general/app/app_shared/java.rs delete mode 100644 src/main/src/general/app/instance/mod.rs delete mode 100644 src/main/src/master/app/fddg.rs delete mode 100644 src/main/src/master/app/m_app_master.rs delete mode 100644 src/main/src/master/app/mod.rs delete mode 100644 src/main/src/master/data/mod.rs rename src/main/src/master/{data => }/m_data_master.rs (51%) rename src/main/src/master/{data => }/m_master_kv.rs (100%) rename src/main/src/{util/mod.rs => util.rs} (99%) delete mode 100644 src/main/src/util/container/map.rs delete mode 100644 src/main/src/util/container/mod.rs delete mode 100644 src/main/src/util/container/sync_trie.rs create mode 100644 src/main/src/worker/func/fn_event/kv_event.rs create mode 100644 src/main/src/worker/func/fn_event/mod.rs 
rename src/main/src/{general/app/instance => worker/func}/m_instance_manager.rs (84%) create mode 100644 src/main/src/worker/func/mod.rs rename src/main/src/{general/app/app_owned => worker/func/owned}/mod.rs (72%) rename src/main/src/{general/app/app_owned => worker/func/owned}/wasm.rs (96%) create mode 100644 src/main/src/worker/func/shared/java.rs rename src/main/src/{general/app/app_shared => worker/func/shared}/mod.rs (71%) rename src/main/src/{general/app/app_shared => worker/func/shared}/process.rs (95%) rename src/main/src/{general/app/app_shared => worker/func/shared}/process_instance_man_related.rs (82%) rename src/main/src/{general/app/app_shared => worker/func/shared}/process_rpc.rs (96%) rename src/main/src/{general/app/app_shared => worker/func/shared}/process_rpc_proto.proto (100%) rename src/main/src/{general/app/app_owned => worker/func}/wasm_host_funcs/fs.rs (100%) rename src/main/src/{general/app/app_owned => worker/func}/wasm_host_funcs/kv.rs (100%) rename src/main/src/{general/app/app_owned => worker/func}/wasm_host_funcs/mod.rs (94%) rename src/main/src/{general/app/app_owned => worker/func}/wasm_host_funcs/result.rs (100%) rename src/main/src/{general/app => worker}/m_executor.rs (62%) diff --git a/entrypoint.sh b/entrypoint.sh deleted file mode 100755 index 0ad7f50..0000000 --- a/entrypoint.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash -set -eo pipefail - -echo "Starting OpenHands..." -if [[ $NO_SETUP == "true" ]]; then - echo "Skipping setup, running as $(whoami)" - "$@" - exit 0 -fi - -if [ "$(id -u)" -ne 0 ]; then - echo "The OpenHands entrypoint.sh must run as root" - exit 1 -fi - -echo "hosts file:" -cat /etc/hosts - -if [ -z "$SANDBOX_USER_ID" ]; then - echo "SANDBOX_USER_ID is not set" - exit 1 -fi - -if [ -z "$WORKSPACE_MOUNT_PATH" ]; then - # This is set to /opt/workspace in the Dockerfile. But if the user isn't mounting, we want to unset it so that OpenHands doesn't mount at all - unset WORKSPACE_BASE -fi - -if [[ "$SANDBOX_USER_ID" -eq 0 ]]; then - echo "Running OpenHands as root" - export RUN_AS_OPENHANDS=false - mkdir -p /root/.cache/ms-playwright/ - if [ -d "/home/openhands/.cache/ms-playwright/" ]; then - mv /home/openhands/.cache/ms-playwright/ /root/.cache/ - fi - "$@" -else - echo "Setting up enduser with id $SANDBOX_USER_ID" - if id "enduser" &>/dev/null; then - echo "User enduser already exists. Skipping creation." - else - if ! useradd -l -m -u $SANDBOX_USER_ID -s /bin/bash enduser; then - echo "Failed to create user enduser with id $SANDBOX_USER_ID. Moving openhands user." - incremented_id=$(($SANDBOX_USER_ID + 1)) - usermod -u $incremented_id openhands - if ! useradd -l -m -u $SANDBOX_USER_ID -s /bin/bash enduser; then - echo "Failed to create user enduser with id $SANDBOX_USER_ID for a second time. Exiting." 
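-            # Both attempts failed: the requested UID was already taken (usually by
-            # the openhands user), and retrying after moving openhands to UID+1 did
-            # not free it either, so give up.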
- exit 1 - fi - fi - fi - usermod -aG app enduser - # get the user group of /var/run/docker.sock and set openhands to that group - DOCKER_SOCKET_GID=$(stat -c '%g' /var/run/docker.sock) - echo "Docker socket group id: $DOCKER_SOCKET_GID" - if getent group $DOCKER_SOCKET_GID; then - echo "Group with id $DOCKER_SOCKET_GID already exists" - else - echo "Creating group with id $DOCKER_SOCKET_GID" - groupadd -g $DOCKER_SOCKET_GID docker - fi - - mkdir -p /home/enduser/.cache/huggingface/hub/ - mkdir -p /home/enduser/.cache/ms-playwright/ - if [ -d "/home/openhands/.cache/ms-playwright/" ]; then - mv /home/openhands/.cache/ms-playwright/ /home/enduser/.cache/ - fi - - usermod -aG $DOCKER_SOCKET_GID enduser - echo "Running as enduser" - su enduser /bin/bash -c "${*@Q}" # This magically runs any arguments passed to the script as a command -fi \ No newline at end of file diff --git a/src/main/build.rs b/src/main/build.rs index 2e71809..acd9818 100644 --- a/src/main/build.rs +++ b/src/main/build.rs @@ -8,7 +8,7 @@ fn main() -> Result<()> { "src/general/network/proto_src/metric.proto", "src/general/network/proto_src/remote_sys.proto", "src/general/network/proto_src/data.proto", - "src/general/app/app_shared/process_rpc_proto.proto", + "src/worker/func/shared/process_rpc_proto.proto", ], &["src/"], )?; diff --git a/src/main/src/general/app/app_native/app_checkpoint.rs b/src/main/src/general/app/app_native/app_checkpoint.rs deleted file mode 100644 index e69de29..0000000 diff --git a/src/main/src/general/app/app_native/mod.rs b/src/main/src/general/app/app_native/mod.rs deleted file mode 100644 index 37dae7c..0000000 --- a/src/main/src/general/app/app_native/mod.rs +++ /dev/null @@ -1,96 +0,0 @@ -pub mod app_checkpoint; - -use std::collections::HashMap; - -use super::AppMeta; -use super::AppType; -use crate::general::app::instance::Instance; -use crate::general::app::instance::InstanceTrait; -use crate::general::app::m_executor::FnExeCtx; -use crate::general::app::DataAccess; -use crate::general::app::DataEventTrigger; -use crate::general::app::FnMeta; -use crate::general::app::KeyPattern; -use crate::general::data::m_data_general::DATA_UID_PREFIX_APP_META; -use crate::new_map; -use crate::result::WSResult; -use async_trait::async_trait; - -pub struct NativeAppInstance { - _dummy_private: (), // avoid empty struct -} - -impl NativeAppInstance { - pub fn new() -> Self { - Self { _dummy_private: () } - } -} - -#[async_trait] -impl InstanceTrait for NativeAppInstance { - // don’t need instance name - fn instance_name(&self) -> String { - "native_app_dummy_instance".to_string() - } - async fn execute(&self, _fn_ctx: &mut FnExeCtx) -> WSResult> { - todo!() - } -} - -impl From for Instance { - fn from(v: NativeAppInstance) -> Self { - Self::Native(v) - } -} - -pub fn native_apps() -> HashMap { - let mut nativeapps = HashMap::new(); - // https://fvd360f8oos.feishu.cn/wiki/GGUnw0H1diVoHSkgm3vcMhtbnjI - // app_checkpoint: - // checkpointable: - // inner_dataset: - // app_{}: - // - get - // checkpoint: - // inner_dataset: - // app_{}: - // - trigger_by_write: - // condition: checkpointable - // - get - let _ = nativeapps.insert( - "app_checkpoint".to_string(), - AppMeta::new( - AppType::Native, - new_map!(HashMap { - "checkpointable".to_string() => FnMeta { - calls: vec![], - data_accesses: Some(new_map!(HashMap { - KeyPattern(DATA_UID_PREFIX_APP_META.to_string()) => DataAccess { - get: true, - set: false, - delete: false, - event: None, - } - })) - }, - "checkpoint".to_string() => FnMeta { - calls: vec![], - 
data_accesses: Some({ - new_map!(HashMap { - KeyPattern(DATA_UID_PREFIX_APP_META.to_string()) => DataAccess { - get: true, - set: false, - delete: false, - event: Some(DataEventTrigger::WriteWithCondition { - condition: "checkpointable".to_string(), - }), - } - }) - }), - }, - }), - ), - ); - - nativeapps -} diff --git a/src/main/src/general/app/app_shared/java.rs b/src/main/src/general/app/app_shared/java.rs deleted file mode 100644 index d70304a..0000000 --- a/src/main/src/general/app/app_shared/java.rs +++ /dev/null @@ -1,107 +0,0 @@ -use std::{path::PathBuf, str::from_utf8, time::Duration}; - -use tokio::process::{self, Command}; - -use crate::{ - general::m_os::{OperatingSystem, OsProcessType}, - result::{WSError, WSResult, WsFuncError}, -}; -use std::path::Path; - -use super::process::PID; - -pub(super) struct JavaColdStart { - _dummy_private: (), -} - -impl JavaColdStart { - pub(super) async fn mksure_checkpoint(appdir: PathBuf) -> Self { - let mut i = 0; - loop { - // if dir not exist, continue - if !appdir.join("checkpoint-dir").exists() { - continue; - } - - let checkpoint_dir = appdir.join("checkpoint-dir"); - - // let lsres = Command::new("ls") - // .arg("-l") - // .arg(checkpoint_dir.to_str().unwrap()) - // .output() - // .await - // .expect("ls failed"); - - // tracing::debug!("ls checkpoint-dir output: {:?}", lsres); - - let res = Command::new("lsof") - .arg("+D") // check all process with files in checkpoint-dir - .arg(checkpoint_dir.to_str().unwrap()) - .output() - .await - .expect("lsof failed"); - - tracing::debug!("lsof checkpoint-dir output: {:?}", res); - - let output = from_utf8(&res.stdout).expect("failed to parse output to string"); - if output == "" { - break; - } - - let sleep_time = match i { - 0 => 1000, - 1 => 500, - _ => 200, - }; - tokio::time::sleep(Duration::from_millis(sleep_time)).await; - i += 1; - } - - Self { _dummy_private: () } - } - - pub fn direct_start() -> Self { - Self { _dummy_private: () } - } - - pub(super) fn cold_start(self, app: &str, os: &OperatingSystem) -> WSResult { - tracing::debug!("java cold start {}", app); - let p = os.start_process(OsProcessType::JavaApp(app.to_owned())); - Ok(p) - } -} - -pub(super) async fn find_pid(app: &str) -> WSResult { - let res = Command::new("jcmd") - .arg("-l") - .output() - .await - .map_err(|e| WSError::from(WsFuncError::InstanceProcessStartFailed(e)))?; - let res = from_utf8(&res.stdout).expect("failed to parse output to string"); - let res = res.split(|x| x == '\n').collect::>(); - tracing::debug!("jcmd output: {:?}", res); - let err = || Err(WsFuncError::InstanceJavaPidNotFound(app.to_owned()).into()); - let Some(res) = res - .iter() - .filter(|x| x.contains(&format!("--appName={}", app))) - .next() - else { - return err(); - }; - let Some(res) = res.split(|x| x == ' ').next() else { - return err(); - }; - let Ok(pid) = res.parse::() else { - return err(); - }; - Ok(pid) -} - -pub(super) async fn take_snapshot(app: &str, os: &OperatingSystem) { - let res = os - .start_process(OsProcessType::JavaCheckpoints(app.to_owned())) - .wait() - .await - .unwrap(); - assert!(res.success()); -} diff --git a/src/main/src/general/app/instance/mod.rs b/src/main/src/general/app/instance/mod.rs deleted file mode 100644 index 4197bce..0000000 --- a/src/main/src/general/app/instance/mod.rs +++ /dev/null @@ -1,73 +0,0 @@ -pub mod m_instance_manager; - -use super::app_native::NativeAppInstance; -use super::app_shared::SharedInstance; -use super::m_executor::{FnExeCtxAsync, FnExeCtxSync}; -use 
crate::general::app::app_owned::wasm::WasmInstance; -use crate::general::app::app_shared::process::ProcessInstance; -use crate::result::WSResult; -use async_trait::async_trait; -use enum_as_inner::EnumAsInner; - -#[derive(EnumAsInner)] -pub enum OwnedInstance { - WasmInstance(WasmInstance), -} - -pub enum Instance { - Owned(OwnedInstance), - Shared(SharedInstance), - Native(NativeAppInstance), -} -impl From for Instance { - fn from(v: OwnedInstance) -> Self { - Self::Owned(v) - } -} - -impl From for Instance { - fn from(v: SharedInstance) -> Self { - Self::Shared(v) - } -} - -impl From for Instance { - fn from(v: ProcessInstance) -> Self { - Self::Shared(SharedInstance(v)) - } -} - -#[async_trait] -impl InstanceTrait for Instance { - fn instance_name(&self) -> String { - match self { - Instance::Owned(v) => v.instance_name(), - Instance::Shared(v) => v.instance_name(), - Instance::Native(v) => v.instance_name(), - } - } - async fn execute(&self, fn_ctx: &mut FnExeCtxAsync) -> WSResult> { - match self { - Instance::Owned(v) => v.execute(fn_ctx).await, - Instance::Shared(v) => v.execute(fn_ctx).await, - Instance::Native(v) => v.execute(fn_ctx).await, - } - } - - fn execute_sync(&self, fn_ctx: &mut FnExeCtxSync) -> WSResult> { - match self { - Instance::Owned(v) => v.execute_sync(fn_ctx), - Instance::Shared(v) => v.execute_sync(fn_ctx), - Instance::Native(v) => v.execute_sync(fn_ctx), - } - } -} - -pub enum NewJavaInstanceConfig {} - -#[async_trait] -pub trait InstanceTrait { - fn instance_name(&self) -> String; - async fn execute(&self, fn_ctx: &mut FnExeCtxAsync) -> WSResult>; - fn execute_sync(&self, fn_ctx: &mut FnExeCtxSync) -> WSResult>; -} diff --git a/src/main/src/general/app/mod.rs b/src/main/src/general/app/mod.rs index 6db3aa9..58dd793 100644 --- a/src/main/src/general/app/mod.rs +++ b/src/main/src/general/app/mod.rs @@ -1,20 +1,10 @@ -pub mod app_native; -pub mod app_owned; -pub mod app_shared; pub mod fn_event; mod http; -pub mod instance; -pub mod m_executor; -pub mod v_os; - -use super::data::m_data_general::{DataSetMetaV2, GetOrDelDataArg, GetOrDelDataArgType}; -use crate::general::app::app_native::native_apps; -use crate::general::app::instance::m_instance_manager::InstanceManager; -use crate::general::app::m_executor::Executor; -use crate::general::app::v_os::AppMetaVisitOs; -use crate::general::network::proto_ext::ProtoExtDataItem; -use crate::util::VecExt; -use crate::{general::network::proto, result::WSResultExt}; +mod v_os; + +use self::v_os::AppMetaVisitOs; +use crate::{general::network::proto, result::WSResultExt, worker::m_executor::Executor}; +use crate::{general::network::proto_ext::ProtoExtDataItem, util::VecExt}; use crate::{ general::{ data::{ @@ -37,12 +27,13 @@ use crate::{ result::{ErrCvt, WSResult, WsFuncError}, sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef}, util::{self, JoinHandleWrapper}, + worker::func::m_instance_manager::InstanceManager, }; + use async_trait::async_trait; use axum::body::Bytes; use enum_as_inner::EnumAsInner; use serde::{Deserialize, Deserializer, Serialize}; -use std::path::PathBuf; use std::{ borrow::Borrow, collections::{BTreeMap, HashMap}, @@ -51,8 +42,11 @@ use std::{ path::Path, }; use tokio::sync::RwLock; + use ws_derive::LogicalModule; +use super::data::m_data_general::{GetOrDelDataArg, GetOrDelDataArgType}; + logical_module_view_impl!(View); logical_module_view_impl!(View, os, OperatingSystem); logical_module_view_impl!(View, kv_store_engine, KvStoreEngine); @@ -60,9 +54,9 @@ logical_module_view_impl!(View, 
http_handler, Box); logical_module_view_impl!(View, appmeta_manager, AppMetaManager); logical_module_view_impl!(View, p2p, P2PModule); logical_module_view_impl!(View, master, Option); -logical_module_view_impl!(View, instance_manager, InstanceManager); +logical_module_view_impl!(View, instance_manager, Option); logical_module_view_impl!(View, data_general, DataGeneral); -logical_module_view_impl!(View, executor, Executor); +logical_module_view_impl!(View, executor, Option); #[derive(Debug, Serialize, Deserialize)] #[serde(untagged)] @@ -127,14 +121,13 @@ pub enum HttpCall { pub enum FnCallMeta { Http { method: HttpMethod, call: HttpCall }, Rpc, - Event, } #[derive(Debug)] pub struct FnMetaYaml { /// key to operations pub calls: Vec, - pub kvs: Option>>, + pub kvs: Option>>, } impl<'de> Deserialize<'de> for FnMetaYaml { @@ -199,62 +192,23 @@ impl<'de> Deserialize<'de> for FnMetaYaml { } } -#[derive(Hash, Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] pub struct KeyPattern(pub String); -// #[derive(Debug, Clone, Serialize, Deserialize)] -// pub struct KvMeta { -// set: bool, -// get: bool, -// delete: bool, -// pub pattern: KeyPattern, -// } - -#[derive(Debug, Clone, Serialize, Deserialize, EnumAsInner)] -pub enum DataEventTrigger { - Write, - New, - WriteWithCondition { condition: String }, - NewWithCondition { condition: String }, -} - #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DataAccess { +pub struct KvMeta { set: bool, get: bool, delete: bool, - pub event: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum FnSyncAsyncSupport { - Sync, - Async, - SyncAndAsync, -} - -impl FnSyncAsyncSupport { - pub fn syncable(&self) -> bool { - matches!( - self, - FnSyncAsyncSupport::Sync | FnSyncAsyncSupport::SyncAndAsync - ) - } - pub fn asyncable(&self) -> bool { - matches!( - self, - FnSyncAsyncSupport::Async | FnSyncAsyncSupport::SyncAndAsync - ) - } + pub pattern: KeyPattern, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FnMeta { - pub sync_async: FnSyncAsyncSupport, pub calls: Vec, // pub event: Vec, // pub args: Vec, - pub data_accesses: Option>, + pub kvs: Option>, } #[derive(Debug, Deserialize)] @@ -262,55 +216,20 @@ pub struct AppMetaYaml { pub fns: HashMap, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq)] pub enum AppType { Jar, Wasm, - Native, } #[derive(Serialize, Deserialize, Clone)] pub struct AppMeta { pub app_type: AppType, - pub fns: HashMap, + fns: HashMap, cache_contains_http_fn: Option, } impl AppMeta { - pub fn new(app_type: AppType, fns: HashMap) -> Self { - Self { - app_type, - fns, - cache_contains_http_fn: None, - } - } - - pub async fn new_from_yaml( - metayaml: AppMetaYaml, - app_name: &str, - meta_fs: &AppMetaVisitOs, - ) -> WSResult { - let fns = metayaml - .fns - .into_iter() - .map(|(fnname, fnmeta)| { - let fnmeta = fnmeta.into(); - (fnname, fnmeta) - }) - .collect(); - let app_type = meta_fs.get_app_type(app_name).await?; - Ok(Self { - app_type, - fns, - cache_contains_http_fn: None, - }) - } - pub fn fns(&self) -> Vec { - self.fns.iter().map(|(fnname, _)| fnname.clone()).collect() - } - pub fn get_fn_meta(&self, fnname: &str) -> Option<&FnMeta> { - self.fns.get(fnname) - } pub fn contains_http_fn(&self) -> bool { if let Some(v) = self.cache_contains_http_fn { return v; @@ -394,6 +313,85 @@ impl FnMeta { _ => None, }) } + + pub fn match_key(&self, key: &[u8], ope: 
KvOps) -> Option { + let key = if let Ok(key) = std::str::from_utf8(key) { + key + } else { + return None; + }; + if let Some(kvs) = &self.kvs { + for kv in kvs { + if kv.pattern.match_key(key) { + match ope { + KvOps::Get => { + if kv.get { + return Some(kv.pattern.clone()); + } + } + KvOps::Set => { + if kv.set { + return Some(kv.pattern.clone()); + } + } + KvOps::Delete => { + if kv.delete { + return Some(kv.pattern.clone()); + } + } + } + tracing::info!("allow ope {:?}, cur ope:{:?}", kv, ope); + } + } + // tracing::info!("no key pattern matched for key: {}", key); + } + + None + } + + pub fn try_get_kv_meta_by_index(&self, index: usize) -> Option<&KvMeta> { + if let Some(kvs) = &self.kvs { + return kvs.get(index); + } + None + } + + // / index should be valid + // fn get_kv_meta_by_index_unwrap(&self, index: usize) -> &KvMeta { + // self.try_get_kv_meta_by_index(index).unwrap() + // } + // /// get event related kvmeta matches operation + // pub fn get_event_kv(&self, ope: KvOps, event: &FnEvent) -> Option<&KvMeta> { + // match event { + // FnEvent::KvSet(kv_set) => { + // if ope == KvOps::Set { + // return Some(self.get_kv_meta_by_index_unwrap(*kv_set)); + // } + // } + // FnEvent::HttpApp => {} + // } + // None + // } + + // / find kv event trigger with match the `pattern` and `ope` + // pub fn find_will_trigger_kv_event(&self, _pattern: &KeyPattern, _ope: KvOps) -> Option<&KvMeta> { + // unimplemented!() + // // self.event.iter().find_map(|event| { + // // match event { + // // FnEvent::HttpApp => {} + // // FnEvent::KvSet(key_index) => { + // // if ope == KvOps::Set { + // // let res = self.get_kv_meta_by_index_unwrap(*key_index); + // // if res.pattern == *pattern { + // // return Some(res); + // // } + // // } + // // } + // // FnEvent::HttpFn => {} + // // } + // // None + // // }) + // } } impl KeyPattern { @@ -402,7 +400,7 @@ impl KeyPattern { } // match {} for any words // "xxxx_{}_{}" matches "xxxx_abc_123" - // "xxxx{}{}" matches "xxxxabc123" + // “xxxx{}{}" matches "xxxxabc123" pub fn match_key(&self, key: &str) -> bool { let re = self.0.replace("{}", "[a-zA-Z0-9]+"); // let pattern_len = re.len(); @@ -440,87 +438,40 @@ impl KeyPattern { impl From for FnMeta { fn from(yaml: FnMetaYaml) -> Self { + let kvs = if let Some(kvs) = yaml.kvs { + Some( + kvs.into_iter() + .map(|(key, ops)| { + let mut set = false; + let mut get = false; + let mut delete = false; + for op in ops { + if op == "set" { + set = true; + } else if op == "get" { + get = true; + } else if op == "delete" { + delete = true; + } else { + panic!("invalid operation: {}", op); + } + } + // TODO: check key pattern + KvMeta { + delete, + set, + get, + pattern: KeyPattern::new(key), + } + }) + .collect(), + ) + } else { + None + }; let res = Self { calls: yaml.calls, - data_accesses: if let Some(kvs) = yaml.kvs { - Some( - kvs.into_iter() - .map(|(key, ops)| { - let mut set = false; - let mut get = false; - let mut delete = false; - let mut event = None; - for op in ops { - #[derive(Serialize, Deserialize)] - struct TriggerWithCondition { - condition: String, - } - if let Some(opstr) = op.as_str() { - match opstr { - "write" | "set" => set = true, - "read" | "get" => get = true, - "delete" => delete = true, - "trigger_by_write" => { - event = Some(DataEventTrigger::Write); - } - "trigger_by_new" => { - event = Some(DataEventTrigger::New); - } - _ => { - panic!("invalid op: {:?}", op); - } - } - } else if let Ok(trigger_with_condition) = - serde_yaml::from_value::>( - op.clone(), - ) - { - if 
trigger_with_condition.len() == 1 { - if let Some(t) = - trigger_with_condition.get("trigger_by_write") - { - event = Some(DataEventTrigger::WriteWithCondition { - condition: t.condition.clone(), - }); - } else if let Some(t) = - trigger_with_condition.get("trigger_by_new") - { - event = Some(DataEventTrigger::NewWithCondition { - condition: t.condition.clone(), - }); - } else { - panic!("invalid op: {:?}", op); - } - } else { - panic!("invalid op: {:?}", op); - } - } else { - panic!("invalid op: {:?}", op); - } - } - // // TODO: check key pattern - // KvMeta { - // delete, - // set, - // get, - // pattern: KeyPattern::new(key), - // } - - ( - KeyPattern::new(key), - DataAccess { - delete, - set, - get, - event, - }, - ) - }) - .collect(), - ) - } else { - None - }, + kvs, }; // assert!(res.check_kv_valid()); res @@ -538,6 +489,44 @@ impl From for FnMeta { // } // } +impl AppMeta { + pub async fn new( + metayaml: AppMetaYaml, + app_name: &str, + meta_fs: &AppMetaVisitOs, + ) -> WSResult { + let fns = metayaml + .fns + .into_iter() + .map(|(fnname, fnmeta)| { + let fnmeta = fnmeta.into(); + (fnname, fnmeta) + }) + .collect(); + let app_type = meta_fs.get_app_type(app_name).await?; + Ok(Self { + app_type, + fns, + cache_contains_http_fn: None, + }) + } + pub fn fns(&self) -> Vec { + self.fns.iter().map(|(fnname, _)| fnname.clone()).collect() + } + pub fn get_fn_meta(&self, fnname: &str) -> Option<&FnMeta> { + self.fns.get(fnname) + } + // pub fn http_trigger_fn(&self) -> Option<&str> { + // self.fns.iter().find_map(|(fnname, fnmeta)| { + // if fnmeta.event.iter().any(|e| e == &FnEvent::HttpApp) { + // Some(fnname.as_str()) + // } else { + // None + // } + // }) + // } +} + lazy_static::lazy_static! { static ref VIEW: Option = None; } @@ -553,7 +542,6 @@ pub struct AppMetaManager { meta: RwLock, pub fs_layer: AppMetaVisitOs, view: View, - pub native_apps: HashMap, // app_meta_list_lock: Mutex<()>, } @@ -579,31 +567,25 @@ impl LogicalModule for AppMetaManager { }), view, fs_layer, - native_apps: native_apps(), // app_meta_list_lock: Mutex::new(()), } } async fn init(&self) -> WSResult<()> { - { - let mut router = self.view.http_handler().building_router(); + let mut router = self.view.http_handler().building_router(); - let take = router.option_mut().take().unwrap(); - let take = http::binds(take, self.view.clone()); - let _ = router.option_mut().replace(take); - // .route("/appman/upload", post(handler2)) - } - self.load_apps().await?; + let take = router.option_mut().take().unwrap(); + let take = http::binds(take, self.view.clone()); + let _ = router.option_mut().replace(take); + // .route("/appman/upload", post(handler2)) Ok(()) } async fn start(&self) -> WSResult> { - // load apps - - // self.meta - // .write() - // .await - // .load_all_app_meta(&self.view.os().file_path, &self.fs_layer) - // .await?; + self.meta + .write() + .await + .load_all_app_meta(&self.view.os().file_path, &self.fs_layer) + .await?; Ok(vec![]) } } @@ -626,80 +608,76 @@ impl AppMetas { ) -> Option<&Vec<(String, String)>> { self.pattern_2_app_fn.get(pattern.borrow()) } - // async fn load_all_app_meta( - // &mut self, - // file_dir: impl AsRef, - // meta_fs: &AppMetaVisitOs, - // ) -> WSResult<()> { - // if !file_dir.as_ref().join("apps").exists() { - // fs::create_dir_all(file_dir.as_ref().join("apps")).unwrap(); - // return Ok(()); - // } - // let entries = - // fs::read_dir(file_dir.as_ref().join("apps")).map_err(|e| ErrCvt(e).to_ws_io_err())?; - - // // 遍历文件夹中的每个条目 - // for entry in entries { - // // 获取目录项的 Result 
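+ // (the comments in this loop translate to: iterate each entry under apps/,
+ // unwrap the entry's Result, then take its file name; the directory name is
+ // the app name)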
- // let entry = entry.map_err(|e| ErrCvt(e).to_ws_io_err())?; - // // 获取目录项的文件名 - // let file_name = entry.file_name(); - // // dir name is the app name - // let app_name = file_name.to_str().unwrap().to_owned(); - - // // allow spec files - // if entry.file_type().unwrap().is_file() { - // let allowed_files = vec!["crac_config"]; - // assert!(allowed_files - // .contains(&&*(*entry.file_name().as_os_str().to_string_lossy()).to_owned())); - // continue; - // } + async fn load_all_app_meta( + &mut self, + file_dir: impl AsRef, + meta_fs: &AppMetaVisitOs, + ) -> WSResult<()> { + if !file_dir.as_ref().join("apps").exists() { + fs::create_dir_all(file_dir.as_ref().join("apps")).unwrap(); + return Ok(()); + } + let entries = + fs::read_dir(file_dir.as_ref().join("apps")).map_err(|e| ErrCvt(e).to_ws_io_err())?; + + // 遍历文件夹中的每个条目 + for entry in entries { + // 获取目录项的 Result + let entry = entry.map_err(|e| ErrCvt(e).to_ws_io_err())?; + // 获取目录项的文件名 + let file_name = entry.file_name(); + // dir name is the app name + let app_name = file_name.to_str().unwrap().to_owned(); + + // allow spec files + if entry.file_type().unwrap().is_file() { + let allowed_files = vec!["crac_config"]; + assert!(allowed_files + .contains(&&*(*entry.file_name().as_os_str().to_string_lossy()).to_owned())); + continue; + } - // // allow only dir - // assert!(entry.file_type().unwrap().is_dir()); + // allow only dir + assert!(entry.file_type().unwrap().is_dir()); - // // read app config yaml - // let meta_yaml = { - // let apps_dir = file_dir.as_ref().join("apps"); - // let file_name_str = app_name.clone(); - // tokio::task::spawn_blocking(move || AppMetaYaml::read(apps_dir, &*file_name_str)) - // .await - // .unwrap() - // }; + // read app config yaml + let meta_yaml = { + let apps_dir = file_dir.as_ref().join("apps"); + let file_name_str = app_name.clone(); + tokio::task::spawn_blocking(move || AppMetaYaml::read(apps_dir, &*file_name_str)) + .await + .unwrap() + }; - // // transform - // let meta = AppMeta::new(meta_yaml, &app_name, meta_fs).await.unwrap(); - - // //TODO: build and checks - // // - build up key pattern to app fn - - // // for (fnname, fnmeta) in &meta.fns { - // // for event in &fnmeta.event { - // // match event { - // // // not kv event, no key pattern - // // FnEvent::HttpFn => {} - // // FnEvent::HttpApp => {} - // // FnEvent::KvSet(key_index) => { - // // let kvmeta = fnmeta.try_get_kv_meta_by_index(*key_index).unwrap(); - // // self.pattern_2_app_fn - // // .entry(kvmeta.pattern.0.clone()) - // // .or_insert_with(Vec::new) - // // .push((app_name.clone(), fnname.clone())); - // // } - // // } - // // } - // // } - // let _ = self.tmp_app_metas.insert(app_name, meta); - // } - // Ok(()) - // } + // transform + let meta = AppMeta::new(meta_yaml, &app_name, meta_fs).await.unwrap(); + + //TODO: build and checks + // - build up key pattern to app fn + + // for (fnname, fnmeta) in &meta.fns { + // for event in &fnmeta.event { + // match event { + // // not kv event, no key pattern + // FnEvent::HttpFn => {} + // FnEvent::HttpApp => {} + // FnEvent::KvSet(key_index) => { + // let kvmeta = fnmeta.try_get_kv_meta_by_index(*key_index).unwrap(); + // self.pattern_2_app_fn + // .entry(kvmeta.pattern.0.clone()) + // .or_insert_with(Vec::new) + // .push((app_name.clone(), fnname.clone())); + // } + // } + // } + // } + let _ = self.tmp_app_metas.insert(app_name, meta); + } + Ok(()) + } } impl AppMetaManager { - async fn load_apps(&self) -> WSResult<()> { - // TODO: Implement app loading logic - Ok(()) - } async 
fn construct_tmp_app(&self, tmpapp: &str) -> WSResult<AppMeta> {
         // 1.meta
         // let appdir = self.fs_layer.concat_app_dir(app);
@@ -762,76 +740,14 @@ impl AppMetaManager {
         }
     }
 
-    /// get app by idx 1
-    pub async fn load_app_file(&self, app: &str, datameta: DataSetMetaV2) -> WSResult<()> {
-        tracing::debug!(
-            "calling get_or_del_data to load app file, app: {}, datameta: {:?}",
-            app,
-            datameta
-        );
-        let mut data = match self
-            .view
-            .data_general()
-            .get_or_del_data(GetOrDelDataArg {
-                meta: Some(datameta),
-                unique_id: format!("{}{}", DATA_UID_PREFIX_APP_META, app).into(),
-                ty: GetOrDelDataArgType::PartialOne { idx: 1 },
-            })
-            .await
-        {
-            Err(err) => {
-                tracing::warn!("get app file failed, err: {:?}", err);
-                return Err(err);
-            }
-            Ok((_datameta, data)) => data,
-        };
-
-        let proto::DataItem {
-            data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(appfiledata)),
-        } = data.remove(&1).unwrap()
-        else {
-            return Err(WsFuncError::InvalidAppMetaDataItem {
-                app: app.to_owned(),
-            }
-            .into());
-        };
-
-        // extract app file
-        let zipfilepath = appfiledata.file_name_opt;
-        let appdir = self.fs_layer.concat_app_dir(app);
-        let res = tokio::task::spawn_blocking(move || {
-            // remove old app dir
-            if appdir.exists() {
-                fs::remove_dir_all(&appdir).unwrap();
-            }
-            // open zip file
-            let zipfile = std::fs::File::open(zipfilepath)?;
-            zip_extract::extract(zipfile, &appdir, false)
-        })
-        .await
-        .unwrap();
-
-        if let Err(err) = res {
-            tracing::warn!("extract app file failed, err: {:?}", err);
-            return Err(WsFuncError::AppPackFailedZip(err).into());
-        }
-
-        Ok(())
-    }
-    /// get app meta by idx 0
-    /// None DataSetMetaV2 means temp app prepared
-    /// Some DataSetMetaV2 means app from inner storage
-    pub async fn get_app_meta(
-        &self,
-        app: &str,
-    ) -> WSResult<Option<(AppMeta, Option<DataSetMetaV2>)>> {
+    // call inner AppMetas.get_app_meta
+    pub async fn get_app_meta(&self, app: &str) -> WSResult<Option<AppMeta>> {
         if let Some(res) = self.meta.read().await.get_tmp_app_meta(app) {
-            return Ok(Some((res, None)));
+            return Ok(Some(res));
         }
 
         // self.app_metas.get(app)
-        tracing::debug!("calling get_or_del_data to get app meta, app: {}", app);
-        let datameta = view()
+        let meta = view()
             .data_general()
             .get_or_del_data(GetOrDelDataArg {
                 meta: None,
@@ -841,7 +757,8 @@
             .await;
 
         // only one data item
-        let (datameta, meta): (DataSetMetaV2, proto::DataItem) = match datameta {
+        let (_, meta): (_, proto::DataItem) = match meta {
+            Ok((_, datas)) => datas.into_iter().next().unwrap(),
             Err(err) => match err {
                 WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }) => {
                     tracing::debug!(
@@ -855,7 +772,6 @@
                     return Err(err);
                 }
             },
-            Ok((datameta, mut datas)) => (datameta, datas.remove(&0).unwrap()),
         };
 
         let proto::DataItem {
@@ -883,7 +799,7 @@
             }
             Ok(meta) => meta,
         };
-        Ok(Some((meta, Some(datameta))))
+        Ok(Some(meta))
     }
 
     pub async fn app_uploaded(&self, appname: String, data: Bytes) -> WSResult<()> {
diff --git a/src/main/src/general/app/v_os.rs b/src/main/src/general/app/v_os.rs
index e8c302b..bd43a82 100644
--- a/src/main/src/general/app/v_os.rs
+++ b/src/main/src/general/app/v_os.rs
@@ -13,21 +13,16 @@ impl AppMetaVisitOs {
         Self { view }
     }
 
-    pub fn crac_file_path(&self) -> PathBuf {
-        self.view
-            .os()
-            .file_path
-            .clone()
-            .join("apps")
-            .join("crac_config")
+    pub fn crac_file_path(&self) -> String {
+        let sys_dir = &self.view.os().file_path;
+        let app_dir = Path::new(sys_dir).join("apps").join("crac_config");
+        (*app_dir.as_os_str().to_string_lossy()).to_owned()
     }
 
     pub fn 
concat_app_dir(&self, app: &str) -> PathBuf { - self.view.os().file_path.clone().join("apps").join(app) - } - - pub fn app_dir(&self) -> PathBuf { - self.view.os().file_path.clone().join("apps") + let sys_dir = &self.view.os().file_path; + let app_dir = Path::new(sys_dir).join("apps").join(app); + app_dir } pub async fn read_app_meta(&self, app: &str) -> WSResult { @@ -49,7 +44,7 @@ impl AppMetaVisitOs { } Ok(ok) => ok, }; - AppMeta::new_from_yaml(yml, app, self).await + AppMeta::new(yml, app, self).await } pub async fn get_app_type_in_dir(&self, app_dir: impl AsRef) -> WSResult { diff --git a/src/main/src/general/data/m_data_general/dataitem.rs b/src/main/src/general/data/m_data_general/dataitem.rs index 681ae9c..3ccbd85 100644 --- a/src/main/src/general/data/m_data_general/dataitem.rs +++ b/src/main/src/general/data/m_data_general/dataitem.rs @@ -57,7 +57,6 @@ impl<'a> Iterator for WantIdxIter<'a> { WantIdxIter::PartialMany { iter, .. } => iter.next().map(|v| *v as DataItemIdx), WantIdxIter::PartialOne { idx, itercnt } => { if *itercnt == 0 { - *itercnt += 1; Some(*idx) } else { None @@ -185,12 +184,8 @@ impl WriteSplitDataTaskGroup { let split_range = splits[splitidx as usize].clone(); let task = tokio::task::spawn_blocking(move || { - let Some(proto::FileData { - file_content: split_data_bytes, - .. - }) = split_data_item.as_file_data() - else { - return Err(WsDataError::SplitDataItemNotFileData { + let Some(split_data_bytes) = split_data_item.as_raw_bytes() else { + return Err(WsDataError::SplitDataItemNotRawBytes { unique_id: unique_id.clone(), splitidx, } diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/data/m_data_general/mod.rs index dd096ce..3e6d1a9 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/data/m_data_general/mod.rs @@ -279,7 +279,7 @@ impl DataGeneralView { responsor: RPCResponsor, req: proto::GetOneDataRequest, ) -> WSResult<()> { - tracing::debug!("starting rpc_handle_get_one_data {:?}", req); + tracing::debug!("rpc_handle_get_one_data {:?}", req); // req.unique_id let kv_store_engine = self.kv_store_engine(); @@ -348,7 +348,6 @@ impl DataGeneralView { if success { for v in got_or_deleted { let decode_res = proto::DataItem::decode_persist(v.unwrap().1); - tracing::debug!("decode_res type: {:?}", decode_res.to_string()); // if let Ok(v) = decode_res { got_or_deleted_checked.push(decode_res); // } else { @@ -384,6 +383,32 @@ impl DataGeneralView { tracing::debug!("verify data meta bf write data"); let kv_store_engine = self.kv_store_engine(); + // Step 0: pre-check + { + if req.data.is_empty() { + responsor + .send_resp(WriteOneDataResponse { + remote_version: 0, + success: false, + message: "Request data is empty".to_owned(), + }) + .await + .todo_handle(); + return; + } + if req.data[0].data_item_dispatch.is_none() { + responsor + .send_resp(WriteOneDataResponse { + remote_version: 0, + success: false, + message: "Request data enum is none".to_owned(), + }) + .await + .todo_handle(); + return; + } + } + // Step1: verify version // take old meta #[allow(unused_assignments)] @@ -532,16 +557,8 @@ impl DataGeneralView { // res.as_ref().unwrap().1.version // }; - for data_with_idx in req.data.into_iter() { - let proto::DataItemWithIdx { idx, data } = data_with_idx; - let data = data.unwrap(); + for (idx, data) in req.data.into_iter().enumerate() { let serialize = data.encode_persist(); - tracing::debug!( - "writing data part uid({:?}) idx({}) item({})", - req.unique_id, - idx, - data.to_string() - ); 
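One observation on the WantIdxIter::PartialOne hunk above: the removed `*itercnt += 1;` is what made that arm one-shot; without it the arm answers Some(*idx) on every call. A standalone model of the one-shot contract, with field types simplified to u8 for illustration:

/// Yields the single requested index once, then ends.
struct PartialOne {
    idx: u8,
    yielded: bool,
}

impl Iterator for PartialOne {
    type Item = u8;
    fn next(&mut self) -> Option<u8> {
        if self.yielded {
            None
        } else {
            self.yielded = true; // plays the role of the removed `*itercnt += 1;`
            Some(self.idx)
        }
    }
}

fn main() {
    let got: Vec<u8> = PartialOne { idx: 3, yielded: false }.collect();
    assert_eq!(got, vec![3]); // exactly one index is produced
}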
if let Err(err) = kv_store_engine.set( KeyTypeDataSetItem { uid: req.unique_id.as_ref(), //req.unique_id.clone(), @@ -785,19 +802,20 @@ impl DataGeneral { // // TODO 读取数据的时候先看看缓存有没有,如果没有再读数据源,如果有从缓存里面拿,需要校验 version // if !delete { // let mut cached_items = HashMap::new(); + // // 遍历需要获取的索引 // for idx in WantIdxIter::new(&ty) { - // let cache_list = if meta.cache_mode_visitor(idx).is_time_auto() { - // self.auto_cache.clone() + // let cache_key = (unique_id.clone(), idx); + // // 根据缓存模式选择缓存源 + // let cached = if meta.cache_mode_visitor(idx).is_time_auto() { + // self.auto_cache.get(&cache_key) // } else if meta.cache_mode_visitor(idx).is_time_forever() { - // self.forever_cache.clone() + // self.forever_cache.get(&cache_key) // } else { // None // }; - // if cache_list.is_none() { - // continue; - // } + // let Some(cached) = cached else { + // }; // // 从缓存中获取数据 - // let cache_key = (unique_id.clone(), idx); // let cached_value = cache_list.get(&cache_key); // // 如果找到缓存且版本匹配 // if let Some((cached_version, cached_item)) = cached_value { @@ -827,8 +845,8 @@ impl DataGeneral { // return Ok((meta, cached_items)); // } // } + // TODO 如果缓存里只有一部分或者没有,则需要从数据源读取,并且要在数据源读取时判断是不是已经在缓存里找到了 - // 如果缓存里没有,则需要从数据源读取 let mut cache: Vec = Vec::new(); for _ in 0..meta.data_item_cnt() { match &ty { @@ -1180,11 +1198,8 @@ impl DataGeneral { let mut write_source_data_tasks = vec![]; // write the data split to kv - for (dataitem_idx, (one_data_splits, one_data_item)) in version_schedule_resp - .split - .into_iter() - .zip(datas) - .enumerate() + for (one_data_splits, one_data_item) in + version_schedule_resp.split.into_iter().zip(datas) { // let mut last_node_begin: Option<(NodeID, usize)> = None; fn flush_the_data( @@ -1196,7 +1211,6 @@ impl DataGeneral { one_data_item: &proto::DataItem, nodeid: NodeID, offset: usize, - dataitem_idx: usize, write_source_data_tasks: &mut Vec>>, ) { let log_tag = log_tag.to_owned(); @@ -1210,10 +1224,7 @@ impl DataGeneral { let req = WriteOneDataRequest { unique_id, version, - data: vec![proto::DataItemWithIdx { - idx: dataitem_idx as u32, - data: Some(one_data_item_split), - }], + data: vec![one_data_item_split], }; tracing::debug!( "[{}] write_data flushing, target node: {}, `WriteOneDataRequest` msg_id: {}", @@ -1238,7 +1249,6 @@ impl DataGeneral { &one_data_item, split.node_id, split.data_offset as usize, - dataitem_idx, &mut write_source_data_tasks, ); } @@ -1371,8 +1381,6 @@ pub struct DataSetMetaV1 { pub synced_nodes: HashSet, } -pub type CacheMode = u16; - /// the data's all in one meta /// /// attention: new from `DataSetMetaBuilder` @@ -1383,7 +1391,7 @@ pub struct DataSetMetaV2 { // unique_id: Vec, api_version: u8, pub version: u64, - pub cache_mode: Vec, + pub cache_mode: Vec, /// the data splits for each data item, the index is the data item index pub datas_splits: Vec, } @@ -1615,21 +1623,6 @@ impl DataSetMetaBuilder { self } - pub fn set_cache_mode(&mut self, idx: DataItemIdx, mode: u16) -> &mut Self { - self.building.as_mut().unwrap().cache_mode[idx as usize] = mode; - self - } - - pub fn set_cache_mode_for_all(&mut self, mode: Vec) -> &mut Self { - self.building.as_mut().unwrap().cache_mode = mode; - assert_eq!( - self.building.as_mut().unwrap().cache_mode.len(), - self.building.as_mut().unwrap().datas_splits.len(), - "cache mode len must be equal to data splits len" - ); - self - } - pub fn build(&mut self) -> DataSetMetaV2 { self.building.take().unwrap() } @@ -1655,64 +1648,62 @@ impl DataSetMetaBuilder { // } // } -mod test { - #[test] - fn 
test_option_and_vec_serialization_size() { - // 定义一个具体的值 - let value: i32 = 42; - - // 创建 Option 类型的变量 - let some_value: Option = Some(value); - let none_value: Option = None; - - // 创建 Vec 类型的变量 - let empty_vec: Vec = Vec::new(); - let single_element_vec: Vec = vec![value]; - - let some_empty_vec: Option> = Some(vec![]); - let some_one_vec: Option> = Some(vec![value]); - - // 序列化 - let serialized_some = bincode::serialize(&some_value).unwrap(); - let serialized_none = bincode::serialize(&none_value).unwrap(); - let serialized_empty_vec = bincode::serialize(&empty_vec).unwrap(); - let serialized_single_element_vec = bincode::serialize(&single_element_vec).unwrap(); - let serialized_some_empty_vec = bincode::serialize(&some_empty_vec).unwrap(); - let serialized_some_one_vec = bincode::serialize(&some_one_vec).unwrap(); - - // 获取序列化后的字节大小 - let size_some = serialized_some.len(); - let size_none = serialized_none.len(); - let size_empty_vec = serialized_empty_vec.len(); - let size_single_element_vec = serialized_single_element_vec.len(); - let size_some_empty_vec = serialized_some_empty_vec.len(); - let size_some_one_vec = serialized_some_one_vec.len(); - - // 打印结果 - println!("Size of serialized Some(42): {}", size_some); - println!("Size of serialized None: {}", size_none); - println!("Size of serialized empty Vec: {}", size_empty_vec); - println!( - "Size of serialized Vec with one element (42): {}", - size_single_element_vec - ); - println!( - "Size of serialized Some(empty Vec): {}", - size_some_empty_vec - ); - println!( - "Size of serialized Some(one element Vec): {}", - size_some_one_vec - ); - - // 比较大小 - assert!( - size_some > size_none, - "Expected serialized Some to be larger than serialized None" - ); - assert!( - size_single_element_vec > size_empty_vec, - "Expected serialized Vec with one element to be larger than serialized empty Vec" - ); - } +#[test] +fn test_option_and_vec_serialization_size() { + // 定义一个具体的值 + let value: i32 = 42; + + // 创建 Option 类型的变量 + let some_value: Option = Some(value); + let none_value: Option = None; + + // 创建 Vec 类型的变量 + let empty_vec: Vec = Vec::new(); + let single_element_vec: Vec = vec![value]; + + let some_empty_vec: Option> = Some(vec![]); + let some_one_vec: Option> = Some(vec![value]); + + // 序列化 + let serialized_some = bincode::serialize(&some_value).unwrap(); + let serialized_none = bincode::serialize(&none_value).unwrap(); + let serialized_empty_vec = bincode::serialize(&empty_vec).unwrap(); + let serialized_single_element_vec = bincode::serialize(&single_element_vec).unwrap(); + let serialized_some_empty_vec = bincode::serialize(&some_empty_vec).unwrap(); + let serialized_some_one_vec = bincode::serialize(&some_one_vec).unwrap(); + + // 获取序列化后的字节大小 + let size_some = serialized_some.len(); + let size_none = serialized_none.len(); + let size_empty_vec = serialized_empty_vec.len(); + let size_single_element_vec = serialized_single_element_vec.len(); + let size_some_empty_vec = serialized_some_empty_vec.len(); + let size_some_one_vec = serialized_some_one_vec.len(); + + // 打印结果 + println!("Size of serialized Some(42): {}", size_some); + println!("Size of serialized None: {}", size_none); + println!("Size of serialized empty Vec: {}", size_empty_vec); + println!( + "Size of serialized Vec with one element (42): {}", + size_single_element_vec + ); + println!( + "Size of serialized Some(empty Vec): {}", + size_some_empty_vec + ); + println!( + "Size of serialized Some(one element Vec): {}", + size_some_one_vec + ); + + // 比较大小 + assert!( + 
size_some > size_none, + "Expected serialized Some to be larger than serialized None" + ); + assert!( + size_single_element_vec > size_empty_vec, + "Expected serialized Vec with one element to be larger than serialized empty Vec" + ); } diff --git a/src/main/src/general/m_os/mod.rs b/src/main/src/general/m_os/mod.rs index 9e86d35..1500871 100644 --- a/src/main/src/general/m_os/mod.rs +++ b/src/main/src/general/m_os/mod.rs @@ -111,11 +111,6 @@ impl OperatingSystem { pub fn app_path(&self, app: &str) -> PathBuf { self.view.appmeta_manager().fs_layer.concat_app_dir(app) } - - pub fn app_rootdir(&self) -> PathBuf { - self.view.appmeta_manager().fs_layer.app_dir() - } - pub fn start_process(&self, p: OsProcessType) -> process::Child { let (mut binding, log_file) = match p { OsProcessType::JavaApp(app) => { diff --git a/src/main/src/general/network/msg_pack.rs b/src/main/src/general/network/msg_pack.rs index 6361b60..81e7d11 100644 --- a/src/main/src/general/network/msg_pack.rs +++ b/src/main/src/general/network/msg_pack.rs @@ -104,21 +104,7 @@ define_msg_ids!( pack.context.is_some() }), (proto::DataVersionScheduleResponse, _pack, { true }), - (proto::WriteOneDataRequest, pack, { - if pack.data.is_empty() { - return false; - } - for data_with_idx in &pack.data { - let proto::DataItemWithIdx { data, .. } = data_with_idx; - if data.is_none() { - return false; - } - if data.as_ref().unwrap().data_item_dispatch.is_none() { - return false; - } - } - true - }), + (proto::WriteOneDataRequest, _pack, { true }), (proto::WriteOneDataResponse, _pack, { true }), (proto::DataMetaUpdateRequest, _pack, { true }), (proto::DataMetaUpdateResponse, _pack, { true }), diff --git a/src/main/src/general/network/proto_ext.rs b/src/main/src/general/network/proto_ext.rs index 7ddfe70..a6166da 100644 --- a/src/main/src/general/network/proto_ext.rs +++ b/src/main/src/general/network/proto_ext.rs @@ -1,4 +1,4 @@ -use crate::general::data::m_dist_lock::DistLockOpe; +use crate::{general::data::m_dist_lock::DistLockOpe}; use super::proto::{self, kv::KvResponse, FileData}; @@ -11,7 +11,6 @@ pub trait ProtoExtDataItem { fn new_raw_bytes(rawbytes: impl Into>) -> Self; fn as_raw_bytes<'a>(&'a self) -> Option<&'a [u8]>; fn new_file_data(filepath: impl AsRef, is_dir: bool) -> Self; - fn as_file_data(&self) -> Option<&proto::FileData>; } impl ProtoExtDataItem for proto::DataItem { @@ -84,13 +83,6 @@ impl ProtoExtDataItem for proto::DataItem { } } } - - fn as_file_data(&self) -> Option<&proto::FileData> { - match &self.data_item_dispatch { - Some(proto::data_item::DataItemDispatch::File(file_data)) => Some(file_data), - _ => None, - } - } } impl AsRef<[u8]> for proto::DataItem { @@ -210,7 +202,7 @@ impl DataItemExt for proto::DataItem { }), 1 => proto::data_item::DataItemDispatch::RawBytes(data[1..].to_owned()), _ => { - panic!("unknown data item type id: {}", data[0]) + panic!("unknown data type") } }; Self { diff --git a/src/main/src/general/network/proto_src/data.proto b/src/main/src/general/network/proto_src/data.proto index cb290b2..90310b1 100644 --- a/src/main/src/general/network/proto_src/data.proto +++ b/src/main/src/general/network/proto_src/data.proto @@ -78,9 +78,9 @@ message DataVersionScheduleResponse { // required // split of each data part // DataCachePlan cache_plan = 2; + repeated uint32 cache_mode=2; repeated DataSplit split = 3; - repeated uint32 cache_nodes=4; } message DataMetaUpdateRequest{ @@ -140,15 +140,10 @@ message OneDataMeta{ string type=2; } -message DataItemWithIdx{ - uint32 idx=1; - DataItem data=2; -} 
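With the DataItemWithIdx wrapper deleted above, the WriteOneDataRequest message that follows carries bare DataItems, so an item's index is implied by its position, matching the enumerate() loop in m_data_general/mod.rs. A sketch of that convention using minimal stand-in types (the real request is the prost-generated struct from this .proto file):

// Minimal mirrors of the generated shapes, for illustration only.
#[derive(Default)]
struct DataItem { /* dispatch payload elided */ }

struct WriteOneDataRequest {
    unique_id: Vec<u8>,
    version: u64,
    data: Vec<DataItem>, // positional: data[i] is data item i
}

// Hypothetical helper: callers must keep `items` ordered by item index,
// since the index no longer travels with each item.
fn build_write_one_data_request(
    unique_id: Vec<u8>,
    version: u64,
    items: Vec<DataItem>,
) -> WriteOneDataRequest {
    WriteOneDataRequest { unique_id, version, data: items }
}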
- message WriteOneDataRequest { bytes unique_id = 1; uint64 version = 2; - repeated DataItemWithIdx data = 3; + repeated DataItem data = 3; } message WriteOneDataResponse { diff --git a/src/main/src/master/app/fddg.rs b/src/main/src/master/app/fddg.rs deleted file mode 100644 index 3bbad97..0000000 --- a/src/main/src/master/app/fddg.rs +++ /dev/null @@ -1,89 +0,0 @@ -use crate::new_map; -use crate::util::container::sync_trie::SyncedTrie; -use crate::{ - general::{ - app::{AppType, FnMeta}, - data::{self, m_data_general::DataItemIdx}, - network::proto, - }, - result::WSResult, -}; -use dashmap::DashMap; -use std::collections::HashMap; -use std::collections::HashSet; - -// function data dependency graph -// - need update when app uploaded -// - to find data binded functions -// - co-scheduling data & functions -pub struct FDDGMgmt { - // data_unique_id prefix -> app name -> (app_type, function names -> fn_meta) - prefix_key_to_functions: SyncedTrie)>>, -} - -// https://fvd360f8oos.feishu.cn/wiki/GGUnw0H1diVoHSkgm3vcMhtbnjI#share-QElHdn6dSoKVBUx5UssccxAZnnd -pub enum FuncTriggerType { - DataWrite, - DataNew, - DataDelete, -} - -impl FDDGMgmt { - pub fn new() -> Self { - Self { - prefix_key_to_functions: SyncedTrie::new(), - } - } - - // return app_name -> (apptype, fn_name -> fn_meta) - pub fn get_binded_funcs( - &self, - _data_unique_id: &str, - _ope: FuncTriggerType, - ) -> HashMap)> { - let mut binded_funcs = HashMap::new(); - let binded_matchers = self.prefix_key_to_functions.match_partial(_data_unique_id); - for matcher in binded_matchers { - let node = matcher.1.read(); - for (app_name, (app_type, _fn_names)) in node.iter() { - let _ = binded_funcs - .entry(app_name.to_string()) - .or_insert((*app_type, HashMap::new())); - } - } - binded_funcs - } - - pub fn add_fn_trigger( - &self, - (app_name, app_type): (&str, AppType), - (fn_name, fn_meta): (&str, &FnMeta), - ) -> WSResult<()> { - if let Some(data_accesses) = fn_meta.data_accesses.as_ref() { - for (key_pattern, data_access) in data_accesses { - let Some(_event) = data_access.event.as_ref() else { - continue; - }; - let node = self - .prefix_key_to_functions - .search_or_insert(&key_pattern.0, || { - new_map! (HashMap { - app_name.to_string() => { - (app_type, new_map! 
(HashMap { - fn_name.to_string() => fn_meta.clone(), - })) - } - }) - }); - let mut node = node.write(); - let _ = node - .entry(app_name.to_string()) - .and_modify(|(_app_type, fn_names)| { - let _ = fn_names.insert(fn_name.to_string(), fn_meta.clone()); - }) - .or_insert_with(|| panic!("app_name not found, should be created when search")); - } - } - Ok(()) - } -} diff --git a/src/main/src/master/app/m_app_master.rs b/src/main/src/master/app/m_app_master.rs deleted file mode 100644 index 6c6399b..0000000 --- a/src/main/src/master/app/m_app_master.rs +++ /dev/null @@ -1,57 +0,0 @@ -use crate::general::app::AppMetaManager; -use crate::logical_module_view_impl; -use crate::master::app::fddg::FDDGMgmt; -use crate::result::WSResult; -use crate::sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef}; -use crate::util::JoinHandleWrapper; -use async_trait::async_trait; -use ws_derive::LogicalModule; - -logical_module_view_impl!(MasterAppMgmtView); -// access general app -logical_module_view_impl!(MasterAppMgmtView, appmeta_manager, AppMetaManager); - -#[derive(LogicalModule)] -pub struct MasterAppMgmt { - view: MasterAppMgmtView, - pub fddg: FDDGMgmt, -} - -#[async_trait] -impl LogicalModule for MasterAppMgmt { - fn inner_new(args: LogicalModuleNewArgs) -> Self - where - Self: Sized, - { - Self { - view: MasterAppMgmtView::new(args.logical_modules_ref.clone()), - fddg: FDDGMgmt::new(), - } - } - - async fn init(&self) -> WSResult<()> { - self.load_apps().await?; - Ok(()) - } - - async fn start(&self) -> WSResult> { - Ok(vec![]) - } -} - -impl MasterAppMgmt { - async fn load_apps(&self) -> WSResult<()> { - // load app triggers to fddg - // - for each native apps - for (app_name, app_meta) in &self.view.appmeta_manager().native_apps { - for (fn_name, fn_meta) in app_meta.fns.iter() { - self.fddg - .add_fn_trigger((&app_name, app_meta.app_type), (&fn_name, &fn_meta))?; - } - } - - // - for each existing apps - - Ok(()) - } -} diff --git a/src/main/src/master/app/mod.rs b/src/main/src/master/app/mod.rs deleted file mode 100644 index 0a559e8..0000000 --- a/src/main/src/master/app/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod fddg; -pub mod m_app_master; diff --git a/src/main/src/master/data/mod.rs b/src/main/src/master/data/mod.rs deleted file mode 100644 index 5ee147e..0000000 --- a/src/main/src/master/data/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod m_data_master; -pub mod m_master_kv; diff --git a/src/main/src/master/data/m_data_master.rs b/src/main/src/master/m_data_master.rs similarity index 51% rename from src/main/src/master/data/m_data_master.rs rename to src/main/src/master/m_data_master.rs index d6ec3ba..9b60388 100644 --- a/src/main/src/master/data/m_data_master.rs +++ b/src/main/src/master/m_data_master.rs @@ -1,22 +1,20 @@ -use crate::general::app::m_executor::Executor; -use crate::general::app::DataEventTrigger; +use std::collections::HashSet; +use std::time::Duration; + +use crate::general::data::{ + m_data_general::{ + DataGeneral, DataItemIdx, DataSetMetaBuilder, DataSplit, EachNodeSplit, + }, + m_kv_store_engine::{KeyType, KeyTypeDataSetMeta, KvAdditionalConf, KvStoreEngine}, +}; + use crate::general::network::m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}; use crate::general::network::proto::{ self, DataVersionScheduleRequest, DataVersionScheduleResponse, }; use crate::result::{WSResult, WSResultExt}; -use crate::sys::{LogicalModulesRef, NodeID}; +use crate::sys::{LogicalModulesRef}; use crate::util::JoinHandleWrapper; -use crate::{ - general::data::{ - 
m_data_general::{ - CacheMode, DataGeneral, DataItemIdx, DataSetMeta, DataSetMetaBuilder, DataSplit, - EachNodeSplit, - }, - m_kv_store_engine::{KeyType, KeyTypeDataSetMeta, KvAdditionalConf, KvStoreEngine}, - }, - master::app::{fddg::FuncTriggerType, m_app_master::MasterAppMgmt}, -}; use crate::{ general::network::http_handler::HttpHandler, logical_module_view_impl, @@ -24,18 +22,14 @@ use crate::{ }; use async_trait::async_trait; use rand::{thread_rng, Rng}; -use std::collections::HashSet; -use std::time::Duration; use ws_derive::LogicalModule; logical_module_view_impl!(DataMasterView); logical_module_view_impl!(DataMasterView, data_master, Option); logical_module_view_impl!(DataMasterView, data_general, DataGeneral); -logical_module_view_impl!(DataMasterView, app_master, Option); logical_module_view_impl!(DataMasterView, p2p, P2PModule); logical_module_view_impl!(DataMasterView, http_handler, Box); logical_module_view_impl!(DataMasterView, kv_store_engine, KvStoreEngine); -logical_module_view_impl!(DataMasterView, executor, Executor); #[derive(LogicalModule)] pub struct DataMaster { @@ -81,147 +75,107 @@ impl LogicalModule for DataMaster { } impl DataMaster { - // return cache mode, splits, cache nodes - fn plan_for_write_data( - &self, - data_unique_id: &[u8], - _context: &proto::DataScheduleContext, - func_trigger_type: FuncTriggerType, - ) -> WSResult<(Vec, Vec, Vec)> { - fn set_data_cache_mode_for_meta( + // fn set_data_cache_mode_default(builder: &mut DataSetMetaBuilder) { + // if builder.building. + // // default cache mode + // let _ = builder + // .cache_mode_map_common_kv() + // .cache_mode_pos_auto() + // .cache_mode_time_auto(); + // } + fn set_data_cache_mode_for_meta( + req: &DataVersionScheduleRequest, + builder: &mut DataSetMetaBuilder, + ) { + fn default_set_data_cache_mode_for_meta( req: &DataVersionScheduleRequest, builder: &mut DataSetMetaBuilder, ) { - fn default_set_data_cache_mode_for_meta( - req: &DataVersionScheduleRequest, - builder: &mut DataSetMetaBuilder, - ) { - // for each item(by split length), set cache mode - for idx in 0..req.context.as_ref().unwrap().each_data_sz_bytes.len() { - let _ = builder - .cache_mode_time_forever(idx as DataItemIdx) - .cache_mode_pos_allnode(idx as DataItemIdx) - .cache_mode_map_common_kv(idx as DataItemIdx); - } - } - if let Some(context) = req.context.as_ref() { - match context.ope_role.as_ref().unwrap() { - proto::data_schedule_context::OpeRole::UploadApp(_data_ope_role_upload_app) => { - let _ = builder - // 0 is app meta data, map to common kv - .cache_mode_time_forever(0) - .cache_mode_pos_allnode(0) - .cache_mode_map_common_kv(0) - // 1 is app package data, map to file - .cache_mode_time_forever(1) - .cache_mode_pos_allnode(1) - .cache_mode_map_file(1); - } - proto::data_schedule_context::OpeRole::FuncCall(_data_ope_role_func_call) => { - default_set_data_cache_mode_for_meta(req, builder); - } - } - } else { - tracing::warn!( - "context is None, use default cache mode, maybe we need to suitable for this case" - ); - default_set_data_cache_mode_for_meta(req, builder); + // for each item(by split length), set cache mode + for idx in 0..req.context.as_ref().unwrap().each_data_sz_bytes.len() { + let _ = builder + .cache_mode_time_forever(idx as DataItemIdx) + .cache_mode_pos_allnode(idx as DataItemIdx) + .cache_mode_map_common_kv(idx as DataItemIdx); } } - - fn decide_each_data_split( - datamaster: &DataMaster, - ctx: &proto::DataScheduleContext, - ) -> Vec { - // let DEFAULT_SPLIT_SIZE = 4 * 1024 * 1024; - let mut 
datasplits = vec![]; - let p2p = datamaster.view.p2p(); - - // simply select a node - let node_id = { - let rand_node_idx = thread_rng().gen_range(0..p2p.nodes_config.node_cnt()); - let mut iter = p2p.nodes_config.all_nodes_iter(); - for _ in 0..rand_node_idx { - let _ = iter.next(); + if let Some(context) = req.context.as_ref() { + match context.ope_role.as_ref().unwrap() { + proto::data_schedule_context::OpeRole::UploadApp(_data_ope_role_upload_app) => { + let _ = builder + // 0 is app meta data, map to common kv + .cache_mode_time_forever(0) + .cache_mode_pos_allnode(0) + .cache_mode_map_common_kv(0) + // 1 is app package data, map to file + .cache_mode_time_forever(1) + .cache_mode_pos_allnode(1) + .cache_mode_map_file(1); + } + proto::data_schedule_context::OpeRole::FuncCall(_data_ope_role_func_call) => { + default_set_data_cache_mode_for_meta(req, builder); } - *iter - .next() - .expect("node count doesn't match all_nodes_iter") - .0 - }; - - for sz in ctx.each_data_sz_bytes.iter() { - datasplits.push(DataSplit { - splits: vec![EachNodeSplit { - node_id, - data_offset: 0, - data_size: *sz, - }], - }); } - tracing::debug!("decide_each_data_split res: {:?}", datasplits); - datasplits + } else { + tracing::warn!( + "context is None, use default cache mode, maybe we need to suitable for this case" + ); + default_set_data_cache_mode_for_meta(req, builder); } + } - if let Ok(data_unique_id_str) = std::str::from_utf8(data_unique_id) { - // get data binded functions - // https://fvd360f8oos.feishu.cn/wiki/Zp19wf9sdilwVKk5PlKcGy0CnBd - // app_name -> (apptype, fn_name -> fn_meta) - let binded_funcs: std::collections::HashMap< - String, - ( - crate::general::app::AppType, - std::collections::HashMap, - ), - > = self - .view - .app_master() - .fddg - .get_binded_funcs(data_unique_id_str, func_trigger_type); + // fn decide_cache_nodes( + // _ctx: &proto::DataScheduleContext, + // each_item_cache_mode: CacheModeVisitor, + // ) -> Vec { + // if cache_mode.is_time_auto() { + // // for time auto, we just do the cache when data is get + // return vec![]; + // } else if cache_mode.is_time_forever() { + // if cache_mode.is_pos_auto() { + // // for pos auto, we just do the cache when data is get + // // simple strategy temporarily + // return vec![]; + // } else if cache_mode.is_pos_specnode() { + // return vec![]; + // } else { + // // all node just return empty, can be just refered from cache_mode + // // no need to redundant info in cache nodes + // return vec![]; + // } + // } else { + // panic!("not supported time mode {:?}", cache_mode) + // } + // } - // filter functions by condition function - for (appname, (app_type, fn_names)) in binded_funcs.iter_mut() { - fn_names.retain(|fn_name, fn_meta| { - if let Some(data_accesses) = fn_meta.data_accesses.as_ref() { - // find data access discription - if let Some((key_pattern, data_access)) = - data_accesses.iter().find(|(key_pattern, data_access)| { - if let Some(event) = data_access.event.as_ref() { - match event { - DataEventTrigger::WriteWithCondition { condition } - | DataEventTrigger::NewWithCondition { condition } => false, - DataEventTrigger::Write | DataEventTrigger::New => false, - } - } else { - false - } - }) - { - // with condition, call the condition function - let condition_func = match data_access.event.as_ref().unwrap() { - DataEventTrigger::WriteWithCondition { condition } - | DataEventTrigger::NewWithCondition { condition } => condition, - _ => panic!("logical error, find condition must be with condition, so current code is wrong"), - 
}; + fn decide_each_data_split(&self, ctx: &proto::DataScheduleContext) -> Vec { + // let DEFAULT_SPLIT_SIZE = 4 * 1024 * 1024; + let mut datasplits = vec![]; - // call the condition function - self.view.executor().handle_local_call(resp, req) - - } else { - // without condition - true - } - } else { - true - } - }); + // simply select a node + let node_id = { + let rand_node_idx = thread_rng().gen_range(0..self.view.p2p().nodes_config.node_cnt()); + let mut iter = self.view.p2p().nodes_config.all_nodes_iter(); + for _ in 0..rand_node_idx { + let _ = iter.next(); } + *iter + .next() + .expect("node count doesn't match all_nodes_iter") + .0 + }; - // we need master to schedule functions to get func target nodes - // then we make the splits to prefer the target nodes and write cache when wrting splits + for sz in ctx.each_data_sz_bytes.iter() { + datasplits.push(DataSplit { + splits: vec![EachNodeSplit { + node_id, + data_offset: 0, + data_size: *sz, + }], + }); } - - Ok((DataSetMetaBuilder::new().build().cache_mode, vec![], vec![])) + tracing::debug!("decide_each_data_split res: {:?}", datasplits); + datasplits } /// Check the dataset sync flow here: @@ -232,52 +186,51 @@ impl DataMaster { responsor: RPCResponsor, req: DataVersionScheduleRequest, ) -> WSResult<()> { + // ## check version and decide data split let kv_store_engine = self.view.kv_store_engine(); let ctx = req .context .as_ref() .expect("context is required for DataScheduleContext"); - let metakey = KeyTypeDataSetMeta(&req.unique_id); - let metakey_bytes = metakey.make_key(); tracing::debug!("check version for data({:?})", req.unique_id); + // update the dataset's version + let new_meta = { + let key_bytes = KeyTypeDataSetMeta(&req.unique_id).make_key(); - // now we expand the meta - let (new_meta, cache_nodes) = { - let update_version_lock = kv_store_engine.with_rwlock(&metakey_bytes); + let update_version_lock = kv_store_engine.with_rwlock(&key_bytes); let _guard = update_version_lock.write(); - let dataset_meta = kv_store_engine.get(&metakey, true, KvAdditionalConf::default()); - - // the we will make the split plan and cache plan - // then expand the meta - // this process will fail if other write updated the unique id - let (item_cache_modes, new_splits, cache_nodes) = - self.plan_for_write_data(&req.unique_id, ctx, FuncTriggerType::DataWrite)?; - - // let takeonce=Some((new_meta,new_)) - let set_meta = if let Some((_kv_version, set_meta)) = dataset_meta { - tracing::debug!("update dataset meta for data({:?})", req.unique_id); - let version = set_meta.version; - let mut builder = DataSetMetaBuilder::from(set_meta); - // version - let _ = builder.version(version + 1); - // data splits bf cache mod - let _ = builder.set_data_splits(new_splits); - // cache mode - let _ = builder.set_cache_mode_for_all(item_cache_modes); - builder.build() - } else { - tracing::debug!("new dataset meta for data({:?})", req.unique_id); - let mut builder = DataSetMetaBuilder::new(); - // version - let _ = builder.version(1); - // data splits bf cache mod - let _ = builder.set_data_splits(new_splits); - // cache mode - let _ = builder.set_cache_mode_for_all(item_cache_modes); - builder.build() - }; + let dataset_meta = kv_store_engine.get( + &KeyTypeDataSetMeta(&req.unique_id), + true, + KvAdditionalConf::default(), + ); + let set_meta = dataset_meta.map_or_else( + || { + tracing::debug!("new dataset meta for data({:?})", req.unique_id); + let mut builder = DataSetMetaBuilder::new(); + // version + let _ = builder.version(1); + // data splits bf 
cache mod + let _ = builder.set_data_splits(self.decide_each_data_split(ctx)); + // cache mode + Self::set_data_cache_mode_for_meta(&req, &mut builder); + builder.build() + }, + |(_kv_version, set_meta)| { + tracing::debug!("update dataset meta for data({:?})", req.unique_id); + let version = set_meta.version; + let mut builder = DataSetMetaBuilder::from(set_meta); + // version + let _ = builder.version(version + 1); + // data splits bf cache mod + let _ = builder.set_data_splits(self.decide_each_data_split(ctx)); + // cache mode + Self::set_data_cache_mode_for_meta(&req, &mut builder); + builder.build() + }, + ); // ## update version local tracing::debug!( "update version local for data({:?}), the updated meta is {:?}", @@ -288,7 +241,7 @@ impl DataMaster { .set(KeyTypeDataSetMeta(&req.unique_id), &set_meta, true) .unwrap(); kv_store_engine.flush(); - (set_meta, cache_nodes) + set_meta }; // update version peers @@ -377,7 +330,6 @@ impl DataMaster { .into_iter() .map(|v| v.into()) .collect(), - cache_nodes, }) .await .todo_handle(); diff --git a/src/main/src/master/data/m_master_kv.rs b/src/main/src/master/m_master_kv.rs similarity index 100% rename from src/main/src/master/data/m_master_kv.rs rename to src/main/src/master/m_master_kv.rs diff --git a/src/main/src/master/mod.rs b/src/main/src/master/mod.rs index 0a7e261..04f2e5c 100644 --- a/src/main/src/master/mod.rs +++ b/src/main/src/master/mod.rs @@ -1,5 +1,5 @@ -pub mod app; -pub mod data; +pub mod m_data_master; pub mod m_http_handler; pub mod m_master; +pub mod m_master_kv; pub mod m_metric_observor; diff --git a/src/main/src/modules_global_bridge/mod.rs b/src/main/src/modules_global_bridge/mod.rs index c7618fa..1d82081 100644 --- a/src/main/src/modules_global_bridge/mod.rs +++ b/src/main/src/modules_global_bridge/mod.rs @@ -1,5 +1,4 @@ -use crate::sys::LogicalModules; -use crate::sys::LogicalModulesRef; +use crate::sys::{LogicalModules, LogicalModulesRef}; pub mod process_func; diff --git a/src/main/src/modules_global_bridge/process_func.rs b/src/main/src/modules_global_bridge/process_func.rs index dfcbdeb..77e20d3 100644 --- a/src/main/src/modules_global_bridge/process_func.rs +++ b/src/main/src/modules_global_bridge/process_func.rs @@ -1,14 +1,15 @@ -use crate::general::app::app_shared::process_rpc::ProcessRpc; -use crate::general::app::instance::m_instance_manager::InstanceManager; -use crate::general::app::AppMetaManager; +use crate::{ + general::app::AppMetaManager, + worker::func::{m_instance_manager::InstanceManager, shared::process_rpc::ProcessRpc}, +}; pub trait ModulesGlobalBrigeInstanceManager: Sized + 'static { - unsafe fn global_m_instance_manager() -> &'static InstanceManager; + unsafe fn global_m_instance_manager() -> Option<&'static InstanceManager>; } impl ModulesGlobalBrigeInstanceManager for ProcessRpc { - unsafe fn global_m_instance_manager() -> &'static InstanceManager { - super::modules().instance_manager() + unsafe fn global_m_instance_manager() -> Option<&'static InstanceManager> { + super::modules().instance_manager.as_ref() } } diff --git a/src/main/src/result.rs b/src/main/src/result.rs index 2d6fbb7..ced7cdd 100644 --- a/src/main/src/result.rs +++ b/src/main/src/result.rs @@ -163,10 +163,6 @@ pub enum WsFuncError { app: String, want: String, }, - CreateCracConfigFailed { - path: String, - err: std::io::Error, - }, InstanceJavaPidNotFound(String), InstanceProcessStartFailed(std::io::Error), InsranceVerifyFailed(String), @@ -227,10 +223,6 @@ pub enum WsDataError { unique_id: Vec, splitidx: DataSplitIdx, }, 
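The version-scheduling code above initializes a new dataset at version 1 and bumps an existing one by one, holding the kv-store write lock across the whole read-modify-write. The same shape in miniature, with storage reduced to a Mutex-wrapped map and illustrative names (this is not the project's API):

use std::collections::HashMap;
use std::sync::Mutex;

struct MetaStore {
    // unique_id -> current dataset version
    metas: Mutex<HashMap<Vec<u8>, u64>>,
}

impl MetaStore {
    // New datasets start at version 1; existing ones are bumped by one.
    // Holding the lock for the full read-modify-write mirrors the
    // with_rwlock write guard in the schedule handler above.
    fn bump_version(&self, unique_id: &[u8]) -> u64 {
        let mut metas = self.metas.lock().unwrap();
        *metas
            .entry(unique_id.to_vec())
            .and_modify(|v| *v += 1)
            .or_insert(1)
    }
}

fn main() {
    let store = MetaStore { metas: Mutex::new(HashMap::new()) };
    assert_eq!(store.bump_version(b"ds"), 1);
    assert_eq!(store.bump_version(b"ds"), 2);
}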
- SplitDataItemNotFileData { - unique_id: Vec, - splitidx: DataSplitIdx, - }, SplitLenMismatch { unique_id: Vec, splitidx: DataSplitIdx, diff --git a/src/main/src/sys.rs b/src/main/src/sys.rs index 4781980..f7c1e37 100644 --- a/src/main/src/sys.rs +++ b/src/main/src/sys.rs @@ -1,6 +1,3 @@ -use crate::general::app::app_owned::wasm_host_funcs; -use crate::general::app::instance::m_instance_manager::InstanceManager; -use crate::general::app::m_executor::Executor; use crate::{ config::NodesConfig, general::{ @@ -12,12 +9,14 @@ use crate::{ m_os::OperatingSystem, network::{http_handler::HttpHandlerDispatch, m_p2p::P2PModule}, }, - master::{ - app::m_app_master::MasterAppMgmt, data::m_data_master::DataMaster, m_master::Master, - m_metric_observor::MetricObservor, - }, + master::{m_data_master::DataMaster, m_master::Master, m_metric_observor::MetricObservor}, modules_global_bridge, util, - worker::{m_kv_user_client::KvUserClient, m_worker::WorkerCore}, + worker::{ + func::{m_instance_manager::InstanceManager, wasm_host_funcs}, + m_executor::Executor, + m_kv_user_client::KvUserClient, + m_worker::WorkerCore, + }, }; use crate::{result::WSResult, util::JoinHandleWrapper}; use async_trait::async_trait; @@ -331,11 +330,7 @@ start_modules!( http_handler, HttpHandlerDispatch, dist_lock, - DistLock, - instance_manager, - InstanceManager, - executor, - Executor + DistLock ], [ metric_observor, @@ -347,9 +342,20 @@ start_modules!( // master_kv, // MasterKv, data_master, - DataMaster, - app_master, - MasterAppMgmt + DataMaster ], - [worker, WorkerCore, kv_user_client, KvUserClient] + [ + worker, + WorkerCore, + kv_user_client, + KvUserClient, + instance_manager, + InstanceManager, + // worker_http, + // WorkerHttpHandler, + // kv_storage, + // KvStorage, + executor, + Executor + ] ); diff --git a/src/main/src/util/mod.rs b/src/main/src/util.rs similarity index 99% rename from src/main/src/util/mod.rs rename to src/main/src/util.rs index 702b9f7..3aff9e6 100644 --- a/src/main/src/util/mod.rs +++ b/src/main/src/util.rs @@ -1,5 +1,3 @@ -pub mod container; - use std::{ fmt::Debug, future::Future, diff --git a/src/main/src/util/container/map.rs b/src/main/src/util/container/map.rs deleted file mode 100644 index 3c7bae0..0000000 --- a/src/main/src/util/container/map.rs +++ /dev/null @@ -1,14 +0,0 @@ -#[macro_export] -macro_rules! new_map { - // 匹配空映射 - ($map_type:ident { }) => { - $map_type::new() - }; - // 匹配一个或多个键值对 - ($map_type:ident { $($key:expr => $value:expr),+ $(,)? 
}) => {{ - let map = $map_type::from([ - $( ($key, $value), )+ - ]); - map - }}; -} diff --git a/src/main/src/util/container/mod.rs b/src/main/src/util/container/mod.rs deleted file mode 100644 index 1c9a676..0000000 --- a/src/main/src/util/container/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod map; -pub mod sync_trie; diff --git a/src/main/src/util/container/sync_trie.rs b/src/main/src/util/container/sync_trie.rs deleted file mode 100644 index a91fae0..0000000 --- a/src/main/src/util/container/sync_trie.rs +++ /dev/null @@ -1,275 +0,0 @@ -use parking_lot::{RwLock, RwLockReadGuard}; -use std::collections::HashMap; -use std::ops::{Deref, DerefMut}; -use std::sync::Arc; -use std::thread; -use std::time::Duration; - -pub struct TrieNode { - children: HashMap>>>, - payload: Option, // Payload to store additional data -} - -impl Deref for TrieNode { - type Target = T; - fn deref(&self) -> &Self::Target { - self.payload.as_ref().unwrap() - } -} - -impl DerefMut for TrieNode { - fn deref_mut(&mut self) -> &mut Self::Target { - self.payload.as_mut().unwrap() - } -} - -impl Default for TrieNode { - fn default() -> Self { - TrieNode { - children: HashMap::new(), - payload: None, - } - } -} - -pub struct SyncedTrie { - root: Arc>>, -} - -// current impl is grow only -impl SyncedTrie -where - T: Clone + Send + Sync + 'static, -{ - pub fn new() -> Self { - SyncedTrie { - root: Arc::new(RwLock::new(TrieNode::default())), - } - } - - // pub fn insert(&self, word: &str, payload: T) { - // let mut current_node = self.root.clone(); - // for ch in word.chars() { - // let mut node = current_node.write(); - // let child = node.children.entry(ch).or_default().clone(); - // drop(node); - // current_node = child; - // } - // let mut node = current_node.write(); - // node.payload = Some(payload); - // } - - // don't guarantee the return node still exists after the function returns - pub fn search_or_insert( - &self, - word: &str, - payload: impl FnOnce() -> T, - ) -> Arc>> { - let mut current_node = self.root.clone(); - for ch in word.chars() { - let mut node = current_node.write(); - let child = if let Some(child) = node.children.get(&ch) { - child.clone() - } else { - // Create intermediate nodes without payload - let new_inner_node = TrieNode::default(); - let new_child = Arc::new(RwLock::new(new_inner_node)); - let _ = node.children.insert(ch, new_child.clone()); - new_child - }; - drop(node); - current_node = child; - } - // Set payload only at the final node - let mut final_node = current_node.write(); - if final_node.payload.is_none() { - final_node.payload = Some(payload()); - } - drop(final_node); - current_node - } - - // return (matchlen, node) array that is the prefix of the word - pub fn match_partial(&self, word: &str) -> Vec<(usize, Arc>>)> { - let mut current_node = self.root.clone(); - let mut nodes = Vec::new(); - let mut len = 1; - for ch in word.chars() { - let node = current_node.write(); - let child = if let Some(child) = node.children.get(&ch) { - child.clone() - } else { - return nodes; - }; - drop(node); - current_node = child; - - if current_node.read().payload.is_some() { - nodes.push((len, current_node.clone())); - } - len += 1; - } - nodes - } - // pub fn search(&self, word: &str) -> Option>>> { - // let mut current_node = self.root.clone(); - // for ch in word.chars() { - // let node = current_node.write(); - // let child = if let Some(child) = node.children.get(&ch) { - // child.clone() - // } else { - // return None; - // }; - // drop(node); - // current_node = child; - // } - // 
Some(current_node) - // } -} - -/// commands to run: -/// cargo test --test test_basic_operations -- --nocapture -/// cargo test --test test_concurrent_insert -- --nocapture -/// cargo test --test test_concurrent_mixed_operations -- --nocapture -#[cfg(test)] -mod tests { - use super::*; - use std::thread; - - #[test] - fn test_basic_operations() { - let trie = SyncedTrie::new(); - let _ = trie.search_or_insert("test", || 42); - let nodes = trie.match_partial("test"); - assert_eq!(nodes.len(), 1); - assert_eq!(**nodes[0].1.read(), 42); - assert!(trie.match_partial("none").is_empty()); - } - - #[test] - fn test_concurrent_insert() { - let trie = Arc::new(SyncedTrie::new()); - let mut handles = vec![]; - - // 并发插入 - for i in 0..1000 { - let trie = trie.clone(); - let key = format!("key{}", i); - let value = i; - handles.push(thread::spawn(move || { - let _ = trie.search_or_insert(&key, || value); - })); - } - - // 等待所有插入完成 - for handle in handles { - handle.join().unwrap(); - } - - // 验证插入结果 - for i in 0..1000 { - let key = format!("key{}", i); - let node = trie.match_partial(&key); - assert_eq!(node.len(), 1); - assert_eq!(**node[0].1.read(), i); - } - } - - #[test] - fn test_concurrent_mixed_operations() { - let trie = Arc::new(SyncedTrie::new()); - let mut handles = vec![]; - - // 预先插入一些数据 - for i in 0..10 { - let _ = trie.search_or_insert(&format!("key{}", i), || i); - } - - // 混合读写操作 - for i in 0..1000 { - let trie = trie.clone(); - handles.push(thread::spawn(move || { - let key = format!("key{}", i % 10); - if i % 3 == 0 { - let _ = trie.search_or_insert(&key, || i); - } else { - let nodes = trie.match_partial(&key); - assert_eq!(nodes.len(), 1); - assert_eq!(**nodes[0].1.read(), i); - } - })); - } - - // 等待所有操作完成 - for handle in handles { - handle.join().unwrap(); - } - } - - #[test] - fn test_concurrent_prefix_operations() { - let trie = Arc::new(SyncedTrie::new()); - let mut handles = vec![]; - - // 测试相同前缀的并发操作 - for i in 0..100 { - let trie = trie.clone(); - handles.push(thread::spawn(move || { - let key = format!("prefix{}", i % 10); - let _ = trie.search_or_insert(&key, || i); - let nodes = trie.match_partial(&key); - assert_eq!(nodes.len(), 1); - assert_eq!(**nodes[0].1.read(), i); - })); - } - - // 等待所有操作完成 - for handle in handles { - handle.join().unwrap(); - } - } - - #[test] - fn test_match_partial() { - let trie = SyncedTrie::new(); - - // Insert some test data - let _ = trie.search_or_insert("test", || 1); - let _ = trie.search_or_insert("testing", || 2); - let _ = trie.search_or_insert("te", || 3); - - // Test partial matches - let matches = trie.match_partial("testing"); - assert_eq!(matches.len(), 1); // Should get the node for "testing" - assert_eq!(**matches.last().unwrap().1.read(), 2); - - // Test prefix - let matches = trie.match_partial("te"); - assert_eq!(matches.len(), 1); // Should get the node for "te" - assert_eq!(**matches.last().unwrap().1.read(), 3); - - // Test non-existent prefix - let matches = trie.match_partial("xyz"); - assert!(matches.is_empty()); - } - - #[test] - fn test_match_partial_empty_nodes() { - let trie = SyncedTrie::new(); - - // Insert a long word - let _ = trie.search_or_insert("hello", || 1); - - // Test that intermediate nodes don't have payloads - let matches = trie.match_partial("h"); - assert!(matches.is_empty()); // No payload at 'h' - - let matches = trie.match_partial("hel"); - assert!(matches.is_empty()); // No payload at 'hel' - - // But full match should work - let matches = trie.match_partial("hello"); - 
assert_eq!(matches.len(), 1); - assert_eq!(**matches.last().unwrap().1.read(), 1); - } -} diff --git a/src/main/src/worker/func/fn_event/kv_event.rs b/src/main/src/worker/func/fn_event/kv_event.rs new file mode 100644 index 0000000..d73fed6 --- /dev/null +++ b/src/main/src/worker/func/fn_event/kv_event.rs @@ -0,0 +1,155 @@ +// use std::time::Duration; + +// use crate::{ +// general::{ +// kv_interface::{KvInterface, KvOps, KvOptions}, +// network::proto::{self, kv::KvPairs}, +// }, +// util, +// worker::{ +// app_meta::{AppMetaManager, KeyPattern, KvMeta}, +// executor::FunctionCtx, +// kv_user_client::KvUserClient, +// }, +// }; + +// async fn handle_set_event( +// kv: proto::kv::KvPair, +// kv_client: &KvUserClient, +// prev_fn_ctx: &FunctionCtx, +// triggers: Vec<(&String, &String, &KvMeta)>, +// ) { +// let worker = kv_client.view.worker(); +// let p2p = kv_client.view.instance_manager().view.p2p(); +// let schecmd = match worker +// .rpc_caller_make_sche +// .call( +// p2p, +// p2p.nodes_config.get_master_node(), +// proto::sche::MakeSchePlanReq { +// app_fns: triggers +// .iter() +// .map(|(app, func, _)| proto::sche::make_sche_plan_req::AppFn { +// app: app.to_string(), +// func: func.to_string(), +// }) +// .collect::>(), +// trigger_type: proto::sche::make_sche_plan_req::TriggerType::SetKv as i32, +// }, +// None, +// ) +// .await +// { +// Ok(schecmd) => schecmd, +// Err(e) => { +// tracing::error!("rpc call error: {:?}", e); +// return; +// } +// }; +// tracing::info!("got sche plan from master: {:?}", schecmd); +// // 1. sync set kv +// // TODO: use schecmd.data_target_node +// let key = kv.key.clone(); +// if let Ok(_res) = kv_client +// .call( +// proto::kv::KvRequests { +// requests: vec![proto::kv::KvRequest { +// op: Some(proto::kv::kv_request::Op::Set( +// proto::kv::kv_request::KvPutRequest { +// kvs: Some(KvPairs { kvs: vec![kv] }), +// }, +// )), +// }], +// }, +// KvOptions::new().with_spec_node(schecmd.data_target_node), +// ) +// .await +// { +// for (&target_node, (app, func, _)) in schecmd.sche_target_node.iter().zip(triggers) { +// let view = kv_client.view.clone(); +// let app = app.to_owned(); +// let func = func.to_owned(); +// let key = key.clone(); + +// let remote_sche_task = tokio::spawn(async move { +// let sub_task = view.executor().register_sub_task(); +// if let Err(err) = view +// .worker() +// .rpc_caller_distribute_task +// .call( +// view.p2p(), +// target_node, +// proto::sche::DistributeTaskReq { +// app, +// func, +// task_id: sub_task, +// trigger: Some(proto::sche::distribute_task_req::Trigger::KvKeySet(key)), +// }, +// // max wait time +// Some(Duration::from_secs(60 * 30)), +// ) +// .await +// { +// tracing::error!("sche sub fn failed with err: {}", err); +// } +// }); +// unsafe { +// util::unsafe_mut(prev_fn_ctx) +// .sub_waiters +// .push(remote_sche_task) +// } +// } +// } +// } + +// pub async fn check_and_handle_event( +// // record triggerd events +// fn_ctx: &FunctionCtx, +// // tigger pattern +// pattern: &KeyPattern, +// // kv operation +// kv_client: &KvUserClient, +// // app meta to get trigger infos +// app_meta_man: &AppMetaManager, +// // may trigger op +// op: KvOps, +// // kv to set +// kv: proto::kv::KvPair, +// ) { +// match op { +// KvOps::Get | KvOps::Delete => { +// tracing::warn!("kv event not support get/delete"); +// return; +// } +// KvOps::Set => {} +// } +// tracing::info!("event trigger kv ope matched"); +// let triggers = if let Some(triggers) = app_meta_man.get_pattern_triggers(&*pattern.0) { +// if 
triggers.is_empty() { +// return; +// } +// triggers +// } else { +// return; +// }; +// tracing::info!("kv pattern has potential triggers"); +// // collect must consume triggers +// let triggers: Vec<(&String, &String, &KvMeta)> = triggers +// .iter() +// .filter_map(|(app, func)| { +// let maytrigger_fnmeta = app_meta_man +// .get_app_meta(app) +// .unwrap() +// .get_fn_meta(func) +// .unwrap(); + +// if let Some(kvmeta) = maytrigger_fnmeta.find_will_trigger_kv_event(pattern, op) { +// Some((app, func, kvmeta)) +// } else { +// None +// } +// }) +// .collect::>(); +// tracing::info!("kv pattern has {} triggers", triggers.len()); +// handle_set_event(kv, kv_client, fn_ctx, triggers).await; +// } diff --git a/src/main/src/worker/func/fn_event/mod.rs b/src/main/src/worker/func/fn_event/mod.rs new file mode 100644 index 0000000..3da07d7 --- /dev/null +++ b/src/main/src/worker/func/fn_event/mod.rs @@ -0,0 +1 @@ +pub mod kv_event; diff --git a/src/main/src/general/app/instance/m_instance_manager.rs b/src/main/src/worker/func/m_instance_manager.rs similarity index 84% rename from src/main/src/general/app/instance/m_instance_manager.rs rename to src/main/src/worker/func/m_instance_manager.rs index da0ce4a..dcfabee 100644 --- a/src/main/src/general/app/instance/m_instance_manager.rs +++ b/src/main/src/worker/func/m_instance_manager.rs @@ -1,11 +1,7 @@ -use crate::general::app::app_native::NativeAppInstance; -use crate::general::app::app_owned::wasm; -use crate::general::app::app_shared::process_rpc::ProcessRpc; -use crate::general::app::app_shared::SharedInstance; -use crate::general::app::instance::Instance; +use super::shared::process_rpc::ProcessRpc; +use super::{owned::wasm, shared::SharedInstance, FnExeCtx, Instance, OwnedInstance}; use crate::general::m_os::OperatingSystem; use crate::general::network::rpc_model; -use crate::result::{WSError, WsFuncError}; use crate::sys::LogicalModulesRef; use crate::{ general::app::AppType, // worker::host_funcs, @@ -16,7 +12,6 @@ use crate::{ use crate::{logical_module_view_impl, util}; use async_trait::async_trait; use crossbeam_skiplist::SkipMap; -use dashmap::DashMap; use enum_as_inner::EnumAsInner; use std::{ collections::{HashMap, VecDeque}, @@ -28,12 +23,9 @@ use std::{ }, time::Duration, }; -use tokio::io::AsyncWriteExt; use tokio::sync::Notify; use ws_derive::LogicalModule; -use super::OwnedInstance; - pub struct LRUCache { capacity: usize, cache: HashMap, @@ -209,7 +201,7 @@ pub struct InstanceManager { pub app_instances: SkipMap, file_dir: PathBuf, /// instance addr 2 running function - pub instance_running_function: DashMap, + pub instance_running_function: parking_lot::RwLock>, pub next_instance_id: AtomicU64, pub view: InstanceManagerView, } @@ -239,41 +231,6 @@ impl LogicalModule for InstanceManager { } } async fn start(&self) -> WSResult> { - // create crac_config - let crac_config_path = self.view.os().app_path("crac_config"); - // - create file with crac_config_path - let mut f = { - let crac_config_path = crac_config_path.clone(); - tokio::fs::File::options() - .create(true) - .write(true) - .open(&crac_config_path) - .await - .map_err(|err| { - WSError::from(WsFuncError::CreateCracConfigFailed { - path: crac_config_path.to_str().unwrap().to_owned(), - err: err, - }) - })? 
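The LRUCache struct whose definition appears in the hunk above pairs a capacity bound with a map keyed by recency. A generic model of that capacity-bounded, least-recently-used pattern; the key and value types, field names, and methods here are assumptions, not the project's implementation:

use std::collections::{HashMap, VecDeque};
use std::hash::Hash;

struct Lru<K: Hash + Eq + Clone, V> {
    capacity: usize,
    cache: HashMap<K, V>,
    order: VecDeque<K>, // front = least recently used
}

impl<K: Hash + Eq + Clone, V> Lru<K, V> {
    fn put(&mut self, k: K, v: V) {
        if self.cache.contains_key(&k) {
            self.order.retain(|x| x != &k); // refresh an existing entry
        } else if self.cache.len() == self.capacity {
            if let Some(oldest) = self.order.pop_front() {
                let _ = self.cache.remove(&oldest); // evict the LRU entry
            }
        }
        self.order.push_back(k.clone());
        let _ = self.cache.insert(k, v);
    }

    fn get(&mut self, k: &K) -> Option<&V> {
        if self.cache.contains_key(k) {
            self.order.retain(|x| x != k);
            self.order.push_back(k.clone()); // mark as most recently used
        }
        self.cache.get(k)
    }
}

fn main() {
    let mut c = Lru { capacity: 2, cache: HashMap::new(), order: VecDeque::new() };
    c.put("a", 1);
    c.put("b", 2);
    let _ = c.get(&"a"); // refresh "a"
    c.put("c", 3); // evicts "b", the least recently used
    assert!(c.get(&"b").is_none());
    assert_eq!(c.get(&"a"), Some(&1));
}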
- }; - - // - write datas - f.write_all( - b"type: FILE -action: ignore ---- -type: SOCKET -action: close", - ) - .await - .map_err(|err| { - WSError::from(WsFuncError::CreateCracConfigFailed { - path: crac_config_path.to_str().unwrap().to_owned(), - err: err, - }) - })?; - - // start process rpc Ok(vec![rpc_model::spawn::( self.file_dir .join("agent.sock") @@ -305,7 +262,6 @@ impl InstanceManager { .put(v); } Instance::Shared(v) => drop(v), - Instance::Native(_) => {} } } pub async fn load_instance(&self, app_type: &AppType, instance_name: &str) -> Instance { @@ -320,7 +276,6 @@ impl InstanceManager { .get(&self.file_dir, instance_name) .await .into(), - AppType::Native => NativeAppInstance::new().into(), } } pub async fn drap_app_instances(&self, app: &str) { diff --git a/src/main/src/worker/func/mod.rs b/src/main/src/worker/func/mod.rs new file mode 100644 index 0000000..f779ba3 --- /dev/null +++ b/src/main/src/worker/func/mod.rs @@ -0,0 +1,114 @@ +pub mod m_instance_manager; +pub mod owned; +pub mod shared; +pub mod wasm_host_funcs; + +use crate::{ + general::{ + app::{AppType, FnMeta}, + network::http_handler::ReqId, + }, + result::WSResult, +}; +use async_trait::async_trait; +use enum_as_inner::EnumAsInner; +use tokio::task::JoinHandle; + +use self::{ + owned::wasm::WasmInstance, + shared::{process::ProcessInstance, SharedInstance}, +}; + +#[derive(EnumAsInner)] +pub enum OwnedInstance { + WasmInstance(WasmInstance), +} + +pub enum Instance { + Owned(OwnedInstance), + Shared(SharedInstance), +} +impl From for Instance { + fn from(v: OwnedInstance) -> Self { + Self::Owned(v) + } +} + +impl From for Instance { + fn from(v: SharedInstance) -> Self { + Self::Shared(v) + } +} + +impl From for Instance { + fn from(v: ProcessInstance) -> Self { + Self::Shared(SharedInstance(v)) + } +} + +#[async_trait] +impl InstanceTrait for Instance { + fn instance_name(&self) -> String { + match self { + Instance::Owned(v) => v.instance_name(), + Instance::Shared(v) => v.instance_name(), + } + } + async fn execute(&self, fn_ctx: &mut FnExeCtx) -> WSResult> { + match self { + Instance::Owned(v) => v.execute(fn_ctx).await, + Instance::Shared(v) => v.execute(fn_ctx).await, + } + } +} + +pub enum NewJavaInstanceConfig {} + +#[async_trait] +pub trait InstanceTrait { + fn instance_name(&self) -> String; + async fn execute(&self, fn_ctx: &mut FnExeCtx) -> WSResult>; +} + +#[derive(Clone, Debug)] +pub enum EventCtx { + Http(String), + KvSet { key: Vec, opeid: Option }, +} + +impl EventCtx { + pub fn take_prev_kv_opeid(&mut self) -> Option { + match self { + EventCtx::KvSet { opeid, .. 
} => opeid.take(),
+            _ => None,
+        }
+    }
+}
+
+pub struct FnExeCtx {
+    pub app: String,
+    pub app_type: AppType,
+    pub func: String,
+    pub func_meta: FnMeta,
+    pub req_id: ReqId,
+    pub event_ctx: EventCtx,
+    pub res: Option<String>,
+    /// remote scheduling tasks
+    pub sub_waiters: Vec<JoinHandle<()>>, // pub trigger_node: NodeID,
+}
+
+impl FnExeCtx {
+    pub fn empty_http(&self) -> bool {
+        match &self.event_ctx {
+            EventCtx::Http(str) => str.len() == 0,
+            _ => false,
+        }
+    }
+    /// call this when you are sure it's a http event
+    pub fn http_str_unwrap(&self) -> String {
+        match &self.event_ctx {
+            EventCtx::Http(str) => str.to_owned(),
+            _ => panic!("not a http event"),
+        }
+    }
+}
diff --git a/src/main/src/general/app/app_owned/mod.rs b/src/main/src/worker/func/owned/mod.rs
similarity index 72%
rename from src/main/src/general/app/app_owned/mod.rs
rename to src/main/src/worker/func/owned/mod.rs
index c02aecc..e7cabd6 100644
--- a/src/main/src/general/app/app_owned/mod.rs
+++ b/src/main/src/worker/func/owned/mod.rs
@@ -1,12 +1,10 @@
 pub mod wasm;
-pub mod wasm_host_funcs;
 
-use crate::general::app::instance::InstanceTrait;
-use crate::general::app::instance::OwnedInstance;
-use crate::general::app::m_executor::FnExeCtx;
-use crate::result::WSResult;
 use async_trait::async_trait;
 
+use super::{FnExeCtx, InstanceTrait, OwnedInstance};
+use crate::result::WSResult;
+
 #[async_trait]
 impl InstanceTrait for OwnedInstance {
     fn instance_name(&self) -> String {
diff --git a/src/main/src/general/app/app_owned/wasm.rs b/src/main/src/worker/func/owned/wasm.rs
similarity index 96%
rename from src/main/src/general/app/app_owned/wasm.rs
rename to src/main/src/worker/func/owned/wasm.rs
index 342d5ad..ba55c37 100644
--- a/src/main/src/general/app/app_owned/wasm.rs
+++ b/src/main/src/worker/func/owned/wasm.rs
@@ -1,9 +1,9 @@
-use crate::general::app::app_owned::wasm_host_funcs;
-use crate::general::app::instance::InstanceTrait;
-use crate::general::app::instance::OwnedInstance;
-use crate::general::app::m_executor::{EventCtx, FnExeCtx};
-use crate::result::{WSResult, WsFuncError};
+use crate::{
+    result::{WSResult, WsFuncError},
+    worker::func::{wasm_host_funcs, EventCtx, FnExeCtx, InstanceTrait, OwnedInstance},
+};
 use async_trait::async_trait;
+
 use std::{mem::ManuallyDrop, path::Path};
 use wasmedge_sdk::{
     config::{CommonConfigOptions, ConfigBuilder, HostRegistrationConfigOptions},
diff --git a/src/main/src/worker/func/shared/java.rs b/src/main/src/worker/func/shared/java.rs
new file mode 100644
index 0000000..210be5f
--- /dev/null
+++ b/src/main/src/worker/func/shared/java.rs
@@ -0,0 +1,59 @@
+use std::str::from_utf8;
+
+use tokio::process::{self, Command};
+
+use crate::{
+    general::m_os::{OperatingSystem, OsProcessType},
+    result::{WSError, WSResult, WsFuncError},
+};
+
+use super::process::PID;
+
+pub(super) fn cold_start(app: &str, os: &OperatingSystem) -> WSResult<process::Child> {
+    tracing::debug!("java cold start {}", app);
+    let p = os.start_process(OsProcessType::JavaApp(app.to_owned()));
+    // .filter(|x| x.starts_with(app))
+    // .next()
+    // .expect("no pid found")
+    // .split(|x| x == ' ')
+    // .next()
+    // .expect("no pid found")
+    // .parse()
+    // .expect("failed to parse pid");
+    Ok(p)
+}
+
+pub(super) async fn find_pid(app: &str) -> WSResult<PID> {
+    let res = Command::new("jcmd")
+        .arg("-l")
+        .output()
+        .await
+        .map_err(|e| WSError::from(WsFuncError::InstanceProcessStartFailed(e)))?;
+    let res = from_utf8(&res.stdout).expect("failed to parse output to string");
+    let res = res.split(|x| x == '\n').collect::<Vec<_>>();
+    
tracing::debug!("jcmd output: {:?}", res); + let err = || Err(WsFuncError::InstanceJavaPidNotFound(app.to_owned()).into()); + let Some(res) = res + .iter() + .filter(|x| x.contains(&format!("--appName={}", app))) + .next() + else { + return err(); + }; + let Some(res) = res.split(|x| x == ' ').next() else { + return err(); + }; + let Ok(pid) = res.parse::() else { + return err(); + }; + Ok(pid) +} + +pub(super) async fn take_snapshot(app: &str, os: &OperatingSystem) { + let res = os + .start_process(OsProcessType::JavaCheckpoints(app.to_owned())) + .wait() + .await + .unwrap(); + assert!(res.success()); +} diff --git a/src/main/src/general/app/app_shared/mod.rs b/src/main/src/worker/func/shared/mod.rs similarity index 71% rename from src/main/src/general/app/app_shared/mod.rs rename to src/main/src/worker/func/shared/mod.rs index c02dd4f..170e3aa 100644 --- a/src/main/src/general/app/app_shared/mod.rs +++ b/src/main/src/worker/func/shared/mod.rs @@ -1,12 +1,12 @@ +use async_trait::async_trait; + +use super::InstanceTrait; + pub mod java; pub mod process; pub mod process_instance_man_related; pub mod process_rpc; -use crate::general::app::instance::InstanceTrait; -use crate::general::app::m_executor::FnExeCtx; -use async_trait::async_trait; - pub struct SharedInstance(pub process::ProcessInstance); impl From for SharedInstance { @@ -20,7 +20,10 @@ impl InstanceTrait for SharedInstance { fn instance_name(&self) -> String { self.0.instance_name() } - async fn execute(&self, fn_ctx: &mut FnExeCtx) -> crate::result::WSResult> { + async fn execute( + &self, + fn_ctx: &mut crate::worker::func::FnExeCtx, + ) -> crate::result::WSResult> { self.0.execute(fn_ctx).await } } diff --git a/src/main/src/general/app/app_shared/process.rs b/src/main/src/worker/func/shared/process.rs similarity index 95% rename from src/main/src/general/app/app_shared/process.rs rename to src/main/src/worker/func/shared/process.rs index 89c0dbe..ff69fe8 100644 --- a/src/main/src/general/app/app_shared/process.rs +++ b/src/main/src/worker/func/shared/process.rs @@ -1,19 +1,22 @@ // process function just run in unique process -use super::process_rpc::{self, proc_proto}; -use crate::general::app::app_shared::java; -use crate::general::app::instance::InstanceTrait; -use crate::general::app::m_executor::FnExeCtx; -use crate::general::{ - app::AppType, - network::rpc_model::{self, HashValue}, -}; +use std::sync::Arc; + use async_trait::async_trait; use enum_as_inner::EnumAsInner; use parking_lot::RwLock; -use std::sync::Arc; use tokio::{process::Command, sync::oneshot}; +use crate::{ + general::{ + app::AppType, + network::rpc_model::{self, HashValue}, + }, + worker::func::{shared::java, InstanceTrait}, +}; + +use super::process_rpc::{self, proc_proto}; + #[derive(EnumAsInner)] pub enum ProcessInstanceConnState { Connecting(Vec>), @@ -199,7 +202,10 @@ impl InstanceTrait for ProcessInstance { fn instance_name(&self) -> String { self.app.clone() } - async fn execute(&self, fn_ctx: &mut FnExeCtx) -> crate::result::WSResult> { + async fn execute( + &self, + fn_ctx: &mut crate::worker::func::FnExeCtx, + ) -> crate::result::WSResult> { // if rpc_model::start_remote_once(rpc_model::HashValue::Str(fn_ctx.func.to_owned())) { // // cold start the java process // } @@ -213,10 +219,11 @@ impl InstanceTrait for ProcessInstance { fn_ctx.func ); tracing::debug!("before process_rpc::call_func "); - let res = - process_rpc::call_func(&fn_ctx.app, &fn_ctx.func, fn_ctx.http_str_unwrap()).await; + let res = process_rpc::call_func(&fn_ctx.app, 
&fn_ctx.func, fn_ctx.http_str_unwrap()) + .await; tracing::debug!("after process_rpc::call_func "); - return res.map(|v| Some(v.ret_str)); + return res + .map(|v| Some(v.ret_str)); // return process_rpc::call_func(&fn_ctx.app, &fn_ctx.func, fn_ctx.http_str_unwrap()) // .await // .map(|v| Some(v.ret_str)); diff --git a/src/main/src/general/app/app_shared/process_instance_man_related.rs b/src/main/src/worker/func/shared/process_instance_man_related.rs similarity index 82% rename from src/main/src/general/app/app_shared/process_instance_man_related.rs rename to src/main/src/worker/func/shared/process_instance_man_related.rs index ed8e9e7..987b62b 100644 --- a/src/main/src/general/app/app_shared/process_instance_man_related.rs +++ b/src/main/src/worker/func/shared/process_instance_man_related.rs @@ -2,16 +2,17 @@ use std::time::Duration; use tokio::process::Command; -use crate::general::app::instance::m_instance_manager::EachAppCache; use crate::{ - general::app::app_shared::java, - general::app::app_shared::process::ProcessInstance, - general::app::app_shared::SharedInstance, - general::app::instance::m_instance_manager::InstanceManager, general::app::AppType, result::{WSResult, WsFuncError}, + worker::func::{ + m_instance_manager::{EachAppCache, InstanceManager}, + shared::java, + }, }; +use super::{process::ProcessInstance, SharedInstance}; + impl InstanceManager { pub async fn update_checkpoint(&self, app_name: &str, restart: bool) -> WSResult<()> { async fn debug_port_left() { @@ -48,18 +49,14 @@ impl InstanceManager { tracing::debug!("taking snapshot for app: {}", app_name); match proc_ins.app_type { AppType::Jar => java::take_snapshot(app_name, self.view.os()).await, - AppType::Wasm | AppType::Native => { - panic!("wasm/native can't take snapshot") - } + AppType::Wasm => unreachable!(), } } // recover by criu - // tokio::time::sleep(Duration::from_secs(3)).await; + tokio::time::sleep(Duration::from_secs(3)).await; tracing::debug!("restart app after snapshot: {}", app_name); - let res = java::JavaColdStart::mksure_checkpoint(self.view.os().app_path(app_name)) - .await - .cold_start(app_name, self.view.os()); + let res = java::cold_start(app_name, self.view.os()); let p = match res { Err(e) => { tracing::warn!("cold start failed: {:?}", e); @@ -116,9 +113,7 @@ impl InstanceManager { // let app = app.to_owned(); // let instance = instance.clone(); - let p = java::JavaColdStart::direct_start() - .cold_start(&app, self.view.os()) - .unwrap(); + let p = java::cold_start(&app, self.view.os()).unwrap(); instance.bind_process(p); } @@ -127,8 +122,7 @@ impl InstanceManager { EachAppCache::Shared(instance.into()) } - AppType::Wasm => panic!("wasm only support owned instance"), - AppType::Native => panic!("native only support owned instance"), + AppType::Wasm => unreachable!("wasm only support owned instance"), } }); diff --git a/src/main/src/general/app/app_shared/process_rpc.rs b/src/main/src/worker/func/shared/process_rpc.rs similarity index 96% rename from src/main/src/general/app/app_shared/process_rpc.rs rename to src/main/src/worker/func/shared/process_rpc.rs index e7b115e..84aa59d 100644 --- a/src/main/src/general/app/app_shared/process_rpc.rs +++ b/src/main/src/worker/func/shared/process_rpc.rs @@ -2,14 +2,14 @@ pub mod proc_proto { include!(concat!(env!("OUT_DIR"), "/process_rpc_proto.rs")); } -use self::proc_proto::{FuncCallReq, FuncCallResp}; -use super::SharedInstance; -use crate::general::app::app_shared::process_rpc::proc_proto::AppStarted; use crate::{ 
general::network::rpc_model::{self, HashValue, MsgIdBind, ReqMsg, RpcCustom}, - modules_global_bridge::process_func::ModulesGlobalBrigeInstanceManager, + modules_global_bridge::process_func::{ + ModulesGlobalBrigeInstanceManager, + }, result::WSResult, sys::LogicalModulesRef, + worker::func::shared::process_rpc::proc_proto::AppStarted, }; use async_trait::async_trait; use parking_lot::Mutex; @@ -17,6 +17,10 @@ use prost::Message; use std::{collections::HashMap, path::Path, time::Duration}; use tokio::sync::oneshot; +use self::proc_proto::{FuncCallReq, FuncCallResp}; + +use super::SharedInstance; + // const AGENT_SOCK_PATH: &str = "agent.sock"; fn clean_sock_file(path: impl AsRef) { @@ -86,7 +90,7 @@ impl RpcCustom for ProcessRpc { // } // update to the instance - let insman = ProcessRpc::global_m_instance_manager(); + let insman = ProcessRpc::global_m_instance_manager().unwrap(); let instance = insman.app_instances.get(&res.appid).expect(&format!( "instance should be inited before get the verify {}", res.appid diff --git a/src/main/src/general/app/app_shared/process_rpc_proto.proto b/src/main/src/worker/func/shared/process_rpc_proto.proto similarity index 100% rename from src/main/src/general/app/app_shared/process_rpc_proto.proto rename to src/main/src/worker/func/shared/process_rpc_proto.proto diff --git a/src/main/src/general/app/app_owned/wasm_host_funcs/fs.rs b/src/main/src/worker/func/wasm_host_funcs/fs.rs similarity index 100% rename from src/main/src/general/app/app_owned/wasm_host_funcs/fs.rs rename to src/main/src/worker/func/wasm_host_funcs/fs.rs diff --git a/src/main/src/general/app/app_owned/wasm_host_funcs/kv.rs b/src/main/src/worker/func/wasm_host_funcs/kv.rs similarity index 100% rename from src/main/src/general/app/app_owned/wasm_host_funcs/kv.rs rename to src/main/src/worker/func/wasm_host_funcs/kv.rs diff --git a/src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs b/src/main/src/worker/func/wasm_host_funcs/mod.rs similarity index 94% rename from src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs rename to src/main/src/worker/func/wasm_host_funcs/mod.rs index abac07f..9116858 100644 --- a/src/main/src/general/app/app_owned/wasm_host_funcs/mod.rs +++ b/src/main/src/worker/func/wasm_host_funcs/mod.rs @@ -4,20 +4,25 @@ mod fs; mod kv; mod result; -use crate::sys::LogicalModulesRef; use fs::FsFuncsRegister; use kv::KvFuncsRegister; use result::ResultFuncsRegister; +use crate::sys::LogicalModulesRef; + mod utils { - use crate::general::app::m_executor::FnExeCtx; - use crate::general::app::InstanceManager; + use wasmedge_sdk::{Caller, Instance, Memory}; + use crate::{ - general::m_os::OperatingSystem, sys::LogicalModulesRef, util::SendNonNull, - worker::m_kv_user_client::KvUserClient, + general::m_os::OperatingSystem, + sys::LogicalModulesRef, + util::SendNonNull, + worker::{ + func::{m_instance_manager::InstanceManager, FnExeCtx}, + m_kv_user_client::KvUserClient, + }, }; - use wasmedge_sdk::{Caller, Instance, Memory}; pub trait WasmCtx { fn i_memory(&self, idx: u32) -> Option; @@ -148,6 +153,8 @@ mod utils { .as_ref() .unwrap() .instance_manager + .as_ref() + .unwrap() } } } diff --git a/src/main/src/general/app/app_owned/wasm_host_funcs/result.rs b/src/main/src/worker/func/wasm_host_funcs/result.rs similarity index 100% rename from src/main/src/general/app/app_owned/wasm_host_funcs/result.rs rename to src/main/src/worker/func/wasm_host_funcs/result.rs diff --git a/src/main/src/general/app/m_executor.rs b/src/main/src/worker/m_executor.rs similarity index 62% 
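The m_executor.rs hunks below drop the typed FnExeCtxAsync/FnExeCtxSync wrappers and go back to a single plain FnExeCtx. As a minimal sketch of the newtype-guard pattern being removed (a simplified reconstruction from the deleted lines, not the full API):

    // Simplified reconstruction: a private inner ctx plus an allow-list enum,
    // so only app types that support async execution can build the async ctx.
    struct FnExeCtx {
        app: String,
        _dummy_private: (), // keeps construction inside this module
    }

    enum FnExeCtxAsyncAllowedType {
        Jar,
        Wasm,
        Native,
    }

    struct FnExeCtxAsync {
        inner: FnExeCtx,
    }

    impl FnExeCtxAsync {
        fn new(_ty: FnExeCtxAsyncAllowedType, app: String) -> Self {
            // An AppType without async support never reaches this constructor,
            // because TryFrom<AppType> for the allow-list enum rejects it first.
            Self {
                inner: FnExeCtx {
                    app,
                    _dummy_private: (),
                },
            }
        }
    }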
rename from src/main/src/general/app/m_executor.rs rename to src/main/src/worker/m_executor.rs index 4aade2d..da69b25 100644 --- a/src/main/src/general/app/m_executor.rs +++ b/src/main/src/worker/m_executor.rs @@ -1,9 +1,3 @@ -use crate::general::app::instance::m_instance_manager::InstanceManager; -use crate::general::app::instance::m_instance_manager::UnsafeFunctionCtx; -use crate::general::app::instance::InstanceTrait; -use crate::general::app::AppType; -use crate::general::app::FnMeta; -use crate::result::WSError; use crate::{ general::{ app::AppMetaManager, @@ -20,168 +14,32 @@ use crate::{ result::{WSResult, WsFuncError}, sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef}, util::JoinHandleWrapper, + worker::func::{m_instance_manager::UnsafeFunctionCtx, EventCtx, FnExeCtx, InstanceTrait}, }; use async_trait::async_trait; + use std::{ ptr::NonNull, sync::atomic::{AtomicU32, AtomicUsize}, time::{SystemTime, UNIX_EPOCH}, }; use tokio::sync::oneshot; -use tokio::task::JoinHandle; #[cfg(target_os = "linux")] use ws_derive::LogicalModule; +use super::func::m_instance_manager::InstanceManager; + pub type SubTaskId = u32; pub type SubTaskNotifier = oneshot::Sender; pub type SubTaskWaiter = oneshot::Receiver; -#[derive(Clone, Debug)] -pub enum EventCtx { - Http(String), - KvSet { key: Vec, opeid: Option }, -} - -impl EventCtx { - pub fn take_prev_kv_opeid(&mut self) -> Option { - match self { - EventCtx::KvSet { opeid, .. } => opeid.take(), - _ => None, - } - } -} - -struct FnExeCtx { - pub app: String, - pub app_type: AppType, - pub func: String, - pub func_meta: FnMeta, - pub req_id: ReqId, - pub event_ctx: EventCtx, - pub res: Option, - /// remote scheduling tasks - pub sub_waiters: Vec>, // pub trigger_node: NodeID, - _dummy_private: (), -} - -pub enum FnExeCtxAsyncAllowedType { - Jar, - Wasm, - Native, -} - -impl TryFrom for FnExeCtxAsyncAllowedType { - type Error = WSError; - fn try_from(v: AppType) -> Result { - match v { - AppType::Jar => Ok(FnExeCtxAsyncAllowedType::Jar), - AppType::Wasm => Ok(FnExeCtxAsyncAllowedType::Wasm), - AppType::Native => Ok(FnExeCtxAsyncAllowedType::Native), - } - } -} - -impl Into for FnExeCtxAsyncAllowedType { - fn into(self) -> AppType { - match self { - FnExeCtxAsyncAllowedType::Jar => AppType::Jar, - FnExeCtxAsyncAllowedType::Wasm => AppType::Wasm, - FnExeCtxAsyncAllowedType::Native => AppType::Native, - } - } -} - -pub struct FnExeCtxAsync { - inner: FnExeCtx, -} - -impl FnExeCtxAsync { - pub fn new( - apptype: FnExeCtxAsyncAllowedType, - app: String, - func: String, - func_meta: FnMeta, - req_id: ReqId, - event_ctx: EventCtx, - ) -> Self { - Self { - inner: FnExeCtx { - app, - func, - req_id, - event_ctx, - res: None, - sub_waiters: vec![], - app_type: apptype.into(), - func_meta, - _dummy_private: (), - }, - } - } -} - -pub enum FnExeCtxSyncAllowedType { - Native, -} - -impl Into for FnExeCtxSyncAllowedType { - fn into(self) -> AppType { - AppType::Native - } -} - -pub struct FnExeCtxSync { - inner: FnExeCtx, -} - -impl FnExeCtxSync { - pub fn new( - apptype: FnExeCtxAsyncAllowedType, - app: String, - func: String, - func_meta: FnMeta, - req_id: ReqId, - event_ctx: EventCtx, - ) -> Self { - Self { - inner: FnExeCtx { - app, - func, - req_id, - event_ctx, - res: None, - sub_waiters: vec![], - app_type: apptype.into(), - func_meta, - _dummy_private: (), - }, - } - } -} - -impl FnExeCtx { - pub fn empty_http(&self) -> bool { - match &self.event_ctx { - EventCtx::Http(str) => str.len() == 0, - _ => false, - } - } - /// call this when you are 
sure it's a http event - pub fn http_str_unwrap(&self) -> String { - match &self.event_ctx { - EventCtx::Http(str) => str.to_owned(), - _ => panic!("not a http event"), - } - } -} - logical_module_view_impl!(ExecutorView); logical_module_view_impl!(ExecutorView, p2p, P2PModule); logical_module_view_impl!(ExecutorView, appmeta_manager, AppMetaManager); -logical_module_view_impl!(ExecutorView, instance_manager, InstanceManager); -logical_module_view_impl!(ExecutorView, executor, Executor); +logical_module_view_impl!(ExecutorView, instance_manager, Option); +logical_module_view_impl!(ExecutorView, executor, Option); #[derive(LogicalModule)] pub struct Executor { @@ -261,15 +119,6 @@ impl Executor { .fetch_add(1, std::sync::atomic::Ordering::Relaxed); taskid } - - pub async fn local_call_execute_async(&self, ctx: FnExeCtxAsync) -> WSResult> { - self.execute(ctx.inner).await - } - - pub fn local_call_execute_sync(&self, ctx: FnExeCtxSync) -> WSResult> { - self.execute_sync(ctx) - } - pub async fn handle_distribute_task( &self, resp: RPCResponsor, @@ -278,7 +127,7 @@ impl Executor { tracing::debug!("receive distribute task: {:?}", req); let app = req.app.to_owned(); let func = req.func.to_owned(); - let (appmeta, _) = match self.view.appmeta_manager().get_app_meta(&app).await { + let appmeta = match self.view.appmeta_manager().get_app_meta(&app).await { Ok(Some(appmeta)) => appmeta, Ok(None) => { tracing::warn!("app {} not found in data meta", app); @@ -323,56 +172,21 @@ impl Executor { return; }; - // distribute task requires async support - if !fnmeta.sync_async.asyncable() { - let warn = format!( - "func {} not support async, meta:{:?}", - func, fnmeta.sync_async - ); - tracing::warn!("{}", warn); - if let Err(err) = resp - .send_resp(DistributeTaskResp { - success: false, - err_msg: warn, - }) - .await - { - tracing::error!("send distribute task resp failed with err: {}", err); - } - return; - } - - // construct async fn exe ctx - let ctx = FnExeCtxAsync::new( - match FnExeCtxAsyncAllowedType::try_from(apptype) { - Ok(v) => v, - Err(err) => { - let warn = format!("app type {:?} not supported, err: {}", apptype, err); - tracing::warn!("{}", warn); - if let Err(err) = resp - .send_resp(DistributeTaskResp { - success: false, - err_msg: warn, - }) - .await - { - tracing::error!("send distribute task resp failed with err: {}", err); - } - return; - } - }, - req.app, - req.func, - fnmeta.clone(), - req.task_id as usize, - match req.trigger.unwrap() { + let ctx = FnExeCtx { + app: req.app, + app_type: apptype, + func_meta: fnmeta.clone(), + func: req.func, + req_id: 0, + res: None, + event_ctx: match req.trigger.unwrap() { distribute_task_req::Trigger::KvSet(set) => EventCtx::KvSet { key: set.key, opeid: Some(set.opeid), }, }, - ); - + sub_waiters: vec![], + }; if let Err(err) = resp .send_resp(DistributeTaskResp { success: true, @@ -382,7 +196,7 @@ impl Executor { { tracing::error!("send sche resp for app:{app} fn:{func} failed with err: {err}"); } - let _ = self.execute(ctx.inner).await; + let _ = self.execute(ctx).await; } pub async fn handle_http_task(&self, route: &str, text: String) -> WSResult> { @@ -415,10 +229,7 @@ impl Executor { let funcname = split[1]; // check app exist - tracing::debug!("calling get_app_meta to check app exist, app: {}", appname); - let Some((appmeta, datameta_opt)) = - self.view.appmeta_manager().get_app_meta(appname).await? - else { + let Some(app) = self.view.appmeta_manager().get_app_meta(appname).await? 
else { tracing::warn!("app {} not found", appname); return Err(WsFuncError::AppNotFound { app: appname.to_owned(), @@ -426,8 +237,8 @@ impl Executor { .into()); }; // check func exist - let Some(func) = appmeta.get_fn_meta(funcname) else { - tracing::warn!("func {} not found, exist:{:?}", funcname, appmeta.fns()); + let Some(func) = app.get_fn_meta(funcname) else { + tracing::warn!("func {} not found, exist:{:?}", funcname, app.fns()); return Err(WsFuncError::FuncNotFound { app: appname.to_owned(), func: funcname.to_owned(), @@ -435,14 +246,6 @@ impl Executor { .into()); }; - // get app file and extract to execute dir - if let Some(datameta) = datameta_opt { - self.view - .appmeta_manager() - .load_app_file(appname, datameta) - .await?; - } - ///////////////////////////////////////////////// // valid call /////////////////////////////////// if func @@ -465,30 +268,20 @@ impl Executor { } ///////////////////////////////////////////////// - // prepare ctx and run ////////////////////////// - - if func.sync_async.asyncable() { - let ctx = FnExeCtxAsync::new( - FnExeCtxAsyncAllowedType::try_from(appmeta.app_type.clone()).unwrap(), - appname.to_owned(), - funcname.to_owned(), - func.clone(), - req_id, - EventCtx::Http(text), - ); - self.execute(ctx).await - } else { - let ctx = FnExeCtxSync::new( - FnExeCtxAsyncAllowedType::try_from(appmeta.app_type.clone()).unwrap(), - appname.to_owned(), - funcname.to_owned(), - func.clone(), - req_id, - EventCtx::Http(text), - ); + // run ////////////////////////////////////////// + + let ctx = FnExeCtx { + app: appname.to_owned(), + app_type: app.app_type.clone(), + func: funcname.to_owned(), + req_id, + res: None, + event_ctx: EventCtx::Http(text), + sub_waiters: vec![], + func_meta: func.clone(), + }; - self.execute_sync(ctx) - } + self.execute(ctx).await } // pub async fn execute_http_app(&self, fn_ctx_builder: FunctionCtxBuilder) { // let app_meta_man = self.view.instance_manager().app_meta_manager.read().await; @@ -520,11 +313,7 @@ impl Executor { // // .finish_using(&sche_req.app, vm) // // .await // } - - fn execute_sync(&self, ctx: FnExeCtxSync) -> WSResult> {} - - /// prepare app and func before call execute - async fn execute(&self, mut fn_ctx: FnExeCtxAsync) -> WSResult> { + async fn execute(&self, mut fn_ctx: FnExeCtx) -> WSResult> { // let app = fn_ctx.app.clone(); // let func = fn_ctx.func.clone(); // let event = fn_ctx.event_ctx.clone(); diff --git a/src/main/src/worker/m_http_handler.rs b/src/main/src/worker/m_http_handler.rs index 1e80ac0..04112b9 100644 --- a/src/main/src/worker/m_http_handler.rs +++ b/src/main/src/worker/m_http_handler.rs @@ -1,4 +1,3 @@ -use crate::general::app::m_executor::Executor; use crate::{ general::network::http_handler::{start_http_handler, HttpHandler}, logical_module_view_impl, @@ -7,10 +6,15 @@ use crate::{ util::{JoinHandleWrapper, WithBind}, }; use async_trait::async_trait; -use axum::{response::Response, Router}; +use axum::{ + response::{Response}, + Router, +}; use parking_lot::Mutex; use ws_derive::LogicalModule; +use super::m_executor::Executor; + #[derive(LogicalModule)] pub struct WorkerHttpHandler { view: WorkerHttpHandlerView, @@ -41,7 +45,7 @@ impl LogicalModule for WorkerHttpHandler { } logical_module_view_impl!(WorkerHttpHandlerView); -logical_module_view_impl!(WorkerHttpHandlerView, executor, Executor); +logical_module_view_impl!(WorkerHttpHandlerView, executor, Option); #[async_trait] impl HttpHandler for WorkerHttpHandler { diff --git a/src/main/src/worker/mod.rs b/src/main/src/worker/mod.rs 
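The m_http_handler.rs hunk above re-wraps worker-only modules as Option in the view macros (the generic argument looks truncated in this copy, so `Option<Executor>` is an assumption). A rough sketch of why, assuming the macro expands to plain field accessors:

    // Worker-only modules do not exist on master nodes, so the stored field
    // is an Option and the accessor unwraps with a descriptive panic message.
    struct Executor;

    struct LogicalModules {
        executor: Option<Executor>,
    }

    impl LogicalModules {
        fn executor(&self) -> &Executor {
            self.executor
                .as_ref()
                .expect("executor is only initialized on worker nodes")
        }
    }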
index efe51cf..bc253a9 100644 --- a/src/main/src/worker/mod.rs +++ b/src/main/src/worker/mod.rs @@ -1,3 +1,7 @@ +// pub mod function_event; +pub mod func; +// pub mod m_data_follower; +pub mod m_executor; pub mod m_http_handler; pub mod m_kv_user_client; pub mod m_worker; From db816436a410beec55a0a6f0f25378cf515f8821 Mon Sep 17 00:00:00 2001 From: pa <1020401660@qq.com> Date: Wed, 16 Apr 2025 01:26:06 +0800 Subject: [PATCH 22/26] Revert "feat: fix data read with parallel" This reverts commit 1dd284e48b7bf2b3f598ab12533bf8ca3f987d71. --- Cargo.lock | 1 - Cargo.toml | 1 - scripts/build/1.1build_core.py | 3 +- scripts/build/template/run_node.py | 1 - scripts/deploy_cluster/node_config.yaml | 8 +- src/main/Cargo.toml | 1 - .../general/data/m_data_general/dataitem.rs | 417 --------- src/main/src/general/data/mod.rs | 4 - .../src/general/{data => }/kv_interface.rs | 3 +- .../{app => m_appmeta_manager}/fn_event.rs | 0 .../{app => m_appmeta_manager}/http.rs | 2 - .../general/{app => m_appmeta_manager}/mod.rs | 197 ++--- .../{app => m_appmeta_manager}/v_os.rs | 0 .../mod.rs => m_data_general.rs} | 827 ++++++------------ .../src/general/{data => }/m_dist_lock.rs | 17 +- .../general/{data => }/m_kv_store_engine.rs | 22 +- src/main/src/general/m_os/mod.rs | 4 +- src/main/src/general/mod.rs | 8 +- src/main/src/general/network/msg_pack.rs | 5 +- src/main/src/general/network/proto_ext.rs | 54 +- .../src/general/network/proto_src/data.proto | 14 +- .../src/general/network/proto_src/sche.proto | 5 +- src/main/src/general/network/rpc_model.rs | 65 +- src/main/src/general/test_utils.rs | 3 +- src/main/src/main.rs | 15 +- src/main/src/master/m_data_master.rs | 139 ++- src/main/src/master/m_http_handler.rs | 40 +- src/main/src/master/m_master.rs | 2 - .../src/modules_global_bridge/process_func.rs | 2 +- src/main/src/result.rs | 51 +- src/main/src/sys.rs | 8 +- src/main/src/util.rs | 20 +- .../src/worker/func/m_instance_manager.rs | 2 +- src/main/src/worker/func/mod.rs | 2 +- src/main/src/worker/func/shared/process.rs | 18 +- .../shared/process_instance_man_related.rs | 4 +- .../src/worker/func/shared/process_rpc.rs | 34 +- .../src/worker/func/wasm_host_funcs/mod.rs | 36 +- src/main/src/worker/m_executor.rs | 75 +- src/main/src/worker/m_kv_user_client.rs | 122 +-- telego/README | 1 - telego/bin_waverless/deployment.yml | 20 - 42 files changed, 556 insertions(+), 1697 deletions(-) delete mode 100644 src/main/src/general/data/m_data_general/dataitem.rs delete mode 100644 src/main/src/general/data/mod.rs rename src/main/src/general/{data => }/kv_interface.rs (96%) rename src/main/src/general/{app => m_appmeta_manager}/fn_event.rs (100%) rename src/main/src/general/{app => m_appmeta_manager}/http.rs (97%) rename src/main/src/general/{app => m_appmeta_manager}/mod.rs (88%) rename src/main/src/general/{app => m_appmeta_manager}/v_os.rs (100%) rename src/main/src/general/{data/m_data_general/mod.rs => m_data_general.rs} (62%) rename src/main/src/general/{data => }/m_dist_lock.rs (98%) rename src/main/src/general/{data => }/m_kv_store_engine.rs (96%) delete mode 100644 telego/README delete mode 100644 telego/bin_waverless/deployment.yml diff --git a/Cargo.lock b/Cargo.lock index 028b3a8..ad35ee5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3531,7 +3531,6 @@ dependencies = [ "async-raft", "async-trait", "axum", - "base64 0.22.1", "bincode", "camelpaste", "clap", diff --git a/Cargo.toml b/Cargo.toml index 452a321..ae25ebd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,7 +59,6 @@ hyper = { version = "0.14.18", features = 
["server"] } md-5 = "0.10.1" path-absolutize = "3.0.13" dashmap = "6.1.0" -base64 = "0.22.1" [profile.test] # 0: no optimizations diff --git a/scripts/build/1.1build_core.py b/scripts/build/1.1build_core.py index 7dbf134..7ff215d 100644 --- a/scripts/build/1.1build_core.py +++ b/scripts/build/1.1build_core.py @@ -37,6 +37,5 @@ def print_title(title): os_system_sure("mkdir -p pack/waverless_backend") BACKEND_PATH=os.path.abspath("pack/waverless_backend") os.chdir("../../") -os_system_sure("rustup default 1.79.0") os_system_sure("$HOME/.cargo/bin/cargo build --release") -os_system_sure(f"cp target/release/wasm_serverless {BACKEND_PATH}") +os_system_sure(f"cp target/release/wasm_serverless {BACKEND_PATH}") \ No newline at end of file diff --git a/scripts/build/template/run_node.py b/scripts/build/template/run_node.py index 7f5422d..042abe3 100644 --- a/scripts/build/template/run_node.py +++ b/scripts/build/template/run_node.py @@ -1,4 +1,3 @@ -#!/usr/bin/python3 # NODE_ID=$1 # wasm_serverless $NODE_ID test_dir diff --git a/scripts/deploy_cluster/node_config.yaml b/scripts/deploy_cluster/node_config.yaml index c7d83ba..7f54c57 100644 --- a/scripts/deploy_cluster/node_config.yaml +++ b/scripts/deploy_cluster/node_config.yaml @@ -1,11 +1,11 @@ nodes: - 2: - addr: 192.168.31.87:2500 + 9: + addr: 192.168.31.9:2500 spec: - meta - master - 3: - addr: 192.168.31.96:2500 + 10: + addr: 192.168.31.240:2500 spec: - meta - worker diff --git a/src/main/Cargo.toml b/src/main/Cargo.toml index a2d63da..f4d1de5 100644 --- a/src/main/Cargo.toml +++ b/src/main/Cargo.toml @@ -61,7 +61,6 @@ hyper.workspace = true md-5.workspace = true path-absolutize.workspace = true dashmap.workspace = true -base64.workspace = true [dependencies.uuid] version = "1.8.0" diff --git a/src/main/src/general/data/m_data_general/dataitem.rs b/src/main/src/general/data/m_data_general/dataitem.rs deleted file mode 100644 index 3ccbd85..0000000 --- a/src/main/src/general/data/m_data_general/dataitem.rs +++ /dev/null @@ -1,417 +0,0 @@ -use crate::general::data::m_data_general::DataItemIdx; -use crate::general::data::m_data_general::GetOrDelDataArgType; -use crate::general::network::proto; -use crate::general::network::proto_ext::ProtoExtDataItem; -use crate::result::WSError; -use crate::result::WSResult; -use crate::result::WsDataError; -use crate::result::WsIoErr; -use crate::result::WsRuntimeErr; -use base64::Engine; -use futures::future::join_all; -use std::collections::btree_set; -use std::ops::Range; -use std::path::PathBuf; -use std::sync::Arc; - -use super::CacheModeVisitor; -use super::DataSplitIdx; - -// iterator for wanted dataitem idxs -pub(super) enum WantIdxIter<'a> { - PartialMany { - iter: btree_set::Iter<'a, DataItemIdx>, - }, - PartialOne { - idx: DataItemIdx, - itercnt: u8, - }, - Other { - ty: GetOrDelDataArgType, - itercnt: u8, - len: u8, - }, -} - -impl<'a> WantIdxIter<'a> { - pub(super) fn new(ty: &'a GetOrDelDataArgType, itemcnt: DataItemIdx) -> Self { - match ty { - GetOrDelDataArgType::PartialMany { idxs } => Self::PartialMany { iter: idxs.iter() }, - GetOrDelDataArgType::Delete | GetOrDelDataArgType::All => Self::Other { - ty: ty.clone(), - itercnt: 0, - len: itemcnt, - }, - GetOrDelDataArgType::PartialOne { idx } => Self::PartialOne { - idx: *idx, - itercnt: 0, - }, - } - } -} - -impl<'a> Iterator for WantIdxIter<'a> { - type Item = DataItemIdx; - fn next(&mut self) -> Option { - match self { - WantIdxIter::PartialMany { iter, .. 
} => iter.next().map(|v| *v as DataItemIdx), - WantIdxIter::PartialOne { idx, itercnt } => { - if *itercnt == 0 { - Some(*idx) - } else { - None - } - } - WantIdxIter::Other { ty, itercnt, len } => match ty { - GetOrDelDataArgType::Delete | GetOrDelDataArgType::All => { - if itercnt == len { - None - } else { - let ret = *itercnt; - *itercnt += 1; - Some(ret) - } - } - GetOrDelDataArgType::PartialMany { .. } - | GetOrDelDataArgType::PartialOne { .. } => { - panic!("PartialMany should be handled by iter") - } - }, - } - } -} - -pub struct SharedMemHolder { - data: Arc>, -} - -impl SharedMemHolder { - pub fn try_take_data(self) -> Option> { - // SAFETY: - // 1. We're only replacing the Arc with an empty Vec - // 2. The original Arc will be dropped properly - // 3. This is safe as long as this is the only reference to the Arc - // unsafe { - // let ptr = &self.data as *const Arc> as *mut Arc>; - if Arc::strong_count(&self.data) == 1 { - Some(Arc::try_unwrap(self.data).unwrap()) - } else { - None - } - } - // } -} - -pub struct SharedMemOwnedAccess { - data: Arc>, - range: Range, -} - -impl SharedMemOwnedAccess { - pub unsafe fn as_bytes_mut(&self) -> &mut [u8] { - // SAFETY: - // 1. We have &mut self, so we have exclusive access to this data - // 2. The underlying memory is valid for the entire Arc allocation - let full_slice = unsafe { - std::slice::from_raw_parts_mut(self.data.as_ptr() as *mut u8, self.data.len()) - }; - &mut full_slice[self.range.clone()] - } -} - -pub fn new_shared_mem(splits: &Vec>) -> (SharedMemHolder, Vec) { - let len = splits.iter().map(|range| range.len()).sum(); - let data = Arc::new(vec![0; len]); - let owned_accesses = splits - .iter() - .map(|range| SharedMemOwnedAccess { - data: Arc::clone(&data), - range: range.clone(), - }) - .collect(); - tracing::debug!("new_shared_mem, total_len: {}, splits: {:?}", len, splits); - (SharedMemHolder { data }, owned_accesses) -} - -pub enum WriteSplitDataTaskGroup { - ToFile { - file_path: PathBuf, - tasks: Vec>>, - }, - ToMem { - shared_mem: SharedMemHolder, - tasks: Vec>>, - }, -} - -impl WriteSplitDataTaskGroup { - pub async fn new( - unique_id: Vec, - splits: Vec>, - mut rx: tokio::sync::mpsc::Receiver>, - cachemode: CacheModeVisitor, - ) -> WSResult { - tracing::debug!( - "new merge task group for uid({:?}), cachemode({})", - unique_id, - cachemode.0 - ); - if cachemode.is_map_file() { - tracing::debug!("cachemode is map_file"); - // base64 - // let file_path = PathBuf::from(format!("{:?}.data", unique_id)); - let file_path = PathBuf::from(format!( - "{}.data", - base64::engine::general_purpose::STANDARD.encode(&unique_id) - )); - - let file = std::fs::OpenOptions::new() - .create(true) - .write(true) - .open(&file_path)?; - let file = std::sync::Arc::new(file); - - let mut tasks = vec![]; - for _ in 0..splits.len() { - let parital_data = rx.recv().await.unwrap(); - match parital_data { - Err(e) => { - return Err(e); - } - Ok((splitidx, split_data_item)) => { - let file = file.clone(); - let unique_id = unique_id.clone(); - let split_range = splits[splitidx as usize].clone(); - - let task = tokio::task::spawn_blocking(move || { - let Some(split_data_bytes) = split_data_item.as_raw_bytes() else { - return Err(WsDataError::SplitDataItemNotRawBytes { - unique_id: unique_id.clone(), - splitidx, - } - .into()); - }; - - if split_range.len() != split_data_bytes.len() { - return Err(WsDataError::SplitLenMismatch { - unique_id, - splitidx, - expect: split_range.len(), - actual: split_data_bytes.len(), - } - .into()); - } - // 
SAFETY: Each task writes to a different non-overlapping portion of the file - use std::os::unix::fs::FileExt; - if let Err(e) = - file.write_at(split_data_bytes, split_range.start as u64) - { - return Err(WSError::WsIoErr(WsIoErr::Io(e))); - } - Ok(()) - }); - tasks.push(task); - } - } - } - Ok(Self::ToFile { file_path, tasks }) - } else if cachemode.is_map_common_kv() { - tracing::debug!("cachemode is map_common_kv"); - let (shared_mem, owned_accesses) = new_shared_mem(&splits); - let mut owned_accesses = owned_accesses - .into_iter() - .map(|access| Some(access)) - .collect::>(); - let mut tasks = vec![]; - for _ in 0..splits.len() { - let parital_data = rx.recv().await.unwrap(); - match parital_data { - Err(e) => { - return Err(e); - } - Ok((splitidx, split_data_item)) => { - let owned_access = owned_accesses[splitidx].take().unwrap(); - let unique_id = unique_id.clone(); - let task = tokio::spawn(async move { - // write to shared memory - let access = unsafe { owned_access.as_bytes_mut() }; - let Some(split_data_item) = split_data_item.as_raw_bytes() else { - return Err(WsDataError::SplitDataItemNotRawBytes { - unique_id: unique_id.clone(), - splitidx, - } - .into()); - }; - if access.len() != split_data_item.len() { - return Err(WsDataError::SplitLenMismatch { - unique_id: unique_id.clone(), - splitidx, - expect: access.len(), - actual: split_data_item.len(), - } - .into()); - } - access.copy_from_slice(split_data_item); - Ok(()) - }); - tasks.push(task); - } - } - } - Ok(Self::ToMem { shared_mem, tasks }) - } else { - panic!("cachemode should be map_file or map_mem"); - } - } - - pub async fn join(self) -> WSResult { - match self { - WriteSplitDataTaskGroup::ToFile { file_path, tasks } => { - let taskress = join_all(tasks).await; - for res in taskress { - if res.is_err() { - return Err(WSError::from(WsRuntimeErr::TokioJoin { - err: res.unwrap_err(), - context: "write split data to file".to_owned(), - })); - } - if res.as_ref().unwrap().is_err() { - return Err(res.unwrap().unwrap_err()); - } - } - Ok(proto::DataItem::new_file_data(file_path, false)) - } - WriteSplitDataTaskGroup::ToMem { - shared_mem: shared_mems, - tasks, - } => { - let taskress = join_all(tasks).await; - for res in taskress { - if res.is_err() { - return Err(WSError::from(WsRuntimeErr::TokioJoin { - err: res.unwrap_err(), - context: "write split data to file".to_owned(), - })); - } - if res.as_ref().unwrap().is_err() { - return Err(res.unwrap().unwrap_err()); - } - } - // convert to dataitem - Ok(proto::DataItem::new_raw_bytes( - shared_mems - .try_take_data() - .expect("shared_mems should be take when all partial task stoped"), - )) - } - } - } -} - -// pub async fn read_splitdata_from_nodes_to_file<'a>( -// ty: &GetOrDelDataArgType, -// unique_id: &[u8], -// view: &DataGeneralView, -// meta: &DataSetMetaV2, -// each_node_data: HashMap, -// ) ->ReadSplitDataTask{ -// // prepare file with meta size -// let file_path = format!("{}.data", unique_id); -// let file = File::create(file_path)?; - -// // parallel read and write to position of file with pwrite -// let mut tasks = vec![]; -// // get idxs, one idx one file - -// for (node_id, req) in each_node_data { -// let view = view.clone(); -// let task = tokio::spawn(async move { -// let res = view -// .data_general() -// .rpc_call_get_data -// .call(view.p2p(), node_id, req, Some(Duration::from_secs(30))) -// .await; -// match res { -// Err(err) => { -// tracing::warn!("get/delete data failed {}", err); -// vec![] -// } -// Ok(res) => { -// res. 
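The ToFile branch above leans on pwrite-style positioned writes: each spawned task writes its own disjoint byte range, so no file cursor is shared and no locking is needed. A compact, dependency-free sketch of that pattern (illustrative names; plain threads stand in for spawn_blocking, and short writes are ignored just as in the deleted code):

    use std::os::unix::fs::FileExt; // write_at == pwrite(2): no shared file cursor
    use std::sync::Arc;

    fn write_disjoint_splits(
        path: &std::path::Path,
        splits: Vec<(u64, Vec<u8>)>, // (file offset, bytes); ranges must not overlap
    ) -> std::io::Result<()> {
        let file = Arc::new(
            std::fs::OpenOptions::new()
                .create(true)
                .write(true)
                .open(path)?,
        );
        let mut handles = Vec::new();
        for (offset, bytes) in splits {
            let file = Arc::clone(&file);
            handles.push(std::thread::spawn(move || {
                // each task owns a disjoint range, so concurrent write_at is safe
                file.write_at(&bytes, offset).map(|_| ())
            }));
        }
        for h in handles {
            h.join().expect("writer thread panicked")?;
        }
        Ok(())
    }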
-// // get offset and size by meta with got - -// vec![] -// }, -// } -// }); -// tasks.push(task); -// } -// Ok(HashMap::new()) -// } - -// pub async fn read_splitdata_from_nodes_to_mem<'a>( -// ty: &GetOrDelDataArgType, -// unique_id: &[u8], -// view: &DataGeneralView, -// meta: &DataSetMetaV2, -// each_node_data: HashMap, -// ) -> ReadSplitDataTask { -// // read to mem -// let mut tasks = vec![]; -// for (node_id, req) in each_node_data { -// let view = view.clone(); -// let task = tokio::spawn(async move { -// let req_idxs = req.idxs.clone(); -// tracing::debug!("rpc_call_get_data start, remote({})", node_id); -// let res = view -// .data_general() -// .rpc_call_get_data -// .call(view.p2p(), node_id, req, Some(Duration::from_secs(30))) -// .await; -// tracing::debug!("rpc_call_get_data returned, remote({})", node_id); -// let res: WSResult> = res.map(|response| { -// if !response.success { -// tracing::warn!("get/delete data failed {}", response.message); -// vec![] -// } else { -// req_idxs.into_iter().zip(response.data).collect() -// } -// }); -// (node_id, res) -// }); -// tasks.push(task); -// } - -// let mut node_partialdatas: HashMap<(NodeID, DataItemIdx), proto::DataItem> = HashMap::new(); -// for tasks in tasks { -// let (node_id, partdata) = tasks.await.map_err(|err| { -// WSError::from(WsRuntimeErr::TokioJoin { -// err, -// context: "get_or_del_data - get_or_del ing remote data".to_owned(), -// }) -// })?; - -// match partdata { -// Err(err) => { -// return Err(err); -// } -// Ok(partdata) => { -// for (idx, data_item) in partdata { -// let _ = node_partialdatas.insert((node_id, idx as u8), data_item); -// } -// } -// } -// } - -// let mut idx_2_data_item: HashMap = HashMap::new(); -// for idx in WantIdxIter::new(&ty) { -// let data_split = &meta.datas_splits[idx as usize]; -// let data_item = data_split.recorver_data(unique_id, idx, &mut node_partialdatas)?; - -// idx_2_data_item -// .insert(idx, proto::DataItem::new_raw_bytes(data_item)) -// .expect("dataitem should be unique with idx"); -// } - -// Ok(idx_2_data_item) -// } diff --git a/src/main/src/general/data/mod.rs b/src/main/src/general/data/mod.rs deleted file mode 100644 index b88a7a9..0000000 --- a/src/main/src/general/data/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub mod kv_interface; -pub mod m_data_general; -pub mod m_dist_lock; -pub mod m_kv_store_engine; diff --git a/src/main/src/general/data/kv_interface.rs b/src/main/src/general/kv_interface.rs similarity index 96% rename from src/main/src/general/data/kv_interface.rs rename to src/main/src/general/kv_interface.rs index 011a12e..2667dcd 100644 --- a/src/main/src/general/data/kv_interface.rs +++ b/src/main/src/general/kv_interface.rs @@ -1,10 +1,11 @@ use crate::{ - general::network::proto, result::WSResult, sys::{LogicalModule, NodeID}, }; use async_trait::async_trait; +use super::network::proto; + pub struct KvOptions { spec_node: Option, } diff --git a/src/main/src/general/app/fn_event.rs b/src/main/src/general/m_appmeta_manager/fn_event.rs similarity index 100% rename from src/main/src/general/app/fn_event.rs rename to src/main/src/general/m_appmeta_manager/fn_event.rs diff --git a/src/main/src/general/app/http.rs b/src/main/src/general/m_appmeta_manager/http.rs similarity index 97% rename from src/main/src/general/app/http.rs rename to src/main/src/general/m_appmeta_manager/http.rs index aab77e3..49bfbc0 100644 --- a/src/main/src/general/app/http.rs +++ b/src/main/src/general/m_appmeta_manager/http.rs @@ -40,9 +40,7 @@ pub(super) fn binds(router: Router, 
view: super::View) -> Router { } async fn call_app_fn(Path((app, func)): Path<(String, String)>, body: String) -> Response { - tracing::debug!("handle func request app: {}, func: {}", app, func); if view().p2p().nodes_config.this.1.is_master() { - tracing::debug!("app: {:?}, func: {:?}", app, func); view() .http_handler() .handle_request(&format!("{app}/{func}"), body) diff --git a/src/main/src/general/app/mod.rs b/src/main/src/general/m_appmeta_manager/mod.rs similarity index 88% rename from src/main/src/general/app/mod.rs rename to src/main/src/general/m_appmeta_manager/mod.rs index 58dd793..79cd346 100644 --- a/src/main/src/general/app/mod.rs +++ b/src/main/src/general/m_appmeta_manager/mod.rs @@ -3,25 +3,18 @@ mod http; mod v_os; use self::v_os::AppMetaVisitOs; +use super::{ + m_data_general::{DataGeneral, DATA_UID_PREFIX_APP_META}, + m_kv_store_engine::{KeyTypeServiceList, KvAdditionalConf, KvStoreEngine}, + m_os::OperatingSystem, + network::{http_handler::HttpHandler, m_p2p::P2PModule}, +}; use crate::{general::network::proto, result::WSResultExt, worker::m_executor::Executor}; -use crate::{general::network::proto_ext::ProtoExtDataItem, util::VecExt}; use crate::{ general::{ - data::{ - kv_interface::KvOps, - m_data_general::{DataGeneral, DATA_UID_PREFIX_APP_META}, - m_kv_store_engine::{KeyTypeServiceList, KvAdditionalConf, KvStoreEngine}, - }, - m_os::OperatingSystem, - network::{ - http_handler::HttpHandler, - m_p2p::P2PModule, - proto::{data_schedule_context::OpeRole, DataOpeRoleUploadApp}, - }, + kv_interface::KvOps, + network::proto::{data_schedule_context::OpeRole, DataOpeRoleUploadApp}, }, - result::{WSError, WsDataError}, -}; -use crate::{ logical_module_view_impl, master::m_master::Master, result::{ErrCvt, WSResult, WsFuncError}, @@ -29,7 +22,6 @@ use crate::{ util::{self, JoinHandleWrapper}, worker::func::m_instance_manager::InstanceManager, }; - use async_trait::async_trait; use axum::body::Bytes; use enum_as_inner::EnumAsInner; @@ -45,8 +37,6 @@ use tokio::sync::RwLock; use ws_derive::LogicalModule; -use super::data::m_data_general::{GetOrDelDataArg, GetOrDelDataArgType}; - logical_module_view_impl!(View); logical_module_view_impl!(View, os, OperatingSystem); logical_module_view_impl!(View, kv_store_engine, KvStoreEngine); @@ -259,7 +249,7 @@ impl AppMeta { // } pub struct AppMetas { - tmp_app_metas: HashMap, + app_metas: HashMap, pattern_2_app_fn: HashMap>, } @@ -539,7 +529,7 @@ fn view() -> &'static View { #[derive(LogicalModule)] pub struct AppMetaManager { - meta: RwLock, + pub meta: RwLock, pub fs_layer: AppMetaVisitOs, view: View, // app_meta_list_lock: Mutex<()>, @@ -562,7 +552,7 @@ impl LogicalModule for AppMetaManager { let fs_layer = AppMetaVisitOs::new(view.clone()); Self { meta: RwLock::new(AppMetas { - tmp_app_metas: HashMap::new(), + app_metas: HashMap::new(), pattern_2_app_fn: HashMap::new(), }), view, @@ -599,9 +589,35 @@ impl AppMetas { // } // pub async fn set_tmp_appmeta(&self, ) fn get_tmp_app_meta(&self, app: &str) -> Option { - self.tmp_app_metas.get(app).cloned() + self.app_metas.get(app).cloned() } + pub async fn get_app_meta(&self, app: &str) -> Option { + if let Some(res) = self.get_tmp_app_meta(app) { + return Some(res); + } + + // self.app_metas.get(app) + let meta = view() + .data_general() + .get_data_item(format!("{}{}", DATA_UID_PREFIX_APP_META, app).as_bytes(), 0) + .await; + let Some(proto::DataItem { + data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(metabytes)), + }) = meta + else { + return None; + }; + let meta = 
bincode::deserialize_from::<_, AppMeta>(Cursor::new(metabytes)); + let meta = match meta { + Err(e) => { + tracing::warn!("meta decode failed {:?}", e); + return None; + } + Ok(meta) => meta, + }; + Some(meta) + } pub fn get_pattern_triggers( &self, pattern: impl Borrow, @@ -671,7 +687,7 @@ impl AppMetas { // } // } // } - let _ = self.tmp_app_metas.insert(app_name, meta); + let _ = self.app_metas.insert(app_name, meta); } Ok(()) } @@ -690,7 +706,7 @@ impl AppMetaManager { .meta .write() .await - .tmp_app_metas + .app_metas .insert(tmpapp.to_owned(), appmeta.clone()); tracing::debug!("record app meta to make checkpoint {}", tmpapp); self.view @@ -707,7 +723,7 @@ impl AppMetaManager { .meta .write() .await - .tmp_app_metas + .app_metas .remove(tmpapp) .unwrap_or_else(|| { panic!("remove app meta failed, app: {}", tmpapp); @@ -717,91 +733,13 @@ impl AppMetaManager { Ok(appmeta) } pub async fn app_available(&self, app: &str) -> WSResult { - match self + Ok(self .view .data_general() - .get_or_del_datameta_from_master( - format!("{}{}", DATA_UID_PREFIX_APP_META, app).as_bytes(), - false, - ) + .get_data_item(format!("{}{}", DATA_UID_PREFIX_APP_META, app).as_bytes(), 0) .await - { - Err(err) => match err { - WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }) => { - tracing::debug!( - "app meta not found, app: {}", - std::str::from_utf8(&*uniqueid).unwrap() - ); - Ok(false) - } - _ => Err(err), - }, - Ok(_) => Ok(true), - } - } - - // call inner AppMetas.get_app_meta - pub async fn get_app_meta(&self, app: &str) -> WSResult> { - if let Some(res) = self.meta.read().await.get_tmp_app_meta(app) { - return Ok(Some(res)); - } - - // self.app_metas.get(app) - let meta = view() - .data_general() - .get_or_del_data(GetOrDelDataArg { - meta: None, - unique_id: format!("{}{}", DATA_UID_PREFIX_APP_META, app).into(), - ty: GetOrDelDataArgType::PartialOne { idx: 0 }, - }) - .await; - - // only one data item - let (_, meta): (_, proto::DataItem) = match meta { - Ok((_, datas)) => datas.into_iter().next().unwrap(), - Err(err) => match err { - WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }) => { - tracing::debug!( - "get_app_meta not exist, uniqueid: {:?}", - std::str::from_utf8(&*uniqueid) - ); - return Ok(None); - } - _ => { - tracing::warn!("get_app_meta failed with err {:?}", err); - return Err(err); - } - }, - }; - - let proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(metabytes)), - } = meta - else { - return Err(WsFuncError::InvalidAppMetaDataItem { - app: app.to_owned(), - } - .into()); - }; - - let meta = bincode::deserialize_from::<_, AppMeta>(Cursor::new(&metabytes)); - let meta = match meta { - Err(e) => { - tracing::warn!( - "meta decode failed with data:{:?}, err:{:?}", - metabytes.limit_range_debug(0..100), - e - ); - return Err(WsFuncError::InvalidAppMetaDataItem { - app: app.to_owned(), - } - .into()); - } - Ok(meta) => meta, - }; - Ok(Some(meta)) + .is_some()) } - pub async fn app_uploaded(&self, appname: String, data: Bytes) -> WSResult<()> { // 1. tmpapp name & dir // TODO: fobidden tmpapp public access @@ -813,7 +751,7 @@ impl AppMetaManager { // 2. 
unzip app pack let tmpappdir2 = tmpappdir.clone(); // remove old dir&app - if let Some(_) = self.meta.write().await.tmp_app_metas.remove(&tmpapp) { + if let Some(_) = self.meta.write().await.app_metas.remove(&tmpapp) { tracing::debug!("remove old app meta {}", tmpapp); } let ins = self.view.instance_manager().app_instances.remove(&tmpapp); @@ -877,36 +815,27 @@ impl AppMetaManager { // let _ = fs::remove_dir_all(&tmpappdir).map_err(|e| WSError::from(WsIoErr::Io(e)))?; // 3. broadcast meta and appfile - let write_data_id = format!("{}{}", DATA_UID_PREFIX_APP_META, appname); - let write_datas = vec![ - proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes( - bincode::serialize(&appmeta).unwrap(), - )), - }, - proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::File( - proto::FileData { - file_name_opt: format!("apps/{}", appname), - is_dir_opt: true, - file_content: zipfiledata, - }, - )), - }, - ]; - tracing::debug!( - "2broadcast meta and appfile, datasetid: {}, datas: {:?}", - write_data_id, - write_datas - .iter() - .map(|v| v.to_string()) - .collect::>() - ); + tracing::debug!("broadcast meta and appfile"); self.view .data_general() .write_data( - write_data_id, - write_datas, + format!("{}{}", DATA_UID_PREFIX_APP_META, appname), + vec![ + proto::DataItem { + data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes( + bincode::serialize(&appmeta).unwrap(), + )), + }, + proto::DataItem { + data_item_dispatch: Some(proto::data_item::DataItemDispatch::File( + proto::FileData { + file_name_opt: format!("apps/{}", appname), + is_dir_opt: true, + file_content: zipfiledata, + }, + )), + }, + ], // vec![ // DataMeta { // cache: DataModeCache::AlwaysInMem as i32, diff --git a/src/main/src/general/app/v_os.rs b/src/main/src/general/m_appmeta_manager/v_os.rs similarity index 100% rename from src/main/src/general/app/v_os.rs rename to src/main/src/general/m_appmeta_manager/v_os.rs diff --git a/src/main/src/general/data/m_data_general/mod.rs b/src/main/src/general/m_data_general.rs similarity index 62% rename from src/main/src/general/data/m_data_general/mod.rs rename to src/main/src/general/m_data_general.rs index 3e6d1a9..438309e 100644 --- a/src/main/src/general/data/m_data_general/mod.rs +++ b/src/main/src/general/m_data_general.rs @@ -1,9 +1,5 @@ -mod dataitem; - -use crate::general::data::m_data_general::dataitem::WantIdxIter; -use crate::general::data::m_data_general::dataitem::WriteSplitDataTaskGroup; -use crate::general::{ - data::m_kv_store_engine::{ +use super::{ + m_kv_store_engine::{ KeyTypeDataSetItem, KeyTypeDataSetMeta, KvAdditionalConf, KvStoreEngine, KvVersion, }, m_os::OperatingSystem, @@ -18,7 +14,7 @@ use crate::general::{ }; use crate::{ general::{ - data::m_kv_store_engine::{KeyLockGuard, KeyType}, + m_kv_store_engine::{KeyLockGuard, KeyType}, network::{msg_pack::MsgPack, proto_ext::DataItemExt}, }, logical_module_view_impl, @@ -30,12 +26,11 @@ use crate::{result::WsDataError, sys::LogicalModulesRef}; use async_trait::async_trait; use camelpaste::paste; use core::str; -use enum_as_inner::EnumAsInner; +use prost::{bytes, Message}; use serde::{Deserialize, Serialize}; -use std::ops::Range; use std::{ - collections::{BTreeSet, HashMap, HashSet}, + collections::{HashMap, HashSet}, sync::Arc, time::Duration, }; @@ -51,7 +46,6 @@ logical_module_view_impl!(DataGeneralView, kv_store_engine, KvStoreEngine); logical_module_view_impl!(DataGeneralView, os, OperatingSystem); pub type DataVersion = u64; -pub type 
DataItemIdx = u8; pub const DATA_UID_PREFIX_APP_META: &str = "app"; pub const DATA_UID_PREFIX_FN_KV: &str = "fkv"; @@ -86,11 +80,6 @@ pub fn new_data_unique_id_fn_kv(key: &[u8]) -> Vec { pub struct DataGeneral { view: DataGeneralView, - // // unique_id,idx -> file_path - // auto_cache: moka::sync::Cache<(String, u8), (DataVersion, proto::DataItem)>, - - // // unique_id,idx -> serialized value - // forever_cache: dashmap::DashMap<(String, u8), (DataVersion, proto::DataItem)>, pub rpc_call_data_version_schedule: RPCCaller, rpc_call_write_once_data: RPCCaller, rpc_call_get_data_meta: RPCCaller, @@ -111,8 +100,6 @@ impl LogicalModule for DataGeneral { Self { view: DataGeneralView::new(args.logical_modules_ref.clone()), - // auto_cache: moka::sync::Cache::new(100), - // forever_cache: dashmap::DashMap::new(), rpc_call_data_version_schedule: RPCCaller::new(), rpc_call_write_once_data: RPCCaller::new(), rpc_call_get_data_meta: RPCCaller::new(), @@ -197,7 +184,7 @@ impl DataGeneralView { ) { struct Defer { node: NodeID, - } + }; impl Drop for Defer { fn drop(&mut self) { tracing::debug!("rpc_handle_data_meta_update return at node({})", self.node); @@ -411,7 +398,6 @@ impl DataGeneralView { // Step1: verify version // take old meta - #[allow(unused_assignments)] let mut required_meta: Option<(usize, DataSetMetaV2)> = None; { let keybytes: Vec = KeyTypeDataSetMeta(&req.unique_id).make_key(); @@ -637,35 +623,8 @@ impl DataGeneralView { // File(PathBuf), // } -pub enum DataUidMeta { - Meta { - unique_id: Vec, - meta: DataSetMetaV2, - }, - UniqueId(Vec), -} - -#[derive(EnumAsInner, Clone)] -pub enum GetOrDelDataArgType { - Delete, - All, - PartialOne { - // partial can't be deleted - idx: DataItemIdx, - }, - PartialMany { - idxs: BTreeSet, - }, -} - -pub struct GetOrDelDataArg { - pub meta: Option, - pub unique_id: Vec, - pub ty: GetOrDelDataArgType, -} - impl DataGeneral { - pub async fn get_or_del_datameta_from_master( + async fn get_or_del_datameta_from_master( &self, unique_id: &[u8], delete: bool, @@ -702,401 +661,139 @@ impl DataGeneral { }) } - // should return real dataitem, rather than split dataitem - pub async fn get_or_del_data( + async fn get_data_by_meta( &self, - GetOrDelDataArg { - meta, - unique_id, - ty, - }: GetOrDelDataArg, - ) -> WSResult<(DataSetMetaV2, HashMap)> { - // get meta from master - let meta = if let Some(meta) = meta { - meta - } else { - self.get_or_del_datameta_from_master(&unique_id, false) - .await? 
- }; - - tracing::debug!("get_or_del_data uid: {:?},meta: {:?}", unique_id, meta); - - // basical verify - for idx in 0..meta.data_item_cnt() { - let idx = idx as DataItemIdx; - let check_cache_map = |meta: &DataSetMetaV2| -> WSResult<()> { - if !meta.cache_mode_visitor(idx).is_map_common_kv() - && !meta.cache_mode_visitor(idx).is_map_file() - { - return Err(WsDataError::UnknownCacheMapMode { - mode: meta.cache_mode_visitor(idx).0, - } - .into()); - } - Ok(()) - }; - // not proper desig, skip - // https://fvd360f8oos.feishu.cn/wiki/DYAHw4oPLiZ5NYkTG56cFtJdnKg#share-Div9dUq11oGFOBxJO9ic3RtnnSf - // fn check_cache_pos(meta: &DataSetMetaV2) -> WSResult<()> { - // if !meta.cache_mode_visitor().is_pos_allnode() - // && !meta.cache_mode_visitor().is_pos_auto() - // && !meta.cache_mode_visitor().is_pos_specnode() - // { - // return Err(WsDataError::UnknownCachePosMode { - // mode: meta.cache_mode_visitor().0, - // } - // .into()); - // } - // if meta.cache_mode_visitor().is_pos_specnode() { - // // check this node is in the spec node list - // panic!("TODO: check this node is in the spec node list"); - // } - // Ok(()) - // } - let check_cache_time = |meta: &DataSetMetaV2| -> WSResult<()> { - if !meta.cache_mode_visitor(idx).is_time_auto() - && !meta.cache_mode_visitor(idx).is_time_forever() - { - return Err(WsDataError::UnknownCacheTimeMode { - mode: meta.cache_mode_visitor(idx).0, - } - .into()); - } - Ok(()) - }; - check_cache_map(&meta)?; - // not proper desig, skip - // check_cache_pos(&meta)?; - check_cache_time(&meta)?; - } - - // verify idx range & get whether to delete - let delete = match &ty { - GetOrDelDataArgType::Delete => true, - GetOrDelDataArgType::All => false, - GetOrDelDataArgType::PartialOne { idx } => { - if *idx as usize >= meta.data_item_cnt() { - return Err(WsDataError::ItemIdxOutOfRange { - wanted: *idx, - len: meta.data_item_cnt() as u8, - } - .into()); - } - false - } - GetOrDelDataArgType::PartialMany { idxs } => { - let Some(biggest_idx) = idxs.iter().rev().next() else { - return Err(WsDataError::ItemIdxEmpty.into()); - }; - if *biggest_idx >= meta.data_item_cnt() as u8 { - return Err(WsDataError::ItemIdxOutOfRange { - wanted: *biggest_idx, - len: meta.data_item_cnt() as u8, - } - .into()); - } - false - } - }; - - // // TODO 读取数据的时候先看看缓存有没有,如果没有再读数据源,如果有从缓存里面拿,需要校验 version - // if !delete { - // let mut cached_items = HashMap::new(); - // // 遍历需要获取的索引 - // for idx in WantIdxIter::new(&ty) { - // let cache_key = (unique_id.clone(), idx); - // // 根据缓存模式选择缓存源 - // let cached = if meta.cache_mode_visitor(idx).is_time_auto() { - // self.auto_cache.get(&cache_key) - // } else if meta.cache_mode_visitor(idx).is_time_forever() { - // self.forever_cache.get(&cache_key) - // } else { - // None - // }; - // let Some(cached) = cached else { - // }; - // // 从缓存中获取数据 - // let cached_value = cache_list.get(&cache_key); - // // 如果找到缓存且版本匹配 - // if let Some((cached_version, cached_item)) = cached_value { - // if cached_version == meta.version { - // cached_items.insert(idx, cached_item.clone()); - // tracing::debug!("Cache hit for idx: {}, version: {}", idx, cached_version); - // } else { - // // 如果缓存版本不匹配,从缓存中删除掉 - // cache_list.remove(&cache_key); - // tracing::debug!( - // "Cache version mismatch for idx: {}, cached: {}, current: {}", - // idx, - // cached_version, - // meta.version - // ); - // } - // } - // } - // // 如果所有请求的数据都在缓存中找到,直接返回 - // if matches!(ty, GetOrDelDataArgType::All) - // && cached_items.len() == meta.datas_splits.len() - // || matches!(ty, 
GetOrDelDataArgType::PartialOne { .. }) && cached_items.len() == 1 - // || matches!(ty, GetOrDelDataArgType::PartialMany { idxs }) - // && cached_items.len() == idxs.len() - // { - // tracing::debug!("All requested data found in cache, returning early"); - // return Ok((meta, cached_items)); - // } - // } - // TODO 如果缓存里只有一部分或者没有,则需要从数据源读取,并且要在数据源读取时判断是不是已经在缓存里找到了 - - let mut cache: Vec = Vec::new(); - for _ in 0..meta.data_item_cnt() { - match &ty { - GetOrDelDataArgType::Delete => { - cache.push(false); - } - GetOrDelDataArgType::All - | GetOrDelDataArgType::PartialOne { .. } - | GetOrDelDataArgType::PartialMany { .. } => { - cache.push(true); - } - } - } - - // Step2: get/delete data on each node - // nodeid -> (getdata_req, splitidx) - let mut each_node_getdata: HashMap)> = - HashMap::new(); - let mut each_item_idx_receive_worker_tx_rx_splits: HashMap< - u8, - ( - tokio::sync::mpsc::Sender>, - tokio::sync::mpsc::Receiver>, - Vec>, // split ranges - ), - > = HashMap::new(); - - for idx in WantIdxIter::new(&ty, meta.data_item_cnt() as DataItemIdx) { - tracing::debug!("prepare get data slices request with idx:{}", idx); - let data_splits = &meta.datas_splits[idx as usize]; - for (splitidx, split) in data_splits.splits.iter().enumerate() { - let _ = each_node_getdata + unique_id: &[u8], + meta: DataSetMetaV2, + delete: bool, + ) -> WSResult<(DataSetMetaV2, HashMap<(NodeID, usize), proto::DataItem>)> { + let view = &self.view; + // Step2: delete data on each node + let mut each_node_data: HashMap = HashMap::new(); + for (idx, data_splits) in meta.datas_splits.iter().enumerate() { + for split in &data_splits.splits { + let _ = each_node_data .entry(split.node_id) - .and_modify(|(req, splitidxs)| { - req.idxs.push(idx as u32); - splitidxs.push(splitidx); + .and_modify(|old| { + old.idxs.push(idx as u32); }) - .or_insert(( - proto::GetOneDataRequest { - unique_id: unique_id.to_owned(), - idxs: vec![idx as u32], - delete, - return_data: true, - }, - vec![splitidx], - )); + .or_insert(proto::GetOneDataRequest { + unique_id: unique_id.to_owned(), + idxs: vec![idx as u32], + delete, + return_data: true, + }); } - let (tx, rx) = - tokio::sync::mpsc::channel::>(3); - let _ = each_item_idx_receive_worker_tx_rx_splits.insert( - idx, - ( - tx, - rx, - data_splits - .splits - .iter() - .map(|split| { - split.data_offset as usize - ..split.data_offset as usize + split.data_size as usize - }) - .collect::>(), - ), - ); } - // this part is a little complex - // 1. all the splits will be read parallelly - // 2. for one dataitem (unique by idx), we want one worker to wait for ready dataitem(split) - - // 1. 
read tasks - - for (node_id, (req, splitidxs)) in each_node_getdata { - let view = self.view.clone(); - // let req_idxs = req.idxs.clone(); - // let idx_2_sender_to_recv_worker = each_item_idx_receive_worker_tx_rx_splitcnt.clone(); - let idx_of_idx_and_sender_to_recv_worker = req - .idxs - .iter() - .enumerate() - .map(|(idx_of_idx, reqidx)| { - let tx_rx_splits = each_item_idx_receive_worker_tx_rx_splits - .get(&(*reqidx as DataItemIdx)) - .unwrap(); - (idx_of_idx, tx_rx_splits.0.clone()) - }) - .collect::>(); - let unique_id = unique_id.clone(); - let _task = tokio::spawn(async move { + let mut tasks = vec![]; + for (node_id, req) in each_node_data { + let view = view.clone(); + let task = tokio::spawn(async move { + let req_idxs = req.idxs.clone(); tracing::debug!("rpc_call_get_data start, remote({})", node_id); - let mut res = view + let res = view .data_general() .rpc_call_get_data .call(view.p2p(), node_id, req, Some(Duration::from_secs(30))) .await; tracing::debug!("rpc_call_get_data returned, remote({})", node_id); - - // result will contain multiple splits of dataitems - // so we need to send the result to the corresponding tx - - if res.is_err() { - let e = Arc::new(res.err().unwrap()); - for (_idx_of_idx, tx) in idx_of_idx_and_sender_to_recv_worker { - tracing::warn!("send to data merge tasks failed: {:?}", e); - tx.send(Err(WSError::ArcWrapper(e.clone()))) - .await - .expect("send to data merge tasks failed"); - } - } else { - for (idx_of_idx, tx) in idx_of_idx_and_sender_to_recv_worker { - let res = res.as_mut().unwrap(); - if !res.success { - tx.send(Err(WsDataError::GetDataFailed { - unique_id: unique_id.clone(), - msg: std::mem::take(&mut res.message), - } - .into())) - .await - .expect("send to data merge tasks failed"); - } else { - let _ = tx - .send(Ok(( - splitidxs[idx_of_idx], - std::mem::take(&mut res.data[idx_of_idx]), - ))) - .await - .expect("send to data merge tasks failed"); - } + let res: WSResult> = res.map(|response| { + if !response.success { + tracing::warn!("get/delete data failed {}", response.message); + vec![] + } else { + req_idxs.into_iter().zip(response.data).collect() } - } - }); - } - - // 2. data merge tasks - let mut merge_task_group_tasks = vec![]; - for idx in WantIdxIter::new(&ty, meta.data_item_cnt() as DataItemIdx) { - let (_, rx, splits) = each_item_idx_receive_worker_tx_rx_splits - .remove(&idx) - .unwrap(); - let unique_id = unique_id.clone(); - let cache_mode = meta.cache_mode_visitor(idx); - let task = tokio::spawn(async move { - WriteSplitDataTaskGroup::new(unique_id.clone(), splits, rx, cache_mode) + }); + (node_id, res) }); - merge_task_group_tasks.push((idx, task)); + tasks.push(task); } - // 3. 
wait for results - let mut idx_2_data_item = HashMap::new(); - for (idx, task) in merge_task_group_tasks { - let merge_group = task.await; - match merge_group { - Err(e) => { - return Err(WsRuntimeErr::TokioJoin { - err: e, - context: format!("get data split failed, idx:{}", idx), - } - .into()); - } - Ok(merge_group) => match merge_group.await { - Err(e) => { - return Err(e); - } - Ok(res) => { - let res = res.join().await; - match res { - Err(e) => { - return Err(e); - } - Ok(res) => { - let _ = idx_2_data_item.insert(idx, res); - } - } - } - }, + let mut node_2_datas: HashMap<(NodeID, usize), proto::DataItem> = HashMap::new(); + for tasks in tasks { + let (node_id, data) = tasks.await.map_err(|err| { + WSError::from(WsRuntimeErr::TokioJoin { + err, + context: "delete_data - deleting remote data".to_owned(), + }) + })?; + for (idx, data_item) in data? { + let _ = node_2_datas.insert((node_id, idx as usize), data_item); } } - // // TODO: 将这里获取到的数据写入到缓存中 - // for (idx, data_item) in idx_2_data_item.iter() { - // // 只缓存需要缓存的数据,前面拿到过 - // if !cache[*idx as usize] { - // continue; - // } - // let cache_mode = meta.cache_mode_visitor(*idx); - // let cache_key = (unique_id.clone(), *idx); - // let cache_value = (meta.version, data_item.clone()); - // if cache_mode.is_time_forever() { - // self.forever_cache.insert(cache_key, cache_value); - // } else if cache_mode.is_time_auto() { - // self.auto_cache.insert(cache_key, cache_value); - // } - // } + Ok((meta, node_2_datas)) + } - Ok((meta, idx_2_data_item)) + pub async fn get_data( + &self, + unique_id: impl Into>, + ) -> WSResult<(DataSetMetaV2, HashMap<(NodeID, usize), proto::DataItem>)> { + let unique_id: Vec = unique_id.into(); + tracing::debug!("get_or_del_datameta_from_master start"); + // Step1: get meta + let meta: DataSetMetaV2 = self + .get_or_del_datameta_from_master(&unique_id, false) + .await + .map_err(|err| { + if let WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }) = err { + tracing::debug!("data not found, uniqueid:{:?}", uniqueid); + return WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }); + } + tracing::warn!("`get_data` failed, err:{}", err); + err + })?; + tracing::debug!("get_or_del_datameta_from_master end"); + tracing::debug!("get_data_by_meta start"); + let res = self.get_data_by_meta(&unique_id, meta, false).await; + tracing::debug!("get_data_by_meta end"); + res } - // pub async fn get_data( - // &self, - // unique_id: impl Into>, - // ) -> WSResult<(DataSetMetaV2, HashMap<(NodeID, usize), proto::DataItem>)> { - // let unique_id: Vec = unique_id.into(); - // tracing::debug!("get_or_del_datameta_from_master start"); - // // Step1: get meta - // let meta: DataSetMetaV2 = self - // .get_or_del_datameta_from_master(&unique_id, false) - // .await - // .map_err(|err| { - // if let WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }) = err { - // tracing::debug!("data not found, uniqueid:{:?}", uniqueid); - // return WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }); - // } - // tracing::warn!("`get_data` failed, err:{}", err); - // err - // })?; - // tracing::debug!("get_or_del_datameta_from_master end\n get_data_by_meta start"); - // let res = self.get_data_by_meta(GetDataArg::All{ - - // }).await; - // tracing::debug!("get_data_by_meta end"); - // res - // } + /// return (meta, data_map) + /// data_map: (node_id, idx) -> data_items + pub async fn delete_data( + &self, + unique_id: impl Into>, + ) -> WSResult<(DataSetMetaV2, HashMap<(NodeID, usize), proto::DataItem>)> { 
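// Delete reuses the read path: step 1 asks the master to drop the
// dataset meta (get_or_del_datameta_from_master with delete=true) and
// hand back the authoritative split layout; step 2 walks that layout
// with get_data_by_meta(.., delete=true), which sends each holder node
// a GetOneDataRequest { delete: true, return_data: true } and collects
// the removed items, so the caller still sees the final value.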
+ let unique_id: Vec = unique_id.into(); - // /// return (meta, data_map) - // /// data_map: (node_id, idx) -> data_items - // pub async fn delete_data( - // &self, - // unique_id: impl Into>, - // ) -> WSResult<(DataSetMetaV2, HashMap<(NodeID, usize), proto::DataItem>)> { - // let unique_id: Vec = unique_id.into(); - - // // Step1: get meta - // let meta: DataSetMetaV2 = self - // .get_or_del_datameta_from_master(&unique_id, true) - // .await - // .map_err(|err| { - // if let WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }) = err { - // tracing::debug!("data not found, uniqueid:{:?}", uniqueid); - // return WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }); - // } - // tracing::warn!("`get_data` failed, err:{}", err); - // err - // })?; - // // .default_log_err("`delete_data`")?; - - // return self.get_data_by_meta(GetDataArg::Delete{ - // unique_id, - // }&, meta, true).await - // // - // } + // Step1: get meta + let meta: DataSetMetaV2 = self + .get_or_del_datameta_from_master(&unique_id, true) + .await + .map_err(|err| { + if let WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }) = err { + tracing::debug!("data not found, uniqueid:{:?}", uniqueid); + return WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }); + } + tracing::warn!("`get_data` failed, err:{}", err); + err + })?; + // .default_log_err("`delete_data`")?; + + self.get_data_by_meta(&unique_id, meta, true).await + // + } + + /// - check the uid from DATA_UID_PREFIX_XXX + pub async fn get_data_item(&self, unique_id: &[u8], idx: u8) -> Option { + let Some((_, itembytes)) = self.view.kv_store_engine().get( + &KeyTypeDataSetItem { + uid: unique_id, + idx: idx as u8, + }, + false, + KvAdditionalConf {}, + ) else { + return None; + }; + Some(proto::DataItem { + data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(itembytes)), + }) + } /// The user's data write entry /// @@ -1118,8 +815,6 @@ impl DataGeneral { ) -> WSResult<()> { let p2p = self.view.p2p(); let unique_id: Vec = unique_id.into(); - tracing::debug!("write_data {:?} start", unique_id.clone()); - let log_tag = Arc::new(format!( "write_data,uid:{:?},operole:{:?}", str::from_utf8(&unique_id), @@ -1129,33 +824,28 @@ impl DataGeneral { // Step 1: need the master to do the decision // - require for the latest version for write permission // - require for the distribution and cache mode - let version_schedule_req = DataVersionScheduleRequest { - unique_id: unique_id.clone(), - version: 0, - context: context_openode_opetype_operole.map(|(ope_node, ope_type, ope_role)| { - proto::DataScheduleContext { - ope_node: ope_node as i64, - ope_type: ope_type as i32, - each_data_sz_bytes: datas - .iter() - .map(|data_item| data_item.data_sz_bytes() as u32) - .collect::>(), - ope_role: Some(ope_role), - } - }), - }; - tracing::debug!( - "{} data version schedule requesting {:?}", - log_tag, - version_schedule_req - ); + tracing::debug!("{} data version scheduling", log_tag); let version_schedule_resp = { let resp = self .rpc_call_data_version_schedule .call( self.view.p2p(), p2p.nodes_config.get_master_node(), - version_schedule_req, + DataVersionScheduleRequest { + unique_id: unique_id.clone(), + version: 0, + context: context_openode_opetype_operole.map( + |(ope_node, ope_type, ope_role)| proto::DataScheduleContext { + ope_node: ope_node as i64, + ope_type: ope_type as i32, + each_data_sz_bytes: datas + .iter() + .map(|data_item| data_item.data_sz_bytes() as u32) + .collect::>(), + ope_role: Some(ope_role), + }, + ), 
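// From this context the master sizes the per-item splits, picks a
// cache plan, and bumps the dataset version; all three come back in
// DataVersionScheduleResponse (see data.proto in this patch).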
+ }, Some(Duration::from_secs(60)), ) .await; @@ -1391,20 +1081,10 @@ pub struct DataSetMetaV2 { // unique_id: Vec, api_version: u8, pub version: u64, - pub cache_mode: Vec, - /// the data splits for each data item, the index is the data item index + pub cache_mode: u16, pub datas_splits: Vec, } -impl DataSetMetaV2 { - pub fn cache_mode_visitor(&self, idx: DataItemIdx) -> CacheModeVisitor { - CacheModeVisitor(self.cache_mode[idx as usize]) - } - pub fn data_item_cnt(&self) -> usize { - self.datas_splits.len() - } -} - pub type DataSetMeta = DataSetMetaV2; // message EachNodeSplit{ @@ -1420,74 +1100,71 @@ pub struct EachNodeSplit { pub data_size: u32, } -/// the split of one dataitem /// we need to know the split size for one data #[derive(Serialize, Deserialize, Debug)] pub struct DataSplit { pub splits: Vec, } -pub type DataSplitIdx = usize; - -// impl DataSplit { -// /// node_2_datas will be consumed partially -// pub fn recorver_data( -// &self, -// unique_id: &[u8], -// idx: DataItemIdx, -// node_2_datas: &mut HashMap<(NodeID, DataItemIdx), proto::DataItem>, -// ) -> WSResult> { -// let nodes = node_2_datas -// .iter() -// .filter(|v| v.0 .1 == idx) -// .map(|v| v.0 .0) -// .collect::>(); - -// let mut each_node_splits: HashMap)> = -// HashMap::new(); - -// for node in nodes { -// let data = node_2_datas.remove(&(node, idx)).unwrap(); -// let _ = each_node_splits.insert(node, (data, None)); -// } - -// let mut max_size = 0; -// let mut missing = vec![]; - -// // zip with split info -// // by the way, check if the split is missing -// for split in &self.splits { -// let Some(find) = each_node_splits.get_mut(&split.node_id) else { -// missing.push((*split).clone()); -// continue; -// }; -// find.1 = Some(split.clone()); -// if split.data_offset + split.data_size > max_size { -// max_size = split.data_offset + split.data_size; -// } -// } - -// if missing.len() > 0 { -// return Err(WsDataError::SplitRecoverMissing { -// unique_id: unique_id.to_owned(), -// idx, -// missing, -// } -// .into()); -// } - -// let mut recover = vec![0; max_size.try_into().unwrap()]; - -// for (_node, (data, splitmeta)) in each_node_splits { -// let splitmeta = splitmeta.unwrap(); -// let begin = splitmeta.data_offset as usize; -// let end = begin + splitmeta.data_size as usize; -// recover[begin..end].copy_from_slice(data.as_ref()); -// } - -// Ok(recover) -// } -// } +impl DataSplit { + /// node_2_datas will be consumed partially + pub fn recorver_data( + &self, + unique_id: &[u8], + idx: usize, + node_2_datas: &mut HashMap<(NodeID, usize), proto::DataItem>, + ) -> WSResult> { + let nodes = node_2_datas + .iter() + .filter(|v| v.0 .1 == idx) + .map(|v| v.0 .0) + .collect::>(); + + let mut each_node_splits: HashMap)> = + HashMap::new(); + + for node in nodes { + let data = node_2_datas.remove(&(node, idx)).unwrap(); + let _ = each_node_splits.insert(node, (data, None)); + } + + let mut max_size = 0; + let mut missing = vec![]; + + // zip with split info + // by the way, check if the split is missing + for split in &self.splits { + let Some(find) = each_node_splits.get_mut(&split.node_id) else { + missing.push((*split).clone()); + continue; + }; + find.1 = Some(split.clone()); + if split.data_offset + split.data_size > max_size { + max_size = split.data_offset + split.data_size; + } + } + + if missing.len() > 0 { + return Err(WsDataError::SplitRecoverMissing { + unique_id: unique_id.to_owned(), + idx, + missing, + } + .into()); + } + + let mut recover = vec![0; max_size.try_into().unwrap()]; + + for (_node, 
(data, splitmeta)) in each_node_splits { + let splitmeta = splitmeta.unwrap(); + let begin = splitmeta.data_offset as usize; + let end = begin + splitmeta.data_size as usize; + recover[begin..end].copy_from_slice(data.as_ref()); + } + + Ok(recover) + } +} impl Into for EachNodeSplit { fn into(self) -> proto::EachNodeSplit { @@ -1519,22 +1196,12 @@ macro_rules! generate_cache_mode_methods { impl CacheModeVisitor { $( pub fn [](&self) -> bool { - (self.0 & []) == - ([] & []) - } - )* - } - impl DataSetMetaBuilder { - $( - pub fn [](&mut self, idx: DataItemIdx) -> &mut Self { - self.assert_cache_mode_len(); - self.building.as_mut().unwrap().cache_mode[idx as usize] = - (self.building.as_mut().unwrap().cache_mode[idx as usize] & ![]) | - ([] & []); - self + self.0 & [] + == self.0 & [] & [] } )* } + } }; } @@ -1562,27 +1229,6 @@ fn test_cache_mode_visitor() { let cache_mode_visitor = CacheModeVisitor(CACHE_MODE_MAP_FILE_MASK); assert!(cache_mode_visitor.is_map_file()); assert!(!cache_mode_visitor.is_map_common_kv()); - - // test builder - - let meta = DataSetMetaBuilder::new() - .set_data_splits(vec![DataSplit { splits: vec![] }]) - .cache_mode_map_file(0) - .cache_mode_time_forever(0) - .build(); - assert!(meta.cache_mode_visitor(0).is_map_file()); - assert!(!meta.cache_mode_visitor(0).is_map_common_kv()); - assert!(meta.cache_mode_visitor(0).is_time_forever()); - assert!(!meta.cache_mode_visitor(0).is_time_auto()); - let meta = DataSetMetaBuilder::new() - .set_data_splits(vec![DataSplit { splits: vec![] }]) - .cache_mode_map_common_kv(0) - .cache_mode_time_forever(0) - .build(); - assert!(meta.cache_mode_visitor(0).is_map_common_kv()); - assert!(!meta.cache_mode_visitor(0).is_map_file()); - assert!(meta.cache_mode_visitor(0).is_time_forever()); - assert!(!meta.cache_mode_visitor(0).is_time_auto()); } pub struct DataSetMetaBuilder { @@ -1598,16 +1244,45 @@ impl DataSetMetaBuilder { Self { building: Some(DataSetMetaV2 { version: 0, - cache_mode: vec![], + cache_mode: 0, api_version: 2, datas_splits: vec![], }), } } - fn assert_cache_mode_len(&self) { - if self.building.as_ref().unwrap().cache_mode.len() == 0 { - panic!("please set_data_splits before set_cache_mode"); - } + pub fn cache_mode_time_forever(&mut self) -> &mut Self { + self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_TIME_FOREVER_MASK; + self + } + + pub fn cache_mode_time_auto(&mut self) -> &mut Self { + self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_TIME_AUTO_MASK; + self + } + + pub fn cache_mode_pos_allnode(&mut self) -> &mut Self { + self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_POS_ALLNODE_MASK; + self + } + + pub fn cache_mode_pos_specnode(&mut self) -> &mut Self { + self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_POS_SPECNODE_MASK; + self + } + + pub fn cache_mode_pos_auto(&mut self) -> &mut Self { + self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_POS_AUTO_MASK; + self + } + + pub fn cache_mode_map_common_kv(&mut self) -> &mut Self { + self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_MAP_COMMON_KV_MASK; + self + } + + pub fn cache_mode_map_file(&mut self) -> &mut Self { + self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_MAP_FILE_MASK; + self } pub fn version(&mut self, version: u64) -> &mut Self { @@ -1617,9 +1292,7 @@ impl DataSetMetaBuilder { #[must_use] pub fn set_data_splits(&mut self, splits: Vec) -> &mut Self { - let building = self.building.as_mut().unwrap(); - building.datas_splits = splits; - building.cache_mode = vec![0; building.datas_splits.len()]; + 
self.building.as_mut().unwrap().datas_splits = splits; self } @@ -1628,25 +1301,25 @@ impl DataSetMetaBuilder { } } -// impl From for DataSetMetaV2 { -// fn from( -// DataSetMetaV1 { -// version, -// data_metas: _, -// synced_nodes: _, -// }: DataSetMetaV1, -// ) -> Self { -// DataSetMetaBuilder::new() -// .version(version) -// .cache_mode_pos_allnode() -// .build() -// // DataSetMetaV2 { -// // version, -// // data_metas, -// // synced_nodes, -// // } -// } -// } +impl From for DataSetMetaV2 { + fn from( + DataSetMetaV1 { + version, + data_metas: _, + synced_nodes: _, + }: DataSetMetaV1, + ) -> Self { + DataSetMetaBuilder::new() + .version(version) + .cache_mode_pos_allnode() + .build() + // DataSetMetaV2 { + // version, + // data_metas, + // synced_nodes, + // } + } +} #[test] fn test_option_and_vec_serialization_size() { diff --git a/src/main/src/general/data/m_dist_lock.rs b/src/main/src/general/m_dist_lock.rs similarity index 98% rename from src/main/src/general/data/m_dist_lock.rs rename to src/main/src/general/m_dist_lock.rs index 293b48e..d9ca84e 100644 --- a/src/main/src/general/data/m_dist_lock.rs +++ b/src/main/src/general/m_dist_lock.rs @@ -1,6 +1,8 @@ -use std::collections::hash_map::DefaultHasher; use std::collections::HashMap; use std::collections::HashSet; +use std::fmt; +use std::fmt::Debug; +use std::hash::DefaultHasher; use std::hash::Hash; use std::hash::Hasher; use std::sync::atomic::AtomicUsize; @@ -20,13 +22,16 @@ use parking_lot::Mutex; use rand::thread_rng; use rand::Rng; use tokio::sync::Notify; +use tokio::sync::OwnedRwLockReadGuard; +use tokio::sync::OwnedRwLockWriteGuard; use tokio::sync::RwLock; use ws_derive::LogicalModule; -use crate::general::network::{ - m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}, - proto, -}; +use super::network::m_p2p::P2PModule; +use super::network::m_p2p::RPCCaller; +use super::network::m_p2p::RPCHandler; +use super::network::m_p2p::RPCResponsor; +use super::network::proto; logical_module_view_impl!(View); logical_module_view_impl!(View, p2p, P2PModule); @@ -448,7 +453,7 @@ impl DistLock { #[cfg(test)] mod test { - use std::{time::Duration}; + use std::{sync::Arc, time::Duration}; use super::View; use crate::general::{network::proto, test_utils}; diff --git a/src/main/src/general/data/m_kv_store_engine.rs b/src/main/src/general/m_kv_store_engine.rs similarity index 96% rename from src/main/src/general/data/m_kv_store_engine.rs rename to src/main/src/general/m_kv_store_engine.rs index 8338032..532176f 100644 --- a/src/main/src/general/data/m_kv_store_engine.rs +++ b/src/main/src/general/m_kv_store_engine.rs @@ -7,7 +7,7 @@ use camelpaste::paste; use dashmap::DashMap; use enum_as_inner::EnumAsInner; -use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard}; +use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; use serde::Serialize; use serde::{de::DeserializeOwned, ser::SerializeTuple}; @@ -19,9 +19,8 @@ use std::sync::Arc; use std::sync::OnceLock; use std::time::Duration; -use crate::general::{ - data::m_data_general::DataSetMetaV2, m_os::OperatingSystem, network::m_p2p::P2PModule, -}; +use super::{m_data_general::DataSetMetaV1, m_os::OperatingSystem, network::m_p2p::P2PModule}; +use crate::general::m_data_general::DataSetMetaV2; use crate::{ logical_module_view_impl, @@ -463,7 +462,7 @@ pub struct KeyTypeServiceList; generate_key_struct!([KeyTypeServiceList], 3, Vec); pub struct KeyTypeDataSetMeta<'a>(pub &'a [u8]); -generate_key_struct!([KeyTypeDataSetMeta,'_], 4, DataSetMetaV2); 
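// The extra [DataSetMetaV1] argument presumably registers the legacy
// value layout for this key, letting the engine decode old records and
// upgrade them through the From<DataSetMetaV1> for DataSetMetaV2 impl
// restored above.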
+generate_key_struct!([KeyTypeDataSetMeta,'_], 4, DataSetMetaV2, [DataSetMetaV1]); pub struct KeyTypeDataSetItem<'a> { pub uid: &'a [u8], @@ -553,13 +552,12 @@ impl Serialize for KeyTypeDataSetItem<'_> { mod test { use crate::{ general::{ - data::{ - m_data_general::DataSetMetaBuilder, - m_kv_store_engine::{KeyTypeDataSetMeta, KvAdditionalConf}, - }, + m_data_general::{DataSetMetaBuilder, DataSetMetaV2}, + m_kv_store_engine::{KeyTypeDataSetMeta, KvAdditionalConf}, test_utils, }, result::WSResultExt, + sys::LogicalModuleNewArgs, }; use super::View; @@ -573,9 +571,9 @@ mod test { .set( KeyTypeDataSetMeta(key.as_bytes()), &DataSetMetaBuilder::new() - .cache_mode_map_common_kv(0) - .cache_mode_pos_allnode(0) - .cache_mode_time_auto(0) + .cache_mode_map_common_kv() + .cache_mode_pos_allnode() + .cache_mode_time_auto() .version(3) .build(), false, diff --git a/src/main/src/general/m_os/mod.rs b/src/main/src/general/m_os/mod.rs index 1500871..c3de2bd 100644 --- a/src/main/src/general/m_os/mod.rs +++ b/src/main/src/general/m_os/mod.rs @@ -1,7 +1,7 @@ pub mod zip; -use crate::general::{ - app::AppMetaManager, +use super::{ + m_appmeta_manager::AppMetaManager, network::{ m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}, proto::remote_sys::{ diff --git a/src/main/src/general/mod.rs b/src/main/src/general/mod.rs index 60f1427..3ce8450 100644 --- a/src/main/src/general/mod.rs +++ b/src/main/src/general/mod.rs @@ -1,8 +1,10 @@ -pub mod app; -pub mod data; +pub mod kv_interface; +pub mod m_appmeta_manager; +pub mod m_data_general; +pub mod m_dist_lock; +pub mod m_kv_store_engine; pub mod m_metric_publisher; pub mod m_os; pub mod network; - #[cfg(test)] pub mod test_utils; diff --git a/src/main/src/general/network/msg_pack.rs b/src/main/src/general/network/msg_pack.rs index 81e7d11..ef9854d 100644 --- a/src/main/src/general/network/msg_pack.rs +++ b/src/main/src/general/network/msg_pack.rs @@ -1,3 +1,4 @@ + use downcast_rs::{impl_downcast, Downcast}; use super::{ @@ -103,7 +104,9 @@ define_msg_ids!( (proto::DataVersionScheduleRequest, pack, { pack.context.is_some() }), - (proto::DataVersionScheduleResponse, _pack, { true }), + (proto::DataVersionScheduleResponse, pack, { + pack.cache_plan.is_some() + }), (proto::WriteOneDataRequest, _pack, { true }), (proto::WriteOneDataResponse, _pack, { true }), (proto::DataMetaUpdateRequest, _pack, { true }), diff --git a/src/main/src/general/network/proto_ext.rs b/src/main/src/general/network/proto_ext.rs index a6166da..4abf69a 100644 --- a/src/main/src/general/network/proto_ext.rs +++ b/src/main/src/general/network/proto_ext.rs @@ -1,37 +1,15 @@ -use crate::{general::data::m_dist_lock::DistLockOpe}; +use crate::{general::m_dist_lock::DistLockOpe, util::VecOrSlice}; -use super::proto::{self, kv::KvResponse, FileData}; +use super::proto::{self, kv::KvResponse, DataItem, FileData}; -use std::{ops::Range, path::Path}; +use std::ops::Range; pub trait ProtoExtDataItem { fn data_sz_bytes(&self) -> usize; fn clone_split_range(&self, range: Range) -> Self; - fn to_string(&self) -> String; - fn new_raw_bytes(rawbytes: impl Into>) -> Self; - fn as_raw_bytes<'a>(&'a self) -> Option<&'a [u8]>; - fn new_file_data(filepath: impl AsRef, is_dir: bool) -> Self; } impl ProtoExtDataItem for proto::DataItem { - fn new_raw_bytes(rawbytes: impl Into>) -> Self { - proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes( - rawbytes.into(), - )), - } - } - fn new_file_data(filepath: impl AsRef, is_dir: bool) -> Self { - let file_content = 
std::fs::read(filepath.as_ref()).unwrap(); - Self { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::File(FileData { - file_name_opt: filepath.as_ref().to_string_lossy().to_string(), - is_dir_opt: is_dir, - file_content, - })), - } - } - fn data_sz_bytes(&self) -> usize { match self.data_item_dispatch.as_ref().unwrap() { proto::data_item::DataItemDispatch::File(file_data) => file_data.file_content.len(), @@ -42,15 +20,6 @@ impl ProtoExtDataItem for proto::DataItem { } fn clone_split_range(&self, range: Range) -> Self { - // let data_length = match &self.data_item_dispatch.as_ref().unwrap() { - // proto::data_item::DataItemDispatch::File(file_data) => file_data.file_content.len(), - // proto::data_item::DataItemDispatch::RawBytes(vec) => vec.len(), - // }; - - // if range.start >= data_length || range.end > data_length { - // panic!("range out of bounds: {:?}", range); - // } - Self { data_item_dispatch: Some(match &self.data_item_dispatch.as_ref().unwrap() { proto::data_item::DataItemDispatch::File(file_data) => { @@ -66,23 +35,6 @@ impl ProtoExtDataItem for proto::DataItem { }), } } - - fn as_raw_bytes<'a>(&'a self) -> Option<&'a [u8]> { - match &self.data_item_dispatch.as_ref().unwrap() { - proto::data_item::DataItemDispatch::RawBytes(vec) => Some(vec), - _ => None, - } - } - fn to_string(&self) -> String { - match &self.data_item_dispatch.as_ref().unwrap() { - proto::data_item::DataItemDispatch::File(file_data) => { - format!("file: {}", file_data.file_name_opt.clone()) - } - proto::data_item::DataItemDispatch::RawBytes(vec) => { - format!("raw bytes: {:?}", &vec[0..vec.len().min(100)]) - } - } - } } impl AsRef<[u8]> for proto::DataItem { diff --git a/src/main/src/general/network/proto_src/data.proto b/src/main/src/general/network/proto_src/data.proto index 90310b1..8009d4d 100644 --- a/src/main/src/general/network/proto_src/data.proto +++ b/src/main/src/general/network/proto_src/data.proto @@ -67,19 +67,17 @@ message DataVersionScheduleRequest { DataScheduleContext context = 3; } -//message DataCachePlan{ -// uint32 cache_mode=1; -// // left empty when mode is all node pr no node -// repeated uint32 cache_nodes=2; -//} +message DataCachePlan{ + uint32 cache_mode=1; + // left empty when mode is all node pr no node + repeated uint32 cache_nodes=2; +} message DataVersionScheduleResponse { uint64 version = 1; // required // split of each data part - // DataCachePlan cache_plan = 2; - - repeated uint32 cache_mode=2; + DataCachePlan cache_plan = 2; repeated DataSplit split = 3; } diff --git a/src/main/src/general/network/proto_src/sche.proto b/src/main/src/general/network/proto_src/sche.proto index 402d2fb..723b804 100644 --- a/src/main/src/general/network/proto_src/sche.proto +++ b/src/main/src/general/network/proto_src/sche.proto @@ -35,8 +35,5 @@ message DistributeTaskReq{ } } -message DistributeTaskResp{ - bool success=1; - string err_msg=2; -} +message DistributeTaskResp{} diff --git a/src/main/src/general/network/rpc_model.rs b/src/main/src/general/network/rpc_model.rs index 2d9776d..f6249d2 100644 --- a/src/main/src/general/network/rpc_model.rs +++ b/src/main/src/general/network/rpc_model.rs @@ -14,7 +14,7 @@ use std::{ }; use tokio::{net::UnixListener, sync::oneshot}; -use crate::result::{WSResult, WsFuncError, WsRpcErr}; +use crate::result::{WSResult, WsRpcErr}; // start from the begining #[async_trait] @@ -71,7 +71,6 @@ pub async fn call( ) -> WSResult { // wait for connection if not connected - tracing::debug!("111111111111111111111111"); let tx = { let mut conn_map 
= CONN_MAP.write(); match conn_map.get_mut(&conn) { @@ -85,15 +84,11 @@ pub async fn call( } }; - tracing::debug!("22222222222222222222222222"); - // register the call back let (wait_tx, wait_rx) = oneshot::channel(); let next_task = NEXT_TASK_ID.fetch_add(1, Ordering::SeqCst); let _ = CALL_MAP.write().insert(next_task, wait_tx); - tracing::debug!("33333333333333333333333333"); - // send the request let mut buf = BytesMut::with_capacity(req.encoded_len() + 8); buf.put_i32(req.encoded_len() as i32); @@ -142,26 +137,23 @@ lazy_static! { static ref NEXT_TASK_ID: AtomicU32 = AtomicU32::new(0); } -async fn listen_task(socket: tokio::net::UnixStream) -> WSResult<()> { +async fn listen_task(socket: tokio::net::UnixStream) { tracing::debug!("new connection: {:?}", socket.peer_addr().unwrap()); let (mut sockrx, socktx) = socket.into_split(); let mut buf = [0; 1024]; let mut len = 0; - let (conn, rx) = - match listen_task_ext::verify_remote::(&mut sockrx, &mut len, &mut buf).await { - Ok((conn, rx)) => (conn, rx), - Err(err) => { - tracing::debug!("verify failed {:?}", err); - return Err(WsFuncError::InsranceVerifyFailed("verify failed".to_string()).into()); - } - }; + + let Some((conn, rx)) = + listen_task_ext::verify_remote::(&mut sockrx, &mut len, &mut buf).await + else { + tracing::debug!("verify failed"); + return; + }; listen_task_ext::spawn_send_loop(rx, socktx); listen_task_ext::read_loop::(conn, &mut sockrx, &mut len, &mut buf).await; - - Ok(()) } pub(super) mod listen_task_ext { @@ -174,10 +166,7 @@ pub(super) mod listen_task_ext { sync::mpsc::Receiver, }; - use crate::{ - general::network::rpc_model::ConnState, - result::{WSResult, WsFuncError}, - }; + use crate::general::network::rpc_model::ConnState; use super::{HashValue, RpcCustom, CALL_MAP, CONN_MAP}; @@ -185,19 +174,16 @@ pub(super) mod listen_task_ext { sockrx: &mut OwnedReadHalf, len: &mut usize, buf: &mut [u8], - ) -> WSResult<(HashValue, Receiver>)> { + ) -> Option<(HashValue, Receiver>)> { async fn verify_remote_inner( sockrx: &mut OwnedReadHalf, len: &mut usize, buf: &mut [u8], - ) -> WSResult<(HashValue, Receiver>)> { + ) -> Option<(HashValue, Receiver>)> { // println!("waiting for verify head len"); if !wait_for_len(sockrx, len, 4, buf).await { tracing::warn!("failed to read verify head len"); - return Err(WsFuncError::InsranceVerifyFailed( - "failed to read verify head len".to_string(), - ) - .into()); + return None; } let verify_msg_len = consume_i32(0, buf, len); @@ -205,43 +191,34 @@ pub(super) mod listen_task_ext { // println!("waiting for verify msg {}", verify_msg_len); if !wait_for_len(sockrx, len, verify_msg_len, buf).await { tracing::warn!("failed to read verify msg"); - return Err(WsFuncError::InsranceVerifyFailed( - "failed to read verify msg".to_string(), - ) - .into()); + return None; } // println!("wait done"); let Some(id) = R::verify(&buf[4..4 + verify_msg_len]).await else { tracing::warn!("verify failed"); - return Err(WsFuncError::InsranceVerifyFailed("verify failed".to_string()).into()); + return None; }; let (tx, rx) = tokio::sync::mpsc::channel(10); let mut write_conn_map = CONN_MAP.write(); if write_conn_map.contains_key(&id) { tracing::warn!("conflict conn id: {:?}", id); - return Err( - WsFuncError::InsranceVerifyFailed("conflict conn id".to_string()).into(), - ); + return None; } let _ = write_conn_map.insert(id.clone(), ConnState { tx }); // println!("verify success"); - Ok((id, rx)) + Some((id, rx)) } - match tokio::time::timeout( + let res = tokio::time::timeout( Duration::from_secs(5), 
verify_remote_inner::(sockrx, len, buf), ) .await - { - Ok(ok) => ok, - Err(_) => { - tracing::warn!("verify timeout"); - Err(WsFuncError::InsranceVerifyFailed("verify timeout".to_string()).into()) - } - } + .unwrap_or_else(|_elapse| None); + // println!("verify return"); + res } pub(super) async fn read_loop( diff --git a/src/main/src/general/test_utils.rs b/src/main/src/general/test_utils.rs index 37e8c08..a88b7c8 100644 --- a/src/main/src/general/test_utils.rs +++ b/src/main/src/general/test_utils.rs @@ -1,5 +1,6 @@ -use std::{collections::HashMap, fs}; +use std::{collections::HashMap, fs, sync::OnceLock}; +use futures::lock; use lazy_static::lazy_static; use tokio::sync::Mutex; diff --git a/src/main/src/main.rs b/src/main/src/main.rs index e3b2af3..8e81720 100644 --- a/src/main/src/main.rs +++ b/src/main/src/main.rs @@ -9,8 +9,7 @@ unused_results, clippy::let_underscore_future, clippy::let_underscore_future, - unused_must_use, - unconditional_recursion + unused_must_use )] use clap::Parser; @@ -68,12 +67,12 @@ pub fn start_tracing() { if mp.contains("wasm_serverless::worker::m_kv_user_client") { return false; } - // if mp.contains("wasm_serverless::general::m_data_general") { - // return false; - // } - // if mp.contains("wasm_serverless::master::m_data_master") { - // return false; - // } + if mp.contains("wasm_serverless::general::m_data_general") { + return false; + } + if mp.contains("wasm_serverless::master::m_data_master") { + return false; + } if mp.contains("sled::pagecache") { return false; } diff --git a/src/main/src/master/m_data_master.rs b/src/main/src/master/m_data_master.rs index 9b60388..7cabc37 100644 --- a/src/main/src/master/m_data_master.rs +++ b/src/main/src/master/m_data_master.rs @@ -1,19 +1,18 @@ use std::collections::HashSet; use std::time::Duration; -use crate::general::data::{ - m_data_general::{ - DataGeneral, DataItemIdx, DataSetMetaBuilder, DataSplit, EachNodeSplit, - }, - m_kv_store_engine::{KeyType, KeyTypeDataSetMeta, KvAdditionalConf, KvStoreEngine}, +use crate::general::m_data_general::{ + CacheModeVisitor, DataGeneral, DataSetMetaBuilder, DataSplit, EachNodeSplit, +}; +use crate::general::m_kv_store_engine::{ + KeyType, KeyTypeDataSetMeta, KvAdditionalConf, KvStoreEngine, }; - use crate::general::network::m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}; use crate::general::network::proto::{ self, DataVersionScheduleRequest, DataVersionScheduleResponse, }; use crate::result::{WSResult, WSResultExt}; -use crate::sys::{LogicalModulesRef}; +use crate::sys::{LogicalModulesRef, NodeID}; use crate::util::JoinHandleWrapper; use crate::{ general::network::http_handler::HttpHandler, @@ -75,78 +74,57 @@ impl LogicalModule for DataMaster { } impl DataMaster { - // fn set_data_cache_mode_default(builder: &mut DataSetMetaBuilder) { - // if builder.building. 
- // // default cache mode - // let _ = builder - // .cache_mode_map_common_kv() - // .cache_mode_pos_auto() - // .cache_mode_time_auto(); - // } + fn set_data_cache_mode_default(builder: &mut DataSetMetaBuilder) { + // default cache mode + let _ = builder + .cache_mode_map_common_kv() + .cache_mode_pos_auto() + .cache_mode_time_auto(); + } fn set_data_cache_mode_for_meta( req: &DataVersionScheduleRequest, builder: &mut DataSetMetaBuilder, ) { - fn default_set_data_cache_mode_for_meta( - req: &DataVersionScheduleRequest, - builder: &mut DataSetMetaBuilder, - ) { - // for each item(by split length), set cache mode - for idx in 0..req.context.as_ref().unwrap().each_data_sz_bytes.len() { - let _ = builder - .cache_mode_time_forever(idx as DataItemIdx) - .cache_mode_pos_allnode(idx as DataItemIdx) - .cache_mode_map_common_kv(idx as DataItemIdx); - } - } if let Some(context) = req.context.as_ref() { match context.ope_role.as_ref().unwrap() { proto::data_schedule_context::OpeRole::UploadApp(_data_ope_role_upload_app) => { let _ = builder - // 0 is app meta data, map to common kv - .cache_mode_time_forever(0) - .cache_mode_pos_allnode(0) - .cache_mode_map_common_kv(0) - // 1 is app package data, map to file - .cache_mode_time_forever(1) - .cache_mode_pos_allnode(1) - .cache_mode_map_file(1); + .cache_mode_time_forever() + .cache_mode_pos_allnode() + .cache_mode_map_file(); } proto::data_schedule_context::OpeRole::FuncCall(_data_ope_role_func_call) => { - default_set_data_cache_mode_for_meta(req, builder); + Self::set_data_cache_mode_default(builder); } } } else { - tracing::warn!( - "context is None, use default cache mode, maybe we need to suitable for this case" - ); - default_set_data_cache_mode_for_meta(req, builder); + Self::set_data_cache_mode_default(builder); } } - // fn decide_cache_nodes( - // _ctx: &proto::DataScheduleContext, - // each_item_cache_mode: CacheModeVisitor, - // ) -> Vec { - // if cache_mode.is_time_auto() { - // // for time auto, we just do the cache when data is get - // return vec![]; - // } else if cache_mode.is_time_forever() { - // if cache_mode.is_pos_auto() { - // // for pos auto, we just do the cache when data is get - // // simple strategy temporarily - // return vec![]; - // } else if cache_mode.is_pos_specnode() { - // return vec![]; - // } else { - // // all node just return empty, can be just refered from cache_mode - // // no need to redundant info in cache nodes - // return vec![]; - // } - // } else { - // panic!("not supported time mode {:?}", cache_mode) - // } - // } + fn decide_cache_nodes( + _ctx: &proto::DataScheduleContext, + cache_mode: CacheModeVisitor, + ) -> Vec { + if cache_mode.is_time_auto() { + // for time auto, we just do the cache when data is get + return vec![]; + } else if cache_mode.is_time_forever() { + if cache_mode.is_pos_auto() { + // for pos auto, we just do the cache when data is get + // simple strategy temporarily + return vec![]; + } else if cache_mode.is_pos_specnode() { + return vec![]; + } else { + // all node just return empty, can be just refered from cache_mode + // no need to redundant info in cache nodes + return vec![]; + } + } else { + panic!("not supported time mode {:?}", cache_mode) + } + } fn decide_each_data_split(&self, ctx: &proto::DataScheduleContext) -> Vec { // let DEFAULT_SPLIT_SIZE = 4 * 1024 * 1024; @@ -207,29 +185,19 @@ impl DataMaster { ); let set_meta = dataset_meta.map_or_else( || { - tracing::debug!("new dataset meta for data({:?})", req.unique_id); let mut builder = DataSetMetaBuilder::new(); - // 
version let _ = builder.version(1); - // data splits bf cache mod - let _ = builder.set_data_splits(self.decide_each_data_split(ctx)); - // cache mode Self::set_data_cache_mode_for_meta(&req, &mut builder); - - builder.build() - }, - |(_kv_version, set_meta)| { - tracing::debug!("update dataset meta for data({:?})", req.unique_id); - let version = set_meta.version; - let mut builder = DataSetMetaBuilder::from(set_meta); - // version - let _ = builder.version(version + 1); - // data splits bf cache mod let _ = builder.set_data_splits(self.decide_each_data_split(ctx)); - // cache mode - Self::set_data_cache_mode_for_meta(&req, &mut builder); builder.build() }, + |(_kv_version, mut set_meta)| { + set_meta.version += 1; + set_meta + // let mut replace = setmeta.borrow_mut().take().unwrap(); + // replace.version = set_meta.version + 1; + // replace + }, ); // ## update version local tracing::debug!( @@ -312,8 +280,10 @@ impl DataMaster { // call_tasks.push(call_task); - // let cache_nodes = - // Self::decide_cache_nodes(req.context.as_ref().unwrap(), new_meta.cache_mode); + let cache_nodes = Self::decide_cache_nodes( + req.context.as_ref().unwrap(), + CacheModeVisitor(new_meta.cache_mode), + ); tracing::debug!( "data:{:?} version required({}) and schedule done, caller will do following thing after receive `DataVersionScheduleResponse`", @@ -324,7 +294,10 @@ impl DataMaster { responsor .send_resp(DataVersionScheduleResponse { version: new_meta.version, - cache_mode: new_meta.cache_mode.into_iter().map(|v| v as u32).collect(), + cache_plan: Some(proto::DataCachePlan { + cache_mode: new_meta.cache_mode as u32, + cache_nodes, + }), split: new_meta .datas_splits .into_iter() diff --git a/src/main/src/master/m_http_handler.rs b/src/main/src/master/m_http_handler.rs index 2d6f528..a27ae8d 100644 --- a/src/main/src/master/m_http_handler.rs +++ b/src/main/src/master/m_http_handler.rs @@ -10,12 +10,9 @@ use ws_derive::LogicalModule; // use use crate::{ - general::{ - app::AppMetaManager, - network::{ - http_handler::{self, HttpHandler}, - m_p2p::P2PModule, - }, + general::network::{ + http_handler::{self, HttpHandler}, + m_p2p::P2PModule, }, logical_module_view_impl, result::WSResult, @@ -33,7 +30,6 @@ logical_module_view_impl!( metric_observor, Option ); -logical_module_view_impl!(MasterHttpHandlerView, appmeta_manager, AppMetaManager); #[derive(LogicalModule)] pub struct MasterHttpHandler { @@ -100,39 +96,12 @@ impl HttpHandler for MasterHttpHandler { // self.local_req_id_allocator.alloc() // } async fn handle_request(&self, app: &str, _http_text: String) -> Response { - tracing::debug!("master handle_request {}", app); + tracing::debug!("handle_request {}", app); if app == "metrics" { return self.handle_prometheus(); } - - let view = self.view.clone(); - if !view.p2p().nodes_config.this.1.is_master() { - tracing::debug!("this is_master"); - match self.view.appmeta_manager().app_available(app).await { - Ok(true) => {} - Ok(false) => { - return (StatusCode::NOT_FOUND, "app not found").into_response(); - } - Err(e) => { - return (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(); - } - } - } - - // check app is available - // match self.view.appmeta_manager().app_available(app).await { - // Ok(true) => {} - // Ok(false) => { - // return (StatusCode::NOT_FOUND, "app not found").into_response(); - // } - // Err(e) => { - // return (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(); - // } - // } - // 选择节点 let node = self.view.master().handle_http_schedule(app).await; - 
tracing::debug!("scheduled node is {:?}", node); // if self.view.p2p().nodes_config.this.0 == node { // // println!("run"); @@ -160,7 +129,6 @@ impl HttpHandler for MasterHttpHandler { .get(&(node as u32)) .unwrap(); - tracing::debug!("scheduled target_node is {:?}", target_node); let url = target_node.http_url(); let url = if url.ends_with('/') { // 如果是,去除末尾的斜杠 diff --git a/src/main/src/master/m_master.rs b/src/main/src/master/m_master.rs index 2f6f00a..222ff2c 100644 --- a/src/main/src/master/m_master.rs +++ b/src/main/src/master/m_master.rs @@ -110,9 +110,7 @@ pub struct TargetNode(pub NodeID); impl TargetNode { pub fn http_redirect(&self, nodesconf: &NodesConfig) -> Redirect { - tracing::debug!("node_id : {:?}", self.0); let conf = nodesconf.get_nodeconfig(self.0); - tracing::debug!("conf.http_url() : {:?}", &conf.http_url().clone()); Redirect::temporary(&conf.http_url()) } } diff --git a/src/main/src/modules_global_bridge/process_func.rs b/src/main/src/modules_global_bridge/process_func.rs index 77e20d3..ddb5708 100644 --- a/src/main/src/modules_global_bridge/process_func.rs +++ b/src/main/src/modules_global_bridge/process_func.rs @@ -1,5 +1,5 @@ use crate::{ - general::app::AppMetaManager, + general::m_appmeta_manager::AppMetaManager, worker::func::{m_instance_manager::InstanceManager, shared::process_rpc::ProcessRpc}, }; diff --git a/src/main/src/result.rs b/src/main/src/result.rs index ced7cdd..398c870 100644 --- a/src/main/src/result.rs +++ b/src/main/src/result.rs @@ -1,4 +1,4 @@ -use std::{fmt::Debug, os::unix::net::SocketAddr, sync::Arc}; +use std::{fmt::Debug, os::unix::net::SocketAddr}; use async_raft::{InitializeError, RaftError}; use camelpaste::paste; @@ -11,8 +11,8 @@ use zip_extract::ZipExtractError; use crate::{ general::{ - app::FnMeta, - data::m_data_general::{DataItemIdx, DataSplitIdx, EachNodeSplit}, + m_appmeta_manager::FnMeta, + m_data_general::EachNodeSplit, network::{proto, rpc_model::HashValue}, }, sys::NodeID, @@ -128,9 +128,6 @@ pub enum WsFuncError { AppNotFound { app: String, }, - InvalidAppMetaDataItem { - app: String, - }, FuncNotFound { app: String, func: String, @@ -165,7 +162,6 @@ pub enum WsFuncError { }, InstanceJavaPidNotFound(String), InstanceProcessStartFailed(std::io::Error), - InsranceVerifyFailed(String), } #[derive(Debug)] @@ -173,10 +169,6 @@ pub enum WsDataError { DataSetNotFound { uniqueid: Vec, }, - GetDataFailed { - unique_id: Vec, - msg: String, - }, SetExpiredDataVersion { target_version: u64, cur_version: u64, @@ -191,14 +183,6 @@ pub enum WsDataError { expect: usize, actual: usize, }, - KvDeserializeErr { - unique_id: Vec, - context: String, - }, - KvGotWrongSplitCountAndIdx { - unique_id: Vec, - idx: Vec, - }, KvEngineInnerError { inner: sled::Error, context: String, @@ -216,40 +200,13 @@ pub enum WsDataError { }, SplitRecoverMissing { unique_id: Vec, - idx: DataItemIdx, + idx: usize, missing: Vec, }, - SplitDataItemNotRawBytes { - unique_id: Vec, - splitidx: DataSplitIdx, - }, - SplitLenMismatch { - unique_id: Vec, - splitidx: DataSplitIdx, - expect: usize, - actual: usize, - }, - UnknownCacheMapMode { - mode: u16, - }, - UnknownCacheTimeMode { - mode: u16, - }, - UnknownCachePosMode { - mode: u16, - }, - ItemIdxOutOfRange { - wanted: DataItemIdx, - len: u8, - }, - ItemIdxEmpty, } #[derive(Error, Debug)] pub enum WSError { - #[error("ArcWrapper: {0:?}")] - ArcWrapper(Arc), - #[error("Io error: {0:?}")] WsIoErr(WsIoErr), diff --git a/src/main/src/sys.rs b/src/main/src/sys.rs index f7c1e37..54fd516 100644 --- a/src/main/src/sys.rs +++ 
b/src/main/src/sys.rs @@ -1,10 +1,10 @@ use crate::{ config::NodesConfig, general::{ - app::AppMetaManager, - data::{ - m_data_general::DataGeneral, m_dist_lock::DistLock, m_kv_store_engine::KvStoreEngine, - }, + m_appmeta_manager::AppMetaManager, + m_data_general::DataGeneral, + m_dist_lock::DistLock, + m_kv_store_engine::KvStoreEngine, m_metric_publisher::MetricPublisher, m_os::OperatingSystem, network::{http_handler::HttpHandlerDispatch, m_p2p::P2PModule}, diff --git a/src/main/src/util.rs b/src/main/src/util.rs index 3aff9e6..d1dab24 100644 --- a/src/main/src/util.rs +++ b/src/main/src/util.rs @@ -1,7 +1,7 @@ use std::{ fmt::Debug, future::Future, - ops::{Deref, DerefMut, Drop, Range}, + ops::{Deref, DerefMut, Drop}, pin::Pin, ptr::NonNull, task::{Context, Poll}, @@ -241,21 +241,3 @@ impl<'a, T> From<&'a [T]> for VecOrSlice<'a, T> { Self::Slice(v) } } - -pub trait VecExt { - fn limit_range_debug(&self, range: Range) -> String; -} - -impl VecExt for Vec { - fn limit_range_debug(&self, range: Range) -> String { - if self.len() >= range.end { - format!("{:?}", &self[range]) - } else { - format!( - "{:?}, hide len:{}", - &self[range.start..], - self.len() - range.end - ) - } - } -} diff --git a/src/main/src/worker/func/m_instance_manager.rs b/src/main/src/worker/func/m_instance_manager.rs index dcfabee..b7ca84d 100644 --- a/src/main/src/worker/func/m_instance_manager.rs +++ b/src/main/src/worker/func/m_instance_manager.rs @@ -4,7 +4,7 @@ use crate::general::m_os::OperatingSystem; use crate::general::network::rpc_model; use crate::sys::LogicalModulesRef; use crate::{ - general::app::AppType, // worker::host_funcs, + general::m_appmeta_manager::AppType, // worker::host_funcs, result::WSResult, sys::{LogicalModule, LogicalModuleNewArgs}, util::JoinHandleWrapper, diff --git a/src/main/src/worker/func/mod.rs b/src/main/src/worker/func/mod.rs index f779ba3..292f204 100644 --- a/src/main/src/worker/func/mod.rs +++ b/src/main/src/worker/func/mod.rs @@ -5,7 +5,7 @@ pub mod wasm_host_funcs; use crate::{ general::{ - app::{AppType, FnMeta}, + m_appmeta_manager::{AppType, FnMeta}, network::http_handler::ReqId, }, result::WSResult, diff --git a/src/main/src/worker/func/shared/process.rs b/src/main/src/worker/func/shared/process.rs index ff69fe8..2996f75 100644 --- a/src/main/src/worker/func/shared/process.rs +++ b/src/main/src/worker/func/shared/process.rs @@ -9,9 +9,10 @@ use tokio::{process::Command, sync::oneshot}; use crate::{ general::{ - app::AppType, + m_appmeta_manager::AppType, network::rpc_model::{self, HashValue}, }, + result::{WSError, WsIoErr}, worker::func::{shared::java, InstanceTrait}, }; @@ -213,20 +214,9 @@ impl InstanceTrait for ProcessInstance { // if fn_ctx.func_meta.allow_rpc_call() { let _ = self.wait_for_verify().await; - tracing::debug!( - "wait_for_verify done, call app:{}, func:{}", - fn_ctx.app, - fn_ctx.func - ); - tracing::debug!("before process_rpc::call_func "); - let res = process_rpc::call_func(&fn_ctx.app, &fn_ctx.func, fn_ctx.http_str_unwrap()) - .await; - tracing::debug!("after process_rpc::call_func "); - return res + return process_rpc::call_func(&fn_ctx.app, &fn_ctx.func, fn_ctx.http_str_unwrap()) + .await .map(|v| Some(v.ret_str)); - // return process_rpc::call_func(&fn_ctx.app, &fn_ctx.func, fn_ctx.http_str_unwrap()) - // .await - // .map(|v| Some(v.ret_str)); } // if let Some(httpmethod) = fn_ctx.func_meta.allow_http_call() { diff --git a/src/main/src/worker/func/shared/process_instance_man_related.rs 
b/src/main/src/worker/func/shared/process_instance_man_related.rs index 987b62b..33116c2 100644 --- a/src/main/src/worker/func/shared/process_instance_man_related.rs +++ b/src/main/src/worker/func/shared/process_instance_man_related.rs @@ -3,7 +3,7 @@ use std::time::Duration; use tokio::process::Command; use crate::{ - general::app::AppType, + general::m_appmeta_manager::AppType, result::{WSResult, WsFuncError}, worker::func::{ m_instance_manager::{EachAppCache, InstanceManager}, @@ -68,7 +68,6 @@ impl InstanceManager { // let pid = java::wait_for_pid(app_name).await?; proc_ins.bind_process(p); let _ = proc_ins.wait_for_verify().await; - tracing::debug!("wait_for_verify done1"); if !restart { tracing::debug!("don't restart after checkpoint, kill it"); @@ -85,7 +84,6 @@ impl InstanceManager { tracing::debug!("make checkpoint for app: {}", app); let p = self.get_process_instance(&AppType::Jar, app); let _ = p.wait_for_verify().await; - tracing::debug!("wait_for_verify done2"); tokio::time::sleep(Duration::from_secs(3)).await; self.update_checkpoint(app, false).await?; diff --git a/src/main/src/worker/func/shared/process_rpc.rs b/src/main/src/worker/func/shared/process_rpc.rs index 84aa59d..92b326e 100644 --- a/src/main/src/worker/func/shared/process_rpc.rs +++ b/src/main/src/worker/func/shared/process_rpc.rs @@ -5,7 +5,7 @@ pub mod proc_proto { use crate::{ general::network::rpc_model::{self, HashValue, MsgIdBind, ReqMsg, RpcCustom}, modules_global_bridge::process_func::{ - ModulesGlobalBrigeInstanceManager, + ModulesGlobalBrigeAppMetaManager, ModulesGlobalBrigeInstanceManager, }, result::WSResult, sys::LogicalModulesRef, @@ -64,7 +64,7 @@ impl RpcCustom for ProcessRpc { unsafe { tracing::debug!("verify begin"); - // let appman = ProcessRpc::global_m_app_meta_manager(); + let appman = ProcessRpc::global_m_app_meta_manager(); struct Defer; impl Drop for Defer { fn drop(&mut self) { @@ -73,21 +73,21 @@ impl RpcCustom for ProcessRpc { } let _d = Defer; - // TODO: add http available check - // let ishttp = { - // let appmanmetas = appman.meta.read().await; - // let Some(app) = appmanmetas.get_app_meta(&res.appid).await else { - // tracing::warn!("app {} not found, invalid verify !", res.appid); - // return None; - // }; - // app.contains_http_fn() - // }; - // let with_http_port = res.http_port.is_some(); - // if ishttp && !with_http_port - // // || (!ishttp && with_http_port) <<< seems ok - // { - // return None; - // } + let ishttp = { + let appmanmetas = appman.meta.read().await; + let Some(app) = appmanmetas.get_app_meta(&res.appid).await else { + tracing::warn!("app {} not found, invalid verify !", res.appid); + return None; + }; + app.contains_http_fn() + }; + + let with_http_port = res.http_port.is_some(); + if ishttp && !with_http_port + // || (!ishttp && with_http_port) <<< seems ok + { + return None; + } // update to the instance let insman = ProcessRpc::global_m_instance_manager().unwrap(); diff --git a/src/main/src/worker/func/wasm_host_funcs/mod.rs b/src/main/src/worker/func/wasm_host_funcs/mod.rs index 9116858..2b82de0 100644 --- a/src/main/src/worker/func/wasm_host_funcs/mod.rs +++ b/src/main/src/worker/func/wasm_host_funcs/mod.rs @@ -12,7 +12,7 @@ use crate::sys::LogicalModulesRef; mod utils { - use wasmedge_sdk::{Caller, Instance, Memory}; + use wasmedge_sdk::{Caller, CallingFrame, Instance, Memory}; use crate::{ general::m_os::OperatingSystem, @@ -25,32 +25,32 @@ mod utils { }; pub trait WasmCtx { - fn i_memory(&self, idx: u32) -> Option; - fn i_instance(&self) -> 
Option<&Instance>; + fn memory(&self, idx: u32) -> Option; + fn instance(&self) -> Option<&Instance>; } impl WasmCtx for Caller { - fn i_memory(&self, idx: u32) -> Option { + fn memory(&self, idx: u32) -> Option { self.memory(idx) } - fn i_instance(&self) -> Option<&Instance> { + fn instance(&self) -> Option<&Instance> { self.instance() } } - // impl WasmCtx for CallingFrame { - // fn memory(&self, idx: u32) -> Option { - // self.memory(idx) - // } - // fn instance(&self) -> Option<&Instance> { - // self.instance() - // } - // } + impl WasmCtx for CallingFrame { + fn memory(&self, idx: u32) -> Option { + self.memory(idx) + } + fn instance(&self) -> Option<&Instance> { + self.instance() + } + } pub fn u8slice<'a>(caller: &impl WasmCtx, ptr: i32, len: i32) -> &'a [u8] { // tracing::debug!("u8slice ptr: {}, len: {}", ptr, len); let mem = caller - .i_memory(0) + .memory(0) .unwrap() .data_pointer(ptr as u32, len as u32) .unwrap(); @@ -67,7 +67,7 @@ mod utils { pub fn i32slice<'a>(caller: &impl WasmCtx, ptr: i32, len: i32) -> &'a [i32] { let mem = caller - .i_memory(0) + .memory(0) .unwrap() .data_pointer(ptr as u32, len as u32) .unwrap(); @@ -83,7 +83,7 @@ mod utils { pub fn mutu8sclice<'a>(caller: &impl WasmCtx, ptr: i32, len: i32) -> Option<&'a mut [u8]> { if let Ok(mem) = caller - .i_memory(0) + .memory(0) .unwrap() .data_pointer_mut(ptr as u32, len as u32) { @@ -103,7 +103,7 @@ mod utils { pub fn mutref<'a, T: Sized>(caller: &impl WasmCtx, ptr: i32) -> &'a mut T { unsafe { &mut *(caller - .i_memory(0) + .memory(0) .unwrap() .data_pointer_mut(ptr as u32, std::mem::size_of::() as u32) .unwrap() as *mut T) @@ -115,7 +115,7 @@ mod utils { m_instance_manager() .instance_running_function .read() - .get(&caller.i_instance().unwrap().name().unwrap()) + .get(&caller.instance().unwrap().name().unwrap()) .unwrap() .0 .clone(), diff --git a/src/main/src/worker/m_executor.rs b/src/main/src/worker/m_executor.rs index da69b25..6666e3d 100644 --- a/src/main/src/worker/m_executor.rs +++ b/src/main/src/worker/m_executor.rs @@ -1,6 +1,6 @@ use crate::{ general::{ - app::AppMetaManager, + m_appmeta_manager::AppMetaManager, network::{ http_handler::ReqId, m_p2p::{P2PModule, RPCHandler, RPCResponsor}, @@ -127,55 +127,23 @@ impl Executor { tracing::debug!("receive distribute task: {:?}", req); let app = req.app.to_owned(); let func = req.func.to_owned(); - let appmeta = match self.view.appmeta_manager().get_app_meta(&app).await { - Ok(Some(appmeta)) => appmeta, - Ok(None) => { - tracing::warn!("app {} not found in data meta", app); - if let Err(err) = resp - .send_resp(DistributeTaskResp { - success: false, - err_msg: format!("app {} not found in data meta", app), - }) - .await - { - tracing::error!("send distribute task resp failed with err: {}", err); - } - return; - } - Err(err) => { - tracing::error!("get appmeta failed with err: {}", err); - if let Err(err) = resp - .send_resp(DistributeTaskResp { - success: false, - err_msg: format!("get appmeta failed with err: {}", err), - }) - .await - { - tracing::error!("send distribute task resp failed with err: {}", err); - } - return; - } - }; - - let apptype = appmeta.app_type.clone(); - let Some(fnmeta) = appmeta.get_fn_meta(&func) else { - tracing::warn!("func {} not found, exist:{:?}", func, appmeta.fns()); - if let Err(err) = resp - .send_resp(DistributeTaskResp { - success: false, - err_msg: format!("func {} not found, exist:{:?}", func, appmeta.fns()), - }) - .await - { - tracing::error!("send distribute task resp failed with err: {}", err); - } - return; + 
let (apptype, fnmeta) = { + let appmetaman_r = self.view.appmeta_manager().meta.read().await; + let Some(appmeta) = appmetaman_r.get_app_meta(&app).await else { + // TODO: return err + unreachable!(); + }; + let Some(fnmeta) = appmeta.get_fn_meta(&func) else { + // TODO: return err + unreachable!(); + }; + (appmeta.app_type.clone(), fnmeta.clone()) }; let ctx = FnExeCtx { app: req.app, app_type: apptype, - func_meta: fnmeta.clone(), + func_meta: fnmeta, func: req.func, req_id: 0, res: None, @@ -187,13 +155,7 @@ impl Executor { }, sub_waiters: vec![], }; - if let Err(err) = resp - .send_resp(DistributeTaskResp { - success: true, - err_msg: "".to_owned(), - }) - .await - { + if let Err(err) = resp.send_resp(DistributeTaskResp {}).await { tracing::error!("send sche resp for app:{app} fn:{func} failed with err: {err}"); } let _ = self.execute(ctx).await; @@ -227,9 +189,9 @@ impl Executor { // trigger app let appname = split[0]; let funcname = split[1]; - + let app_meta_man = self.view.appmeta_manager().meta.read().await; // check app exist - let Some(app) = self.view.appmeta_manager().get_app_meta(appname).await? else { + let Some(app) = app_meta_man.get_app_meta(appname).await else { tracing::warn!("app {} not found", appname); return Err(WsFuncError::AppNotFound { app: appname.to_owned(), @@ -280,7 +242,7 @@ impl Executor { sub_waiters: vec![], func_meta: func.clone(), }; - + drop(app_meta_man); self.execute(ctx).await } // pub async fn execute_http_app(&self, fn_ctx_builder: FunctionCtxBuilder) { @@ -348,7 +310,6 @@ impl Executor { .expect("Time went backwards") .as_millis() as u64; - tracing::debug!("start execute"); let res = instance.execute(&mut fn_ctx).await; // let return_to_agent_time = SystemTime::now() @@ -358,7 +319,7 @@ impl Executor { let res = res.map(|v| { v.map(|v| { - let mut res: serde_json::Value = serde_json::from_str(&*v).unwrap(); + let mut res: serde_json::Value = serde_json::from_str(&v).unwrap(); let _ = res.as_object_mut().unwrap().insert( "bf_exec_time".to_owned(), serde_json::Value::from(bf_exec_time), diff --git a/src/main/src/worker/m_kv_user_client.rs b/src/main/src/worker/m_kv_user_client.rs index c9e97d3..27e4b3f 100644 --- a/src/main/src/worker/m_kv_user_client.rs +++ b/src/main/src/worker/m_kv_user_client.rs @@ -1,13 +1,9 @@ -use crate::general::network::proto_ext::ProtoExtDataItem; +use std::collections::HashMap; + use crate::{ general::{ - data::{ - m_data_general::{ - new_data_unique_id_fn_kv, DataGeneral, DataItemIdx, DataSetMetaV2, GetOrDelDataArg, - GetOrDelDataArgType, - }, - m_dist_lock::DistLock, - }, + m_data_general::{new_data_unique_id_fn_kv, DataGeneral, DataSetMetaV2}, + m_dist_lock::DistLock, network::{ m_p2p::{P2PModule, RPCCaller}, proto::{ @@ -19,11 +15,10 @@ use crate::{ }, logical_module_view_impl, result::{WSError, WSResult, WSResultExt, WsDataError}, - sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef}, + sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef, NodeID}, util::JoinHandleWrapper, }; use async_trait::async_trait; -use std::collections::HashMap; use ws_derive::LogicalModule; logical_module_view_impl!(KvUserClientView); @@ -231,50 +226,25 @@ impl KvUserClient { fn convert_get_data_res_to_kv_response( key: Vec, - uid: Vec, - _meta: DataSetMetaV2, - splits: HashMap, - ) -> WSResult> { - if splits.len() != 1 { - return Err(WSError::WsDataError( - WsDataError::KvGotWrongSplitCountAndIdx { - unique_id: uid.clone(), - idx: splits.keys().cloned().collect(), - }, - )); - } - - let (idx, data_item) = 
splits.into_iter().next().unwrap(); - if idx != 0 { - return Err(WSError::WsDataError( - WsDataError::KvGotWrongSplitCountAndIdx { - unique_id: uid.clone(), - idx: vec![idx], - }, - )); - } - - let data_item_dispatch = data_item.data_item_dispatch.unwrap(); - let raw_bytes = match data_item_dispatch { - proto::data_item::DataItemDispatch::RawBytes(value) => value, - _ => { - return Err(WSError::WsDataError(WsDataError::KvDeserializeErr { - unique_id: uid, - context: format!( - "data_item_dispatch({}) is not RawBytes", - proto::DataItem { - data_item_dispatch: Some(data_item_dispatch), - } - .to_string(), - ), - })) + uid: &[u8], + meta: DataSetMetaV2, + mut splits: HashMap<(NodeID, usize), proto::DataItem>, + ) -> Vec { + if meta.datas_splits.len() != 1 { + tracing::warn!( + "convert kv invalid data count number: {}", + meta.datas_splits.len() + ); + vec![] + } else { + match meta.datas_splits[0].recorver_data(uid, 0, &mut splits) { + Ok(ok) => vec![proto::kv::KvPair { key, value: ok }], + Err(err) => { + tracing::warn!("convert kv data error:{:?}", err); + vec![] + } } - }; - - Ok(vec![proto::kv::KvPair { - key: key, - value: raw_bytes, - }]) + } } async fn handle_kv_get(&self, get: proto::kv::kv_request::KvGetRequest) -> KvResponse { @@ -282,27 +252,15 @@ impl KvUserClient { let data_general = self.view.data_general(); let uid = new_data_unique_id_fn_kv(&get.range.as_ref().unwrap().start); - let got = data_general - .get_or_del_data(GetOrDelDataArg { - meta: None, - unique_id: uid.clone(), - ty: GetOrDelDataArgType::All, - }) - .await; + let got = data_general.get_data(uid.clone()).await; let got = match got { - Ok((meta, splits)) => match Self::convert_get_data_res_to_kv_response( + Ok((meta, splits)) => Self::convert_get_data_res_to_kv_response( get.range.unwrap().start, - uid, + &uid, meta, splits, - ) { - Ok(res) => res, - Err(err) => { - tracing::warn!("get kv data error:{:?}", err); - vec![] - } - }, + ), Err(WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid })) => { tracing::debug!("get kv data not found, uid({:?})", uniqueid); vec![] @@ -319,27 +277,15 @@ impl KvUserClient { let data_general = self.view.data_general(); let uid = new_data_unique_id_fn_kv(&delete.range.as_ref().unwrap().start); - let deleted = data_general - .get_or_del_data(GetOrDelDataArg { - meta: None, - unique_id: uid.clone(), - ty: GetOrDelDataArgType::Delete, - }) - .await; + let deleted = data_general.delete_data(uid.clone()).await; let deleted = match deleted { - Ok((deleted_meta, deleted_splits)) => match Self::convert_get_data_res_to_kv_response( + Ok((deleted_meta, deleted_splits)) => Self::convert_get_data_res_to_kv_response( delete.range.unwrap().start, - uid, + &uid, deleted_meta, deleted_splits, - ) { - Ok(res) => res, - Err(err) => { - tracing::warn!("delete kv data error:{:?}", err); - vec![] - } - }, + ), Err(WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid })) => { tracing::debug!("delete kv data not found, uid({:?})", uniqueid); vec![] @@ -420,8 +366,8 @@ impl KvUserClient { #[cfg(test)] mod test { - - use std::{time::Duration}; + use core::str; + use std::{sync::Arc, time::Duration}; use super::KvUserClientView; use crate::general::{ @@ -522,7 +468,7 @@ mod test { assert!(res.responses.len() == 1); match res.responses[0].resp.clone().unwrap() { proto::kv::kv_response::Resp::CommonResp(kv_response) => { - assert_eq!(kv_response.kvs.len(), 1); + assert!(kv_response.kvs.len() == 1); assert!(kv_response.kvs[0].key == test_key.as_bytes().to_owned()); 
assert!(kv_response.kvs[0].value == test_value.as_bytes().to_owned());
                 }
diff --git a/telego/README b/telego/README
deleted file mode 100644
index 599a364..0000000
--- a/telego/README
+++ /dev/null
@@ -1 +0,0 @@
-https://qcnoe3hd7k5c.feishu.cn/wiki/HKyFwat29i8PiEkxhCQcW9NdnTe
\ No newline at end of file
diff --git a/telego/bin_waverless/deployment.yml b/telego/bin_waverless/deployment.yml
deleted file mode 100644
index e8d3e41..0000000
--- a/telego/bin_waverless/deployment.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-comment: a very handy disk-usage analysis tool
-
-prepare:
-  # x86
-  - url: https://dev.yorhel.nl/download/ncdu-2.5-linux-x86_64.tar.gz
-    as: ncdu-2.5-linux-x86_64.tar.gz
-    trans:
-      - extract
-      - copy:
-          - ncdu: teledeploy/ncdu_amd64
-  # arm
-  - url: https://dev.yorhel.nl/download/ncdu-2.5-linux-aarch64.tar.gz
-    as: ncdu-2.5-linux-aarch64.tar.gz
-    trans:
-      - extract
-      - copy:
-          - ncdu: teledeploy/ncdu_arm64
-
-bin:
-  waverless:

From d24ce72aa357951fcfeead11966df87841782a02 Mon Sep 17 00:00:00 2001
From: pa <1020401660@qq.com>
Date: Wed, 16 Apr 2025 01:26:06 +0800
Subject: [PATCH 23/26] Revert "feat: fix oneshot with dashmap"

This reverts commit 1924fe226f4fb8a6ed7f03d407b94a1a6220cfeb.
---
 src/main/src/general/m_kv_store_engine.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/main/src/general/m_kv_store_engine.rs b/src/main/src/general/m_kv_store_engine.rs
index 532176f..2cd4cff 100644
--- a/src/main/src/general/m_kv_store_engine.rs
+++ b/src/main/src/general/m_kv_store_engine.rs
@@ -198,9 +198,9 @@ impl KvStoreEngine {
 
         let _ = db.insert(keybytes, vec).unwrap();
 
-        // if let Some(mut key_waitings) = self.key_waitings.get_mut(key) {
-        if let Some((_, key_waitings)) = self.key_waitings.remove(key) {
-            for wait_tx in key_waitings {
+        if let Some(mut key_waitings) = self.key_waitings.get_mut(key) {
+            // if let Some((_, key_waitings)) = self.key_waitings.remove(key) {
+            for wait_tx in key_waitings.drain(..) {
                 wait_tx
                     .send((kvversion, KvValue::RawData(value.clone())))
                     .unwrap_or_else(|_| panic!("send new key event failed"));

From 7d01b03a76026999a84e4bdeab9d2cd3363e2133 Mon Sep 17 00:00:00 2001
From: pa <1020401660@qq.com>
Date: Wed, 16 Apr 2025 01:26:06 +0800
Subject: [PATCH 24/26] Revert "feat: issue for dashmap"

This reverts commit dffa9b56cb8c345e0fc8c56adbbf7d84d5e71ff5.
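For context on the two dashmap-related reverts: patch 23 swaps `set`'s wakeup of `wait_for_new` waiters from removing the waiter list out of the `DashMap` back to notifying in place through `get_mut`, and patch 24 below then deletes the notification path altogether. A minimal sketch of the two wakeup strategies, for comparison only (illustrative names and a simplified `u64` payload; the real code sends `(KvVersion, KvValue)`):

    use dashmap::DashMap;
    use tokio::sync::oneshot;

    type Waiters = DashMap<Vec<u8>, Vec<oneshot::Sender<u64>>>;

    // Strategy of the reverted commit: take the whole waiter list out of the
    // map first, so the shard guard is released before any send happens and
    // woken tasks can immediately re-register for the same key.
    fn notify_by_remove(waiters: &Waiters, key: &[u8], version: u64) {
        if let Some((_, txs)) = waiters.remove(key) {
            for tx in txs {
                let _ = tx.send(version); // oneshot send never blocks
            }
        }
    }

    // Strategy being restored: `get_mut` keeps the DashMap shard guard alive
    // for the whole loop, so re-entrant access to the same shard from this
    // thread would deadlock, and other threads touching the shard stall
    // until the loop ends.
    fn notify_in_place(waiters: &Waiters, key: &[u8], version: u64) {
        if let Some(mut txs) = waiters.get_mut(key) {
            for tx in txs.drain(..) {
                let _ = tx.send(version);
            }
        }
    }

Draining in place also leaves an empty `Vec` entry behind for every notified key, whereas `remove` cleans the entry up; that trade-off is exactly what these two commits toggle between.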
---
 src/main/Cargo.toml                           |   1 -
 src/main/src/general/m_appmeta_manager/mod.rs |  17 +-
 src/main/src/general/m_data_general.rs        | 442 +++++------------
 src/main/src/general/m_dist_lock.rs           |  10 +-
 src/main/src/general/m_kv_store_engine.rs     | 160 ++-----
 src/main/src/general/m_os/mod.rs              |   6 +-
 src/main/src/general/network/m_p2p.rs         |  49 +-
 src/main/src/general/network/m_p2p_quic.rs    |   4 +-
 src/main/src/general/network/proto_ext.rs     | 103 +---
 src/main/src/general/test_utils.rs            |  22 +-
 src/main/src/main.rs                          |  19 +-
 src/main/src/master/m_data_master.rs          |  74 +--
 src/main/src/result.rs                        |  22 +-
 src/main/src/util.rs                          |  17 -
 src/main/src/worker/func/shared/process.rs    |  10 +-
 src/main/src/worker/m_kv_user_client.rs       | 185 +-------
 16 files changed, 210 insertions(+), 931 deletions(-)

diff --git a/src/main/Cargo.toml b/src/main/Cargo.toml
index f4d1de5..938e4f2 100644
--- a/src/main/Cargo.toml
+++ b/src/main/Cargo.toml
@@ -8,7 +8,6 @@ edition = "2021"
 [features]
 default = [] # features enabled by default
 unsafe-log = []
-rpc-log = []
 
 [dependencies]
 qp2p.workspace = true #{ path = "qp2p" }
diff --git a/src/main/src/general/m_appmeta_manager/mod.rs b/src/main/src/general/m_appmeta_manager/mod.rs
index 79cd346..d0ec8e4 100644
--- a/src/main/src/general/m_appmeta_manager/mod.rs
+++ b/src/main/src/general/m_appmeta_manager/mod.rs
@@ -9,7 +9,7 @@ use super::{
     m_os::OperatingSystem,
     network::{http_handler::HttpHandler, m_p2p::P2PModule},
 };
-use crate::{general::network::proto, result::WSResultExt, worker::m_executor::Executor};
+use crate::{general::network::proto, worker::m_executor::Executor};
 use crate::{
     general::{
         kv_interface::KvOps,
@@ -852,20 +852,17 @@ impl AppMetaManager {
                     OpeRole::UploadApp(DataOpeRoleUploadApp {}),
                 )),
             )
-            .await?;
+            .await;
         tracing::debug!("app uploaded");
         Ok(())
     }
 
     pub fn set_app_meta_list(&self, list: Vec) {
-        self.view
-            .kv_store_engine()
-            .set(
-                KeyTypeServiceList,
-                &serde_json::to_string(&list).unwrap().into(),
-                false,
-            )
-            .todo_handle();
+        self.view.kv_store_engine().set(
+            KeyTypeServiceList,
+            &serde_json::to_string(&list).unwrap().into(),
+            false,
+        );
     }
     pub fn get_app_meta_list(&self) -> Vec {
         let res = self
diff --git a/src/main/src/general/m_data_general.rs b/src/main/src/general/m_data_general.rs
index 438309e..0fa9891 100644
--- a/src/main/src/general/m_data_general.rs
+++ b/src/main/src/general/m_data_general.rs
@@ -6,19 +6,16 @@ use super::{
     network::{
         m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor},
         proto::{
-            self, DataMeta, DataMetaGetRequest, DataVersionScheduleRequest, WriteOneDataRequest,
-            WriteOneDataResponse,
+            self, DataMeta, DataMetaGetRequest, DataVersionScheduleRequest,
+            WriteOneDataRequest, WriteOneDataResponse,
         },
         proto_ext::ProtoExtDataItem,
     },
 };
 use crate::{
-    general::{
-        m_kv_store_engine::{KeyLockGuard, KeyType},
-        network::{msg_pack::MsgPack, proto_ext::DataItemExt},
-    },
+    general::m_kv_store_engine::KeyType,
     logical_module_view_impl,
-    result::{WSError, WSResult, WSResultExt, WsRuntimeErr, WsSerialErr},
+    result::{WSError, WSResult, WsRuntimeErr, WsSerialErr},
     sys::{LogicalModule, LogicalModuleNewArgs, NodeID},
     util::JoinHandleWrapper,
 };
@@ -27,7 +24,7 @@ use async_trait::async_trait;
 use camelpaste::paste;
 
 use core::str;
-use prost::{bytes, Message};
+use prost::Message;
 use serde::{Deserialize, Serialize};
 use std::{
     collections::{HashMap, HashSet},
@@ -140,9 +137,10 @@ impl LogicalModule for DataGeneral {
             move |responsor: RPCResponsor,
                   req: proto::DataMetaUpdateRequest| {
                 let view = view.clone();
-                let _ = tokio::spawn(async move {
view.rpc_handle_data_meta_update(responsor, req).await - }); + let _ = + tokio::spawn( + async move { view.rpc_handle_data_meta_update(responsor, req) }, + ); Ok(()) }, ); @@ -151,9 +149,7 @@ impl LogicalModule for DataGeneral { .regist(p2p, move |responsor, req| { let view = view.clone(); let _ = tokio::spawn(async move { - view.rpc_handle_get_data_meta(req, responsor) - .await - .todo_handle(); + view.rpc_handle_get_data_meta(req, responsor).await; }); Ok(()) }); @@ -164,9 +160,7 @@ impl LogicalModule for DataGeneral { req: proto::GetOneDataRequest| { let view = view.clone(); let _ = - tokio::spawn( - async move { view.rpc_handle_get_one_data(responsor, req).await }, - ); + tokio::spawn(async move { view.rpc_handle_get_one_data(responsor, req) }); Ok(()) }, ); @@ -182,83 +176,51 @@ impl DataGeneralView { responsor: RPCResponsor, mut req: proto::DataMetaUpdateRequest, ) { - struct Defer { - node: NodeID, - }; - impl Drop for Defer { - fn drop(&mut self) { - tracing::debug!("rpc_handle_data_meta_update return at node({})", self.node); - } - } - let _defer = Defer { - node: self.p2p().nodes_config.this_node(), - }; - let key = KeyTypeDataSetMeta(&req.unique_id); let keybytes = key.make_key(); - tracing::debug!("rpc_handle_data_meta_update {:?}", req); - let kv_lock = self.kv_store_engine().with_rwlock(&keybytes); - let _kv_write_lock_guard = kv_lock.write(); + let write_lock = self.kv_store_engine().with_rwlock(&keybytes); + write_lock.write(); if let Some((_old_version, mut old_meta)) = self.kv_store_engine().get(&key, true, KvAdditionalConf {}) { if old_meta.version > req.version { - drop(_kv_write_lock_guard); - let err_msg = "New data version is smaller, failed update"; - tracing::warn!("{}", err_msg); - responsor - .send_resp(proto::DataMetaUpdateResponse { - version: old_meta.version, - message: err_msg.to_owned(), - }) - .await - .todo_handle(); + responsor.send_resp(proto::DataMetaUpdateResponse { + version: old_meta.version, + message: "New data version overwrite".to_owned(), + }); return; } old_meta.version = req.version; if req.serialized_meta.len() > 0 { - self.kv_store_engine() - .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) - .todo_handle(); + self.kv_store_engine().set_raw( + &keybytes, + std::mem::take(&mut req.serialized_meta), + true, + ); } else { - self.kv_store_engine() - .set(key, &old_meta, true) - .todo_handle(); + self.kv_store_engine().set(key, &old_meta, true); } } else { if req.serialized_meta.len() > 0 { - tracing::debug!( - "set new meta data, {:?}", - bincode::deserialize::(&req.serialized_meta) + self.kv_store_engine().set_raw( + &keybytes, + std::mem::take(&mut req.serialized_meta), + true, ); - self.kv_store_engine() - .set_raw(&keybytes, std::mem::take(&mut req.serialized_meta), true) - .todo_handle(); } else { - drop(_kv_write_lock_guard); - let err_msg = "Old meta data not found and missing new meta"; - tracing::warn!("{}", err_msg); - responsor - .send_resp(proto::DataMetaUpdateResponse { - version: 0, - message: err_msg.to_owned(), - }) - .await - .todo_handle(); + responsor.send_resp(proto::DataMetaUpdateResponse { + version: 0, + message: "Old meta data not found and missing new meta".to_owned(), + }); return; } } - drop(_kv_write_lock_guard); - tracing::debug!("rpc_handle_data_meta_update success"); - responsor - .send_resp(proto::DataMetaUpdateResponse { - version: req.version, - message: "Update success".to_owned(), - }) - .await - .todo_handle(); + responsor.send_resp(proto::DataMetaUpdateResponse { + version: req.version, + 
message: "Update success".to_owned(),
+        });
     }
 
     async fn rpc_handle_get_one_data(
@@ -266,23 +228,16 @@ impl DataGeneralView {
         responsor: RPCResponsor,
         req: proto::GetOneDataRequest,
     ) -> WSResult<()> {
-        tracing::debug!("rpc_handle_get_one_data {:?}", req);
-        // req.unique_id
         let kv_store_engine = self.kv_store_engine();
-        let _ = self
-            .get_data_meta(&req.unique_id, req.delete)
-            .map_err(|err| {
-                tracing::warn!("rpc_handle_get_one_data get_data_meta failed: {:?}", err);
-                err
-            })?;
+        let _ = self.get_data_meta(&req.unique_id, true)?;
         // let meta = bincode::deserialize::(&req.serialized_meta).map_err(|err| {
         //     WsSerialErr::BincodeErr {
         //         err,
         //         context: "rpc_handle_get_one_data".to_owned(),
         //     }
         // })?;
-        let mut got_or_deleted = vec![];
+        let mut deleted = vec![];
 
         let mut kv_ope_err = vec![];
 
@@ -311,10 +266,10 @@ impl DataGeneralView {
                     KvAdditionalConf {},
                 )
             };
-            got_or_deleted.push(value);
+            deleted.push(value);
         }
 
-        // tracing::warn!("temporarily no data response");
+        tracing::warn!("temporarily no data response");
 
         let (success, message): (bool, String) = if kv_ope_err.len() > 0 {
             (false, {
@@ -324,38 +279,16 @@ impl DataGeneralView {
             }
                 msg
             })
-        } else if got_or_deleted.iter().all(|v| v.is_some()) {
+        } else if deleted.iter().all(|v| v.is_some()) {
             (true, "success".to_owned())
         } else {
-            tracing::warn!("some data not found");
             (false, "some data not found".to_owned())
         };
-        let mut got_or_deleted_checked: Vec = vec![];
-        if success {
-            for v in got_or_deleted {
-                let decode_res = proto::DataItem::decode_persist(v.unwrap().1);
-                // if let Ok(v) = decode_res {
-                got_or_deleted_checked.push(decode_res);
-                // } else {
-                //     success = false;
-                //     got_or_deleted_checked = vec![];
-                //     message = format!("decode data item failed {:?}", decode_res.unwrap_err());
-                //     tracing::warn!("{}", message);
-                //     break;
-                // }
-            }
-        }
-
-        // = got_or_deleted
-        //     .into_iter()
-        //     .map(|one| proto::FileData::decode(bytes::Bytes::from(one.unwrap().1)))
-        //     .all(|one|one.is_ok())
-        //     .collect::>();
         responsor
             .send_resp(proto::GetOneDataResponse {
                 success,
-                data: got_or_deleted_checked,
+                data: vec![],
                 message,
             })
             .await?;
@@ -373,191 +306,111 @@ impl DataGeneralView {
         // Step 0: pre-check
         {
             if req.data.is_empty() {
-                responsor
-                    .send_resp(WriteOneDataResponse {
-                        remote_version: 0,
-                        success: false,
-                        message: "Request data is empty".to_owned(),
-                    })
-                    .await
-                    .todo_handle();
+                responsor.send_resp(WriteOneDataResponse {
+                    remote_version: 0,
+                    success: false,
+                    message: "Request data is empty".to_owned(),
+                });
                 return;
             }
             if req.data[0].data_item_dispatch.is_none() {
-                responsor
-                    .send_resp(WriteOneDataResponse {
-                        remote_version: 0,
-                        success: false,
-                        message: "Request data enum is none".to_owned(),
-                    })
-                    .await
-                    .todo_handle();
+                responsor.send_resp(WriteOneDataResponse {
+                    remote_version: 0,
+                    success: false,
+                    message: "Request data enum is none".to_owned(),
+                });
                 return;
             }
         }
 
         // Step1: verify version
        // take old meta
-        let mut required_meta: Option<(usize, DataSetMetaV2)> = None;
         {
-            let keybytes: Vec = KeyTypeDataSetMeta(&req.unique_id).make_key();
-            let fail_by_overwrite = || async {
+            let keybytes = KeyTypeDataSetMeta(&req.unique_id).make_key();
+            let fail_by_overwrite = || {
                 let message = "New data version overwrite".to_owned();
                 tracing::warn!("{}", message);
-                responsor
-                    .send_resp(WriteOneDataResponse {
-                        remote_version: 0,
-                        success: false,
-                        message,
-                    })
-                    .await
-                    .todo_handle();
+                responsor.send_resp(WriteOneDataResponse {
+                    remote_version: 0,
+                    success: false,
+                    message,
+                });
             };
-            let fail_with_msg = |message: String| async {
+            let fail_with_msg = |message: String| {
                 tracing::warn!("{}", message);
-                responsor
-                    .send_resp(WriteOneDataResponse {
-                        remote_version: 0,
-                        success: false,
-                        message,
-                    })
-                    .await
-                    .todo_handle();
+                responsor.send_resp(WriteOneDataResponse {
+                    remote_version: 0,
+                    success: false,
+                    message,
+                });
             };
-            loop {
-                // tracing::debug!("verify version loop");
-                let lock =
-                    kv_store_engine.with_rwlock(&KeyTypeDataSetMeta(&req.unique_id).make_key());
-                let guard = KeyLockGuard::Read(lock.read());
-                required_meta = kv_store_engine.get(
+            let res = kv_store_engine.get(
                 &KeyTypeDataSetMeta(&req.unique_id),
-                true,
+                false,
                 KvAdditionalConf {},
             ); //tofix, master send maybe not synced
-                let old_dataset_version = if required_meta.is_none() {
+            let old_dataset_version = if res.is_none() {
                 0
             } else {
-                required_meta.as_ref().unwrap().1.version
+                res.as_ref().unwrap().1.version
             };
             // need to wait for new version
-                if required_meta.is_none()
-                    || required_meta.as_ref().unwrap().1.version < req.version
-                {
-                    if required_meta.is_none() {
-                        tracing::debug!("no data version, waiting for notify");
-                    } else {
-                        tracing::debug!(
-                            "data version is old({}) at node({}), waiting for new notify({})",
-                            required_meta.as_ref().unwrap().1.version,
-                            self.p2p().nodes_config.this_node(),
-                            req.version
-                        );
-                    }
-
-                    let (kv_version, new_value) = kv_store_engine
-                        .register_waiter_for_new(&keybytes, guard)
-                        .await
-                        .unwrap_or_else(|err| {
-                            panic!("fail to wait for new data version: {:?}", err);
-                        });
-
-                    let Some(new_value) = new_value.as_raw_data() else {
+            if res.is_none() || res.as_ref().unwrap().1.version < req.version {
+                let (_, new_value) = kv_store_engine.wait_for_new(&keybytes).await;
+                let Some(new_value) = new_value.as_data_set_meta() else {
                     fail_with_msg(format!(
                         "fatal error, kv value supposed to be DataSetMeta, rather than {:?}",
                         new_value
-                    ))
-                    .await;
+                    ));
                     return;
                 };
-                    // deserialize
-                    let new_value = bincode::deserialize::(&new_value);
-                    if let Err(err) = new_value {
-                        fail_with_msg(format!(
-                            "fatal error, kv value deserialization failed: {}",
-                            err
-                        ))
-                        .await;
-                        return;
-                    }
-                    let new_value = new_value.unwrap();
-
-                    // version check
                 if new_value.version > req.version {
-                        fail_by_overwrite().await;
+                    fail_by_overwrite();
                     return;
                 } else if new_value.version < req.version {
-                        tracing::debug!("recv data version({}) is old than required({}), waiting for new notify",new_value.version, req.version);
                     // still need to wait for new version
                     continue;
                 } else {
-                        required_meta = Some((kv_version, new_value));
                     break;
                 }
             } else if old_dataset_version > req.version {
-                drop(guard);
-                fail_by_overwrite().await;
+                fail_by_overwrite();
                 return;
-            } else {
-                tracing::debug!(
-                    "data version is matched cur({}) require({}) // 0 should be invalid",
-                    old_dataset_version,
-                    req.version
-                );
-                break;
             }
         }
         }
         // Step3: write data
         tracing::debug!("start to write data");
-        let lock = kv_store_engine.with_rwlock(&KeyTypeDataSetMeta(&req.unique_id).make_key());
-        let guard = KeyLockGuard::Write(lock.write());
-        let check_meta = kv_store_engine.get(
-            &KeyTypeDataSetMeta(&req.unique_id),
-            true,
-            KvAdditionalConf {},
-        ); //tofix, master send maybe not synced
-        if check_meta.is_none()
-            || check_meta.as_ref().unwrap().0 != required_meta.as_ref().unwrap().0
-        {
-            drop(guard);
-            responsor
-                .send_resp(WriteOneDataResponse {
-                    remote_version: if check_meta.is_none() {
-                        0
-                    } else {
-                        check_meta.as_ref().unwrap().1.version
-                    },
-                    success: false,
-                    message: "meta is updated again, cancel 
write".to_owned(), - }) - .await - .todo_handle(); - return; - } - // let old_dataset_version = if res.is_none() { - // 0 - // } else { - // res.as_ref().unwrap().1.version - // }; - for (idx, data) in req.data.into_iter().enumerate() { - let serialize = data.encode_persist(); - if let Err(err) = kv_store_engine.set( - KeyTypeDataSetItem { - uid: req.unique_id.as_ref(), //req.unique_id.clone(), - idx: idx as u8, - }, - &serialize, - true, - ) { - tracing::warn!("flush error: {}", err) + match data.data_item_dispatch.unwrap() { + proto::data_item::DataItemDispatch::File(f) => { + // just store in kv + kv_store_engine.set( + KeyTypeDataSetItem { + uid: req.unique_id.as_ref(), //req.unique_id.clone(), + idx: idx as u8, + }, + &f.encode_to_vec(), + false, + ); + } + proto::data_item::DataItemDispatch::RawBytes(bytes) => { + tracing::debug!("writing data part{} bytes", idx); + kv_store_engine.set( + KeyTypeDataSetItem { + uid: &req.unique_id, + idx: idx as u8, + }, + &bytes, + false, + ); + } } } kv_store_engine.flush(); - drop(guard); tracing::debug!("data is written"); responsor .send_resp(WriteOneDataResponse { @@ -565,8 +418,7 @@ impl DataGeneralView { success: true, message: "".to_owned(), }) - .await - .todo_handle(); + .await; // ## response } @@ -575,13 +427,8 @@ impl DataGeneralView { req: proto::DataMetaGetRequest, responsor: RPCResponsor, ) -> WSResult<()> { - tracing::debug!("rpc_handle_get_data_meta with req({:?})", req); let meta = self.get_data_meta(&req.unique_id, req.delete)?; - if meta.is_none() { - tracing::debug!("rpc_handle_get_data_meta data meta not found"); - } else { - tracing::debug!("rpc_handle_get_data_meta data meta found"); - } + let serialized_meta = meta.map_or(vec![], |(_kvversion, meta)| { bincode::serialize(&meta).unwrap() }); @@ -599,9 +446,6 @@ impl DataGeneralView { unique_id: &[u8], delete: bool, ) -> WSResult> { - let ope_name = if delete { "delete" } else { "get" }; - tracing::debug!("{} data meta for uid({:?})", ope_name, unique_id); - let kv_store_engine = self.kv_store_engine(); let key = KeyTypeDataSetMeta(&unique_id); let keybytes = key.make_key(); @@ -610,9 +454,9 @@ impl DataGeneralView { let _guard = write_lock.write(); let meta_opt = if delete { - kv_store_engine.del(key, true)? - } else { kv_store_engine.get(&key, true, KvAdditionalConf {}) + } else { + kv_store_engine.del(key, true)? 
}; Ok(meta_opt) } @@ -624,11 +468,7 @@ impl DataGeneralView { // } impl DataGeneral { - async fn get_or_del_datameta_from_master( - &self, - unique_id: &[u8], - delete: bool, - ) -> WSResult { + async fn get_datameta_from_master(&self, unique_id: &[u8]) -> WSResult { let p2p = self.view.p2p(); let data_general = self.view.data_general(); // get meta from master @@ -639,24 +479,15 @@ impl DataGeneral { p2p.nodes_config.get_master_node(), DataMetaGetRequest { unique_id: unique_id.to_owned(), - delete, + delete: true, }, Some(Duration::from_secs(30)), ) .await?; - if meta.serialized_meta.is_empty() { - return Err(WsDataError::DataSetNotFound { - uniqueid: unique_id.to_owned(), - } - .into()); - } bincode::deserialize::(&meta.serialized_meta).map_err(|e| { WSError::from(WsSerialErr::BincodeErr { err: e, - context: format!( - "get_datameta_from_master failed, meta:{:?}", - meta.serialized_meta - ), + context: "delete data meta at master wrong meta serialized".to_owned(), }) }) } @@ -691,13 +522,11 @@ impl DataGeneral { let view = view.clone(); let task = tokio::spawn(async move { let req_idxs = req.idxs.clone(); - tracing::debug!("rpc_call_get_data start, remote({})", node_id); let res = view .data_general() .rpc_call_get_data .call(view.p2p(), node_id, req, Some(Duration::from_secs(30))) .await; - tracing::debug!("rpc_call_get_data returned, remote({})", node_id); let res: WSResult> = res.map(|response| { if !response.success { tracing::warn!("get/delete data failed {}", response.message); @@ -732,24 +561,9 @@ impl DataGeneral { unique_id: impl Into>, ) -> WSResult<(DataSetMetaV2, HashMap<(NodeID, usize), proto::DataItem>)> { let unique_id: Vec = unique_id.into(); - tracing::debug!("get_or_del_datameta_from_master start"); // Step1: get meta - let meta: DataSetMetaV2 = self - .get_or_del_datameta_from_master(&unique_id, false) - .await - .map_err(|err| { - if let WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }) = err { - tracing::debug!("data not found, uniqueid:{:?}", uniqueid); - return WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }); - } - tracing::warn!("`get_data` failed, err:{}", err); - err - })?; - tracing::debug!("get_or_del_datameta_from_master end"); - tracing::debug!("get_data_by_meta start"); - let res = self.get_data_by_meta(&unique_id, meta, false).await; - tracing::debug!("get_data_by_meta end"); - res + let meta: DataSetMetaV2 = self.get_datameta_from_master(&unique_id).await?; + self.get_data_by_meta(&unique_id, meta, false).await } /// return (meta, data_map) @@ -761,18 +575,7 @@ impl DataGeneral { let unique_id: Vec = unique_id.into(); // Step1: get meta - let meta: DataSetMetaV2 = self - .get_or_del_datameta_from_master(&unique_id, true) - .await - .map_err(|err| { - if let WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }) = err { - tracing::debug!("data not found, uniqueid:{:?}", uniqueid); - return WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid }); - } - tracing::warn!("`get_data` failed, err:{}", err); - err - })?; - // .default_log_err("`delete_data`")?; + let meta: DataSetMetaV2 = self.get_datameta_from_master(&unique_id).await?; self.get_data_by_meta(&unique_id, meta, true).await // @@ -824,7 +627,6 @@ impl DataGeneral { // Step 1: need the master to do the decision // - require for the latest version for write permission // - require for the distribution and cache mode - tracing::debug!("{} data version scheduling", log_tag); let version_schedule_resp = { let resp = self .rpc_call_data_version_schedule @@ 
-866,11 +668,6 @@ impl DataGeneral {
         };
             resp
         };
-        tracing::debug!(
-            "{} data version scheduled, resp: {:?}",
-            log_tag,
-            version_schedule_resp
-        );
 
         // Step2: dispatch the data source and caches
         {
@@ -911,20 +708,19 @@ impl DataGeneral {
                     let one_data_item_split =
                         one_data_item.clone_split_range(offset..offset + split_size);
                     let t = tokio::spawn(async move {
-                        let req = WriteOneDataRequest {
-                            unique_id,
-                            version,
-                            data: vec![one_data_item_split],
-                        };
-                        tracing::debug!(
-                            "[{}] write_data flushing, target node: {}, `WriteOneDataRequest` msg_id: {}",
-                            log_tag,
-                            nodeid,
-                            req.msg_id()
-                        );
+                        tracing::debug!("write_data flushing {}", log_tag);
                         view.data_general()
                             .rpc_call_write_once_data
-                            .call(view.p2p(), nodeid, req, Some(Duration::from_secs(60)))
+                            .call(
+                                view.p2p(),
+                                nodeid,
+                                WriteOneDataRequest {
+                                    unique_id,
+                                    version,
+                                    data: vec![one_data_item_split],
+                                },
+                                Some(Duration::from_secs(60)),
+                            )
                             .await
                     });
                     write_source_data_tasks.push(t);
@@ -1085,8 +881,6 @@ pub struct DataSetMetaV2 {
     pub datas_splits: Vec,
 }
 
-pub type DataSetMeta = DataSetMetaV2;
-
 // message EachNodeSplit{
 //   uint32 node_id=1;
 //   uint32 data_offset=2;
diff --git a/src/main/src/general/m_dist_lock.rs b/src/main/src/general/m_dist_lock.rs
index d9ca84e..5cee707 100644
--- a/src/main/src/general/m_dist_lock.rs
+++ b/src/main/src/general/m_dist_lock.rs
@@ -17,7 +17,6 @@ use crate::{
     logical_module_view_impl, result::WSResult, sys::LogicalModuleNewArgs, util::JoinHandleWrapper,
 };
 use axum::async_trait;
-use enum_as_inner::EnumAsInner;
 use parking_lot::Mutex;
 use rand::thread_rng;
 use rand::Rng;
@@ -39,13 +38,6 @@ logical_module_view_impl!(View, dist_lock, DistLock);
 
 type LockReleaseId = u32;
 
-#[derive(EnumAsInner)]
-pub enum DistLockOpe {
-    Read,
-    Write,
-    Unlock(LockReleaseId),
-}
-
 /// https://fvd360f8oos.feishu.cn/wiki/ZUPNwpKLEiRs6Ukzf3ncVa9FnHe
 /// Per-key lock state: the reference counts for the read/write locks, plus the notify that waiters block on until release.
 /// For the write lock, only the first contender to insert into the map acquires it; the rest wait on the notify and then race to insert again.
@@ -460,7 +452,7 @@ mod test {
 
     #[tokio::test(flavor = "multi_thread")]
     async fn test_dist_lock() {
-        let (_hold, sys1, sys2) = test_utils::get_test_sys().await;
+        let (sys1, sys2) = test_utils::get_test_sys().await;
         tokio::time::sleep(Duration::from_secs(3)).await;
 
         assert!(sys1.inner.upgrade().is_some());
diff --git a/src/main/src/general/m_kv_store_engine.rs b/src/main/src/general/m_kv_store_engine.rs
index 2cd4cff..71aeb43 100644
--- a/src/main/src/general/m_kv_store_engine.rs
+++ b/src/main/src/general/m_kv_store_engine.rs
@@ -11,8 +11,6 @@ use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard};
 use serde::Serialize;
 use serde::{de::DeserializeOwned, ser::SerializeTuple};
 
-use sled::IVec;
-use tokio::sync::oneshot;
 use std::io::Cursor;
 use std::sync::Arc;
@@ -33,9 +31,7 @@ use ws_derive::LogicalModule;
 logical_module_view_impl!(View);
 logical_module_view_impl!(View, os, OperatingSystem);
 logical_module_view_impl!(View, p2p, P2PModule);
-logical_module_view_impl!(View, kv_store_engine, KvStoreEngine);
 
-/// start from 1
 pub type KvVersion = usize;
 
 /// attention: non-reentrant
@@ -43,11 +39,6 @@ pub struct KeyLock {
     lock: Arc>,
 }
 
-pub enum KeyLockGuard<'a> {
-    Read(RwLockReadGuard<'a, ()>),
-    Write(RwLockWriteGuard<'a, ()>),
-}
-
 impl KeyLock {
     pub fn new(lock: Arc>) -> Self {
         Self { lock }
@@ -67,7 +58,7 @@ impl KeyLock {
 
 #[derive(LogicalModule)]
 pub struct KvStoreEngine {
-    key_waitings: DashMap, Vec>>,
+    key_waitings: DashMap, Mutex>>>,
     /// lock should be free when there is no read or write operation on the key
     /// let's use cache to replace the map
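The hunks that follow delete the `register_waiter_for_new` path; the comments removed there (translated below) pin down the invariant that made it safe: the caller must still hold the per-key lock under which it observed "no key / version too old" while it registers its waiter, otherwise a concurrent `set` can land in the gap and the waiter sleeps forever. A minimal sketch of that check-then-register pattern (hypothetical names, not this module's real API):

    use dashmap::DashMap;
    use parking_lot::RwLock;
    use std::sync::Arc;
    use tokio::sync::oneshot;

    struct Engine {
        locks: DashMap<Vec<u8>, Arc<RwLock<()>>>,             // per-key locks
        waiters: DashMap<Vec<u8>, Vec<oneshot::Sender<u64>>>, // per-key version waiters
    }

    impl Engine {
        /// Wait until `key` reaches at least version `want`; `read_version`
        /// is evaluated under the same key lock that guards registration.
        async fn wait_until_version<F>(&self, key: Vec<u8>, want: u64, read_version: F) -> u64
        where
            F: Fn() -> Option<u64>,
        {
            let lock = self
                .locks
                .entry(key.clone())
                .or_insert_with(|| Arc::new(RwLock::new(())))
                .clone();
            let rx = {
                let _guard = lock.read(); // held across check *and* registration
                if let Some(v) = read_version() {
                    if v >= want {
                        return v; // already new enough, nothing to wait for
                    }
                }
                let (tx, rx) = oneshot::channel();
                self.waiters.entry(key).or_default().push(tx);
                rx
                // _guard drops here, only after the waiter is registered
            };
            rx.await.expect("setter dropped the waiter list")
        }
    }

For this to hold, the write side must take the same key lock in write mode before storing the new version and draining the waiters; that pairing is what the `hold_key_guard` parameter being deleted below used to express.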
@@ -112,7 +103,6 @@ impl LogicalModule for KvStoreEngine {
     }
 }
 
-#[derive(Debug)]
 pub enum KvAdditionalRes {
     // SerialedValue(Arc<[u8]>),
 }
@@ -130,26 +120,16 @@ impl Default for KvAdditionalConf {
     }
 }
 
 impl KvStoreEngine {
-    pub fn register_waiter_for_new(
-        &self,
-        key: &[u8],
-        hold_key_guard: KeyLockGuard<'_>,
-    ) -> oneshot::Receiver<(KvVersion, KvValue)> {
+    pub async fn wait_for_new(&self, key: &[u8]) -> (KvVersion, KvValue) {
         let (wait_tx, wait_rx) = tokio::sync::oneshot::channel();
-        // each key's waiter list is only inserted into here, and is cleared in `set`;
-        // critical-section discussion:
-        // the caller has certainly verified that the wanted key is absent before listening,
-        // but what if the new key arrives before this insertion has completed?
-        // so the caller must hold the lock from observing the missing key until the insertion is done, and only unlock then
         let _ = self
             .key_waitings
             .entry(key.to_owned())
             .and_modify(|v| {
-                v.push(tokio::sync::oneshot::channel().0);
+                v.lock().push(tokio::sync::oneshot::channel().0);
             })
-            .or_insert_with(|| vec![wait_tx]);
-        drop(hold_key_guard);
-        wait_rx
+            .or_insert_with(|| Mutex::new(vec![wait_tx]));
+        wait_rx.await.unwrap()
     }
 
     // make sure some operation is atomic
@@ -192,24 +172,16 @@ impl KvStoreEngine {
         // let
         let mut vec_writer = Cursor::new(vec![0; 8 + value.len()]);
         // assert_eq!(bincode::serialized_size(&kvversion).unwrap(), 8);
-        bincode::serialize_into(&mut vec_writer, &kvversion).unwrap();
+
+        bincode::serialize_into(&mut vec_writer, &kvversion);
         let mut vec = vec_writer.into_inner();
-        vec[8..].copy_from_slice(&value);
+        vec.extend(value);
 
         let _ = db.insert(keybytes, vec).unwrap();
 
-        if let Some(mut key_waitings) = self.key_waitings.get_mut(key) {
-            // if let Some((_, key_waitings)) = self.key_waitings.remove(key) {
-            for wait_tx in key_waitings.drain(..) {
-                wait_tx
-                    .send((kvversion, KvValue::RawData(value.clone())))
-                    .unwrap_or_else(|_| panic!("send new key event failed"));
-            }
-        }
         Ok((kvversion, additinal_res))
     }
 
-    /// first kv version start from 1
     pub fn set(
         &self,
         key: K,
@@ -241,16 +213,15 @@ impl KvStoreEngine {
             1
         };
         // let
-
-        assert_eq!(bincode::serialized_size(&kvversion).unwrap(), 8);
         let mut vec_writer = Cursor::new(vec![
             0;
-            8 + bincode::serialized_size(&value).unwrap()
+            8 + bincode::serialized_size(&kvversion).unwrap()
                 as usize
         ]);
+        assert_eq!(bincode::serialized_size(&kvversion).unwrap(), 8);
 
-        bincode::serialize_into(&mut vec_writer, &kvversion).unwrap();
-        bincode::serialize_into(&mut vec_writer, value).unwrap();
+        bincode::serialize_into(&mut vec_writer, &kvversion);
+        bincode::serialize_into(&mut vec_writer, value);
 
         let _ = db.insert(keybytes, vec_writer.into_inner()).unwrap();
 
@@ -275,19 +246,6 @@ impl KvStoreEngine {
         })
     }
 
-    pub fn decode_kv(key_: &K, data: &IVec) -> (KvVersion, K::Value)
-    where
-        K: KeyType,
-    {
-        let kvversion = bincode::deserialize::(&data.as_ref()[0..8]);
-        let value = key_.deserialize_from(&data.as_ref()[8..]);
-        if let (Ok(kvversion), Some(value)) = (kvversion, value) {
-            return (kvversion as usize, value);
-        }
-
-        (0, bincode::deserialize::(&data.as_ref()).unwrap())
-    }
-
     pub fn get<'a, K>(
         &self,
         key_: &K,
@@ -311,7 +269,21 @@ impl KvStoreEngine {
                 tracing::error!("get kv error: {:?}", e);
                 None
             },
-            |v| v.map(|v| Self::decode_kv(key_, &v)),
+            |v| {
+                v.map(|v| {
+                    // support old no kv version storage format
+                    if let Some(value) = key_.deserialize_from(v.as_ref()) {
+                        (0, value)
+                    } else {
+                        let kvversion =
+                            bincode::deserialize::(&v.as_ref()[0..8]).unwrap() as usize;
+                        let value: K::Value = key_
+                            .deserialize_from(&v.as_ref()[8..])
+                            .unwrap_or_else(|| panic!("deserialize failed"));
+                        (kvversion, value)
+                    }
+                })
+            },
         )
     }
 
@@ -347,7 +319,18 @@
         let _hold_lock_guard = 
hold_lock.as_ref().map(|lock| lock.write()); let res = self.db.get().unwrap().remove(keybytes).unwrap(); - Ok(res.map(|v| Self::decode_kv(&key, &v))) + Ok(res.map(|v| { + // support old no kv version storage format + if let Some(value) = key.deserialize_from(v.as_ref()) { + (0, value) + } else { + let kvversion = bincode::deserialize::(&v.as_ref()[0..8]).unwrap() as usize; + let value: K::Value = key + .deserialize_from(&v.as_ref()[8..]) + .unwrap_or_else(|| panic!("deserialize failed")); + (kvversion, value) + } + })) } pub fn flush(&self) { let _ = self.db.get().unwrap().flush().unwrap(); @@ -446,7 +429,6 @@ pub enum KvValue { ServiceList(Vec), DataSetMeta(DataSetMetaV2), DataSetItem(Vec), - RawData(Vec), } pub struct KeyTypeKv<'a>(pub &'a [u8]); @@ -547,67 +529,3 @@ impl Serialize for KeyTypeDataSetItem<'_> { tup.end() } } - -#[cfg(test)] -mod test { - use crate::{ - general::{ - m_data_general::{DataSetMetaBuilder, DataSetMetaV2}, - m_kv_store_engine::{KeyTypeDataSetMeta, KvAdditionalConf}, - test_utils, - }, - result::WSResultExt, - sys::LogicalModuleNewArgs, - }; - - use super::View; - - #[tokio::test(flavor = "multi_thread")] - async fn test_kv_store_engine() { - let (_hold, _sys1, sys2) = test_utils::get_test_sys().await; - let view = View::new(sys2); - let key = "test_kv_store_engine_key"; - view.kv_store_engine() - .set( - KeyTypeDataSetMeta(key.as_bytes()), - &DataSetMetaBuilder::new() - .cache_mode_map_common_kv() - .cache_mode_pos_allnode() - .cache_mode_time_auto() - .version(3) - .build(), - false, - ) - .todo_handle(); - let set = view - .kv_store_engine() - .get( - &KeyTypeDataSetMeta(key.as_bytes()), - false, - KvAdditionalConf {}, - ) - .unwrap(); - assert_eq!(set.0, 1); - assert_eq!(set.1.version, 3); - let del = view - .kv_store_engine() - .del(KeyTypeDataSetMeta(key.as_bytes()), false) - .unwrap() - .unwrap(); - assert_eq!(del.0, 1); - assert_eq!(del.1.version, 3); - assert!(view - .kv_store_engine() - .get( - &KeyTypeDataSetMeta(key.as_bytes()), - false, - KvAdditionalConf {}, - ) - .is_none()); - assert!(view - .kv_store_engine() - .del(KeyTypeDataSetMeta(key.as_bytes()), false) - .unwrap() - .is_none()); - } -} diff --git a/src/main/src/general/m_os/mod.rs b/src/main/src/general/m_os/mod.rs index c3de2bd..ee978b1 100644 --- a/src/main/src/general/m_os/mod.rs +++ b/src/main/src/general/m_os/mod.rs @@ -13,7 +13,7 @@ use super::{ use crate::{ general::network::proto, logical_module_view_impl, - result::{ErrCvt, WSError, WSResult, WSResultExt, WsIoErr}, + result::{ErrCvt, WSError, WSResult, WsIoErr}, sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef}, util::JoinHandleWrapper, }; @@ -239,7 +239,7 @@ impl OperatingSystem { }) .await .unwrap(); - responser.send_resp(res).await.todo_handle(); + responser.send_resp(res).await; } async fn remote_get_dir_content_handler( @@ -306,7 +306,7 @@ impl OperatingSystem { }) .await .unwrap(); - responser.send_resp(res).await.todo_handle(); + responser.send_resp(res).await; } pub fn open_file(&self, fname: &str) -> WSResult { diff --git a/src/main/src/general/network/m_p2p.rs b/src/main/src/general/network/m_p2p.rs index 82a9297..4e7d85e 100644 --- a/src/main/src/general/network/m_p2p.rs +++ b/src/main/src/general/network/m_p2p.rs @@ -13,7 +13,7 @@ use super::{ use crate::{ config::NodesConfig, logical_module_view_impl, - result::{ErrCvt, WSResult, WSResultExt, WsNetworkConnErr, WsNetworkLogicErr}, + result::{ErrCvt, WSResult, WsNetworkConnErr, WsNetworkLogicErr}, sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef, 
NodeID}, util::JoinHandleWrapper, }; @@ -104,13 +104,6 @@ impl RPCCaller { req: R, dur: Option, ) -> WSResult { - #[cfg(feature = "rpc-log")] - tracing::debug!( - "call rpc {:?} from {} to {}", - req, - p2p.nodes_config.this_node(), - node_id - ); p2p.call_rpc::(node_id, req, dur).await } } @@ -222,13 +215,6 @@ impl Responser { where RESP: MsgPack + Default, { - #[cfg(feature = "rpc-log")] - tracing::debug!( - "resp rpc {:?} from {} to {}", - resp, - self.view.p2p().nodes_config.this_node(), - self.node_id - ); if self.view.p2p().nodes_config.this.0 == self.node_id { self.view.p2p().dispatch( self.node_id, @@ -294,17 +280,7 @@ impl P2PModule { *b.downcast::().unwrap() } }; - // if msg.msg_id() == 3 { - // tracing::info!("dispatch {:?} from: {}", msg, nid); - // } // tracing::debug!("dispatch from {} msg:{:?}", nid, msg); - #[cfg(feature = "rpc-log")] - tracing::debug!( - "handling rpc {:?} from {} to {}", - msg, - nid, - p2p.nodes_config.this_node(), - ); f( Responser { task_id, @@ -420,8 +396,7 @@ impl P2PModule { r.msg_id(), taskid, DispatchPayload::Local(Box::new(r)), - ) - .todo_handle(); + ); let resp = rx.await.unwrap(); let resp = resp.downcast::().unwrap(); @@ -463,12 +438,7 @@ impl P2PModule { Err(err) => { let _ = self.waiting_tasks.remove(&(taskid, node_id)).unwrap(); // tracing::info!("1stop holding lock msg:{} node:{}", r.msg_id(), node_id); - tracing::error!( - "rpc send failed: {:?}, request({:?}) from node({:?})", - err, - r, - self.nodes_config.this_node() - ); + tracing::error!("rpc send failed: {:?}", err); return Err(err); } } @@ -487,12 +457,7 @@ impl P2PModule { let _ = self.waiting_tasks.remove(&(taskid, node_id)); // let _ = self.p2p_kernel.close(node_id).await; - tracing::error!( - "rpc timeout: {:?} to node {} with req {:?}", - err, - node_id, - r - ); + tracing::error!("rpc timeout: {:?} to node {}", err, node_id); // tracing::warn!("rpc timeout: {:?} to node {}", err, node_id); // tracing::info!("2stop holding lock msg:{} node:{}", r.msg_id(), node_id); @@ -518,11 +483,7 @@ impl P2PModule { cb(nid, self, taskid, data)?; Ok(()) } else { - tracing::warn!( - "not match id: {}, this node: {}", - id, - self.nodes_config.this_node() - ); + tracing::warn!("not match id: {}", id); Err(WsNetworkLogicErr::MsgIdNotDispatchable(id).into()) } } diff --git a/src/main/src/general/network/m_p2p_quic.rs b/src/main/src/general/network/m_p2p_quic.rs index 9e221e3..fee9396 100644 --- a/src/main/src/general/network/m_p2p_quic.rs +++ b/src/main/src/general/network/m_p2p_quic.rs @@ -33,7 +33,7 @@ use ws_derive::LogicalModule; use crate::{ // module_view::P2PQuicNodeLMView, - logical_module_view_impl, result::{ErrCvt, WSResult, WSResultExt, WsNetworkConnErr, WsSerialErr}, sys::{BroadcastMsg, BroadcastSender, LogicalModule, LogicalModuleNewArgs, LogicalModulesRef, NodeID}, util::JoinHandleWrapper + logical_module_view_impl, result::{ErrCvt, WSResult, WsNetworkConnErr, WsSerialErr}, sys::{LogicalModulesRef,BroadcastMsg, BroadcastSender, LogicalModule, LogicalModuleNewArgs, NodeID}, util::JoinHandleWrapper }; use super::m_p2p::{MsgId, P2PKernel, P2PModule, TaskId}; @@ -360,7 +360,7 @@ async fn handle_connection( let head=bytes.split_to(headlen as usize); match deserialize_msg_id_task_id(&head) { Ok((msg_id, task_id)) => { - view.p2p().dispatch(remote_id, msg_id, task_id, bytes.into()).todo_handle(); + view.p2p().dispatch(remote_id, msg_id, task_id, bytes.into()); } Err(err) => { tracing::warn!("incoming deserial head error: {:?}", err); diff --git 
a/src/main/src/general/network/proto_ext.rs b/src/main/src/general/network/proto_ext.rs index 4abf69a..94fef6f 100644 --- a/src/main/src/general/network/proto_ext.rs +++ b/src/main/src/general/network/proto_ext.rs @@ -1,6 +1,4 @@ -use crate::{general::m_dist_lock::DistLockOpe, util::VecOrSlice}; - -use super::proto::{self, kv::KvResponse, DataItem, FileData}; +use super::proto::{self, kv::KvResponse}; use std::ops::Range; @@ -79,102 +77,3 @@ impl ProtoExtKvResponse for KvResponse { } } } - -pub trait KvRequestExt { - fn new_set(kv: proto::kv::KvPair) -> Self; - fn new_get(key: Vec) -> Self; - fn new_delete(key: Vec) -> Self; - fn new_lock(ope: DistLockOpe, key: Vec) -> Self; -} - -impl KvRequestExt for proto::kv::KvRequest { - fn new_set(kv: proto::kv::KvPair) -> Self { - proto::kv::KvRequest { - op: Some(proto::kv::kv_request::Op::Set( - proto::kv::kv_request::KvPutRequest { kv: Some(kv) }, - )), - } - } - fn new_get(key: Vec) -> Self { - proto::kv::KvRequest { - op: Some(proto::kv::kv_request::Op::Get( - proto::kv::kv_request::KvGetRequest { - range: Some(proto::kv::KeyRange { - start: key, - end: vec![], - }), - }, - )), - } - } - fn new_delete(key: Vec) -> Self { - proto::kv::KvRequest { - op: Some(proto::kv::kv_request::Op::Delete( - proto::kv::kv_request::KvDeleteRequest { - range: Some(proto::kv::KeyRange { - start: key, - end: vec![], - }), - }, - )), - } - } - fn new_lock(ope: DistLockOpe, key: Vec) -> Self { - proto::kv::KvRequest { - op: Some(proto::kv::kv_request::Op::Lock( - proto::kv::kv_request::KvLockRequest { - read_or_write: ope.is_read(), - release_id: if let DistLockOpe::Unlock(release_id) = ope { - vec![release_id] - } else { - vec![] - }, - range: Some(proto::kv::KeyRange { - start: key, - end: vec![], - }), - }, - )), - } - } -} - -pub trait DataItemExt { - fn decode_persist(data: Vec) -> Self; - fn encode_persist<'a>(&'a self) -> Vec; -} - -impl DataItemExt for proto::DataItem { - fn decode_persist(data: Vec) -> Self { - let data_item_dispatch = match data[0] { - 0 => proto::data_item::DataItemDispatch::File(FileData { - file_name_opt: String::new(), - is_dir_opt: false, - file_content: data[1..].to_owned(), - }), - 1 => proto::data_item::DataItemDispatch::RawBytes(data[1..].to_owned()), - _ => { - panic!("unknown data type") - } - }; - Self { - data_item_dispatch: Some(data_item_dispatch), - } - } - fn encode_persist<'a>(&'a self) -> Vec { - match self.data_item_dispatch.as_ref().unwrap() { - proto::data_item::DataItemDispatch::File(f) => { - let mut ret = vec![0]; - ret.extend_from_slice(&f.file_content); - ret - } - proto::data_item::DataItemDispatch::RawBytes(bytes) => { - // tracing::debug!("writing data part{} bytes", idx); - // VecOrSlice::from(&bytes) - let mut ret = vec![1]; - ret.extend_from_slice(bytes); - ret - } - } - } -} diff --git a/src/main/src/general/test_utils.rs b/src/main/src/general/test_utils.rs index a88b7c8..6971295 100644 --- a/src/main/src/general/test_utils.rs +++ b/src/main/src/general/test_utils.rs @@ -1,6 +1,5 @@ -use std::{collections::HashMap, fs, sync::OnceLock}; +use std::{collections::HashMap, sync::OnceLock}; -use futures::lock; use lazy_static::lazy_static; use tokio::sync::Mutex; @@ -15,30 +14,17 @@ lazy_static! 
{ Mutex::new(None); } -/// sys1 is the master, sys2 is the worker -pub async fn get_test_sys<'a>() -> ( - tokio::sync::MutexGuard< - 'a, - std::option::Option<((Sys, LogicalModulesRef), (Sys, LogicalModulesRef))>, - >, - LogicalModulesRef, - LogicalModulesRef, -) { +pub async fn get_test_sys() -> (LogicalModulesRef, LogicalModulesRef) { let mut locked = TEST_SYS1_SYS2.lock().await; if locked.is_none() { *locked = Some(start_2_node().await); } - // let locked = locked.as_ref().unwrap(); - let sys1_handle = locked.as_ref().unwrap().0 .1.clone(); - let sys2_handle = locked.as_ref().unwrap().1 .1.clone(); - (locked, sys1_handle, sys2_handle) + let locked = locked.as_ref().unwrap(); + (locked.0 .1.clone(), locked.1 .1.clone()) } -/// sys1 is the master, sys2 is the worker async fn start_2_node() -> ((Sys, LogicalModulesRef), (Sys, LogicalModulesRef)) { start_tracing(); - let _ = fs::remove_dir_all("test_temp_dir1"); - let _ = fs::remove_dir_all("test_temp_dir2"); let node0: NodeConfig = serde_yaml::from_str( r#" diff --git a/src/main/src/main.rs b/src/main/src/main.rs index 8e81720..cac2ee6 100644 --- a/src/main/src/main.rs +++ b/src/main/src/main.rs @@ -8,15 +8,13 @@ clippy::unnecessary_mut_passed, unused_results, clippy::let_underscore_future, - clippy::let_underscore_future, - unused_must_use + clippy::let_underscore_future )] use clap::Parser; use cmd_arg::CmdArgs; use sys::Sys; -use tracing::Level; use tracing_subscriber::{ prelude::__tracing_subscriber_SubscriberExt, util::SubscriberInitExt, Layer, }; @@ -63,21 +61,6 @@ pub fn start_tracing() { if mp.contains("hyper") { return false; } - if *v.level() == Level::DEBUG { - if mp.contains("wasm_serverless::worker::m_kv_user_client") { - return false; - } - if mp.contains("wasm_serverless::general::m_data_general") { - return false; - } - if mp.contains("wasm_serverless::master::m_data_master") { - return false; - } - if mp.contains("sled::pagecache") { - return false; - } - // return false; - } } // if v.module_path().unwrap().contains("less::network::p2p") { diff --git a/src/main/src/master/m_data_master.rs b/src/main/src/master/m_data_master.rs index 7cabc37..679e197 100644 --- a/src/main/src/master/m_data_master.rs +++ b/src/main/src/master/m_data_master.rs @@ -1,17 +1,19 @@ + use std::collections::HashSet; use std::time::Duration; use crate::general::m_data_general::{ - CacheModeVisitor, DataGeneral, DataSetMetaBuilder, DataSplit, EachNodeSplit, + CacheModeVisitor, DataGeneral, DataSetMetaBuilder, DataSplit, + EachNodeSplit, }; use crate::general::m_kv_store_engine::{ KeyType, KeyTypeDataSetMeta, KvAdditionalConf, KvStoreEngine, }; -use crate::general::network::m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}; +use crate::general::network::m_p2p::{P2PModule, RPCHandler, RPCResponsor}; use crate::general::network::proto::{ self, DataVersionScheduleRequest, DataVersionScheduleResponse, }; -use crate::result::{WSResult, WSResultExt}; +use crate::result::WSResult; use crate::sys::{LogicalModulesRef, NodeID}; use crate::util::JoinHandleWrapper; use crate::{ @@ -34,7 +36,6 @@ logical_module_view_impl!(DataMasterView, kv_store_engine, KvStoreEngine); pub struct DataMaster { view: DataMasterView, rpc_handler: RPCHandler, - rpc_caller_data_meta_update: RPCCaller, } #[async_trait] @@ -46,7 +47,6 @@ impl LogicalModule for DataMaster { Self { rpc_handler: RPCHandler::new(), view: DataMasterView::new(args.logical_modules_ref.clone()), - rpc_caller_data_meta_update: RPCCaller::new(), // view: 
DataMasterView::new(args.logical_modules_ref.clone()), } } @@ -57,7 +57,6 @@ impl LogicalModule for DataMaster { async fn start(&self) -> WSResult> { tracing::info!("start as master"); let view = self.view.clone(); - self.rpc_caller_data_meta_update.regist(view.p2p()); self.rpc_handler .regist(self.view.p2p(), move |responsor, req| { let view = view.clone(); @@ -152,7 +151,6 @@ impl DataMaster { }], }); } - tracing::debug!("decide_each_data_split res: {:?}", datasplits); datasplits } @@ -200,14 +198,8 @@ impl DataMaster { }, ); // ## update version local - tracing::debug!( - "update version local for data({:?}), the updated meta is {:?}", - req.unique_id, - set_meta - ); - let _ = kv_store_engine - .set(KeyTypeDataSetMeta(&req.unique_id), &set_meta, true) - .unwrap(); + tracing::debug!("update version local for data({:?})", req.unique_id); + kv_store_engine.set(KeyTypeDataSetMeta(&req.unique_id), &set_meta, true); kv_store_engine.flush(); set_meta }; @@ -227,54 +219,21 @@ impl DataMaster { for need_notify_node in need_notify_nodes { let view = self.view.clone(); - // let mut req = req.clone(); - // req.version = new_meta.version; + let mut req = req.clone(); + req.version = new_meta.version; // don't need to retry or wait - let serialized_meta = bincode::serialize(&new_meta).unwrap(); - let unique_id = req.unique_id.clone(); - let version = new_meta.version; let _call_task = tokio::spawn(async move { let p2p = view.p2p(); tracing::debug!( - "updating version for data({:?}) to node: {}, this_node: {}", - std::str::from_utf8(&unique_id).map_or_else( - |_err| { format!("{:?}", unique_id) }, - |ok| { ok.to_owned() } - ), - need_notify_node, - p2p.nodes_config.this_node() - ); - - tracing::debug!( - "async notify `DataMetaUpdateRequest` to node {}", + "updating version for data({:?}) to node: {}", + req.unique_id, need_notify_node ); - let resp = view - .data_master() - .rpc_caller_data_meta_update - .call( - p2p, - need_notify_node, - proto::DataMetaUpdateRequest { - unique_id, - version, - serialized_meta, - }, - Some(Duration::from_secs(60)), - ) + view.data_general() + .rpc_call_data_version_schedule + .call(p2p, need_notify_node, req, Some(Duration::from_secs(60))) .await; - if let Err(err) = resp { - tracing::error!( - "notify `DataMetaUpdateRequest` to node {} failed: {}", - need_notify_node, - err - ); - } else if let Ok(ok) = resp { - if ok.version != version { - tracing::error!("notify `DataMetaUpdateRequest` to node {} failed: version mismatch, expect: {}, remote: {}", need_notify_node, version, ok.version); - } - } }); } @@ -286,7 +245,7 @@ impl DataMaster { ); tracing::debug!( - "data:{:?} version required({}) and schedule done, caller will do following thing after receive `DataVersionScheduleResponse`", + "data:{:?} version:{} require done, followers are waiting for new data", req.unique_id, new_meta.version ); @@ -304,8 +263,7 @@ impl DataMaster { .map(|v| v.into()) .collect(), }) - .await - .todo_handle(); + .await; Ok(()) } // async fn rpc_handler_dataversion_synced_on_node( diff --git a/src/main/src/result.rs b/src/main/src/result.rs index 398c870..0026ec0 100644 --- a/src/main/src/result.rs +++ b/src/main/src/result.rs @@ -1,4 +1,4 @@ -use std::{fmt::Debug, os::unix::net::SocketAddr}; +use std::os::unix::net::SocketAddr; use async_raft::{InitializeError, RaftError}; use camelpaste::paste; @@ -12,7 +12,7 @@ use zip_extract::ZipExtractError; use crate::{ general::{ m_appmeta_manager::FnMeta, - m_data_general::EachNodeSplit, + m_data_general::{EachNodeSplit}, network::{proto, 
rpc_model::HashValue}, }, sys::NodeID, @@ -167,7 +167,7 @@ pub enum WsFuncError { #[derive(Debug)] pub enum WsDataError { DataSetNotFound { - uniqueid: Vec, + uniqueid: String, }, SetExpiredDataVersion { target_version: u64, @@ -330,19 +330,3 @@ impl_err_convertor!(SendError, WsNetworkConnErr, SendError); impl_err_convertor!(InitializeError, WsRaftErr, InitializeError); impl_err_convertor!(RaftError, WsRaftErr, RaftError); impl_err_convertor!(std::io::Error, WsIoErr, Io); - -pub trait WSResultExt { - fn todo_handle(&self); -} - -impl WSResultExt for WSResult { - #[inline] - fn todo_handle(&self) { - match self { - Ok(_ok) => {} - Err(err) => { - tracing::warn!("result err: {:?}", err); - } - } - } -} diff --git a/src/main/src/util.rs b/src/main/src/util.rs index d1dab24..7242d87 100644 --- a/src/main/src/util.rs +++ b/src/main/src/util.rs @@ -224,20 +224,3 @@ impl Drop for DropDebug { tracing::debug!("tracked drop {} [{}]", self.tag, self.rand); } } - -pub enum VecOrSlice<'a, T> { - Vec(Vec), - Slice(&'a [T]), -} - -impl From> for VecOrSlice<'_, T> { - fn from(v: Vec) -> Self { - Self::Vec(v) - } -} - -impl<'a, T> From<&'a [T]> for VecOrSlice<'a, T> { - fn from(v: &'a [T]) -> Self { - Self::Slice(v) - } -} diff --git a/src/main/src/worker/func/shared/process.rs b/src/main/src/worker/func/shared/process.rs index 2996f75..58e4b3a 100644 --- a/src/main/src/worker/func/shared/process.rs +++ b/src/main/src/worker/func/shared/process.rs @@ -1,6 +1,8 @@ // process function just run in unique process -use std::sync::Arc; +use std::{ + sync::Arc, +}; use async_trait::async_trait; use enum_as_inner::EnumAsInner; @@ -9,10 +11,9 @@ use tokio::{process::Command, sync::oneshot}; use crate::{ general::{ - m_appmeta_manager::AppType, + m_appmeta_manager::{AppType}, network::rpc_model::{self, HashValue}, }, - result::{WSError, WsIoErr}, worker::func::{shared::java, InstanceTrait}, }; @@ -68,8 +69,7 @@ impl ProcessInstance { let pid = p.id().unwrap(); tracing::debug!("killing app {} on pid raw:{} check:{:?}", self.app, pid, id); // let pid = p.id().unwrap(); - p.kill().await.unwrap(); - + p.kill().await; // cmd kill id if let Some(id) = id { let _ = Command::new("kill") diff --git a/src/main/src/worker/m_kv_user_client.rs b/src/main/src/worker/m_kv_user_client.rs index 27e4b3f..2e20858 100644 --- a/src/main/src/worker/m_kv_user_client.rs +++ b/src/main/src/worker/m_kv_user_client.rs @@ -14,7 +14,7 @@ use crate::{ }, }, logical_module_view_impl, - result::{WSError, WSResult, WSResultExt, WsDataError}, + result::WSResult, sys::{LogicalModule, LogicalModuleNewArgs, LogicalModulesRef, NodeID}, util::JoinHandleWrapper, }; @@ -25,7 +25,6 @@ logical_module_view_impl!(KvUserClientView); logical_module_view_impl!(KvUserClientView, p2p, P2PModule); logical_module_view_impl!(KvUserClientView, data_general, DataGeneral); logical_module_view_impl!(KvUserClientView, dist_lock, DistLock); -logical_module_view_impl!(KvUserClientView, kv_user_client, Option); #[derive(LogicalModule)] pub struct KvUserClient { @@ -192,7 +191,6 @@ impl KvUserClient { Ok(kv_responses) } - async fn handle_kv_set( &self, app_name: &str, @@ -219,8 +217,7 @@ impl KvUserClient { }), )), ) - .await - .todo_handle(); + .await; KvResponse::new_common(vec![]) } @@ -232,7 +229,7 @@ impl KvUserClient { ) -> Vec { if meta.datas_splits.len() != 1 { tracing::warn!( - "convert kv invalid data count number: {}", + "delete kv invalid data count number: {}", meta.datas_splits.len() ); vec![] @@ -240,7 +237,7 @@ impl KvUserClient { match 
             meta.datas_splits[0].recorver_data(uid, 0, &mut splits) {
             Ok(ok) => vec![proto::kv::KvPair { key, value: ok }],
             Err(err) => {
-                tracing::warn!("convert kv data error:{:?}", err);
+                tracing::warn!("delete kv data error:{:?}", err);
                 vec![]
             }
         }
@@ -261,12 +258,8 @@ impl KvUserClient {
                 meta,
                 splits,
             ),
-            Err(WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid })) => {
-                tracing::debug!("get kv data not found, uid({:?})", uniqueid);
-                vec![]
-            }
             Err(err) => {
-                tracing::warn!("get kv data error:{:?}", err);
+                tracing::warn!("delete kv data error:{:?}", err);
                 vec![]
             }
         };
@@ -286,10 +279,6 @@ impl KvUserClient {
                 deleted_meta,
                 deleted_splits,
             ),
-            Err(WSError::WsDataError(WsDataError::DataSetNotFound { uniqueid })) => {
-                tracing::debug!("delete kv data not found, uid({:?})", uniqueid);
-                vec![]
-            }
             Err(err) => {
                 tracing::warn!("delete kv data error:{:?}", err);
                 vec![]
@@ -363,167 +352,3 @@ impl KvUserClient {
     // // }
     // }
 }
-
-#[cfg(test)]
-mod test {
-    use core::str;
-    use std::{sync::Arc, time::Duration};
-
-    use super::KvUserClientView;
-    use crate::general::{
-        network::{
-            proto::{
-                self,
-                kv::{KvRequest, KvRequests},
-            },
-            proto_ext::KvRequestExt,
-        },
-        test_utils,
-    };
-
-    #[tokio::test(flavor = "multi_thread")]
-    async fn test_kv_user_client() {
-        let (_hold, _sys1, sys2) = test_utils::get_test_sys().await;
-        tokio::time::sleep(Duration::from_secs(3)).await;
-        let view = KvUserClientView::new(sys2);
-        let app = "test_app";
-        let func = "test_func";
-        let test_key = "test_key";
-        let test_value = "test_value";
-
-        // first time get should be none
-        {
-            let res = view
-                .kv_user_client()
-                .kv_requests(
-                    app,
-                    func,
-                    KvRequests {
-                        app: app.to_owned(),
-                        func: func.to_owned(),
-                        prev_kv_opeid: -1,
-                        requests: vec![KvRequest::new_get(test_key.as_bytes().to_owned())],
-                    },
-                )
-                .await
-                .unwrap();
-            assert!(res.responses.len() == 1);
-            match res.responses[0].resp.clone().unwrap() {
-                proto::kv::kv_response::Resp::CommonResp(kv_response) => {
-                    assert!(kv_response.kvs.len() == 0);
-                }
-                proto::kv::kv_response::Resp::LockId(_) => panic!(),
-            }
-            tracing::debug!("first time get is none");
-        }
-
-        // (insert and get then delete twice) *3
-        for _ in 0..3 {
-            let res = view
-                .kv_user_client()
-                .kv_requests(
-                    app,
-                    func,
-                    KvRequests {
-                        app: app.to_owned(),
-                        func: func.to_owned(),
-                        prev_kv_opeid: -1,
-                        requests: vec![KvRequest::new_set(proto::kv::KvPair {
-                            key: test_key.as_bytes().to_owned(),
-                            value: test_value.as_bytes().to_owned(),
-                        })],
-                    },
-                )
-                .await
-                .unwrap();
-            assert!(res.responses.len() == 1);
-            match res.responses[0].resp.clone().unwrap() {
-                proto::kv::kv_response::Resp::CommonResp(kv_response) => {
-                    assert!(kv_response.kvs.len() == 0);
-                    // assert_eq!(str::from_utf8(&kv_response.kvs[0].key).unwrap(), test_key);
-                    // assert_eq!(
-                    //     str::from_utf8(&kv_response.kvs[0].value).unwrap(),
-                    //     test_value
-                    // );
-                }
-                proto::kv::kv_response::Resp::LockId(_) => panic!(),
-            }
-            tracing::debug!("set success");
-
-            // get after set
-            let res = view
-                .kv_user_client()
-                .kv_requests(
-                    app,
-                    func,
-                    KvRequests {
-                        app: app.to_owned(),
-                        func: func.to_owned(),
-                        prev_kv_opeid: -1,
-                        requests: vec![KvRequest::new_get(test_key.as_bytes().to_owned())],
-                    },
-                )
-                .await
-                .unwrap();
-            assert!(res.responses.len() == 1);
-            match res.responses[0].resp.clone().unwrap() {
-                proto::kv::kv_response::Resp::CommonResp(kv_response) => {
-                    assert!(kv_response.kvs.len() == 1);
-                    assert!(kv_response.kvs[0].key == test_key.as_bytes().to_owned());
-                    assert!(kv_response.kvs[0].value == test_value.as_bytes().to_owned());
-                }
-                proto::kv::kv_response::Resp::LockId(_) => panic!(),
-            }
-            tracing::debug!("get after set success");
-
-            // delete after get
-            let res = view
-                .kv_user_client()
-                .kv_requests(
-                    app,
-                    func,
-                    KvRequests {
-                        app: app.to_owned(),
-                        func: func.to_owned(),
-                        prev_kv_opeid: -1,
-                        requests: vec![KvRequest::new_delete(test_key.as_bytes().to_owned())],
-                    },
-                )
-                .await
-                .unwrap();
-            assert!(res.responses.len() == 1);
-            match res.responses[0].resp.clone().unwrap() {
-                proto::kv::kv_response::Resp::CommonResp(kv_response) => {
-                    assert!(kv_response.kvs[0].key == test_key.as_bytes().to_owned());
-                    assert!(kv_response.kvs[0].value == test_value.as_bytes().to_owned());
-                }
-                proto::kv::kv_response::Resp::LockId(_) => panic!(),
-            }
-            tracing::debug!("delete after get success");
-
-            // delete again will be none
-            let res = view
-                .kv_user_client()
-                .kv_requests(
-                    app,
-                    func,
-                    KvRequests {
-                        app: app.to_owned(),
-                        func: func.to_owned(),
-                        prev_kv_opeid: -1,
-                        requests: vec![KvRequest::new_delete(test_key.as_bytes().to_owned())],
-                    },
-                )
-                .await
-                .unwrap();
-            assert!(res.responses.len() == 1);
-            match res.responses[0].resp.clone().unwrap() {
-                proto::kv::kv_response::Resp::CommonResp(kv_response) => {
-                    assert!(kv_response.kvs.len() == 0);
-                }
-                proto::kv::kv_response::Resp::LockId(_) => panic!(),
-            }
-            tracing::debug!("delete again is none");
-        }
-    }
-}

From 53d0c7571bb32ce94b4c8385bc75d79b14e1cb82 Mon Sep 17 00:00:00 2001
From: pa <1020401660@qq.com>
Date: Wed, 16 Apr 2025 01:26:06 +0800
Subject: [PATCH 25/26] Revert "feat: distributed lock"

This reverts commit ce625bba205fe042632c82837996c43a527f450e.
---
 .gitignore                                    |    3 +-
 Cargo.lock                                    | 2022 +++++++++--------
 Cargo.toml                                    |    8 +-
 .../storage/kv/raft_kv/tikvraft_kernel/mod.rs |    2 +-
 src/main/Cargo.toml                           |    7 +-
 src/main/src/config.rs                        |    4 +-
 .../src/general/m_appmeta_manager/http.rs     |   11 +-
 src/main/src/general/m_appmeta_manager/mod.rs |   75 +-
 src/main/src/general/m_data_general.rs        | 1084 ++-------
 src/main/src/general/m_dist_lock.rs           |  557 -----
 src/main/src/general/m_kv_store_engine.rs     |  280 +--
 src/main/src/general/m_metric_publisher.rs    |    2 -
 src/main/src/general/mod.rs                   |    3 -
 src/main/src/general/network/m_p2p_quic.rs    |    2 +-
 src/main/src/general/network/msg_pack.rs      |  151 +-
 src/main/src/general/network/proto_ext.rs     |   79 +-
 .../src/general/network/proto_src/data.proto  |  110 +-
 .../src/general/network/proto_src/kv.proto    |   24 -
 src/main/src/general/test_utils.rs            |   71 -
 src/main/src/main.rs                          |    4 +-
 src/main/src/master/m_data_master.rs          |  380 ++--
 src/main/src/master/m_master_kv.rs            |  565 ++---
 src/main/src/modules_global_bridge/mod.rs     |   13 +-
 src/main/src/result.rs                        |   55 +-
 src/main/src/sys.rs                           |   73 +-
 src/main/src/util.rs                          |   39 +-
 .../src/worker/func/m_instance_manager.rs     |   10 +-
 .../src/worker/func/shared/process_rpc.rs     |    9 -
 .../src/worker/func/wasm_host_funcs/kv.rs     |   28 +-
 .../src/worker/func/wasm_host_funcs/mod.rs    |   27 +-
 src/main/src/worker/m_data_follower.rs        |  188 +-
 src/main/src/worker/m_kv_user_client.rs       |  347 +--
 src/main/src/worker/mod.rs                    |    2 +-
 src/s3_server                                 |    1 +
 test/files/node_config.yaml                   |    7 -
 test/kv_store_engine_1/conf                   |    4 -
 test/kv_store_engine_1/db                     |  Bin 96 -> 0 bytes
 ws_derive/src/lib.rs                          |   14 +-
 38 files changed, 2136 insertions(+), 4125 deletions(-)
 delete mode 100644 src/main/src/general/m_dist_lock.rs
 delete mode 100644 src/main/src/general/test_utils.rs
 create mode 160000 src/s3_server
 delete mode 100644 test/files/node_config.yaml
 delete mode 100644 test/kv_store_engine_1/conf
 delete mode 100644 test/kv_store_engine_1/db
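[Editorial note, not part of the original patch: this revert deletes src/main/src/general/m_dist_lock.rs (557 lines), the distributed-lock module introduced by commit ce625bba. Since the module's source is not shown anywhere in this mail, the following is a minimal, self-contained Rust sketch of the guard-style locking pattern such a module typically exposes: acquire a named lock, hold it via an RAII guard, release on drop. Every name here (MockDistLockService, MockDistLockGuard, try_lock) is invented for illustration and does not appear in the reverted code; a real distributed lock would additionally coordinate across nodes with versioned keys and lease timeouts, which this single-process mock does not attempt. git am ignores text placed here, between the diffstat and the first diff.]

// Hypothetical illustration only; NOT the API of the reverted m_dist_lock.rs.
use std::collections::HashSet;
use std::sync::{Arc, Mutex};

/// Stand-in for a cluster-wide lock service keyed by string.
/// (A real implementation would back this with the KV store, not local memory.)
#[derive(Clone, Default)]
struct MockDistLockService {
    held: Arc<Mutex<HashSet<String>>>,
}

/// RAII guard: dropping it releases the lock.
struct MockDistLockGuard {
    key: String,
    held: Arc<Mutex<HashSet<String>>>,
}

impl MockDistLockService {
    /// Try to acquire `key`; returns `None` if it is already held.
    fn try_lock(&self, key: &str) -> Option<MockDistLockGuard> {
        let mut held = self.held.lock().unwrap();
        // `insert` returns true only if the key was not present, i.e. the lock was free.
        if held.insert(key.to_owned()) {
            Some(MockDistLockGuard {
                key: key.to_owned(),
                held: self.held.clone(),
            })
        } else {
            None
        }
    }
}

impl Drop for MockDistLockGuard {
    fn drop(&mut self) {
        // Release the lock when the guard goes out of scope.
        let _ = self.held.lock().unwrap().remove(&self.key);
    }
}

fn main() {
    let svc = MockDistLockService::default();
    let guard = svc.try_lock("app_meta").expect("first acquire succeeds");
    assert!(svc.try_lock("app_meta").is_none(), "held locks cannot be re-acquired");
    drop(guard);
    assert!(svc.try_lock("app_meta").is_some(), "released locks can be re-acquired");
}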
diff --git a/.gitignore b/.gitignore
index 8b3728b..94c1bac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,3 @@
 target
 *.zip
-Python-3.10.12
-test_temp_dir*
\ No newline at end of file
+Python-3.10.12
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index ad35ee5..18d5b72 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,18 +4,18 @@ version = 3

 [[package]]
 name = "addr2line"
-version = "0.24.2"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
+checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
 dependencies = [
  "gimli",
 ]

 [[package]]
-name = "adler2"
-version = "2.0.0"
+name = "adler"
+version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
+checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"

 [[package]]
 name = "aes"
@@ -30,26 +30,51 @@ dependencies = [
 [[package]]
 name = "aho-corasick"
-version = "1.1.3"
+version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
 dependencies = [
  "memchr",
 ]

+[[package]]
+name = "android-tzdata"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
+
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "ansi_term"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
+dependencies = [
+ "winapi",
+]
+
 [[package]]
 name = "anyhow"
-version = "1.0.93"
+version = "1.0.75"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775"
+checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"

 [[package]]
 name = "async-channel"
-version = "2.3.1"
+version = "2.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89b47800b0be77592da0afd425cc03468052844aff33b84e33cc696f64e77b6a"
+checksum = "d37875bd9915b7d67c2f117ea2c30a0989874d0b2cb694fe25403c85763c0c9e"
 dependencies = [
  "concurrent-queue",
+ "event-listener",
  "event-listener-strategy",
  "futures-core",
  "pin-project-lite",
 ]
@@ -77,13 +102,13 @@ dependencies = [
 [[package]]
 name = "async-trait"
-version = "0.1.83"
+version = "0.1.74"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd"
+checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.87",
+ "syn 2.0.38",
 ]

 [[package]]
 version = "0.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "be22061baf4b10a69b85c37dd7eb542021030bf1b0838eef0987d54b091663a6"
 dependencies = [
  "bitflags 2.6.0",
  "cfg-if",
  "futures",
  "getrandom 0.2.15",
  "libc",
  "path-absolutize",
"serde", @@ -122,9 +147,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.4.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "axum" @@ -137,9 +162,9 @@ dependencies = [ "bitflags 1.3.2", "bytes", "futures-util", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.31", + "http 0.2.10", + "http-body 0.4.5", + "hyper 0.14.27", "itoa", "matchit", "memchr", @@ -152,7 +177,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sync_wrapper 0.1.2", + "sync_wrapper", "tokio", "tower", "tower-layer", @@ -168,8 +193,8 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http 0.2.12", - "http-body 0.4.6", + "http 0.2.10", + "http-body 0.4.5", "mime", "rustversion", "tower-layer", @@ -178,17 +203,17 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.74" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" dependencies = [ "addr2line", + "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", - "windows-targets 0.52.6", ] [[package]] @@ -203,6 +228,16 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + [[package]] name = "base64ct" version = "1.6.0" @@ -236,7 +271,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] @@ -247,9 +282,18 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + +[[package]] +name = "block-buffer" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array", +] [[package]] name = "block-buffer" @@ -262,9 +306,15 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" + +[[package]] +name = "bytecount" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" [[package]] name = "byteorder" @@ -274,9 +324,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.8.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" +checksum = 
"a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" dependencies = [ "serde", ] @@ -308,15 +358,45 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02d88a780e6aa14b75d7be99f374d8b5c315aaf9c12ada1e2b1cb281468584c9" +[[package]] +name = "camino" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo-platform" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e34637b3140142bdf929fb439e8aa4ebad7651ebf7b1080b3930aa16ac1459ff" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo_metadata" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", +] + [[package]] name = "cc" -version = "1.2.1" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ "jobserver", "libc", - "shlex", ] [[package]] @@ -334,6 +414,21 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-targets 0.52.5", +] + [[package]] name = "cipher" version = "0.4.4" @@ -346,15 +441,30 @@ dependencies = [ [[package]] name = "clang-sys" -version = "1.8.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" dependencies = [ "glob", "libc", "libloading", ] +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags 1.3.2", + "strsim 0.8.0", + "textwrap 0.11.0", + "unicode-width", + "vec_map", +] + [[package]] name = "clap" version = "3.2.25" @@ -367,9 +477,9 @@ dependencies = [ "clap_lex", "indexmap 1.9.3", "once_cell", - "strsim", + "strsim 0.10.0", "termcolor", - "textwrap", + "textwrap 0.16.0", ] [[package]] @@ -396,22 +506,43 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.51" +version = "0.1.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" dependencies = [ "cc", ] [[package]] name = "concurrent-queue" -version = "2.5.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +checksum = "f057a694a54f12365049b0958a1685bb52d567f5593b355fbf685838e873d400" dependencies 
= [ "crossbeam-utils", ] +[[package]] +name = "const-str" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21077772762a1002bb421c3af42ac1725fa56066bfc53d9a55bb79905df2aaf3" +dependencies = [ + "const-str-proc-macro", +] + +[[package]] +name = "const-str-proc-macro" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e1e0fdd2e5d3041e530e1b21158aeeef8b5d0e306bc5c1e3d6cf0930d10e25a" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "syn 1.0.109", +] + [[package]] name = "constant_time_eq" version = "0.1.5" @@ -439,71 +570,79 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.7" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "cpufeatures" -version = "0.2.15" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ca741a962e1b0bff6d724a1a0958b686406e853bb14061f218562e1896f95e6" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ "libc", ] [[package]] name = "crc32fast" -version = "1.4.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" dependencies = [ + "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-deque" -version = "0.8.5" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ + "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" -version = "0.9.18" +version = "0.9.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" dependencies = [ + "autocfg", + "cfg-if", "crossbeam-utils", + "memoffset", + "scopeguard", ] [[package]] name = "crossbeam-skiplist" -version = "0.1.3" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b" +checksum = "883a5821d7d079fcf34ac55f27a833ee61678110f6b97637cc74513c0d0b42fc" dependencies = [ + "cfg-if", "crossbeam-epoch", "crossbeam-utils", + "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" [[package]] name = "crypto-common" @@ -515,6 +654,16 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-mac" +version = "0.11.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1d1a86f49236c215f271d40892d5fc950490551400b02ef360692c29815c714" +dependencies = [ + "generic-array", + "subtle", +] + [[package]] name = "darling" version = "0.13.4" @@ -535,7 +684,7 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim", + "strsim 0.10.0", "syn 1.0.109", ] @@ -550,38 +699,33 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "dashmap" -version = "6.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "hashbrown 0.14.5", - "lock_api", - "once_cell", - "parking_lot_core 0.9.10", -] - [[package]] name = "deranged" -version = "0.3.11" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" dependencies = [ "powerfmt", ] [[package]] name = "derive_more" -version = "0.99.18" +version = "0.99.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 1.0.109", +] + +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", ] [[package]] @@ -590,7 +734,7 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", + "block-buffer 0.10.4", "crypto-common", "subtle", ] @@ -617,21 +761,16 @@ dependencies = [ ] [[package]] -name = "displaydoc" -version = "0.2.5" +name = "dotenv" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.87", -] +checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" [[package]] name = "downcast-rs" -version = "1.2.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" +checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" [[package]] name = "dtoa" @@ -641,29 +780,29 @@ checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" [[package]] name = "either" -version = "1.13.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "encoding_rs" -version = "0.8.35" +version = "0.8.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" dependencies = [ "cfg-if", ] [[package]] name = "enum-as-inner" -version = "0.6.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] @@ -674,19 +813,28 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.9" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.48.0", +] + +[[package]] +name = "error-chain" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" +dependencies = [ + "version_check", ] [[package]] name = "event-listener" -version = "5.3.1" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6032be9bd27023a771701cc49f9f053c751055f71efb2e0ae5c15809093675ba" +checksum = "d93877bcde0eb80ca09131a08d23f0a5c18a620b01db137dba666d18cd9b30c2" dependencies = [ "concurrent-queue", "parking", @@ -695,9 +843,9 @@ dependencies = [ [[package]] name = "event-listener-strategy" -version = "0.5.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f214dc438f977e6d4e3500aaa277f5ad94ca83fbbd9b1a15713ce2344ccc5a1" +checksum = "d96b852f1345da36d551b9473fa1e2b1eb5c5195585c6c018118bc92a8d91160" dependencies = [ "event-listener", "pin-project-lite", @@ -705,9 +853,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.2.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" [[package]] name = "fiber-for-wasmedge" @@ -730,9 +878,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.0.35" +version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" dependencies = [ "crc32fast", "miniz_oxide", @@ -761,9 +909,9 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "form_urlencoded" -version = "1.2.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" dependencies = [ "percent-encoding", ] @@ -780,9 +928,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.31" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ "futures-channel", "futures-core", @@ -795,9 +943,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", "futures-sink", @@ -805,15 +953,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" dependencies = [ "futures-core", "futures-task", @@ -822,38 +970,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-channel", "futures-core", @@ -899,9 +1047,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", @@ -910,9 +1058,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.31.1" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" [[package]] name = "glob" @@ -922,9 +1070,28 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.4.6" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.10", + "indexmap 2.0.2", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" dependencies = [ "atomic-waker", "bytes", @@ -932,7 +1099,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.6.0", + "indexmap 2.0.2", "slab", "tokio", "tokio-util", @@ -947,15 +1114,18 @@ checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "hashbrown" -version = "0.14.5" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" [[package]] -name = "hashbrown" -version = "0.15.1" +name = "heck" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] [[package]] name = "heck" @@ -963,12 +1133,6 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - [[package]] name = "hermit-abi" version = "0.1.19" @@ -980,15 +1144,35 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.9" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" [[package]] -name = "hermit-abi" -version = "0.4.0" +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hex-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f7685beb53fc20efc2605f32f5d51e9ba18b8ef237961d1760169d2290d3bee" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "hmac" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" +checksum = "2a2a2320eb7ec0ebe8da8f744d7812d9fc4cb4d09344ac01898dbcb6a20ae69b" +dependencies = [ + "crypto-mac", + "digest 0.9.0", +] [[package]] name = "hmac" @@ -996,14 +1180,23 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" dependencies = [ - "digest", + "digest 0.10.7", +] + +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys 0.48.0", ] 
[[package]] name = "http" -version = "0.2.12" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +checksum = "f95b9abcae896730d42b78e09c155ed4ddf82c07b4de772c64aee5b2d8b7c150" dependencies = [ "bytes", "fnv", @@ -1023,20 +1216,20 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.6" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ "bytes", - "http 0.2.12", + "http 0.2.10", "pin-project-lite", ] [[package]] name = "http-body" -version = "1.0.1" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" dependencies = [ "bytes", "http 1.1.0", @@ -1044,14 +1237,14 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.2" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" dependencies = [ "bytes", - "futures-util", + "futures-core", "http 1.1.0", - "http-body 1.0.1", + "http-body 1.0.0", "pin-project-lite", ] @@ -1063,9 +1256,9 @@ checksum = "add0ab9360ddbd88cfeb3bd9574a1d85cfdfa14db10b3e21d3700dbc4328758f" [[package]] name = "httparse" -version = "1.9.5" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" [[package]] name = "httpdate" @@ -1075,21 +1268,22 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "0.14.31" +version = "0.14.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c08302e8fa335b151b788c775ff56e7a03ae64ff85c548ee820fecb70356e85" +checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" dependencies = [ "bytes", "futures-channel", "futures-core", "futures-util", - "http 0.2.12", - "http-body 0.4.6", + "h2 0.3.26", + "http 0.2.10", + "http-body 0.4.5", "httparse", "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.7", + "socket2 0.4.10", "tokio", "tower-service", "tracing", @@ -1098,16 +1292,16 @@ dependencies = [ [[package]] name = "hyper" -version = "1.5.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" +checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d" dependencies = [ "bytes", "futures-channel", "futures-util", - "h2", + "h2 0.4.5", "http 1.1.0", - "http-body 1.0.1", + "http-body 1.0.0", "httparse", "itoa", "pin-project-lite", @@ -1117,20 +1311,16 @@ dependencies = [ ] [[package]] -name = "hyper-rustls" -version = "0.27.3" +name = "hyper-tls" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ - 
"futures-util", - "http 1.1.0", - "hyper 1.5.0", - "hyper-util", - "rustls 0.23.17", - "rustls-pki-types", + "bytes", + "hyper 0.14.27", + "native-tls", "tokio", - "tokio-rustls", - "tower-service", + "tokio-native-tls", ] [[package]] @@ -1141,7 +1331,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper 1.5.0", + "hyper 1.3.1", "hyper-util", "native-tls", "tokio", @@ -1151,139 +1341,45 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.10" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56" dependencies = [ "bytes", "futures-channel", "futures-util", "http 1.1.0", - "http-body 1.0.1", - "hyper 1.5.0", + "http-body 1.0.0", + "hyper 1.3.1", "pin-project-lite", - "socket2 0.5.7", + "socket2 0.5.5", "tokio", + "tower", "tower-service", "tracing", ] [[package]] -name = "icu_collections" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locid" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" - -[[package]] -name = "icu_normalizer" -version = "1.5.0" +name = "iana-time-zone" +version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ - "displaydoc", - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "utf16_iter", - "utf8_iter", - "write16", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" - -[[package]] -name = "icu_properties" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locid_transform", - "icu_properties_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" - -[[package]] -name = "icu_provider" -version = "1.5.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider_macros", - "stable_deref_trait", - "tinystr", - "writeable", - "yoke", - "zerofrom", - "zerovec", + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", ] [[package]] -name = "icu_provider_macros" -version = "1.5.0" +name = "iana-time-zone-haiku" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.87", + "cc", ] [[package]] @@ -1294,23 +1390,12 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" dependencies = [ - "icu_normalizer", - "icu_properties", + "unicode-bidi", + "unicode-normalization", ] [[package]] @@ -1325,12 +1410,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" dependencies = [ "equivalent", - "hashbrown 0.15.1", + "hashbrown 0.14.2", ] [[package]] @@ -1344,9 +1429,9 @@ dependencies = [ [[package]] name = "instant" -version = "0.1.13" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" dependencies = [ "cfg-if", ] @@ -1357,27 +1442,16 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ - "hermit-abi 0.3.9", + "hermit-abi 0.3.3", "libc", "windows-sys 0.48.0", ] [[package]] name = "ipnet" -version = "2.10.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" - -[[package]] -name = "is-terminal" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" -dependencies = [ - "hermit-abi 0.4.0", - "libc", - "windows-sys 0.52.0", -] +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] name = "itertools" @@ -1390,24 +1464,24 @@ dependencies = [ [[package]] name = "itertools" -version = "0.12.1" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +checksum = 
"b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "jobserver" -version = "0.1.32" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" dependencies = [ "libc", ] @@ -1423,9 +1497,9 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.5.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "lazycell" @@ -1441,28 +1515,18 @@ checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" [[package]] name = "libc" -version = "0.2.164" +version = "0.2.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" +checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" [[package]] name = "libloading" -version = "0.8.5" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" dependencies = [ "cfg-if", - "windows-targets 0.52.6", -] - -[[package]] -name = "libredox" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" -dependencies = [ - "bitflags 2.6.0", - "libc", + "windows-sys 0.48.0", ] [[package]] @@ -1481,9 +1545,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.20" +version = "1.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2d16453e800a8cf6dd2fc3eb4bc99b786a9b90c663b8559a5b1a041bf89e472" +checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" dependencies = [ "cc", "libc", @@ -1499,21 +1563,15 @@ checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "linux-raw-sys" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" - -[[package]] -name = "litemap" -version = "0.7.3" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" +checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" [[package]] name = "lock_api" -version = "0.4.12" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" dependencies = [ "autocfg", "scopeguard", @@ -1521,17 +1579,17 @@ dependencies = [ [[package]] name = "log" -version = "0.4.22" +version = "0.4.20" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] -name = "matchers" -version = "0.1.0" +name = "mach2" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709" dependencies = [ - "regex-automata 0.1.10", + "libc", ] [[package]] @@ -1540,6 +1598,17 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +[[package]] +name = "md-5" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15" +dependencies = [ + "block-buffer 0.9.0", + "digest 0.9.0", + "opaque-debug", +] + [[package]] name = "md-5" version = "0.10.6" @@ -1547,14 +1616,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ "cfg-if", - "digest", + "digest 0.10.7", ] [[package]] name = "memchr" -version = "2.7.4" +version = "2.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" + +[[package]] +name = "memoffset" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] [[package]] name = "mime" @@ -1570,38 +1648,38 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" dependencies = [ - "adler2", + "adler", ] [[package]] name = "mio" -version = "1.0.2" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" dependencies = [ - "hermit-abi 0.3.9", "libc", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.52.0", + "windows-sys 0.48.0", ] [[package]] name = "moka" -version = "0.12.8" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cf62eb4dd975d2dde76432fb1075c49e3ee2331cf36f1f8fd4b66550d32b6f" +checksum = "d8017ec3548ffe7d4cef7ac0e12b044c01164a74c0f3119420faeaf13490ad8b" dependencies = [ "crossbeam-channel", "crossbeam-epoch", "crossbeam-utils", "once_cell", - "parking_lot 0.12.3", + "parking_lot 0.12.1", "quanta", "rustc_version", + "skeptic", "smallvec", "tagptr", "thiserror", @@ -1618,7 +1696,7 @@ dependencies = [ "bytes", "encoding_rs", "futures-util", - "http 0.2.12", + "http 0.2.10", "httparse", "log", "memchr", @@ -1629,9 +1707,9 @@ dependencies = [ [[package]] name = "multimap" -version = "0.10.0" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" +checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" [[package]] name = "native-tls" @@ -1679,12 +1757,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "num-conv" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" - [[package]] name = "num-derive" version = "0.3.3" @@ -1698,35 +1770,60 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.19" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.3", + "libc", +] + +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ + "libc", +] + [[package]] name = "object" -version = "0.36.5" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.20.2" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "opaque-debug" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" [[package]] name = "openssl" -version = "0.10.68" +version = "0.10.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" +checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.4.1", "cfg-if", "foreign-types", "libc", @@ -1743,7 +1840,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] @@ -1754,9 +1851,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.104" +version = "0.9.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" dependencies = [ "cc", "libc", @@ -1770,6 +1867,12 @@ version = "6.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" +[[package]] +name = "outref" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" + [[package]] name = 
"overload" version = "0.1.1" @@ -1778,9 +1881,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "parking" -version = "2.2.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" +checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" [[package]] name = "parking_lot" @@ -1795,12 +1898,12 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.3" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.10", + "parking_lot_core 0.9.8", ] [[package]] @@ -1819,15 +1922,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.10" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.7", + "redox_syscall 0.3.5", "smallvec", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] @@ -1843,9 +1946,9 @@ dependencies = [ [[package]] name = "paste" -version = "1.0.15" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" [[package]] name = "path-absolutize" @@ -1871,10 +1974,10 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" dependencies = [ - "digest", - "hmac", + "digest 0.10.7", + "hmac 0.12.1", "password-hash", - "sha2", + "sha2 0.10.8", ] [[package]] @@ -1894,45 +1997,45 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "petgraph" -version = "0.6.5" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.6.0", + "indexmap 2.0.2", ] [[package]] name = "pin-project" -version = "1.1.7" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" +checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.7" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" +checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] name = "pin-project-lite" -version = 
"0.2.15" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" [[package]] name = "pin-utils" @@ -1942,9 +2045,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" [[package]] name = "powerfmt" @@ -1954,21 +2057,18 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" -dependencies = [ - "zerocopy", -] +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.25" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" +checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] @@ -1997,22 +2097,22 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" dependencies = [ "unicode-ident", ] [[package]] name = "prometheus-client" -version = "0.22.3" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "504ee9ff529add891127c4827eb481bd69dc0ebc72e9a682e187db4caa60c3ca" +checksum = "6f87c10af16e0af74010d2a123d202e8363c04db5acfa91d8747f64a8524da3a" dependencies = [ "dtoa", "itoa", - "parking_lot 0.12.3", + "parking_lot 0.12.1", "prometheus-client-derive-encode", ] @@ -2024,7 +2124,7 @@ checksum = "440f724eba9f6996b75d63681b0a92b06947f1457076d503a4d2e2c8f56442b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] @@ -2039,33 +2139,34 @@ dependencies = [ [[package]] name = "prost" -version = "0.12.6" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +checksum = "f4fdd22f3b9c31b53c060df4a0613a1c7f062d4115a2b984dd15b1858f7e340d" dependencies = [ "bytes", - "prost-derive 0.12.6", + "prost-derive 0.12.1", ] [[package]] name = "prost-build" -version = "0.12.6" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +checksum = "8bdf592881d821b83d471f8af290226c8d51402259e9bb5be7f9f8bdebbb11ac" dependencies = [ "bytes", - "heck 0.5.0", - "itertools 0.12.1", + "heck 0.4.1", + "itertools 0.11.0", "log", "multimap", "once_cell", "petgraph", "prettyplease", - "prost 0.12.6", + "prost 0.12.1", "prost-types", "regex", - "syn 2.0.87", + "syn 2.0.38", "tempfile", + "which", ] [[package]] @@ -2083,24 +2184,35 @@ dependencies = [ [[package]] name = 
"prost-derive" -version = "0.12.6" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +checksum = "265baba7fabd416cf5078179f7d2cbeca4ce7a9041111900675ea7c4cb8a4c32" dependencies = [ "anyhow", - "itertools 0.12.1", + "itertools 0.11.0", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] name = "prost-types" -version = "0.12.6" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e081b29f63d83a4bc75cfc9f3fe424f9156cf92d8a4f0c9407cce9a1b67327cf" +dependencies = [ + "prost 0.12.1", +] + +[[package]] +name = "pulldown-cmark" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +checksum = "77a1a2f1f0a7ecff9c31abbe177637be0e97a0aef46cf8738ece09327985d998" dependencies = [ - "prost 0.12.6", + "bitflags 1.3.2", + "memchr", + "unicase", ] [[package]] @@ -2115,7 +2227,7 @@ dependencies = [ "quinn", "quinn-proto", "rcgen", - "rustls 0.20.9", + "rustls", "serde", "thiserror", "tokio", @@ -2125,12 +2237,13 @@ dependencies = [ [[package]] name = "quanta" -version = "0.12.3" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5167a477619228a0b284fac2674e3c388cba90631d7b7de620e6f1fcd08da5" +checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab" dependencies = [ "crossbeam-utils", "libc", + "mach2", "once_cell", "raw-cpuid", "wasi 0.11.0+wasi-snapshot-preview1", @@ -2138,6 +2251,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "quick-xml" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffc053f057dd768a56f62cd7e434c42c831d296968997e9ac1f76ea7c2d14c41" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quinn" version = "0.9.4" @@ -2149,7 +2272,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.20.9", + "rustls", "thiserror", "tokio", "tracing", @@ -2166,7 +2289,7 @@ dependencies = [ "rand 0.8.5", "ring 0.16.20", "rustc-hash", - "rustls 0.20.9", + "rustls", "slab", "thiserror", "tinyvec", @@ -2189,9 +2312,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.37" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] @@ -2255,7 +2378,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.10", ] [[package]] @@ -2269,18 +2392,18 @@ dependencies = [ [[package]] name = "raw-cpuid" -version = "11.2.0" +version = "10.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ab240315c661615f2ee9f0f2cd32d5a7343a84d5ebcccb99d46e6637565e7b0" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" dependencies = [ - "bitflags 2.6.0", + "bitflags 1.3.2", ] [[package]] name = "rayon" -version = "1.10.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +checksum = 
"9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" dependencies = [ "either", "rayon-core", @@ -2288,9 +2411,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.1" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -2304,7 +2427,7 @@ checksum = "6413f3de1edee53342e6138e75b56d32e7bc6e332b3bd62d497b1929d4cfbcdd" dependencies = [ "pem", "ring 0.16.20", - "time 0.3.36", + "time 0.3.30", "yasna", ] @@ -2319,86 +2442,79 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.7" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags 2.6.0", + "bitflags 1.3.2", ] [[package]] -name = "redox_users" -version = "0.4.6" +name = "redox_syscall" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ - "getrandom 0.2.15", - "libredox", - "thiserror", + "bitflags 1.3.2", ] [[package]] -name = "regex" -version = "1.11.1" +name = "redox_users" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ - "aho-corasick", - "memchr", - "regex-automata 0.4.9", - "regex-syntax 0.8.5", + "getrandom 0.2.10", + "redox_syscall 0.2.16", + "thiserror", ] [[package]] -name = "regex-automata" -version = "0.1.10" +name = "regex" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ - "regex-syntax 0.6.29", + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", ] [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.5", + "regex-syntax", ] [[package]] name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - -[[package]] -name = "regex-syntax" -version = "0.8.5" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "reqwest" -version = "0.12.9" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" +checksum = 
"566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" dependencies = [ "base64 0.22.1", "bytes", "encoding_rs", "futures-core", "futures-util", - "h2", + "h2 0.4.5", "http 1.1.0", - "http-body 1.0.1", + "http-body 1.0.0", "http-body-util", - "hyper 1.5.0", - "hyper-rustls", - "hyper-tls", + "hyper 1.3.1", + "hyper-tls 0.6.0", "hyper-util", "ipnet", "js-sys", @@ -2412,7 +2528,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper 1.0.1", + "sync_wrapper", "system-configuration", "tokio", "tokio-native-tls", @@ -2421,7 +2537,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "windows-registry", + "winreg", ] [[package]] @@ -2441,24 +2557,105 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.8" +version = "0.17.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +checksum = "fb0205304757e5d899b9c2e448b867ffd03ae7f988002e47cd24954391394d0b" dependencies = [ "cc", - "cfg-if", - "getrandom 0.2.15", + "getrandom 0.2.10", "libc", "spin 0.9.8", "untrusted 0.9.0", - "windows-sys 0.52.0", + "windows-sys 0.48.0", +] + +[[package]] +name = "rusoto_core" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1db30db44ea73551326269adcf7a2169428a054f14faf9e1768f2163494f2fa2" +dependencies = [ + "async-trait", + "base64 0.13.1", + "bytes", + "crc32fast", + "futures", + "http 0.2.10", + "hyper 0.14.27", + "hyper-tls 0.5.0", + "lazy_static", + "log", + "rusoto_credential", + "rusoto_signature", + "rustc_version", + "serde", + "serde_json", + "tokio", + "xml-rs", +] + +[[package]] +name = "rusoto_credential" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee0a6c13db5aad6047b6a44ef023dbbc21a056b6dab5be3b79ce4283d5c02d05" +dependencies = [ + "async-trait", + "chrono", + "dirs-next", + "futures", + "hyper 0.14.27", + "serde", + "serde_json", + "shlex", + "tokio", + "zeroize", +] + +[[package]] +name = "rusoto_s3" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aae4677183411f6b0b412d66194ef5403293917d66e70ab118f07cc24c5b14d" +dependencies = [ + "async-trait", + "bytes", + "futures", + "rusoto_core", + "xml-rs", +] + +[[package]] +name = "rusoto_signature" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ae95491c8b4847931e291b151127eccd6ff8ca13f33603eb3d0035ecb05272" +dependencies = [ + "base64 0.13.1", + "bytes", + "chrono", + "digest 0.9.0", + "futures", + "hex", + "hmac 0.11.0", + "http 0.2.10", + "hyper 0.14.27", + "log", + "md-5 0.9.1", + "percent-encoding", + "pin-project-lite", + "rusoto_credential", + "rustc_version", + "serde", + "sha2 0.9.9", + "tokio", ] [[package]] name = "rustc-demangle" -version = "0.1.24" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustc-hash" @@ -2468,9 +2665,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc_version" -version = "0.4.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +checksum = 
"bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ "semver", ] @@ -2491,15 +2688,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.41" +version = "0.38.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" +checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.4.1", "errno", "libc", - "linux-raw-sys 0.4.14", - "windows-sys 0.52.0", + "linux-raw-sys 0.4.10", + "windows-sys 0.48.0", ] [[package]] @@ -2513,56 +2710,79 @@ dependencies = [ "webpki", ] -[[package]] -name = "rustls" -version = "0.23.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f1a745511c54ba6d4465e8d5dfbd81b45791756de28d4981af70d6dca128f1e" -dependencies = [ - "once_cell", - "rustls-pki-types", - "rustls-webpki", - "subtle", - "zeroize", -] - [[package]] name = "rustls-pemfile" -version = "2.2.0" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" dependencies = [ + "base64 0.22.1", "rustls-pki-types", ] [[package]] name = "rustls-pki-types" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" - -[[package]] -name = "rustls-webpki" -version = "0.102.8" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" -dependencies = [ - "ring 0.17.8", - "rustls-pki-types", - "untrusted 0.9.0", -] +checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" [[package]] name = "rustversion" -version = "1.0.18" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "s3_server" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "backtrace", + "base64-simd", + "chrono", + "const-str", + "dotenv", + "futures", + "hex-simd", + "hmac 0.12.1", + "http 0.2.10", + "httparse", + "hyper 0.14.27", + "md-5 0.10.6", + "memchr", + "mime", + "nom", + "once_cell", + "path-absolutize", + "pin-project-lite", + "quick-xml", + "regex", + "rusoto_core", + "rusoto_s3", + "serde", + "serde_json", + "serde_urlencoded", + "sha2 0.10.8", + "smallvec", + "structopt", + "thiserror", + "tokio", + "tracing", + "tracing-error", + "tracing-futures", + "tracing-subscriber", + "transform-stream", + "urlencoding", + "uuid", + "xml-rs", +] [[package]] name = "same-file" @@ -2575,11 +2795,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.27" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" 
dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -2600,17 +2820,17 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.17.8", + "ring 0.17.5", "untrusted 0.9.0", ] [[package]] name = "security-framework" -version = "2.11.1" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +checksum = "770452e37cad93e0a50d5abc3990d2bc351c36d0328f86cefec2f2fb206eaef6" dependencies = [ - "bitflags 2.6.0", + "bitflags 1.3.2", "core-foundation", "core-foundation-sys", "libc", @@ -2619,9 +2839,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.1" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" +checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" dependencies = [ "core-foundation-sys", "libc", @@ -2629,47 +2849,49 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" +dependencies = [ + "serde", +] [[package]] name = "serde" -version = "1.0.215" +version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "itoa", - "memchr", "ryu", "serde", ] [[package]] name = "serde_path_to_error" -version = "0.1.16" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6" +checksum = "4beec8bce849d58d06238cb50db2e1c417cfeafa4c63f692b15c82b7c80f8335" dependencies = [ "itoa", "serde", @@ -2689,11 +2911,11 @@ dependencies = [ [[package]] name = "serde_yaml" -version = "0.9.34+deprecated" +version = "0.9.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +checksum = "3cc7a1570e38322cfe4154732e5110f887ea57e22b76f4bfd32b5bdd3368666c" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.0.2", "itoa", "ryu", "serde", @@ -2708,7 +2930,20 @@ checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", "cpufeatures", - "digest", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.9.9" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800" +dependencies = [ + "block-buffer 0.9.0", + "cfg-if", + "cpufeatures", + "digest 0.9.0", + "opaque-debug", ] [[package]] @@ -2719,7 +2954,7 @@ checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ "cfg-if", "cpufeatures", - "digest", + "digest 0.10.7", ] [[package]] @@ -2739,13 +2974,28 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.2" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" dependencies = [ "libc", ] +[[package]] +name = "skeptic" +version = "0.13.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8" +dependencies = [ + "bytecount", + "cargo_metadata", + "error-chain", + "glob", + "pulldown-cmark", + "tempfile", + "walkdir", +] + [[package]] name = "slab" version = "0.4.9" @@ -2791,15 +3041,15 @@ dependencies = [ [[package]] name = "slog-term" -version = "2.9.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6e022d0b998abfe5c3782c1f03551a596269450ccd677ea51c56f8b214610e8" +checksum = "87d29185c55b7b258b4f120eab00f48557d4d9bc814f41713f449d35b0f8977c" dependencies = [ - "is-terminal", + "atty", "slog", "term", "thread_local", - "time 0.3.36", + "time 0.3.30", ] [[package]] @@ -2829,12 +3079,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.48.0", ] [[package]] @@ -2862,10 +3112,10 @@ dependencies = [ ] [[package]] -name = "stable_deref_trait" -version = "1.2.0" +name = "strsim" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "strsim" @@ -2873,11 +3123,35 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "structopt" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" +dependencies = [ + "clap 2.34.0", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" +dependencies = [ + "heck 0.3.3", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "subtle" -version = "2.6.1" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +checksum = 
"6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" @@ -2892,9 +3166,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.87" +version = "2.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" dependencies = [ "proc-macro2", "quote", @@ -2907,31 +3181,11 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" -[[package]] -name = "sync_wrapper" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" -dependencies = [ - "futures-core", -] - -[[package]] -name = "synstructure" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.87", -] - [[package]] name = "sysinfo" -version = "0.29.11" +version = "0.29.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666" +checksum = "0a18d114d420ada3a891e6bc8e96a2023402203296a47cdd65083377dad18ba5" dependencies = [ "cfg-if", "core-foundation-sys", @@ -2944,20 +3198,20 @@ dependencies = [ [[package]] name = "system-configuration" -version = "0.6.1" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" dependencies = [ - "bitflags 2.6.0", + "bitflags 1.3.2", "core-foundation", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.6.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" dependencies = [ "core-foundation-sys", "libc", @@ -2977,15 +3231,15 @@ checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" [[package]] name = "tempfile" -version = "3.14.0" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" dependencies = [ "cfg-if", "fastrand", - "once_cell", - "rustix 0.38.41", - "windows-sys 0.59.0", + "redox_syscall 0.4.1", + "rustix 0.38.21", + "windows-sys 0.48.0", ] [[package]] @@ -3001,44 +3255,53 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.4.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +checksum = "6093bad37da69aab9d123a8091e4be0aa4a03e4d601ec641c327398315f62b64" dependencies = [ "winapi-util", ] [[package]] name = "textwrap" -version = "0.16.1" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "textwrap" +version = "0.16.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.69" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.69" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] name = "thread_local" -version = "1.1.8" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" dependencies = [ "cfg-if", "once_cell", @@ -3057,13 +3320,14 @@ dependencies = [ [[package]] name = "time" -version = "0.3.36" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ "deranged", "itoa", - "num-conv", + "libc", + "num_threads", "powerfmt", "serde", "time-core", @@ -3078,29 +3342,18 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.18" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" dependencies = [ - "num-conv", "time-core", ] -[[package]] -name = "tinystr" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" -dependencies = [ - "displaydoc", - "zerovec", -] - [[package]] name = "tinyvec" -version = "1.8.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" dependencies = [ "tinyvec_macros", ] @@ -3113,31 +3366,32 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.41.1" +version = "1.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" dependencies = [ "backtrace", "bytes", "libc", "mio", - "parking_lot 0.12.3", + "num_cpus", + "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.7", + "socket2 0.5.5", "tokio-macros", - "windows-sys 0.52.0", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "2.4.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] @@ -3150,22 +3404,11 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-rustls" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" -dependencies = [ - "rustls 0.23.17", - "rustls-pki-types", - "tokio", -] - [[package]] name = "tokio-util" -version = "0.7.12" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" dependencies = [ "bytes", "futures-core", @@ -3196,12 +3439,12 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61c5bb1d698276a2443e5ecfabc1008bf15a36c12e6a7176e7bf089ea9131140" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.4.1", "bytes", "futures-core", "futures-util", - "http 0.2.12", - "http-body 0.4.6", + "http 0.2.10", + "http-body 0.4.5", "http-range-header", "pin-project-lite", "tower-layer", @@ -3210,15 +3453,15 @@ dependencies = [ [[package]] name = "tower-layer" -version = "0.3.3" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" +checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" [[package]] name = "tower-service" -version = "0.3.3" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" @@ -3240,7 +3483,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] @@ -3253,6 +3496,16 @@ dependencies = [ "valuable", ] +[[package]] +name = "tracing-error" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e" +dependencies = [ + "tracing", + "tracing-subscriber", +] + [[package]] name = "tracing-futures" version = "0.2.5" @@ -3265,9 +3518,9 @@ dependencies = [ [[package]] name = "tracing-log" -version = "0.2.0" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +checksum = "f751112709b4e791d8ce53e32c4ed2d353565a795ce84da2285393f41557bdf2" dependencies = [ "log", "once_cell", @@ -3276,22 +3529,27 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.18" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" dependencies = [ - "matchers", "nu-ansi-term", - "once_cell", - "regex", "sharded-slab", "smallvec", "thread_local", - "tracing", "tracing-core", "tracing-log", ] +[[package]] +name = "transform-stream" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"05034de7a8fcb11796a36478a2a8b16dca6772644dec5f49f709d5c66a38d359" +dependencies = [ + "futures-core", +] + [[package]] name = "triomphe" version = "0.1.11" @@ -3300,9 +3558,9 @@ checksum = "859eb650cfee7434994602c3a68b25d77ad9e68c8a6cd491616ef86661382eb3" [[package]] name = "try-lock" -version = "0.2.5" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" [[package]] name = "typenum" @@ -3310,29 +3568,53 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "unicase" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" +dependencies = [ + "version_check", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +dependencies = [ + "tinyvec", +] [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" [[package]] name = "unicode-width" -version = "0.1.14" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "unsafe-libyaml" -version = "0.2.11" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" +checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa" [[package]] name = "untrusted" @@ -3348,9 +3630,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.3" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" +checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" dependencies = [ "form_urlencoded", "idna", @@ -3358,37 +3640,31 @@ dependencies = [ ] [[package]] -name = "utf16_iter" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" - -[[package]] -name = "utf8_iter" -version = "1.0.4" +name = "urlencoding" +version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" [[package]] name = "uuid" -version = "1.11.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.10", "rand 0.8.5", "uuid-macro-internal", ] [[package]] name = "uuid-macro-internal" -version = "1.11.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b91f57fe13a38d0ce9e28a03463d8d3c2468ed03d75375110ec71d93b449a08" +checksum = "9881bea7cbe687e36c9ab3b778c36cd0487402e270304e8b1296d5085303c1a2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] @@ -3403,11 +3679,23 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + [[package]] name = "version_check" -version = "0.9.5" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "vsimd" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" [[package]] name = "walkdir" @@ -3468,15 +3756,15 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.45" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" dependencies = [ "cfg-if", "js-sys", @@ -3502,7 +3790,7 @@ checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3533,15 +3821,14 @@ dependencies = [ "axum", "bincode", "camelpaste", - "clap", + "clap 3.2.25", "crossbeam-skiplist", - "dashmap", "downcast-rs", "enum-as-inner", "futures", - "hyper 0.14.31", + "hyper 0.14.27", "lazy_static", - "md-5", + "md-5 0.10.6", "moka", "parking_lot 0.11.2", "path-absolutize", @@ -3552,6 +3839,7 @@ dependencies = [ "rand 0.8.5", "regex", "reqwest", + "s3_server", "serde", "serde_json", "serde_yaml", @@ -3584,7 +3872,7 @@ checksum = "ad17cbd3b8a8ed1cba44755e616495e13baf3f7e7f9576df7d7a357b928c070a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.38", ] [[package]] @@ -3614,7 +3902,7 @@ dependencies = [ "fiber-for-wasmedge", "lazy_static", "libc", - "parking_lot 0.12.3", + "parking_lot 0.12.1", "paste", "rand 0.8.5", "scoped-tls", @@ -3666,9 +3954,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.72" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" dependencies = [ "js-sys", "wasm-bindgen", @@ -3680,10 +3968,22 @@ version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" dependencies = [ - "ring 0.17.8", + "ring 0.17.5", "untrusted 0.9.0", ] +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.21", +] + [[package]] name = "winapi" version = "0.3.9" @@ -3702,11 +4002,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ - "windows-sys 0.59.0", + "winapi", ] [[package]] @@ -3716,33 +4016,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows-registry" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" -dependencies = [ - "windows-result", - "windows-strings", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-result" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-strings" -version = "0.1.0" +name = "windows-core" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-result", - "windows-targets 0.52.6", + "windows-targets 0.52.5", ] [[package]] @@ -3775,16 +4054,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets 0.52.6", + "windows-targets 0.52.5", ] [[package]] @@ -3804,18 +4074,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.6" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - 
"windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", ] [[package]] @@ -3832,9 +4102,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.6" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" [[package]] name = "windows_aarch64_msvc" @@ -3850,9 +4120,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.6" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" [[package]] name = "windows_i686_gnu" @@ -3868,15 +4138,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.6" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" [[package]] name = "windows_i686_gnullvm" -version = "0.52.6" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" [[package]] name = "windows_i686_msvc" @@ -3892,9 +4162,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.6" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" [[package]] name = "windows_x86_64_gnu" @@ -3910,9 +4180,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.6" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" [[package]] name = "windows_x86_64_gnullvm" @@ -3928,9 +4198,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.6" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" [[package]] name = "windows_x86_64_msvc" @@ -3946,21 +4216,19 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "write16" -version = "1.0.0" +version = 
"0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] -name = "writeable" -version = "0.5.5" +name = "winreg" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] [[package]] name = "ws_derive" @@ -3974,78 +4242,18 @@ dependencies = [ ] [[package]] -name = "yasna" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" -dependencies = [ - "time 0.3.36", -] - -[[package]] -name = "yoke" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.87", - "synstructure", -] - -[[package]] -name = "zerocopy" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" -dependencies = [ - "byteorder", - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.87", -] - -[[package]] -name = "zerofrom" -version = "0.1.4" +name = "xml-rs" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" -dependencies = [ - "zerofrom-derive", -] +checksum = "af4e2e2f7cba5a093896c1e150fbfe177d1883e7448200efb81d40b9d339ef26" [[package]] -name = "zerofrom-derive" -version = "0.1.4" +name = "yasna" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.87", - "synstructure", + "time 0.3.30", ] [[package]] @@ -4054,28 +4262,6 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" -[[package]] -name = "zerovec" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.87", -] - [[package]] name = "zip" version = "0.5.13" @@ -4103,7 
+4289,7 @@ dependencies = [ "crc32fast", "crossbeam-utils", "flate2", - "hmac", + "hmac 0.12.1", "pbkdf2", "sha1", "zstd", @@ -4141,9 +4327,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.13+zstd.1.5.6" +version = "2.0.11+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +checksum = "75652c55c0b6f3e6f12eb786fe1bc960396bf05a1eb3bf1f3691c3610ac2e6d4" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index ae25ebd..d311dd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,8 @@ [workspace] -members = ["src/main"] +members = ["src/main", "src/s3_server"] resolver = "2" +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [workspace.package] version = "0.9.5" @@ -19,9 +20,8 @@ parking_lot = "0.11.1" # raft = { version = "0.7.0", default-features = false, features = [ # "prost-codec", # ] } # tikv raft -async-raft = "0.6.1" #{ path = "async-raft/async-raft" } # +async-raft = "0.6.1" #{ path = "async-raft/async-raft" } # tracing = "0.1.40" -tracing-subscriber = { version = "0.3", features = ["env-filter", "std"] } # openraft = "0.8" serde = "1.0.126" serde_json = "1.0.64" @@ -32,6 +32,7 @@ slog-async = "2.3.0" slog-term = "2.4.0" regex = "1" camelpaste = "0.1.0" +tracing-subscriber = "0.3" ws_derive = { path = "./ws_derive" } clap = { version = "3", features = ["derive"] } downcast-rs = "1.2.0" @@ -58,7 +59,6 @@ walkdir = "2.5.0" hyper = { version = "0.14.18", features = ["server"] } md-5 = "0.10.1" path-absolutize = "3.0.13" -dashmap = "6.1.0" [profile.test] # 0: no optimizations diff --git a/src/_back/storage/kv/raft_kv/tikvraft_kernel/mod.rs b/src/_back/storage/kv/raft_kv/tikvraft_kernel/mod.rs index fcd136c..2c8b640 100644 --- a/src/_back/storage/kv/raft_kv/tikvraft_kernel/mod.rs +++ b/src/_back/storage/kv/raft_kv/tikvraft_kernel/mod.rs @@ -138,7 +138,7 @@ impl RaftThreadState { tracing::info!("proprose join"); self.proprosed_join = true; let mut steps = vec![]; - for p in &self.view.p2p() { + for p in &self.view.p2p().peers { steps.push(ConfChangeSingle { change_type: ConfChangeType::AddNode.into(), node_id: p.1 as u64, diff --git a/src/main/Cargo.toml b/src/main/Cargo.toml index 938e4f2..cba3019 100644 --- a/src/main/Cargo.toml +++ b/src/main/Cargo.toml @@ -4,10 +4,6 @@ version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[features] -default = [] # 默认启用的特性 -unsafe-log = [] [dependencies] qp2p.workspace = true #{ path = "qp2p" } @@ -55,11 +51,10 @@ futures.workspace = true zip-extract.workspace = true zip.workspace = true walkdir.workspace = true -# s3_server = { path = "../s3_server" } +s3_server = { path = "../s3_server" } hyper.workspace = true md-5.workspace = true path-absolutize.workspace = true -dashmap.workspace = true [dependencies.uuid] version = "1.8.0" diff --git a/src/main/src/config.rs b/src/main/src/config.rs index 601b985..d5894a4 100644 --- a/src/main/src/config.rs +++ b/src/main/src/config.rs @@ -116,10 +116,8 @@ pub struct YamlConfig { } fn read_yaml_config(file_path: impl AsRef) -> YamlConfig { - tracing::info!("Running at dir: {:?}", std::env::current_dir()); - let path = file_path.as_ref().to_owned(); let file = std::fs::File::open(file_path).unwrap_or_else(|err| { - panic!("open config file {:?} 
failed, err: {:?}", path, err); + panic!("open config file failed, err: {:?}", err); }); serde_yaml::from_reader(file).unwrap_or_else(|e| { panic!("parse yaml config file failed, err: {:?}", e); diff --git a/src/main/src/general/m_appmeta_manager/http.rs b/src/main/src/general/m_appmeta_manager/http.rs index 49bfbc0..76e2f01 100644 --- a/src/main/src/general/m_appmeta_manager/http.rs +++ b/src/main/src/general/m_appmeta_manager/http.rs @@ -13,21 +13,12 @@ lazy_static! { static ref VIEW: Option = None; } fn view() -> &'static super::View { - #[cfg(feature = "unsafe-log")] - tracing::debug!("unsafe http view begin"); - let res = unsafe { util::non_null(&*VIEW).as_ref().as_ref().unwrap() }; - #[cfg(feature = "unsafe-log")] - tracing::debug!("unsafe http view end"); - res + unsafe { util::non_null(&*VIEW).as_ref().as_ref().unwrap() } } pub(super) fn binds(router: Router, view: super::View) -> Router { unsafe { - #[cfg(feature = "unsafe-log")] - tracing::debug!("unsafe http view bind"); let _ = util::non_null(&*VIEW).as_mut().replace(view); - #[cfg(feature = "unsafe-log")] - tracing::debug!("unsafe http view bind end"); } tracing::debug!("binds appmeta_manager http"); router diff --git a/src/main/src/general/m_appmeta_manager/mod.rs b/src/main/src/general/m_appmeta_manager/mod.rs index d0ec8e4..5adf4f7 100644 --- a/src/main/src/general/m_appmeta_manager/mod.rs +++ b/src/main/src/general/m_appmeta_manager/mod.rs @@ -5,16 +5,22 @@ mod v_os; use self::v_os::AppMetaVisitOs; use super::{ m_data_general::{DataGeneral, DATA_UID_PREFIX_APP_META}, - m_kv_store_engine::{KeyTypeServiceList, KvAdditionalConf, KvStoreEngine}, + m_kv_store_engine::{KeyTypeServiceList, KvStoreEngine}, m_os::OperatingSystem, - network::{http_handler::HttpHandler, m_p2p::P2PModule}, + network::{ + http_handler::HttpHandler, + m_p2p::P2PModule, + proto::{ + write_one_data_request::{ + data_item::{self, Data}, + DataItem, FileData, + }, + DataMeta, DataModeCache, DataModeDistribute, + }, + }, }; -use crate::{general::network::proto, worker::m_executor::Executor}; use crate::{ - general::{ - kv_interface::KvOps, - network::proto::{data_schedule_context::OpeRole, DataOpeRoleUploadApp}, - }, + general::kv_interface::KvOps, logical_module_view_impl, master::m_master::Master, result::{ErrCvt, WSResult, WsFuncError}, @@ -22,6 +28,7 @@ use crate::{ util::{self, JoinHandleWrapper}, worker::func::m_instance_manager::InstanceManager, }; +use crate::{general::network::proto, worker::m_executor::Executor}; use async_trait::async_trait; use axum::body::Bytes; use enum_as_inner::EnumAsInner; @@ -229,13 +236,9 @@ impl AppMeta { .iter() .any(|(_, fnmeta)| fnmeta.allow_http_call().is_some()); unsafe { - #[cfg(feature = "unsafe-log")] - tracing::debug!("http_handler begin"); let _ = util::non_null(&self.cache_contains_http_fn) .as_mut() .replace(res); - #[cfg(feature = "unsafe-log")] - tracing::debug!("http_handler end"); } res } @@ -521,10 +524,7 @@ lazy_static::lazy_static! 
{ static ref VIEW: Option = None; } fn view() -> &'static View { - tracing::debug!("get view"); - let res = unsafe { util::non_null(&*VIEW).as_ref().as_ref().unwrap() }; - tracing::debug!("get view end"); - res + unsafe { util::non_null(&*VIEW).as_ref().as_ref().unwrap() } } #[derive(LogicalModule)] @@ -543,11 +543,7 @@ impl LogicalModule for AppMetaManager { { let view = View::new(args.logical_modules_ref.clone()); unsafe { - #[cfg(feature = "unsafe-log")] - tracing::debug!("app man view begin"); let _ = util::non_null(&*VIEW).as_mut().replace(view.clone()); - #[cfg(feature = "unsafe-log")] - tracing::debug!("app man view end"); } let fs_layer = AppMetaVisitOs::new(view.clone()); Self { @@ -599,10 +595,10 @@ impl AppMetas { // self.app_metas.get(app) let meta = view() .data_general() - .get_data_item(format!("{}{}", DATA_UID_PREFIX_APP_META, app).as_bytes(), 0) + .get_data_item(format!("{}{}", DATA_UID_PREFIX_APP_META, app), 0) .await; - let Some(proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(metabytes)), + let Some(DataItem { + data: Some(Data::RawBytes(metabytes)), }) = meta else { return None; @@ -736,7 +732,7 @@ impl AppMetaManager { Ok(self .view .data_general() - .get_data_item(format!("{}{}", DATA_UID_PREFIX_APP_META, app).as_bytes(), 0) + .get_data_item(format!("{}{}", DATA_UID_PREFIX_APP_META, app), 0) .await .is_some()) } @@ -821,19 +817,17 @@ impl AppMetaManager { .write_data( format!("{}{}", DATA_UID_PREFIX_APP_META, appname), vec![ - proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes( + DataItem { + data: Some(data_item::Data::RawBytes( bincode::serialize(&appmeta).unwrap(), )), }, - proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::File( - proto::FileData { - file_name_opt: format!("apps/{}", appname), - is_dir_opt: true, - file_content: zipfiledata, - }, - )), + DataItem { + data: Some(data_item::Data::File(FileData { + file_name: format!("apps/{}", appname), + is_dir: true, + file_content: zipfiledata, + })), }, ], // vec![ @@ -846,11 +840,12 @@ impl AppMetaManager { // distribute: DataModeDistribute::BroadcastRough as i32, // }, // ], - Some(( - self.view.p2p().nodes_config.this_node(), - proto::DataOpeType::Write, - OpeRole::UploadApp(DataOpeRoleUploadApp {}), - )), + proto::DataScheduleContext { + ope_node: self.view.p2p().nodes_config.this_node(), + ope_type: proto::DataOpeType::Write as i32, + data_sz_bytes: vec![], + ope_role, + }, ) .await; tracing::debug!("app uploaded"); @@ -861,15 +856,13 @@ impl AppMetaManager { self.view.kv_store_engine().set( KeyTypeServiceList, &serde_json::to_string(&list).unwrap().into(), - false, ); } pub fn get_app_meta_list(&self) -> Vec { let res = self .view .kv_store_engine() - .get(&KeyTypeServiceList, false, KvAdditionalConf {}) - .map(|(_version, list)| list) + .get(KeyTypeServiceList) .unwrap_or_else(|| { return vec![]; }); diff --git a/src/main/src/general/m_data_general.rs b/src/main/src/general/m_data_general.rs index 0fa9891..e42cabe 100644 --- a/src/main/src/general/m_data_general.rs +++ b/src/main/src/general/m_data_general.rs @@ -1,37 +1,31 @@ use super::{ - m_kv_store_engine::{ - KeyTypeDataSetItem, KeyTypeDataSetMeta, KvAdditionalConf, KvStoreEngine, KvVersion, - }, + m_kv_store_engine::{KeyTypeDataSetItem, KeyTypeDataSetMeta, KvStoreEngine}, m_os::OperatingSystem, network::{ m_p2p::{P2PModule, RPCCaller, RPCHandler, RPCResponsor}, proto::{ - self, DataMeta, DataMetaGetRequest, DataVersionScheduleRequest, - 
WriteOneDataRequest, WriteOneDataResponse, + self, + write_one_data_request::{data_item::Data, DataItem}, + DataMeta, DataModeDistribute, DataVersionScheduleRequest, WriteOneDataRequest, + WriteOneDataResponse, }, proto_ext::ProtoExtDataItem, }, }; use crate::{ - general::m_kv_store_engine::KeyType, + general::network::proto::write_one_data_request, logical_module_view_impl, - result::{WSError, WSResult, WsRuntimeErr, WsSerialErr}, + result::WSResult, sys::{LogicalModule, LogicalModuleNewArgs, NodeID}, util::JoinHandleWrapper, }; use crate::{result::WsDataError, sys::LogicalModulesRef}; use async_trait::async_trait; -use camelpaste::paste; -use core::str; - -use prost::Message; use serde::{Deserialize, Serialize}; use std::{ collections::{HashMap, HashSet}, - sync::Arc, time::Duration, }; -use tokio::task::JoinHandle; use ws_derive::LogicalModule; // use super::m_appmeta_manager::AppMeta; @@ -45,47 +39,16 @@ logical_module_view_impl!(DataGeneralView, os, OperatingSystem); pub type DataVersion = u64; pub const DATA_UID_PREFIX_APP_META: &str = "app"; -pub const DATA_UID_PREFIX_FN_KV: &str = "fkv"; - -pub const CACHE_MODE_TIME_MASK: u16 = 0xf000; -pub const CACHE_MODE_TIME_FOREVER_MASK: u16 = 0x0fff; -pub const CACHE_MODE_TIME_AUTO_MASK: u16 = 0x1fff; - -pub const CACHE_MODE_POS_MASK: u16 = 0x0f00; -pub const CACHE_MODE_POS_ALLNODE_MASK: u16 = 0xf0ff; -pub const CACHE_MODE_POS_SPECNODE_MASK: u16 = 0xf1ff; -pub const CACHE_MODE_POS_AUTO_MASK: u16 = 0xf2ff; -pub const CACHE_MODE_MAP_MASK: u16 = 0x00f0; -pub const CACHE_MODE_MAP_COMMON_KV_MASK: u16 = 0xff0f; -pub const CACHE_MODE_MAP_FILE_MASK: u16 = 0xff1f; // const DATA_UID_PREFIX_OBJ: &str = "obj"; -pub fn new_data_unique_id_app(app_name: &str) -> String { - format!("{}{}", DATA_UID_PREFIX_APP_META, app_name) -} - -pub fn new_data_unique_id_fn_kv(key: &[u8]) -> Vec { - let mut temp = DATA_UID_PREFIX_FN_KV.as_bytes().to_owned(); - temp.extend(key); - temp - // let key_str = str::from_utf8(key).unwrap(); - // format!("{}{}", DATA_UID_PREFIX_FN_KV, key_str) -} - #[derive(LogicalModule)] pub struct DataGeneral { view: DataGeneralView, + pub rpc_call_data_version: RPCCaller, - pub rpc_call_data_version_schedule: RPCCaller, rpc_call_write_once_data: RPCCaller, - rpc_call_get_data_meta: RPCCaller, - rpc_call_get_data: RPCCaller, - rpc_handler_write_once_data: RPCHandler, - rpc_handler_data_meta_update: RPCHandler, - rpc_handler_get_data_meta: RPCHandler, - rpc_handler_get_data: RPCHandler, } #[async_trait] @@ -96,321 +59,140 @@ impl LogicalModule for DataGeneral { { Self { view: DataGeneralView::new(args.logical_modules_ref.clone()), - - rpc_call_data_version_schedule: RPCCaller::new(), + rpc_call_data_version: RPCCaller::new(), rpc_call_write_once_data: RPCCaller::new(), - rpc_call_get_data_meta: RPCCaller::new(), - rpc_call_get_data: RPCCaller::new(), - rpc_handler_write_once_data: RPCHandler::new(), - rpc_handler_data_meta_update: RPCHandler::new(), - rpc_handler_get_data_meta: RPCHandler::new(), - rpc_handler_get_data: RPCHandler::new(), } } async fn start(&self) -> WSResult> { tracing::info!("start as master"); - let p2p = self.view.p2p(); - // register rpc callers - { - self.rpc_call_data_version_schedule.regist(p2p); - self.rpc_call_write_once_data.regist(p2p); - self.rpc_call_get_data_meta.regist(p2p); - self.rpc_call_get_data.regist(p2p); - } - - // register rpc handlers - { - let view = self.view.clone(); - self.rpc_handler_write_once_data - .regist(p2p, move |responsor, req| { - let view = view.clone(); - let _ = tokio::spawn(async move { 
- view.rpc_handle_write_one_data(responsor, req).await; - }); - Ok(()) - }); - let view = self.view.clone(); - self.rpc_handler_data_meta_update.regist( - p2p, - move |responsor: RPCResponsor, - req: proto::DataMetaUpdateRequest| { - let view = view.clone(); - let _ = - tokio::spawn( - async move { view.rpc_handle_data_meta_update(responsor, req) }, - ); - Ok(()) - }, - ); - let view = self.view.clone(); - self.rpc_handler_get_data_meta - .regist(p2p, move |responsor, req| { - let view = view.clone(); - let _ = tokio::spawn(async move { - view.rpc_handle_get_data_meta(req, responsor).await; - }); - Ok(()) + self.rpc_call_data_version.regist(p2p); + self.rpc_call_write_once_data.regist(p2p); + let view = self.view.clone(); + self.rpc_handler_write_once_data + .regist(p2p, move |responsor, req| { + let view = view.clone(); + let _ = tokio::spawn(async move { + view.data_general().write_one_data(responsor, req).await; }); - let view = self.view.clone(); - self.rpc_handler_get_data.regist( - p2p, - move |responsor: RPCResponsor, - req: proto::GetOneDataRequest| { - let view = view.clone(); - let _ = - tokio::spawn(async move { view.rpc_handle_get_one_data(responsor, req) }); - Ok(()) - }, - ); - } - + Ok(()) + }); Ok(vec![]) } } -impl DataGeneralView { - async fn rpc_handle_data_meta_update( - self, - responsor: RPCResponsor, - mut req: proto::DataMetaUpdateRequest, - ) { - let key = KeyTypeDataSetMeta(&req.unique_id); - let keybytes = key.make_key(); - - let write_lock = self.kv_store_engine().with_rwlock(&keybytes); - write_lock.write(); - - if let Some((_old_version, mut old_meta)) = - self.kv_store_engine().get(&key, true, KvAdditionalConf {}) - { - if old_meta.version > req.version { - responsor.send_resp(proto::DataMetaUpdateResponse { - version: old_meta.version, - message: "New data version overwrite".to_owned(), - }); - return; - } - old_meta.version = req.version; - if req.serialized_meta.len() > 0 { - self.kv_store_engine().set_raw( - &keybytes, - std::mem::take(&mut req.serialized_meta), - true, - ); - } else { - self.kv_store_engine().set(key, &old_meta, true); - } - } else { - if req.serialized_meta.len() > 0 { - self.kv_store_engine().set_raw( - &keybytes, - std::mem::take(&mut req.serialized_meta), - true, - ); - } else { - responsor.send_resp(proto::DataMetaUpdateResponse { - version: 0, - message: "Old meta data not found and missing new meta".to_owned(), - }); - return; - } - } - responsor.send_resp(proto::DataMetaUpdateResponse { - version: req.version, - message: "Update success".to_owned(), - }); - } - - async fn rpc_handle_get_one_data( - self, - responsor: RPCResponsor, - req: proto::GetOneDataRequest, - ) -> WSResult<()> { - // req.unique_id - let kv_store_engine = self.kv_store_engine(); - let _ = self.get_data_meta(&req.unique_id, true)?; - // let meta = bincode::deserialize::(&req.serialized_meta).map_err(|err| { - // WsSerialErr::BincodeErr { - // err, - // context: "rpc_handle_get_one_data".to_owned(), - // } - // })?; - let mut deleted = vec![]; - - let mut kv_ope_err = vec![]; - - for idx in req.idxs { - let value = if req.delete { - match kv_store_engine.del( - KeyTypeDataSetItem { - uid: req.unique_id.as_ref(), //req.unique_id.clone(), - idx: idx as u8, - }, - false, - ) { - Ok(value) => value, - Err(e) => { - kv_ope_err.push(e); - None - } - } - } else { - kv_store_engine.get( - &KeyTypeDataSetItem { - uid: req.unique_id.as_ref(), //req.unique_id.clone(), - idx: idx as u8, - }, - false, - KvAdditionalConf {}, - ) - }; - deleted.push(value); - } - - 
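// [editor's note] Illustrative sketch, not part of this patch: the
// rpc_handle_data_meta_update handler above implements a version-guarded
// upsert — reject the incoming meta when the stored version is newer,
// otherwise overwrite under the per-key write lock. Stripped of the kv
// engine, the guard reduces to roughly the following (the HashMap store and
// all names here are assumptions for illustration only):
//
// use std::collections::HashMap;
//
// struct Meta {
//     version: u64,
//     payload: Vec<u8>,
// }
//
// /// Returns Err(stored_version) when `new` is older than what is stored.
// fn guarded_upsert(
//     store: &mut HashMap<Vec<u8>, Meta>,
//     key: Vec<u8>,
//     new: Meta,
// ) -> Result<(), u64> {
//     if let Some(old) = store.get(&key) {
//         if old.version > new.version {
//             // reject the stale write and report the stored version
//             return Err(old.version);
//         }
//     }
//     let _ = store.insert(key, new);
//     Ok(())
// }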
tracing::warn!("temporaly no data response"); - - let (success, message): (bool, String) = if kv_ope_err.len() > 0 { - (false, { - let mut msg = String::from("KvEngine operation failed: "); - for e in kv_ope_err.iter() { - msg.push_str(&format!("{:?}", e)); - } - msg - }) - } else if deleted.iter().all(|v| v.is_some()) { - (true, "success".to_owned()) - } else { - (false, "some data not found".to_owned()) - }; - - responsor - .send_resp(proto::GetOneDataResponse { - success, - data: vec![], - message, - }) - .await?; +// pub enum DataWrapper { +// Bytes(Vec), +// File(PathBuf), +// } - Ok(()) - } - async fn rpc_handle_write_one_data( - self, +impl DataGeneral { + async fn write_one_data( + &self, responsor: RPCResponsor, req: WriteOneDataRequest, ) { + // ## verify data tracing::debug!("verify data meta bf write data"); - let kv_store_engine = self.kv_store_engine(); - // Step 0: pre-check - { - if req.data.is_empty() { - responsor.send_resp(WriteOneDataResponse { - remote_version: 0, - success: false, - message: "Request data is empty".to_owned(), - }); - return; - } - if req.data[0].data_item_dispatch.is_none() { - responsor.send_resp(WriteOneDataResponse { - remote_version: 0, - success: false, - message: "Request data enum is none".to_owned(), - }); - return; - } + let Some(res) = self + .view + .kv_store_engine() + .get(KeyTypeDataSetMeta(req.unique_id.as_bytes())) + else { + responsor.send_resp(WriteOneDataResponse { + remote_version: 0, + success: false, + message: "Data meta not found".to_owned(), + }); + return; + }; + if res.version != req.version { + responsor.send_resp(WriteOneDataResponse { + remote_version: res.version, + success: false, + message: "Data meta version not match".to_owned(), + }); + return; } - - // Step1: verify version - // take old meta - { - let keybytes = KeyTypeDataSetMeta(&req.unique_id).make_key(); - let fail_by_overwrite = || { - let message = "New data version overwrite".to_owned(); - tracing::warn!("{}", message); - responsor.send_resp(WriteOneDataResponse { - remote_version: 0, - success: false, - message, - }); - }; - let fail_with_msg = |message: String| { - tracing::warn!("{}", message); - responsor.send_resp(WriteOneDataResponse { - remote_version: 0, - success: false, - message, - }); - }; - loop { - let res = kv_store_engine.get( - &KeyTypeDataSetMeta(&req.unique_id), - false, - KvAdditionalConf {}, - ); //tofix, master send maybe not synced - let old_dataset_version = if res.is_none() { - 0 - } else { - res.as_ref().unwrap().1.version - }; - // need to wait for new version - if res.is_none() || res.as_ref().unwrap().1.version < req.version { - let (_, new_value) = kv_store_engine.wait_for_new(&keybytes).await; - let Some(new_value) = new_value.as_data_set_meta() else { - fail_with_msg(format!( - "fatal error, kv value supposed to be DataSetMeta, rathe than {:?}", - new_value - )); - return; - }; - - if new_value.version > req.version { - fail_by_overwrite(); + if req.data.is_empty() { + responsor.send_resp(WriteOneDataResponse { + remote_version: res.version, + success: false, + message: "Data is empty".to_owned(), + }); + return; + } + if req.data[0].data.is_none() { + responsor.send_resp(WriteOneDataResponse { + remote_version: res.version, + success: false, + message: "Data enum is none".to_owned(), + }); + return; + } + for (_idx, data) in req.data.iter().enumerate() { + match data.data.as_ref().unwrap() { + write_one_data_request::data_item::Data::File(f) => { + if f.file_name.starts_with("/") { + responsor.send_resp(WriteOneDataResponse 
{ + remote_version: res.version, + success: false, + message: format!( + "File name {} starts with / is forbidden", + f.file_name + ), + }); return; - } else if new_value.version < req.version { - // still need to wait for new version - continue; - } else { - break; } - } else if old_dataset_version > req.version { - fail_by_overwrite(); - return; } + _ => {} } } - - // Step3: write data + // ## write data tracing::debug!("start to write data"); for (idx, data) in req.data.into_iter().enumerate() { - match data.data_item_dispatch.unwrap() { - proto::data_item::DataItemDispatch::File(f) => { - // just store in kv - kv_store_engine.set( - KeyTypeDataSetItem { - uid: req.unique_id.as_ref(), //req.unique_id.clone(), - idx: idx as u8, - }, - &f.encode_to_vec(), - false, - ); + match data.data.unwrap() { + write_one_data_request::data_item::Data::File(f) => { + tracing::debug!("writing data part{} file {}", idx, f.file_name); + let p: std::path::PathBuf = self.view.os().file_path.join(f.file_name); + let view = self.view.clone(); + + let p2 = p.clone(); + let res = if f.is_dir { + tokio::task::spawn_blocking(move || { + view.os().unzip_data_2_path(p2, f.file_content); + }) + } else { + // flush to p + tokio::task::spawn_blocking(move || { + view.os().cover_data_2_path(p2, f.file_content); + }) + }; + let res = res.await; + if let Err(e) = res { + responsor.send_resp(WriteOneDataResponse { + remote_version: req.version, + success: false, + message: format!("Write file error: {:?}, path: {:?}", e, p), + }); + return; + } } - proto::data_item::DataItemDispatch::RawBytes(bytes) => { + write_one_data_request::data_item::Data::RawBytes(bytes) => { tracing::debug!("writing data part{} bytes", idx); - kv_store_engine.set( + self.view.kv_store_engine().set( KeyTypeDataSetItem { - uid: &req.unique_id, + uid: req.unique_id.as_bytes(), idx: idx as u8, }, &bytes, - false, ); } } } - kv_store_engine.flush(); + self.view.kv_store_engine().flush(); tracing::debug!("data is written"); responsor .send_resp(WriteOneDataResponse { @@ -421,215 +203,78 @@ impl DataGeneralView { .await; // ## response } - - async fn rpc_handle_get_data_meta( - self, - req: proto::DataMetaGetRequest, - responsor: RPCResponsor, - ) -> WSResult<()> { - let meta = self.get_data_meta(&req.unique_id, req.delete)?; - - let serialized_meta = meta.map_or(vec![], |(_kvversion, meta)| { - bincode::serialize(&meta).unwrap() - }); - - responsor - .send_resp(proto::DataMetaGetResponse { serialized_meta }) - .await?; - - Ok(()) - } - // pub async fn - - fn get_data_meta( - &self, - unique_id: &[u8], - delete: bool, - ) -> WSResult> { - let kv_store_engine = self.kv_store_engine(); - let key = KeyTypeDataSetMeta(&unique_id); - let keybytes = key.make_key(); - - let write_lock = kv_store_engine.with_rwlock(&keybytes); - let _guard = write_lock.write(); - - let meta_opt = if delete { - kv_store_engine.get(&key, true, KvAdditionalConf {}) - } else { - kv_store_engine.del(key, true)? 
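// [editor's note] Illustrative sketch, not part of this patch: the restored
// write_one_data above hands the blocking filesystem work
// (unzip_data_2_path / cover_data_2_path) to tokio::task::spawn_blocking so
// async runtime worker threads are not stalled by disk I/O. The general
// shape, with std::fs::write standing in for the real OS-layer helpers:
//
// async fn flush_to_disk(
//     path: std::path::PathBuf,
//     content: Vec<u8>,
// ) -> std::io::Result<()> {
//     // run the blocking write on the dedicated blocking thread pool
//     tokio::task::spawn_blocking(move || std::fs::write(&path, &content))
//         .await
//         .expect("blocking write task panicked")
// }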
+ pub async fn get_data_item(&self, unique_id: String, idx: u8) -> Option { + let Some(itembytes) = self.view.kv_store_engine().get(KeyTypeDataSetItem { + uid: unique_id.as_bytes(), + idx: idx as u8, + }) else { + return None; }; - Ok(meta_opt) - } -} - -// pub enum DataWrapper { -// Bytes(Vec), -// File(PathBuf), -// } - -impl DataGeneral { - async fn get_datameta_from_master(&self, unique_id: &[u8]) -> WSResult { - let p2p = self.view.p2p(); - let data_general = self.view.data_general(); - // get meta from master - let meta = data_general - .rpc_call_get_data_meta - .call( - p2p, - p2p.nodes_config.get_master_node(), - DataMetaGetRequest { - unique_id: unique_id.to_owned(), - delete: true, - }, - Some(Duration::from_secs(30)), - ) - .await?; - bincode::deserialize::(&meta.serialized_meta).map_err(|e| { - WSError::from(WsSerialErr::BincodeErr { - err: e, - context: "delete data meta at master wrong meta serialized".to_owned(), - }) + Some(DataItem { + data: Some(Data::RawBytes(itembytes)), }) } - async fn get_data_by_meta( - &self, - unique_id: &[u8], - meta: DataSetMetaV2, - delete: bool, - ) -> WSResult<(DataSetMetaV2, HashMap<(NodeID, usize), proto::DataItem>)> { - let view = &self.view; - // Step2: delete data on each node - let mut each_node_data: HashMap = HashMap::new(); - for (idx, data_splits) in meta.datas_splits.iter().enumerate() { - for split in &data_splits.splits { - let _ = each_node_data - .entry(split.node_id) - .and_modify(|old| { - old.idxs.push(idx as u32); - }) - .or_insert(proto::GetOneDataRequest { - unique_id: unique_id.to_owned(), - idxs: vec![idx as u32], - delete, - return_data: true, - }); - } - } + pub async fn set_dataversion(&self, req: DataVersionScheduleRequest) -> WSResult<()> { + // follower just update the version from master + let old = self + .view + .kv_store_engine() + .get(KeyTypeDataSetMeta(req.unique_id.as_bytes())); - let mut tasks = vec![]; - for (node_id, req) in each_node_data { - let view = view.clone(); - let task = tokio::spawn(async move { - let req_idxs = req.idxs.clone(); - let res = view - .data_general() - .rpc_call_get_data - .call(view.p2p(), node_id, req, Some(Duration::from_secs(30))) - .await; - let res: WSResult> = res.map(|response| { - if !response.success { - tracing::warn!("get/delete data failed {}", response.message); - vec![] - } else { - req_idxs.into_iter().zip(response.data).collect() - } - }); - (node_id, res) - }); - tasks.push(task); - } + let Some(mut old) = old else { + return Err(WsDataError::DataSetNotFound { + uniqueid: req.unique_id, + } + .into()); + }; - let mut node_2_datas: HashMap<(NodeID, usize), proto::DataItem> = HashMap::new(); - for tasks in tasks { - let (node_id, data) = tasks.await.map_err(|err| { - WSError::from(WsRuntimeErr::TokioJoin { - err, - context: "delete_data - deleting remote data".to_owned(), - }) - })?; - for (idx, data_item) in data? 
{ - let _ = node_2_datas.insert((node_id, idx as usize), data_item); + // only the latest version has the permission + if old.version > req.version { + return Err(WsDataError::SetExpiredDataVersion { + target_version: req.version, + cur_version: old.version, + data_id: req.unique_id.clone(), } + .into()); } - Ok((meta, node_2_datas)) - } - - pub async fn get_data( - &self, - unique_id: impl Into>, - ) -> WSResult<(DataSetMetaV2, HashMap<(NodeID, usize), proto::DataItem>)> { - let unique_id: Vec = unique_id.into(); - // Step1: get meta - let meta: DataSetMetaV2 = self.get_datameta_from_master(&unique_id).await?; - self.get_data_by_meta(&unique_id, meta, false).await - } - - /// return (meta, data_map) - /// data_map: (node_id, idx) -> data_items - pub async fn delete_data( - &self, - unique_id: impl Into>, - ) -> WSResult<(DataSetMetaV2, HashMap<(NodeID, usize), proto::DataItem>)> { - let unique_id: Vec = unique_id.into(); - - // Step1: get meta - let meta: DataSetMetaV2 = self.get_datameta_from_master(&unique_id).await?; - - self.get_data_by_meta(&unique_id, meta, true).await - // - } - - /// - check the uid from DATA_UID_PREFIX_XXX - pub async fn get_data_item(&self, unique_id: &[u8], idx: u8) -> Option { - let Some((_, itembytes)) = self.view.kv_store_engine().get( - &KeyTypeDataSetItem { - uid: unique_id, - idx: idx as u8, - }, - false, - KvAdditionalConf {}, - ) else { - return None; - }; - Some(proto::DataItem { - data_item_dispatch: Some(proto::data_item::DataItemDispatch::RawBytes(itembytes)), - }) + // update the version + old.version = req.version; + + self.view.kv_store_engine().set( + KeyTypeDataSetMeta(req.unique_id.as_bytes()), + &old, // &DataSetMetaV1 { + // version: req.version, + // data_metas: req.data_metas.into_iter().map(|v| v.into()).collect(), + // synced_nodes: HashSet::new(), + // }, + ); + self.view.kv_store_engine().flush(); + Ok(()) } - /// The user's data write entry - /// - /// - check the design here - /// - /// - check the uid from DATA_UID_PREFIX_XXX - /// - /// - https://fvd360f8oos.feishu.cn/docx/XoFudWhAgox84MxKC3ccP1TcnUh#share-Rtxod8uDqoIcRwxOM1rccuXxnQg + /// check the design here + /// https://fvd360f8oos.feishu.cn/docx/XoFudWhAgox84MxKC3ccP1TcnUh#share-Rtxod8uDqoIcRwxOM1rccuXxnQg pub async fn write_data( &self, - unique_id: impl Into>, + unique_id: String, // data_metas: Vec, - datas: Vec, + datas: Vec, context_openode_opetype_operole: Option<( NodeID, proto::DataOpeType, proto::data_schedule_context::OpeRole, )>, - ) -> WSResult<()> { + ) { let p2p = self.view.p2p(); - let unique_id: Vec = unique_id.into(); - let log_tag = Arc::new(format!( - "write_data,uid:{:?},operole:{:?}", - str::from_utf8(&unique_id), - context_openode_opetype_operole.as_ref().map(|v| &v.2) - )); // Step 1: need the master to do the decision // - require for the latest version for write permission // - require for the distribution and cache mode - let version_schedule_resp = { + let resp = { let resp = self - .rpc_call_data_version_schedule + .rpc_call_data_version .call( self.view.p2p(), p2p.nodes_config.get_master_node(), @@ -640,10 +285,7 @@ impl DataGeneral { |(ope_node, ope_type, ope_role)| proto::DataScheduleContext { ope_node: ope_node as i64, ope_type: ope_type as i32, - each_data_sz_bytes: datas - .iter() - .map(|data_item| data_item.data_sz_bytes() as u32) - .collect::>(), + data_sz_bytes: datas.iter().map(|v| v.data_sz_bytes()).collect(), ope_role: Some(ope_role), }, ), @@ -653,16 +295,9 @@ impl DataGeneral { .await; let resp = match resp { - Err(inner_e) => { - 
let e = WsDataError::WriteDataRequireVersionErr {
-                    unique_id,
-                    err: Box::new(inner_e),
-                };
-                tracing::warn!("{:?}", e);
-                return Err(e.into());
-
-                // tracing::warn!("write_data require version error: {:?}", e);
-                // return e;
+            Err(e) => {
+                tracing::warn!("write_data_broadcast_rough require version error: {:?}", e);
+                return;
            }
            Ok(ok) => ok,
        };

        // Step2: dispatch the data source and caches
        {
-            // resp.split holds the split decision for each data item, so the lengths must match
-            if version_schedule_resp.split.len() != datas.len() {
-                let e = WsDataError::WriteDataSplitLenNotMatch {
-                    unique_id,
-                    expect: datas.len(),
-                    actual: version_schedule_resp.split.len(),
-                };
-                tracing::warn!("{:?}", e);
-                return Err(e.into());
-            }
-
            let mut write_source_data_tasks = vec![];

            // write the data split to kv
-            for (one_data_splits, one_data_item) in
-                version_schedule_resp.split.into_iter().zip(datas)
-            {
-                // let mut last_node_begin: Option<(NodeID, usize)> = None;
-                fn flush_the_data(
-                    log_tag: &str,
-                    unique_id: &[u8],
-                    version: u64,
-                    split_size: usize,
-                    view: &DataGeneralView,
-                    one_data_item: &proto::DataItem,
-                    nodeid: NodeID,
-                    offset: usize,
-                    write_source_data_tasks: &mut Vec<JoinHandle<WSResult<WriteOneDataResponse>>>,
-                ) {
-                    let log_tag = log_tag.to_owned();
-                    let unique_id = unique_id.to_owned();
-                    let view = view.clone();
-                    // let version = version_schedule_resp.version;
-                    // let split_size = one_data_splits.split_size as usize;
-                    let one_data_item_split =
-                        one_data_item.clone_split_range(offset..offset + split_size);
-                    let t = tokio::spawn(async move {
-                        tracing::debug!("write_data flushing {}", log_tag);
-                        view.data_general()
-                            .rpc_call_write_once_data
-                            .call(
-                                view.p2p(),
-                                nodeid,
-                                WriteOneDataRequest {
-                                    unique_id,
-                                    version,
-                                    data: vec![one_data_item_split],
-                                },
-                                Some(Duration::from_secs(60)),
-                            )
-                            .await
-                    });
                    write_source_data_tasks.push(t);
-                }
-                for split in one_data_splits.splits.iter() {
-                    flush_the_data(
-                        &log_tag,
-                        &unique_id,
-                        version_schedule_resp.version,
-                        split.data_size as usize,
-                        &self.view,
-                        &one_data_item,
-                        split.node_id,
-                        split.data_offset as usize,
-                        &mut write_source_data_tasks,
-                    );
-                }
-            }
-
-            // count and handle failures
-            let mut failed = false;
-            for t in write_source_data_tasks {
-                let res = t.await;
-                match res {
-                    Ok(res) => match res {
-                        Ok(_) => {}
-                        Err(e) => {
-                            failed = true;
-                            tracing::warn!("write source data failed: {}", e);
-                        }
-                    },
-
-                    Err(e) => {
-                        failed = true;
-                        tracing::warn!("write_source_data_tasks failed: {}", e);
-                    }
-                }
-            }
+            for one_data_splits in resp.split {
+                let mut last_node_begin: Option<(NodeID, usize)> = None;
+                let flush_the_data = |nodeid: NodeID, begin: usize| {
+                    let t = tokio::spawn(async move {});
                    write_source_data_tasks.push(t);
+                };
+                for (idx, node) in one_data_splits.node_ids.iter().enumerate() {
+                    if let Some((node, begin)) = last_node_begin {
+                        if node != *node {
+                            // flush the data
+                        } else {
+                            last_node_begin = Some((*node, idx));
+                        }
+                    } else {
+                        last_node_begin = Some((*node, idx));
+                    }
+                }
+
+                // one_data_splits.node_ids.
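// [editor's note] Illustrative sketch, not part of this patch: the removed
// dispatch loop above cuts each data item into per-node (offset, size)
// splits and spawns one write task per split. The slicing itself, over a
// raw byte buffer and a hypothetical split descriptor, amounts to:
//
// struct Split {
//     node_id: u32,
//     offset: usize,
//     size: usize,
// }
//
// /// Pair each target node with its byte range of the payload.
// fn split_payload<'a>(payload: &'a [u8], splits: &[Split]) -> Vec<(u32, &'a [u8])> {
//     splits
//         .iter()
//         .map(|s| (s.node_id, &payload[s.offset..s.offset + s.size]))
//         .collect()
// }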
}
        }

-        if failed {
-            tracing::warn!("TODO: need to rollback");
-        }
-        // let res = join_all(write_source_data_tasks).await;
-        // // check if there's error
-        // if let Some(err)=res.iter().filter(|res|{res.is_err()}).next(){
-        //     tracing::warn!("failed to write data {}")
-        //     panic!("failed to write data");
-        // }
        }
-        Ok(())

        // if DataModeDistribute::BroadcastRough as i32 == data_metas[0].distribute {
        //     self.write_data_broadcast_rough(unique_id, data_metas, datas)
        //         .await;
        // }
    }
+    async fn write_data_broadcast_rough(
+        &self,
+        unique_id: String,
+        data_metas: Vec<DataMeta>,
+        datas: Vec<DataItem>,
+    ) {
+        let p2p = self.view.p2p();

        tracing::debug!("start broadcast data with version");
        let version = resp.version;
        // use the version we got to broadcast to all nodes in parallel
        let mut tasks = vec![];

        for (_idx, node) in p2p.nodes_config.all_nodes_iter().enumerate() {
            let n = *node.0;
            let view = self.view.clone();
            let datas = datas.clone();
            let unique_id = unique_id.clone();
            // let datas = unsafe { util::SendNonNull(util::non_null(&datas)) };

            let t = tokio::spawn(async move {
                view.data_general()
                    .rpc_call_write_once_data
                    .call(
                        view.p2p(),
                        n,
                        WriteOneDataRequest {
                            unique_id,
                            version,
                            data: datas,
                        },
                        Some(Duration::from_secs(60)),
                    )
                    .await
            });

            tasks.push(t);
        }
        for t in tasks {
            let res = t.await.unwrap();
            match res {
                Err(e) => {
                    tracing::warn!("write_data_broadcast_rough broadcast error: {:?}", e);
                }
                Ok(ok) => {
                    if !ok.success {
                        tracing::warn!(
                            "write_data_broadcast_rough broadcast error: {:?}",
                            ok.message
                        );
                    }
                }
            }
        }
    }
}

#[derive(Serialize, Deserialize)]
pub struct DataMetaSys {
    pub cache: i32,
    pub distribute: i32,
}

impl Into<DataMeta> for DataMetaSys {
    fn into(self) -> DataMeta {
        DataMeta {
            cache: self.cache,
            distribute: self.distribute,
        }
    }
}

-/// deprecated, latest is v2
/// the data's all in one meta
/// https://fvd360f8oos.feishu.cn/docx/XoFudWhAgox84MxKC3ccP1TcnUh#share-Tqqkdxubpokwi5xREincb1sFnLc
#[derive(Serialize, Deserialize)]
pub struct DataSetMetaV1 {
    // unique_id: Vec<u8>,
    pub version: u64,
    pub data_metas: Vec<DataMetaSys>,
    pub synced_nodes: HashSet<NodeID>,
}

-/// the data's all in one meta
-///
-/// attention: new from `DataSetMetaBuilder`
-///
-/// https://fvd360f8oos.feishu.cn/docx/XoFudWhAgox84MxKC3ccP1TcnUh#share-Tqqkdxubpokwi5xREincb1sFnLc
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize)]
pub struct DataSetMetaV2 {
    //
unique_id: Vec, - api_version: u8, + pub api_version: u8, pub version: u64, pub cache_mode: u16, - pub datas_splits: Vec, -} - -// message EachNodeSplit{ -// uint32 node_id=1; -// uint32 data_offset=2; -// uint32 data_size=3; -// } - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct EachNodeSplit { - pub node_id: NodeID, - pub data_offset: u32, - pub data_size: u32, -} - -/// we need to know the split size for one data -#[derive(Serialize, Deserialize, Debug)] -pub struct DataSplit { - pub splits: Vec, -} - -impl DataSplit { - /// node_2_datas will be consumed partially - pub fn recorver_data( - &self, - unique_id: &[u8], - idx: usize, - node_2_datas: &mut HashMap<(NodeID, usize), proto::DataItem>, - ) -> WSResult> { - let nodes = node_2_datas - .iter() - .filter(|v| v.0 .1 == idx) - .map(|v| v.0 .0) - .collect::>(); - - let mut each_node_splits: HashMap)> = - HashMap::new(); - - for node in nodes { - let data = node_2_datas.remove(&(node, idx)).unwrap(); - let _ = each_node_splits.insert(node, (data, None)); - } - - let mut max_size = 0; - let mut missing = vec![]; - - // zip with split info - // by the way, check if the split is missing - for split in &self.splits { - let Some(find) = each_node_splits.get_mut(&split.node_id) else { - missing.push((*split).clone()); - continue; - }; - find.1 = Some(split.clone()); - if split.data_offset + split.data_size > max_size { - max_size = split.data_offset + split.data_size; - } - } - - if missing.len() > 0 { - return Err(WsDataError::SplitRecoverMissing { - unique_id: unique_id.to_owned(), - idx, - missing, - } - .into()); - } - - let mut recover = vec![0; max_size.try_into().unwrap()]; - - for (_node, (data, splitmeta)) in each_node_splits { - let splitmeta = splitmeta.unwrap(); - let begin = splitmeta.data_offset as usize; - let end = begin + splitmeta.data_size as usize; - recover[begin..end].copy_from_slice(data.as_ref()); - } - - Ok(recover) - } -} - -impl Into for EachNodeSplit { - fn into(self) -> proto::EachNodeSplit { - proto::EachNodeSplit { - node_id: self.node_id, - data_offset: self.data_offset, - data_size: self.data_size, - } - } -} - -impl Into for DataSplit { - fn into(self) -> proto::DataSplit { - proto::DataSplit { - splits: self.splits.into_iter().map(|s| s.into()).collect(), - } - } -} -// uint32 split_size = 1; -// repeated uint32 node_ids = 2; - -#[derive(Debug, Clone, Copy)] -pub struct CacheModeVisitor(pub u16); - -macro_rules! generate_cache_mode_methods { - // The macro takes a list of pairs of the form [time, mask] and generates methods. 
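// [editor's note] Illustrative sketch, not part of this patch: for the pair
// (time, forever) the paste! invocation below expands to roughly the
// following method — a mode is considered set when masking cache_mode with
// the group mask alone yields the same value as masking with both the group
// mask and the group+mode mask:
//
// impl CacheModeVisitor {
//     pub fn is_time_forever(&self) -> bool {
//         self.0 & CACHE_MODE_TIME_MASK
//             == self.0 & CACHE_MODE_TIME_MASK & CACHE_MODE_TIME_FOREVER_MASK
//     }
// }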
- ($(($group:ident, $mode:ident)),*) => { - paste!{ - impl CacheModeVisitor { - $( - pub fn [](&self) -> bool { - self.0 & [] - == self.0 & [] & [] - } - )* - } - - } - }; -} -generate_cache_mode_methods!( - (time, forever), - (time, auto), - (pos, allnode), - (pos, specnode), - (pos, auto), - (map, common_kv), - (map, file) -); - -#[test] -fn test_cache_mode_visitor() { - let cache_mode_visitor = CacheModeVisitor(CACHE_MODE_TIME_FOREVER_MASK); - assert!(cache_mode_visitor.is_time_forever()); - assert!(!cache_mode_visitor.is_time_auto()); - - let cache_mode_visitor = CacheModeVisitor(CACHE_MODE_POS_ALLNODE_MASK); - assert!(cache_mode_visitor.is_pos_allnode()); - assert!(!cache_mode_visitor.is_pos_specnode()); - assert!(!cache_mode_visitor.is_pos_auto()); - - let cache_mode_visitor = CacheModeVisitor(CACHE_MODE_MAP_FILE_MASK); - assert!(cache_mode_visitor.is_map_file()); - assert!(!cache_mode_visitor.is_map_common_kv()); + pub datas_splits: Vec>, } pub struct DataSetMetaBuilder { building: Option, } -impl From for DataSetMetaBuilder { - fn from(d: DataSetMetaV2) -> Self { - Self { building: Some(d) } - } -} impl DataSetMetaBuilder { pub fn new() -> Self { Self { @@ -1045,37 +448,17 @@ impl DataSetMetaBuilder { } } pub fn cache_mode_time_forever(&mut self) -> &mut Self { - self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_TIME_FOREVER_MASK; - self - } - - pub fn cache_mode_time_auto(&mut self) -> &mut Self { - self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_TIME_AUTO_MASK; + self.building.as_mut().unwrap().cache_mode &= 0x00111111; self } pub fn cache_mode_pos_allnode(&mut self) -> &mut Self { - self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_POS_ALLNODE_MASK; + self.building.as_mut().unwrap().cache_mode &= 0x11001111; self } pub fn cache_mode_pos_specnode(&mut self) -> &mut Self { - self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_POS_SPECNODE_MASK; - self - } - - pub fn cache_mode_pos_auto(&mut self) -> &mut Self { - self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_POS_AUTO_MASK; - self - } - - pub fn cache_mode_map_common_kv(&mut self) -> &mut Self { - self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_MAP_COMMON_KV_MASK; - self - } - - pub fn cache_mode_map_file(&mut self) -> &mut Self { - self.building.as_mut().unwrap().cache_mode &= CACHE_MODE_MAP_FILE_MASK; + self.building.as_mut().unwrap().cache_mode &= 0x11011111; self } @@ -1084,8 +467,7 @@ impl DataSetMetaBuilder { self } - #[must_use] - pub fn set_data_splits(&mut self, splits: Vec) -> &mut Self { + pub fn set_data_splits(&mut self, splits: Vec>) -> &mut Self { self.building.as_mut().unwrap().datas_splits = splits; self } @@ -1099,8 +481,8 @@ impl From for DataSetMetaV2 { fn from( DataSetMetaV1 { version, - data_metas: _, - synced_nodes: _, + data_metas, + synced_nodes, }: DataSetMetaV1, ) -> Self { DataSetMetaBuilder::new() diff --git a/src/main/src/general/m_dist_lock.rs b/src/main/src/general/m_dist_lock.rs deleted file mode 100644 index 5cee707..0000000 --- a/src/main/src/general/m_dist_lock.rs +++ /dev/null @@ -1,557 +0,0 @@ -use std::collections::HashMap; -use std::collections::HashSet; -use std::fmt; -use std::fmt::Debug; -use std::hash::DefaultHasher; -use std::hash::Hash; -use std::hash::Hasher; -use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; -use std::sync::Arc; -use std::time::Duration; - -use crate::sys::LogicalModule; -use crate::sys::LogicalModulesRef; -use crate::util::DropDebug; -use crate::{ - logical_module_view_impl, result::WSResult, 
    sys::LogicalModuleNewArgs, util::JoinHandleWrapper,
-};
-use axum::async_trait;
-use parking_lot::Mutex;
-use rand::thread_rng;
-use rand::Rng;
-use tokio::sync::Notify;
-use tokio::sync::OwnedRwLockReadGuard;
-use tokio::sync::OwnedRwLockWriteGuard;
-use tokio::sync::RwLock;
-use ws_derive::LogicalModule;
-
-use super::network::m_p2p::P2PModule;
-use super::network::m_p2p::RPCCaller;
-use super::network::m_p2p::RPCHandler;
-use super::network::m_p2p::RPCResponsor;
-use super::network::proto;
-
-logical_module_view_impl!(View);
-logical_module_view_impl!(View, p2p, P2PModule);
-logical_module_view_impl!(View, dist_lock, DistLock);
-
-type LockReleaseId = u32;
-
-/// https://fvd360f8oos.feishu.cn/wiki/ZUPNwpKLEiRs6Ukzf3ncVa9FnHe
-/// This records the lock state for a given key: the read/write lock
-/// reference counts plus the notify used to wait for release.
-/// For the write lock, only the first contender that inserts into the map
-/// gets the lock; everyone after must wait on the notify and then race to
-/// insert into the map again.
-/// For the read lock, it suffices that the incremented count starts at >= 1;
-/// an initial value of 0 means the lock is already mid-release (cnt dropped
-/// to 0 but the entry may still be in the map), so callers must wait until
-/// the entry is removed from the map.
-/// How to wait for the map entry to be removed: once it is removed, fire
-/// every notify queued in wait_for_delete.
-/// Could it happen that, after removal, once all notifies have fired, yet
-/// another user inserts into the notify queue? (using Mutex
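// [editor's note] Illustrative sketch, not part of this patch: the per-key
// lock state described in the doc comment above could be modelled roughly
// as below (field names are assumptions; the real module is deleted by this
// revert):
//
// use std::sync::atomic::AtomicUsize;
// use std::sync::Arc;
// use tokio::sync::Notify;
//
// struct KeyLockState {
//     /// read-lock reference count; 0 means the entry is already mid-release
//     read_cnt: AtomicUsize,
//     /// notified when the current holder releases the lock
//     released: Arc<Notify>,
//     /// notified once the entry is actually removed from the map
//     wait_for_delete: Arc<Notify>,
// }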