From ed29ee1b4e7d98bc9d2cdb55ea656d3d3da26346 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Fri, 6 Dec 2024 19:24:46 -0500 Subject: [PATCH 01/24] feat: fix up agent to work in kubernetes environment --- crates/agent/src/cli.rs | 33 +++++------- crates/agent/src/client.rs | 11 +++- crates/agent/src/main.rs | 7 ++- crates/cli/src/cli.rs | 2 +- crates/cli/src/events.rs | 2 +- crates/common/src/state/agent_mode.rs | 23 ++++++-- crates/common/src/state/port_config.rs | 8 +-- crates/controlplane/src/cli.rs | 10 ++-- crates/controlplane/src/state/agent_flags.rs | 8 ++- crates/xtask/src/main.rs | 13 +++++ devops/README.md | 31 +++++++++++ devops/agent-entrypoint.sh | 36 +++++++++++++ devops/agent.Dockerfile | 12 +++++ devops/snops.Dockerfile | 36 +++++++++++++ devops/snops.agents.yaml | 41 ++++++++++++++ devops/snops.controlplane.yaml | 56 ++++++++++++++++++++ 16 files changed, 289 insertions(+), 40 deletions(-) create mode 100644 devops/README.md create mode 100644 devops/agent-entrypoint.sh create mode 100644 devops/agent.Dockerfile create mode 100644 devops/snops.Dockerfile create mode 100644 devops/snops.agents.yaml create mode 100644 devops/snops.controlplane.yaml diff --git a/crates/agent/src/cli.rs b/crates/agent/src/cli.rs index 9194a6be..19ce5273 100644 --- a/crates/agent/src/cli.rs +++ b/crates/agent/src/cli.rs @@ -15,45 +15,42 @@ use tracing::{info, warn}; use crate::net; -pub const ENV_ENDPOINT: &str = "SNOPS_ENDPOINT"; -pub const ENV_ENDPOINT_DEFAULT: &str = "127.0.0.1:1234"; - // TODO: allow agents to define preferred internal/external addrs #[derive(Debug, Parser)] pub struct Cli { - #[arg(long, env = ENV_ENDPOINT)] + #[clap(long, env = "SNOPS_ENDPOINT", default_value = "127.0.0.1:1234")] /// Control plane endpoint address (IP, or wss://host, http://host) - pub endpoint: Option, + pub endpoint: String, /// Agent ID, used to identify the agent in the network. 
- #[arg(long)] + #[clap(long, env = "SNOPS_AGENT_ID")] pub id: AgentId, /// Locally provided private key file, used for envs where private keys are /// locally provided - #[arg(long)] - #[clap(long = "private-key-file")] + #[clap(long = "private-key-file", env = "SNOPS_AGENT_PRIVATE_KEY_FILE")] pub private_key_file: Option, /// Labels to attach to the agent, used for filtering and grouping. - #[arg(long, value_delimiter = ',', num_args = 1..)] + #[clap(long, value_delimiter = ',', num_args = 1..)] pub labels: Option>, /// Path to the directory containing the stored data and configuration - #[arg(long, default_value = "./snops-data")] + #[clap(long, env = "SNOPS_AGENT_DATA_DIR", default_value = "./snops-data")] pub path: PathBuf, /// Enable the agent to fetch its external address. Necessary to determine /// which agents are on shared networks, and for /// external-to-external connections - #[arg(long)] + #[clap(long)] pub external: Option, /// Manually specify internal addresses. - #[arg(long)] + #[clap(long)] pub internal: Option, - #[clap(long = "bind", default_value_t = IpAddr::V4(Ipv4Addr::UNSPECIFIED))] + /// Bind address for the agent to listen on + #[clap(long = "bind", env = "SNOPS_AGENT_HOST", default_value_t = IpAddr::V4(Ipv4Addr::UNSPECIFIED))] pub bind_addr: IpAddr, #[clap(flatten)] @@ -62,7 +59,7 @@ pub struct Cli { #[clap(flatten)] pub modes: AgentModeOptions, - #[clap(short, long, default_value_t = false)] + #[clap(short, long, default_value_t = false, env = "SNOPS_AGENT_QUIET")] /// Run the agent in quiet mode, suppressing most node output pub quiet: bool, @@ -113,11 +110,7 @@ impl Cli { pub fn endpoint_and_uri(&self) -> (String, Uri) { // get the endpoint - let endpoint = self - .endpoint - .as_ref() - .cloned() - .unwrap_or(ENV_ENDPOINT_DEFAULT.to_owned()); + let endpoint = &self.endpoint; let mut query = format!("/agent?mode={}", u8::from(self.modes)); @@ -144,7 +137,7 @@ impl Cli { labels .iter() .filter(|s| !s.is_empty()) - .cloned() + .map(|s| 
s.trim()) .collect::>() .join(",") )); diff --git a/crates/agent/src/client.rs b/crates/agent/src/client.rs index ea94b9c4..52c5e9c8 100644 --- a/crates/agent/src/client.rs +++ b/crates/agent/src/client.rs @@ -59,12 +59,19 @@ pub async fn ws_connection(ws_req: Request, state: Arc) { return } // Shutdown the agent if the control plane requires an upgrade - tungstenite::Error::Http(e) if e.status() == StatusCode::UPGRADE_REQUIRED => { + tungstenite::Error::Http(res) if res.status() == StatusCode::UPGRADE_REQUIRED => { error!("The control plane requires an agent upgrade. Shutting down..."); state.shutdown().await; return; } - _ => error!("failed to connect to websocket: {e}"), + tungstenite::Error::Http(res) => { + error!( + "failed to connect to websocket: {}\n{}", + res.status(), + String::from_utf8_lossy(res.body().as_ref().unwrap_or(&vec![])) + ); + } + _ => error!("failed to connect to websocket: {e:?}"), } return; } diff --git a/crates/agent/src/main.rs b/crates/agent/src/main.rs index e8d7d8d0..ef1b0705 100644 --- a/crates/agent/src/main.rs +++ b/crates/agent/src/main.rs @@ -44,7 +44,12 @@ async fn main() { let (_guard, reload_handler) = init_logging(); - let args = Cli::parse(); + let mut args = Cli::parse(); + if args.modes.all_when_none() { + info!( + "No node modes specified, defaulting to all modes (client, validator, prover, compute)" + ); + } let (internal_addrs, external_addr) = args.addrs(); diff --git a/crates/cli/src/cli.rs b/crates/cli/src/cli.rs index 639d4807..56ec0b15 100644 --- a/crates/cli/src/cli.rs +++ b/crates/cli/src/cli.rs @@ -5,7 +5,7 @@ use clap::{Parser, ValueHint}; #[clap(name = "snops-cli", author = "MONADIC.US")] pub struct Cli { /// The url the control plane is on. - #[clap(short, long, default_value = "http://localhost:1234", value_hint = ValueHint::Url)] + #[clap(short, long, env = "SNOPS_ENDPOINT", default_value = "http://localhost:1234", value_hint = ValueHint::Url)] url: String, /// The subcommand to run. 
#[clap(subcommand)] diff --git a/crates/cli/src/events.rs b/crates/cli/src/events.rs index a99bd797..d65d96e4 100644 --- a/crates/cli/src/events.rs +++ b/crates/cli/src/events.rs @@ -53,7 +53,7 @@ impl EventsClient { Err(tungstenite::Error::Io(e)) if e.kind() == std::io::ErrorKind::ConnectionRefused => { bail!("Failed to connect to websocket: Connection refused") } - Err(e) => bail!("Failed to connect to websocket: {}", e), + Err(e) => bail!("Failed to connect to websocket: {e:?}"), }; Ok(Self { diff --git a/crates/common/src/state/agent_mode.rs b/crates/common/src/state/agent_mode.rs index 90de519a..72bfe7d1 100644 --- a/crates/common/src/state/agent_mode.rs +++ b/crates/common/src/state/agent_mode.rs @@ -5,22 +5,37 @@ use std::fmt::Display; )] pub struct AgentModeOptions { /// Enable running a validator node - #[arg(long)] + #[arg(long, env = "SNOPS_AGENT_VALIDATOR")] pub validator: bool, /// Enable running a prover node - #[arg(long)] + #[arg(long, env = "SNOPS_AGENT_PROVER")] pub prover: bool, /// Enable running a client node - #[arg(long)] + #[arg(long, env = "SNOPS_AGENT_CLIENT")] pub client: bool, /// Enable functioning as a compute target when inventoried - #[arg(long)] + #[arg(long, env = "SNOPS_AGENT_COMPUTE")] pub compute: bool, } +impl AgentModeOptions { + /// Enable all modes when none are specified + pub fn all_when_none(&mut self) -> bool { + if self.validator || self.prover || self.client || self.compute { + return false; + } + + self.validator = true; + self.prover = true; + self.client = true; + self.compute = true; + true + } +} + impl From for u8 { fn from(mode: AgentModeOptions) -> u8 { (mode.validator as u8) diff --git a/crates/common/src/state/port_config.rs b/crates/common/src/state/port_config.rs index 675dbd8c..c5b8d179 100644 --- a/crates/common/src/state/port_config.rs +++ b/crates/common/src/state/port_config.rs @@ -3,19 +3,19 @@ use crate::format::{DataFormat, DataFormatReader}; #[derive(Debug, Copy, Clone, serde::Serialize, 
serde::Deserialize, clap::Parser, Eq, PartialEq)] pub struct PortConfig { /// Specify the IP address and port for the node server - #[clap(long = "node", default_value_t = 4130)] + #[clap(long = "node", env = "SNARKOS_PORT_NODE", default_value_t = 4130)] pub node: u16, /// Specify the IP address and port for the BFT - #[clap(long = "bft", default_value_t = 5000)] + #[clap(long = "bft", env = "SNARKOS_PORT_BFT", default_value_t = 5000)] pub bft: u16, /// Specify the IP address and port for the REST server - #[clap(long = "rest", default_value_t = 3030)] + #[clap(long = "rest", env = "SNARKOS_PORT_REST", default_value_t = 3030)] pub rest: u16, /// Specify the port for the metrics - #[clap(long = "metrics", default_value_t = 9000)] + #[clap(long = "metrics", env = "SNARKOS_PORT_METRICS", default_value_t = 9000)] pub metrics: u16, } diff --git a/crates/controlplane/src/cli.rs b/crates/controlplane/src/cli.rs index 2a026781..2346c2ec 100644 --- a/crates/controlplane/src/cli.rs +++ b/crates/controlplane/src/cli.rs @@ -12,11 +12,11 @@ use url::Url; #[derive(Debug, Parser)] pub struct Cli { - #[clap(long = "bind", default_value_t = IpAddr::V4(Ipv4Addr::UNSPECIFIED))] + #[clap(long = "bind", env="SNOPS_IP", default_value_t = IpAddr::V4(Ipv4Addr::UNSPECIFIED))] pub bind_addr: IpAddr, /// Control plane server port - #[arg(long, default_value_t = 1234)] + #[arg(long, env = "SNOPS_PORT", default_value_t = 1234)] pub port: u16, // TODO: store services in a file config or something? 
@@ -29,19 +29,19 @@ pub struct Cli { #[arg(long, env = "LOKI_URL")] pub loki: Option, - #[arg(long, default_value_t = PrometheusLocation::Docker)] + #[arg(long, env="PROMETHEUS_LOCATION", default_value_t = PrometheusLocation::Docker)] pub prometheus_location: PrometheusLocation, /// Path to the directory containing the stored data - #[arg(long, default_value = "snops-control-data")] + #[arg(long, env = "SNOPS_DATA_DIR", default_value = "snops-control-data")] pub path: PathBuf, - #[arg(long)] /// Hostname to advertise to the control plane, used when resolving the /// control plane's address for external cannons can be an external IP /// or FQDN, will have the port appended /// /// must contain http:// or https:// + #[arg(long, env = "SNOPS_HOSTNAME")] pub hostname: Option, #[cfg(any(feature = "clipages", feature = "mangen"))] diff --git a/crates/controlplane/src/state/agent_flags.rs b/crates/controlplane/src/state/agent_flags.rs index d3eecf95..190f593f 100644 --- a/crates/controlplane/src/state/agent_flags.rs +++ b/crates/controlplane/src/state/agent_flags.rs @@ -12,9 +12,13 @@ use snops_common::{ pub struct AgentFlags { #[serde(deserialize_with = "deser_mode", serialize_with = "ser_mode")] pub mode: AgentModeOptions, - #[serde(deserialize_with = "deser_labels", serialize_with = "ser_labels")] + #[serde( + default, + deserialize_with = "deser_labels", + serialize_with = "ser_labels" + )] pub labels: IndexSet, - #[serde(deserialize_with = "deser_pk", default, serialize_with = "ser_pk")] + #[serde(default, deserialize_with = "deser_pk", serialize_with = "ser_pk")] pub local_pk: bool, } diff --git a/crates/xtask/src/main.rs b/crates/xtask/src/main.rs index 6b395844..ad584ce3 100644 --- a/crates/xtask/src/main.rs +++ b/crates/xtask/src/main.rs @@ -38,6 +38,8 @@ enum Command { Build(Build), /// For watching the project and auto-rebuilding Dev { target: BuildTarget }, + /// For building the containers + Containers, } #[derive(Parser)] @@ -224,6 +226,17 @@ impl Command { 
Command::InstallUpx => install_upx(sh), Command::Build(build) => build.run(sh), Command::Dev { target } => dev(sh, target), + Command::Containers => { + cmd!(sh, "docker build -t snops . -f ./devops/snops.Dockerfile") + .run() + .context("Building snops container")?; + cmd!( + sh, + "docker build -t snops-agent . -f ./devops/agent.Dockerfile" + ) + .run() + .context("Building snops-agent container") + } } } } diff --git a/devops/README.md b/devops/README.md new file mode 100644 index 00000000..328c89f3 --- /dev/null +++ b/devops/README.md @@ -0,0 +1,31 @@ +## Local Development + +### Prereqs +1. Install [`kind`](https://kind.sigs.k8s.io/) + +### Setup + +1. `cargo xtask containers` - Build snops containers +2. `kind create cluster` - Create development Kubernetes cluster +3. `kind load docker-image snops snops-agent` +4. `cat devops/snops.*.yaml | kubectl --context kind-kind apply -f -` +5. Deploy environment + ```bash + kubectl --context kind-kind exec -it deployments/snops-controlplane -- scli env apply - Date: Fri, 6 Dec 2024 23:18:01 -0500 Subject: [PATCH 02/24] refactor(schema): move schema documents to common from controlplane --- Cargo.lock | 168 +--- crates/cli/Cargo.toml | 2 +- crates/common/Cargo.toml | 4 + crates/common/src/key_source.rs | 56 ++ crates/common/src/lib.rs | 2 + crates/common/src/schema/cannon/mod.rs | 17 + .../src => common/src/schema}/cannon/sink.rs | 5 +- crates/common/src/schema/cannon/source.rs | 103 +++ crates/common/src/schema/error.rs | 13 + crates/common/src/schema/mod.rs | 73 ++ .../src/schema/nodes.rs | 210 +---- .../src/schema/outcomes.rs.old} | 0 crates/common/src/schema/persist/doc_node.rs | 150 ++++ crates/common/src/schema/persist/mod.rs | 7 + .../src/schema/persist/tx_sink.rs} | 13 +- .../src/schema/persist/tx_source.rs} | 20 +- crates/common/src/schema/serialize.rs | 0 .../src/schema/storage/accounts.rs | 0 .../src/schema/storage/binaries.rs | 15 +- crates/common/src/schema/storage/mod.rs | 125 +++ 
crates/controlplane/Cargo.toml | 4 +- .../src/{schema => apply}/error.rs | 12 +- .../controlplane/src/apply/loaded_storage.rs | 774 ++++++++++++++++++ crates/controlplane/src/apply/mod.rs | 5 + .../helpers.rs => apply/storage_helpers.rs} | 41 +- crates/controlplane/src/cannon/context.rs | 12 +- crates/controlplane/src/cannon/mod.rs | 14 +- crates/controlplane/src/cannon/router.rs | 2 +- crates/controlplane/src/cannon/source.rs | 134 +-- crates/controlplane/src/env/error.rs | 2 +- crates/controlplane/src/env/mod.rs | 50 +- crates/controlplane/src/error.rs | 10 - crates/controlplane/src/main.rs | 8 +- crates/controlplane/src/persist/env.rs | 20 +- crates/controlplane/src/persist/mod.rs | 4 - crates/controlplane/src/persist/node.rs | 18 +- crates/controlplane/src/persist/storage.rs | 9 +- crates/controlplane/src/schema/cannon.rs | 14 - .../controlplane/src/schema/infrastructure.rs | 9 - crates/controlplane/src/schema/mod.rs | 55 -- .../controlplane/src/schema/storage/loaded.rs | 408 --------- crates/controlplane/src/schema/storage/mod.rs | 486 ----------- crates/controlplane/src/server/api.rs | 11 +- crates/controlplane/src/server/content.rs | 6 +- crates/controlplane/src/server/error.rs | 5 +- .../controlplane/src/state/external_peers.rs | 14 +- crates/controlplane/src/state/global.rs | 3 +- crates/controlplane/src/state/mod.rs | 2 +- devops/README.md | 52 +- devops/snops.agents.yaml | 1 - snops_book/user_guide/envs/CANNONS.md | 4 +- snops_book/user_guide/envs/README.md | 2 +- snops_book/user_guide/envs/STORAGE.md | 8 +- snops_book/user_guide/envs/TOPOLOGY.md | 8 +- specs/canary-4-validators.yaml | 4 +- specs/canary-bonding.yaml | 4 +- specs/canary-clients.yaml | 4 +- specs/example-cannon-default.yaml | 6 +- specs/example-cannon-record.yaml | 4 +- specs/example-multi-binaries.yaml | 4 +- specs/genesis-params-0.yaml | 2 +- specs/genesis-params-1.yaml | 2 +- specs/persist-4-validators.yaml | 4 +- specs/test-4-clients-canary.yaml | 4 +- specs/test-4-validators.yaml | 4 
+- specs/testnet-4-validators.yaml | 6 +- specs/testnet-clients.yaml | 4 +- 67 files changed, 1595 insertions(+), 1647 deletions(-) create mode 100644 crates/common/src/schema/cannon/mod.rs rename crates/{controlplane/src => common/src/schema}/cannon/sink.rs (92%) create mode 100644 crates/common/src/schema/cannon/source.rs create mode 100644 crates/common/src/schema/error.rs create mode 100644 crates/common/src/schema/mod.rs rename crates/{controlplane => common}/src/schema/nodes.rs (50%) rename crates/{controlplane/src/schema/outcomes.rs => common/src/schema/outcomes.rs.old} (100%) create mode 100644 crates/common/src/schema/persist/doc_node.rs create mode 100644 crates/common/src/schema/persist/mod.rs rename crates/{controlplane/src/persist/sink.rs => common/src/schema/persist/tx_sink.rs} (93%) rename crates/{controlplane/src/persist/source.rs => common/src/schema/persist/tx_source.rs} (92%) create mode 100644 crates/common/src/schema/serialize.rs rename crates/{controlplane => common}/src/schema/storage/accounts.rs (100%) rename crates/{controlplane => common}/src/schema/storage/binaries.rs (96%) create mode 100644 crates/common/src/schema/storage/mod.rs rename crates/controlplane/src/{schema => apply}/error.rs (95%) create mode 100644 crates/controlplane/src/apply/loaded_storage.rs create mode 100644 crates/controlplane/src/apply/mod.rs rename crates/controlplane/src/{schema/storage/helpers.rs => apply/storage_helpers.rs} (95%) delete mode 100644 crates/controlplane/src/schema/cannon.rs delete mode 100644 crates/controlplane/src/schema/infrastructure.rs delete mode 100644 crates/controlplane/src/schema/mod.rs delete mode 100644 crates/controlplane/src/schema/storage/loaded.rs delete mode 100644 crates/controlplane/src/schema/storage/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 4b688f90..053c5dd4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -524,12 +524,6 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "cactus" -version = "1.0.7" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbc26382d871df4b7442e3df10a9402bf3cf5e55cbd66f12be38861425f0564" - [[package]] name = "cc" version = "1.1.30" @@ -562,20 +556,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" -[[package]] -name = "cfgrammar" -version = "0.13.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6026d8cd82ada8bbcfe337805dd1eb6afdc9e80fa4d57e977b3a36315e0c5525" -dependencies = [ - "indexmap 2.6.0", - "lazy_static", - "num-traits", - "regex", - "serde", - "vob", -] - [[package]] name = "chrono" version = "0.4.38" @@ -1021,18 +1001,6 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" -[[package]] -name = "filetime" -version = "0.2.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" -dependencies = [ - "cfg-if", - "libc", - "libredox", - "windows-sys 0.59.0", -] - [[package]] name = "fixedbitset" version = "0.5.7" @@ -1226,15 +1194,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "getopts" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" -dependencies = [ - "unicode-width", -] - [[package]] name = "getrandom" version = "0.2.15" @@ -1799,7 +1758,6 @@ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags 2.6.0", "libc", - "redox_syscall 0.5.7", ] [[package]] @@ -1889,60 +1847,6 @@ dependencies = [ "prost-types", ] -[[package]] -name = "lrlex" -version = "0.13.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"05863fdac293d1bc74f0cd91512933a5ab67e0cb607dc78ac4984be089456b49" -dependencies = [ - "cfgrammar", - "getopts", - "lazy_static", - "lrpar", - "num-traits", - "quote 1.0.37", - "regex", - "regex-syntax 0.8.5", - "serde", - "vergen", -] - -[[package]] -name = "lrpar" -version = "0.13.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b1ecae55cf667db308d3555e22b20bcc28eaeca0c95a09b37171673be157c71" -dependencies = [ - "bincode", - "cactus", - "cfgrammar", - "filetime", - "indexmap 2.6.0", - "lazy_static", - "lrtable", - "num-traits", - "packedvec", - "regex", - "serde", - "static_assertions", - "vergen", - "vob", -] - -[[package]] -name = "lrtable" -version = "0.13.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d42d2752cb50a171efadda0cb6fa97432e8bf05accfff3eed320b87e80a2f69e" -dependencies = [ - "cfgrammar", - "fnv", - "num-traits", - "serde", - "sparsevec", - "vob", -] - [[package]] name = "lru" version = "0.12.5" @@ -2233,15 +2137,6 @@ dependencies = [ "libc", ] -[[package]] -name = "num_threads" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" -dependencies = [ - "libc", -] - [[package]] name = "number_prefix" version = "0.4.0" @@ -2359,16 +2254,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" -[[package]] -name = "packedvec" -version = "1.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bde3c690ec20e4a2b4fb46f0289a451181eb50011a1e2acc8d85e2fde9062a45" -dependencies = [ - "num-traits", - "serde", -] - [[package]] name = "parking_lot" version = "0.11.2" @@ -2537,19 +2422,6 @@ dependencies = [ "url", ] -[[package]] -name = "promql-parser" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"007a331efb31f6ddb644590ef22359c9469784931162aad92599e34bcfa66583" -dependencies = [ - "cfgrammar", - "lazy_static", - "lrlex", - "lrpar", - "regex", -] - [[package]] name = "prost" version = "0.12.6" @@ -4549,7 +4421,6 @@ dependencies = [ "lazy_static", "lazysort", "prometheus-http-query", - "promql-parser", "rand", "rand_chacha", "rayon", @@ -4655,16 +4526,19 @@ dependencies = [ "clap", "clap-markdown", "clap_mangen", + "fixedbitset", "futures", "http 1.1.0", "indexmap 2.6.0", "lasso", "lazy_static", + "lazysort", "paste", "rand", "regex", "serde", "serde_json", + "serde_yml", "sha2", "sled", "snops-checkpoint", @@ -4687,18 +4561,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "sparsevec" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35df5d2e580b29f3f7ec5b4ed49b0ab3acf7f3624122b3e823cafb9630f293b8" -dependencies = [ - "num-traits", - "packedvec", - "serde", - "vob", -] - [[package]] name = "spin" version = "0.9.8" @@ -4947,9 +4809,7 @@ checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ "deranged", "itoa", - "libc", "num-conv", - "num_threads", "powerfmt", "serde", "time-core", @@ -5485,34 +5345,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vergen" -version = "8.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2990d9ea5967266ea0ccf413a4aa5c42a93dbcfda9cb49a97de6931726b12566" -dependencies = [ - "anyhow", - "rustversion", - "time", -] - [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" -[[package]] -name = "vob" -version = "3.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c058f4c41e71a043c67744cb76dcc1ae63ece328c1732a72489ccccc2dec23e6" -dependencies = [ - "num-traits", - "rustc_version", - "serde", -] - [[package]] name = "walkdir" version = "2.5.0" diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index ce3bf359..3653af9f 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -22,7 +22,7 @@ reqwest = { workspace = true, features = ["json"] } rustls.workspace = true serde.workspace = true serde_json.workspace = true -snops-common = { workspace = true, features = ["aot_cmds"] } +snops-common = { workspace = true, features = ["aot_cmds", "schema"] } tokio = { workspace = true, features = ["macros", "signal", "rt-multi-thread"] } tokio-tungstenite.workspace = true urlencoding = "2.1.3" diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index 3d5268da..9ed50ac8 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -8,6 +8,7 @@ description = "Common types and utilities for snops" [features] default = [] aot_cmds = [] +schema = ["fixedbitset", "serde_yaml", "lazysort"] clipages = ["anyhow", "clap-markdown"] mangen = ["anyhow", "clap_mangen"] @@ -18,16 +19,19 @@ chrono = { workspace = true, features = ["serde"] } clap.workspace = true clap_mangen = { workspace = true, optional = true } clap-markdown = { workspace = true, optional = true } +fixedbitset = { workspace = true, optional = true } futures.workspace = true http.workspace = true indexmap = { workspace = true, features = ["std", "serde"] } lasso.workspace = true lazy_static.workspace = true +lazysort = { workspace = true, optional = true } paste.workspace = true rand.workspace = true regex.workspace = true serde.workspace = true serde_json.workspace = true +serde_yaml = { workspace = true, optional = true } sha2.workspace = true sled.workspace = true snops-checkpoint = { workspace = true, features = ["serde"] } diff --git a/crates/common/src/key_source.rs b/crates/common/src/key_source.rs index 9b9d6a7d..8f1bcc5c 100644 --- 
a/crates/common/src/key_source.rs +++ b/crates/common/src/key_source.rs @@ -224,3 +224,59 @@ impl KeySource { } } } + +#[cfg(test)] +mod tests { + use crate::key_source::KeySource; + use crate::key_source::ACCOUNTS_KEY_ID; + + #[test] + fn test_key_source_deserialization() { + assert_eq!( + serde_yaml::from_str::("committee.0").unwrap(), + KeySource::Committee(Some(0)) + ); + assert_eq!( + serde_yaml::from_str::("committee.100").unwrap(), + KeySource::Committee(Some(100)) + ); + assert_eq!( + serde_yaml::from_str::("committee.$").unwrap(), + KeySource::Committee(None) + ); + + assert_eq!( + serde_yaml::from_str::("accounts.0").unwrap(), + KeySource::Named(*ACCOUNTS_KEY_ID, Some(0)) + ); + assert_eq!( + serde_yaml::from_str::("accounts.$").unwrap(), + KeySource::Named(*ACCOUNTS_KEY_ID, None) + ); + + assert_eq!( + serde_yaml::from_str::( + "APrivateKey1zkp8CZNn3yeCseEtxuVPbDCwSyhGW6yZKUYKfgXmcpoGPWH" + ) + .unwrap(), + KeySource::PrivateKeyLiteral( + "APrivateKey1zkp8CZNn3yeCseEtxuVPbDCwSyhGW6yZKUYKfgXmcpoGPWH".to_string() + ) + ); + + assert_eq!( + serde_yaml::from_str::( + "aleo1ekc03f2vwemtpksckhrcl7mv4t7sm6ykldwldvvlysqt2my9zygqfhndya" + ) + .unwrap(), + KeySource::PublicKeyLiteral( + "aleo1ekc03f2vwemtpksckhrcl7mv4t7sm6ykldwldvvlysqt2my9zygqfhndya".to_string() + ) + ); + + assert!(serde_yaml::from_str::("committee.-100").is_err(),); + assert!(serde_yaml::from_str::("accounts.-100").is_err(),); + assert!(serde_yaml::from_str::("accounts._").is_err(),); + assert!(serde_yaml::from_str::("accounts.*").is_err(),); + } +} diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index 948881e8..d889eff1 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -13,6 +13,8 @@ pub mod events; pub mod format; pub mod key_source; pub mod node_targets; +#[cfg(feature = "schema")] +pub mod schema; pub mod util; #[cfg(feature = "clipages")] diff --git a/crates/common/src/schema/cannon/mod.rs b/crates/common/src/schema/cannon/mod.rs new file mode 100644 
index 00000000..8b558fb3 --- /dev/null +++ b/crates/common/src/schema/cannon/mod.rs @@ -0,0 +1,17 @@ +use serde::{Deserialize, Serialize}; +use sink::TxSink; +use source::TxSource; + +pub mod sink; +pub mod source; +use crate::state::CannonId; + +/// A document describing the node infrastructure for a test. +#[derive(Deserialize, Serialize, Debug, Clone)] +pub struct CannonDocument { + pub name: CannonId, + pub description: Option, + + pub source: TxSource, + pub sink: TxSink, +} diff --git a/crates/controlplane/src/cannon/sink.rs b/crates/common/src/schema/cannon/sink.rs similarity index 92% rename from crates/controlplane/src/cannon/sink.rs rename to crates/common/src/schema/cannon/sink.rs index 4ef86f7d..f58a3093 100644 --- a/crates/controlplane/src/cannon/sink.rs +++ b/crates/common/src/schema/cannon/sink.rs @@ -1,5 +1,6 @@ use serde::{Deserialize, Serialize}; -use snops_common::state::TxPipeId; + +use crate::{node_targets::NodeTargets, state::TxPipeId}; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] @@ -12,7 +13,7 @@ pub struct TxSink { /// /// Requires cannon to have an associated env_id #[serde(default)] - pub target: Option, + pub target: Option, /// Number of attempts to broadcast a transaction to the target /// should the transaction not make it into the next block. This /// is helpful for mitigating ghost transactions. 
diff --git a/crates/common/src/schema/cannon/source.rs b/crates/common/src/schema/cannon/source.rs new file mode 100644 index 00000000..0b7698d7 --- /dev/null +++ b/crates/common/src/schema/cannon/source.rs @@ -0,0 +1,103 @@ +use lasso::Spur; +use serde::{Deserialize, Serialize}; + +use crate::{node_targets::NodeTargets, INTERN}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub struct TxSource { + /// Receive authorizations from a persistent path + /// /api/v1/env/:env_id/cannons/:id/auth + #[serde(default)] + pub query: QueryTarget, + #[serde(default)] + pub compute: ComputeTarget, +} + +impl Default for ComputeTarget { + fn default() -> Self { + ComputeTarget::Agent { labels: None } + } +} + +/// Used to determine the redirection for the following paths: +/// /cannon///latest/stateRoot +/// /cannon///transaction/broadcast +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", untagged)] +pub enum QueryTarget { + /// Target a specific node (probably over rpc instead of reqwest lol...) 
+ /// + /// Requires cannon to have an associated env_id + Node(NodeTargets), + /// Use the local ledger query service + Local(LocalService), +} + +impl Default for QueryTarget { + fn default() -> Self { + QueryTarget::Local(LocalService { sync_from: None }) + } +} + +fn deser_labels<'de, D>(deser: D) -> Result>, D::Error> +where + D: serde::Deserializer<'de>, +{ + Ok(Option::>::deserialize(deser)?.map(|s| { + s.into_iter() + .map(|s| INTERN.get_or_intern(s)) + .collect::>() + })) +} + +fn ser_labels(labels: &Option>, ser: S) -> Result +where + S: serde::Serializer, +{ + match labels { + Some(labels) => { + let labels = labels + .iter() + .map(|s| INTERN.resolve(s)) + .collect::>(); + serde::Serialize::serialize(&labels, ser) + } + None => serde::Serialize::serialize(&None::, ser), + } +} + +/// Which service is providing the compute power for executing transactions +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", untagged)] +pub enum ComputeTarget { + /// Use the agent pool to generate executions + Agent { + #[serde( + default, + deserialize_with = "deser_labels", + serialize_with = "ser_labels", + skip_serializing_if = "Option::is_none" + )] + labels: Option>, + }, + /// Use demox' API to generate executions + #[serde(rename_all = "kebab-case")] + Demox { demox_api: String }, +} + +/// Represents an instance of a local query service. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct LocalService { + // TODO debate this + /// An optional node to sync blocks from... + /// necessary for private tx mode in realtime mode as this will have to + /// sync from a node that has a valid ledger + /// + /// When present, the cannon will update the ledger service from this node + /// if the node is out of sync, it will corrupt the ledger... 
+ /// + /// requires cannon to have an associated env_id + #[serde(default, skip_serializing_if = "Option::is_none")] + pub sync_from: Option, +} diff --git a/crates/common/src/schema/error.rs b/crates/common/src/schema/error.rs new file mode 100644 index 00000000..4ec850af --- /dev/null +++ b/crates/common/src/schema/error.rs @@ -0,0 +1,13 @@ +use thiserror::Error; + +use crate::impl_into_status_code; + +#[derive(Debug, Error)] +#[error("`{i}`: `{e}`")] +pub struct DeserializeError { + pub i: usize, + #[source] + pub e: serde_yaml::Error, +} + +impl_into_status_code!(DeserializeError); diff --git a/crates/common/src/schema/mod.rs b/crates/common/src/schema/mod.rs new file mode 100644 index 00000000..af71dd17 --- /dev/null +++ b/crates/common/src/schema/mod.rs @@ -0,0 +1,73 @@ +use cannon::CannonDocument; +use error::DeserializeError; +use nodes::NodesDocument; +use serde::{Deserialize, Serialize}; +use storage::StorageDocument; + +use crate::state::NodeKey; + +pub mod cannon; +pub mod error; +pub mod nodes; +pub mod persist; +pub mod serialize; +pub mod storage; + +// TODO: Considerations: +// TODO: - Generate json schema with https://docs.rs/schemars/latest/schemars/ +// TODO: - Do these types need to implement `Serialize`? + +/// A document representing all item types. +#[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(tag = "kind")] +#[non_exhaustive] +pub enum ItemDocument { + #[serde(rename = "snops/storage/v1")] + Storage(Box), + + #[serde(rename = "snops/nodes/v1")] + Nodes(Box), + + #[serde(rename = "snops/cannon/v1")] + Cannon(Box), +} + +/// Deserialize (YAML) many documents into a `Vec` of documents. +pub fn deserialize_docs(str: &str) -> Result, DeserializeError> { + serde_yaml::Deserializer::from_str(str) + .enumerate() + .map(|(i, doc)| ItemDocument::deserialize(doc).map_err(|e| DeserializeError { i, e })) + .collect() +} + +/// Deserialize (YAML) many documents into a `Vec` of documents. 
+pub fn deserialize_docs_bytes(str: &[u8]) -> Result, DeserializeError> { + serde_yaml::Deserializer::from_slice(str) + .enumerate() + .map(|(i, doc)| ItemDocument::deserialize(doc).map_err(|e| DeserializeError { i, e })) + .collect() +} + +#[cfg(test)] +mod test { + use super::deserialize_docs_bytes; + + #[test] + fn deserialize_specs() { + for entry in std::fs::read_dir("../../specs") + .expect("failed to read specs dir") + .map(Result::unwrap) + { + let file_name = entry.file_name(); + let name = file_name.to_str().expect("failed to read spec file name"); + if !name.ends_with(".yaml") && !name.ends_with(".yml") { + continue; + } + + let data = std::fs::read(entry.path()).expect("failed to read spec file"); + if let Err(e) = deserialize_docs_bytes(&data) { + panic!("failed to deserialize spec file {name}: {e}") + } + } + } +} diff --git a/crates/controlplane/src/schema/nodes.rs b/crates/common/src/schema/nodes.rs similarity index 50% rename from crates/controlplane/src/schema/nodes.rs rename to crates/common/src/schema/nodes.rs index da16b984..66fb6625 100644 --- a/crates/controlplane/src/schema/nodes.rs +++ b/crates/common/src/schema/nodes.rs @@ -3,7 +3,9 @@ use std::net::{IpAddr, SocketAddr}; use fixedbitset::FixedBitSet; use indexmap::{IndexMap, IndexSet}; use serde::{de::Visitor, Deserialize, Deserializer, Serialize}; -use snops_common::{ + +use super::NodeKey; +use crate::{ key_source::KeySource, lasso::Spur, node_targets::NodeTargets, @@ -12,12 +14,9 @@ use snops_common::{ INTERN, }; -use super::NodeKey; -use crate::persist::prelude::*; - /// A document describing the node infrastructure for a test. -#[derive(Deserialize, Debug, Clone)] -pub struct Document { +#[derive(Deserialize, Serialize, Debug, Clone)] +pub struct NodesDocument { pub name: String, pub description: Option, /// The network to use for all nodes. 
@@ -44,37 +43,6 @@ pub struct ExternalNode { pub rest: Option, } -impl DataFormat for ExternalNode { - type Header = u8; - const LATEST_HEADER: Self::Header = 1; - - fn write_data( - &self, - writer: &mut W, - ) -> Result { - let mut written = 0; - written += writer.write_data(&self.bft)?; - written += writer.write_data(&self.node)?; - written += writer.write_data(&self.rest)?; - Ok(written) - } - - fn read_data( - reader: &mut R, - header: &Self::Header, - ) -> Result { - match header { - 1 => { - let bft = reader.read_data(&())?; - let node = reader.read_data(&())?; - let rest = reader.read_data(&())?; - Ok(ExternalNode { bft, node, rest }) - } - _ => Err(DataReadError::Custom("unsupported version".to_owned())), - } - } -} - /// Impl serde Deserialize ExternalNode but allow for { bft: addr, node: addr, /// rest: addr} or just `addr` impl<'de> Deserialize<'de> for ExternalNode { @@ -165,9 +133,10 @@ where } // TODO: could use some more clarification on some of these fields -/// A node in the testing infrastructure. 
+/// A node in the environment #[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)] pub struct Node { + /// When true, the node will be started #[serde(default = "please_be_online")] pub online: bool, /// When specified, creates a group of nodes, all with the same @@ -248,168 +217,3 @@ impl Node { mask } } - -#[derive(Debug, Clone)] -pub struct NodeFormatHeader { - pub(crate) key_source: DataHeaderOf, - pub(crate) height_request: DataHeaderOf, - pub(crate) node_targets: DataHeaderOf, - pub has_binaries: bool, -} - -impl DataFormat for NodeFormatHeader { - type Header = u8; - const LATEST_HEADER: Self::Header = 2; - - fn write_data( - &self, - writer: &mut W, - ) -> Result { - let mut written = 0; - written += self.key_source.write_data(writer)?; - written += self.height_request.write_data(writer)?; - written += self.node_targets.write_data(writer)?; - Ok(written) - } - - fn read_data( - reader: &mut R, - header: &Self::Header, - ) -> Result { - if *header == 0 || *header > Self::LATEST_HEADER { - return Err(DataReadError::unsupported( - "NodeFormatHeader", - format!("1 or {}", Self::LATEST_HEADER), - *header, - )); - } - - let key_source = KeySource::read_header(reader)?; - let height_request = HeightRequest::read_header(reader)?; - let node_targets = NodeTargets::read_header(reader)?; - Ok(NodeFormatHeader { - key_source, - height_request, - node_targets, - has_binaries: *header > 1, - }) - } -} - -impl DataFormat for Node { - type Header = NodeFormatHeader; - const LATEST_HEADER: Self::Header = NodeFormatHeader { - key_source: KeySource::LATEST_HEADER, - height_request: HeightRequest::LATEST_HEADER, - node_targets: NodeTargets::LATEST_HEADER, - has_binaries: true, - }; - - fn write_data( - &self, - writer: &mut W, - ) -> Result { - let mut written = 0; - written += self.online.write_data(writer)?; - written += self.replicas.write_data(writer)?; - written += self.key.write_data(writer)?; - written += self.height.write_data(writer)?; - written += 
self.labels.write_data(writer)?; - written += self.agent.write_data(writer)?; - written += self.validators.write_data(writer)?; - written += self.peers.write_data(writer)?; - written += self.env.write_data(writer)?; - written += self.binary.write_data(writer)?; - Ok(written) - } - - fn read_data( - reader: &mut R, - header: &Self::Header, - ) -> Result { - let online = reader.read_data(&())?; - let replicas = reader.read_data(&())?; - let key = reader.read_data(&header.key_source)?; - let height = reader.read_data(&header.height_request)?; - let labels = Vec::::read_data(reader, &())?; - let agent = reader.read_data(&())?; - let validators = reader.read_data(&header.node_targets)?; - let peers = reader.read_data(&header.node_targets)?; - let env = Vec::<(String, String)>::read_data(reader, &((), ()))?; - let binary = if header.has_binaries { - reader.read_data(&())? - } else { - None - }; - - Ok(Node { - online, - replicas, - key, - height, - labels: labels.into_iter().collect(), - agent, - validators, - peers, - env: env.into_iter().collect(), - binary, - }) - } -} - -#[cfg(test)] -mod tests { - use snops_common::key_source::ACCOUNTS_KEY_ID; - - use super::*; - - #[test] - fn test_key_source_deserialization() { - assert_eq!( - serde_yaml::from_str::("committee.0").unwrap(), - KeySource::Committee(Some(0)) - ); - assert_eq!( - serde_yaml::from_str::("committee.100").unwrap(), - KeySource::Committee(Some(100)) - ); - assert_eq!( - serde_yaml::from_str::("committee.$").unwrap(), - KeySource::Committee(None) - ); - - assert_eq!( - serde_yaml::from_str::("accounts.0").unwrap(), - KeySource::Named(*ACCOUNTS_KEY_ID, Some(0)) - ); - assert_eq!( - serde_yaml::from_str::("accounts.$").unwrap(), - KeySource::Named(*ACCOUNTS_KEY_ID, None) - ); - - assert_eq!( - serde_yaml::from_str::( - "APrivateKey1zkp8CZNn3yeCseEtxuVPbDCwSyhGW6yZKUYKfgXmcpoGPWH" - ) - .unwrap(), - KeySource::PrivateKeyLiteral( - "APrivateKey1zkp8CZNn3yeCseEtxuVPbDCwSyhGW6yZKUYKfgXmcpoGPWH".to_string() - ) - 
); - - assert_eq!( - serde_yaml::from_str::( - "aleo1ekc03f2vwemtpksckhrcl7mv4t7sm6ykldwldvvlysqt2my9zygqfhndya" - ) - .unwrap(), - KeySource::PublicKeyLiteral( - "aleo1ekc03f2vwemtpksckhrcl7mv4t7sm6ykldwldvvlysqt2my9zygqfhndya".to_string() - ) - ); - - assert!(serde_yaml::from_str::("committee.-100").is_err(),); - assert!(serde_yaml::from_str::("accounts.-100").is_err(),); - assert!(serde_yaml::from_str::("accounts._").is_err(),); - assert!(serde_yaml::from_str::("accounts.*").is_err(),); - } -} diff --git a/crates/controlplane/src/schema/outcomes.rs b/crates/common/src/schema/outcomes.rs.old similarity index 100% rename from crates/controlplane/src/schema/outcomes.rs rename to crates/common/src/schema/outcomes.rs.old diff --git a/crates/common/src/schema/persist/doc_node.rs b/crates/common/src/schema/persist/doc_node.rs new file mode 100644 index 00000000..e8e9263c --- /dev/null +++ b/crates/common/src/schema/persist/doc_node.rs @@ -0,0 +1,150 @@ +use lasso::Spur; + +use crate::schema::nodes::{ExternalNode, Node}; +use crate::{ + format::{ + DataFormat, DataFormatReader, DataFormatWriter, DataHeaderOf, DataReadError, DataWriteError, + }, + key_source::KeySource, + node_targets::NodeTargets, + state::HeightRequest, +}; + +impl DataFormat for ExternalNode { + type Header = u8; + const LATEST_HEADER: Self::Header = 1; + + fn write_data( + &self, + writer: &mut W, + ) -> Result { + let mut written = 0; + written += writer.write_data(&self.bft)?; + written += writer.write_data(&self.node)?; + written += writer.write_data(&self.rest)?; + Ok(written) + } + + fn read_data( + reader: &mut R, + header: &Self::Header, + ) -> Result { + match header { + 1 => { + let bft = reader.read_data(&())?; + let node = reader.read_data(&())?; + let rest = reader.read_data(&())?; + Ok(ExternalNode { bft, node, rest }) + } + _ => Err(DataReadError::Custom("unsupported version".to_owned())), + } + } +} + +#[derive(Debug, Clone)] +pub struct NodeFormatHeader { + pub(crate) key_source: 
DataHeaderOf, + pub(crate) height_request: DataHeaderOf, + pub(crate) node_targets: DataHeaderOf, + pub has_binaries: bool, +} + +impl DataFormat for NodeFormatHeader { + type Header = u8; + const LATEST_HEADER: Self::Header = 2; + + fn write_data( + &self, + writer: &mut W, + ) -> Result { + let mut written = 0; + written += self.key_source.write_data(writer)?; + written += self.height_request.write_data(writer)?; + written += self.node_targets.write_data(writer)?; + Ok(written) + } + + fn read_data( + reader: &mut R, + header: &Self::Header, + ) -> Result { + if *header == 0 || *header > Self::LATEST_HEADER { + return Err(DataReadError::unsupported( + "NodeFormatHeader", + format!("1 or {}", Self::LATEST_HEADER), + *header, + )); + } + + let key_source = KeySource::read_header(reader)?; + let height_request = HeightRequest::read_header(reader)?; + let node_targets = NodeTargets::read_header(reader)?; + Ok(NodeFormatHeader { + key_source, + height_request, + node_targets, + has_binaries: *header > 1, + }) + } +} + +impl DataFormat for Node { + type Header = NodeFormatHeader; + const LATEST_HEADER: Self::Header = NodeFormatHeader { + key_source: KeySource::LATEST_HEADER, + height_request: HeightRequest::LATEST_HEADER, + node_targets: NodeTargets::LATEST_HEADER, + has_binaries: true, + }; + + fn write_data( + &self, + writer: &mut W, + ) -> Result { + let mut written = 0; + written += self.online.write_data(writer)?; + written += self.replicas.write_data(writer)?; + written += self.key.write_data(writer)?; + written += self.height.write_data(writer)?; + written += self.labels.write_data(writer)?; + written += self.agent.write_data(writer)?; + written += self.validators.write_data(writer)?; + written += self.peers.write_data(writer)?; + written += self.env.write_data(writer)?; + written += self.binary.write_data(writer)?; + Ok(written) + } + + fn read_data( + reader: &mut R, + header: &Self::Header, + ) -> Result { + let online = reader.read_data(&())?; + let 
replicas = reader.read_data(&())?; + let key = reader.read_data(&header.key_source)?; + let height = reader.read_data(&header.height_request)?; + let labels = Vec::::read_data(reader, &())?; + let agent = reader.read_data(&())?; + let validators = reader.read_data(&header.node_targets)?; + let peers = reader.read_data(&header.node_targets)?; + let env = Vec::<(String, String)>::read_data(reader, &((), ()))?; + let binary = if header.has_binaries { + reader.read_data(&())? + } else { + None + }; + + Ok(Node { + online, + replicas, + key, + height, + labels: labels.into_iter().collect(), + agent, + validators, + peers, + env: env.into_iter().collect(), + binary, + }) + } +} diff --git a/crates/common/src/schema/persist/mod.rs b/crates/common/src/schema/persist/mod.rs new file mode 100644 index 00000000..a0b9799f --- /dev/null +++ b/crates/common/src/schema/persist/mod.rs @@ -0,0 +1,7 @@ +mod doc_node; +mod tx_sink; +mod tx_source; + +pub use doc_node::*; +pub use tx_sink::*; +pub use tx_source::*; diff --git a/crates/controlplane/src/persist/sink.rs b/crates/common/src/schema/persist/tx_sink.rs similarity index 93% rename from crates/controlplane/src/persist/sink.rs rename to crates/common/src/schema/persist/tx_sink.rs index 645bab48..c204718c 100644 --- a/crates/controlplane/src/persist/sink.rs +++ b/crates/common/src/schema/persist/tx_sink.rs @@ -1,7 +1,14 @@ -use snops_common::{node_targets::NodeTargets, state::TxPipeId}; +use std::io::{Read, Write}; -use super::prelude::*; -use crate::cannon::sink::TxSink; +use crate::{ + format::{ + read_dataformat, write_dataformat, DataFormat, DataFormatReader, DataHeaderOf, + DataReadError, DataWriteError, + }, + node_targets::NodeTargets, + schema::cannon::sink::TxSink, + state::TxPipeId, +}; #[derive(Debug, Clone)] pub struct TxSinkFormatHeader { diff --git a/crates/controlplane/src/persist/source.rs b/crates/common/src/schema/persist/tx_source.rs similarity index 92% rename from crates/controlplane/src/persist/source.rs 
rename to crates/common/src/schema/persist/tx_source.rs index b88b0931..4165ae86 100644 --- a/crates/controlplane/src/persist/source.rs +++ b/crates/common/src/schema/persist/tx_source.rs @@ -1,7 +1,10 @@ -use snops_common::node_targets::NodeTargets; +use std::io::{Read, Write}; -use super::prelude::*; -use crate::cannon::source::{ComputeTarget, LocalService, QueryTarget, TxSource}; +use crate::{ + format::{DataFormat, DataFormatReader, DataHeaderOf, DataReadError, DataWriteError}, + node_targets::NodeTargets, + schema::cannon::source::{ComputeTarget, LocalService, QueryTarget, TxSource}, +}; #[derive(Debug, Clone)] pub struct TxSourceFormatHeader { @@ -112,11 +115,14 @@ impl DataFormat for TxSource { #[cfg(test)] mod tests { - use snops_common::{node_targets::NodeTargets, INTERN}; - use crate::{ - cannon::source::{ComputeTarget, LocalService, QueryTarget, TxSource}, - persist::{prelude::*, TxSourceFormatHeader}, + format::{read_dataformat, write_dataformat, DataFormat}, + node_targets::NodeTargets, + schema::{ + cannon::source::{ComputeTarget, LocalService, QueryTarget, TxSource}, + persist::TxSourceFormatHeader, + }, + INTERN, }; macro_rules! 
case { diff --git a/crates/common/src/schema/serialize.rs b/crates/common/src/schema/serialize.rs new file mode 100644 index 00000000..e69de29b diff --git a/crates/controlplane/src/schema/storage/accounts.rs b/crates/common/src/schema/storage/accounts.rs similarity index 100% rename from crates/controlplane/src/schema/storage/accounts.rs rename to crates/common/src/schema/storage/accounts.rs diff --git a/crates/controlplane/src/schema/storage/binaries.rs b/crates/common/src/schema/storage/binaries.rs similarity index 96% rename from crates/controlplane/src/schema/storage/binaries.rs rename to crates/common/src/schema/storage/binaries.rs index 8b9ec320..568bccee 100644 --- a/crates/controlplane/src/schema/storage/binaries.rs +++ b/crates/common/src/schema/storage/binaries.rs @@ -6,12 +6,13 @@ use std::{ use lazy_static::lazy_static; use lazysort::SortedBy; -use serde::Deserialize; -use snops_common::{ +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +use crate::{ binaries::{BinaryEntry, BinarySource}, util::sha256_file, }; -use thiserror::Error; const PROFILES: [&str; 4] = ["release-small", "release", "release-big", "debug"]; @@ -147,17 +148,17 @@ pub enum BinResolveError { SetPermissions(PathBuf, #[source] std::io::Error), } -#[derive(Debug, Clone, Copy, Deserialize, Default)] +#[derive(Debug, Clone, Copy, Deserialize, Serialize, Default)] #[serde(untagged)] pub enum AutoIsDefault { #[default] None, - #[serde(with = "snops_common::state::strings::auto")] + #[serde(with = "crate::state::strings::auto")] Auto, Value(T), } -#[derive(Deserialize, Debug, Clone)] +#[derive(Deserialize, Serialize, Debug, Clone)] pub struct BinaryEntryInternal { pub source: BinarySource, #[serde(default)] @@ -167,7 +168,7 @@ pub struct BinaryEntryInternal { } /// A BinaryEntryDoc can be a shorthand or a full entry -#[derive(Deserialize, Debug, Clone)] +#[derive(Deserialize, Serialize, Debug, Clone)] #[serde(untagged)] pub enum BinaryEntryDoc { Shorthand(BinarySource), diff 
--git a/crates/common/src/schema/storage/mod.rs b/crates/common/src/schema/storage/mod.rs new file mode 100644 index 00000000..a5ba128e --- /dev/null +++ b/crates/common/src/schema/storage/mod.rs @@ -0,0 +1,125 @@ +use std::path::PathBuf; + +use indexmap::IndexMap; +use serde::{Deserialize, Serialize}; +use snops_checkpoint::RetentionPolicy; + +use crate::state::{InternedId, StorageId}; + +mod accounts; +use accounts::*; +mod binaries; +pub use binaries::*; + +pub const STORAGE_DIR: &str = "storage"; + +/// A storage document. Explains how storage for a test should be set up. +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct StorageDocument { + pub id: StorageId, + /// Regen version + #[serde(default)] + pub regen: u16, + pub name: String, + pub description: Option, + /// Tell nodes not to re-download the storage data. + #[serde(default)] + pub persist: bool, + #[serde(default)] + pub generate: Option, + #[serde(default)] + pub connect: Option, + #[serde(default)] + pub retention_policy: Option, + /// The binaries list for this storage is used to determine which binaries + /// are used by the agents. + /// Overriding `default` will replace the node's default binary rather than + /// using snops' own default aot binary. + /// Overriding `compute` will replace the node's default binary only for + /// compute + #[serde(default)] + pub binaries: IndexMap, +} + +/// Data generation instructions. 
+#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct StorageGeneration { + #[serde(default)] + pub genesis: Option, + + #[serde(default)] + pub accounts: IndexMap, + + #[serde(default)] + pub transactions: Vec, +} + +// TODO: Convert this into a struct similar to the execute action, then use +// compute agents to assemble these on the fly +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct Transaction { + pub file: PathBuf, + pub total: u64, + pub amount: u64, + pub sources: Vec, + pub destinations: Vec, +} + +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct GenesisGeneration { + pub private_key: Option, + pub seed: Option, + pub additional_accounts: Option, + pub additional_accounts_balance: Option, + #[serde(flatten)] + pub balances: GenesisBalances, + #[serde(flatten)] + pub commissions: GenesisCommissions, + pub bonded_withdrawal: Option>, +} + +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(untagged)] +pub enum GenesisBalances { + #[serde(rename_all = "kebab-case")] + Defined { + bonded_balances: IndexMap, + }, + #[serde(rename_all = "kebab-case")] + Generated { + committee_size: Option, + bonded_balance: Option, + }, +} + +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(untagged)] +pub enum GenesisCommissions { + #[serde(rename_all = "kebab-case")] + Defined { + bonded_commissions: IndexMap, + }, + #[serde(rename_all = "kebab-case")] + Generated { bonded_commission: Option }, +} + +impl Default for GenesisGeneration { + fn default() -> Self { + Self { + seed: None, + private_key: None, + additional_accounts: None, + additional_accounts_balance: None, + balances: GenesisBalances::Generated { + committee_size: None, + bonded_balance: None, + }, + commissions: GenesisCommissions::Generated { + bonded_commission: None, + }, + bonded_withdrawal: None, + } + } +} diff --git a/crates/controlplane/Cargo.toml b/crates/controlplane/Cargo.toml index f32a3481..c1d894bb 100644 --- 
a/crates/controlplane/Cargo.toml +++ b/crates/controlplane/Cargo.toml @@ -33,7 +33,7 @@ jwt.workspace = true lazysort.workspace = true lazy_static.workspace = true prometheus-http-query = "0.8" -promql-parser = "0.4" +# promql-parser = "0.4" rand.workspace = true rand_chacha.workspace = true rayon.workspace = true @@ -45,7 +45,7 @@ serde_yaml.workspace = true sha2.workspace = true sled.workspace = true snops-checkpoint.workspace = true -snops-common = { workspace = true, features = ["aot_cmds"] } +snops-common = { workspace = true, features = ["aot_cmds", "schema"] } strum_macros.workspace = true tarpc.workspace = true thiserror.workspace = true diff --git a/crates/controlplane/src/schema/error.rs b/crates/controlplane/src/apply/error.rs similarity index 95% rename from crates/controlplane/src/schema/error.rs rename to crates/controlplane/src/apply/error.rs index f13af3d6..2e6f5f37 100644 --- a/crates/controlplane/src/schema/error.rs +++ b/crates/controlplane/src/apply/error.rs @@ -13,7 +13,17 @@ use strum_macros::AsRefStr; use thiserror::Error; use url::Url; -use super::storage::BinarySourceError; +#[derive(Debug, Error)] +#[error("`{i}`: `{e}`")] +pub struct DeserializeError { + pub i: usize, + #[source] + pub e: serde_yaml::Error, +} + +impl_into_status_code!(DeserializeError); + +use snops_common::schema::storage::BinarySourceError; #[derive(Debug, Error, AsRefStr)] pub enum StorageError { diff --git a/crates/controlplane/src/apply/loaded_storage.rs b/crates/controlplane/src/apply/loaded_storage.rs new file mode 100644 index 00000000..d345c60f --- /dev/null +++ b/crates/controlplane/src/apply/loaded_storage.rs @@ -0,0 +1,774 @@ +use std::{ + fs, io::Write, ops::Deref, os::unix::fs::PermissionsExt, path::PathBuf, process::Stdio, + sync::Arc, +}; + +use futures_util::StreamExt; +use indexmap::IndexMap; +use rand::seq::IteratorRandom; +use sha2::{Digest, Sha256}; +use snops_checkpoint::RetentionPolicy; +use snops_common::{ + aot_cmds::error::CommandError, + 
api::StorageInfo, + binaries::{BinaryEntry, BinarySource}, + constant::{SNARKOS_GENESIS_FILE, VERSION_FILE}, + key_source::{KeySource, ACCOUNTS_KEY_ID}, + schema::storage::{ + GenesisBalances, GenesisCommissions, GenesisGeneration, StorageDocument, StorageGeneration, + DEFAULT_AOT_BINARY, STORAGE_DIR, + }, + state::{InternedId, KeyState, NetworkId, StorageId}, +}; +use tokio::process::Command; +use tracing::{error, info, trace, warn}; + +use super::error::SchemaError; +use crate::{ + apply::{ + error::StorageError, + storage_helpers::{ + get_version_from_path, pick_account_addr, pick_additional_addr, pick_commitee_addr, + read_to_addrs, + }, + }, + cli::Cli, + persist::PersistStorage, + state::GlobalState, +}; + +// IndexMap +pub type AleoAddrMap = IndexMap; + +#[derive(Debug, Clone)] +pub struct LoadedStorage { + /// Storage ID + pub id: StorageId, + /// Network ID + pub network: NetworkId, + /// Version counter for this storage - incrementing will invalidate old + /// saved ledgers + pub version: u16, + /// committee lookup + pub committee: AleoAddrMap, + /// other accounts files lookup + pub accounts: IndexMap, + /// storage of checkpoints + pub retention_policy: Option, + /// whether agents using this storage should persist it + pub persist: bool, + /// whether to use the network's native genesis block + pub native_genesis: bool, + /// binaries available for this storage + pub binaries: IndexMap, +} + +impl LoadedStorage { + pub async fn from_doc( + doc: StorageDocument, + state: &GlobalState, + network: NetworkId, + ) -> Result, SchemaError> { + let id = doc.id; + + // add the prepared storage to the storage map + + if state.storage.contains_key(&(network, id)) { + // TODO: we probably don't want to warn here. 
instead, it would be nice to + // hash/checksum the storage to compare it with the conflicting storage + warn!("a storage with the id {id} has already been prepared"); + } + + let base = state.storage_path(network, id); + let version_file = base.join(VERSION_FILE); + + let mut native_genesis = false; + + // TODO: The dir can be made by a previous run and the aot stuff can fail + // i.e an empty/incomplete directory can exist and we should check those + let mut exists = matches!(tokio::fs::try_exists(&base).await, Ok(true)); + + // warn if an existing block/ledger already exists + if exists { + warn!("The specified storage ID {id} already exists"); + } + + let old_version = get_version_from_path(&version_file).await?; + + info!( + "Storage {id} has version {old_version:?}. incoming version is {}", + doc.regen + ); + + // wipe old storage when the version changes + if old_version != Some(doc.regen) && exists { + info!("Storage {id} version changed, removing old storage"); + tokio::fs::remove_dir_all(&base) + .await + .map_err(|e| StorageError::RemoveStorage(version_file.clone(), e))?; + exists = false; + } + + // gather the binaries + let mut binaries = IndexMap::default(); + for (id, v) in doc.binaries { + let mut entry = + BinaryEntry::try_from(v).map_err(|e| StorageError::BinaryParse(id, e))?; + if let BinarySource::Path(p) = &mut entry.source { + if !p.exists() { + return Err(StorageError::BinaryFileMissing(id, p.clone()).into()); + } + // canonicalize the path + if let Ok(canon) = p.canonicalize() { + trace!( + "resolved binary relative path from {} to {}", + p.display(), + canon.display() + ); + *p = canon + } + } + info!("Resolved binary {id}: {entry}"); + binaries.insert(id, entry); + } + + // resolve the default aot bin for this storage + let aot_bin = LoadedStorage::resolve_binary_from_map( + id, + network, + &binaries, + state, + InternedId::default(), + ) + .await?; + + tokio::fs::create_dir_all(&base) + .await + .map_err(|e| 
StorageError::GenerateStorage(id, e))?; + + // generate the block and ledger if we have generation params + if let (Some(generation), false) = (doc.generate.as_ref(), exists) { + tracing::debug!("Generating storage for {id}"); + // generate the genesis block using the aot cli + let output = base.join(SNARKOS_GENESIS_FILE); + + match (doc.connect, generation.genesis.as_ref()) { + (None, None) => { + native_genesis = true; + info!("{id}: using network native genesis") + } + (Some(ref url), _) => { + // downloaded genesis block is not native + let err = |e| StorageError::FailedToFetchGenesis(id, url.clone(), e); + + // I think it's ok to reuse this error here + // because it just turns a failing response into an error + // or failing to turn it into bytes + let res = reqwest::get(url.clone()) + .await + .map_err(err)? + .error_for_status() + .map_err(err)? + .bytes() + .await + .map_err(err)?; + + tokio::fs::write(&output, res) + .await + .map_err(|e| StorageError::FailedToWriteGenesis(id, e))?; + } + (None, Some(genesis)) => { + // generated genesis block is not native + let mut command = Command::new(&aot_bin); + command + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .env("NETWORK", network.to_string()) + .arg("genesis") + .arg("--output") + .arg(&output); + + // conditional seed flag + if let Some(seed) = genesis.seed { + command.arg("--seed").arg(seed.to_string()); + } + + // conditional genesis key flag + if let Some(private_key) = &genesis.private_key { + command.arg("--genesis-key").arg(private_key); + }; + + // generate committee based on the generation params + match &genesis.balances { + GenesisBalances::Generated { + committee_size, + bonded_balance, + } => { + command + .arg("--committee-output") + .arg(base.join("committee.json")); + + if let Some(committee_size) = committee_size { + command + .arg("--committee-size") + .arg(committee_size.to_string()); + } + if let Some(bonded_balance) = bonded_balance { + command + .arg("--bonded-balance") +
.arg(bonded_balance.to_string()); + } + } + GenesisBalances::Defined { bonded_balances } => { + command + .arg("--bonded-balances") + .arg(serde_json::to_string(&bonded_balances).unwrap()); + } + } + + // generate committee commissions based on the generation params + match &genesis.commissions { + GenesisCommissions::Generated { bonded_commission } => { + if let Some(bonded_commission) = bonded_commission { + command + .arg("--bonded-balance") + .arg(bonded_commission.to_string()); + } + } + GenesisCommissions::Defined { bonded_commissions } => { + command + .arg("--bonded-commissions") + .arg(serde_json::to_string(&bonded_commissions).unwrap()); + } + } + + if let Some(withdrawal) = &genesis.bonded_withdrawal { + command + .arg("--bonded-withdrawal") + .arg(serde_json::to_string(withdrawal).unwrap()); + } + + // conditionally add additional accounts + if let Some(additional_accounts) = genesis.additional_accounts { + command + .arg("--additional-accounts") + .arg(additional_accounts.to_string()) + .arg("--additional-accounts-output") + .arg(base.join("accounts.json")); + } + + if let Some(balance) = genesis.additional_accounts_balance { + command + .arg("--additional-accounts-balance") + .arg(balance.to_string()); + } + + info!("Generating genesis for {id} with command: {command:?}"); + + let res = command + .spawn() + .map_err(|e| { + StorageError::Command( + CommandError::action("spawning", "aot genesis", e), + id, + ) + })? 
+ .wait() + .await + .map_err(|e| { + StorageError::Command( + CommandError::action("waiting", "aot genesis", e), + id, + ) + })?; + + if !res.success() { + warn!("failed to run genesis generation command..."); + } + + // ensure the genesis block was generated + tokio::fs::try_exists(&output) + .await + .map_err(|e| StorageError::FailedToGenGenesis(id, e))?; + } + } + } + + let mut accounts = IndexMap::new(); + accounts.insert( + *ACCOUNTS_KEY_ID, + read_to_addrs(pick_additional_addr, &base.join("accounts.json")).await?, + ); + + if let Some(generation) = &doc.generate { + for (name, account) in &generation.accounts { + let path = base.join(format!("{}.json", name)); + + if !path.exists() { + info!("generating accounts for {name}"); + + let mut command = Command::new(&aot_bin); + command + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .env("NETWORK", network.to_string()) + .arg("accounts") + .arg(account.count.to_string()) + .arg("--output") + .arg(&path); + if let Some(seed) = account.seed { + command.arg("--seed").arg(seed.to_string()); + } + + let res = command + .spawn() + .map_err(|e| { + StorageError::Command( + CommandError::action("spawning", "aot accounts", e), + id, + ) + })? + .wait() + .await + .map_err(|e| { + StorageError::Command( + CommandError::action("waiting", "aot accounts", e), + id, + ) + })?; + + if !res.success() { + warn!("failed to run account generation command for {name}..."); + } + } + + accounts.insert(*name, read_to_addrs(pick_account_addr, &path).await?); + } + } + + // write the regen version to a "version" file + tokio::fs::write(&version_file, doc.regen.to_string()) + .await + .map_err(|e| StorageError::WriteVersion(version_file.clone(), e))?; + + let committee_file = base.join("committee.json"); + + // if the committee was specified in the generation params, use that + if let ( + Some(StorageGeneration { + genesis: + Some(GenesisGeneration { + private_key, + balances: GenesisBalances::Defined { bonded_balances }, + .. 
+ }), + .. + }), + false, + ) = (doc.generate.as_ref(), committee_file.exists()) + { + // TODO: should be possible to get committee from genesis blocks + let mut balances: IndexMap<_, _> = bonded_balances + .iter() + .map(|(addr, bal)| (addr.clone(), (String::new(), *bal))) + .collect(); + + // derive the committee member 0's key + if let (Some(key), true) = (private_key, !balances.is_empty()) { + balances[0].0.clone_from(key) + } + + // write balances to committee.json if it doesn't exist + tokio::fs::write(&committee_file, serde_json::to_string(&balances).unwrap()) + .await + .map_err(|e| StorageError::WriteCommittee(committee_file.clone(), e))?; + }; + // otherwise read the committee from the committee.json file + let committee = read_to_addrs(pick_commitee_addr, &committee_file).await?; + + let storage = Arc::new(LoadedStorage { + version: doc.regen, + id, + network, + committee, + accounts, + retention_policy: doc.retention_policy, + persist: doc.persist, + native_genesis, + binaries, + }); + if let Err(e) = state + .db + .storage + .save(&(network, id), &PersistStorage::from(storage.deref())) + { + error!("failed to save storage meta: {e}"); + } + state.storage.insert((network, id), storage.clone()); + + Ok(storage) + } + + pub fn lookup_keysource_pk(&self, key: &KeySource) -> KeyState { + match key { + KeySource::Local => KeyState::Local, + KeySource::PrivateKeyLiteral(pk) => KeyState::Literal(pk.clone()), + KeySource::PublicKeyLiteral(_) => KeyState::None, + KeySource::ProgramLiteral(_) => KeyState::None, + KeySource::Committee(Some(i)) => self + .committee + .get_index(*i) + .map(|(_, pk)| pk.clone()) + .into(), + KeySource::Committee(None) => KeyState::None, + KeySource::Named(name, Some(i)) => self + .accounts + .get(name) + .and_then(|a| a.get_index(*i).map(|(_, pk)| pk.clone())) + .into(), + KeySource::Named(_name, None) => KeyState::None, + } + } + + pub fn lookup_keysource_addr(&self, key: &KeySource) -> KeyState { + match key { + KeySource::Local =>
KeyState::Local, + KeySource::PrivateKeyLiteral(_) => KeyState::None, + KeySource::PublicKeyLiteral(addr) => KeyState::Literal(addr.clone()), + KeySource::ProgramLiteral(addr) => KeyState::Literal(addr.clone()), + KeySource::Committee(Some(i)) => self + .committee + .get_index(*i) + .map(|(addr, _)| addr.clone()) + .into(), + KeySource::Committee(None) => KeyState::None, + KeySource::Named(name, Some(i)) => self + .accounts + .get(name) + .and_then(|a| a.get_index(*i).map(|(addr, _)| addr.clone())) + .into(), + KeySource::Named(_name, None) => KeyState::None, + } + } + + pub fn sample_keysource_pk(&self, key: &KeySource) -> KeyState { + match key { + KeySource::Local => KeyState::Local, + KeySource::PrivateKeyLiteral(pk) => KeyState::Literal(pk.clone()), + KeySource::PublicKeyLiteral(_) => KeyState::None, + KeySource::ProgramLiteral(_) => KeyState::None, + KeySource::Committee(Some(i)) => self + .committee + .get_index(*i) + .map(|(_, pk)| pk.clone()) + .into(), + KeySource::Committee(None) => self + .committee + .values() + .choose(&mut rand::thread_rng()) + .cloned() + .into(), + KeySource::Named(name, Some(i)) => self + .accounts + .get(name) + .and_then(|a| a.get_index(*i).map(|(_, pk)| pk.clone())) + .into(), + KeySource::Named(name, None) => self + .accounts + .get(name) + .and_then(|a| a.values().choose(&mut rand::thread_rng()).cloned()) + .into(), + } + } + + pub fn sample_keysource_addr(&self, key: &KeySource) -> KeyState { + match key { + KeySource::Local => KeyState::Local, + KeySource::PrivateKeyLiteral(_) => KeyState::None, + KeySource::PublicKeyLiteral(addr) => KeyState::Literal(addr.clone()), + KeySource::ProgramLiteral(addr) => KeyState::Literal(addr.clone()), + KeySource::Committee(Some(i)) => self + .committee + .get_index(*i) + .map(|(addr, _)| addr.clone()) + .into(), + KeySource::Committee(None) => self + .committee + .keys() + .choose(&mut rand::thread_rng()) + .cloned() + .into(), + KeySource::Named(name, Some(i)) => self + .accounts + 
.get(name) + .and_then(|a| a.get_index(*i).map(|(addr, _)| addr.clone())) + .into(), + KeySource::Named(name, None) => self + .accounts + .get(name) + .and_then(|a| a.keys().choose(&mut rand::thread_rng()).cloned()) + .into(), + } + } + + pub fn info(&self) -> StorageInfo { + let mut binaries: IndexMap<_, _> = self + .binaries + .iter() + .map(|(k, v)| (*k, v.with_api_path(self.network, self.id, *k))) + .collect(); + + // insert the default binary source information (so agents have a way to compare + // shasums and file size) + binaries + .entry(InternedId::default()) + .or_insert(DEFAULT_AOT_BINARY.with_api_path( + self.network, + self.id, + InternedId::default(), + )); + + StorageInfo { + id: self.id, + version: self.version, + retention_policy: self.retention_policy.clone(), + persist: self.persist, + native_genesis: self.native_genesis, + binaries, + } + } + + pub fn path(&self, state: &GlobalState) -> PathBuf { + self.path_cli(&state.cli) + } + + pub fn path_cli(&self, cli: &Cli) -> PathBuf { + let mut path = cli.path.join(STORAGE_DIR); + path.push(self.network.to_string()); + path.push(self.id.to_string()); + path + } + + /// Resolve the default binary for this storage + pub async fn resolve_default_binary( + &self, + state: &GlobalState, + ) -> Result { + self.resolve_binary(state, InternedId::default()).await + } + + /// Resolve the compute binary for this storage + pub async fn resolve_compute_binary( + &self, + state: &GlobalState, + ) -> Result { + self.resolve_binary(state, InternedId::compute_id()).await + } + + /// Resolve (find and download) a binary for this storage by id + pub async fn resolve_binary( + &self, + state: &GlobalState, + id: InternedId, + ) -> Result { + Self::resolve_binary_from_map(self.id, self.network, &self.binaries, state, id).await + } + + /// Resolve a binary entry for this storage by id + pub fn resolve_binary_entry( + &self, + id: InternedId, + ) -> Result<(InternedId, &BinaryEntry), StorageError> { + 
Self::resolve_binary_entry_from_map(self.id, &self.binaries, id) + } + + pub fn resolve_binary_entry_from_map( + storage_id: InternedId, + binaries: &IndexMap, + mut id: InternedId, + ) -> Result<(InternedId, &BinaryEntry), StorageError> { + let compute_id = InternedId::compute_id(); + + // if the binary id is "compute" and there is no "compute" binary override in + // the map, then we should use the default binary + if id == compute_id && !binaries.contains_key(&compute_id) { + id = InternedId::default(); + } + + // if the binary id is the default binary id and there is no default binary + // override in the map, + if id == InternedId::default() && !binaries.contains_key(&InternedId::default()) { + // then we should use the default AOT binary + return Ok((id, &DEFAULT_AOT_BINARY)); + } + + let bin = binaries + .get(&id) + .ok_or(StorageError::BinaryDoesNotExist(id, storage_id))?; + + Ok((id, bin)) + } + + pub async fn resolve_binary_from_map( + storage_id: InternedId, + network: NetworkId, + binaries: &IndexMap, + state: &GlobalState, + id: InternedId, + ) -> Result { + let (id, bin) = Self::resolve_binary_entry_from_map(storage_id, binaries, id)?; + + let id_str: &str = id.as_ref(); + + let remote_url = match bin.source.clone() { + // if the binary is a relative path, then we should use the path as is + // rather than downloading it + BinarySource::Path(path) => return Ok(path.clone()), + BinarySource::Url(url) => url, + }; + + // derive the path to the binary + let mut download_path = state.cli.path.join(STORAGE_DIR); + download_path.push(network.to_string()); + download_path.push(storage_id.to_string()); + download_path.push("binaries"); + download_path.push(id_str); + + // if the file already exists, ensure that it is the correct size and sha256 + if download_path.exists() { + let perms = download_path + .metadata() + .map_err(|e| StorageError::PermissionError(download_path.clone(), e))? 
+ .permissions(); + if perms.mode() != 0o755 { + std::fs::set_permissions(&download_path, std::fs::Permissions::from_mode(0o755)) + .map_err(|e| StorageError::PermissionError(download_path.clone(), e))?; + } + + match bin.check_file_sha256(&download_path) { + Ok(None) => {} + Ok(Some(sha256)) => { + return Err(StorageError::BinarySha256Mismatch( + storage_id, + download_path, + bin.sha256.clone().unwrap_or_default(), + sha256, + )); + } + Err(e) => { + return Err(StorageError::BinaryCheckFailed( + storage_id, + download_path, + e.to_string(), + )); + } + } + + match bin.check_file_size(&download_path) { + // file is okay :) + Ok(None) => {} + Ok(Some(size)) => { + return Err(StorageError::BinarySizeMismatch( + storage_id, + download_path, + bin.size.unwrap_or_default(), + size, + )); + } + Err(e) => { + return Err(StorageError::BinaryCheckFailed( + storage_id, + download_path, + e.to_string(), + )); + } + } + + return Ok(download_path); + } + + let resp = reqwest::get(remote_url.clone()) + .await + .map_err(|e| StorageError::FailedToFetchBinary(id, remote_url.clone(), e))?; + + if resp.status() != reqwest::StatusCode::OK { + return Err(StorageError::FailedToFetchBinaryWithStatus( + id, + remote_url, + resp.status(), + )); + } + + if let Some(parent) = download_path.parent() { + fs::create_dir_all(parent) + .map_err(|e| StorageError::FailedToCreateBinaryFile(id, e))?; + } + + let mut file = std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(&download_path) + .map_err(|e| StorageError::FailedToCreateBinaryFile(id, e))?; + + let mut digest = Sha256::new(); + let mut stream = resp.bytes_stream(); + let mut size = 0u64; + + while let Some(chunk) = stream.next().await { + match chunk { + Ok(chunk) => { + size += chunk.len() as u64; + file.write_all(&chunk) + .map_err(|e| StorageError::FailedToWriteBinaryFile(id, e))?; + digest.update(&chunk); + } + Err(e) => { + return Err(StorageError::FailedToFetchBinary(id, remote_url, e)); + } + } + 
} + + // check if the binary sha256 matches the expected sha256 + let sha256 = format!("{:x}", digest.finalize()); + if let Some(bin_sha256) = bin.sha256.as_ref() { + if bin_sha256.to_lowercase() != sha256 { + return Err(StorageError::BinarySha256Mismatch( + id, + download_path, + bin_sha256.clone(), + sha256, + )); + } + } + + // check if the binary size matches the expected size + if let Some(bin_size) = bin.size { + if bin_size != size { + return Err(StorageError::BinarySizeMismatch( + id, + download_path, + bin_size, + size, + )); + } + } + + info!( + "downloaded binary {storage_id}.{id_str} to {} ({size} bytes)", + download_path.display() + ); + trace!("binary {storage_id}.{id_str} has sha256 {sha256}"); + + let perms = download_path + .metadata() + .map_err(|e| StorageError::PermissionError(download_path.clone(), e))? + .permissions(); + if perms.mode() != 0o755 { + std::fs::set_permissions(&download_path, std::fs::Permissions::from_mode(0o755)) + .map_err(|e| StorageError::PermissionError(download_path.clone(), e))?; + } + + Ok(download_path) + } +} diff --git a/crates/controlplane/src/apply/mod.rs b/crates/controlplane/src/apply/mod.rs new file mode 100644 index 00000000..c197d54a --- /dev/null +++ b/crates/controlplane/src/apply/mod.rs @@ -0,0 +1,5 @@ +pub mod error; +mod loaded_storage; +pub mod storage_helpers; + +pub use loaded_storage::*; diff --git a/crates/controlplane/src/schema/storage/helpers.rs b/crates/controlplane/src/apply/storage_helpers.rs similarity index 95% rename from crates/controlplane/src/schema/storage/helpers.rs rename to crates/controlplane/src/apply/storage_helpers.rs index a14dbb12..1ea647c5 100644 --- a/crates/controlplane/src/schema/storage/helpers.rs +++ b/crates/controlplane/src/apply/storage_helpers.rs @@ -3,8 +3,26 @@ use std::path::PathBuf; use indexmap::IndexMap; use serde::de::DeserializeOwned; -use super::AleoAddrMap; -use crate::schema::error::StorageError; +use super::{error::StorageError, AleoAddrMap}; + +// TODO: 
function should also take storage id +// in case of error, the storage id can be used to provide more context +pub async fn read_to_addrs( + f: impl Fn(T) -> String, + file: &PathBuf, +) -> Result { + if !file.exists() { + return Ok(Default::default()); + } + + let data = tokio::fs::read_to_string(file) + .await + .map_err(|e| StorageError::ReadBalances(file.clone(), e))?; + let parsed: IndexMap = + serde_json::from_str(&data).map_err(|e| StorageError::ParseBalances(file.clone(), e))?; + + Ok(parsed.into_iter().map(|(k, v)| (k, f(v))).collect()) +} pub async fn get_version_from_path(path: &PathBuf) -> Result, StorageError> { if !path.exists() { @@ -27,22 +45,3 @@ pub fn pick_commitee_addr(entry: (String, u64)) -> String { pub fn pick_account_addr(entry: String) -> String { entry } - -// TODO: function should also take storage id -// in case of error, the storage id can be used to provide more context -pub async fn read_to_addrs( - f: impl Fn(T) -> String, - file: &PathBuf, -) -> Result { - if !file.exists() { - return Ok(Default::default()); - } - - let data = tokio::fs::read_to_string(file) - .await - .map_err(|e| StorageError::ReadBalances(file.clone(), e))?; - let parsed: IndexMap = - serde_json::from_str(&data).map_err(|e| StorageError::ParseBalances(file.clone(), e))?; - - Ok(parsed.into_iter().map(|(k, v)| (k, f(v))).collect()) -} diff --git a/crates/controlplane/src/cannon/context.rs b/crates/controlplane/src/cannon/context.rs index 13833bc8..b11bc488 100644 --- a/crates/controlplane/src/cannon/context.rs +++ b/crates/controlplane/src/cannon/context.rs @@ -6,6 +6,10 @@ use futures_util::{stream::FuturesUnordered, StreamExt}; use lazysort::SortedBy; use snops_common::{ events::{Event, TransactionAbortReason, TransactionEvent}, + schema::cannon::{ + sink::TxSink, + source::{ComputeTarget, TxSource}, + }, state::{AgentId, Authorization, CannonId, EnvId, NetworkId, TransactionSendState}, }; use tracing::{error, trace, warn}; @@ -13,15 +17,11 @@ use 
tracing::{error, trace, warn}; use super::{ error::{CannonError, ExecutionContextError, SourceError}, file::TransactionSink, - sink::TxSink, - source::TxSource, + source::ExecuteAuth, tracker::TransactionTracker, CannonReceivers, }; -use crate::{ - cannon::source::ComputeTarget, - state::{EmitEvent, GetGlobalState, GlobalState, REST_CLIENT}, -}; +use crate::state::{EmitEvent, GetGlobalState, GlobalState, REST_CLIENT}; /// Information a transaction cannon needs for execution via spawned task pub struct ExecutionContext { diff --git a/crates/controlplane/src/cannon/mod.rs b/crates/controlplane/src/cannon/mod.rs index f70b04ee..a216ec98 100644 --- a/crates/controlplane/src/cannon/mod.rs +++ b/crates/controlplane/src/cannon/mod.rs @@ -3,7 +3,6 @@ pub mod error; pub mod file; mod net; pub mod router; -pub mod sink; pub mod source; pub mod tracker; @@ -20,8 +19,13 @@ use dashmap::DashMap; use snops_common::{ aot_cmds::AotCmd, format::PackedUint, + schema::cannon::{ + sink::TxSink, + source::{QueryTarget, TxSource}, + }, state::{Authorization, CannonId, EnvId, NetworkId, StorageId, TransactionSendState}, }; +use source::{GetQueryPort, GetStateRoot}; use tokio::{ sync::{ mpsc::{UnboundedReceiver, UnboundedSender}, @@ -32,12 +36,8 @@ use tokio::{ use tracing::{error, trace, warn}; use tracker::TransactionTracker; -use self::{ - error::{CannonError, CannonInstanceError}, - sink::TxSink, - source::TxSource, -}; -use crate::{cannon::source::QueryTarget, state::GlobalState}; +use self::error::{CannonError, CannonInstanceError}; +use crate::state::GlobalState; /* diff --git a/crates/controlplane/src/cannon/router.rs b/crates/controlplane/src/cannon/router.rs index 14cf8e96..80054ec1 100644 --- a/crates/controlplane/src/cannon/router.rs +++ b/crates/controlplane/src/cannon/router.rs @@ -11,10 +11,10 @@ use serde::Deserialize; use serde_json::json; use snops_common::{ key_source::KeySource, + schema::cannon::source::QueryTarget, state::{id_or_none, Authorization, KeyState, 
NetworkId}, }; -use super::source::QueryTarget; use crate::{ server::{actions::execute::execute_status, error::ServerError}, state::AppState, diff --git a/crates/controlplane/src/cannon/source.rs b/crates/controlplane/src/cannon/source.rs index 21878b0a..f925151e 100644 --- a/crates/controlplane/src/cannon/source.rs +++ b/crates/controlplane/src/cannon/source.rs @@ -1,11 +1,11 @@ use std::sync::Arc; use chrono::Utc; -use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use snops_common::events::{EventHelpers, TransactionEvent}; +use snops_common::schema::cannon::source::{ComputeTarget, LocalService, QueryTarget, TxSource}; +use snops_common::state::NetworkId; use snops_common::state::{Authorization, TransactionSendState}; -use snops_common::{lasso::Spur, node_targets::NodeTargets, state::NetworkId, INTERN}; use tracing::error; use super::context::CtxEventHelper; @@ -18,31 +18,19 @@ use super::{ use crate::env::set::find_compute_agent; use crate::state::EmitEvent; -/// Represents an instance of a local query service. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct LocalService { - // TODO debate this - /// An optional node to sync blocks from... - /// necessary for private tx mode in realtime mode as this will have to - /// sync from a node that has a valid ledger - /// - /// When present, the cannon will update the ledger service from this node - /// if the node is out of sync, it will corrupt the ledger... 
- /// - /// requires cannon to have an associated env_id - #[serde(default, skip_serializing_if = "Option::is_none")] - pub sync_from: Option, +pub trait GetStateRoot { + fn get_state_root( + &self, + network: NetworkId, + port: u16, + ) -> impl std::future::Future>; } -impl LocalService { +impl GetStateRoot for LocalService { // TODO: cache this when sync_from is false /// Fetch the state root from the local query service /// (non-cached) - pub async fn get_state_root( - &self, - network: NetworkId, - port: u16, - ) -> Result { + async fn get_state_root(&self, network: NetworkId, port: u16) -> Result { let url = format!("http://127.0.0.1:{port}/{network}/latest/stateRoot"); let response = reqwest::get(&url) .await @@ -54,92 +42,13 @@ impl LocalService { } } -/// Used to determine the redirection for the following paths: -/// /cannon///latest/stateRoot -/// /cannon///transaction/broadcast -#[derive(Clone, Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case", untagged)] -pub enum QueryTarget { - /// Target a specific node (probably over rpc instead of reqwest lol...) 
- /// - /// Requires cannon to have an associated env_id - Node(NodeTargets), - /// Use the local ledger query service - Local(LocalService), -} - -impl Default for QueryTarget { - fn default() -> Self { - QueryTarget::Local(LocalService { sync_from: None }) - } -} - -fn deser_labels<'de, D>(deser: D) -> Result>, D::Error> -where - D: serde::Deserializer<'de>, -{ - Ok(Option::>::deserialize(deser)?.map(|s| { - s.into_iter() - .map(|s| INTERN.get_or_intern(s)) - .collect::>() - })) -} - -fn ser_labels(labels: &Option>, ser: S) -> Result -where - S: serde::Serializer, -{ - match labels { - Some(labels) => { - let labels = labels - .iter() - .map(|s| INTERN.resolve(s)) - .collect::>(); - serde::Serialize::serialize(&labels, ser) - } - None => serde::Serialize::serialize(&None::, ser), - } -} - -/// Which service is providing the compute power for executing transactions -#[derive(Clone, Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case", untagged)] -pub enum ComputeTarget { - /// Use the agent pool to generate executions - Agent { - #[serde( - default, - deserialize_with = "deser_labels", - serialize_with = "ser_labels", - skip_serializing_if = "Option::is_none" - )] - labels: Option>, - }, - /// Use demox' API to generate executions - #[serde(rename_all = "kebab-case")] - Demox { demox_api: String }, -} - -impl Default for ComputeTarget { - fn default() -> Self { - ComputeTarget::Agent { labels: None } - } -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub struct TxSource { - /// Receive authorizations from a persistent path - /// /api/v1/env/:env_id/cannons/:id/auth - #[serde(default)] - pub query: QueryTarget, - #[serde(default)] - pub compute: ComputeTarget, +pub trait GetQueryPort { + fn get_query_port(&self) -> Result, CannonError>; } -impl TxSource { +impl GetQueryPort for TxSource { /// Get an available port for the query service if applicable - pub fn get_query_port(&self) -> Result, CannonError> { + 
fn get_query_port(&self) -> Result, CannonError> { if !matches!(self.query, QueryTarget::Local(_)) { return Ok(None); } @@ -149,13 +58,24 @@ impl TxSource { } } -impl ComputeTarget { - pub async fn execute( +pub trait ExecuteAuth { + /// Execute the authorization and emit it to the transaction tracker + fn execute( &self, ctx: &ExecutionContext, query_path: &str, tx_id: &Arc, auth: &Authorization, + ) -> impl std::future::Future>; +} + +impl ExecuteAuth for ComputeTarget { + async fn execute( + self: &ComputeTarget, + ctx: &ExecutionContext, + query_path: &str, + tx_id: &Arc, + auth: &Authorization, ) -> Result<(), CannonError> { match self { ComputeTarget::Agent { labels } => { diff --git a/crates/controlplane/src/env/error.rs b/crates/controlplane/src/env/error.rs index a97dfa98..e443ccbb 100644 --- a/crates/controlplane/src/env/error.rs +++ b/crates/controlplane/src/env/error.rs @@ -11,8 +11,8 @@ use thiserror::Error; use tokio::task::JoinError; use crate::{ + apply::error::{SchemaError, StorageError}, cannon::error::{AuthorizeError, CannonError}, - schema::error::{SchemaError, StorageError}, }; #[derive(Debug, Error, AsRefStr)] diff --git a/crates/controlplane/src/env/mod.rs b/crates/controlplane/src/env/mod.rs index 3da59028..550681e1 100644 --- a/crates/controlplane/src/env/mod.rs +++ b/crates/controlplane/src/env/mod.rs @@ -8,10 +8,18 @@ use bimap::BiMap; use dashmap::DashMap; use futures_util::future::join_all; use indexmap::{map::Entry, IndexMap, IndexSet}; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use snops_common::{ api::{AgentEnvInfo, EnvInfo}, node_targets::NodeTargets, + schema::{ + cannon::{ + sink::TxSink, + source::{ComputeTarget, QueryTarget, TxSource}, + }, + nodes::{ExternalNode, Node}, + ItemDocument, + }, state::{ AgentId, AgentPeer, AgentState, CannonId, EnvId, NetworkId, NodeKey, NodeState, ReconcileOptions, TxPipeId, @@ -22,20 +30,10 @@ use tracing::{error, info, trace, warn}; use self::error::*; use crate::{ - cannon::{ 
- file::TransactionSink, - sink::TxSink, - source::{ComputeTarget, QueryTarget, TxSource}, - CannonInstance, CannonInstanceMeta, - }, + apply::LoadedStorage, + cannon::{file::TransactionSink, CannonInstance, CannonInstanceMeta}, env::set::{get_agent_mappings, labels_from_nodes, pair_with_nodes, AgentMapping, BusyMode}, - error::DeserializeError, persist::PersistEnv, - schema::{ - nodes::{ExternalNode, Node}, - storage::LoadedStorage, - ItemDocument, - }, state::{Agent, GlobalState}, }; @@ -92,22 +90,6 @@ pub enum PortType { } impl Environment { - /// Deserialize (YAML) many documents into a `Vec` of documents. - pub fn deserialize(str: &str) -> Result, DeserializeError> { - serde_yaml::Deserializer::from_str(str) - .enumerate() - .map(|(i, doc)| ItemDocument::deserialize(doc).map_err(|e| DeserializeError { i, e })) - .collect() - } - - /// Deserialize (YAML) many documents into a `Vec` of documents. - pub fn deserialize_bytes(str: &[u8]) -> Result, DeserializeError> { - serde_yaml::Deserializer::from_slice(str) - .enumerate() - .map(|(i, doc)| ItemDocument::deserialize(doc).map_err(|e| DeserializeError { i, e })) - .collect() - } - /// Apply an environment spec. This will attempt to delegate the given node /// configurations to available agents, or update existing agents with new /// configurations. @@ -347,10 +329,12 @@ impl Environment { // prepare the storage after all the other documents // as it depends on the network id - let storage = storage_doc - .ok_or(PrepareError::MissingStorage)? 
- .prepare(&state, network) - .await?; + let storage = LoadedStorage::from_doc( + *storage_doc.ok_or(PrepareError::MissingStorage)?, + &state, + network, + ) + .await?; let storage_id = storage.id; diff --git a/crates/controlplane/src/error.rs b/crates/controlplane/src/error.rs index 89ad73f2..7ab71f0b 100644 --- a/crates/controlplane/src/error.rs +++ b/crates/controlplane/src/error.rs @@ -4,16 +4,6 @@ use snops_common::{impl_into_status_code, impl_into_type_str}; use strum_macros::AsRefStr; use thiserror::Error; -#[derive(Debug, Error)] -#[error("`{i}`: `{e}`")] -pub struct DeserializeError { - pub i: usize, - #[source] - pub e: serde_yaml::Error, -} - -impl_into_status_code!(DeserializeError); - #[derive(Debug, Error, AsRefStr)] pub enum StateError { #[error(transparent)] diff --git a/crates/controlplane/src/main.rs b/crates/controlplane/src/main.rs index 6f4a65ba..ba7a3c57 100644 --- a/crates/controlplane/src/main.rs +++ b/crates/controlplane/src/main.rs @@ -3,14 +3,17 @@ use std::{io, net::SocketAddr, sync::Arc}; use clap::Parser; use cli::Cli; use prometheus_http_query::Client as PrometheusClient; -use schema::storage::{DEFAULT_AGENT_BINARY, DEFAULT_AOT_BINARY}; -use snops_common::db::Database; +use snops_common::{ + db::Database, + schema::storage::{DEFAULT_AGENT_BINARY, DEFAULT_AOT_BINARY}, +}; use state::GlobalState; use tokio::select; use tracing::{error, info, level_filters::LevelFilter, trace}; use tracing_subscriber::{prelude::*, reload, EnvFilter}; pub mod agent_version; +pub mod apply; pub mod cannon; pub mod cli; pub mod db; @@ -19,7 +22,6 @@ pub mod error; pub mod events; pub mod logging; pub mod persist; -pub mod schema; pub mod server; pub mod state; diff --git a/crates/controlplane/src/persist/env.rs b/crates/controlplane/src/persist/env.rs index 4c85889c..5c99a277 100644 --- a/crates/controlplane/src/persist/env.rs +++ b/crates/controlplane/src/persist/env.rs @@ -2,14 +2,16 @@ use std::sync::Arc; use bimap::BiMap; use dashmap::DashMap; -use 
snops_common::state::TransactionSendState; -use snops_common::state::{CannonId, EnvId, NetworkId, NodeKey, StorageId}; +use snops_common::{ + schema::cannon::{sink::TxSink, source::TxSource}, + state::{CannonId, EnvId, NetworkId, NodeKey, StorageId, TransactionSendState}, +}; use tokio::sync::Semaphore; use super::prelude::*; use super::PersistNode; use crate::{ - cannon::{sink::TxSink, source::TxSource, tracker::TransactionTracker}, + cannon::tracker::TransactionTracker, env::{ error::{EnvError, PrepareError}, prepare_cannons, EnvNodeState, EnvPeer, Environment, @@ -249,15 +251,15 @@ mod tests { use snops_common::{ format::{read_dataformat, write_dataformat, DataFormat}, + schema::{ + cannon::{sink::TxSink, source::TxSource}, + persist::{TxSinkFormatHeader, TxSourceFormatHeader}, + }, state::{InternedId, NetworkId}, }; - use crate::{ - cannon::{sink::TxSink, source::TxSource}, - persist::{ - PersistEnv, PersistEnvFormatHeader, PersistNode, PersistNodeFormatHeader, - TxSinkFormatHeader, TxSourceFormatHeader, - }, + use crate::persist::{ + PersistEnv, PersistEnvFormatHeader, PersistNode, PersistNodeFormatHeader, }; macro_rules! 
case { diff --git a/crates/controlplane/src/persist/mod.rs b/crates/controlplane/src/persist/mod.rs index 0b2d2db9..79e4b813 100644 --- a/crates/controlplane/src/persist/mod.rs +++ b/crates/controlplane/src/persist/mod.rs @@ -1,15 +1,11 @@ mod agent; mod env; mod node; -mod sink; -mod source; mod storage; pub use agent::*; pub use env::*; pub use node::*; -pub use sink::*; -pub use source::*; pub use storage::*; pub(crate) mod prelude { diff --git a/crates/controlplane/src/persist/node.rs b/crates/controlplane/src/persist/node.rs index 5cd638bc..b25ddfec 100644 --- a/crates/controlplane/src/persist/node.rs +++ b/crates/controlplane/src/persist/node.rs @@ -1,7 +1,12 @@ -use snops_common::state::AgentId; +use snops_common::{ + schema::{ + nodes::{ExternalNode, Node}, + persist::NodeFormatHeader, + }, + state::AgentId, +}; use super::prelude::*; -use crate::schema::nodes::{ExternalNode, Node, NodeFormatHeader}; #[derive(Debug, Clone)] pub struct PersistNodeFormatHeader { @@ -90,13 +95,14 @@ mod tests { use snops_common::{ format::DataFormat, node_targets::NodeTargets, + schema::{ + nodes::{ExternalNode, Node}, + persist::NodeFormatHeader, + }, state::{HeightRequest, InternedId}, }; - use crate::{ - persist::{PersistNode, PersistNodeFormatHeader}, - schema::nodes::{ExternalNode, Node, NodeFormatHeader}, - }; + use crate::persist::{PersistNode, PersistNodeFormatHeader}; macro_rules! 
case { ($name:ident, $ty:ty, $a:expr, $b:expr) => { diff --git a/crates/controlplane/src/persist/storage.rs b/crates/controlplane/src/persist/storage.rs index a3df3f37..29106e61 100644 --- a/crates/controlplane/src/persist/storage.rs +++ b/crates/controlplane/src/persist/storage.rs @@ -3,20 +3,21 @@ use snops_checkpoint::RetentionPolicy; use snops_common::{ binaries::BinaryEntry, key_source::ACCOUNTS_KEY_ID, + schema::storage::STORAGE_DIR, state::{InternedId, NetworkId, StorageId}, }; use tracing::warn; use super::prelude::*; use crate::{ - cli::Cli, - schema::{ + apply::{ error::StorageError, - storage::{ + storage_helpers::{ pick_account_addr, pick_additional_addr, pick_commitee_addr, read_to_addrs, - LoadedStorage, STORAGE_DIR, }, + LoadedStorage, }, + cli::Cli, }; /// Metadata for storage that can be used to restore a loaded storage diff --git a/crates/controlplane/src/schema/cannon.rs b/crates/controlplane/src/schema/cannon.rs deleted file mode 100644 index ae963f33..00000000 --- a/crates/controlplane/src/schema/cannon.rs +++ /dev/null @@ -1,14 +0,0 @@ -use serde::Deserialize; -use snops_common::state::CannonId; - -use crate::cannon::{sink::TxSink, source::TxSource}; - -/// A document describing the node infrastructure for a test. -#[derive(Deserialize, Debug, Clone)] -pub struct Document { - pub name: CannonId, - pub description: Option, - - pub source: TxSource, - pub sink: TxSink, -} diff --git a/crates/controlplane/src/schema/infrastructure.rs b/crates/controlplane/src/schema/infrastructure.rs deleted file mode 100644 index 2efafc50..00000000 --- a/crates/controlplane/src/schema/infrastructure.rs +++ /dev/null @@ -1,9 +0,0 @@ -use serde::Deserialize; - -// TODO - -/// A document describing a test's infrastructure. 
-#[derive(Deserialize, Debug, Clone)] -pub struct Document { - // TODO -} diff --git a/crates/controlplane/src/schema/mod.rs b/crates/controlplane/src/schema/mod.rs deleted file mode 100644 index f0aa38a1..00000000 --- a/crates/controlplane/src/schema/mod.rs +++ /dev/null @@ -1,55 +0,0 @@ -use serde::Deserialize; -use snops_common::state::NodeKey; - -pub mod cannon; -pub mod error; -pub mod infrastructure; -pub mod nodes; -pub mod outcomes; -pub mod storage; - -// TODO: Considerations: -// TODO: - Generate json schema with https://docs.rs/schemars/latest/schemars/ -// TODO: - Do these types need to implement `Serialize`? - -/// A document representing all item types. -#[derive(Deserialize, Debug, Clone)] -#[serde(tag = "version")] -#[non_exhaustive] -pub enum ItemDocument { - #[serde(rename = "storage.snarkos.testing.monadic.us/v1")] - Storage(Box), - - #[serde(rename = "nodes.snarkos.testing.monadic.us/v1")] - Nodes(Box), - - #[serde(rename = "infrastructure.snarkos.testing.monadic.us/v1")] - Infrastructure(Box), - - #[serde(rename = "cannon.snarkos.testing.monadic.us/v1")] - Cannon(Box), -} - -#[cfg(test)] -mod test { - use crate::env::Environment; - - #[test] - fn deserialize_specs() { - for entry in std::fs::read_dir("../../specs") - .expect("failed to read specs dir") - .map(Result::unwrap) - { - let file_name = entry.file_name(); - let name = file_name.to_str().expect("failed to read spec file name"); - if !name.ends_with(".yaml") && !name.ends_with(".yml") { - continue; - } - - let data = std::fs::read(entry.path()).expect("failed to read spec file"); - if let Err(e) = Environment::deserialize_bytes(&data) { - panic!("failed to deserialize spec file {name}: {e}") - } - } - } -} diff --git a/crates/controlplane/src/schema/storage/loaded.rs b/crates/controlplane/src/schema/storage/loaded.rs deleted file mode 100644 index 8a2b3991..00000000 --- a/crates/controlplane/src/schema/storage/loaded.rs +++ /dev/null @@ -1,408 +0,0 @@ -use std::{fs, io::Write, 
os::unix::fs::PermissionsExt, path::PathBuf}; - -use futures_util::StreamExt; -use indexmap::IndexMap; -use rand::seq::IteratorRandom; -use sha2::{Digest, Sha256}; -use snops_checkpoint::RetentionPolicy; -use snops_common::{ - api::StorageInfo, - binaries::{BinaryEntry, BinarySource}, - key_source::KeySource, - state::{InternedId, KeyState, NetworkId, StorageId}, -}; -use tracing::{info, trace}; - -use super::{DEFAULT_AOT_BINARY, STORAGE_DIR}; -use crate::{cli::Cli, schema::error::StorageError, state::GlobalState}; - -// IndexMap -pub type AleoAddrMap = IndexMap; - -#[derive(Debug, Clone)] -pub struct LoadedStorage { - /// Storage ID - pub id: StorageId, - /// Network ID - pub network: NetworkId, - /// Version counter for this storage - incrementing will invalidate old - /// saved ledgers - pub version: u16, - /// committee lookup - pub committee: AleoAddrMap, - /// other accounts files lookup - pub accounts: IndexMap, - /// storage of checkpoints - pub retention_policy: Option, - /// whether agents using this storage should persist it - pub persist: bool, - /// whether to use the network's native genesis block - pub native_genesis: bool, - /// binaries available for this storage - pub binaries: IndexMap, -} - -impl LoadedStorage { - pub fn lookup_keysource_pk(&self, key: &KeySource) -> KeyState { - match key { - KeySource::Local => KeyState::Local, - KeySource::PrivateKeyLiteral(pk) => KeyState::Literal(pk.clone()), - KeySource::PublicKeyLiteral(_) => KeyState::None, - KeySource::ProgramLiteral(_) => KeyState::None, - KeySource::Committee(Some(i)) => self - .committee - .get_index(*i) - .map(|(_, pk)| pk.clone()) - .into(), - KeySource::Committee(None) => KeyState::None, - KeySource::Named(name, Some(i)) => self - .accounts - .get(name) - .and_then(|a| a.get_index(*i).map(|(_, pk)| pk.clone())) - .into(), - KeySource::Named(_name, None) => KeyState::None, - } - } - - pub fn lookup_keysource_addr(&self, key: &KeySource) -> KeyState { - match key { - 
KeySource::Local => KeyState::Local, - KeySource::PrivateKeyLiteral(_) => KeyState::None, - KeySource::PublicKeyLiteral(addr) => KeyState::Literal(addr.clone()), - KeySource::ProgramLiteral(addr) => KeyState::Literal(addr.clone()), - KeySource::Committee(Some(i)) => self - .committee - .get_index(*i) - .map(|(addr, _)| addr.clone()) - .into(), - KeySource::Committee(None) => KeyState::None, - KeySource::Named(name, Some(i)) => self - .accounts - .get(name) - .and_then(|a| a.get_index(*i).map(|(addr, _)| addr.clone())) - .into(), - KeySource::Named(_name, None) => KeyState::None, - } - } - - pub fn sample_keysource_pk(&self, key: &KeySource) -> KeyState { - match key { - KeySource::Local => KeyState::Local, - KeySource::PrivateKeyLiteral(pk) => KeyState::Literal(pk.clone()), - KeySource::PublicKeyLiteral(_) => KeyState::None, - KeySource::ProgramLiteral(_) => KeyState::None, - KeySource::Committee(Some(i)) => self - .committee - .get_index(*i) - .map(|(_, pk)| pk.clone()) - .into(), - KeySource::Committee(None) => self - .committee - .values() - .choose(&mut rand::thread_rng()) - .cloned() - .into(), - KeySource::Named(name, Some(i)) => self - .accounts - .get(name) - .and_then(|a| a.get_index(*i).map(|(_, pk)| pk.clone())) - .into(), - KeySource::Named(name, None) => self - .accounts - .get(name) - .and_then(|a| a.values().choose(&mut rand::thread_rng()).cloned()) - .into(), - } - } - - pub fn sample_keysource_addr(&self, key: &KeySource) -> KeyState { - match key { - KeySource::Local => KeyState::Local, - KeySource::PrivateKeyLiteral(_) => KeyState::None, - KeySource::PublicKeyLiteral(addr) => KeyState::Literal(addr.clone()), - KeySource::ProgramLiteral(addr) => KeyState::Literal(addr.clone()), - KeySource::Committee(Some(i)) => self - .committee - .get_index(*i) - .map(|(addr, _)| addr.clone()) - .into(), - KeySource::Committee(None) => self - .committee - .keys() - .choose(&mut rand::thread_rng()) - .cloned() - .into(), - KeySource::Named(name, Some(i)) => self 
- .accounts - .get(name) - .and_then(|a| a.get_index(*i).map(|(addr, _)| addr.clone())) - .into(), - KeySource::Named(name, None) => self - .accounts - .get(name) - .and_then(|a| a.keys().choose(&mut rand::thread_rng()).cloned()) - .into(), - } - } - - pub fn info(&self) -> StorageInfo { - let mut binaries: IndexMap<_, _> = self - .binaries - .iter() - .map(|(k, v)| (*k, v.with_api_path(self.network, self.id, *k))) - .collect(); - - // insert the default binary source information (so agents have a way to compare - // shasums and file size) - binaries - .entry(InternedId::default()) - .or_insert(DEFAULT_AOT_BINARY.with_api_path( - self.network, - self.id, - InternedId::default(), - )); - - StorageInfo { - id: self.id, - version: self.version, - retention_policy: self.retention_policy.clone(), - persist: self.persist, - native_genesis: self.native_genesis, - binaries, - } - } - - pub fn path(&self, state: &GlobalState) -> PathBuf { - self.path_cli(&state.cli) - } - - pub fn path_cli(&self, cli: &Cli) -> PathBuf { - let mut path = cli.path.join(STORAGE_DIR); - path.push(self.network.to_string()); - path.push(self.id.to_string()); - path - } - - /// Resolve the default binary for this storage - pub async fn resolve_default_binary( - &self, - state: &GlobalState, - ) -> Result { - self.resolve_binary(state, InternedId::default()).await - } - - /// Resolve the compute binary for this storage - pub async fn resolve_compute_binary( - &self, - state: &GlobalState, - ) -> Result { - self.resolve_binary(state, InternedId::compute_id()).await - } - - /// Resolve (find and download) a binary for this storage by id - pub async fn resolve_binary( - &self, - state: &GlobalState, - id: InternedId, - ) -> Result { - Self::resolve_binary_from_map(self.id, self.network, &self.binaries, state, id).await - } - - /// Resolve a binary entry for this storage by id - pub fn resolve_binary_entry( - &self, - id: InternedId, - ) -> Result<(InternedId, &BinaryEntry), StorageError> { - 
Self::resolve_binary_entry_from_map(self.id, &self.binaries, id) - } - - pub fn resolve_binary_entry_from_map( - storage_id: InternedId, - binaries: &IndexMap, - mut id: InternedId, - ) -> Result<(InternedId, &BinaryEntry), StorageError> { - let compute_id = InternedId::compute_id(); - - // if the binary id is "compute" and there is no "compute" binary override in - // the map, then we should use the default binary - if id == compute_id && !binaries.contains_key(&compute_id) { - id = InternedId::default(); - } - - // if the binary id is the default binary id and there is no default binary - // override in the map, - if id == InternedId::default() && !binaries.contains_key(&InternedId::default()) { - // then we should use the default AOT binary - return Ok((id, &DEFAULT_AOT_BINARY)); - } - - let bin = binaries - .get(&id) - .ok_or(StorageError::BinaryDoesNotExist(id, storage_id))?; - - Ok((id, bin)) - } - - pub async fn resolve_binary_from_map( - storage_id: InternedId, - network: NetworkId, - binaries: &IndexMap, - state: &GlobalState, - id: InternedId, - ) -> Result { - let (id, bin) = Self::resolve_binary_entry_from_map(storage_id, binaries, id)?; - - let id_str: &str = id.as_ref(); - - let remote_url = match bin.source.clone() { - // if the binary is a relative path, then we should use the path as is - // rather than downloading it - BinarySource::Path(path) => return Ok(path.clone()), - BinarySource::Url(url) => url, - }; - - // derive the path to the binary - let mut download_path = state.cli.path.join(STORAGE_DIR); - download_path.push(network.to_string()); - download_path.push(storage_id.to_string()); - download_path.push("binaries"); - download_path.push(id_str); - - // if the file already exists, ensure that it is the correct size and sha256 - if download_path.exists() { - let perms = download_path - .metadata() - .map_err(|e| StorageError::PermissionError(download_path.clone(), e))? 
- .permissions(); - if perms.mode() != 0o755 { - std::fs::set_permissions(&download_path, std::fs::Permissions::from_mode(0o755)) - .map_err(|e| StorageError::PermissionError(download_path.clone(), e))?; - } - - match bin.check_file_sha256(&download_path) { - Ok(None) => {} - Ok(Some(sha256)) => { - return Err(StorageError::BinarySha256Mismatch( - storage_id, - download_path, - bin.sha256.clone().unwrap_or_default(), - sha256, - )); - } - Err(e) => { - return Err(StorageError::BinaryCheckFailed( - storage_id, - download_path, - e.to_string(), - )); - } - } - - match bin.check_file_size(&download_path) { - // file is okay :) - Ok(None) => {} - Ok(Some(size)) => { - return Err(StorageError::BinarySizeMismatch( - storage_id, - download_path, - bin.size.unwrap_or_default(), - size, - )); - } - Err(e) => { - return Err(StorageError::BinaryCheckFailed( - storage_id, - download_path, - e.to_string(), - )); - } - } - - return Ok(download_path); - } - - let resp = reqwest::get(remote_url.clone()) - .await - .map_err(|e| StorageError::FailedToFetchBinary(id, remote_url.clone(), e))?; - - if resp.status() != reqwest::StatusCode::OK { - return Err(StorageError::FailedToFetchBinaryWithStatus( - id, - remote_url, - resp.status(), - )); - } - - if let Some(parent) = download_path.parent() { - fs::create_dir_all(parent) - .map_err(|e| StorageError::FailedToCreateBinaryFile(id, e))?; - } - - let mut file = std::fs::OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .open(&download_path) - .map_err(|e| StorageError::FailedToCreateBinaryFile(id, e))?; - - let mut digest = Sha256::new(); - let mut stream = resp.bytes_stream(); - let mut size = 0u64; - - while let Some(chunk) = stream.next().await { - match chunk { - Ok(chunk) => { - size += chunk.len() as u64; - file.write_all(&chunk) - .map_err(|e| StorageError::FailedToWriteBinaryFile(id, e))?; - digest.update(&chunk); - } - Err(e) => { - return Err(StorageError::FailedToFetchBinary(id, remote_url, e)); - } - } - 
} - - // check if the binary sha256 matches the expected sha256 - let sha256 = format!("{:x}", digest.finalize()); - if let Some(bin_sha256) = bin.sha256.as_ref() { - if bin_sha256.to_lowercase() != sha256 { - return Err(StorageError::BinarySha256Mismatch( - id, - download_path, - bin_sha256.clone(), - sha256, - )); - } - } - - // check if the binary size matches the expected size - if let Some(bin_size) = bin.size { - if bin_size != size { - return Err(StorageError::BinarySizeMismatch( - id, - download_path, - bin_size, - size, - )); - } - } - - info!( - "downloaded binary {storage_id}.{id_str} to {} ({size} bytes)", - download_path.display() - ); - trace!("binary {storage_id}.{id_str} has sha256 {sha256}"); - - let perms = download_path - .metadata() - .map_err(|e| StorageError::PermissionError(download_path.clone(), e))? - .permissions(); - if perms.mode() != 0o755 { - std::fs::set_permissions(&download_path, std::fs::Permissions::from_mode(0o755)) - .map_err(|e| StorageError::PermissionError(download_path.clone(), e))?; - } - - Ok(download_path) - } -} diff --git a/crates/controlplane/src/schema/storage/mod.rs b/crates/controlplane/src/schema/storage/mod.rs deleted file mode 100644 index eb06cdd0..00000000 --- a/crates/controlplane/src/schema/storage/mod.rs +++ /dev/null @@ -1,486 +0,0 @@ -use std::{ops::Deref, path::PathBuf, process::Stdio, sync::Arc}; - -use indexmap::IndexMap; -use serde::{Deserialize, Serialize}; -use snops_checkpoint::RetentionPolicy; -use snops_common::{ - aot_cmds::error::CommandError, - binaries::{BinaryEntry, BinarySource}, - constant::{SNARKOS_GENESIS_FILE, VERSION_FILE}, - key_source::ACCOUNTS_KEY_ID, - state::{InternedId, NetworkId, StorageId}, -}; -use tokio::process::Command; -use tracing::{error, info, trace, warn}; - -use super::error::{SchemaError, StorageError}; -use crate::{persist::PersistStorage, state::GlobalState}; - -mod accounts; -use accounts::*; -mod helpers; -pub use helpers::*; -mod loaded; -pub use loaded::*; -mod 
binaries; -pub use binaries::*; - -pub const STORAGE_DIR: &str = "storage"; - -/// A storage document. Explains how storage for a test should be set up. -#[derive(Deserialize, Debug, Clone)] -#[serde(rename_all = "kebab-case")] -pub struct Document { - pub id: StorageId, - /// Regen version - #[serde(default)] - pub regen: u16, - pub name: String, - pub description: Option, - /// Tell nodes not to re-download the storage data. - #[serde(default)] - pub persist: bool, - #[serde(default)] - pub generate: Option, - #[serde(default)] - pub connect: Option, - #[serde(default)] - pub retention_policy: Option, - /// The binaries list for this storage is used to determine which binaries - /// are used by the agents. - /// Overriding `default` will replace the node's default binary rather than - /// using snops' own default aot binary. - /// Overriding `compute` will replace the node's default binary only for - /// compute - #[serde(default)] - pub binaries: IndexMap, -} - -/// Data generation instructions. 
-#[derive(Deserialize, Debug, Clone, Serialize)] -pub struct StorageGeneration { - #[serde(default)] - pub genesis: Option, - - #[serde(default)] - pub accounts: IndexMap, - - #[serde(default)] - pub transactions: Vec, -} - -// TODO: Convert this into a struct similar to the execute action, then use -// compute agents to assemble these on the fly -#[derive(Deserialize, Debug, Clone, Serialize)] -pub struct Transaction { - pub file: PathBuf, - pub total: u64, - pub amount: u64, - pub sources: Vec, - pub destinations: Vec, -} - -#[derive(Deserialize, Debug, Clone, Serialize)] -#[serde(rename_all = "kebab-case")] -pub struct GenesisGeneration { - pub private_key: Option, - pub seed: Option, - pub additional_accounts: Option, - pub additional_accounts_balance: Option, - #[serde(flatten)] - pub balances: GenesisBalances, - #[serde(flatten)] - pub commissions: GenesisCommissions, - pub bonded_withdrawal: Option>, -} - -#[derive(Deserialize, Debug, Clone, Serialize)] -#[serde(untagged)] -pub enum GenesisBalances { - #[serde(rename_all = "kebab-case")] - Defined { - bonded_balances: IndexMap, - }, - #[serde(rename_all = "kebab-case")] - Generated { - committee_size: Option, - bonded_balance: Option, - }, -} - -#[derive(Deserialize, Debug, Clone, Serialize)] -#[serde(untagged)] -pub enum GenesisCommissions { - #[serde(rename_all = "kebab-case")] - Defined { - bonded_commissions: IndexMap, - }, - #[serde(rename_all = "kebab-case")] - Generated { bonded_commission: Option }, -} - -impl Default for GenesisGeneration { - fn default() -> Self { - Self { - seed: None, - private_key: None, - additional_accounts: None, - additional_accounts_balance: None, - balances: GenesisBalances::Generated { - committee_size: None, - bonded_balance: None, - }, - commissions: GenesisCommissions::Generated { - bonded_commission: None, - }, - bonded_withdrawal: None, - } - } -} - -impl Document { - pub async fn prepare( - self, - state: &GlobalState, - network: NetworkId, - ) -> Result, 
SchemaError> { - let id = self.id; - - // add the prepared storage to the storage map - - if state.storage.contains_key(&(network, id)) { - // TODO: we probably don't want to warn here. instead, it would be nice to - // hash/checksum the storage to compare it with the conflicting storage - warn!("a storage with the id {id} has already been prepared"); - } - - let base = state.storage_path(network, id); - let version_file = base.join(VERSION_FILE); - - let mut native_genesis = false; - - // TODO: The dir can be made by a previous run and the aot stuff can fail - // i.e an empty/incomplete directory can exist and we should check those - let mut exists = matches!(tokio::fs::try_exists(&base).await, Ok(true)); - - // warn if an existing block/ledger already exists - if exists { - warn!("The specified storage ID {id} already exists"); - } - - let old_version = get_version_from_path(&version_file).await?; - - info!( - "Storage {id} has version {old_version:?}. incoming version is {}", - self.regen - ); - - // wipe old storage when the version changes - if old_version != Some(self.regen) && exists { - info!("Storage {id} version changed, removing old storage"); - tokio::fs::remove_dir_all(&base) - .await - .map_err(|e| StorageError::RemoveStorage(version_file.clone(), e))?; - exists = false; - } - - // gather the binaries - let mut binaries = IndexMap::default(); - for (id, v) in self.binaries { - let mut entry = - BinaryEntry::try_from(v).map_err(|e| StorageError::BinaryParse(id, e))?; - if let BinarySource::Path(p) = &mut entry.source { - if !p.exists() { - return Err(StorageError::BinaryFileMissing(id, p.clone()).into()); - } - // canonicalize the path - if let Ok(canon) = p.canonicalize() { - trace!( - "resolved binary relative path from {} to {}", - p.display(), - canon.display() - ); - *p = canon - } - } - info!("Resolved binary {id}: {entry}"); - binaries.insert(id, entry); - } - - // resolve the default aot bin for this storage - let aot_bin = 
LoadedStorage::resolve_binary_from_map( - id, - network, - &binaries, - state, - InternedId::default(), - ) - .await?; - - tokio::fs::create_dir_all(&base) - .await - .map_err(|e| StorageError::GenerateStorage(id, e))?; - - // generate the block and ledger if we have generation params - if let (Some(generation), false) = (self.generate.as_ref(), exists) { - tracing::debug!("Generating storage for {id}"); - // generate the genesis block using the aot cli - let output = base.join(SNARKOS_GENESIS_FILE); - - match (self.connect, generation.genesis.as_ref()) { - (None, None) => { - native_genesis = true; - info!("{id}: using network native genesis") - } - (Some(ref url), _) => { - // downloaded genesis block is not native - let err = |e| StorageError::FailedToFetchGenesis(id, url.clone(), e); - - // I think its ok to reuse this error here - // because it just turns a failing response into an error - // or failing to turn it into bytes - let res = reqwest::get(url.clone()) - .await - .map_err(err)? - .error_for_status() - .map_err(err)? 
- .bytes() - .await - .map_err(err)?; - - tokio::fs::write(&output, res) - .await - .map_err(|e| StorageError::FailedToWriteGenesis(id, e))?; - } - (None, Some(genesis)) => { - // generated genesis block is not native - let mut command = Command::new(&aot_bin); - command - .stdout(Stdio::inherit()) - .stderr(Stdio::inherit()) - .env("NETWORK", network.to_string()) - .arg("genesis") - .arg("--output") - .arg(&output); - - // conditional seed flag - if let Some(seed) = genesis.seed { - command.arg("--seed").arg(seed.to_string()); - } - - // conditional genesis key flag - if let Some(private_key) = &genesis.private_key { - command.arg("--genesis-key").arg(private_key); - }; - - // generate committee based on the generation params - match &genesis.balances { - GenesisBalances::Generated { - committee_size, - bonded_balance, - } => { - command - .arg("--committee-output") - .arg(base.join("committee.json")); - - if let Some(committee_size) = committee_size { - command - .arg("--committee-size") - .arg(committee_size.to_string()); - } - if let Some(bonded_balance) = bonded_balance { - command - .arg("--bonded-balance") - .arg(bonded_balance.to_string()); - } - } - GenesisBalances::Defined { bonded_balances } => { - command - .arg("--bonded-balances") - .arg(serde_json::to_string(&bonded_balances).unwrap()); - } - } - - // generate committee commissions based on the generation params - match &genesis.commissions { - GenesisCommissions::Generated { bonded_commission } => { - if let Some(bonded_commission) = bonded_commission { - command - .arg("--bonded-balance") - .arg(bonded_commission.to_string()); - } - } - GenesisCommissions::Defined { bonded_commissions } => { - command - .arg("--bonded-commissions") - .arg(serde_json::to_string(&bonded_commissions).unwrap()); - } - } - - if let Some(withdrawal) = &genesis.bonded_withdrawal { - command - .arg("--bonded-withdrawal") - .arg(serde_json::to_string(withdrawal).unwrap()); - } - - // conditionally add additional accounts - 
if let Some(additional_accounts) = genesis.additional_accounts { - command - .arg("--additional-accounts") - .arg(additional_accounts.to_string()) - .arg("--additional-accounts-output") - .arg(base.join("accounts.json")); - } - - if let Some(balance) = genesis.additional_accounts_balance { - command - .arg("--additional-accounts-balance") - .arg(balance.to_string()); - } - - info!("Generating genesis for {id} with command: {command:?}"); - - let res = command - .spawn() - .map_err(|e| { - StorageError::Command( - CommandError::action("spawning", "aot genesis", e), - id, - ) - })? - .wait() - .await - .map_err(|e| { - StorageError::Command( - CommandError::action("waiting", "aot genesis", e), - id, - ) - })?; - - if !res.success() { - warn!("failed to run genesis generation command..."); - } - - // ensure the genesis block was generated - tokio::fs::try_exists(&output) - .await - .map_err(|e| StorageError::FailedToGenGenesis(id, e))?; - } - } - } - - let mut accounts = IndexMap::new(); - accounts.insert( - *ACCOUNTS_KEY_ID, - read_to_addrs(pick_additional_addr, &base.join("accounts.json")).await?, - ); - - if let Some(generation) = &self.generate { - for (name, account) in &generation.accounts { - let path = base.join(format!("{}.json", name)); - - if !path.exists() { - info!("generating accounts for {name}"); - - let mut command = Command::new(&aot_bin); - command - .stdout(Stdio::inherit()) - .stderr(Stdio::inherit()) - .env("NETWORK", network.to_string()) - .arg("accounts") - .arg(account.count.to_string()) - .arg("--output") - .arg(&path); - if let Some(seed) = account.seed { - command.arg("--seed").arg(seed.to_string()); - } - - let res = command - .spawn() - .map_err(|e| { - StorageError::Command( - CommandError::action("spawning", "aot accounts", e), - id, - ) - })? 
- .wait() - .await - .map_err(|e| { - StorageError::Command( - CommandError::action("waiting", "aot accounts", e), - id, - ) - })?; - - if !res.success() { - warn!("failed to run account generation command for {name}..."); - } - } - - accounts.insert(*name, read_to_addrs(pick_account_addr, &path).await?); - } - } - - // write the regen version to a "version" file - tokio::fs::write(&version_file, self.regen.to_string()) - .await - .map_err(|e| StorageError::WriteVersion(version_file.clone(), e))?; - - let committee_file = base.join("committee.json"); - - // if the committee was specified in the generation params, use that - if let ( - Some(StorageGeneration { - genesis: - Some(GenesisGeneration { - private_key, - balances: GenesisBalances::Defined { bonded_balances }, - .. - }), - .. - }), - false, - ) = (self.generate.as_ref(), committee_file.exists()) - { - // TODO: should be possible to get committee from genesis blocks - let mut balances: IndexMap<_, _> = bonded_balances - .iter() - .map(|(addr, bal)| (addr.clone(), (String::new(), *bal))) - .collect(); - - // derive the committee member 0's key - if let (Some(key), true) = (private_key, !balances.is_empty()) { - balances[0].0.clone_from(key) - } - - // write balances to committee.json if if doesn't exist - tokio::fs::write(&committee_file, serde_json::to_string(&balances).unwrap()) - .await - .map_err(|e| StorageError::WriteCommittee(committee_file.clone(), e))?; - }; - // otherwise read the committee from the committee.json file - let committee = read_to_addrs(pick_commitee_addr, &committee_file).await?; - - let storage = Arc::new(LoadedStorage { - version: self.regen, - id, - network, - committee, - accounts, - retention_policy: self.retention_policy, - persist: self.persist, - native_genesis, - binaries, - }); - if let Err(e) = state - .db - .storage - .save(&(network, id), &PersistStorage::from(storage.deref())) - { - error!("failed to save storage meta: {e}"); - } - state.storage.insert((network, id), 
storage.clone()); - - Ok(storage) - } -} diff --git a/crates/controlplane/src/server/api.rs b/crates/controlplane/src/server/api.rs index 128ca728..87484043 100644 --- a/crates/controlplane/src/server/api.rs +++ b/crates/controlplane/src/server/api.rs @@ -15,16 +15,13 @@ use snops_common::{ lasso::Spur, node_targets::NodeTargets, rpc::control::agent::AgentMetric, + schema::cannon::source::QueryTarget, state::{id_or_none, AgentModeOptions, AgentState, CannonId, EnvId, KeyState, NodeKey}, }; use tarpc::context; use super::{actions, error::ServerError, event_ws, models::AgentStatusResponse}; -use crate::{ - cannon::{router::redirect_cannon_routes, source::QueryTarget}, - make_env_filter, - state::AppState, -}; +use crate::{cannon::router::redirect_cannon_routes, make_env_filter, state::AppState}; use crate::{ env::{EnvPeer, Environment}, state::AgentFlags, @@ -464,7 +461,7 @@ async fn get_mappings( struct FindAgents { mode: AgentModeOptions, env: Option, - #[serde(default, deserialize_with = "crate::schema::nodes::deser_label")] + #[serde(default, deserialize_with = "snops_common::schema::nodes::deser_label")] labels: IndexSet, all: bool, include_offline: bool, @@ -605,7 +602,7 @@ async fn post_env_apply( State(state): State, body: String, ) -> Response { - let documents = match Environment::deserialize(&body) { + let documents = match snops_common::schema::deserialize_docs(&body) { Ok(documents) => documents, Err(e) => return ServerError::from(e).into_response(), }; diff --git a/crates/controlplane/src/server/content.rs b/crates/controlplane/src/server/content.rs index c6066ee5..b4d86ad4 100644 --- a/crates/controlplane/src/server/content.rs +++ b/crates/controlplane/src/server/content.rs @@ -10,16 +10,14 @@ use axum::{ use http::{StatusCode, Uri}; use snops_common::{ binaries::{BinaryEntry, BinarySource}, + schema::storage::{DEFAULT_AGENT_BINARY, DEFAULT_AOT_BINARY}, state::{InternedId, NetworkId, StorageId}, }; use tower::Service; use 
tower_http::services::ServeFile; use crate::{ - schema::{ - error::StorageError, - storage::{DEFAULT_AGENT_BINARY, DEFAULT_AOT_BINARY}, - }, + apply::error::StorageError, server::error::ServerError, state::{AppState, GlobalState}, unwrap_or_not_found, diff --git a/crates/controlplane/src/server/error.rs b/crates/controlplane/src/server/error.rs index 8c37c1ee..2e4ab353 100644 --- a/crates/controlplane/src/server/error.rs +++ b/crates/controlplane/src/server/error.rs @@ -4,15 +4,14 @@ use serde::{ser::SerializeStruct, Serialize, Serializer}; use serde_json::json; use snops_common::{ aot_cmds::AotCmdError, db::error::DatabaseError, events::TransactionAbortReason, - impl_into_status_code, impl_into_type_str, + impl_into_status_code, impl_into_type_str, schema::error::DeserializeError, }; use thiserror::Error; use crate::{ + apply::error::{SchemaError, StorageError}, cannon::error::CannonError, env::error::{EnvError, EnvRequestError, ExecutionError}, - error::DeserializeError, - schema::error::{SchemaError, StorageError}, }; #[derive(Debug, Error, strum_macros::AsRefStr)] diff --git a/crates/controlplane/src/state/external_peers.rs b/crates/controlplane/src/state/external_peers.rs index df360bfb..903e7b33 100644 --- a/crates/controlplane/src/state/external_peers.rs +++ b/crates/controlplane/src/state/external_peers.rs @@ -3,16 +3,16 @@ use std::{collections::HashMap, net::SocketAddr, sync::Arc, time::Duration}; use chrono::{TimeDelta, Utc}; use futures_util::future; use serde_json::Value; -use snops_common::state::{EnvId, LatestBlockInfo, NetworkId, NodeKey}; +use snops_common::{ + schema::nodes::ExternalNode, + state::{EnvId, LatestBlockInfo, NetworkId, NodeKey}, +}; use tokio::{sync::mpsc, time::timeout}; use super::{snarkos_request, AgentClient, GlobalState}; -use crate::{ - env::{ - cache::{ABlockHash, ATransactionId, MAX_BLOCK_RANGE}, - EnvNodeState, EnvPeer, - }, - schema::nodes::ExternalNode, +use crate::env::{ + cache::{ABlockHash, ATransactionId, 
MAX_BLOCK_RANGE}, + EnvNodeState, EnvPeer, }; type ExtPeerPair = (NodeKey, SocketAddr); diff --git a/crates/controlplane/src/state/global.rs b/crates/controlplane/src/state/global.rs index f945bc71..ea1ea3ca 100644 --- a/crates/controlplane/src/state/global.rs +++ b/crates/controlplane/src/state/global.rs @@ -9,6 +9,7 @@ use snops_common::{ constant::ENV_AGENT_KEY, events::Event, node_targets::NodeTargets, + schema::storage::STORAGE_DIR, state::{ AgentId, AgentPeer, AgentState, EnvId, LatestBlockInfo, NetworkId, NodeType, StorageId, }, @@ -22,12 +23,12 @@ use super::{ AddrMap, AgentClient, AgentPool, EnvMap, StorageMap, }; use crate::{ + apply::LoadedStorage, cli::Cli, db::Database, env::{cache::NetworkCache, error::EnvRequestError, Environment, PortType}, error::StateError, events::Events, - schema::storage::{LoadedStorage, STORAGE_DIR}, server::error::StartError, ReloadHandler, }; diff --git a/crates/controlplane/src/state/mod.rs b/crates/controlplane/src/state/mod.rs index 3773fd45..3ef81f93 100644 --- a/crates/controlplane/src/state/mod.rs +++ b/crates/controlplane/src/state/mod.rs @@ -18,7 +18,7 @@ pub use global::*; pub use reconcile::*; pub use rpc::*; -use crate::{env::Environment, schema::storage::LoadedStorage}; +use crate::{apply::LoadedStorage, env::Environment}; pub type AppState = Arc; /// Map of agent ids to agents diff --git a/devops/README.md b/devops/README.md index 328c89f3..e26b7c86 100644 --- a/devops/README.md +++ b/devops/README.md @@ -1,31 +1,59 @@ -## Local Development +# Kubernetes Integration -### Prereqs -1. Install [`kind`](https://kind.sigs.k8s.io/) +## Developer Environment -### Setup +### Prerequisites + +1. [Docker](https://www.docker.com/) +1. [`kind`](https://kind.sigs.k8s.io/) (Kubernetes in docker) + +### Start Environment + +1. Build snops containers + + ```bash + cargo xtask containers + ``` + +2. Create development Kubernetes cluster + + ```bash + kind create cluster + ``` + +3. Load locally built containers into `kind` -1. 
`cargo xtask containers` - Build snops containers -2. `kind create cluster` - Create development Kubernetes cluster -3. `kind load docker-image snops snops-agent` -4. `cat devops/snops.*.yaml | kubectl --context kind-kind apply -f -` -5. Deploy environment + ```bash + kind load docker-image snops snops-agent + ``` + +4. Install snops and agents into the Kubernetes cluster + + ```bash + cat devops/snops.*.yaml | kubectl --context kind-kind apply -f - + ``` + +5. Deploy snarkops environment ```bash kubectl --context kind-kind exec -it deployments/snops-controlplane -- scli env apply - Date: Fri, 6 Dec 2024 23:21:32 -0500 Subject: [PATCH 03/24] refactor(cli,agent): use url::form_urlencoded serializer for query building --- Cargo.lock | 2 +- crates/agent/src/cli.rs | 22 ++++++++++++---------- crates/cli/Cargo.toml | 2 +- crates/cli/src/events.rs | 11 +++++++---- crates/controlplane/Cargo.toml | 2 +- 5 files changed, 22 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 053c5dd4..52999180 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4513,7 +4513,7 @@ dependencies = [ "snops-common", "tokio", "tokio-tungstenite", - "urlencoding", + "url", ] [[package]] diff --git a/crates/agent/src/cli.rs b/crates/agent/src/cli.rs index 19ce5273..74ae3529 100644 --- a/crates/agent/src/cli.rs +++ b/crates/agent/src/cli.rs @@ -112,18 +112,20 @@ impl Cli { // get the endpoint let endpoint = &self.endpoint; - let mut query = format!("/agent?mode={}", u8::from(self.modes)); + let mut qs = url::form_urlencoded::Serializer::new(String::new()); + + qs.append_pair("mode", &u8::from(self.modes).to_string()); // Add agent version - query.push_str(&format!("&version={}", env!("CARGO_PKG_VERSION"))); + qs.append_pair("version", env!("CARGO_PKG_VERSION")); // add &id= - query.push_str(&format!("&id={}", self.id)); + qs.append_pair("id", self.id.as_ref()); // add local pk flag if let Some(file) = self.private_key_file.as_ref() { if fs::metadata(file).is_ok() { - 
query.push_str("&local_pk=true"); + qs.append_pair("local_pk", "true"); } else { warn!("Private-key-file flag ignored as the file was not found: {file:?}") } @@ -132,15 +134,15 @@ impl Cli { // add &labels= if id is present if let Some(labels) = &self.labels { info!("Using labels: {:?}", labels); - query.push_str(&format!( - "&labels={}", - labels + qs.append_pair( + "labels", + &labels .iter() .filter(|s| !s.is_empty()) .map(|s| s.trim()) .collect::>() - .join(",") - )); + .join(","), + ); } let (is_tls, host) = endpoint @@ -153,7 +155,7 @@ impl Cli { let ws_uri = Uri::builder() .scheme(if is_tls { "wss" } else { "ws" }) .authority(addr.to_owned()) - .path_and_query(query) + .path_and_query(format!("/agent?{}", qs.finish())) .build() .unwrap(); diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 3653af9f..d998029a 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -25,4 +25,4 @@ serde_json.workspace = true snops-common = { workspace = true, features = ["aot_cmds", "schema"] } tokio = { workspace = true, features = ["macros", "signal", "rt-multi-thread"] } tokio-tungstenite.workspace = true -urlencoding = "2.1.3" +url = { workspace = true, features = ["serde"] } diff --git a/crates/cli/src/events.rs b/crates/cli/src/events.rs index d65d96e4..edb595e9 100644 --- a/crates/cli/src/events.rs +++ b/crates/cli/src/events.rs @@ -38,10 +38,13 @@ impl EventsClient { }; let req = Uri::from_str(&match filter { - Some(filter) => format!( - "{proto}://{hostname}/api/v1/events?filter={}", - urlencoding::encode(&filter.to_string()) - ), + Some(filter) => { + let qs = url::form_urlencoded::Serializer::new(String::new()) + .append_pair("filter", &filter.to_string()) + .finish(); + + format!("{proto}://{hostname}/api/v1/events?{qs}") + } None => format!("{proto}://{hostname}/api/v1/events"), }) .context("Invalid URI")? 
diff --git a/crates/controlplane/Cargo.toml b/crates/controlplane/Cargo.toml index c1d894bb..fd6dd329 100644 --- a/crates/controlplane/Cargo.toml +++ b/crates/controlplane/Cargo.toml @@ -55,5 +55,5 @@ tower-http.workspace = true tracing-appender.workspace = true tracing.workspace = true tracing-subscriber.workspace = true -url = { workspace = true, features = ["serde"] } +url = { workspace = true } uuid = { workspace = true, features = ["fast-rng", "v4"] } From 0f86dff98b0ac0b8d7aea81700391342ea3a1c61 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sat, 7 Dec 2024 00:19:21 -0500 Subject: [PATCH 04/24] feat(cli): spec helper commands --- Cargo.lock | 1 + crates/cli/Cargo.toml | 1 + crates/cli/src/commands/mod.rs | 4 + crates/cli/src/commands/spec.rs | 120 +++++++++++++++++++ crates/common/src/schema/mod.rs | 16 +++ crates/common/src/schema/nodes.rs | 44 ++++++- crates/common/src/schema/persist/doc_node.rs | 8 +- crates/controlplane/src/env/mod.rs | 59 +++------ 8 files changed, 206 insertions(+), 47 deletions(-) create mode 100644 crates/cli/src/commands/spec.rs diff --git a/Cargo.lock b/Cargo.lock index 52999180..d1c19185 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4506,6 +4506,7 @@ dependencies = [ "clap_complete", "futures-util", "http 1.1.0", + "indexmap 2.6.0", "reqwest 0.12.8", "rustls 0.23.15", "serde", diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index d998029a..0f0e25a6 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -18,6 +18,7 @@ clap_complete.workspace = true clap-stdin.workspace = true futures-util.workspace = true http.workspace = true +indexmap = { workspace = true, features = ["serde"] } reqwest = { workspace = true, features = ["json"] } rustls.workspace = true serde.workspace = true diff --git a/crates/cli/src/commands/mod.rs b/crates/cli/src/commands/mod.rs index 4f5eaab1..d586d8a3 100644 --- a/crates/cli/src/commands/mod.rs +++ b/crates/cli/src/commands/mod.rs @@ -10,6 +10,7 @@ pub(crate) static DUMMY_ID: &str = 
"dummy_value___"; mod agent; mod env; +mod spec; #[derive(Debug, Parser)] pub enum Commands { @@ -23,6 +24,8 @@ pub enum Commands { Agent(agent::Agent), #[clap(alias = "e")] Env(env::Env), + #[clap(alias = "s")] + Spec(spec::Spec), SetLogLevel { level: String, }, @@ -68,6 +71,7 @@ impl Commands { client.close().await?; return Ok(()); } + Commands::Spec(spec) => return spec.command.run(url, client).await, #[cfg(feature = "mangen")] Commands::Man(mangen) => { mangen.run( diff --git a/crates/cli/src/commands/spec.rs b/crates/cli/src/commands/spec.rs new file mode 100644 index 00000000..668102ce --- /dev/null +++ b/crates/cli/src/commands/spec.rs @@ -0,0 +1,120 @@ +use anyhow::{anyhow, Result}; +use clap::{Parser, ValueHint}; +use clap_stdin::FileOrStdin; +use indexmap::IndexMap; +use reqwest::Client; +use snops_common::schema::ItemDocument; + +#[derive(Debug, Parser)] +pub struct Spec { + #[clap(subcommand)] + pub command: SpecCommands, +} + +#[derive(Debug, Parser)] +pub enum SpecCommands { + /// Extract all node keys from a spec file. + NodeKeys { + /// The environment spec file. + #[clap(value_hint = ValueHint::AnyPath)] + spec: FileOrStdin, + /// When present, include external keys. + #[clap(long)] + external: bool, + }, + /// Extract all nodes from a spec file. + Nodes { + /// The environment spec file. + #[clap(value_hint = ValueHint::AnyPath)] + spec: FileOrStdin, + }, + /// Count how many agents would be needed to run the spec. + NumAgents { + /// The environment spec file. + #[clap(value_hint = ValueHint::AnyPath)] + spec: FileOrStdin, + }, + /// Get the network id a spec. + Network { + /// The environment spec file. + #[clap(value_hint = ValueHint::AnyPath)] + spec: FileOrStdin, + }, + /// Check the spec for errors. + Check { + /// The environment spec file. 
+ #[clap(value_hint = ValueHint::AnyPath)] + spec: FileOrStdin, + }, +} + +impl SpecCommands { + pub async fn run(self, _url: &str, _client: Client) -> Result<()> { + match self { + SpecCommands::NodeKeys { spec, external } => { + let docs = snops_common::schema::deserialize_docs(&spec.contents()?)?; + let keys = docs + .into_iter() + .filter_map(|doc| doc.node_owned()) + .flat_map(|doc| { + let internal = doc + .expand_internal_replicas() + .map(|r| r.0) + // Collection has to happen here so `doc` is dropped + .collect::>(); + internal.into_iter().chain(if external { + doc.external.into_keys().collect::>() + } else { + vec![] + }) + }) + .collect::>(); + + println!("{}", serde_json::to_string_pretty(&keys)?); + Ok(()) + } + SpecCommands::Nodes { spec } => { + let docs = snops_common::schema::deserialize_docs(&spec.contents()?)?; + + // Get nodes from the spec with retained order + let nodes = docs + .into_iter() + .filter_map(|doc| doc.node_owned()) + .flat_map(|doc| doc.expand_internal_replicas().collect::>()) + .collect::>(); + + println!("{}", serde_json::to_string_pretty(&nodes)?); + Ok(()) + } + SpecCommands::Network { spec } => { + let docs = snops_common::schema::deserialize_docs(&spec.contents()?)?; + let network = docs + .into_iter() + .filter_map(|doc| doc.node_owned()) + .map(|doc| doc.network.unwrap_or_default()) + .next() + .ok_or_else(|| anyhow!("No network id found in spec"))?; + + println!("{}", network); + Ok(()) + } + SpecCommands::NumAgents { spec } => { + let docs = snops_common::schema::deserialize_docs(&spec.contents()?)?; + let num_agents = get_num_agents_for_spec(&docs); + println!("{num_agents}"); + Ok(()) + } + SpecCommands::Check { spec } => { + let _ = snops_common::schema::deserialize_docs(&spec.contents()?)?; + println!("ok"); + Ok(()) + } + } + } +} + +pub fn get_num_agents_for_spec(docs: &[ItemDocument]) -> usize { + docs.iter() + .filter_map(|doc| doc.node().map(|n| n.expand_internal_replicas().count())) + .sum::() +} diff --git 
a/crates/common/src/schema/mod.rs b/crates/common/src/schema/mod.rs index af71dd17..55af45a2 100644 --- a/crates/common/src/schema/mod.rs +++ b/crates/common/src/schema/mod.rs @@ -48,6 +48,22 @@ pub fn deserialize_docs_bytes(str: &[u8]) -> Result, Deseriali .collect() } +impl ItemDocument { + pub fn node(&self) -> Option<&NodesDocument> { + match self { + ItemDocument::Nodes(n) => Some(n.as_ref()), + _ => None, + } + } + + pub fn node_owned(self) -> Option { + match self { + ItemDocument::Nodes(n) => Some(*n), + _ => None, + } + } +} + #[cfg(test)] mod test { use super::deserialize_docs_bytes; diff --git a/crates/common/src/schema/nodes.rs b/crates/common/src/schema/nodes.rs index 66fb6625..0348d4cf 100644 --- a/crates/common/src/schema/nodes.rs +++ b/crates/common/src/schema/nodes.rs @@ -1,4 +1,7 @@ -use std::net::{IpAddr, SocketAddr}; +use std::{ + net::{IpAddr, SocketAddr}, + num::NonZeroUsize, +}; use fixedbitset::FixedBitSet; use indexmap::{IndexMap, IndexSet}; @@ -33,6 +36,43 @@ pub struct NodesDocument { pub nodes: IndexMap, } +impl NodesDocument { + pub fn expand_internal_replicas(&self) -> impl Iterator + '_ { + self.nodes.iter().flat_map(|(doc_node_key, doc_node)| { + let num_replicas = doc_node.replicas.map(|r| r.get()).unwrap_or(1); + + // Iterate over the replicas + (0..num_replicas.min(10000)).map(move |i| { + let node_key = match num_replicas { + // If there is only one replica, use the doc_node_key + 1 => doc_node_key.to_owned(), + // If there are multiple replicas, append the index to the + // doc_node_key + _ => { + let mut node_key = doc_node_key.to_owned(); + if !node_key.id.is_empty() { + node_key.id.push('-'); + } + node_key.id.push_str(&i.to_string()); + node_key + } + }; + + // Replace the key with a new one + let mut node = doc_node.to_owned(); + node.replicas = None; + + // Update the node's private key + if let Some(key) = node.key.as_mut() { + *key = key.with_index(i); + } + + (node_key, node) + }) + }) + } +} + #[derive(Debug, Clone, 
PartialEq, Eq, Serialize)] pub struct ExternalNode { // NOTE: these fields must be validated at runtime, because validators require `bft` to be set, @@ -141,7 +181,7 @@ pub struct Node { pub online: bool, /// When specified, creates a group of nodes, all with the same /// configuration. - pub replicas: Option, + pub replicas: Option, /// The private key to start the node with. pub key: Option, /// Height of ledger to inherit. diff --git a/crates/common/src/schema/persist/doc_node.rs b/crates/common/src/schema/persist/doc_node.rs index e8e9263c..99ef4799 100644 --- a/crates/common/src/schema/persist/doc_node.rs +++ b/crates/common/src/schema/persist/doc_node.rs @@ -1,3 +1,5 @@ +use std::num::NonZeroUsize; + use lasso::Spur; use crate::schema::nodes::{ExternalNode, Node}; @@ -103,7 +105,7 @@ impl DataFormat for Node { ) -> Result { let mut written = 0; written += self.online.write_data(writer)?; - written += self.replicas.write_data(writer)?; + written += self.replicas.map(NonZeroUsize::get).write_data(writer)?; written += self.key.write_data(writer)?; written += self.height.write_data(writer)?; written += self.labels.write_data(writer)?; @@ -120,7 +122,7 @@ impl DataFormat for Node { header: &Self::Header, ) -> Result { let online = reader.read_data(&())?; - let replicas = reader.read_data(&())?; + let replicas: Option = reader.read_data(&())?; let key = reader.read_data(&header.key_source)?; let height = reader.read_data(&header.height_request)?; let labels = Vec::::read_data(reader, &())?; @@ -136,7 +138,7 @@ impl DataFormat for Node { Ok(Node { online, - replicas, + replicas: replicas.and_then(NonZeroUsize::new), key, height, labels: labels.into_iter().collect(), diff --git a/crates/controlplane/src/env/mod.rs b/crates/controlplane/src/env/mod.rs index 550681e1..58b36992 100644 --- a/crates/controlplane/src/env/mod.rs +++ b/crates/controlplane/src/env/mod.rs @@ -166,50 +166,25 @@ impl Environment { // set of resolved keys that will be present (new and old) let 
mut agent_keys = HashSet::new(); - // flatten replicas - for (doc_node_key, mut doc_node) in nodes.nodes { - let num_replicas = doc_node.replicas.unwrap_or(1); - // nobody needs more than 10k replicas anyway - for i in 0..num_replicas.min(10000) { - let node_key = match num_replicas { - 0 => Err(PrepareError::NodeHas0Replicas)?, - 1 => doc_node_key.to_owned(), - _ => { - let mut node_key = doc_node_key.to_owned(); - if !node_key.id.is_empty() { - node_key.id.push('-'); - } - node_key.id.push_str(&i.to_string()); - node_key - } - }; - agent_keys.insert(node_key.clone()); - - // nodes in flattened_nodes have replicas unset - doc_node.replicas.take(); - - // replace the key with a new one - let mut node = doc_node.to_owned(); - if let Some(key) = node.key.as_mut() { - *key = key.with_index(i); - } + for (node_key, node) in nodes.expand_internal_replicas() { + // Track this node as a potential agent + agent_keys.insert(node_key.clone()); + + // Skip delegating nodes that are already present in the node map + // Agents are able to determine what updates need to be applied + // based on their resolved node states. + if node_peers.contains_left(&node_key) { + info!("{env_id}: updating node {node_key}"); + updated_states.insert(node_key, EnvNodeState::Internal(node)); + continue; + } - // Skip delegating nodes that are already present in the node map - // Agents are able to determine what updates need to be applied - // based on their resolved node states. - if node_peers.contains_left(&node_key) { - info!("{env_id}: updating node {node_key}"); - updated_states.insert(node_key, EnvNodeState::Internal(node)); - continue; + match incoming_states.entry(node_key) { + Entry::Occupied(ent) => { + Err(PrepareError::DuplicateNodeKey(ent.key().clone()))? } - - match incoming_states.entry(node_key) { - Entry::Occupied(ent) => { - Err(PrepareError::DuplicateNodeKey(ent.key().clone()))? 
- } - Entry::Vacant(ent) => ent.insert(EnvNodeState::Internal(node)), - }; - } + Entry::Vacant(ent) => ent.insert(EnvNodeState::Internal(node)), + }; } // list of nodes that will be removed after applying this document From 86d1b30fee8cb45c6864a14eac7381c240cb725e Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sat, 7 Dec 2024 00:19:41 -0500 Subject: [PATCH 05/24] docs(cli): update CLI docs --- snops_book/user_guide/clis/SNOPS_AGENT.md | 4 +- snops_book/user_guide/clis/SNOPS_CLI.md | 85 +++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/snops_book/user_guide/clis/SNOPS_AGENT.md b/snops_book/user_guide/clis/SNOPS_AGENT.md index d5a40e54..72a1b1d2 100644 --- a/snops_book/user_guide/clis/SNOPS_AGENT.md +++ b/snops_book/user_guide/clis/SNOPS_AGENT.md @@ -20,6 +20,8 @@ This document contains the help content for the `snops-agent` command-line progr ###### **Options:** * `--endpoint ` — Control plane endpoint address (IP, or wss://host, http://host) + + Default value: `127.0.0.1:1234` * `--id ` — Agent ID, used to identify the agent in the network * `--private-key-file ` — Locally provided private key file, used for envs where private keys are locally provided * `--labels ` — Labels to attach to the agent, used for filtering and grouping @@ -28,7 +30,7 @@ This document contains the help content for the `snops-agent` command-line progr Default value: `./snops-data` * `--external ` — Enable the agent to fetch its external address. 
Necessary to determine which agents are on shared networks, and for external-to-external connections * `--internal ` — Manually specify internal addresses -* `--bind ` +* `--bind ` — Bind address for the agent to listen on Default value: `0.0.0.0` * `--node ` — Specify the IP address and port for the node server diff --git a/snops_book/user_guide/clis/SNOPS_CLI.md b/snops_book/user_guide/clis/SNOPS_CLI.md index 305d778e..191ec104 100644 --- a/snops_book/user_guide/clis/SNOPS_CLI.md +++ b/snops_book/user_guide/clis/SNOPS_CLI.md @@ -41,6 +41,12 @@ This document contains the help content for the `snops-cli` command-line program * [`snops-cli env mappings`↴](#snops-cli-env-mappings) * [`snops-cli env program`↴](#snops-cli-env-program) * [`snops-cli env storage`↴](#snops-cli-env-storage) +* [`snops-cli spec`↴](#snops-cli-spec) +* [`snops-cli spec node-keys`↴](#snops-cli-spec-node-keys) +* [`snops-cli spec nodes`↴](#snops-cli-spec-nodes) +* [`snops-cli spec num-agents`↴](#snops-cli-spec-num-agents) +* [`snops-cli spec network`↴](#snops-cli-spec-network) +* [`snops-cli spec check`↴](#snops-cli-spec-check) * [`snops-cli set-log-level`↴](#snops-cli-set-log-level) * [`snops-cli events`↴](#snops-cli-events) * [`snops-cli man`↴](#snops-cli-man) @@ -55,6 +61,7 @@ This document contains the help content for the `snops-cli` command-line program * `autocomplete` — Generate shell completions * `agent` — For interacting with snop agents * `env` — For interacting with snop environments +* `spec` — * `set-log-level` — * `events` — Listen to events from the control plane, optionally filtered * `man` — For generating cli manpages. 
Only with the mangen feature enabled @@ -560,6 +567,84 @@ Get an env's storage info +## `snops-cli spec` + +**Usage:** `snops-cli spec ` + +###### **Subcommands:** + +* `node-keys` — Extract all node keys from a spec file +* `nodes` — Extract all nodes from a spec file +* `num-agents` — Count how many agents would be needed to run the spec +* `network` — Get the network id of a spec +* `check` — Check the spec for errors + + + +## `snops-cli spec node-keys` + +Extract all node keys from a spec file + +**Usage:** `snops-cli spec node-keys [OPTIONS] ` + +###### **Arguments:** + +* `` — The environment spec file + +###### **Options:** + +* `--external` — When present, include external keys + + + +## `snops-cli spec nodes` + +Extract all nodes from a spec file + +**Usage:** `snops-cli spec nodes ` + +###### **Arguments:** + +* `` — The environment spec file + + + +## `snops-cli spec num-agents` + +Count how many agents would be needed to run the spec + +**Usage:** `snops-cli spec num-agents ` + +###### **Arguments:** + +* `` — The environment spec file + + + +## `snops-cli spec network` + +Get the network id of a spec + +**Usage:** `snops-cli spec network ` + +###### **Arguments:** + +* `` — The environment spec file + + + +## `snops-cli spec check` + +Check the spec for errors + +**Usage:** `snops-cli spec check ` + +###### **Arguments:** + +* `` — The environment spec file + + + ## `snops-cli set-log-level` **Usage:** `snops-cli set-log-level ` From 08f1aee232aee81695aca6b46e3833a08af7de28 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sat, 7 Dec 2024 01:32:22 -0500 Subject: [PATCH 06/24] feat(agent,controlplane): add support for 'ephemeral' agents that are removed on disconnect --- crates/agent/src/cli.rs | 10 +++++++- crates/controlplane/src/server/agent_ws.rs | 29 ++++++++++++++++++---- snops_book/user_guide/clis/SNOPS_AGENT.md | 3 +++ 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/crates/agent/src/cli.rs b/crates/agent/src/cli.rs index 74ae3529..50784173
100644 --- a/crates/agent/src/cli.rs +++ b/crates/agent/src/cli.rs @@ -59,10 +59,14 @@ pub struct Cli { #[clap(flatten)] pub modes: AgentModeOptions, - #[clap(short, long, default_value_t = false, env = "SNOPS_AGENT_QUIET")] /// Run the agent in quiet mode, suppressing most node output + #[clap(short, long, default_value_t = false, env = "SNOPS_AGENT_QUIET")] pub quiet: bool, + /// When present, delete the agent from the controlplane on disconnect + #[clap(long, default_value_t = false, env = "SNOPS_AGENT_EPHEMERAL")] + pub ephemeral: bool, + #[cfg(any(feature = "clipages", feature = "mangen"))] #[clap(subcommand)] pub command: Commands, @@ -122,6 +126,10 @@ impl Cli { // add &id= qs.append_pair("id", self.id.as_ref()); + if self.ephemeral { + qs.append_pair("ephemeral", "true"); + } + // add local pk flag if let Some(file) = self.private_key_file.as_ref() { if fs::metadata(file).is_ok() { diff --git a/crates/controlplane/src/server/agent_ws.rs b/crates/controlplane/src/server/agent_ws.rs index e249c82b..ac8b7301 100644 --- a/crates/controlplane/src/server/agent_ws.rs +++ b/crates/controlplane/src/server/agent_ws.rs @@ -42,6 +42,8 @@ pub struct AgentWsQuery { pub version: Option, #[serde(flatten)] pub flags: AgentFlags, + #[serde(default)] + pub ephemeral: Option, } pub async fn agent_ws_handler( @@ -69,7 +71,18 @@ pub async fn agent_ws_handler( _ => (), } - ws.on_upgrade(|socket| handle_socket(socket, headers, state, query)) + ws.on_upgrade(|socket| async move { + let ephemeral = query.ephemeral.is_some_and(|e| e); + let Some(id) = handle_socket(socket, headers, Arc::clone(&state), query).await else { + return; + }; + if ephemeral { + info!("Removing ephemeral agent {id}"); + if let Err(e) = state.db.agents.delete(&id) { + error!("failed to remove agent {id} to the database: {e}"); + } + } + }) } async fn handle_socket( @@ -77,7 +90,8 @@ async fn handle_socket( headers: HeaderMap, state: AppState, query: AgentWsQuery, -) { +) -> Option { + let is_ephemeral = 
query.ephemeral.is_some_and(|e| e); // Safe because handle socket is only called if version is Some let agent_version = query.version.unwrap(); @@ -170,7 +184,10 @@ async fn handle_socket( // mark the agent as connected, update the flags as well agent.mark_connected(client.clone(), query.flags); - info!("Agent {id} reconnected with version {agent_version}"); + info!( + "Agent {id} reconnected with version {agent_version}{}", + if is_ephemeral { " (ephemeral)" } else { "" } + ); if let Err(e) = state.db.agents.save(&id, &agent) { error!("failed to save agent {id} to the database: {e}"); } @@ -192,7 +209,7 @@ async fn handle_socket( { warn!("An agent is trying to identify as an already-connected agent {id}"); let _ = socket.send(Message::Close(None)).await; - return; + return None; } // create a new agent @@ -209,7 +226,8 @@ async fn handle_socket( state.pool.insert(id, agent); info!( - "Agent {id} connected with version {agent_version}; pool is now {} nodes", + "Agent {id} connected with version {agent_version}{}; pool is now {} nodes", + if is_ephemeral { " (ephemeral)" } else { "" }, state.pool.len() ); @@ -383,4 +401,5 @@ async fn handle_socket( } info!("Agent {id} disconnected"); + Some(id) } diff --git a/snops_book/user_guide/clis/SNOPS_AGENT.md b/snops_book/user_guide/clis/SNOPS_AGENT.md index 72a1b1d2..d7bb598f 100644 --- a/snops_book/user_guide/clis/SNOPS_AGENT.md +++ b/snops_book/user_guide/clis/SNOPS_AGENT.md @@ -52,6 +52,9 @@ This document contains the help content for the `snops-agent` command-line progr * `-q`, `--quiet` — Run the agent in quiet mode, suppressing most node output Default value: `false` +* `--ephemeral` — When present, delete the agent from the controlplane on disconnect + + Default value: `false` From 99e67716ce036a766338a1ed4d467d810ac811b8 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sat, 7 Dec 2024 01:33:24 -0500 Subject: [PATCH 07/24] feat(devops): add compute agent deployment, fix agent modes in agents deployment --- 
devops/README.md | 6 ++++ devops/agent-entrypoint.sh | 2 ++ devops/snops.agents.yaml | 4 +-- devops/snops.compute-scale.yaml | 53 +++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 devops/snops.compute-scale.yaml diff --git a/devops/README.md b/devops/README.md index e26b7c86..5233ae87 100644 --- a/devops/README.md +++ b/devops/README.md @@ -38,6 +38,12 @@ kubectl --context kind-kind exec -it deployments/snops-controlplane -- scli env apply - Date: Sat, 7 Dec 2024 07:35:39 -0800 Subject: [PATCH 08/24] fix: common compiling, clippy and fmt fixes --- .github/workflows/pr.yml | 4 ++-- crates/agent/src/reconcile/storage.rs | 2 +- crates/aot/src/accounts.rs | 2 +- crates/common/Cargo.toml | 4 ++-- crates/common/src/key_source.rs | 2 +- crates/common/src/schema/mod.rs | 1 - crates/common/src/schema/serialize.rs | 0 7 files changed, 7 insertions(+), 8 deletions(-) delete mode 100644 crates/common/src/schema/serialize.rs diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 4bc4dbe5..ca6344f4 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -5,8 +5,8 @@ on: env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 - # https://releases.rs/docs/1.82.0/ release date - NIGHTLY_TOOLCHAIN: nightly-2024-10-17 + # https://releases.rs/docs/1.83.0/ release date + NIGHTLY_TOOLCHAIN: nightly-2024-11-28 # Cancel in progress workflows on pull_requests. 
# https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-a-fallback-value diff --git a/crates/agent/src/reconcile/storage.rs b/crates/agent/src/reconcile/storage.rs index 90383671..a28ce7f7 100644 --- a/crates/agent/src/reconcile/storage.rs +++ b/crates/agent/src/reconcile/storage.rs @@ -205,7 +205,7 @@ pub struct LedgerReconciler<'a> { pub modify_handle: &'a mut Option<(AbortHandle, Arc>>)>, } -impl<'a> LedgerReconciler<'a> { +impl LedgerReconciler<'_> { pub fn untar_paths(&self) -> (PathBuf, &'static str) { if self.env_info.storage.persist { ( diff --git a/crates/aot/src/accounts.rs b/crates/aot/src/accounts.rs index 61d266ab..02497dbd 100644 --- a/crates/aot/src/accounts.rs +++ b/crates/aot/src/accounts.rs @@ -38,7 +38,7 @@ pub const BECH32M_CHARSET: &str = "0123456789acdefghjklmnpqrstuvwxyz"; #[derive(Clone, Copy)] struct VanityCheck<'a>(&'a [bech32::u5]); -impl<'a> bech32::WriteBase32 for VanityCheck<'a> { +impl bech32::WriteBase32 for VanityCheck<'_> { type Err = bool; fn write_u5(&mut self, data: bech32::u5) -> std::result::Result<(), Self::Err> { diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index 9ed50ac8..7e9e95d8 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -21,7 +21,7 @@ clap_mangen = { workspace = true, optional = true } clap-markdown = { workspace = true, optional = true } fixedbitset = { workspace = true, optional = true } futures.workspace = true -http.workspace = true +http = { workspace = true, features = ["std"] } indexmap = { workspace = true, features = ["std", "serde"] } lasso.workspace = true lazy_static.workspace = true @@ -40,7 +40,7 @@ tarpc.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["process"] } tracing.workspace = true -url.workspace = true +url = { workspace = true, features = ["serde"] } wildmatch.workspace = true [dev-dependencies] diff --git a/crates/common/src/key_source.rs b/crates/common/src/key_source.rs index 
8f1bcc5c..8864149a 100644 --- a/crates/common/src/key_source.rs +++ b/crates/common/src/key_source.rs @@ -45,7 +45,7 @@ impl<'de> Deserialize<'de> for KeySource { { struct KeySourceVisitor; - impl<'de> Visitor<'de> for KeySourceVisitor { + impl Visitor<'_> for KeySourceVisitor { type Value = KeySource; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { diff --git a/crates/common/src/schema/mod.rs b/crates/common/src/schema/mod.rs index 55af45a2..fefc6720 100644 --- a/crates/common/src/schema/mod.rs +++ b/crates/common/src/schema/mod.rs @@ -10,7 +10,6 @@ pub mod cannon; pub mod error; pub mod nodes; pub mod persist; -pub mod serialize; pub mod storage; // TODO: Considerations: diff --git a/crates/common/src/schema/serialize.rs b/crates/common/src/schema/serialize.rs deleted file mode 100644 index e69de29b..00000000 From 37d8055b63f1fc881e9dcd16a667a6d4201c8979 Mon Sep 17 00:00:00 2001 From: gluax <16431709+gluax@users.noreply.github.com> Date: Sat, 7 Dec 2024 07:57:01 -0800 Subject: [PATCH 09/24] fix(ci): don't fail if module has no tests --- .github/workflows/pr.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index ca6344f4..f8ef3b2b 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -135,38 +135,38 @@ jobs: - name: 🧪 Test All if: steps.changes.outputs.top_toml == 'true' - run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run --all --verbose --fail-fast --all-features --exclude snops-agent --exclude xtask + run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run --all --verbose --fail-fast --all-features --exclude snops-agent --exclude xtask --no-tests=warn - name: 🧪 Test Aot if: steps.changes.outputs.aot == 'true' && steps.changes.outputs.top_toml == 'false' - run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p snarkos-aot --verbose --fail-fast --all-features + run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p snarkos-aot --verbose 
--fail-fast --all-features --no-tests=warn - name: 🧪 Test Checkpoint # env: # RUSTFLAGS: -Zcodegen-backend=cranelift if: steps.changes.outputs.checkpoint == 'true' && steps.changes.outputs.top_toml == 'false' - run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p snops-checkpoint --verbose --fail-fast --all-features + run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p snops-checkpoint --verbose --fail-fast --all-features --no-tests=warn - name: 🧪 Test Common # env: # RUSTFLAGS: -Zcodegen-backend=cranelift if: steps.changes.outputs.common == 'true' && steps.changes.outputs.top_toml == 'false' - run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p snops-common --verbose --fail-fast --all-features + run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p snops-common --verbose --fail-fast --all-features --no-tests=warn - name: 🧪 Test Control Plane # env: # RUSTFLAGS: -Zcodegen-backend=cranelift if: (steps.changes.outputs.control_plane == 'true' || steps.changes.outputs.common == 'true') && steps.changes.outputs.top_toml == 'false' - run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p snops --verbose --fail-fast --all-features + run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p snops --verbose --fail-fast --all-features --no-tests=warn - name: 🧪 Test Agent # env: # RUSTFLAGS: "" if: (steps.changes.outputs.agent == 'true' || steps.changes.outputs.common == 'true') - run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p snops-agent --verbose --fail-fast --all-features + run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p snops-agent --verbose --fail-fast --all-features --no-tests=warn - name: 🧪 Test Scli # env: # RUSTFLAGS: -Zcodegen-backend=cranelift if: (steps.changes.outputs.scli == 'true' || steps.changes.outputs.common == 'true') && steps.changes.outputs.top_toml == 'false' - run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p snops-cli --verbose --fail-fast --all-features + run: cargo +${{ env.NIGHTLY_TOOLCHAIN }} nextest run -p 
snops-cli --verbose --fail-fast --all-features --no-tests=warn From b1a601f593df969a41c7b1b1c1afeb4d53930235 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sat, 7 Dec 2024 23:46:11 -0500 Subject: [PATCH 10/24] refactor(devops): use kustomization for k8s resource aggregation --- devops/README.md | 4 ++-- .../agents-deployment.yaml} | 0 devops/k8s/agents-replicas.yaml | 6 ++++++ .../compute-deployment.yaml} | 0 .../controlplane-deployment.yaml} | 13 ------------- devops/k8s/controlplane-service.yaml | 13 +++++++++++++ devops/k8s/kustomization.yaml | 11 +++++++++++ 7 files changed, 32 insertions(+), 15 deletions(-) rename devops/{snops.agents.yaml => k8s/agents-deployment.yaml} (100%) create mode 100644 devops/k8s/agents-replicas.yaml rename devops/{snops.compute-scale.yaml => k8s/compute-deployment.yaml} (100%) rename devops/{snops.controlplane.yaml => k8s/controlplane-deployment.yaml} (84%) create mode 100644 devops/k8s/controlplane-service.yaml create mode 100644 devops/k8s/kustomization.yaml diff --git a/devops/README.md b/devops/README.md index 5233ae87..84d24364 100644 --- a/devops/README.md +++ b/devops/README.md @@ -30,7 +30,7 @@ 4. Install snops and agents into the Kubernetes cluster ```bash - cat devops/snops.*.yaml | kubectl --context kind-kind apply -f - + kubectl --context kind-kind apply -k devops/k8s ``` 5. Deploy snarkops environment @@ -49,7 +49,7 @@ 1. Delete snarkops environment ```bash - kubectl --context kind-kind exec -it deployments/snops-controlplane -- scli env delete + kubectl --context kind-kind delete -k devops/k8s ``` 1. 
Uninstall snops from the Kubernetes cluster diff --git a/devops/snops.agents.yaml b/devops/k8s/agents-deployment.yaml similarity index 100% rename from devops/snops.agents.yaml rename to devops/k8s/agents-deployment.yaml diff --git a/devops/k8s/agents-replicas.yaml b/devops/k8s/agents-replicas.yaml new file mode 100644 index 00000000..18157093 --- /dev/null +++ b/devops/k8s/agents-replicas.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +replicas: + - name: snops-agents + count: 4 diff --git a/devops/snops.compute-scale.yaml b/devops/k8s/compute-deployment.yaml similarity index 100% rename from devops/snops.compute-scale.yaml rename to devops/k8s/compute-deployment.yaml diff --git a/devops/snops.controlplane.yaml b/devops/k8s/controlplane-deployment.yaml similarity index 84% rename from devops/snops.controlplane.yaml rename to devops/k8s/controlplane-deployment.yaml index 41c82384..64c84c2e 100644 --- a/devops/snops.controlplane.yaml +++ b/devops/k8s/controlplane-deployment.yaml @@ -41,16 +41,3 @@ spec: limits: cpu: "1" memory: "2Gi" ---- -apiVersion: v1 -kind: Service -metadata: - name: snops-service -spec: - selector: - app: snops-controlplane - ports: - - name: http - protocol: TCP - port: 1234 - targetPort: 1234 diff --git a/devops/k8s/controlplane-service.yaml b/devops/k8s/controlplane-service.yaml new file mode 100644 index 00000000..1a63fb76 --- /dev/null +++ b/devops/k8s/controlplane-service.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: snops-service +spec: + selector: + app: snops-controlplane + ports: + - name: http + protocol: TCP + port: 1234 + targetPort: 1234 diff --git a/devops/k8s/kustomization.yaml b/devops/k8s/kustomization.yaml new file mode 100644 index 00000000..3e4a95e6 --- /dev/null +++ b/devops/k8s/kustomization.yaml @@ -0,0 +1,11 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: default +commonLabels: + app: snops + +resources: + - 
controlplane-deployment.yaml + - controlplane-service.yaml + - agents-deployment.yaml + - compute-deployment.yaml From 4890c14f502e2c89a8b43b1990c90baebe80fac8 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Wed, 11 Dec 2024 18:06:51 -0500 Subject: [PATCH 11/24] chore: update cli version --- Cargo.lock | 2 +- crates/cli/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d1c19185..b46b0a01 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4498,7 +4498,7 @@ dependencies = [ [[package]] name = "snops-cli" -version = "0.1.0" +version = "0.2.0" dependencies = [ "anyhow", "clap", diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 0f0e25a6..cd383c17 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snops-cli" -version = "0.1.0" +version = "0.2.0" edition = "2021" license = "MIT" description = "CLI for interacting with snarkops control plane API" From 718eabc63c928642cc85609e6e1cb50575b64467 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Wed, 11 Dec 2024 18:07:26 -0500 Subject: [PATCH 12/24] refactor(cli): rename autocomplete to completion, like most CLIs --- crates/cli/src/commands/mod.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/cli/src/commands/mod.rs b/crates/cli/src/commands/mod.rs index d586d8a3..8678506f 100644 --- a/crates/cli/src/commands/mod.rs +++ b/crates/cli/src/commands/mod.rs @@ -16,9 +16,12 @@ mod spec; pub enum Commands { /// Generate shell completions. #[command(arg_required_else_help = true)] - Autocomplete { + Completion { /// Which shell you want to generate completions for. shell: clap_complete::Shell, + /// Rename the command in the completions. 
+ #[clap(long)] + rename: Option, }, #[clap(alias = "a")] Agent(agent::Agent), @@ -47,9 +50,9 @@ impl Commands { let client = reqwest::Client::new(); let response = match self { - Commands::Autocomplete { shell } => { + Commands::Completion { shell, rename } => { let mut cmd = Cli::command(); - let cmd_name = cmd.get_name().to_string(); + let cmd_name = rename.unwrap_or_else(|| cmd.get_name().to_string()); clap_complete::generate(shell, &mut cmd, cmd_name, &mut std::io::stdout()); return Ok(()); From 827f693be5619c54bc55fdeffedc4dab2b593908 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Wed, 11 Dec 2024 19:56:05 -0500 Subject: [PATCH 13/24] feat(agent,controlplane): add kubernetes ready and liveliness service endpoints --- crates/agent/src/cli.rs | 4 ++ crates/agent/src/main.rs | 24 ++++++++++++ crates/agent/src/service.rs | 54 +++++++++++++++++++++++++++ crates/controlplane/src/server/api.rs | 2 + devops/agent-entrypoint.sh | 2 +- devops/k8s/agents-deployment.yaml | 13 +++++++ devops/k8s/compute-deployment.yaml | 14 +++++++ 7 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 crates/agent/src/service.rs diff --git a/crates/agent/src/cli.rs b/crates/agent/src/cli.rs index 50784173..88ea4534 100644 --- a/crates/agent/src/cli.rs +++ b/crates/agent/src/cli.rs @@ -53,6 +53,10 @@ pub struct Cli { #[clap(long = "bind", env = "SNOPS_AGENT_HOST", default_value_t = IpAddr::V4(Ipv4Addr::UNSPECIFIED))] pub bind_addr: IpAddr, + /// Port for the agent to listen on for readiness and liveness checks + #[clap(long, env = "SNOPS_AGENT_HEALTH_PORT")] + pub service_port: Option, + #[clap(flatten)] pub ports: PortConfig, diff --git a/crates/agent/src/main.rs b/crates/agent/src/main.rs index ef1b0705..ffa33846 100644 --- a/crates/agent/src/main.rs +++ b/crates/agent/src/main.rs @@ -7,6 +7,7 @@ mod net; mod reconcile; mod rpc; mod server; +mod service; mod state; mod transfers; @@ -72,6 +73,17 @@ async fn main() { let agent_rpc_listener = 
tokio::net::TcpListener::bind((Ipv4Addr::LOCALHOST, 0)) .await .expect("failed to bind status server"); + + // Setup the status server socket + let agent_service_listener = if let Some(service_port) = args.service_port { + Some( + tokio::net::TcpListener::bind((Ipv4Addr::UNSPECIFIED, service_port)) + .await + .expect("failed to bind status server"), + ) + } else { + None + }; let agent_rpc_port = agent_rpc_listener .local_addr() .expect("failed to get status server port") @@ -137,6 +149,18 @@ async fn main() { } }); + // Start the status server if enabled + if let Some(listener) = agent_service_listener { + let service_state = Arc::clone(&state); + tokio::spawn(async move { + info!("Starting service API server on port {agent_rpc_port}"); + if let Err(e) = service::start(listener, service_state).await { + error!("service API server crashed: {e:?}"); + std::process::exit(1); + } + }); + } + // Get the interrupt signals to break the stream connection let mut interrupt = Signals::term_or_interrupt(); diff --git a/crates/agent/src/service.rs b/crates/agent/src/service.rs new file mode 100644 index 00000000..8f0558f3 --- /dev/null +++ b/crates/agent/src/service.rs @@ -0,0 +1,54 @@ +use std::sync::Arc; + +use anyhow::Result; +use axum::{ + extract::State, + response::{IntoResponse, Response}, + routing::get, + Json, Router, +}; +use http::StatusCode; +use serde_json::json; +use snops_common::state::AgentState; +use tracing::info; + +use crate::state::AppState; + +pub async fn start(listener: tokio::net::TcpListener, state: AppState) -> Result<()> { + let app = Router::new() + .route("/readyz", get(|| async { Json(json!({ "status": "ok" })) })) + .route("/livez", get(livez)) + .with_state(Arc::clone(&state)); + info!("Starting service API on: {}", listener.local_addr()?); + + axum::serve(listener, app).await?; + + Ok(()) +} + +async fn livez(State(state): State) -> Response { + // If the node is configured to be online, but is not online, return an error + match 
state.get_agent_state().await.as_ref() { + AgentState::Node(_, node) if node.online && !state.is_node_online() => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ + "status": "node offline", + "node_status": state.get_node_status().await, + })), + ) + .into_response() + } + _ => {} + } + + if !state.is_ws_online() { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ "status": "controlplane offline" })), + ) + .into_response(); + } + + Json(json!({ "status": "ok" })).into_response() +} diff --git a/crates/controlplane/src/server/api.rs b/crates/controlplane/src/server/api.rs index ea5525df..d12b19f8 100644 --- a/crates/controlplane/src/server/api.rs +++ b/crates/controlplane/src/server/api.rs @@ -49,6 +49,8 @@ macro_rules! unwrap_or_bad_request { pub(super) fn routes() -> Router { Router::new() + .route("/readyz", get(|| async { Json(json!({ "status": "ok" })) })) + .route("/livez", get(|| async { Json(json!({ "status": "ok" })) })) .route("/events", get(event_ws::event_ws_handler)) .route("/log/:level", post(set_log_level)) .route("/agents", get(get_agents)) diff --git a/devops/agent-entrypoint.sh b/devops/agent-entrypoint.sh index ceb31875..adcc28a2 100644 --- a/devops/agent-entrypoint.sh +++ b/devops/agent-entrypoint.sh @@ -32,7 +32,7 @@ sleep 1 while true; do download_agent - $AGENT_BIN + $AGENT_BIN --labels "k8s,$AGENT_LABELS" echo "Agent exited, restarting in 5 seconds..." 
sleep 5 done diff --git a/devops/k8s/agents-deployment.yaml b/devops/k8s/agents-deployment.yaml index 3d2e07e7..6dcc9730 100644 --- a/devops/k8s/agents-deployment.yaml +++ b/devops/k8s/agents-deployment.yaml @@ -30,10 +30,23 @@ spec: value: "true" - name: SNOPS_AGENT_CLIENT value: "true" + - name: AGENT_LABELS + value: "local" + - name: SNOPS_AGENT_HEALTH_PORT + value: "8080" ports: - containerPort: 5000 - containerPort: 4130 - containerPort: 3030 + - containerPort: 8080 + livenessProbe: + httpGet: + path: /livez + port: 8080 + readinessProbe: + httpGet: + path: /readyz + port: 8080 resources: limits: cpu: "1" diff --git a/devops/k8s/compute-deployment.yaml b/devops/k8s/compute-deployment.yaml index b838915e..a995013b 100644 --- a/devops/k8s/compute-deployment.yaml +++ b/devops/k8s/compute-deployment.yaml @@ -30,6 +30,20 @@ spec: value: "true" - name: SNOPS_AGENT_EPHEMERAL value: "true" + - name: AGENT_LABELS + value: "compute" + - name: SNOPS_AGENT_HEALTH_PORT + value: "8080" + ports: + - containerPort: 8080 + livenessProbe: + httpGet: + path: /livez + port: 8080 + readinessProbe: + httpGet: + path: /readyz + port: 8080 resources: limits: cpu: 1000m From c1abb741c15c2a30486e6bc17974a7cdb1e2ca97 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Wed, 11 Dec 2024 20:20:30 -0500 Subject: [PATCH 14/24] feat(devops): helm charts for deploying a controlplane and agents --- .gitignore | 7 +- crates/agent/src/cli.rs | 17 ++-- crates/xtask/src/main.rs | 11 ++- devops/README.md | 18 +++-- devops/helm/snops-agents/.helmignore | 23 ++++++ devops/helm/snops-agents/Chart.yaml | 7 ++ .../helm/snops-agents/templates/_helpers.tpl | 62 +++++++++++++++ .../snops-agents/templates/deployment.yaml | 77 +++++++++++++++++++ devops/helm/snops-agents/values.yaml | 37 +++++++++ devops/helm/snops-compute/.helmignore | 23 ++++++ devops/helm/snops-compute/Chart.yaml | 7 ++ .../helm/snops-compute/templates/_helpers.tpl | 62 +++++++++++++++ .../snops-compute/templates/deployment.yaml | 74 
++++++++++++++++++ .../snops-compute/templates/hpa.yaml.disabled | 32 ++++++++ devops/helm/snops-compute/values.yaml | 43 +++++++++++ devops/helm/snops-controlplane/.helmignore | 23 ++++++ devops/helm/snops-controlplane/Chart.yaml | 7 ++ .../snops-controlplane/templates/_helpers.tpl | 62 +++++++++++++++ .../templates/deployment.yaml | 72 +++++++++++++++++ .../snops-controlplane/templates/service.yaml | 16 ++++ devops/helm/snops-controlplane/values.yaml | 41 ++++++++++ devops/helm/snops/.helmignore | 23 ++++++ devops/helm/snops/Chart.yaml | 24 ++++++ devops/helm/snops/templates/NOTES.txt | 17 ++++ devops/helm/snops/templates/_helpers.tpl | 62 +++++++++++++++ .../templates/tests/test-connection.yaml | 15 ++++ devops/helm/snops/values.yaml | 76 ++++++++++++++++++ devops/k8s/agents-deployment.yaml | 53 ------------- devops/k8s/agents-replicas.yaml | 6 -- devops/k8s/compute-deployment.yaml | 67 ---------------- devops/k8s/controlplane-deployment.yaml | 43 ----------- devops/k8s/controlplane-service.yaml | 13 ---- devops/k8s/kustomization.yaml | 11 --- 33 files changed, 918 insertions(+), 213 deletions(-) create mode 100644 devops/helm/snops-agents/.helmignore create mode 100644 devops/helm/snops-agents/Chart.yaml create mode 100644 devops/helm/snops-agents/templates/_helpers.tpl create mode 100644 devops/helm/snops-agents/templates/deployment.yaml create mode 100644 devops/helm/snops-agents/values.yaml create mode 100644 devops/helm/snops-compute/.helmignore create mode 100644 devops/helm/snops-compute/Chart.yaml create mode 100644 devops/helm/snops-compute/templates/_helpers.tpl create mode 100644 devops/helm/snops-compute/templates/deployment.yaml create mode 100644 devops/helm/snops-compute/templates/hpa.yaml.disabled create mode 100644 devops/helm/snops-compute/values.yaml create mode 100644 devops/helm/snops-controlplane/.helmignore create mode 100644 devops/helm/snops-controlplane/Chart.yaml create mode 100644 devops/helm/snops-controlplane/templates/_helpers.tpl 
create mode 100644 devops/helm/snops-controlplane/templates/deployment.yaml create mode 100644 devops/helm/snops-controlplane/templates/service.yaml create mode 100644 devops/helm/snops-controlplane/values.yaml create mode 100644 devops/helm/snops/.helmignore create mode 100644 devops/helm/snops/Chart.yaml create mode 100644 devops/helm/snops/templates/NOTES.txt create mode 100644 devops/helm/snops/templates/_helpers.tpl create mode 100644 devops/helm/snops/templates/tests/test-connection.yaml create mode 100644 devops/helm/snops/values.yaml delete mode 100644 devops/k8s/agents-deployment.yaml delete mode 100644 devops/k8s/agents-replicas.yaml delete mode 100644 devops/k8s/compute-deployment.yaml delete mode 100644 devops/k8s/controlplane-deployment.yaml delete mode 100644 devops/k8s/controlplane-service.yaml delete mode 100644 devops/k8s/kustomization.yaml diff --git a/.gitignore b/.gitignore index 9578c1e2..dc5586ca 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,9 @@ book # report files report.html -report.json \ No newline at end of file +report.json + +# Helm chart files +*.tgz +*.tar.gz +Chart.lock diff --git a/crates/agent/src/cli.rs b/crates/agent/src/cli.rs index 88ea4534..9a0d8767 100644 --- a/crates/agent/src/cli.rs +++ b/crates/agent/src/cli.rs @@ -145,16 +145,13 @@ impl Cli { // add &labels= if id is present if let Some(labels) = &self.labels { - info!("Using labels: {:?}", labels); - qs.append_pair( - "labels", - &labels - .iter() - .filter(|s| !s.is_empty()) - .map(|s| s.trim()) - .collect::>() - .join(","), - ); + let label_vec = labels + .iter() + .filter(|s| !s.is_empty()) + .map(|s| s.trim()) + .collect::>(); + info!("Using labels: {label_vec:?}"); + qs.append_pair("labels", &label_vec.join(",")); } let (is_tls, host) = endpoint diff --git a/crates/xtask/src/main.rs b/crates/xtask/src/main.rs index ad584ce3..715915bb 100644 --- a/crates/xtask/src/main.rs +++ b/crates/xtask/src/main.rs @@ -227,12 +227,15 @@ impl Command { 
Command::Build(build) => build.run(sh), Command::Dev { target } => dev(sh, target), Command::Containers => { - cmd!(sh, "docker build -t snops . -f ./devops/snops.Dockerfile") - .run() - .context("Building snops container")?; cmd!( sh, - "docker build -t snops-agent . -f ./devops/agent.Dockerfile" + "docker build -t snops:latest . -f ./devops/snops.Dockerfile" + ) + .run() + .context("Building snops container")?; + cmd!( + sh, + "docker build -t snops-agent:latest . -f ./devops/agent.Dockerfile" ) .run() .context("Building snops-agent container") diff --git a/devops/README.md b/devops/README.md index 84d24364..da4c9995 100644 --- a/devops/README.md +++ b/devops/README.md @@ -6,6 +6,7 @@ 1. [Docker](https://www.docker.com/) 1. [`kind`](https://kind.sigs.k8s.io/) (Kubernetes in docker) +1. [`helm`](https://helm.sh/) (Kubernetes package manager) ### Start Environment @@ -24,13 +25,14 @@ 3. Load locally built containers into `kind` ```bash - kind load docker-image snops snops-agent + kind load docker-image snops:latest snops-agent:latest ``` 4. Install snops and agents into the Kubernetes cluster ```bash - kubectl --context kind-kind apply -k devops/k8s + helm dependencies build ./devops/helm/snops + helm --kube-context kind-kind install snops ./devops/helm/snops ``` 5. Deploy snarkops environment @@ -41,7 +43,13 @@ 6. Execute a transaction ```bash - kubectl --context kind-kind exec -it deployments/snops-controlplane -- scli env action execute transfer_public example.aleo 1u64 + kubectl --context kind-kind exec -it deployments/snops-controlplane -- scli env action execute transfer_public example.aleo 123u64 + ``` + +7. Verify a balance + + ```bash + kubectl --context kind-kind exec -it deployments/snops-controlplane -- scli env balance example.aleo ``` ### Teardown @@ -49,13 +57,13 @@ 1. 
Delete snarkops environment ```bash - kubectl --context kind-kind delete -k devops/k8s + kubectl --context kind-kind exec -it deployments/snops-controlplane -- scli env apply delete ``` 1. Uninstall snops from the Kubernetes cluster ```bash - cat devops/snops.*.yaml | kubectl --context kind-kind delete -f - + helm --kube-context kind-kind uninstall snops ``` 1. Delete `kind` cluster diff --git a/devops/helm/snops-agents/.helmignore b/devops/helm/snops-agents/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/devops/helm/snops-agents/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/devops/helm/snops-agents/Chart.yaml b/devops/helm/snops-agents/Chart.yaml new file mode 100644 index 00000000..2bbc51e2 --- /dev/null +++ b/devops/helm/snops-agents/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: snops-agents +description: A Helm chart for deploying snarkops agents +type: application +version: 0.0.0 + +appVersion: "latest" diff --git a/devops/helm/snops-agents/templates/_helpers.tpl b/devops/helm/snops-agents/templates/_helpers.tpl new file mode 100644 index 00000000..ba04c300 --- /dev/null +++ b/devops/helm/snops-agents/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "helm.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "helm.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "helm.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "helm.labels" -}} +helm.sh/chart: {{ include "helm.chart" . }} +{{ include "helm.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "helm.selectorLabels" -}} +app.kubernetes.io/name: {{ include "helm.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "helm.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "helm.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/devops/helm/snops-agents/templates/deployment.yaml b/devops/helm/snops-agents/templates/deployment.yaml new file mode 100644 index 00000000..5f6501cd --- /dev/null +++ b/devops/helm/snops-agents/templates/deployment.yaml @@ -0,0 +1,77 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "helm.fullname" . }} + labels: + {{- include "helm.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicas }} + selector: + matchLabels: + {{- include "helm.selectorLabels" . 
| nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "helm.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: SNOPS_ENDPOINT + value: http://{{ .Values.controlplaneService.name }}:{{ .Values.controlplaneService.port }} + - name: SNOPS_AGENT_ID + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: SNOPS_AGENT_VALIDATOR + value: "true" + - name: SNOPS_AGENT_CLIENT + value: "true" + - name: AGENT_LABELS + value: "{{ .Values.agentLabels }}" + - name: SNOPS_AGENT_HEALTH_PORT + value: "8080" + ports: + - containerPort: 5000 + - containerPort: 4130 + - containerPort: 3030 + - containerPort: 8080 + livenessProbe: + httpGet: + path: /livez + port: 8080 + readinessProbe: + httpGet: + path: /readyz + port: 8080 + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/devops/helm/snops-agents/values.yaml b/devops/helm/snops-agents/values.yaml new file mode 100644 index 00000000..dc94976a --- /dev/null +++ b/devops/helm/snops-agents/values.yaml @@ -0,0 +1,37 @@ +imagePullSecrets: [] + +podAnnotations: {} +podLabels: {} + +# Comma separated list of labels to add to the agent CLI +agentLabels: "" + +# Must be the same as the controlplane service name and port +controlplaneService: + name: snops-service + port: 1234 + +# Configurations for the agent pods +image: + repository: snops-agent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + pullPolicy: IfNotPresent + +replicas: 4 + +resources: {} +# limits: +# cpu: 500m +# memory: 2Gi +# requests: +# cpu: 500m +# memory: 2Gi + +volumes: [] +nodeSelector: {} +affinity: {} +volumeMounts: [] + +nameOverride: "" +fullnameOverride: "" diff --git a/devops/helm/snops-compute/.helmignore b/devops/helm/snops-compute/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/devops/helm/snops-compute/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/devops/helm/snops-compute/Chart.yaml b/devops/helm/snops-compute/Chart.yaml new file mode 100644 index 00000000..9ca995cd --- /dev/null +++ b/devops/helm/snops-compute/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: snops-compute +description: A Helm chart for deploying snarkops compute agents +type: application +version: 0.0.0 + +appVersion: "latest" diff --git a/devops/helm/snops-compute/templates/_helpers.tpl b/devops/helm/snops-compute/templates/_helpers.tpl new file mode 100644 index 00000000..ba04c300 --- /dev/null +++ b/devops/helm/snops-compute/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "helm.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "helm.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "helm.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "helm.labels" -}} +helm.sh/chart: {{ include "helm.chart" . }} +{{ include "helm.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "helm.selectorLabels" -}} +app.kubernetes.io/name: {{ include "helm.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "helm.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "helm.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/devops/helm/snops-compute/templates/deployment.yaml b/devops/helm/snops-compute/templates/deployment.yaml new file mode 100644 index 00000000..94b40804 --- /dev/null +++ b/devops/helm/snops-compute/templates/deployment.yaml @@ -0,0 +1,74 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "helm.fullname" . }} + labels: + {{- include "helm.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicas }} + selector: + matchLabels: + {{- include "helm.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "helm.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: SNOPS_ENDPOINT + value: http://{{ .Values.controlplaneService.name }}:{{ .Values.controlplaneService.port }} + - name: SNOPS_AGENT_ID + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: SNOPS_AGENT_COMPUTE + value: "true" + - name: SNOPS_AGENT_EPHEMERAL + value: "true" + - name: AGENT_LABELS + value: "{{ .Values.agentLabels }}" + - name: SNOPS_AGENT_HEALTH_PORT + value: "8080" + ports: + - containerPort: 8080 + livenessProbe: + httpGet: + path: /livez + port: 8080 + readinessProbe: + httpGet: + path: /readyz + port: 8080 + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/devops/helm/snops-compute/templates/hpa.yaml.disabled b/devops/helm/snops-compute/templates/hpa.yaml.disabled new file mode 100644 index 00000000..28c087ea --- /dev/null +++ b/devops/helm/snops-compute/templates/hpa.yaml.disabled @@ -0,0 +1,32 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "helm.fullname" . }} + labels: + {{- include "helm.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "helm.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/devops/helm/snops-compute/values.yaml b/devops/helm/snops-compute/values.yaml new file mode 100644 index 00000000..4414ad55 --- /dev/null +++ b/devops/helm/snops-compute/values.yaml @@ -0,0 +1,43 @@ +imagePullSecrets: [] + +podAnnotations: {} +podLabels: {} + +# Comma separated list of labels to add to the compute agent CLI +agentLabels: "" + +# Must be the same as the controlplane service name and port +controlplaneService: + name: snops-service + port: 1234 + +# Configurations for the agent pods +image: + repository: snops-agent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + pullPolicy: IfNotPresent + +replicas: 1 + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + +resources: {} +# limits: +# cpu: 1000m +# memory: 2Gi +# requests: +# cpu: 1000m +# memory: 2Gi + +volumes: [] +nodeSelector: {} +affinity: {} +volumeMounts: [] + +nameOverride: "" +fullnameOverride: "" diff --git a/devops/helm/snops-controlplane/.helmignore b/devops/helm/snops-controlplane/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/devops/helm/snops-controlplane/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/devops/helm/snops-controlplane/Chart.yaml b/devops/helm/snops-controlplane/Chart.yaml new file mode 100644 index 00000000..59bc0185 --- /dev/null +++ b/devops/helm/snops-controlplane/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: snops-controlplane +description: A Helm chart for deploying the snarkops control plane +type: application +version: 0.0.0 + +appVersion: "latest" diff --git a/devops/helm/snops-controlplane/templates/_helpers.tpl b/devops/helm/snops-controlplane/templates/_helpers.tpl new file mode 100644 index 00000000..ba04c300 --- /dev/null +++ b/devops/helm/snops-controlplane/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "helm.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "helm.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "helm.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "helm.labels" -}} +helm.sh/chart: {{ include "helm.chart" . 
}} +{{ include "helm.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "helm.selectorLabels" -}} +app.kubernetes.io/name: {{ include "helm.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "helm.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "helm.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/devops/helm/snops-controlplane/templates/deployment.yaml b/devops/helm/snops-controlplane/templates/deployment.yaml new file mode 100644 index 00000000..0416164c --- /dev/null +++ b/devops/helm/snops-controlplane/templates/deployment.yaml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "helm.fullname" . }} + labels: + snops: controlplane + {{- include "helm.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + snops: controlplane + {{- include "helm.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + snops: controlplane + {{- include "helm.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: SNOPS_DATA_DIR + value: /usr/share/snops + - name: SNOPS_PORT + value: "1234" + - name: AOT_BIN + value: /etc/snops/snarkos-aot + - name: AGENT_BIN + value: /etc/snops/snops-agent + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + livenessProbe: + httpGet: + path: /api/v1/livez + port: {{ .Values.service.port }} + readinessProbe: + httpGet: + path: /api/v1/readyz + port: {{ .Values.service.port }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/devops/helm/snops-controlplane/templates/service.yaml b/devops/helm/snops-controlplane/templates/service.yaml new file mode 100644 index 00000000..e9790921 --- /dev/null +++ b/devops/helm/snops-controlplane/templates/service.yaml @@ -0,0 +1,16 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Values.service.name }} + labels: + {{- include "helm.labels" . | nindent 4 }} +spec: + selector: + snops: controlplane + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.service.port }} + protocol: TCP + name: http diff --git a/devops/helm/snops-controlplane/values.yaml b/devops/helm/snops-controlplane/values.yaml new file mode 100644 index 00000000..b82d01e3 --- /dev/null +++ b/devops/helm/snops-controlplane/values.yaml @@ -0,0 +1,41 @@ +# Default values for helm. +# This is a YAML-formatted file. 
+# Declare variables to be passed into your templates. + +imagePullSecrets: [] + +podAnnotations: {} +podLabels: {} + +service: + # referenced by agents + name: snops-service + type: ClusterIP # or NodePort + port: 1234 + +image: + repository: snops + # Overrides the image tag whose default is the chart appVersion. + tag: "" + pullPolicy: IfNotPresent + +resources: {} +# limits: +# cpu: 200m +# memory: 500Mi +# requests: +# cpu: 200m +# memory: 500Mi + +volumes: [] +nodeSelector: {} +affinity: {} + +# Additional volumeMounts on the output Deployment definition. +volumeMounts: [] +# - name: foo +# mountPath: "/etc/foo" +# readOnly: true + +nameOverride: "" +fullnameOverride: "" diff --git a/devops/helm/snops/.helmignore b/devops/helm/snops/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/devops/helm/snops/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/devops/helm/snops/Chart.yaml b/devops/helm/snops/Chart.yaml new file mode 100644 index 00000000..0ad24a0d --- /dev/null +++ b/devops/helm/snops/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: snarkops +description: A Helm chart for deploying snarkops along with some agents +type: application +version: 0.0.0 + +appVersion: "latest" + +dependencies: + - alias: controlplane + name: snops-controlplane + enabled: true + version: "0.0.0" + repository: file://../snops-controlplane + - alias: agents + name: snops-agents + enabled: true + version: "0.0.0" + repository: file://../snops-agents + - alias: compute + name: snops-compute + enabled: true + version: "0.0.0" + repository: file://../snops-compute diff --git a/devops/helm/snops/templates/NOTES.txt b/devops/helm/snops/templates/NOTES.txt new file mode 100644 index 00000000..d0751ee9 --- /dev/null +++ b/devops/helm/snops/templates/NOTES.txt @@ -0,0 +1,17 @@ +1. Get the application URL by running these commands: + +{{- if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "helm.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "helm.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "helm.fullname" .
}} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "helm.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/devops/helm/snops/templates/_helpers.tpl b/devops/helm/snops/templates/_helpers.tpl new file mode 100644 index 00000000..ba04c300 --- /dev/null +++ b/devops/helm/snops/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "helm.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "helm.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "helm.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "helm.labels" -}} +helm.sh/chart: {{ include "helm.chart" . }} +{{ include "helm.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "helm.selectorLabels" -}} +app.kubernetes.io/name: {{ include "helm.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "helm.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "helm.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/devops/helm/snops/templates/tests/test-connection.yaml b/devops/helm/snops/templates/tests/test-connection.yaml new file mode 100644 index 00000000..bf1c65f2 --- /dev/null +++ b/devops/helm/snops/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "helm.fullname" . }}-test-connection" + labels: + {{- include "helm.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "helm.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/devops/helm/snops/values.yaml b/devops/helm/snops/values.yaml new file mode 100644 index 00000000..83cee5c8 --- /dev/null +++ b/devops/helm/snops/values.yaml @@ -0,0 +1,76 @@ +service: &service + type: ClusterIP # or NodePort (LoadBalancer is not supported yet) + name: snops-service + port: 1234 + +image: &image + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "" + +imagePullSecrets: &imagePullSecrets [] +podAnnotations: &podAnnotations {} +podLabels: &podLabels {} + +controlplane: + image: *image + service: *service + + resources: {} + # limits: + # cpu: 200m + # memory: 500Mi + # requests: + # cpu: 200m + # memory: 500Mi + + imagePullSecrets: *imagePullSecrets + podAnnotations: *podAnnotations + podLabels: *podLabels + + volumes: + - name: controlplane-data + emptyDir: {} + + volumeMounts: + - name: controlplane-data + mountPath: /usr/share/snops + +agents: + image: *image + service: *service + replicas: 4 + + resources: {} + # limits: + # cpu: 500m + # memory: 2Gi + # requests: + # cpu: 500m + # memory: 2Gi + + imagePullSecrets: *imagePullSecrets + podAnnotations: *podAnnotations + podLabels: *podLabels + +compute: + image: *image + service: *service + replicas: 1 + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + + resources: {} + # limits: + # cpu: 1000m + # memory: 2Gi + # requests: + # cpu: 1000m + # memory: 2Gi + + imagePullSecrets: *imagePullSecrets + podAnnotations: *podAnnotations + podLabels: *podLabels diff --git a/devops/k8s/agents-deployment.yaml b/devops/k8s/agents-deployment.yaml deleted file mode 100644 index 6dcc9730..00000000 --- a/devops/k8s/agents-deployment.yaml +++ /dev/null @@ -1,53 +0,0 @@ ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: snops-agents - labels: - app: snops-agent -spec: - replicas: 4 - selector: - matchLabels: - app: snops-agent - template: - metadata: - labels: - app: snops-agent - spec: - containers: - - name: snops-agent - image: snops-agent - imagePullPolicy: IfNotPresent - env: - - name: SNOPS_ENDPOINT - value: http://snops-service:1234 - - name: SNOPS_AGENT_ID - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: SNOPS_AGENT_VALIDATOR - value: "true" - - name: SNOPS_AGENT_CLIENT - value: "true" - - name: AGENT_LABELS - value: "local" - - name: SNOPS_AGENT_HEALTH_PORT - value: "8080" - 
ports: - - containerPort: 5000 - - containerPort: 4130 - - containerPort: 3030 - - containerPort: 8080 - livenessProbe: - httpGet: - path: /livez - port: 8080 - readinessProbe: - httpGet: - path: /readyz - port: 8080 - resources: - limits: - cpu: "1" - memory: "2Gi" diff --git a/devops/k8s/agents-replicas.yaml b/devops/k8s/agents-replicas.yaml deleted file mode 100644 index 18157093..00000000 --- a/devops/k8s/agents-replicas.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -replicas: - - name: snops-agents - count: 4 diff --git a/devops/k8s/compute-deployment.yaml b/devops/k8s/compute-deployment.yaml deleted file mode 100644 index a995013b..00000000 --- a/devops/k8s/compute-deployment.yaml +++ /dev/null @@ -1,67 +0,0 @@ ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: snops-compute - labels: - app: snops-compute -spec: - replicas: 1 - selector: - matchLabels: - app: snops-compute - template: - metadata: - labels: - app: snops-compute - spec: - containers: - - name: snops-compute - image: snops-agent - imagePullPolicy: IfNotPresent - env: - - name: SNOPS_ENDPOINT - value: http://snops-service:1234 - - name: SNOPS_AGENT_ID - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: SNOPS_AGENT_COMPUTE - value: "true" - - name: SNOPS_AGENT_EPHEMERAL - value: "true" - - name: AGENT_LABELS - value: "compute" - - name: SNOPS_AGENT_HEALTH_PORT - value: "8080" - ports: - - containerPort: 8080 - livenessProbe: - httpGet: - path: /livez - port: 8080 - readinessProbe: - httpGet: - path: /readyz - port: 8080 - resources: - limits: - cpu: 1000m - memory: "2Gi" - requests: - cpu: 1000m -# TODO: figure out autoscaling -# May need to apply the metrics-server: https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml -# --- -# apiVersion: autoscaling/v1 -# kind: HorizontalPodAutoscaler -# metadata: -# name: snops-compute -# spec: -# scaleTargetRef: -# apiVersion: apps/v1 -# kind: 
Deployment -# name: snops-compute -# minReplicas: 1 -# maxReplicas: 10 -# targetCPUUtilizationPercentage: 50 diff --git a/devops/k8s/controlplane-deployment.yaml b/devops/k8s/controlplane-deployment.yaml deleted file mode 100644 index 64c84c2e..00000000 --- a/devops/k8s/controlplane-deployment.yaml +++ /dev/null @@ -1,43 +0,0 @@ ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: snops-controlplane - labels: - app: snops-controlplane -spec: - replicas: 1 - selector: - matchLabels: - app: snops-controlplane - template: - metadata: - name: snops-controlplane - labels: - app: snops-controlplane - spec: - volumes: - - name: controlplane-data - emptyDir: {} - containers: - - name: snops-controlplane - image: snops - imagePullPolicy: Never #IfNotPresent - env: - - name: SNOPS_DATA_DIR - value: /usr/share/snops - - name: SNOPS_PORT - value: "1234" - - name: AOT_BIN - value: /etc/snops/snarkos-aot - - name: AGENT_BIN - value: /etc/snops/snops-agent - ports: - - containerPort: 1234 - volumeMounts: - - name: controlplane-data - mountPath: /usr/share/snops - resources: - limits: - cpu: "1" - memory: "2Gi" diff --git a/devops/k8s/controlplane-service.yaml b/devops/k8s/controlplane-service.yaml deleted file mode 100644 index 1a63fb76..00000000 --- a/devops/k8s/controlplane-service.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -apiVersion: v1 -kind: Service -metadata: - name: snops-service -spec: - selector: - app: snops-controlplane - ports: - - name: http - protocol: TCP - port: 1234 - targetPort: 1234 diff --git a/devops/k8s/kustomization.yaml b/devops/k8s/kustomization.yaml deleted file mode 100644 index 3e4a95e6..00000000 --- a/devops/k8s/kustomization.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -namespace: default -commonLabels: - app: snops - -resources: - - controlplane-deployment.yaml - - controlplane-service.yaml - - agents-deployment.yaml - - compute-deployment.yaml From 
9599f8019412113627c2b9243502a6335f71cb4d Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sat, 14 Dec 2024 15:31:56 -0500 Subject: [PATCH 15/24] chore(helm): doc/todo comments in helm chart values --- devops/helm/snops/values.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/devops/helm/snops/values.yaml b/devops/helm/snops/values.yaml index 83cee5c8..3fd78ac2 100644 --- a/devops/helm/snops/values.yaml +++ b/devops/helm/snops/values.yaml @@ -53,6 +53,12 @@ agents: podAnnotations: *podAnnotations podLabels: *podLabels + # TODO: support secrets storage + # TODO: support custom environment variables + + # Agents can also receive volumes, nodeSelector, and affinity. + # This is helpful if you want to run agents on specific nodes. + compute: image: *image service: *service @@ -74,3 +80,8 @@ compute: imagePullSecrets: *imagePullSecrets podAnnotations: *podAnnotations podLabels: *podLabels + + # Compute agents can also take volumes & volumeMounts (just like agents) + # as well as nodeSelector and affinity. + # This is so they can be configured to exist on specific nodes with GPU support, + # or other specific requirements.
From 3ca249f47860c301d97cd62b27308ed9ffb8ca91 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sat, 14 Dec 2024 15:32:34 -0500 Subject: [PATCH 16/24] fix(controlplane): increase execute auth RPC timeout --- crates/controlplane/src/state/rpc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/controlplane/src/state/rpc.rs b/crates/controlplane/src/state/rpc.rs index f55f82ae..95f4bcd5 100644 --- a/crates/controlplane/src/state/rpc.rs +++ b/crates/controlplane/src/state/rpc.rs @@ -50,7 +50,7 @@ impl AgentClient { auth: String, ) -> Result { let mut ctx = context::current(); - ctx.deadline += Duration::from_secs(30); + ctx.deadline += Duration::from_secs(60); Ok(self .0 .execute_authorization(ctx, env_id, network, query, auth) From 293c7d691cda8cbbe723c08b03498e0e98c7da60 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sat, 14 Dec 2024 17:35:17 -0500 Subject: [PATCH 17/24] refactor(controlplane): remove key-to-agent bimap, support optional agents in env nodes --- crates/cli/src/commands/env/mod.rs | 5 + crates/common/src/schema/nodes.rs | 11 +- crates/common/src/schema/persist/doc_node.rs | 6 +- crates/common/src/state/node_state.rs | 1 + crates/controlplane/src/env/mod.rs | 263 +++++++++--------- crates/controlplane/src/env/set.rs | 17 +- crates/controlplane/src/persist/env.rs | 64 +---- crates/controlplane/src/persist/node.rs | 88 +++--- crates/controlplane/src/server/api.rs | 60 ++-- .../controlplane/src/state/external_peers.rs | 16 +- crates/controlplane/src/state/global.rs | 2 +- 11 files changed, 253 insertions(+), 280 deletions(-) diff --git a/crates/cli/src/commands/env/mod.rs b/crates/cli/src/commands/env/mod.rs index 179e170a..12ba56ec 100644 --- a/crates/cli/src/commands/env/mod.rs +++ b/crates/cli/src/commands/env/mod.rs @@ -305,6 +305,11 @@ pub async fn post_and_wait(url: &str, req: RequestBuilder, env_id: EnvId) -> Res let mut node_map: HashMap = res.json().await?; println!("{}", serde_json::to_string_pretty(&node_map)?); + // No 
agents to wait for + if node_map.is_empty() { + return Ok(()); + } + let filter = node_map .values() .copied() diff --git a/crates/common/src/schema/nodes.rs b/crates/common/src/schema/nodes.rs index 0348d4cf..067e4a5e 100644 --- a/crates/common/src/schema/nodes.rs +++ b/crates/common/src/schema/nodes.rs @@ -33,11 +33,11 @@ pub struct NodesDocument { pub external: IndexMap, #[serde(default)] - pub nodes: IndexMap, + pub nodes: IndexMap, } impl NodesDocument { - pub fn expand_internal_replicas(&self) -> impl Iterator + '_ { + pub fn expand_internal_replicas(&self) -> impl Iterator + '_ { self.nodes.iter().flat_map(|(doc_node_key, doc_node)| { let num_replicas = doc_node.replicas.map(|r| r.get()).unwrap_or(1); @@ -172,10 +172,9 @@ where labels.serialize(serializer) } -// TODO: could use some more clarification on some of these fields -/// A node in the environment +/// A node in the environment spec #[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)] -pub struct Node { +pub struct NodeDoc { /// When true, the node will be started #[serde(default = "please_be_online")] pub online: bool, @@ -221,7 +220,7 @@ pub struct Node { pub binary: Option, } -impl Node { +impl NodeDoc { pub fn into_state(&self, node_key: NodeKey) -> NodeState { NodeState { node_key, diff --git a/crates/common/src/schema/persist/doc_node.rs b/crates/common/src/schema/persist/doc_node.rs index 99ef4799..a36b707f 100644 --- a/crates/common/src/schema/persist/doc_node.rs +++ b/crates/common/src/schema/persist/doc_node.rs @@ -2,7 +2,7 @@ use std::num::NonZeroUsize; use lasso::Spur; -use crate::schema::nodes::{ExternalNode, Node}; +use crate::schema::nodes::{ExternalNode, NodeDoc}; use crate::{ format::{ DataFormat, DataFormatReader, DataFormatWriter, DataHeaderOf, DataReadError, DataWriteError, @@ -90,7 +90,7 @@ impl DataFormat for NodeFormatHeader { } } -impl DataFormat for Node { +impl DataFormat for NodeDoc { type Header = NodeFormatHeader; const LATEST_HEADER: Self::Header = 
NodeFormatHeader { key_source: KeySource::LATEST_HEADER, @@ -136,7 +136,7 @@ impl DataFormat for Node { None }; - Ok(Node { + Ok(NodeDoc { online, replicas: replicas.and_then(NonZeroUsize::new), key, diff --git a/crates/common/src/state/node_state.rs b/crates/common/src/state/node_state.rs index 424e280f..67142e2b 100644 --- a/crates/common/src/state/node_state.rs +++ b/crates/common/src/state/node_state.rs @@ -168,6 +168,7 @@ impl KeyState { } } +/// Peers sent to the agent with resolved addresses or port numbers #[derive( Debug, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq, Hash, PartialOrd, Ord, )] diff --git a/crates/controlplane/src/env/mod.rs b/crates/controlplane/src/env/mod.rs index 58b36992..d3747fd2 100644 --- a/crates/controlplane/src/env/mod.rs +++ b/crates/controlplane/src/env/mod.rs @@ -1,4 +1,3 @@ -use core::fmt; use std::{ collections::{HashMap, HashSet}, sync::Arc, @@ -17,7 +16,7 @@ use snops_common::{ sink::TxSink, source::{ComputeTarget, QueryTarget, TxSource}, }, - nodes::{ExternalNode, Node}, + nodes::{ExternalNode, NodeDoc}, ItemDocument, }, state::{ @@ -47,9 +46,8 @@ pub struct Environment { pub storage: Arc, pub network: NetworkId, - // TODO: pub outcome_results: RwLock, - pub node_peers: BiMap, - pub node_states: DashMap, + // A map of nodes to their respective states + pub nodes: DashMap, /// Map of transaction files to their respective counters pub sinks: HashMap>, @@ -60,29 +58,14 @@ pub struct Environment { /// The effective test state of a node. #[derive(Debug, Clone, Serialize)] #[allow(clippy::large_enum_variant)] -pub enum EnvNodeState { - Internal(Node), +pub enum EnvNode { + Internal { + agent: Option, + node: NodeDoc, + }, External(ExternalNode), } -#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize)] -/// A way of looking up a peer in the test state. 
-/// Could technically use AgentPeer like this but it would have needless port -/// information -pub enum EnvPeer { - Internal(AgentId), - External(NodeKey), -} - -impl fmt::Display for EnvPeer { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - EnvPeer::Internal(id) => write!(f, "agent {id}"), - EnvPeer::External(k) => write!(f, "external node {k}"), - } - } -} - pub enum PortType { Node, Bft, @@ -105,13 +88,12 @@ impl Environment { let mut storage_doc = None; - let (mut node_peers, mut node_states) = if let Some(ref env) = prev_env { - // reuse certain elements from the previous environment with the same - // name - (env.node_peers.clone(), env.node_states.clone()) - } else { - (Default::default(), Default::default()) - }; + // reuse certain elements from the previous environment with the same + // name + let mut nodes = prev_env + .as_ref() + .map(|e| e.nodes.clone()) + .unwrap_or_default(); let mut network = NetworkId::default(); @@ -153,48 +135,73 @@ impl Environment { pending_cannons.insert(cannon.name, (cannon.source, cannon.sink)); } - ItemDocument::Nodes(nodes) => { - if let Some(n) = nodes.network { + ItemDocument::Nodes(nodes_doc) => { + if let Some(n) = nodes_doc.network { network = n; } // maps of states and peers that are new to this environment let mut incoming_states = IndexMap::default(); - let mut updated_states = IndexMap::::default(); + let mut updated_states = IndexMap::::default(); let mut incoming_peers = BiMap::default(); // set of resolved keys that will be present (new and old) let mut agent_keys = HashSet::new(); - for (node_key, node) in nodes.expand_internal_replicas() { + for (node_key, node) in nodes_doc.expand_internal_replicas() { // Track this node as a potential agent agent_keys.insert(node_key.clone()); // Skip delegating nodes that are already present in the node map // Agents are able to determine what updates need to be applied // based on their resolved node states. 
- if node_peers.contains_left(&node_key) { - info!("{env_id}: updating node {node_key}"); - updated_states.insert(node_key, EnvNodeState::Internal(node)); - continue; + match nodes.get(&node_key).as_deref() { + Some(EnvNode::Internal { agent, .. }) => { + info!("{env_id}: updating node {node_key}"); + updated_states.insert( + node_key.clone(), + EnvNode::Internal { + agent: *agent, + node, + }, + ); + continue; + } + Some(EnvNode::External(_)) => { + warn!("{env_id}: replacing ext {node_key} with internal node"); + updated_states.insert( + node_key.clone(), + EnvNode::Internal { agent: None, node }, + ); + continue; + } + None => {} } match incoming_states.entry(node_key) { Entry::Occupied(ent) => { Err(PrepareError::DuplicateNodeKey(ent.key().clone()))? } - Entry::Vacant(ent) => ent.insert(EnvNodeState::Internal(node)), + Entry::Vacant(ent) => { + ent.insert(EnvNode::Internal { agent: None, node }) + } }; } // list of nodes that will be removed after applying this document - let nodes_to_remove = node_peers + let nodes_to_remove = nodes .iter() - .filter_map(|(k, v)| match v { - EnvPeer::Internal(_) => (!agent_keys.contains(k)).then_some(k), - EnvPeer::External(_) => (!nodes.external.contains_key(k)).then_some(k), + .filter_map(|e| { + let key = e.key().clone(); + match e.value() { + EnvNode::Internal { .. } => { + (!agent_keys.contains(&key)).then_some(key) + } + EnvNode::External(_) => { + (!nodes_doc.external.contains_key(&key)).then_some(key) + } + } }) - .cloned() .collect::>(); // get a set of all labels the nodes can reference @@ -206,11 +213,15 @@ impl Environment { // list of agents that are now free because their nodes are no longer // going to be part of the environment - let mut removed_agents = node_peers + let mut removed_agents = nodes .iter() - .filter_map(|(key, mode)| { - if let (EnvPeer::Internal(agent), false) = - (mode, agent_keys.contains(key)) + .filter_map(|ent| { + if let ( + EnvNode::Internal { + agent: Some(agent), .. 
+ }, + false, + ) = (ent.value(), agent_keys.contains(ent.key())) { Some(*agent) } else { @@ -247,7 +258,7 @@ impl Environment { } .map(|(key, id, busy)| { // extend the node map with the newly paired agent - incoming_peers.insert(key, EnvPeer::Internal(id)); + incoming_peers.insert(key, id); busy }) .collect(); @@ -256,46 +267,42 @@ impl Environment { "{env_id}: delegated {} nodes to agents", incoming_peers.len() ); - for (key, node) in &incoming_peers { - info!("node {key}: {node}"); + for (key, agent) in &incoming_peers { + // Insert the agent into the node map + if let Some(EnvNode::Internal { agent: v, .. }) = + incoming_states.get_mut(key) + { + info!("node {key}: {agent}"); + *v = Some(*agent); + } - // all re-allocated potentially removed agents are removed + // All re-allocated potentially removed agents are removed // from the agents that will need to be inventoried - match node { - EnvPeer::Internal(agent) if removed_agents.contains(agent) => { - removed_agents.swap_remove(agent); - } - _ => {} + if removed_agents.contains(agent) { + removed_agents.swap_remove(agent); } } - // all removed agents that were not recycled are pending inventory + // All removed agents that were not recycled are pending inventory agents_to_inventory.extend(removed_agents); // append external nodes to the node map - for (node_key, node) in &nodes.external { + for (node_key, node) in &nodes_doc.external { match incoming_states.entry(node_key.clone()) { Entry::Occupied(ent) => { Err(PrepareError::DuplicateNodeKey(ent.key().clone()))? 
} - Entry::Vacant(ent) => { - ent.insert(EnvNodeState::External(node.to_owned())) - } + Entry::Vacant(ent) => ent.insert(EnvNode::External(node.to_owned())), }; } - nodes.external.keys().for_each(|k| { - incoming_peers.insert(k.clone(), EnvPeer::External(k.clone())); - }); // remove the nodes that are no longer relevant nodes_to_remove.into_iter().for_each(|key| { - node_peers.remove_by_left(&key); - node_states.remove(&key); + nodes.remove(&key); }); - node_peers.extend(incoming_peers.into_iter()); - node_states.extend(incoming_states.into_iter()); - node_states.extend(updated_states.into_iter()); + nodes.extend(incoming_states.into_iter()); + nodes.extend(updated_states.into_iter()); } _ => warn!("ignored unimplemented document type"), @@ -347,8 +354,7 @@ impl Environment { id: env_id, storage, network, - node_peers, - node_states, + nodes, sinks, cannons, }); @@ -394,13 +400,13 @@ impl Environment { let mut pending_changes = vec![]; let mut node_map = HashMap::new(); - for entry in self.node_states.iter() { + for entry in self.nodes.iter() { let key = entry.key(); let node = entry.value(); - let EnvNodeState::Internal(node) = node else { + let EnvNode::Internal { agent, node } = node else { continue; }; - let Some(agent_id) = self.get_agent_by_key(key) else { + let Some(agent_id) = *agent else { continue; }; let Some(agent) = state.pool.get(&agent_id) else { @@ -471,14 +477,15 @@ impl Environment { state .update_agent_states( - env.node_peers - .right_values() + env.nodes + .iter() // find all agents associated with the env - .filter_map(|peer| match peer { - EnvPeer::Internal(id) => Some(*id), + .filter_map(|ent| match ent.value() { + EnvNode::Internal { + agent: Some(id), .. + } => Some((*id, AgentState::Inventory)), _ => None, }) - .map(|id| (id, AgentState::Inventory)) // this collect is necessary because the iter sent to reconcile_agents // must be owned by this thread. 
Without this, the iter would hold a reference // to the env.node_peers.right_values(), which is NOT Send @@ -491,17 +498,12 @@ impl Environment { /// Lookup a env agent id by node key. pub fn get_agent_by_key(&self, key: &NodeKey) -> Option { - self.node_peers.get_by_left(key).and_then(|id| match id { - EnvPeer::Internal(id) => Some(*id), - EnvPeer::External(_) => None, + self.nodes.get(key).and_then(|ent| match ent.value() { + EnvNode::Internal { agent, .. } => *agent, + _ => None, }) } - pub fn get_node_key_by_agent(&self, id: AgentId) -> Option<&NodeKey> { - let peer = EnvPeer::Internal(id); - self.node_peers.get_by_right(&peer) - } - pub fn matching_nodes<'a>( &'a self, targets: &'a NodeTargets, @@ -517,42 +519,34 @@ impl Environment { targets: &'a NodeTargets, pool: &'a DashMap, port_type: PortType, - ) -> impl Iterator + 'a { - self.node_peers + ) -> impl Iterator + 'a { + self.nodes .iter() - .filter(|(key, _)| targets.matches(key)) - .filter_map(move |(key, value)| match value { - EnvPeer::Internal(id) => { - let agent = pool.get(id)?; - - Some(( - key, - AgentPeer::Internal( - *id, - match port_type { - PortType::Bft => agent.bft_port(), - PortType::Node => agent.node_port(), - PortType::Rest => agent.rest_port(), - }, - ), - )) - } + .filter(|ent| targets.matches(ent.key())) + .filter_map(move |ent| { + Some(( + ent.key().clone(), + match ent.value() { + EnvNode::Internal { agent: id, .. 
} => { + let agent = id.and_then(|id| pool.get(&id))?; + + AgentPeer::Internal( + agent.id, + match port_type { + PortType::Bft => agent.bft_port(), + PortType::Node => agent.node_port(), + PortType::Rest => agent.rest_port(), + }, + ) + } - EnvPeer::External(_key) => { - let entry = self.node_states.get(key)?; - let EnvNodeState::External(external) = entry.value() else { - return None; - }; - - Some(( - key, - AgentPeer::External(match port_type { - PortType::Bft => external.bft?, - PortType::Node => external.node?, - PortType::Rest => external.rest?, + EnvNode::External(ext) => AgentPeer::External(match port_type { + PortType::Bft => ext.bft?, + PortType::Node => ext.node?, + PortType::Rest => ext.rest?, }), - )) - } + }, + )) }) } @@ -571,15 +565,15 @@ impl Environment { fn nodes_with_peer<'a>( &'a self, key: &'a NodeKey, - ) -> impl Iterator> { - self.node_states.iter().filter(move |s| { + ) -> impl Iterator> { + self.nodes.iter().filter(move |ent| { // Only internal nodes can be agents - let EnvNodeState::Internal(node) = s.value() else { + let EnvNode::Internal { node, .. } = ent.value() else { return false; }; // Ignore self-reference - if s.key() == key { + if ent.key() == key { return false; } @@ -595,13 +589,18 @@ impl Environment { is_port_change: bool, is_ip_change: bool, ) { - let Some(key) = self.get_node_key_by_agent(agent_id) else { + let Some(key) = state + .pool + .get(&agent_id) + .and_then(|a| a.node_key().cloned()) + else { return; }; + let pending_reconciles = self - .nodes_with_peer(key) + .nodes_with_peer(&key) .filter_map(|ent| { - let EnvNodeState::Internal(env_node) = ent.value() else { + let EnvNode::Internal { node: env_node, .. 
} = ent.value() else { return None; }; @@ -692,7 +691,7 @@ impl Environment { state: &GlobalState, id: AgentId, key: &NodeKey, - node: &Node, + node: &NodeDoc, ) -> NodeState { // base node state let mut node_state = node.into_state(key.to_owned()); @@ -713,7 +712,7 @@ impl Environment { &self, pool: &DashMap, id: AgentId, - node: &Node, + node: &NodeDoc, ) -> (Vec, Vec) { // a filter to exclude the current node from the list of peers let not_me = |agent: &AgentPeer| !matches!(agent, AgentPeer::Internal(candidate_id, _) if *candidate_id == id); diff --git a/crates/controlplane/src/env/set.rs b/crates/controlplane/src/env/set.rs index 8f8ffe3e..35837d97 100644 --- a/crates/controlplane/src/env/set.rs +++ b/crates/controlplane/src/env/set.rs @@ -12,7 +12,7 @@ use snops_common::{ state::{AgentId, NodeKey}, }; -use super::{DelegationError, EnvNodeState}; +use super::{DelegationError, EnvNode}; use crate::state::{Agent, AgentClient, Busy, GlobalState}; pub struct AgentMapping { @@ -113,15 +113,12 @@ pub fn get_agent_mappings( } /// Get a list of unique labels given a node config -pub fn labels_from_nodes(nodes: &IndexMap) -> Vec { +pub fn labels_from_nodes(nodes: &IndexMap) -> Vec { let mut labels = HashSet::new(); for node in nodes.values() { - match node { - EnvNodeState::Internal(n) => { - labels.extend(&n.labels); - } - EnvNodeState::External(_) => {} + if let EnvNode::Internal { node: n, .. } = node { + labels.extend(&n.labels); } } @@ -167,7 +164,7 @@ pub fn find_compute_agent( /// with an agent in parallel pub fn pair_with_nodes( agents: Vec, - nodes: &IndexMap, + nodes: &IndexMap, labels: &[Spur], ) -> Result)>, Vec> { // errors that occurred while pairing nodes with agents @@ -181,11 +178,11 @@ pub fn pair_with_nodes( // filter out external nodes // split into nodes that want specific agents and nodes that want specific labels .filter_map(|(key, env_node)| match env_node { - EnvNodeState::Internal(n) => match n.agent { + EnvNode::Internal { node: n, .. 
} => match n.agent { Some(agent) => Some((Some((key, agent)), None)), None => Some((None, Some((key, n.mask(key, labels))))), }, - EnvNodeState::External(_) => None, + EnvNode::External(_) => None, }) // unzip and filter out the Nones .fold((vec![], vec![]), |(mut vec_a, mut vec_b), (a, b)| { diff --git a/crates/controlplane/src/persist/env.rs b/crates/controlplane/src/persist/env.rs index 5c99a277..dd46fe93 100644 --- a/crates/controlplane/src/persist/env.rs +++ b/crates/controlplane/src/persist/env.rs @@ -1,7 +1,5 @@ use std::sync::Arc; -use bimap::BiMap; -use dashmap::DashMap; use snops_common::{ schema::cannon::{sink::TxSink, source::TxSource}, state::{CannonId, EnvId, NetworkId, NodeKey, StorageId, TransactionSendState}, @@ -9,12 +7,11 @@ use snops_common::{ use tokio::sync::Semaphore; use super::prelude::*; -use super::PersistNode; use crate::{ cannon::tracker::TransactionTracker, env::{ error::{EnvError, PrepareError}, - prepare_cannons, EnvNodeState, EnvPeer, Environment, + prepare_cannons, EnvNode, Environment, }, state::GlobalState, }; @@ -22,7 +19,7 @@ use crate::{ #[derive(Clone)] pub struct PersistEnvFormatHeader { version: u8, - nodes: DataHeaderOf, + nodes: DataHeaderOf, tx_source: DataHeaderOf, tx_sink: DataHeaderOf, network: DataHeaderOf, @@ -33,7 +30,7 @@ pub struct PersistEnv { pub storage_id: StorageId, pub network: NetworkId, /// List of nodes and their states or external node info - pub nodes: Vec<(NodeKey, PersistNode)>, + pub nodes: Vec<(NodeKey, EnvNode)>, /// Loaded cannon configs in this env pub cannons: Vec<(CannonId, TxSource, TxSink)>, } @@ -41,29 +38,9 @@ pub struct PersistEnv { impl From<&Environment> for PersistEnv { fn from(value: &Environment) -> Self { let nodes = value - .node_states + .nodes .iter() - .filter_map(|entry| { - let key = entry.key(); - let agent_index = value.node_peers.get_by_left(key).and_then(|v| { - if let EnvPeer::Internal(a) = v { - Some(a) - } else { - None - } - }); - match entry.value() { - 
EnvNodeState::Internal(n) => agent_index.map(|agent| { - ( - key.clone(), - PersistNode::Internal(*agent, Box::new(n.clone())), - ) - }), - EnvNodeState::External(n) => { - Some((key.clone(), PersistNode::External(n.clone()))) - } - } - }) + .map(|ent| (ent.key().clone(), ent.value().clone())) .collect(); PersistEnv { @@ -91,20 +68,7 @@ impl PersistEnv { .get(&(self.network, self.storage_id)) .ok_or(PrepareError::MissingStorage)?; - let mut node_map = BiMap::default(); - let initial_nodes = DashMap::default(); - for (key, v) in self.nodes { - match v { - PersistNode::Internal(agent, n) => { - node_map.insert(key.clone(), EnvPeer::Internal(agent)); - initial_nodes.insert(key, EnvNodeState::Internal(*n)); - } - PersistNode::External(n) => { - node_map.insert(key.clone(), EnvPeer::External(key.clone())); - initial_nodes.insert(key, EnvNodeState::External(n)); - } - } - } + let nodes = self.nodes.into_iter().collect(); let compute_aot_bin = storage.resolve_compute_binary(&state).await?; @@ -142,8 +106,7 @@ impl PersistEnv { id: self.id, network: self.network, storage: storage.clone(), - node_peers: node_map, - node_states: initial_nodes, + nodes, sinks, cannons, }) @@ -197,7 +160,7 @@ impl DataFormat for PersistEnv { type Header = PersistEnvFormatHeader; const LATEST_HEADER: Self::Header = PersistEnvFormatHeader { version: 1, - nodes: PersistNode::LATEST_HEADER, + nodes: EnvNode::LATEST_HEADER, tx_source: TxSource::LATEST_HEADER, tx_sink: TxSink::LATEST_HEADER, network: NetworkId::LATEST_HEADER, @@ -258,8 +221,9 @@ mod tests { state::{InternedId, NetworkId}, }; - use crate::persist::{ - PersistEnv, PersistEnvFormatHeader, PersistNode, PersistNodeFormatHeader, + use crate::{ + env::EnvNode, + persist::{EnvNodeStateFormatHeader, PersistEnv, PersistEnvFormatHeader}, }; macro_rules! 
case { @@ -289,8 +253,8 @@ mod tests { [ PersistEnvFormatHeader::LATEST_HEADER.to_byte_vec()?, PersistEnv::LATEST_HEADER.version.to_byte_vec()?, - PersistNodeFormatHeader::LATEST_HEADER.to_byte_vec()?, - PersistNode::LATEST_HEADER.to_byte_vec()?, + EnvNodeStateFormatHeader::LATEST_HEADER.to_byte_vec()?, + EnvNode::LATEST_HEADER.to_byte_vec()?, TxSourceFormatHeader::LATEST_HEADER.to_byte_vec()?, TxSource::LATEST_HEADER.to_byte_vec()?, TxSinkFormatHeader::LATEST_HEADER.to_byte_vec()?, @@ -315,7 +279,7 @@ mod tests { PersistEnv::LATEST_HEADER.to_byte_vec()?, InternedId::from_str("foo")?.to_byte_vec()?, InternedId::from_str("bar")?.to_byte_vec()?, - Vec::<(String, PersistNode)>::new().to_byte_vec()?, + Vec::<(String, EnvNode)>::new().to_byte_vec()?, Vec::<(InternedId, TxSource, TxSink)>::new().to_byte_vec()?, NetworkId::default().to_byte_vec()?, ] diff --git a/crates/controlplane/src/persist/node.rs b/crates/controlplane/src/persist/node.rs index b25ddfec..9301d16b 100644 --- a/crates/controlplane/src/persist/node.rs +++ b/crates/controlplane/src/persist/node.rs @@ -1,28 +1,21 @@ -use snops_common::{ - schema::{ - nodes::{ExternalNode, Node}, - persist::NodeFormatHeader, - }, - state::AgentId, +use snops_common::schema::{ + nodes::{ExternalNode, NodeDoc}, + persist::NodeFormatHeader, }; use super::prelude::*; +use crate::env::EnvNode; #[derive(Debug, Clone)] -pub struct PersistNodeFormatHeader { +pub struct EnvNodeStateFormatHeader { + pub(crate) version: u8, pub(crate) node: NodeFormatHeader, pub(crate) external_node: DataHeaderOf, } -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum PersistNode { - Internal(AgentId, Box), - External(ExternalNode), -} - -impl DataFormat for PersistNodeFormatHeader { +impl DataFormat for EnvNodeStateFormatHeader { type Header = u8; - const LATEST_HEADER: Self::Header = 1; + const LATEST_HEADER: Self::Header = 2; fn write_data(&self, writer: &mut W) -> Result { Ok(write_dataformat(writer, &self.node)? 
+ write_dataformat(writer, &self.external_node)?) @@ -40,29 +33,31 @@ impl DataFormat for PersistNodeFormatHeader { let node = read_dataformat(reader)?; let external_node = read_dataformat(reader)?; - Ok(PersistNodeFormatHeader { + Ok(EnvNodeStateFormatHeader { + version: *header, node, external_node, }) } } -impl DataFormat for PersistNode { - type Header = PersistNodeFormatHeader; - const LATEST_HEADER: Self::Header = PersistNodeFormatHeader { - node: Node::LATEST_HEADER, +impl DataFormat for EnvNode { + type Header = EnvNodeStateFormatHeader; + const LATEST_HEADER: Self::Header = EnvNodeStateFormatHeader { + version: EnvNodeStateFormatHeader::LATEST_HEADER, + node: NodeDoc::LATEST_HEADER, external_node: ExternalNode::LATEST_HEADER, }; fn write_data(&self, writer: &mut W) -> Result { let mut written = 0; match self { - PersistNode::Internal(id, state) => { + EnvNode::Internal { agent, node } => { written += writer.write_data(&0u8)?; - written += writer.write_data(id)?; - written += writer.write_data(state)?; + written += writer.write_data(agent)?; + written += writer.write_data(node)?; } - PersistNode::External(n) => { + EnvNode::External(n) => { written += writer.write_data(&1u8)?; written += writer.write_data(n)?; } @@ -73,16 +68,21 @@ impl DataFormat for PersistNode { fn read_data(reader: &mut R, header: &Self::Header) -> Result { match reader.read_data(&())? { 0u8 => { - let id = reader.read_data(&())?; - let state = reader.read_data(&header.node)?; - Ok(PersistNode::Internal(id, Box::new(state))) + let agent = if header.version == 1 { + // Version 1 required an agent id, later versions have the agent id as an option + Some(reader.read_data(&())?) + } else { + reader.read_data(&())? 
+ }; + let node = reader.read_data(&header.node)?; + Ok(EnvNode::Internal { agent, node }) } 1u8 => { let n = reader.read_data(&header.external_node)?; - Ok(PersistNode::External(n)) + Ok(EnvNode::External(n)) } n => Err(DataReadError::Custom(format!( - "invalid PersistNode discriminant: {n}" + "invalid EnvNodeState discriminant: {n}" ))), } } @@ -96,13 +96,13 @@ mod tests { format::DataFormat, node_targets::NodeTargets, schema::{ - nodes::{ExternalNode, Node}, + nodes::{ExternalNode, NodeDoc}, persist::NodeFormatHeader, }, state::{HeightRequest, InternedId}, }; - use crate::persist::{PersistNode, PersistNodeFormatHeader}; + use crate::{env::EnvNode, persist::EnvNodeStateFormatHeader}; macro_rules! case { ($name:ident, $ty:ty, $a:expr, $b:expr) => { @@ -127,11 +127,11 @@ mod tests { case!( node_header, - PersistNodeFormatHeader, - PersistNode::LATEST_HEADER, + EnvNodeStateFormatHeader, + EnvNode::LATEST_HEADER, [ NodeFormatHeader::LATEST_HEADER.to_byte_vec()?, - Node::LATEST_HEADER.to_byte_vec()?, + NodeDoc::LATEST_HEADER.to_byte_vec()?, ExternalNode::LATEST_HEADER.to_byte_vec()?, ] .concat() @@ -139,10 +139,10 @@ mod tests { case!( node_internal, - PersistNode, - PersistNode::Internal( - InternedId::from_str("id")?, - Box::new(Node { + EnvNode, + EnvNode::Internal { + agent: Some(InternedId::from_str("id")?), + node: NodeDoc { online: true, replicas: None, key: None, @@ -153,12 +153,12 @@ mod tests { peers: NodeTargets::None, env: Default::default(), binary: None, - }) - ), + } + }, [ 0u8.to_byte_vec()?, - InternedId::from_str("id")?.to_byte_vec()?, - Node { + Some(InternedId::from_str("id")?).to_byte_vec()?, + NodeDoc { online: true, replicas: None, key: None, @@ -177,8 +177,8 @@ mod tests { case!( node_external, - PersistNode, - PersistNode::External(ExternalNode { + EnvNode, + EnvNode::External(ExternalNode { bft: None, node: None, rest: None diff --git a/crates/controlplane/src/server/api.rs b/crates/controlplane/src/server/api.rs index d12b19f8..bea7cf69 
100644 --- a/crates/controlplane/src/server/api.rs +++ b/crates/controlplane/src/server/api.rs @@ -21,11 +21,10 @@ use snops_common::{ use tarpc::context; use super::{actions, error::ServerError, event_ws, models::AgentStatusResponse}; -use crate::{cannon::router::redirect_cannon_routes, make_env_filter, state::AppState}; use crate::{ - env::{EnvPeer, Environment}, - state::AgentFlags, + cannon::router::redirect_cannon_routes, env::EnvNode, make_env_filter, state::AppState, }; +use crate::{env::Environment, state::AgentFlags}; #[macro_export] macro_rules! unwrap_or_not_found { @@ -552,20 +551,19 @@ async fn get_env_topology(Path(env_id): Path, State(state): State { - internal.insert(*id, node_state); - } - EnvPeer::External(ip) => { - external.insert( - nk.to_string(), - json!({"ip": ip.to_string(), "ports": node_state}), + for ent in env.nodes.iter() { + let nk = ent.key(); + match ent.value() { + EnvNode::Internal { agent: id, node } => { + internal.insert( + id.map(|id| id.to_string()) + .unwrap_or_else(|| format!("{nk} pending agent")), + node.clone(), ); } + EnvNode::External(ip) => { + external.insert(nk.to_string(), json!(ip)); + } } } @@ -581,16 +579,22 @@ async fn get_env_topology_resolved( let mut resolved = HashMap::new(); - for (_, peer) in env.node_peers.iter() { - if let EnvPeer::Internal(id) = peer { - let Some(agent) = state.pool.get(id) else { - continue; - }; - match agent.state().clone() { - AgentState::Inventory => continue, - AgentState::Node(_, state) => { - resolved.insert(*id, state); - } + for ent in env.nodes.iter() { + let EnvNode::Internal { + agent: Some(id), .. 
+ } = ent.value() + else { + continue; + }; + + let Some(agent) = state.pool.get(id) else { + continue; + }; + + match agent.state() { + AgentState::Inventory => continue, + AgentState::Node(_, state) => { + resolved.insert(*id, state.clone()); } } } @@ -604,10 +608,10 @@ async fn get_env_agents(Path(env_id): Path, State(state): State Some((k, *id)), + .filter_map(|ent| match ent.value() { + EnvNode::Internal { agent, .. } => Some((ent.key().clone(), *agent)), _ => None, }) .collect::>(), diff --git a/crates/controlplane/src/state/external_peers.rs b/crates/controlplane/src/state/external_peers.rs index 903e7b33..6a357f11 100644 --- a/crates/controlplane/src/state/external_peers.rs +++ b/crates/controlplane/src/state/external_peers.rs @@ -12,7 +12,7 @@ use tokio::{sync::mpsc, time::timeout}; use super::{snarkos_request, AgentClient, GlobalState}; use crate::env::{ cache::{ABlockHash, ATransactionId, MAX_BLOCK_RANGE}, - EnvNodeState, EnvPeer, + EnvNode, }; type ExtPeerPair = (NodeKey, SocketAddr); @@ -233,11 +233,15 @@ pub fn online_agents_above_height( return Vec::new(); }; - env.node_peers + env.nodes .iter() - .filter_map(|(_, peer)| { + .filter_map(|node| { // ensure peer is internal - let EnvPeer::Internal(agent_id) = peer else { + let EnvNode::Internal { + agent: Some(agent_id), + .. 
+ } = node.value() + else { return None; }; let agent = state.pool.get(agent_id)?; @@ -374,7 +378,7 @@ fn get_all_external_peers(state: &GlobalState) -> Vec<((EnvId, NetworkId), Vec Vec<((EnvId, NetworkId), Vec Some((n.key().clone(), *addr)), _ => None, diff --git a/crates/controlplane/src/state/global.rs b/crates/controlplane/src/state/global.rs index ea1ea3ca..c2219e8d 100644 --- a/crates/controlplane/src/state/global.rs +++ b/crates/controlplane/src/state/global.rs @@ -266,7 +266,7 @@ impl GlobalState { AgentPeer::Internal(id, _) => id, AgentPeer::External(addr) => { // lookup the external peer info from the cache - return Some(if let Some(info) = ext_infos.and_then(|c| c.get(key)) { + return Some(if let Some(info) = ext_infos.and_then(|c| c.get(&key)) { (info.score(&now), Some(info.clone()), None, None) } else { (0u32, None, None, Some(addr)) From 7478a654bb77f699a7652f432bcd4c88de39d95f Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sat, 14 Dec 2024 20:00:04 -0500 Subject: [PATCH 18/24] feat(aot): optional keys for node runner --- crates/aot/src/key.rs | 27 +++++++++++++++++++++++++++ crates/aot/src/runner/mod.rs | 4 ++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/crates/aot/src/key.rs b/crates/aot/src/key.rs index b4e8bc7b..c30638f6 100644 --- a/crates/aot/src/key.rs +++ b/crates/aot/src/key.rs @@ -31,3 +31,30 @@ impl Key { } } } + +/// A command line argument for specifying the account private key of the node. +/// Done by a private key or a private key file. 
+#[derive(Debug, Args, Clone)] +#[group(required = false, multiple = false)] +pub struct OptionalKey { + /// Specify the account private key of the node + #[clap(env, long)] + pub private_key: Option>, + /// Specify the account private key of the node + #[clap(env, long)] + pub private_key_file: Option, +} + +impl OptionalKey { + pub fn try_get(self) -> Result> { + match (self.private_key, self.private_key_file) { + (Some(key), None) => Ok(key), + (None, Some(file)) => { + let raw = std::fs::read_to_string(file)?.trim().to_string(); + Ok(PrivateKey::from_str(&raw)?) + } + // Generate a private key if none is provided + _ => Ok(*snarkos_account::Account::::new(&mut rand::thread_rng())?.private_key()), + } + } +} diff --git a/crates/aot/src/runner/mod.rs b/crates/aot/src/runner/mod.rs index fd3dd59d..f58f9d77 100644 --- a/crates/aot/src/runner/mod.rs +++ b/crates/aot/src/runner/mod.rs @@ -23,7 +23,7 @@ use snarkvm::{ use snops_checkpoint::{CheckpointManager, RetentionPolicy}; use snops_common::state::{snarkos_status::SnarkOSStatus, NodeType}; -use crate::{cli::ReloadHandler, Account, Address, DbLedger, Key, Network}; +use crate::{cli::ReloadHandler, key::OptionalKey, Account, Address, DbLedger, Network}; mod metrics; mod rpc; @@ -45,7 +45,7 @@ pub struct Runner { pub node_type: NodeType, #[clap(flatten)] - pub key: Key, + pub key: OptionalKey, /// Specify the IP(v4 or v6) address to bind to. 
#[clap(long = "bind", default_value_t = IpAddr::V4(Ipv4Addr::UNSPECIFIED))] From 321589d3497a03195dea8645b973c1d6d77224e6 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sat, 14 Dec 2024 20:00:33 -0500 Subject: [PATCH 19/24] feat(controlplane): optional storage documents, optional node keys --- crates/common/src/schema/nodes.rs | 2 ++ crates/common/src/schema/storage/mod.rs | 4 +++- crates/controlplane/src/env/mod.rs | 8 ++------ 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/crates/common/src/schema/nodes.rs b/crates/common/src/schema/nodes.rs index 067e4a5e..3e93073b 100644 --- a/crates/common/src/schema/nodes.rs +++ b/crates/common/src/schema/nodes.rs @@ -180,8 +180,10 @@ pub struct NodeDoc { pub online: bool, /// When specified, creates a group of nodes, all with the same /// configuration. + #[serde(default)] pub replicas: Option, /// The private key to start the node with. + #[serde(default)] pub key: Option, /// Height of ledger to inherit. /// diff --git a/crates/common/src/schema/storage/mod.rs b/crates/common/src/schema/storage/mod.rs index a5ba128e..19477c3a 100644 --- a/crates/common/src/schema/storage/mod.rs +++ b/crates/common/src/schema/storage/mod.rs @@ -14,14 +14,16 @@ pub use binaries::*; pub const STORAGE_DIR: &str = "storage"; /// A storage document. Explains how storage for a test should be set up. -#[derive(Debug, Clone, Deserialize, Serialize)] +#[derive(Default, Debug, Clone, Deserialize, Serialize)] #[serde(rename_all = "kebab-case")] pub struct StorageDocument { pub id: StorageId, /// Regen version #[serde(default)] pub regen: u16, + #[serde(default)] pub name: String, + #[serde(default)] pub description: Option, /// Tell nodes not to re-download the storage data. 
#[serde(default)] diff --git a/crates/controlplane/src/env/mod.rs b/crates/controlplane/src/env/mod.rs index d3747fd2..e125dd77 100644 --- a/crates/controlplane/src/env/mod.rs +++ b/crates/controlplane/src/env/mod.rs @@ -311,12 +311,8 @@ impl Environment { // prepare the storage after all the other documents // as it depends on the network id - let storage = LoadedStorage::from_doc( - *storage_doc.ok_or(PrepareError::MissingStorage)?, - &state, - network, - ) - .await?; + let storage = + LoadedStorage::from_doc(*storage_doc.unwrap_or_default(), &state, network).await?; let storage_id = storage.id; From 042533aab42ec1da88fd9b4e34ad8270da66f443 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sat, 14 Dec 2024 20:15:18 -0500 Subject: [PATCH 20/24] feat(snops): support shorthand 'client' node key syntax instead of 'client/' --- crates/common/Cargo.toml | 1 + crates/common/src/node_targets.rs | 83 +++++++++++++++++++++++++---- crates/common/src/state/mod.rs | 2 +- crates/common/src/state/node_key.rs | 82 +++++++++++++++++++++++++++- 4 files changed, 157 insertions(+), 11 deletions(-) diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index 7e9e95d8..e890994f 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -46,3 +46,4 @@ wildmatch.workspace = true [dev-dependencies] indexmap = { workspace = true, features = ["std"] } tokio = { workspace = true, features = ["io-util", "process"] } +serde_yaml = { workspace = true } diff --git a/crates/common/src/node_targets.rs b/crates/common/src/node_targets.rs index 4191137d..46bb71ce 100644 --- a/crates/common/src/node_targets.rs +++ b/crates/common/src/node_targets.rs @@ -108,7 +108,7 @@ impl<'de> Deserialize<'de> for NodeTargets { lazy_static! 
{ static ref NODE_TARGET_REGEX: Regex = - Regex::new(r"^(?P\*|any|client|validator|prover)\/(?P[A-Za-z0-9\-*]+)(?:@(?P[A-Za-z0-9\-*]+))?$") + Regex::new(r"^(?P\*|any|client|validator|prover)(?:\/(?P[A-Za-z0-9\-*]+))?(?:@(?P[A-Za-z0-9\-*]+))?$") .unwrap(); } @@ -192,10 +192,13 @@ impl FromStr for NodeTarget { }; // match the node ID - let id = match &captures["id"] { + let id = match captures + .name("id") + .map(|id| id.as_str()) + .unwrap_or_default() + { // full wildcard - "*" => NodeTargetId::All, - "any" => NodeTargetId::All, + "*" | "any" => NodeTargetId::All, // partial wildcard id if id.contains('*') => NodeTargetId::WildcardPattern(WildMatch::new(id)), @@ -205,17 +208,16 @@ impl FromStr for NodeTarget { }; // match the namespace - let ns = match captures.name("ns") { + let ns = match captures.name("ns").map(|id| id.as_str()) { // full wildcard - Some(id) if id.as_str() == "*" => NodeTargetNamespace::All, - Some(id) if id.as_str() == "any" => NodeTargetNamespace::All, + Some("*") | Some("any") => NodeTargetNamespace::All, // local; either explicitly stated, or empty - Some(id) if id.as_str() == "local" => NodeTargetNamespace::Local, + Some("local") => NodeTargetNamespace::Local, None => NodeTargetNamespace::Local, // literal namespace - Some(id) => NodeTargetNamespace::Literal(id.as_str().into()), + Some(id) => NodeTargetNamespace::Literal(id.to_string()), }; Ok(Self { ty, id, ns }) @@ -454,3 +456,66 @@ impl NodeTargets { } } } + +#[cfg(test)] +mod test { + use wildmatch::WildMatch; + + use crate::{ + node_targets::{ + NodeTarget, NodeTargetId, NodeTargetNamespace, NodeTargetType, NodeTargets, + }, + state::NodeType::*, + }; + + #[test] + fn test_node_key_serde() { + assert_eq!( + serde_yaml::from_str::("client").unwrap(), + NodeTargets::One(NodeTarget { + ty: NodeTargetType::One(Client), + id: NodeTargetId::Literal("".to_string()), + ns: NodeTargetNamespace::Local + }) + ); + assert_eq!( + serde_yaml::from_str::("validator/foo").unwrap(), + 
NodeTargets::One(NodeTarget { + ty: NodeTargetType::One(Validator), + id: NodeTargetId::Literal("foo".to_string()), + ns: NodeTargetNamespace::Local, + }) + ); + assert_eq!( + serde_yaml::from_str::("validator@foo").unwrap(), + NodeTargets::One(NodeTarget { + ty: NodeTargetType::One(Validator), + id: NodeTargetId::Literal("".to_string()), + ns: NodeTargetNamespace::Literal("foo".to_string()), + }) + ); + assert_eq!( + serde_yaml::from_str::("client/foo@bar").unwrap(), + NodeTargets::One(NodeTarget { + ty: NodeTargetType::One(Client), + id: NodeTargetId::Literal("foo".to_string()), + ns: NodeTargetNamespace::Literal("bar".to_string()), + }) + ); + assert_eq!( + serde_yaml::from_str::("client/foo-*@bar").unwrap(), + NodeTargets::One(NodeTarget { + ty: NodeTargetType::One(Client), + id: NodeTargetId::WildcardPattern(WildMatch::new("foo-*")), + ns: NodeTargetNamespace::Literal("bar".to_string()), + }) + ); + + assert!(serde_yaml::from_str::("client@").is_err()); + assert!(serde_yaml::from_str::("unknown@").is_err()); + assert!(serde_yaml::from_str::("unknown").is_err()); + assert!(serde_yaml::from_str::("client@@").is_err()); + assert!(serde_yaml::from_str::("validator/!").is_err()); + assert!(serde_yaml::from_str::("client/!").is_err()); + } +} diff --git a/crates/common/src/state/mod.rs b/crates/common/src/state/mod.rs index f228edfe..60ff9645 100644 --- a/crates/common/src/state/mod.rs +++ b/crates/common/src/state/mod.rs @@ -33,7 +33,7 @@ pub use transaction_status::*; lazy_static! 
{ static ref NODE_KEY_REGEX: Regex = Regex::new( - r"^(?Pclient|validator|prover)\/(?P[A-Za-z0-9\-]*)(?:@(?P[A-Za-z0-9\-]+))?$" + r"^(?Pclient|validator|prover)(?:\/(?P[A-Za-z0-9\-]*))?(?:@(?P[A-Za-z0-9\-]+))?$" ) .unwrap(); static ref INTERNED_ID_REGEX: Regex = diff --git a/crates/common/src/state/node_key.rs b/crates/common/src/state/node_key.rs index 102539f6..f7d8f423 100644 --- a/crates/common/src/state/node_key.rs +++ b/crates/common/src/state/node_key.rs @@ -25,7 +25,10 @@ impl FromStr for NodeKey { let ty = NodeType::from_str(&captures["ty"]).unwrap(); // match the node ID - let id = String::from(&captures["id"]); + let id = captures + .name("id") + .map(|id| id.as_str().to_string()) + .unwrap_or_default(); // match the namespace let ns = match captures.name("ns") { @@ -106,3 +109,80 @@ impl DataFormat for NodeKey { Ok(Self { ty, id, ns }) } } + +#[cfg(test)] +mod test { + use crate::state::{NodeKey, NodeType::*}; + + #[test] + fn test_node_key_parse() { + use super::NodeKey; + + let key = NodeKey { + ty: Client, + id: "test".to_string(), + ns: None, + }; + + let s = key.to_string(); + assert_eq!(s, "client/test"); + + let key2 = s.parse::().unwrap(); + assert_eq!(key, key2); + + let key = NodeKey { + ty: Client, + id: "test".to_string(), + ns: Some("ns".to_string()), + }; + + let s = key.to_string(); + assert_eq!(s, "client/test@ns"); + + let key2 = s.parse::().unwrap(); + assert_eq!(key, key2); + } + + #[test] + fn test_node_key_serde() { + assert_eq!( + serde_yaml::from_str::("client").unwrap(), + NodeKey { + ty: Client, + id: "".to_string(), + ns: None + } + ); + assert_eq!( + serde_yaml::from_str::("validator/foo").unwrap(), + NodeKey { + ty: Validator, + id: "foo".to_string(), + ns: None + } + ); + assert_eq!( + serde_yaml::from_str::("validator@foo").unwrap(), + NodeKey { + ty: Validator, + id: "".to_string(), + ns: Some("foo".to_string()) + } + ); + assert_eq!( + serde_yaml::from_str::("client/foo@bar").unwrap(), + NodeKey { + ty: Client, + id: 
"foo".to_string(), + ns: Some("bar".to_string()) + } + ); + + assert!(serde_yaml::from_str::("client@").is_err()); + assert!(serde_yaml::from_str::("unknown@").is_err()); + assert!(serde_yaml::from_str::("unknown").is_err()); + assert!(serde_yaml::from_str::("client@@").is_err()); + assert!(serde_yaml::from_str::("validator/!").is_err()); + assert!(serde_yaml::from_str::("client/!").is_err()); + } +} From db89f5715421463a952f60bdc2a66650f580d29d Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sun, 15 Dec 2024 02:09:31 -0500 Subject: [PATCH 21/24] fix(controlplane): fix storage document ignoring native genesis without generate params --- crates/controlplane/src/apply/loaded_storage.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/controlplane/src/apply/loaded_storage.rs b/crates/controlplane/src/apply/loaded_storage.rs index d345c60f..5ca30101 100644 --- a/crates/controlplane/src/apply/loaded_storage.rs +++ b/crates/controlplane/src/apply/loaded_storage.rs @@ -292,6 +292,13 @@ impl LoadedStorage { .map_err(|e| StorageError::FailedToGenGenesis(id, e))?; } } + } else { + // if there is no generation params, then we should use the network's native + // genesis + if doc.connect.is_none() { + native_genesis = true; + info!("{id}: using network native genesis") + } } let mut accounts = IndexMap::new(); From c940fdedcc43cbeff82e95ca924f489961641e46 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sun, 15 Dec 2024 02:11:54 -0500 Subject: [PATCH 22/24] chore: cleanup some specs --- crates/common/src/schema/nodes.rs | 4 ++++ specs/canary-clients.yaml | 22 ------------------- ...anary.yaml => example-external-peers.yaml} | 0 specs/mainnet-clients.yaml | 5 +++++ specs/persist-4-validators.yaml | 2 -- specs/testnet-4-validators.yaml | 4 +--- 6 files changed, 10 insertions(+), 27 deletions(-) delete mode 100644 specs/canary-clients.yaml rename specs/{test-4-clients-canary.yaml => example-external-peers.yaml} (100%) create mode 100644 specs/mainnet-clients.yaml diff 
--git a/crates/common/src/schema/nodes.rs b/crates/common/src/schema/nodes.rs index 3e93073b..f987ea07 100644 --- a/crates/common/src/schema/nodes.rs +++ b/crates/common/src/schema/nodes.rs @@ -20,13 +20,17 @@ use crate::{ /// A document describing the node infrastructure for a test. #[derive(Deserialize, Serialize, Debug, Clone)] pub struct NodesDocument { + #[serde(default)] pub name: String, + #[serde(default)] pub description: Option, + /// The network to use for all nodes. /// /// Determines if /mainnet/ or /testnet/ are used in routes. /// /// Also determines which parameters/genesis block to use + #[serde(default)] pub network: Option, #[serde(default)] diff --git a/specs/canary-clients.yaml b/specs/canary-clients.yaml deleted file mode 100644 index 496e0f0b..00000000 --- a/specs/canary-clients.yaml +++ /dev/null @@ -1,22 +0,0 @@ ---- -kind: snops/storage/v1 - -id: canary-clients -name: canary-clients -regen: 1 -generate: - accounts: - clients: 4 - ---- -kind: snops/nodes/v1 -name: 4-clients - -network: canary - -nodes: - client/test: - replicas: 4 - key: clients.$ - height: top - peers: [] diff --git a/specs/test-4-clients-canary.yaml b/specs/example-external-peers.yaml similarity index 100% rename from specs/test-4-clients-canary.yaml rename to specs/example-external-peers.yaml diff --git a/specs/mainnet-clients.yaml b/specs/mainnet-clients.yaml new file mode 100644 index 00000000..70f9ee23 --- /dev/null +++ b/specs/mainnet-clients.yaml @@ -0,0 +1,5 @@ +kind: snops/nodes/v1 +network: mainnet +nodes: + client: + replicas: 4 diff --git a/specs/persist-4-validators.yaml b/specs/persist-4-validators.yaml index 737baefb..bebb8242 100644 --- a/specs/persist-4-validators.yaml +++ b/specs/persist-4-validators.yaml @@ -15,8 +15,6 @@ regen: 1 --- kind: snops/nodes/v1 -name: my-local-mainnet - nodes: validator/test: replicas: 4 diff --git a/specs/testnet-4-validators.yaml b/specs/testnet-4-validators.yaml index 82eed6e6..100149a8 100644 --- 
a/specs/testnet-4-validators.yaml +++ b/specs/testnet-4-validators.yaml @@ -18,9 +18,7 @@ name: 4-validators network: testnet nodes: - validator/test: + validator: replicas: 4 key: committee.$ - height: 0 validators: validator/* - peers: [] From dcd95a31210c43b8ddf0b644ff08254fa36e5777 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sun, 15 Dec 2024 04:06:28 -0500 Subject: [PATCH 23/24] fix(controlplane): fix native genesis logic failing when a storage already exists --- crates/controlplane/src/apply/loaded_storage.rs | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/crates/controlplane/src/apply/loaded_storage.rs b/crates/controlplane/src/apply/loaded_storage.rs index 5ca30101..470ea08d 100644 --- a/crates/controlplane/src/apply/loaded_storage.rs +++ b/crates/controlplane/src/apply/loaded_storage.rs @@ -82,7 +82,12 @@ impl LoadedStorage { let base = state.storage_path(network, id); let version_file = base.join(VERSION_FILE); - let mut native_genesis = false; + // No need to generate if we have a connect url or a genesis block + let native_genesis = + !(doc.connect.is_some() || doc.generate.as_ref().is_some_and(|c| c.genesis.is_some())); + if native_genesis { + info!("{id}: using network native genesis"); + } // TODO: The dir can be made by a previous run and the aot stuff can fail // i.e an empty/incomplete directory can exist and we should check those @@ -154,8 +159,7 @@ impl LoadedStorage { match (doc.connect, generation.genesis.as_ref()) { (None, None) => { - native_genesis = true; - info!("{id}: using network native genesis") + // no genesis needed } (Some(ref url), _) => { // downloaded genesis block is not native @@ -292,13 +296,6 @@ impl LoadedStorage { .map_err(|e| StorageError::FailedToGenGenesis(id, e))?; } } - } else { - // if there is no generation params, then we should use the network's native - // genesis - if doc.connect.is_none() { - native_genesis = true; - info!("{id}: using network native genesis") - } } let mut 
accounts = IndexMap::new(); From 7e807ee0698ea140c324568f4ddbb9f628b8f3b4 Mon Sep 17 00:00:00 2001 From: Meshiest Date: Sun, 15 Dec 2024 04:11:24 -0500 Subject: [PATCH 24/24] refactor(controlplane): replace Arc with a real Semaphore --- crates/agent/src/main.rs | 2 +- crates/controlplane/src/env/set.rs | 35 +++++++++----------------- crates/controlplane/src/state/agent.rs | 33 +++++++++++------------- 3 files changed, 28 insertions(+), 42 deletions(-) diff --git a/crates/agent/src/main.rs b/crates/agent/src/main.rs index ffa33846..2a9695fb 100644 --- a/crates/agent/src/main.rs +++ b/crates/agent/src/main.rs @@ -171,7 +171,7 @@ async fn main() { client::ws_connection(req, Arc::clone(&state2)).await; // Remove the control client state2.client.write().await.take(); - info!("Attempting to reconnect to the control plane..."); + tracing::trace!("Attempting to reconnect to the control plane..."); tokio::time::sleep(Duration::from_secs(5)).await; } }); diff --git a/crates/controlplane/src/env/set.rs b/crates/controlplane/src/env/set.rs index 35837d97..5839441f 100644 --- a/crates/controlplane/src/env/set.rs +++ b/crates/controlplane/src/env/set.rs @@ -1,6 +1,6 @@ use std::{ collections::{HashMap, HashSet}, - sync::{mpsc, Arc, Weak}, + sync::{mpsc, Arc}, }; use fixedbitset::FixedBitSet; @@ -11,13 +11,14 @@ use snops_common::{ set::MASK_PREFIX_LEN, state::{AgentId, NodeKey}, }; +use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use super::{DelegationError, EnvNode}; -use crate::state::{Agent, AgentClient, Busy, GlobalState}; +use crate::state::{Agent, AgentClient, GlobalState}; pub struct AgentMapping { id: AgentId, - claim: Weak, + claim: Arc, mask: FixedBitSet, } @@ -53,7 +54,7 @@ impl AgentMapping { }; // check if the agent is already claimed - if claim.strong_count() > 1 { + if claim.available_permits() == 0 { return None; } @@ -73,22 +74,12 @@ impl AgentMapping { } /// Attempt to atomically claim the agent - pub fn claim(&self) -> Option> { - // avoid needlessly 
upgrading the weak pointer - if self.claim.strong_count() > 1 { - return None; - } - - let arc = self.claim.upgrade()?; - // 2 because the agent owns arc, and this would be the second - // there is a slim chance that two nodes could claim the same agent. if we run - // into this we can add an AtomicBool to the mapping to determine if the - // agent is claimed by the node on this thread - (Arc::strong_count(&arc) == 2).then_some(arc) + pub fn claim(&self) -> Option { + self.claim.clone().try_acquire_owned().ok() } /// Attempt to atomically claim the agent if there is a mask subset - pub fn claim_if_subset(&self, mask: &FixedBitSet) -> Option> { + pub fn claim_if_subset(&self, mask: &FixedBitSet) -> Option { if mask.is_subset(&self.mask) { self.claim() } else { @@ -133,7 +124,7 @@ pub fn labels_from_nodes(nodes: &IndexMap) -> Vec { fn _find_compute_agent_by_mask<'a, I: Iterator>( mut agents: I, labels: &[Spur], -) -> Option<(&'a Agent, Arc)> { +) -> Option<(&'a Agent, OwnedSemaphorePermit)> { // replace with let mut mask = FixedBitSet::with_capacity(labels.len() + MASK_PREFIX_LEN); mask.insert_range(MASK_PREFIX_LEN..labels.len() + MASK_PREFIX_LEN); @@ -149,14 +140,12 @@ fn _find_compute_agent_by_mask<'a, I: Iterator>( pub fn find_compute_agent( state: &GlobalState, labels: &[Spur], -) -> Option<(AgentId, AgentClient, Arc)> { +) -> Option<(AgentId, AgentClient, OwnedSemaphorePermit)> { state.pool.iter().find_map(|a| { if !a.can_compute() || a.is_compute_claimed() || !labels.iter().all(|l| a.has_label(*l)) { return None; } - let arc = a.make_busy(); - a.client_owned() - .and_then(|c| (Arc::strong_count(&arc) == 2).then_some((a.id(), c, arc))) + Some((a.id, a.client_owned()?, a.make_busy()?)) }) } @@ -166,7 +155,7 @@ pub fn pair_with_nodes( agents: Vec, nodes: &IndexMap, labels: &[Spur], -) -> Result)>, Vec> { +) -> Result, Vec> { // errors that occurred while pairing nodes with agents let (errors_tx, errors_rx) = mpsc::channel(); // nodes that were successfully 
claimed. dropping this will automatically diff --git a/crates/controlplane/src/state/agent.rs b/crates/controlplane/src/state/agent.rs index 7e25f87b..f4502877 100644 --- a/crates/controlplane/src/state/agent.rs +++ b/crates/controlplane/src/state/agent.rs @@ -1,6 +1,6 @@ use std::{ net::{IpAddr, SocketAddr}, - sync::{Arc, Weak}, + sync::Arc, time::Instant, }; @@ -19,14 +19,11 @@ use snops_common::{ }, INTERN, }; +use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use super::{AgentClient, AgentFlags, PendingAgentReconcile}; use crate::server::jwt::{Claims, JWT_SECRET}; -#[derive(Debug)] -/// Apparently `const* ()` is not send, so this is a workaround -pub struct Busy; - /// An active agent, known by the control plane. #[derive(Debug)] pub struct Agent { @@ -40,9 +37,9 @@ pub struct Agent { pub(crate) flags: AgentFlags, /// Count of how many executions this agent is currently working on - pub(crate) compute_claim: Arc, + pub(crate) compute_claim: Arc, /// Count of how many environments this agent is pending for - pub(crate) env_claim: Arc, + pub(crate) env_claim: Arc, /// The external address of the agent, along with its local addresses. pub(crate) ports: Option, @@ -54,8 +51,8 @@ impl Agent { Self { id, flags, - compute_claim: Arc::new(Busy), - env_claim: Arc::new(Busy), + compute_claim: Arc::new(Semaphore::new(1)), + env_claim: Arc::new(Semaphore::new(1)), claims: Claims { id, nonce: ChaChaRng::from_entropy().gen(), @@ -78,8 +75,8 @@ impl Agent { Self { id: claims.id, flags, - compute_claim: Arc::new(Busy), - env_claim: Arc::new(Busy), + compute_claim: Arc::new(Semaphore::new(1)), + env_claim: Arc::new(Semaphore::new(1)), claims, connection: AgentConnection::Offline { since: Instant::now(), @@ -153,24 +150,24 @@ impl Agent { } /// Mark an agent as busy. 
This is used to prevent multiple authorizations - pub fn make_busy(&self) -> Arc { - Arc::clone(&self.compute_claim) + pub fn make_busy(&self) -> Option { + self.compute_claim.clone().try_acquire_owned().ok() } /// Mark an agent as busy. This is used to prevent multiple authorizations - pub fn get_compute_claim(&self) -> Weak { - Arc::downgrade(&self.compute_claim) + pub fn get_compute_claim(&self) -> Arc { + Arc::clone(&self.compute_claim) } /// Check if an agent is owned by an environment pub fn is_env_claimed(&self) -> bool { - Arc::strong_count(&self.env_claim) > 1 + self.env_claim.available_permits() == 0 } /// Get a weak reference to the env claim, which can be used to later lock /// this agent for an environment. - pub fn get_env_claim(&self) -> Weak { - Arc::downgrade(&self.env_claim) + pub fn get_env_claim(&self) -> Arc { + Arc::clone(&self.env_claim) } pub fn env(&self) -> Option {