From 6fa1dea3ee42d7de648d8981a567fdbd0d077f3e Mon Sep 17 00:00:00 2001 From: nickwest-zkp Date: Thu, 14 Aug 2025 10:29:33 +0800 Subject: [PATCH 1/9] fix #1 #2 --- bsc_cluster.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bsc_cluster.sh b/bsc_cluster.sh index ff36cbfd..fcde83ac 100644 --- a/bsc_cluster.sh +++ b/bsc_cluster.sh @@ -19,7 +19,7 @@ sleepAfterStart=10 # stop geth client function exit_previous() { ValIdx=$1 - ps -ef | grep geth$ValIdx | grep config |awk '{print $2}' | xargs kill + ps -ef | grep geth$ValIdx | grep config |awk '{print $2}' | xargs -r kill sleep ${sleepBeforeStart} } @@ -60,7 +60,7 @@ function reset_genesis() { poetry install --no-root npm install rm -rf lib/forge-std - forge install --no-git --no-commit foundry-rs/forge-std@v1.7.3 + forge install --no-git foundry-rs/forge-std@v1.7.3 cd lib/forge-std/lib rm -rf ds-test git clone https://github.com/dapphub/ds-test @@ -225,7 +225,7 @@ function native_start() { --metrics --metrics.addr localhost --metrics.port ${MetricsPort} --metrics.expensive \ --pprof --pprof.addr localhost --pprof.port ${PProfPort} \ --gcmode ${gcmode} --syncmode full --monitor.maliciousvote \ - --rialtohash ${rialtoHash} --override.passedforktime ${PassedForkTime} --override.lorentz ${PassedForkTime} --override.maxwell ${LastHardforkTime} \ + --override.passedforktime ${PassedForkTime} --override.lorentz ${PassedForkTime} --override.maxwell ${LastHardforkTime} \ --override.immutabilitythreshold ${FullImmutabilityThreshold} --override.breatheblockinterval ${BreatheBlockInterval} \ --override.minforblobrequest ${MinBlocksForBlobRequests} --override.defaultextrareserve ${DefaultExtraReserveForBlobRequests} \ >> ${workspace}/.local/node${i}/bsc-node.log 2>&1 & @@ -240,7 +240,7 @@ function native_start() { --metrics --metrics.addr localhost --metrics.port $((MetricsPort+1)) --metrics.expensive \ --pprof --pprof.addr localhost --pprof.port $((PProfPort+1)) \ --gcmode ${gcmode} --syncmode full --monitor.maliciousvote \ - --rialtohash ${rialtoHash} --override.passedforktime ${PassedForkTime} --override.lorentz ${PassedForkTime} --override.maxwell ${LastHardforkTime} \ + --override.passedforktime ${PassedForkTime} --override.lorentz ${PassedForkTime} --override.maxwell ${LastHardforkTime} \ --override.immutabilitythreshold ${FullImmutabilityThreshold} --override.breatheblockinterval ${BreatheBlockInterval} \ --override.minforblobrequest ${MinBlocksForBlobRequests} --override.defaultextrareserve ${DefaultExtraReserveForBlobRequests} \ >> ${workspace}/.local/sentry${i}/bsc-node.log 2>&1 & @@ -257,7 +257,7 @@ function native_start() { --metrics --metrics.addr localhost --metrics.port $((6160)) --metrics.expensive \ --pprof --pprof.addr localhost --pprof.port $((7160)) \ --gcmode ${gcmode} --syncmode full --monitor.maliciousvote \ - --rialtohash ${rialtoHash} --override.passedforktime ${PassedForkTime} --override.lorentz ${PassedForkTime} --override.maxwell ${LastHardforkTime} \ + --override.passedforktime ${PassedForkTime} --override.lorentz ${PassedForkTime} --override.maxwell ${LastHardforkTime} \ --override.immutabilitythreshold ${FullImmutabilityThreshold} --override.breatheblockinterval ${BreatheBlockInterval} \ --override.minforblobrequest ${MinBlocksForBlobRequests} --override.defaultextrareserve ${DefaultExtraReserveForBlobRequests} \ >> ${workspace}/.local/fullnode0/bsc-node.log 2>&1 & From 9110aa5df4af356253d050d983ff27101e5751a2 Mon Sep 17 00:00:00 2001 From: nickwest-zkp Date: Fri, 5 Sep 2025 17:38:39 +0800 
Subject: [PATCH 2/9] update genesis submodule --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index d9bb7c4c..bd00c814 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,5 +1,5 @@ [submodule "genesis"] path = genesis - url = https://github.com/bnb-chain/bsc-genesis-contract.git + url = https://github.com/simplechain-org/bsc-genesis-contract.git branch = master ignore = dirty \ No newline at end of file From c9a06c91eaee533df5bf961ee8026f7342f9cae3 Mon Sep 17 00:00:00 2001 From: nickwest-zkp Date: Fri, 5 Sep 2025 17:53:02 +0800 Subject: [PATCH 3/9] update latest genesis --- genesis | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis b/genesis index 234e3685..bf3ac733 160000 --- a/genesis +++ b/genesis @@ -1 +1 @@ -Subproject commit 234e3685ec309624f0fbef41043ae117caebc853 +Subproject commit bf3ac733f8aaf93ed88ca0ad2dcddd051166e4e1 From f6e9fd81ae0d713715ba046950ba815590eb573d Mon Sep 17 00:00:00 2001 From: nickwest-zkp Date: Fri, 5 Sep 2025 19:00:54 +0800 Subject: [PATCH 4/9] remove erroneous submodule --- .gitmodules | 5 ----- genesis | 1 - 2 files changed, 6 deletions(-) delete mode 160000 genesis diff --git a/.gitmodules b/.gitmodules index bd00c814..e69de29b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,5 +0,0 @@ -[submodule "genesis"] - path = genesis - url = https://github.com/simplechain-org/bsc-genesis-contract.git - branch = master - ignore = dirty \ No newline at end of file diff --git a/genesis b/genesis deleted file mode 160000 index bf3ac733..00000000 --- a/genesis +++ /dev/null @@ -1 +0,0 @@ -Subproject commit bf3ac733f8aaf93ed88ca0ad2dcddd051166e4e1 From 06c755fc65c14330603dc722cee7368f5738b74c Mon Sep 17 00:00:00 2001 From: nickwest-zkp Date: Fri, 5 Sep 2025 19:08:03 +0800 Subject: [PATCH 5/9] add correct submodule --- .gitmodules | 3 +++ genesis | 1 + 2 files changed, 4 insertions(+) create mode 160000 genesis diff --git a/.gitmodules b/.gitmodules index e69de29b..f4ac017c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "genesis"] + path = genesis + url = https://github.com/simplechain-org/bsc-genesis-contract.git diff --git a/genesis b/genesis new file mode 160000 index 00000000..bf3ac733 --- /dev/null +++ b/genesis @@ -0,0 +1 @@ +Subproject commit bf3ac733f8aaf93ed88ca0ad2dcddd051166e4e1 From a2c8ebd2a979c4d1793643b628710f301c678401 Mon Sep 17 00:00:00 2001 From: nickwest-zkp Date: Fri, 5 Sep 2025 19:11:07 +0800 Subject: [PATCH 6/9] pin genesis submodule to branch develop/v1.0.1 --- .gitmodules | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitmodules b/.gitmodules index f4ac017c..5f1de8c3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,4 @@ [submodule "genesis"] path = genesis url = https://github.com/simplechain-org/bsc-genesis-contract.git + branch = develop/v1.0.1 From 6e0b3964503305986b8525b4b87c0db82e31bfe3 Mon Sep 17 00:00:00 2001 From: nickwest-zkp Date: Fri, 5 Sep 2025 19:23:40 +0800 Subject: [PATCH 7/9] update submodule --- genesis | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genesis b/genesis index bf3ac733..310ddecb 160000 --- a/genesis +++ b/genesis @@ -1 +1 @@ -Subproject commit bf3ac733f8aaf93ed88ca0ad2dcddd051166e4e1 +Subproject commit 310ddecbff56a8b189604700fa9df9c01929244a From 053988d73ddcfa1056fdfaba2d3f2465db39e73d Mon Sep 17 00:00:00 2001 From: nickwest-zkp Date: Mon, 8 Sep 2025 09:01:20 +0800 Subject: [PATCH 8/9] support Docker and multi-host deploy --- .gitignore | 1 + Dockerfile | 43 +++ bsc_cluster.sh | 41 +-- config.toml | 3 +-
deploy_cluster.py | 648 +++++++++++++++++++++++++++++++++++++++++ deployment-config.yaml | 121 ++++++++ docker-compose.yml | 62 ++++ file_distributor.py | 541 ++++++++++++++++++++++++++++++++++ monitor_cluster.py | 343 ++++++++++++++++++++++ 9 files changed, 1784 insertions(+), 19 deletions(-) create mode 100644 Dockerfile create mode 100644 deploy_cluster.py create mode 100644 deployment-config.yaml create mode 100644 docker-compose.yml create mode 100644 file_distributor.py create mode 100644 monitor_cluster.py diff --git a/.gitignore b/.gitignore index ab57b06f..6ecd745c 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ .idea/ lib/ +node_modules/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..8d1277fa --- /dev/null +++ b/Dockerfile @@ -0,0 +1,43 @@ +# BSC Node Docker Image +FROM ubuntu:20.04 + +# Install dependencies +RUN apt-get update && apt-get install -y \ + wget \ + jq \ + curl \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Create user +RUN useradd -m -s /bin/bash sipc2 + +# Set working directory +WORKDIR /home/sipc2 + +# Copy sipc2 binary and configuration +COPY bin/geth /usr/local/bin/geth +RUN chmod +x /usr/local/bin/geth + +# Copy scripts +COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh +RUN chmod +x /usr/local/bin/docker-entrypoint.sh + +# Create data directory +RUN mkdir -p /home/sipc2/data + +# Set ownership +RUN chown -R sipc2:sipc2 /home/sipc2 + +# Switch to sipc2 user +USER sipc2 + +# Expose ports +EXPOSE 8545 8546 30303 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD curl -f http://localhost:8545 || exit 1 + +# Set entrypoint +ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"] diff --git a/bsc_cluster.sh b/bsc_cluster.sh index 5f747748..32d3ab22 100644 --- a/bsc_cluster.sh +++ b/bsc_cluster.sh @@ -47,23 +47,24 @@ function reset_genesis() { if [ ! 
-f "${workspace}/genesis/genesis-template.json" ]; then cd ${workspace} && git submodule update --init --recursive genesis cd ${workspace}/genesis && git reset --hard ${GENESIS_COMMIT} - fi - cd ${workspace}/genesis - cp genesis-template.json genesis-template.json.bk - cp scripts/init_holders.template scripts/init_holders.template.bk - git stash - cd ${workspace} && git submodule update --remote --recursive genesis && cd ${workspace}/genesis - git reset --hard ${GENESIS_COMMIT} - mv genesis-template.json.bk genesis-template.json - mv scripts/init_holders.template.bk scripts/init_holders.template - poetry install --no-root - npm install - rm -rf lib/forge-std - forge install --no-git foundry-rs/forge-std@v1.7.3 - cd lib/forge-std/lib - rm -rf ds-test - git clone https://github.com/dapphub/ds-test + cd ${workspace}/genesis + cp genesis-template.json genesis-template.json.bk + cp scripts/init_holders.template scripts/init_holders.template.bk + git stash + cd ${workspace} && git submodule update --remote --recursive genesis && cd ${workspace}/genesis + git reset --hard ${GENESIS_COMMIT} + mv genesis-template.json.bk genesis-template.json + mv scripts/init_holders.template.bk scripts/init_holders.template + + poetry install --no-root + npm install + rm -rf lib/forge-std + forge install --no-git foundry-rs/forge-std@v1.7.3 + cd lib/forge-std/lib + rm -rf ds-test + git clone https://github.com/dapphub/ds-test + fi } function prepare_config() { @@ -257,7 +258,6 @@ function native_start() { --metrics --metrics.addr localhost --metrics.port $((6160)) --metrics.expensive \ --pprof --pprof.addr localhost --pprof.port $((7160)) \ --gcmode ${gcmode} --syncmode full --monitor.maliciousvote \ - --override.passedforktime ${PassedForkTime} --override.lorentz ${PassedForkTime} --override.maxwell ${LastHardforkTime} \ --override.immutabilitythreshold ${FullImmutabilityThreshold} --override.breatheblockinterval ${BreatheBlockInterval} \ --override.minforblobrequest ${MinBlocksForBlobRequests} --override.defaultextrareserve ${DefaultExtraReserveForBlobRequests} \ >> ${workspace}/.local/fullnode0/bsc-node.log 2>&1 & @@ -297,7 +297,12 @@ restart) exit_previous $ValidatorIdx native_start $ValidatorIdx ;; +regen-genesis) + create_validator + reset_genesis + prepare_config + ;; *) - echo "Usage: bsc_cluster.sh | reset | stop [vidx]| start [vidx]| restart [vidx]" + echo "Usage: bsc_cluster.sh | reset | stop [vidx]| start [vidx]| restart [vidx] | regen-genesis" ;; esac diff --git a/config.toml b/config.toml index a4d79ac9..0da2e466 100644 --- a/config.toml +++ b/config.toml @@ -1,5 +1,5 @@ [Eth] -NetworkId = 714 +NetworkId = 1914 SyncMode = "full" NoPruning = false NoPrefetch = false @@ -40,6 +40,7 @@ GlobalQueue = 1024 Lifetime = 10800000000000 [Node] +DataDir = "/data" IPCPath = "geth.ipc" HTTPHost = "0.0.0.0" InsecureUnlockAllowed = true diff --git a/deploy_cluster.py b/deploy_cluster.py new file mode 100644 index 00000000..ebdcdde8 --- /dev/null +++ b/deploy_cluster.py @@ -0,0 +1,648 @@ +#!/usr/bin/env python3 +""" +BSC Cluster Deployment Script +Deploys BSC nodes to multiple servers using Docker +""" + +import os +import yaml +import paramiko +import argparse +import subprocess +import time +from pathlib import Path +from typing import Dict, List, Any +import logging +from concurrent.futures import ThreadPoolExecutor, as_completed +from file_distributor import FileDistributor + +# Setup logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = 
logging.getLogger(__name__) + + +class BSCClusterDeployer: + def __init__(self, config_path: str): + self.config = self.load_config(config_path) + self.distributor = FileDistributor(config_path) + + def load_config(self, config_path: str) -> Dict[str, Any]: + """Load deployment configuration from YAML file""" + with open(config_path, 'r', encoding='utf-8') as f: + return yaml.safe_load(f) + + def create_ssh_client(self, server_config: Dict[str, Any]) -> paramiko.SSHClient: + """Create SSH client for server connection""" + return self.distributor.create_ssh_client(server_config) + + def build_docker_image(self) -> bool: + """Build Docker image locally""" + logger.info("Building Docker image...") + + try: + # Check if Dockerfile exists + if not os.path.exists("Dockerfile"): + logger.error("Dockerfile not found in current directory") + return False + + # Build Docker image + image_name = self.config['docker']['image_name'] + image_tag = self.config['docker']['image_tag'] + + cmd = f"docker build -t {image_name}:{image_tag} ." + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + + if result.returncode != 0: + logger.error(f"Docker build failed: {result.stderr}") + return False + + logger.info(f"Docker image {image_name}:{image_tag} built successfully") + return True + + except Exception as e: + logger.error(f"Error building Docker image: {e}") + return False + + def push_docker_image(self) -> bool: + """Push Docker image to registry""" + registry = self.config['docker'].get('registry', '') + if not registry: + logger.info("No registry configured, skipping push") + return True + + logger.info(f"Pushing Docker image to registry: {registry}") + + try: + image_name = self.config['docker']['image_name'] + image_tag = self.config['docker']['image_tag'] + + # Tag image for registry + registry_image = f"{registry}/{image_name}:{image_tag}" + + # Tag the image + tag_cmd = f"docker tag {image_name}:{image_tag} {registry_image}" + result = subprocess.run(tag_cmd, shell=True, capture_output=True, text=True) + if result.returncode != 0: + logger.error(f"Docker tag failed: {result.stderr}") + return False + + # Push the image + push_cmd = f"docker push {registry_image}" + result = subprocess.run(push_cmd, shell=True, capture_output=True, text=True) + if result.returncode != 0: + logger.error(f"Docker push failed: {result.stderr}") + return False + + logger.info(f"Docker image pushed to {registry_image}") + return True + + except Exception as e: + logger.error(f"Error pushing Docker image: {e}") + return False + + def deploy_to_server(self, server_config: Dict[str, Any]) -> bool: + """Deploy SimpleChain2 node to a specific server""" + server_name = server_config['name'] + role = server_config['role'] # Get role from server config + ports = server_config['ports'] # Get ports from server config + node_index = server_config['node_index'] # Get node index + + logger.info(f"Starting deployment to {server_name}") + current_dir = Path(__file__).resolve().parent + + # Get validator address for validator nodes + validator_address = "0x0000000000000000000000000000000000000000" + if role == 'validator': + validator_address = self.get_validator_address(node_index) + logger.info(f"Using validator address: {validator_address}") + + try: + # Create SSH client + ssh_client = self.create_ssh_client(server_config) + + # Create remote directories (use user home directory instead of /opt) + remote_base = f"{current_dir}/sipc2/{server_name}" + self.distributor.ensure_remote_directory(ssh_client, 
remote_base) + self.distributor.ensure_remote_directory(ssh_client, f"{remote_base}/data") + + # Generate Docker run command with correct validator address + docker_cmd = self.generate_docker_run_command(server_config, validator_address) + print(docker_cmd) + data_dir = f"{current_dir}/sipc2/{server_name}/data" + # Create deployment script + if role == 'validator': + deployment_script = f"""#!/bin/bash +set -e + +echo "Starting BSC validator node deployment on {server_name}" + +# Stop existing container if running +docker stop {server_name} 2>/dev/null || true +docker rm {server_name} 2>/dev/null || true + +# Run Docker container +{docker_cmd} + +echo "BSC validator node deployment completed on {server_name}" + +# Wait for node to be ready for staking +echo "Waiting for node to be ready for staking..." +sleep 45 + +# Register validator and stake tokens +echo "Registering validator and staking tokens..." +# Note: This requires the create-validator tool and proper configuration +# The actual staking command should be added here based on your setup +# Example command structure: +# create-validator --consensus-key-dir ~/sipc2/{server_name}/keys/validator \\ +# --vote-key-dir ~/sipc2/{server_name}/keys/bls \\ +# --password-path ~/sipc2/{server_name}/keys/password.txt \\ +# --amount 20001 \\ +# --validator-desc {server_name} \\ +# -rpc-url http://localhost:{ports['http']} + +echo "Validator registration and staking completed" +""" + else: + # Check if we should clean data directory + clean_data = self.config['options'].get('clean_before_deploy', True) + + deployment_script = f"""#!/bin/bash +set -e + +echo "Starting BSC {role} node deployment on {server_name}" + +# Stop existing container if running +docker stop {server_name} 2>/dev/null || true +docker rm {server_name} 2>/dev/null || true + +# Clean up mounted data directory to prevent dirty data issues +if [ "{clean_data}" = "True" ]; then + echo "Cleaning up data directory to prevent dirty data..." 
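+    # Note: a plain "rm -rf dir/*" does not match dotfiles, so a find pass with
+    # -name ".*" follows below to clear hidden files left by previous runs.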
+ if [ -d "{data_dir}" ]; then + rm -rf {data_dir}/* + find {data_dir} -name ".*" -type f -delete 2>/dev/null || true + fi +else + echo "Skipping data directory cleanup (clean_before_deploy=False)" +fi + +# Run Docker container +{docker_cmd} + +echo "BSC {role} node deployment completed on {server_name}" +""" + + # Upload and execute deployment script + remote_script_path = f"{remote_base}/deploy.sh" + with ssh_client.open_sftp() as sftp: + with sftp.file(remote_script_path, 'w') as f: + f.write(deployment_script) + + # Make script executable and run it + stdin, stdout, stderr = ssh_client.exec_command(f"chmod +x {remote_script_path} && {remote_script_path}") + exit_code = stdout.channel.recv_exit_status() + + if exit_code != 0: + error = stderr.read().decode() + logger.error(f"Deployment failed on {server_name}: {error}") + ssh_client.close() + return False + + # If this is a validator node, register and stake tokens + if role == 'validator': + logger.info(f"Validator node deployed, proceeding with staking registration for {server_name}") + if not self.register_validator_stake(server_config, ssh_client, validator_address): + logger.warning(f"Staking registration failed for {server_name}, but deployment was successful") + + logger.info(f"Deployment completed successfully on {server_name}") + ssh_client.close() + return True + + except Exception as e: + logger.error(f"Deployment failed on {server_name}: {e}") + return False + + def generate_docker_run_command(self, server_config: Dict[str, Any], validator_address: str = None) -> str: + """Generate Docker run command for server""" + image_name = self.config['docker']['image_name'] + image_tag = self.config['docker']['image_tag'] + registry = self.config['docker'].get('registry', '') + + if registry: + full_image_name = f"{registry}/{image_name}:{image_tag}" + else: + full_image_name = f"{image_name}:{image_tag}" + + container_name = server_config['name'] + node_index = server_config['node_index'] + role = server_config['role'] + + # Set environment variables + env_vars = [ + f"-e NODE_TYPE={role}", + ] + + # Generate port mappings + ports = server_config['ports'] + port_mappings = [ + f"-p {ports['http']}:8545", # HTTP port + f"-p {ports['ws']}:8546", # WebSocket port + f"-p {ports['p2p']}:30303", # P2P port + f"-p {ports['metrics']}:6060", # Metrics port + f"-p 7060:6060" # PProf port (mapped to metrics port) + ] + + # Generate volume mappings (match docker-entrypoint.sh paths) + current_dir = Path(__file__).resolve().parent + remote_base = f"{current_dir}/sipc2/{container_name}" + + # Basic volume mappings + volume_mappings = [ + f"-v {remote_base}/keys:/home/sipc2/keys", + f"-v {remote_base}/config/genesis.json:/home/sipc2/config/genesis.json", + f"-v {remote_base}/config/config.toml:/home/sipc2/config/config.toml", + f"-v {remote_base}/data:/data", + # f"-v {remote_base}/keys/password.txt:/home/sipc2/password.txt" + ] + + # Add keystore and bls mappings for validators + if role == 'validator': + # Add nodekey mapping + nodekey_path = f"{remote_base}/keys/validator-nodekey" + if os.path.exists(nodekey_path): + volume_mappings.append(f"-v {nodekey_path}:/home/sipc2/keys/nodekey") + + elif role == 'sentry': + # Add nodekey mapping for sentry + nodekey_path = f"{remote_base}/keys/sentry-nodekey" + if os.path.exists(nodekey_path): + volume_mappings.append(f"-v {nodekey_path}:/home/sipc2/keys/nodekey") + + elif role == 'fullnode': + # Add nodekey mapping for fullnode + nodekey_path = f"{remote_base}/keys/fullnode-nodekey" + if 
os.path.exists(nodekey_path): + volume_mappings.append(f"-v {nodekey_path}:/home/sipc2/keys/nodekey") + + # Build Docker run command with native_start parameters + if role == 'validator': + # Use provided validator address or default + if validator_address is None: + validator_address = "0x0000000000000000000000000000000000000000" + + # Validator node startup command based on native_start + docker_cmd = [ + "docker run -d", + f"--name {container_name}", + "--restart unless-stopped", + " ".join(port_mappings), + " ".join(volume_mappings), + full_image_name, + f"--mine --vote --password /home/sipc2/keys/password.txt --unlock {validator_address}", + f"--miner.etherbase {validator_address} --blspassword /home/sipc2/keys/password.txt", + "--nodekey /home/sipc2/keys/nodekey", + "--blswallet /home/sipc2/keys/bls/wallet", + "--keystore /home/sipc2/keys/validator/keystore", + "--rpc.allow-unprotected-txs --allow-insecure-unlock", + "--ws.addr 0.0.0.0 --ws.port 8546 --http.addr 0.0.0.0 --http.port 8545 --http.corsdomain '*'", + "--metrics --metrics.addr localhost --metrics.port 6060 --metrics.expensive", + "--pprof --pprof.addr localhost --pprof.port 6060", + "--gcmode archive --syncmode full --monitor.maliciousvote", + "--override.passedforktime 1725500000 --override.lorentz 1725500000 --override.maxwell 1725500000", + "--override.immutabilitythreshold 100 --override.breatheblockinterval 300", + "--override.minforblobrequest 20 --override.defaultextrareserve 10" + ] + else: + # Sentry/Full node startup command + docker_cmd = [ + "docker run -d", + f"--name {container_name}", + "--restart unless-stopped", + " ".join(port_mappings), + " ".join(volume_mappings), + full_image_name, + "--rpc.allow-unprotected-txs --allow-insecure-unlock", + "--ws.addr 0.0.0.0 --ws.port 8546 --http.addr 0.0.0.0 --http.port 8545 --http.corsdomain '*'", + "--metrics --metrics.addr localhost --metrics.port 6060 --metrics.expensive", + "--pprof --pprof.addr localhost --pprof.port 6060", + "--gcmode archive --syncmode full", + "--override.passedforktime 1725500000 --override.lorentz 1725500000 --override.maxwell 1725500000", + "--override.immutabilitythreshold 100 --override.breatheblockinterval 300", + "--override.minforblobrequest 20 --override.defaultextrareserve 10" + ] + + return " ".join(docker_cmd) + + def get_validator_address(self, node_index: int) -> str: + """Get validator address from keystore file""" + import os + import json + + try: + # Path to validator keystore directory + current_dir = Path(__file__).resolve().parent + validator_dir = os.path.join(current_dir, f"keys/validator{node_index}/keystore") + + if not os.path.exists(validator_dir): + logger.error(f"Validator keystore directory not found: {validator_dir}") + return "0x0000000000000000000000000000000000000000" + + # Find keystore file (should be the only file in the directory) + keystore_files = [f for f in os.listdir(validator_dir)] + if not keystore_files: + logger.error(f"No keystore file found in {validator_dir}") + return "0x0000000000000000000000000000000000000000" + + keystore_file = os.path.join(validator_dir, keystore_files[0]) + + # Read and parse keystore file + with open(keystore_file, 'r') as f: + keystore_data = json.load(f) + + address = keystore_data.get('address', '') + if address: + # Ensure address has 0x prefix + if not address.startswith('0x'): + address = f"0x{address}" + logger.info(f"Found validator address for node {node_index}: {address}") + return address + else: + logger.error(f"No address found in keystore file 
{keystore_file}") + return "0x0000000000000000000000000000000000000000" + + except Exception as e: + logger.error(f"Error reading validator address for node {node_index}: {e}") + return "0x0000000000000000000000000000000000000000" + + def regenerate_genesis_and_configs(self) -> bool: + """Regenerate genesis.json and config files if configured""" + logger.info("Checking if genesis and config regeneration is needed...") + + regenerate_genesis = self.config['options'].get('regenerate_genesis', False) + regenerate_configs = self.config['options'].get('regenerate_configs', False) + + if not regenerate_genesis and not regenerate_configs: + logger.info("Genesis and config regeneration are both disabled, skipping...") + return True + + try: + if regenerate_genesis: + logger.info("Regenerating genesis.json and base config...") + + # Call bsc_cluster.sh to regenerate genesis + import subprocess + script_path = "./bsc_cluster.sh" + if not os.path.exists(script_path): + print(f"ERROR: Script not found at {script_path}") + else: + result = subprocess.run(["bash", script_path, "regen-genesis"], + capture_output=True, + text=True, + cwd=".") + + print("Return code:", result.returncode) + print("STDOUT:", result.stdout) + if result.stderr: + print("STDERR:", result.stderr) + #result = subprocess.run(["bash", "bsc_cluster.sh regen-genesis"], capture_output=True, text=True, cwd=".") + if result.returncode != 0: + logger.error(f"Failed to regenerate genesis: {result.stderr}") + return False + logger.info("Genesis and config regeneration completed successfully") + + if regenerate_configs: + logger.info("Server-specific config regeneration will be handled during file distribution") + + return True + + except Exception as e: + logger.error(f"Error during regeneration: {e}") + return False + + def register_validator_stake(self, server_config: Dict[str, Any], ssh_client: paramiko.SSHClient, validator_address: str = None) -> bool: + """Register validator and stake tokens after deployment""" + server_name = server_config['name'] + node_index = server_config['node_index'] + ports = server_config['ports'] + + # Get validator address if not provided + if validator_address is None: + validator_address = self.get_validator_address(node_index) + + logger.info(f"Registering validator {validator_address} and staking tokens for {server_name}") + current_dir = Path(__file__).resolve().parent + try: + # Wait for node to be ready + time.sleep(45) + + # Create staking command using native create-validator binary + staking_command = f""" +# Execute validator registration using native binary +echo "Executing validator registration using native create-validator..." +echo "Validator address: {validator_address}" + +{current_dir}/sipc2/{server_name}/create-validator \\ + --consensus-key-dir {current_dir}/sipc2/{server_name}/keys/validator \\ + --vote-key-dir {current_dir}/sipc2/{server_name}/keys \\ + --password-path {current_dir}/sipc2/{server_name}/keys/password.txt \\ + --amount 20001 \\ + --validator-desc Val{node_index} \\ + --rpc-url http://localhost:{ports['http']} + +if [ $? 
-eq 0 ]; then + echo "Validator registration completed successfully for {validator_address}" +else + echo "Validator registration failed for {validator_address}, but node deployment was successful" + echo "You may need to manually register the validator later" + exit 1 +fi +""" + + # Upload and execute staking script + remote_script_path = f"{current_dir}/sipc2/{server_name}/register_stake.sh" + with ssh_client.open_sftp() as sftp: + with sftp.file(remote_script_path, 'w') as f: + f.write(staking_command) + + # Make script executable and run it + stdin, stdout, stderr = ssh_client.exec_command(f"chmod +x {remote_script_path} && {remote_script_path}") + print(stdout.read().decode()) + print(stderr.read().decode()) + exit_code = stdout.channel.recv_exit_status() + + if exit_code != 0: + error = stderr.read().decode() + logger.warning(f"Staking registration failed on {server_name}: {error}") + logger.warning("This might be expected if create-validator tool is not available") + return False + + logger.info(f"Staking registration completed successfully on {server_name}") + return True + + except Exception as e: + logger.error(f"Error during staking registration on {server_name}: {e}") + return False + + def monitor_deployment(self) -> Dict[str, Any]: + """Monitor deployment status""" + logger.info("Monitoring deployment status...") + + status = {} + servers = self.config['servers'] + + for server in servers: + server_name = server['name'] + status[server_name] = self.check_server_status(server) + + return status + + def check_server_status(self, server_config: Dict[str, Any]) -> Dict[str, str]: + """Check status of BSC node on server""" + try: + ssh_client = self.create_ssh_client(server_config) + container_name = server_config['name'] + + # Check if container is running + stdin, stdout, stderr = ssh_client.exec_command(f"docker ps --filter name={container_name} --format '{{{{.Status}}}}'") + status_output = stdout.read().decode().strip() + + if status_output: + # Check container health + stdin, stdout, stderr = ssh_client.exec_command(f"docker inspect {container_name} --format '{{{{.State.Health.Status}}}}'") + health_output = stdout.read().decode().strip() + + return { + "status": "running", + "health": health_output if health_output else "unknown", + "details": status_output + } + else: + return { + "status": "stopped", + "health": "n/a", + "details": "Container not running" + } + + except Exception as e: + return { + "status": "error", + "health": "n/a", + "details": str(e) + } + + def deploy_cluster(self) -> bool: + """Deploy BSC cluster to all servers""" + logger.info("Starting BSC cluster deployment") + + # Build Docker image + if not self.config['options'].get('skip_build', False): + if not self.build_docker_image(): + return False + + if not self.push_docker_image(): + return False + + # Regenerate genesis and configs if needed + if not self.regenerate_genesis_and_configs(): + logger.error("Failed to regenerate genesis and configs") + return False + + # Distribute files + if not self.config['options'].get('skip_distribution', False): + if not self.distributor.distribute_files( + parallel=self.config['options'].get('parallel_deployment', True), + max_parallel=self.config['options'].get('max_parallel', 5) + ): + return False + + # Deploy to servers + servers = self.config['servers'] + parallel = self.config['options'].get('parallel_deployment', True) + max_parallel = self.config['options'].get('max_parallel', 5) + + if parallel and len(servers) > 1: + logger.info(f"Starting parallel 
deployment to {len(servers)} servers") + + with ThreadPoolExecutor(max_workers=max_parallel) as executor: + futures = [ + executor.submit(self.deploy_to_server, server) + for server in servers + ] + + success_count = 0 + for future in as_completed(futures): + if future.result(): + success_count += 1 + + logger.info(f"Deployment completed: {success_count}/{len(servers)} successful") + return success_count == len(servers) + else: + logger.info("Starting sequential deployment") + + success_count = 0 + for server in servers: + if self.deploy_to_server(server): + success_count += 1 + + logger.info(f"Deployment completed: {success_count}/{len(servers)} successful") + return success_count == len(servers) + + +def main(): + parser = argparse.ArgumentParser(description="BSC Cluster Deployer") + parser.add_argument("--config", default="deployment-config.yaml", help="Path to deployment config file") + parser.add_argument("--action", choices=['deploy', 'monitor', 'files'], default='deploy', help="Action to perform") + parser.add_argument("--skip-build", action="store_true", help="Skip Docker image build") + parser.add_argument("--skip-distribution", action="store_true", help="Skip file distribution") + parser.add_argument("--regenerate-genesis", action="store_true", help="Force regenerate genesis.json") + parser.add_argument("--no-regenerate-genesis", action="store_true", help="Skip genesis.json regeneration") + parser.add_argument("--regenerate-configs", action="store_true", help="Force regenerate server-specific configs") + parser.add_argument("--no-regenerate-configs", action="store_true", help="Skip server-specific config regeneration") + + args = parser.parse_args() + + # Validate config file + if not os.path.exists(args.config): + logger.error(f"Configuration file not found: {args.config}") + return 1 + + try: + deployer = BSCClusterDeployer(args.config) + + # Override config options with command line args + if args.skip_build: + deployer.config['options']['skip_build'] = True + if args.skip_distribution: + deployer.config['options']['skip_distribution'] = True + + # Handle genesis regeneration options + if args.regenerate_genesis: + deployer.config['options']['regenerate_genesis'] = True + if args.no_regenerate_genesis: + deployer.config['options']['regenerate_genesis'] = False + + # Handle config regeneration options + if args.regenerate_configs: + deployer.config['options']['regenerate_configs'] = True + if args.no_regenerate_configs: + deployer.config['options']['regenerate_configs'] = False + + if args.action == 'deploy': + success = deployer.deploy_cluster() + elif args.action == 'monitor': + status = deployer.monitor_deployment() + print(yaml.dump(status, default_flow_style=False)) + success = True + elif args.action == 'files': + success = deployer.distributor.distribute_files() + + return 0 if success else 1 + + except Exception as e: + logger.error(f"Deployment failed: {e}") + return 1 + + +if __name__ == "__main__": + exit(main()) diff --git a/deployment-config.yaml b/deployment-config.yaml new file mode 100644 index 00000000..6f4ce6ef --- /dev/null +++ b/deployment-config.yaml @@ -0,0 +1,121 @@ +# BSC Cluster Deployment Configuration +deployment: + name: "simple-chain" + version: "1.0.0" + chain_id: 1914 + +# Cluster configuration +cluster: + size: 1 + enable_sentry_nodes: false + enable_full_nodes: false + +# Docker configuration +docker: + image_name: "nickwest-zkp/simplechain-v2" + image_tag: "latest" + registry: "ghcr.io" # Use GitHub Container Registry + +# Server cluster 
configuration +servers: + - name: "validator-1" + host: "192.168.3.205" + user: "ubuntu" + ssh_key: "~/.ssh/id_ed25519" + role: "validator" + node_index: 0 + ports: + http: 8545 + ws: 8546 + p2p: 30303 + metrics: 6060 + + # - name: "validator-2" + # host: "192.168.3.209" + # user: "ubuntu" + # ssh_key: "~/.ssh/id_ed25519" + # role: "validator" + # node_index: 1 + # ports: + # http: 8545 + # ws: 8546 + # p2p: 30303 + # metrics: 6060 + + # - name: "validator-3" + # host: "192.168.1.103" + # user: "ubuntu" + # ssh_key: "~/.ssh/id_rsa" + # role: "validator" + # node_index: 2 + # ports: + # http: 8745 + # ws: 8746 + # p2p: 32303 + # metrics: 6260 + + # - name: "validator-4" + # host: "192.168.1.104" + # user: "ubuntu" + # ssh_key: "~/.ssh/id_rsa" + # role: "validator" + # node_index: 3 + # ports: + # http: 8845 + # ws: 8846 + # p2p: 33303 + # metrics: 6360 + + # - name: "sentry-1" + # host: "192.168.1.105" + # user: "ubuntu" + # ssh_key: "~/.ssh/id_rsa" + # role: "sentry" + # node_index: 0 + # ports: + # http: 8945 + # ws: 8946 + # p2p: 34303 + # metrics: 6460 + + # - name: "fullnode-1" + # host: "192.168.1.106" + # user: "ubuntu" + # ssh_key: "~/.ssh/id_rsa" + # role: "fullnode" + # node_index: 0 + # ports: + # http: 9045 + # ws: 9046 + # p2p: 35303 + # metrics: 6560 + +# File distribution configuration +files: + genesis: "genesis/genesis.json" + config: "config.toml" + keys_base: "keys" + deployment_base: "deployment" + +# Deployment options +options: + skip_build: true # Skip Docker build since we use pre-built image from ghcr.io + skip_distribution: false # Still need to distribute config and keys + regenerate_genesis: true # Regenerate genesis.json before deployment + regenerate_configs: true # Regenerate config.toml for each server + clean_before_deploy: true + parallel_deployment: true + max_parallel: 5 + +# Monitoring configuration +monitoring: + enabled: true + prometheus_port: 9090 + grafana_port: 3000 + +# Logging configuration +logging: + level: "info" + format: "json" + rotate_size: "10M" + max_backups: 5 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..db5067e8 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,62 @@ +version: '3.8' + +services: + bsc-validator-1: + build: . + container_name: bsc-validator-1 + environment: + - NODE_TYPE=validator + - CONSENSUS_ADDRESS=0x${VALIDATOR_ADDRESS_1} + volumes: + - ./deployment/validator1:/home/sipc2 + - ./genesis/genesis.json:/home/sipc2/genesis.json + - ./config.toml:/home/sipc2/config.toml + ports: + - "8545:8545" + - "8546:8546" + - "30303:30303" + - "6060:6060" + networks: + - bsc-network + restart: unless-stopped + + bsc-validator-2: + build: . + container_name: bsc-validator-2 + environment: + - NODE_TYPE=validator + - CONSENSUS_ADDRESS=0x${VALIDATOR_ADDRESS_2} + volumes: + - ./deployment/validator2:/home/sipc2 + - ./genesis/genesis.json:/home/sipc2/genesis.json + - ./config.toml:/home/sipc2/config.toml + ports: + - "8645:8545" + - "8646:8546" + - "31303:30303" + - "6160:6060" + networks: + - bsc-network + restart: unless-stopped + + # bsc-fullnode: + # build: . 
+ # container_name: bsc-fullnode + # environment: + # - NODE_TYPE=fullnode + # volumes: + # - ./deployment/fullnode:/home/bsc + # - ./genesis/genesis.json:/home/bsc/genesis.json + # - ./config.toml:/home/bsc/config.toml + # ports: + # - "8745:8545" + # - "8746:8546" + # - "32303:30303" + # - "6260:6060" + # networks: + # - bsc-network + # restart: unless-stopped + +networks: + bsc-network: + driver: bridge diff --git a/file_distributor.py b/file_distributor.py new file mode 100644 index 00000000..b6df4258 --- /dev/null +++ b/file_distributor.py @@ -0,0 +1,541 @@ +#!/usr/bin/env python3 +""" +SIPC2 Node File Distributor +Distributes necessary files to remote servers for Docker deployment +""" + +import os +import yaml +import paramiko +import argparse +import shutil +import socket +import subprocess +from pathlib import Path +from typing import Dict, List, Any +import logging +from concurrent.futures import ThreadPoolExecutor, as_completed + +# Setup logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + + +class FileDistributor: + def __init__(self, config_path: str): + self.config = self.load_config(config_path) + self.ssh_clients = {} + + def is_localhost(self, host: str) -> bool: + """Check if the target host is localhost""" + localhost_addresses = ['localhost', '127.0.0.1', '::1'] + + # Check direct matches + if host.lower() in localhost_addresses: + return True + + # Try to resolve hostname and check if it's localhost + try: + resolved_ip = socket.gethostbyname(host) + if resolved_ip in ['127.0.0.1', '::1']: + return True + + # Get local IP addresses + local_ips = socket.gethostbyname_ex(socket.gethostname())[2] + if resolved_ip in local_ips: + return True + + except socket.gaierror: + pass + + return False + + def generate_genesis_and_config(self): + """Generate genesis.json and base config.toml before distribution""" + logger.info("Checking if genesis generation is needed...") + + # Check if regeneration is enabled in config + if not self.config['options'].get('regenerate_genesis', False): + logger.info("Genesis regeneration is disabled, skipping...") + return True + + logger.info("Generating genesis.json and config files...") + + try: + # Call bsc_cluster.sh to generate genesis + import subprocess + result = subprocess.run(["bash", "bsc_cluster.sh"], capture_output=True, text=True) + if result.returncode != 0: + logger.error(f"Failed to generate genesis: {result.stderr}") + return False + + logger.info("Genesis and config generation completed") + return True + + except Exception as e: + logger.error(f"Error generating genesis and config: {e}") + return False + + def generate_server_config(self, server_config: Dict[str, Any]) -> str: + """Generate server-specific config.toml""" + # Read base config template + config_template_path = self.config['files']['config'] + if not os.path.exists(config_template_path): + logger.error(f"Config template not found: {config_template_path}") + return None + + with open(config_template_path, 'r', encoding='utf-8') as f: + config_content = f.read() + + # Update config with server-specific values + ports = server_config['ports'] + chain_id = self.config['deployment']['chain_id'] + + # Replace key configuration values + config_content = config_content.replace('NetworkId = 56', f'NetworkId = {chain_id}') + config_content = config_content.replace('HTTPPort = 8545', f'HTTPPort = {ports["http"]}') + config_content = config_content.replace('WSPort = 8546', f'WSPort = 
{ports["ws"]}') + + # Generate server-specific config file path + server_name = server_config['name'] + server_config_path = f"config/config_{server_name}.toml" + + # Ensure config directory exists + os.makedirs("config", exist_ok=True) + + # Write server-specific config + with open(server_config_path, 'w', encoding='utf-8') as f: + f.write(config_content) + + logger.info(f"Generated server config for {server_name}: {server_config_path}") + return server_config_path + + def load_config(self, config_path: str) -> Dict[str, Any]: + """Load deployment configuration from YAML file""" + with open(config_path, 'r', encoding='utf-8') as f: + return yaml.safe_load(f) + + def create_ssh_client(self, server_config: Dict[str, Any]) -> paramiko.SSHClient: + """Create SSH client for server connection""" + client = paramiko.SSHClient() + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + try: + # Load SSH key + private_key_path = os.path.expanduser(server_config['ssh_key']) + private_key = None + + # Try to load Ed25519 key first + try: + private_key = paramiko.Ed25519Key.from_private_key_file(private_key_path) + logger.debug(f"Loaded Ed25519 key from {private_key_path}") + except Exception: + # If Ed25519 fails, try RSA key + try: + private_key = paramiko.RSAKey.from_private_key_file(private_key_path) + logger.debug(f"Loaded RSA key from {private_key_path}") + except Exception as rsa_error: + logger.error(f"Failed to load SSH key from {private_key_path}: {rsa_error}") + raise ValueError(f"No valid SSH key found at {private_key_path}") + + if private_key is None: + raise ValueError(f"Could not load SSH key from {private_key_path}") + + # Connect to server + client.connect( + hostname=server_config['host'], + username=server_config['user'], + pkey=private_key + ) + + logger.info(f"Connected to {server_config['name']} ({server_config['host']})") + return client + + except Exception as e: + logger.error(f"Failed to connect to {server_config['name']}: {e}") + raise + + def ensure_remote_directory(self, ssh_client: paramiko.SSHClient, remote_path: str): + """Ensure remote directory exists""" + try: + stdin, stdout, stderr = ssh_client.exec_command(f"mkdir -p {remote_path}") + exit_code = stdout.channel.recv_exit_status() + + if exit_code != 0: + error = stderr.read().decode() + logger.error(f"Failed to create directory {remote_path}: {error}") + return False + + return True + except Exception as e: + logger.error(f"Error creating directory {remote_path}: {e}") + return False + + def copy_file_local(self, local_path: str, remote_path: str) -> bool: + """Copy file locally (for localhost deployments)""" + try: + # Ensure destination directory exists + remote_dir = os.path.dirname(remote_path) + os.makedirs(remote_dir, exist_ok=True) + + # Copy file + shutil.copy2(local_path, remote_path) + logger.info(f"Copied {local_path} -> {remote_path}") + return True + + except Exception as e: + logger.error(f"Failed to copy {local_path} to {remote_path}: {e}") + return False + + def upload_file(self, ssh_client: paramiko.SSHClient, local_path: str, remote_path: str, host: str = None) -> bool: + """Upload file to remote server or copy locally for localhost""" + try: + # Check if target is localhost + if host and self.is_localhost(host): + return self.copy_file_local(local_path, remote_path) + + # Create SFTP client for remote hosts + sftp = ssh_client.open_sftp() + + # Upload file + sftp.put(local_path, remote_path) + + sftp.close() + logger.info(f"Uploaded {local_path} -> {remote_path}") + return True + + 
except Exception as e: + logger.error(f"Failed to upload {local_path} to {remote_path}: {e}") + return False + + def copy_directory_local(self, local_dir: str, remote_dir: str) -> bool: + """Copy directory locally (for localhost deployments)""" + try: + # Use shutil.copytree to copy entire directory + if os.path.exists(remote_dir): + shutil.rmtree(remote_dir) + shutil.copytree(local_dir, remote_dir) + logger.info(f"Copied directory {local_dir} -> {remote_dir}") + return True + + except Exception as e: + logger.error(f"Failed to copy directory {local_dir} to {remote_dir}: {e}") + return False + + def upload_directory(self, ssh_client: paramiko.SSHClient, local_dir: str, remote_dir: str, host: str = None) -> bool: + """Upload directory recursively to remote server or copy locally for localhost""" + try: + # Check if target is localhost + if host and self.is_localhost(host): + return self.copy_directory_local(local_dir, remote_dir) + + # Create SFTP client for remote hosts + sftp = ssh_client.open_sftp() + + # Walk through local directory + for root, dirs, files in os.walk(local_dir): + # Calculate relative path + relative_path = os.path.relpath(root, local_dir) + if relative_path == '.': + remote_path = remote_dir + else: + remote_path = os.path.join(remote_dir, relative_path) + + # Create remote directory + try: + sftp.mkdir(remote_path) + except IOError: + pass # Directory might already exist + + # Upload files + for file in files: + local_file = os.path.join(root, file) + remote_file = os.path.join(remote_path, file) + sftp.put(local_file, remote_file) + logger.info(f"Uploaded {local_file} -> {remote_file}") + + sftp.close() + logger.info(f"Uploaded directory {local_dir} -> {remote_dir}") + return True + + except Exception as e: + logger.error(f"Failed to upload directory {local_dir} to {remote_dir}: {e}") + return False + + def distribute_files_to_server(self, server_config: Dict[str, Any]) -> bool: + """Distribute files to a specific server""" + server_name = server_config['name'] + server_host = server_config['host'] + logger.info(f"Starting file distribution to {server_name}") + current_dir = Path(__file__).resolve().parent + + # Check if this is localhost deployment + is_local = self.is_localhost(server_host) + if is_local: + logger.info(f"Detected localhost deployment for {server_name}, will use local file copy") + else: + logger.info(f"Remote deployment to {server_name} ({server_host})") + + try: + # For localhost, we don't need SSH client + if is_local: + ssh_client = None + else: + # Create SSH client for remote hosts + ssh_client = self.create_ssh_client(server_config) + + # Create remote directories (use user home directory instead of /opt) + if is_local: + remote_base = os.path.join(current_dir, "sipc2", server_name) + os.makedirs(remote_base, exist_ok=True) + os.makedirs(os.path.join(remote_base, "config"), exist_ok=True) + os.makedirs(os.path.join(remote_base, "keys"), exist_ok=True) + else: + remote_base = f"{current_dir}/sipc2/{server_name}" + if ssh_client: + self.ensure_remote_directory(ssh_client, remote_base) + self.ensure_remote_directory(ssh_client, f"{remote_base}/config") + self.ensure_remote_directory(ssh_client, f"{remote_base}/keys") + else: + logger.error("SSH client is not available for remote directory creation") + return False + + # Compile create-validator if binary doesn't exist + create_validator_path = os.path.join(current_dir, "create-validator/create-validator") + create_validator_dir = os.path.join(current_dir, "create-validator") + + if not 
os.path.exists(create_validator_path): + logger.info("create-validator binary not found, compiling from Go source...") + try: + # Check if Go is installed + subprocess.run(["go", "version"], check=True, capture_output=True) + + # Change to create-validator directory and build + os.chdir(create_validator_dir) + result = subprocess.run(["go", "build", "-o", "create-validator", "main.go"], + capture_output=True, text=True) + + if result.returncode == 0: + logger.info("Successfully compiled create-validator") + else: + logger.error(f"Failed to compile create-validator: {result.stderr}") + logger.warning("Continuing without create-validator binary") + + except subprocess.CalledProcessError as e: + logger.error(f"Go is not installed or not accessible: {e}") + logger.warning("Continuing without create-validator binary") + except FileNotFoundError: + logger.error("Go compiler not found") + logger.warning("Continuing without create-validator binary") + except Exception as e: + logger.error(f"Error compiling create-validator: {e}") + logger.warning("Continuing without create-validator binary") + finally: + # Ensure we're back in the original directory + os.chdir(current_dir) + + # Upload create-validator binary for staking + if os.path.exists(create_validator_path): + logger.info(f"Uploading create-validator binary to {server_name}") + remote_create_validator_path = os.path.join(remote_base, "create-validator") if is_local else f"{remote_base}/create-validator" + if is_local: + self.upload_file(None, create_validator_path, remote_create_validator_path, server_host) + # Make executable + os.chmod(remote_create_validator_path, 0o755) + else: + self.upload_file(ssh_client, create_validator_path, f"{remote_base}/create-validator", server_host) + # Make executable + ssh_client.exec_command(f"chmod +x {remote_base}/create-validator") + else: + logger.warning("create-validator binary is not available for upload") + + # Upload genesis file + genesis_path = os.path.join(current_dir, self.config['files']['genesis']) + if os.path.exists(genesis_path): + remote_genesis_path = os.path.join(remote_base, "config", "genesis.json") if is_local else f"{remote_base}/config/genesis.json" + if is_local: + self.upload_file(None, genesis_path, remote_genesis_path, server_host) + else: + self.upload_file(ssh_client, genesis_path, f"{remote_base}/config/genesis.json", server_host) + + # Generate and upload server-specific config file + if self.config['options'].get('regenerate_configs', True): + server_config_path = os.path.join(current_dir, self.generate_server_config(server_config)) + if server_config_path and os.path.exists(server_config_path): + remote_config_path = os.path.join(remote_base, "config", "config.toml") if is_local else f"{remote_base}/config/config.toml" + if is_local: + self.upload_file(None, server_config_path, remote_config_path, server_host) + else: + self.upload_file(ssh_client, server_config_path, f"{remote_base}/config/config.toml", server_host) + else: + # Use the default config.toml if regeneration is disabled + config_path = os.path.join(current_dir, self.config['files']['config']) + if os.path.exists(config_path): + remote_config_path = os.path.join(remote_base, "config", "config.toml") if is_local else f"{remote_base}/config/config.toml" + if is_local: + self.upload_file(None, config_path, remote_config_path, server_host) + else: + self.upload_file(ssh_client, config_path, f"{remote_base}/config/config.toml", server_host) + + # Upload keys based on node type and index + node_index = 
server_config['node_index'] + role = server_config['role'] + + if role == 'validator': + # Upload validator keys + validator_key_dir = os.path.join(current_dir, f"{self.config['files']['keys_base']}/validator{node_index}") + if os.path.exists(validator_key_dir): + remote_validator_path = os.path.join(remote_base, "keys", "validator") if is_local else f"{remote_base}/keys/validator" + if is_local: + # For local, remove directory if it exists + if os.path.exists(remote_validator_path) and os.path.isdir(remote_validator_path): + import shutil + shutil.rmtree(remote_validator_path) + self.upload_directory(None, validator_key_dir, remote_validator_path, server_host) + else: + # For remote, remove directory if it exists + if ssh_client: + ssh_client.exec_command(f"rm -rf {remote_base}/keys/validator") + self.upload_directory(ssh_client, validator_key_dir, f"{remote_base}/keys/validator", server_host) + + # Upload BLS keys + bls_key_dir = os.path.join(current_dir, f"{self.config['files']['keys_base']}/bls{node_index}") + if os.path.exists(bls_key_dir): + remote_bls_path = os.path.join(remote_base, "keys/bls") if is_local else f"{remote_base}/keys/bls" + if is_local: + # For local, remove directory if it exists + if os.path.exists(remote_bls_path) and os.path.isdir(remote_bls_path): + import shutil + shutil.rmtree(remote_bls_path) + self.upload_directory(None, bls_key_dir, f"{remote_base}/keys/", server_host) + else: + # For remote, remove directory if it exists + if ssh_client: + ssh_client.exec_command(f"rm -rf {remote_base}/keys/bls") + self.upload_directory(ssh_client, bls_key_dir, f"{remote_base}/keys/", server_host) + + # Upload validator nodekey + validator_nodekey_file = os.path.join(current_dir, f"{self.config['files']['keys_base']}/validator-nodekey{node_index}") + if os.path.exists(validator_nodekey_file): + remote_nodekey_path = os.path.join(remote_base, "keys", "validator-nodekey") if is_local else f"{remote_base}/keys/validator-nodekey" + if is_local: + self.upload_file(None, validator_nodekey_file, remote_nodekey_path, server_host) + else: + self.upload_file(ssh_client, validator_nodekey_file, f"{remote_base}/keys/validator-nodekey", server_host) + + # Upload password file + password_file = os.path.join(current_dir, f"{self.config['files']['keys_base']}/password.txt") + if os.path.exists(password_file): + if is_local: + self.upload_file(None, password_file, os.path.join(remote_base, "keys", "password.txt"), server_host) + else: + self.upload_file(ssh_client, password_file, f"{remote_base}/keys/password.txt", server_host) + + elif role in ['sentry', 'fullnode']: + # Upload node key + node_key_file = os.path.join(current_dir, f"{self.config['files']['keys_base']}/{role}-nodekey{node_index}") + if os.path.exists(node_key_file): + if is_local: + self.upload_file(None, node_key_file, os.path.join(remote_base, "keys", f"{role}-nodekey"), server_host) + else: + self.upload_file(ssh_client, node_key_file, f"{remote_base}/keys/{role}-nodekey", server_host) + + # Upload password file for all node types + password_file = os.path.join(current_dir, f"{self.config['files']['keys_base']}/password.txt") + if os.path.exists(password_file): + if is_local: + self.upload_file(None, password_file, os.path.join(remote_base, "keys", "password.txt"), server_host) + else: + self.upload_file(ssh_client, password_file, f"{remote_base}/keys/password.txt", server_host) + + if not is_local and ssh_client: + ssh_client.close() + logger.info(f"File distribution completed for {server_name}") + return True + + 
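+            # Recap of the layout produced above under <script dir>/sipc2/<server_name>/:
+            #   create-validator      staking helper (only if the Go build succeeded)
+            #   config/genesis.json   shared genesis file
+            #   config/config.toml    server-specific (or default) node config
+            #   keys/                 role-dependent keys plus password.txt
+            # These are the paths that generate_docker_run_command() in
+            # deploy_cluster.py mounts into the container.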
except Exception as e: + logger.error(f"File distribution failed for {server_name}: {e}") + return False + + def distribute_files(self, parallel: bool = True, max_parallel: int = 5) -> bool: + """Distribute files to all servers in the cluster""" + + # Generate genesis and base config before distribution + if not self.generate_genesis_and_config(): + logger.error("Failed to generate genesis and config files") + return False + + servers = self.config['servers'] + + if parallel and len(servers) > 1: + logger.info(f"Starting parallel file distribution to {len(servers)} servers") + + with ThreadPoolExecutor(max_workers=max_parallel) as executor: + futures = [ + executor.submit(self.distribute_files_to_server, server) + for server in servers + ] + + success_count = 0 + for future in as_completed(futures): + if future.result(): + success_count += 1 + + logger.info(f"File distribution completed: {success_count}/{len(servers)} successful") + return success_count == len(servers) + else: + logger.info("Starting sequential file distribution") + + success_count = 0 + for server in servers: + if self.distribute_files_to_server(server): + success_count += 1 + + logger.info(f"File distribution completed: {success_count}/{len(servers)} successful") + return success_count == len(servers) + + +def main(): + parser = argparse.ArgumentParser(description="BSC Node File Distributor") + parser.add_argument("--config", default="deployment-config.yaml", help="Path to deployment config file") + parser.add_argument("--parallel", action="store_true", help="Enable parallel distribution") + parser.add_argument("--max-parallel", type=int, default=5, help="Maximum parallel workers") + parser.add_argument("--regenerate-genesis", action="store_true", help="Force regenerate genesis.json") + parser.add_argument("--no-regenerate-genesis", action="store_true", help="Skip genesis.json regeneration") + parser.add_argument("--regenerate-configs", action="store_true", help="Force regenerate server-specific configs") + parser.add_argument("--no-regenerate-configs", action="store_true", help="Skip server-specific config regeneration") + + args = parser.parse_args() + + # Validate config file + if not os.path.exists(args.config): + logger.error(f"Configuration file not found: {args.config}") + return 1 + + try: + distributor = FileDistributor(args.config) + + # Override config options with command line args + if args.regenerate_genesis: + distributor.config['options']['regenerate_genesis'] = True + if args.no_regenerate_genesis: + distributor.config['options']['regenerate_genesis'] = False + if args.regenerate_configs: + distributor.config['options']['regenerate_configs'] = True + if args.no_regenerate_configs: + distributor.config['options']['regenerate_configs'] = False + + success = distributor.distribute_files( + parallel=args.parallel, + max_parallel=args.max_parallel + ) + + return 0 if success else 1 + + except Exception as e: + logger.error(f"File distribution failed: {e}") + return 1 + + +if __name__ == "__main__": + exit(main()) diff --git a/monitor_cluster.py b/monitor_cluster.py new file mode 100644 index 00000000..34f392be --- /dev/null +++ b/monitor_cluster.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python3 +""" +BSC Cluster Monitor +Monitors the status of BSC nodes in the cluster +""" + +import os +import yaml +import paramiko +import argparse +import time +import requests +from pathlib import Path +from typing import Dict, List, Any +import logging +from concurrent.futures import ThreadPoolExecutor, as_completed +import json 
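+
+# The monitor checks each node on two planes: over SSH it inspects the Docker
+# container ("docker ps" / "docker inspect"), and over HTTP it queries the
+# standard JSON-RPC methods eth_syncing, eth_blockNumber and net_peerCount.
+# A minimal manual probe of one node (host and port taken from
+# deployment-config.yaml) would be, for example:
+#
+#   curl -s -X POST http://192.168.3.205:8545 \
+#     -H 'Content-Type: application/json' \
+#     -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}'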
+ +# Setup logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + + +class BSCClusterMonitor: + def __init__(self, config_path: str): + self.config = self.load_config(config_path) + + def load_config(self, config_path: str) -> Dict[str, Any]: + """Load deployment configuration from YAML file""" + with open(config_path, 'r', encoding='utf-8') as f: + return yaml.safe_load(f) + + def create_ssh_client(self, server_config: Dict[str, Any]) -> paramiko.SSHClient: + """Create SSH client for server connection""" + client = paramiko.SSHClient() + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + try: + # Load SSH key + private_key_path = os.path.expanduser(server_config['ssh_key']) + private_key = paramiko.RSAKey.from_private_key_file(private_key_path) + + # Connect to server + client.connect( + hostname=server_config['host'], + username=server_config['user'], + pkey=private_key + ) + + return client + + except Exception as e: + logger.error(f"Failed to connect to {server_config['name']}: {e}") + raise + + def check_server_status(self, server_config: Dict[str, Any]) -> Dict[str, Any]: + """Check status of BSC node on server""" + server_name = server_config['name'] + server_host = server_config['host'] + http_port = server_config['ports']['http'] + + status = { + 'server': server_name, + 'host': server_host, + 'container_status': 'unknown', + 'node_status': 'unknown', + 'block_height': 0, + 'peers': 0, + 'syncing': False, + 'errors': [] + } + + try: + # Create SSH client + ssh_client = self.create_ssh_client(server_config) + + # Check Docker container status + container_name = server_config['name'] + stdin, stdout, stderr = ssh_client.exec_command( + f"docker ps --filter name={container_name} --format '{{{{.Status}}}}'" + ) + container_output = stdout.read().decode().strip() + + if container_output: + status['container_status'] = 'running' + + # Check container health + stdin, stdout, stderr = ssh_client.exec_command( + f"docker inspect {container_name} --format '{{{{.State.Health.Status}}}}'" + ) + health_output = stdout.read().decode().strip() + status['container_health'] = health_output if health_output else 'unknown' + else: + status['container_status'] = 'stopped' + status['errors'].append('Container not running') + + ssh_client.close() + + except Exception as e: + status['errors'].append(f'SSH connection failed: {str(e)}') + + # Check RPC endpoint + try: + rpc_url = f"http://{server_host}:{http_port}" + response = requests.post( + rpc_url, + json={ + "jsonrpc": "2.0", + "method": "eth_syncing", + "params": [], + "id": 1 + }, + timeout=5 + ) + + if response.status_code == 200: + result = response.json() + if result.get('result'): + status['node_status'] = 'syncing' + status['syncing'] = True + else: + status['node_status'] = 'synced' + else: + status['node_status'] = 'error' + status['errors'].append(f'RPC returned status {response.status_code}') + + except requests.exceptions.RequestException as e: + status['node_status'] = 'unreachable' + status['errors'].append(f'RPC connection failed: {str(e)}') + + # Get block height + if status['node_status'] in ['synced', 'syncing']: + try: + rpc_url = f"http://{server_host}:{http_port}" + response = requests.post( + rpc_url, + json={ + "jsonrpc": "2.0", + "method": "eth_blockNumber", + "params": [], + "id": 1 + }, + timeout=5 + ) + + if response.status_code == 200: + result = response.json() + block_hex = result.get('result', '0x0') + 
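+                    # eth_blockNumber returns a hex quantity string (e.g. "0x1b4"
+                    # for block 436); int(block_hex, 16) below decodes it.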
status['block_height'] = int(block_hex, 16) + + except Exception as e: + status['errors'].append(f'Failed to get block height: {str(e)}') + + # Get peer count + if status['node_status'] in ['synced', 'syncing']: + try: + rpc_url = f"http://{server_host}:{http_port}" + response = requests.post( + rpc_url, + json={ + "jsonrpc": "2.0", + "method": "net_peerCount", + "params": [], + "id": 1 + }, + timeout=5 + ) + + if response.status_code == 200: + result = response.json() + peer_hex = result.get('result', '0x0') + status['peers'] = int(peer_hex, 16) + + except Exception as e: + status['errors'].append(f'Failed to get peer count: {str(e)}') + + return status + + def monitor_cluster(self, parallel: bool = True, max_parallel: int = 10) -> Dict[str, Any]: + """Monitor all servers in the cluster""" + logger.info("Starting cluster monitoring") + + servers = self.config['servers'] + cluster_status = { + 'timestamp': time.time(), + 'cluster_size': len(servers), + 'servers': {}, + 'summary': { + 'total_servers': len(servers), + 'running_containers': 0, + 'healthy_nodes': 0, + 'syncing_nodes': 0, + 'synced_nodes': 0, + 'errors': [] + } + } + + server_statuses = [] + + if parallel and len(servers) > 1: + with ThreadPoolExecutor(max_workers=max_parallel) as executor: + futures = [ + executor.submit(self.check_server_status, server) + for server in servers + ] + + for future in as_completed(futures): + server_statuses.append(future.result()) + else: + for server in servers: + server_statuses.append(self.check_server_status(server)) + + # Process results + for status in server_statuses: + server_name = status['server'] + cluster_status['servers'][server_name] = status + + # Update summary + if status['container_status'] == 'running': + cluster_status['summary']['running_containers'] += 1 + + if status['node_status'] in ['synced', 'syncing']: + cluster_status['summary']['healthy_nodes'] += 1 + + if status['node_status'] == 'syncing': + cluster_status['summary']['syncing_nodes'] += 1 + + if status['node_status'] == 'synced': + cluster_status['summary']['synced_nodes'] += 1 + + if status['errors']: + cluster_status['summary']['errors'].extend(status['errors']) + + logger.info(f"Monitoring completed. 
{cluster_status['summary']['running_containers']}/{len(servers)} containers running") + return cluster_status + + def print_status_report(self, cluster_status: Dict[str, Any]): + """Print formatted status report""" + print("\n" + "="*80) + print("BSC CLUSTER STATUS REPORT") + print("="*80) + print(f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(cluster_status['timestamp']))}") + print(f"Cluster Size: {cluster_status['cluster_size']}") + print() + + # Summary + summary = cluster_status['summary'] + print("SUMMARY:") + print(f" Total Servers: {summary['total_servers']}") + print(f" Running Containers: {summary['running_containers']}") + print(f" Healthy Nodes: {summary['healthy_nodes']}") + print(f" Syncing Nodes: {summary['syncing_nodes']}") + print(f" Synced Nodes: {summary['synced_nodes']}") + print() + + # Server details + print("SERVER DETAILS:") + print("-" * 80) + print(f"{'Server':<15} {'Container':<12} {'Node':<12} {'Height':>10} {'Peers':>6}") + print("-" * 80) + + for server_name, status in cluster_status['servers'].items(): + container_status = status['container_status'] + node_status = status['node_status'] + block_height = status['block_height'] + peers = status['peers'] + + print(f"{server_name:<15} {container_status:<12} {node_status:<12} {block_height:>10} {peers:>6}") + + if status['errors']: + print(f" Errors: {', '.join(status['errors'])}") + + print() + + # Errors summary + if summary['errors']: + print("CLUSTER ERRORS:") + for error in summary['errors'][:5]: # Show first 5 errors + print(f" - {error}") + if len(summary['errors']) > 5: + print(f" ... and {len(summary['errors']) - 5} more errors") + print() + + def start_continuous_monitoring(self, interval: int = 60): + """Start continuous monitoring""" + logger.info(f"Starting continuous monitoring (interval: {interval}s)") + + try: + while True: + cluster_status = self.monitor_cluster() + self.print_status_report(cluster_status) + + print(f"\nWaiting {interval} seconds before next check...") + time.sleep(interval) + + except KeyboardInterrupt: + logger.info("Monitoring stopped by user") + except Exception as e: + logger.error(f"Monitoring failed: {e}") + + +def main(): + parser = argparse.ArgumentParser(description="BSC Cluster Monitor") + parser.add_argument("--config", default="deployment-config.yaml", help="Path to deployment config file") + parser.add_argument("--continuous", action="store_true", help="Enable continuous monitoring") + parser.add_argument("--interval", type=int, default=60, help="Monitoring interval in seconds") + parser.add_argument("--parallel", action="store_true", help="Enable parallel monitoring") + parser.add_argument("--max-parallel", type=int, default=10, help="Maximum parallel workers") + parser.add_argument("--output", choices=['console', 'json', 'yaml'], default='console', help="Output format") + + args = parser.parse_args() + + # Validate config file + if not os.path.exists(args.config): + logger.error(f"Configuration file not found: {args.config}") + return 1 + + try: + monitor = BSCClusterMonitor(args.config) + + if args.continuous: + monitor.start_continuous_monitoring(args.interval) + else: + cluster_status = monitor.monitor_cluster( + parallel=args.parallel, + max_parallel=args.max_parallel ) + + if args.output == 'console': + monitor.print_status_report(cluster_status) + elif args.output == 'json': + print(json.dumps(cluster_status, indent=2)) + elif args.output == 'yaml': + print(yaml.dump(cluster_status, default_flow_style=False)) + + return 0 + + except Exception as e: + logger.error(f"Monitoring failed: {e}") + return 1 + + +if __name__ == "__main__": + exit(main()) From a89e97fd89c7de15fe4866bb5295f72a7aebbe09 Mon 
Sep 17 00:00:00 2001 From: nickwest-zkp Date: Thu, 11 Sep 2025 11:40:38 +0800 Subject: [PATCH 9/9] Separate the operator address key and consensus address key --- bsc_cluster.sh | 65 ++++++++++-- create-validator/main.go | 18 ++-- deploy_cluster.py | 218 ++++++++++++++++++++++++++++++++++----- file_distributor.py | 161 +++++++++++++++++++++++++++-- 4 files changed, 418 insertions(+), 44 deletions(-) diff --git a/bsc_cluster.sh b/bsc_cluster.sh index 32d3ab22..0244d7d2 100644 --- a/bsc_cluster.sh +++ b/bsc_cluster.sh @@ -23,12 +23,34 @@ function exit_previous() { sleep ${sleepBeforeStart} } +# Check if validator is a file or directory and handle accordingly +function handle_validator() { + local validator_path=$1 + local index=$2 + local target_dir=$3 + + if [ -f "${validator_path}" ]; then + # It's a file containing the operator address + echo "Validator ${index} is a file, copying operator address file" + cp "${validator_path}" "${target_dir}" + elif [ -d "${validator_path}" ]; then + # It's a directory containing the keystore + echo "Validator ${index} is a directory, copying keystore directory" + cp -r ${validator_path} ${workspace}/.local/ + else + echo "Error: Validator ${index} is neither a file nor a directory" + exit 1 + fi +} + function create_validator() { rm -rf ${workspace}/.local mkdir -p ${workspace}/.local for ((i = 0; i < size; i++)); do - cp -r ${workspace}/keys/validator${i} ${workspace}/.local/ + # Handle validator based on whether it's a file or directory + handle_validator "${workspace}/keys/validator${i}" ${i} "${workspace}/.local/validator${i}" + cp -r ${workspace}/keys/consensus${i} ${workspace}/.local/ cp -r ${workspace}/keys/bls${i} ${workspace}/.local/ done } @@ -74,10 +96,25 @@ function prepare_config() { echo "passedHardforkTime "${passedHardforkTime} > ${workspace}/.local/hardforkTime.txt initHolders=${INIT_HOLDER} for ((i = 0; i < size; i++)); do - for f in ${workspace}/.local/validator${i}/keystore/*; do + # Check if validator is a file or directory and extract operator address accordingly + if [ -f "${workspace}/keys/validator${i}" ]; then + # Read operator address from the file + operator_addr=$(cat ${workspace}/keys/validator${i}) + elif [ -d "${workspace}/keys/validator${i}" ]; then + # Extract operator address from keystore + for f in ${workspace}/keys/validator${i}/keystore/*; do + operator_addr="0x$(cat ${f} | jq -r .address)" + done + else + echo "Error: Validator ${i} is neither a file nor a directory" + exit 1 + fi + + initHolders=${initHolders}","${operator_addr} + fee_addr=${operator_addr} + + for f in ${workspace}/.local/consensus${i}/keystore/*; do cons_addr="0x$(cat ${f} | jq -r .address)" - initHolders=${initHolders}","${cons_addr} - fee_addr=${cons_addr} done targetDir=${workspace}/.local/node${i} @@ -129,7 +166,18 @@ function initNetwork() { for ((i = 0; i < size; i++)); do mkdir ${workspace}/.local/node${i}/geth cp ${workspace}/keys/validator-nodekey${i} ${workspace}/.local/node${i}/geth/nodekey - mv ${workspace}/.local/validator${i}/keystore ${workspace}/.local/node${i}/ && rm -rf ${workspace}/.local/validator${i} + + # Handle validator based on whether it's a file or directory + if [ -d "${workspace}/.local/validator${i}" ]; then + # Old format: validator is a directory with keystore + mv ${workspace}/.local/validator${i}/keystore ${workspace}/.local/node${i}/ && rm -rf ${workspace}/.local/validator${i} + else + # New format: validator is a file with operator address + # In this case, we don't need to move anything to node directory + rm 
-rf ${workspace}/.local/validator${i} + fi + + mv ${workspace}/.local/consensus${i}/keystore ${workspace}/.local/node${i}/ && rm -rf ${workspace}/.local/consensus${i} if [ ${EnableSentryNode} = true ]; then mkdir ${workspace}/.local/sentry${i}/geth cp ${workspace}/keys/sentry-nodekey${i} ${workspace}/.local/sentry${i}/geth/nodekey @@ -266,10 +314,13 @@ function native_start() { } function register_stakehub(){ + if [ -f "${workspace}/keys/validator0" ]; then + exit 1 + fi # wait feynman enable sleep 45 for ((i = 0; i < size; i++));do - ${workspace}/create-validator/create-validator --consensus-key-dir ${workspace}/keys/validator${i} --vote-key-dir ${workspace}/keys/bls${i} \ + ${workspace}/create-validator/create-validator --operator-key-dir ${workspace}/keys/validator${i} --consensus-key-dir ${workspace}/keys/consensus${i} --vote-key-dir ${workspace}/keys/bls${i} \ --password-path ${workspace}/keys/password.txt --amount 20001 --validator-desc Val${i} --rpc-url ${RPC_URL} done } @@ -305,4 +356,4 @@ regen-genesis) *) echo "Usage: bsc_cluster.sh | reset | stop [vidx]| start [vidx]| restart [vidx] | regen-genesis" ;; -esac +esac \ No newline at end of file diff --git a/create-validator/main.go b/create-validator/main.go index cbb2a978..970d6873 100644 --- a/create-validator/main.go +++ b/create-validator/main.go @@ -29,11 +29,11 @@ var ( consensusKeyDir = flag.String("consensus-key-dir", "", "consensus keys dir") voteKeyDir = flag.String("vote-key-dir", "", "vote keys dir") passwordPath = flag.String("password-path", "", "password dir") + operatorKeyDir = flag.String("operator-key-dir", "", "operator keys dir") ) func main() { flag.Parse() - if *consensusKeyDir == "" { panic("consensus-keys-dir is required") } @@ -43,6 +43,9 @@ func main() { if *passwordPath == "" { panic("password-path is required") } + if *operatorKeyDir == "" { + *operatorKeyDir = *consensusKeyDir + } client, err := ethclient.Dial(*rpcUrl) if err != nil { @@ -56,9 +59,12 @@ func main() { password := string(bytes.TrimSpace(bz)) consensusKs := keystore.NewKeyStore(*consensusKeyDir+"/keystore", keystore.StandardScryptN, keystore.StandardScryptP) + operatorKs := keystore.NewKeyStore(*operatorKeyDir+"/keystore", keystore.StandardScryptN, keystore.StandardScryptP) consensusAddr := consensusKs.Accounts()[0].Address - consensusAcc := accounts.Account{Address: consensusAddr} - err = consensusKs.Unlock(consensusAcc, password) + + operatorAddr := operatorKs.Accounts()[0].Address + operatorAcc := accounts.Account{Address: operatorAddr} + err = operatorKs.Unlock(operatorAcc, password) if err != nil { panic(err) } @@ -94,7 +100,7 @@ func main() { paddedChainIdBytes := make([]byte, 32) copy(paddedChainIdBytes[32-len(chainId.Bytes()):], chainId.Bytes()) - msgHash := crypto.Keccak256(append(consensusAddr.Bytes(), append(pubKey[:], paddedChainIdBytes...)...)) + msgHash := crypto.Keccak256(append(operatorAddr.Bytes(), append(pubKey[:], paddedChainIdBytes...)...)) req := validatorpb.SignRequest{ PublicKey: pubKey[:], SigningRoot: msgHash, @@ -114,7 +120,7 @@ func main() { panic(err) } - nonce, err := client.PendingNonceAt(context.Background(), consensusAddr) + nonce, err := client.PendingNonceAt(context.Background(), operatorAddr) if err != nil { panic(err) } @@ -132,7 +138,7 @@ func main() { Data: data, }) - signedTx, err := consensusKs.SignTx(consensusAcc, tx, chainId) + signedTx, err := operatorKs.SignTx(operatorAcc, tx, chainId) if err != nil { panic(err) } diff --git a/deploy_cluster.py b/deploy_cluster.py index ebdcdde8..9a55ad91 
100644 --- a/deploy_cluster.py +++ b/deploy_cluster.py @@ -10,11 +10,14 @@ import argparse import subprocess import time +import secrets +import json from pathlib import Path from typing import Dict, List, Any import logging from concurrent.futures import ThreadPoolExecutor, as_completed from file_distributor import FileDistributor +from eth_account import Account # Setup logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -31,6 +34,133 @@ def load_config(self, config_path: str) -> Dict[str, Any]: with open(config_path, 'r', encoding='utf-8') as f: return yaml.safe_load(f) + def _ensure_dir(self, path: str): + os.makedirs(path, exist_ok=True) + + def _write_text_file(self, path: str, content: str): + parent = os.path.dirname(path) + if parent: + os.makedirs(parent, exist_ok=True) + with open(path, 'w', encoding='utf-8') as f: + f.write(content) + + def _generate_password(self) -> str: + return secrets.token_urlsafe(24) + + def _ensure_password_file(self, keys_base_dir: str) -> str: + password_path = os.path.join(keys_base_dir, 'password.txt') + if not os.path.exists(password_path): + password = self._generate_password() + self._write_text_file(password_path, password) + logger.info(f"Generated new password file: {password_path}") + return password_path + + def _generate_nodekey(self, target_path: str): + if os.path.exists(target_path): + logger.info(f"Nodekey already exists: {target_path}") + return + key_bytes = secrets.token_bytes(32) + key_hex = key_bytes.hex() + self._write_text_file(target_path, key_hex) + logger.info(f"Generated nodekey: {target_path}") + + def _generate_eth_keystore(self, out_dir: str, password: str) -> str: + """Generate an Ethereum account keystore JSON in out_dir; returns address.""" + self._ensure_dir(out_dir) + acct = Account.create() + keystore = Account.encrypt(acct.key, password) + filename = f"UTC--{time.strftime('%Y-%m-%dT%H-%M-%SZ', time.gmtime())}--{acct.address.lower().replace('0x','')}" + keystore_path = os.path.join(out_dir, filename) + with open(keystore_path, 'w', encoding='utf-8') as f: + json.dump(keystore, f) + logger.info(f"Generated keystore: {keystore_path}") + return acct.address + + def generate_keys_for_server(self, server_config: Dict[str, Any]) -> bool: + """Generate required keys locally for a server if missing.""" + try: + current_dir = Path(__file__).resolve().parent + keys_base = os.path.join(current_dir, self.config['files']['keys_base']) + node_index = server_config['node_index'] + role = server_config['role'] + + # Ensure base dir + self._ensure_dir(keys_base) + + # Ensure password file + password_path = self._ensure_password_file(keys_base) + with open(password_path, 'r', encoding='utf-8') as f: + password = f.read().strip() + + # Generate role-specific nodekey + nodekey_filename = None + if role == 'validator': + nodekey_filename = f"validator-nodekey{node_index}" + elif role == 'sentry': + nodekey_filename = f"sentry-nodekey{node_index}" + elif role == 'fullnode': + nodekey_filename = f"fullnode-nodekey{node_index}" + else: + logger.warning(f"Unknown role {role}; skipping nodekey generation") + + if nodekey_filename: + self._generate_nodekey(os.path.join(keys_base, nodekey_filename)) + + if role == 'validator': + # Operator and consensus keystores + operator_keystore_dir = os.path.join(keys_base, f"validator{node_index}", "keystore") + consensus_keystore_dir = os.path.join(keys_base, f"consensus{node_index}", "keystore") + if not os.path.isdir(operator_keystore_dir) or not 
os.listdir(operator_keystore_dir): + self._generate_eth_keystore(operator_keystore_dir, password) + else: + logger.info(f"Operator keystore exists: {operator_keystore_dir}") + if not os.path.isdir(consensus_keystore_dir) or not os.listdir(consensus_keystore_dir): + self._generate_eth_keystore(consensus_keystore_dir, password) + else: + logger.info(f"Consensus keystore exists: {consensus_keystore_dir}") + + # Ensure BLS directory (place wallet files if required externally) + bls_dir = os.path.join(keys_base, f"bls{node_index}") + self._ensure_dir(bls_dir) + + return True + except Exception as e: + logger.error(f"Failed to generate keys for server {server_config['name']}: {e}") + return False + + def add_node(self, server_name: str, update_peers_config: bool = True) -> bool: + """Add a new node: generate keys, distribute files/config, and start the node.""" + try: + servers = self.config['servers'] + target = None + for s in servers: + if s['name'] == server_name: + target = s + break + if target is None: + logger.error(f"Server not found in config: {server_name}") + return False + + # 1) Generate keys for the new server + if not self.generate_keys_for_server(target): + return False + + # 2) Distribute files: update configs for all servers if requested, else only target + if update_peers_config: + logger.info("Updating configs for all servers to include new static/trusted nodes...") + for s in servers: + if not self.distributor.distribute_files_to_server(s): + logger.warning(f"Failed distributing to {s['name']}") + else: + if not self.distributor.distribute_files_to_server(target): + return False + + # 3) Deploy the new server + return self.deploy_to_server(target) + except Exception as e: + logger.error(f"Add node failed: {e}") + return False + def create_ssh_client(self, server_config: Dict[str, Any]) -> paramiko.SSHClient: """Create SSH client for server connection""" return self.distributor.create_ssh_client(server_config) @@ -120,15 +250,18 @@ def deploy_to_server(self, server_config: Dict[str, Any]) -> bool: # Create SSH client ssh_client = self.create_ssh_client(server_config) - # Create remote directories (use user home directory instead of /opt) + # Create remote directories (per-node isolation) remote_base = f"{current_dir}/sipc2/{server_name}" self.distributor.ensure_remote_directory(ssh_client, remote_base) self.distributor.ensure_remote_directory(ssh_client, f"{remote_base}/data") + self.distributor.ensure_remote_directory(ssh_client, f"{remote_base}/keys") + self.distributor.ensure_remote_directory(ssh_client, f"{remote_base}/config") # Generate Docker run command with correct validator address docker_cmd = self.generate_docker_run_command(server_config, validator_address) print(docker_cmd) - data_dir = f"{current_dir}/sipc2/{server_name}/data" + # Use per-node data directory for cleanup + data_dir = f"{remote_base}/data" # Create deployment script if role == 'validator': deployment_script = f"""#!/bin/bash @@ -211,9 +344,10 @@ def deploy_to_server(self, server_config: Dict[str, Any]) -> bool: # If this is a validator node, register and stake tokens if role == 'validator': - logger.info(f"Validator node deployed, proceeding with staking registration for {server_name}") - if not self.register_validator_stake(server_config, ssh_client, validator_address): - logger.warning(f"Staking registration failed for {server_name}, but deployment was successful") + if os.path.isdir(f"{current_dir}/sipc2/{server_name}/keys/validator"): + logger.info(f"Validator node deployed, proceeding with 
staking registration for {server_name}") + if not self.register_validator_stake(server_config, ssh_client, validator_address): + logger.warning(f"Staking registration failed for {server_name}, but deployment was successful") logger.info(f"Deployment completed successfully on {server_name}") ssh_client.close() @@ -246,11 +380,11 @@ def generate_docker_run_command(self, server_config: Dict[str, Any], validator_a # Generate port mappings ports = server_config['ports'] port_mappings = [ - f"-p {ports['http']}:8545", # HTTP port - f"-p {ports['ws']}:8546", # WebSocket port - f"-p {ports['p2p']}:30303", # P2P port - f"-p {ports['metrics']}:6060", # Metrics port - f"-p 7060:6060" # PProf port (mapped to metrics port) + f"-p {ports['http']}:8545", # HTTP port (TCP) + f"-p {ports['ws']}:8546", # WebSocket port (TCP) + f"-p {ports['p2p']}:30303", # P2P TCP + f"-p {ports['p2p']}:30303/udp", # P2P UDP for discovery + f"-p {ports['metrics']}:6060" # Metrics/PProf port (TCP) ] # Generate volume mappings (match docker-entrypoint.sh paths) @@ -270,20 +404,17 @@ def generate_docker_run_command(self, server_config: Dict[str, Any], validator_a if role == 'validator': # Add nodekey mapping nodekey_path = f"{remote_base}/keys/validator-nodekey" - if os.path.exists(nodekey_path): - volume_mappings.append(f"-v {nodekey_path}:/home/sipc2/keys/nodekey") + volume_mappings.append(f"-v {nodekey_path}:/home/sipc2/keys/nodekey") elif role == 'sentry': # Add nodekey mapping for sentry nodekey_path = f"{remote_base}/keys/sentry-nodekey" - if os.path.exists(nodekey_path): - volume_mappings.append(f"-v {nodekey_path}:/home/sipc2/keys/nodekey") + volume_mappings.append(f"-v {nodekey_path}:/home/sipc2/keys/nodekey") elif role == 'fullnode': # Add nodekey mapping for fullnode nodekey_path = f"{remote_base}/keys/fullnode-nodekey" - if os.path.exists(nodekey_path): - volume_mappings.append(f"-v {nodekey_path}:/home/sipc2/keys/nodekey") + volume_mappings.append(f"-v {nodekey_path}:/home/sipc2/keys/nodekey") # Build Docker run command with native_start parameters if role == 'validator': @@ -303,12 +434,12 @@ def generate_docker_run_command(self, server_config: Dict[str, Any], validator_a f"--miner.etherbase {validator_address} --blspassword /home/sipc2/keys/password.txt", "--nodekey /home/sipc2/keys/nodekey", "--blswallet /home/sipc2/keys/bls/wallet", - "--keystore /home/sipc2/keys/validator/keystore", + "--keystore /home/sipc2/keys/consensus/keystore", "--rpc.allow-unprotected-txs --allow-insecure-unlock", "--ws.addr 0.0.0.0 --ws.port 8546 --http.addr 0.0.0.0 --http.port 8545 --http.corsdomain '*'", "--metrics --metrics.addr localhost --metrics.port 6060 --metrics.expensive", "--pprof --pprof.addr localhost --pprof.port 6060", - "--gcmode archive --syncmode full --monitor.maliciousvote", + "--gcmode full --syncmode full --monitor.maliciousvote", "--override.passedforktime 1725500000 --override.lorentz 1725500000 --override.maxwell 1725500000", "--override.immutabilitythreshold 100 --override.breatheblockinterval 300", "--override.minforblobrequest 20 --override.defaultextrareserve 10" @@ -326,7 +457,7 @@ def generate_docker_run_command(self, server_config: Dict[str, Any], validator_a "--ws.addr 0.0.0.0 --ws.port 8546 --http.addr 0.0.0.0 --http.port 8545 --http.corsdomain '*'", "--metrics --metrics.addr localhost --metrics.port 6060 --metrics.expensive", "--pprof --pprof.addr localhost --pprof.port 6060", - "--gcmode archive --syncmode full", + "--gcmode full --syncmode full", "--override.passedforktime 1725500000 
--override.lorentz 1725500000 --override.maxwell 1725500000", "--override.immutabilitythreshold 100 --override.breatheblockinterval 300", "--override.minforblobrequest 20 --override.defaultextrareserve 10" @@ -342,7 +473,7 @@ def get_validator_address(self, node_index: int) -> str: try: # Path to validator keystore directory current_dir = Path(__file__).resolve().parent - validator_dir = os.path.join(current_dir, f"keys/validator{node_index}/keystore") + validator_dir = os.path.join(current_dir, f"keys/consensus{node_index}/keystore") if not os.path.exists(validator_dir): logger.error(f"Validator keystore directory not found: {validator_dir}") @@ -387,6 +518,40 @@ def regenerate_genesis_and_configs(self) -> bool: return True try: + # Check if BSC_CLUSTER_SIZE in .env matches cluster.size in deployment config + cluster_size = self.config['cluster']['size'] + env_file_path = ".env" + + if os.path.exists(env_file_path): + # Read .env file + with open(env_file_path, 'r') as f: + env_content = f.read() + + # Extract BSC_CLUSTER_SIZE from .env + import re + match = re.search(r'BSC_CLUSTER_SIZE=(\d+)', env_content) + if match: + env_cluster_size = int(match.group(1)) + + # If they don't match, update .env file + if env_cluster_size != cluster_size: + logger.info(f"Updating BSC_CLUSTER_SIZE in .env from {env_cluster_size} to {cluster_size}") + # Replace the BSC_CLUSTER_SIZE line + env_content = re.sub( + r'BSC_CLUSTER_SIZE=\d+', + f'BSC_CLUSTER_SIZE={cluster_size}', + env_content + ) + + # Write back to .env file + with open(env_file_path, 'w') as f: + f.write(env_content) + logger.info(".env file updated successfully") + else: + logger.warning("BSC_CLUSTER_SIZE not found in .env file") + else: + logger.warning(".env file not found") + if regenerate_genesis: logger.info("Regenerating genesis.json and base config...") @@ -405,7 +570,6 @@ def regenerate_genesis_and_configs(self) -> bool: print("STDOUT:", result.stdout) if result.stderr: print("STDERR:", result.stderr) - #result = subprocess.run(["bash", "bsc_cluster.sh regen-genesis"], capture_output=True, text=True, cwd=".") if result.returncode != 0: logger.error(f"Failed to regenerate genesis: {result.stderr}") return False @@ -443,7 +607,8 @@ def register_validator_stake(self, server_config: Dict[str, Any], ssh_client: pa echo "Validator address: {validator_address}" {current_dir}/sipc2/{server_name}/create-validator \\ - --consensus-key-dir {current_dir}/sipc2/{server_name}/keys/validator \\ + --operator-key-dir {current_dir}/sipc2/{server_name}/keys/validator \\ + --consensus-key-dir {current_dir}/sipc2/{server_name}/keys/consensus \\ --vote-key-dir {current_dir}/sipc2/{server_name}/keys \\ --password-path {current_dir}/sipc2/{server_name}/keys/password.txt \\ --amount 20001 \\ @@ -592,7 +757,9 @@ def deploy_cluster(self) -> bool: def main(): parser = argparse.ArgumentParser(description="BSC Cluster Deployer") parser.add_argument("--config", default="deployment-config.yaml", help="Path to deployment config file") - parser.add_argument("--action", choices=['deploy', 'monitor', 'files'], default='deploy', help="Action to perform") + parser.add_argument("--action", choices=['deploy', 'monitor', 'files', 'add-node'], default='deploy', help="Action to perform") + parser.add_argument("--server-name", help="Server name (from deployment-config.yaml) for add-node action") + parser.add_argument("--no-update-peers-config", action="store_true", help="When adding node, do not refresh configs for all peers") parser.add_argument("--skip-build", 
action="store_true", help="Skip Docker image build") parser.add_argument("--skip-distribution", action="store_true", help="Skip file distribution") parser.add_argument("--regenerate-genesis", action="store_true", help="Force regenerate genesis.json") @@ -636,6 +803,11 @@ def main(): success = True elif args.action == 'files': success = deployer.distributor.distribute_files() + elif args.action == 'add-node': + if not args.server_name: + logger.error("--server-name is required for add-node action") + return 1 + success = deployer.add_node(args.server_name, update_peers_config=not args.no_update_peers_config) return 0 if success else 1 diff --git a/file_distributor.py b/file_distributor.py index b6df4258..6752072d 100644 --- a/file_distributor.py +++ b/file_distributor.py @@ -6,6 +6,8 @@ import os import yaml +import re +from eth_keys import keys import paramiko import argparse import shutil @@ -87,14 +89,89 @@ def generate_server_config(self, server_config: Dict[str, Any]) -> str: with open(config_template_path, 'r', encoding='utf-8') as f: config_content = f.read() - # Update config with server-specific values + # Update config with server-specific values using regex to be robust to template differences ports = server_config['ports'] chain_id = self.config['deployment']['chain_id'] - # Replace key configuration values - config_content = config_content.replace('NetworkId = 56', f'NetworkId = {chain_id}') - config_content = config_content.replace('HTTPPort = 8545', f'HTTPPort = {ports["http"]}') - config_content = config_content.replace('WSPort = 8546', f'WSPort = {ports["ws"]}') + # Replace NetworkId + config_content = re.sub(r"(?m)^(\s*NetworkId\s*=\s*)\d+\b", rf"\g<1>{chain_id}", config_content) + # Replace HTTPPort + config_content = re.sub(r"(?m)^(\s*HTTPPort\s*=\s*)\d+\b", rf"\g<1>{ports['http']}", config_content) + # Replace WSPort + config_content = re.sub(r"(?m)^(\s*WSPort\s*=\s*)\d+\b", rf"\g<1>{ports['ws']}", config_content) + + # Compute and inject StaticNodes from all peers' nodekeys + try: + static_nodes: List[str] = [] + trusted_nodes: List[str] = [] + + for peer in self.config['servers']: + # Skip self + if peer['name'] == server_config['name']: + continue + + peer_nodekey_path = self._resolve_nodekey_path(peer) + if not peer_nodekey_path or not os.path.exists(peer_nodekey_path): + logger.warning(f"Nodekey not found for {peer['name']}: {peer_nodekey_path}") + continue + + enode_id_hex = self._derive_enode_id_from_nodekey(peer_nodekey_path) + if not enode_id_hex: + logger.warning(f"Failed to derive enode for {peer['name']}") + continue + + # Prefer public_ip if provided + peer_ip = peer.get('public_ip', peer['host']) + peer_p2p = peer['ports']['p2p'] + enode_url = f"enode://{enode_id_hex}@{peer_ip}:{peer_p2p}" + static_nodes.append(enode_url) + trusted_nodes.append(enode_url) + + if static_nodes: + static_nodes_literal = "[" + ", ".join([f'\"{u}\"' for u in static_nodes]) + "]" + + # Try to replace existing StaticNodes + if re.search(r"(?m)^\s*StaticNodes\s*=\s*\[.*\]", config_content): + config_content = re.sub( + r"(?ms)^(\s*StaticNodes\s*=\s*)\[.*?\]", + rf"\g<1>{static_nodes_literal}", + config_content, + ) + else: + # Insert under [Node.P2P] section + if "[Node.P2P]" in config_content: + config_content = re.sub( + r"(?ms)(\[Node\.P2P\].*?)(\n\[|\Z)", + lambda m: m.group(1) + f"\nStaticNodes = {static_nodes_literal}\n" + m.group(2), + config_content, + ) + else: + # Append at end as fallback + config_content += f"\n[Node.P2P]\nStaticNodes = {static_nodes_literal}\n" + + if 
trusted_nodes: + trusted_nodes_literal = "[" + ", ".join([f'\"{u}\"' for u in trusted_nodes]) + "]" + + # Try to replace existing TrustedNodes + if re.search(r"(?m)^\s*TrustedNodes\s*=\s*\[.*\]", config_content): + config_content = re.sub( + r"(?ms)^(\s*TrustedNodes\s*=\s*)\[.*?\]", + rf"\g<1>{trusted_nodes_literal}", + config_content, + ) + else: + # Insert under [Node.P2P] section + if "[Node.P2P]" in config_content: + config_content = re.sub( + r"(?ms)(\[Node\.P2P\].*?)(\n\[|\Z)", + lambda m: m.group(1) + f"\nTrustedNodes = {trusted_nodes_literal}\n" + m.group(2), + config_content, + ) + else: + # Append at end as fallback + config_content += f"\n[Node.P2P]\nTrustedNodes = {trusted_nodes_literal}\n" + except Exception as e: + logger.error(f"Failed to inject StaticNodes: {e}") # Generate server-specific config file path server_name = server_config['name'] @@ -290,12 +367,14 @@ def distribute_files_to_server(self, server_config: Dict[str, Any]) -> bool: os.makedirs(remote_base, exist_ok=True) os.makedirs(os.path.join(remote_base, "config"), exist_ok=True) os.makedirs(os.path.join(remote_base, "keys"), exist_ok=True) + os.makedirs(os.path.join(remote_base, "data"), exist_ok=True) else: remote_base = f"{current_dir}/sipc2/{server_name}" if ssh_client: self.ensure_remote_directory(ssh_client, remote_base) self.ensure_remote_directory(ssh_client, f"{remote_base}/config") self.ensure_remote_directory(ssh_client, f"{remote_base}/keys") + self.ensure_remote_directory(ssh_client, f"{remote_base}/data") else: logger.error("SSH client is not available for remote directory creation") return False @@ -391,13 +470,34 @@ def distribute_files_to_server(self, server_config: Dict[str, Any]) -> bool: if os.path.exists(remote_validator_path) and os.path.isdir(remote_validator_path): import shutil shutil.rmtree(remote_validator_path) - self.upload_directory(None, validator_key_dir, remote_validator_path, server_host) + if os.path.isdir(validator_key_dir): + self.upload_directory(None, validator_key_dir, remote_validator_path, server_host) + else: + self.upload_file(None, validator_key_dir, remote_validator_path, server_host) else: # For remote, remove directory if it exists if ssh_client: ssh_client.exec_command(f"rm -rf {remote_base}/keys/validator") - self.upload_directory(ssh_client, validator_key_dir, f"{remote_base}/keys/validator", server_host) - + if os.path.isdir(validator_key_dir): + self.upload_directory(ssh_client, validator_key_dir, f"{remote_base}/keys/validator", server_host) + else: + self.upload_file(ssh_client, validator_key_dir, f"{remote_base}/keys/validator", server_host) + + # Upload consensus keys + consensus_key_dir = os.path.join(current_dir, f"{self.config['files']['keys_base']}/consensus{node_index}") + if os.path.exists(consensus_key_dir): + remote_consensus_path = os.path.join(remote_base, "keys", "consensus") if is_local else f"{remote_base}/keys/consensus" + if is_local: + # For local, remove directory if it exists + if os.path.exists(remote_consensus_path) and os.path.isdir(remote_consensus_path): + import shutil + shutil.rmtree(remote_consensus_path) + self.upload_directory(None, consensus_key_dir, remote_consensus_path, server_host) + else: + if ssh_client: + ssh_client.exec_command(f"rm -rf {remote_base}/keys/consensus") + self.upload_directory(ssh_client, consensus_key_dir, f"{remote_base}/keys/consensus", server_host) + # Upload BLS keys bls_key_dir = os.path.join(current_dir, f"{self.config['files']['keys_base']}/bls{node_index}") if os.path.exists(bls_key_dir): @@ -457,6 
+557,51 @@ def distribute_files_to_server(self, server_config: Dict[str, Any]) -> bool: logger.error(f"File distribution failed for {server_name}: {e}") return False + def _resolve_nodekey_path(self, server: Dict[str, Any]) -> str: + """Resolve local nodekey path for a server based on role and node_index""" + current_dir = Path(__file__).resolve().parent + keys_base = self.config['files']['keys_base'] + node_index = server['node_index'] + role = server['role'] + + if role == 'validator': + filename = f"validator-nodekey{node_index}" + elif role == 'sentry': + filename = f"sentry-nodekey{node_index}" + elif role == 'fullnode': + filename = f"fullnode-nodekey{node_index}" + else: + return None + + return os.path.join(current_dir, f"{keys_base}/{filename}") + + def _derive_enode_id_from_nodekey(self, nodekey_path: str) -> str: + """Derive enode ID (pubkey hex without 0x) from a devp2p nodekey file""" + try: + with open(nodekey_path, 'rb') as f: + key_bytes = f.read().strip() + + # Nodekey may be hex string or raw 32 bytes + if len(key_bytes) > 32: + try: + key_hex = key_bytes.decode().strip() + if key_hex.startswith('0x'): + key_hex = key_hex[2:] + key_bytes = bytes.fromhex(key_hex) + except Exception: + # Not decodable as hex text; fall through to the length check below + pass + + if len(key_bytes) != 32: + raise ValueError("Invalid nodekey length; expected 32 bytes") + + priv = keys.PrivateKey(key_bytes) + pub_bytes = priv.public_key.to_bytes() # 64 bytes (X||Y) + return pub_bytes.hex() + except Exception as e: + logger.error(f"Failed to derive enode from {nodekey_path}: {e}") + return None + def distribute_files(self, parallel: bool = True, max_parallel: int = 5) -> bool: + """Distribute files to all servers in the cluster"""
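+        # Example (hypothetical key and host, for illustration only): deriving
+        # the enode URL shape that generate_server_config injects into StaticNodes:
+        #   priv = keys.PrivateKey(bytes.fromhex("aa" * 32))
+        #   url = f"enode://{priv.public_key.to_bytes().hex()}@10.0.0.2:30303"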