From b0c95fa6bbf651548d5dc260b4b079cb34505dc0 Mon Sep 17 00:00:00 2001 From: Andy Taylor Date: Sun, 19 Jan 2025 11:19:33 +0000 Subject: [PATCH] Add EditorConfig and enhance .gitignore - Introduced a new `.editorconfig` file with standardized settings for line endings, indentation, and file types. - Expanded `.gitignore` to include more editor and system-specific files, ensuring cleaner git status outputs. - Updated README to inform users of the `prepare.sh` prerequisite step. - Streamlined Terraform configurations with enhanced documentation. --- .editorconfig | 56 +++ .gitignore | 67 ++- README.md | 204 ++++++++- config.yml | 154 ++++--- documentation/AWS.md | 18 +- extras/pf.sshuttle.conf | 7 + main.tf | 30 +- modules/backend/main.tf | 68 +++ modules/deploy/aws/main.tf | 535 +++++++++++++++++++++- modules/deploy/azure/main.tf | 4 +- modules/deploy/config/backend.hcl | 5 + modules/deploy/data/04-customize.sh | 6 +- modules/deploy/data/cml.sh | 218 +++++++-- modules/deploy/data/del.sh | 2 +- modules/deploy/data/vars.sh | 3 +- modules/deploy/data/virl2-base-config.yml | 2 +- modules/deploy/main.tf | 10 + modules/readyness/main.tf | 12 +- prepare.bat | 294 ++++++++++-- prepare.sh | 287 +++++++++++- providers.tf | 8 + requirements.txt | 7 + scripts/clean_code.py | 119 +++++ scripts/generate_virlrc.py | 104 +++++ scripts/manage_cml_instances.py | 214 +++++++++ ssm-tunnel-guide.md | 107 +++++ terraform.tf | 13 +- upload-images-to-aws-macos.sh | 168 +++++++ upload-images-to-aws.sh | 10 +- 29 files changed, 2505 insertions(+), 227 deletions(-) create mode 100644 .editorconfig create mode 100644 extras/pf.sshuttle.conf create mode 100644 modules/backend/main.tf create mode 100644 modules/deploy/config/backend.hcl create mode 100644 providers.tf create mode 100644 requirements.txt create mode 100644 scripts/clean_code.py create mode 100644 scripts/generate_virlrc.py create mode 100644 scripts/manage_cml_instances.py create mode 100644 ssm-tunnel-guide.md create mode 100755 upload-images-to-aws-macos.sh diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..8d3e8d3 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,56 @@ +# Top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file +[*] +end_of_line = lf +insert_final_newline = true +charset = utf-8 +trim_trailing_whitespace = true +max_line_length = 120 + +# Python files +[*.py] +indent_style = space +indent_size = 4 + +# Shell scripts +[*.sh] +indent_style = space +indent_size = 4 +shell_variant = bash +binary_next_line = true +switch_case_indent = true + +# Terraform files +[*.{tf,tfvars}] +indent_style = space +indent_size = 2 +quote_type = double + +# YAML files +[*.{yml,yaml}] +indent_style = space +indent_size = 2 +quote_type = single + +# Markdown files +[*.md] +indent_style = space +indent_size = 2 +trim_trailing_whitespace = false + +# JSON files +[*.json] +indent_style = space +indent_size = 2 + +# HCL files (Terraform configs) +[*.hcl] +indent_style = space +indent_size = 2 + +# Documentation +[documentation/**] +indent_style = space +indent_size = 2 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3506120..323fec0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,69 @@ +# macOS system files .DS_Store +.AppleDouble +.LSOverride +._* + +# Editor files +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Environment files .envrc -.terraform +.env +.env.* +!.envrc.example + +# Terraform files +.terraform/ .terraform.lock.hcl -terraform.tfstate* +terraform.tfstate 
+terraform.tfstate.*
 .terraform.tfstate.lock.info
+*.tfvars
+!example.tfvars
+
+# Python files
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Node modules (for documentation)
+node_modules/
+
+# Backup files
+*.bak
+*.backup
+*~
+backups_*/
+
+# CML specific
+.virlrc
+# config.yml is tracked as it serves as a template
+*.pkg
+*.qcow2
+*.img
+
+# Logs
+*.log
+logs/
diff --git a/README.md b/README.md
index 67d99bd..00e3d38 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,12 @@ With CML 2.7, you can run CML instances on Azure and AWS. We have tested CML de
 
 *It is very likely that this tool chain can not be used "as-is"*. It should be forked and adapted to specific customer requirements and environments.
 
+> [!NOTE]
+>
+> Please ensure that `prepare.sh` is run first, as it sets key variables.
+
 > [!IMPORTANT]
 >
 > **Version 2.7 vs 2.8**
@@ -24,6 +30,34 @@ With CML 2.7, you can run CML instances on Azure and AWS. We have tested CML de
 > [!IMPORTANT]
 > Read the section below about [cloud provider selection](#important-cloud-provider-selection) (prepare script).
 
+## CML Utilities
+
+This project includes scripts that integrate with [cmlutils](https://pypi.org/project/cmlutils/), a command-line tool for managing CML labs. To use cmlutils:
+
+1. Install the package:
+```bash
+pip install cmlutils
+```
+
+2. Generate the .virlrc configuration:
+```bash
+python scripts/generate_virlrc.py
+```
+
+3. Source the configuration:
+```bash
+source .virlrc
+```
+
+4. You can now use cmlutils commands:
+```bash
+# List labs
+cml ls
+
+# Stop all labs
+cml down --all
+```
+
 ## General requirements
 
 The tooling uses Terraform to deploy CML instances in the Cloud. It's therefore required to have a functional Terraform installation on the computer where this tool chain should be used.
@@ -48,18 +82,90 @@ Some of the steps and procedures outlined below are preparation steps and only n
 
 - creating the storage resources and uploading images and software into it
 - creation of an SSH key pair and making the public key available to the cloud service
 - editing the `config.yml` configuration file including the selection of the cloud service, an instance flavor, region, license token and other parameters
+- optionally setting up an S3 backend for Terraform state management
 
 #### Important: Cloud provider selection
 
 The tooling supports multiple cloud providers (currently AWS and Azure). Not everyone wants both providers. The **default configuration is set to use AWS only**. If Azure should be used either instead or in addition then the following steps are mandatory:
 
 1. Run the `prepare.sh` script to modify and prepare the tool chain. If on Windows, use `prepare.bat`. You can actually choose to use both, if that's what you want.
+   - The script will ask for a prefix for AWS resources (or generate a random one)
+   - If AWS is enabled, you will be offered the option to set up an S3 backend for state management
+   - The script will create the necessary configurations and backups
2. Configure the proper target ("aws" or "azure") in the configuration file
 
 The first step is unfortunately required, since it is impossible to dynamically select different cloud configurations within the same Terraform HCL configuration. See [this SO link](https://stackoverflow.com/questions/70428374/how-to-make-the-provider-configuration-optional-and-based-on-the-condition-in-te) for more context and details.
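A typical first run therefore looks like this (an illustrative sketch only; the exact prompts and defaults come from `prepare.sh` and your environment):

```bash
./prepare.sh        # choose provider(s), prefix, region and, optionally, the S3 state backend
vi config.yml       # set target: aws (or azure) and review the remaining settings
terraform init      # download the required providers and initialize the state backend
```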
The default "out-of-the-box" configuration is AWS, so if you want to run on Azure, don't forget to run the prepare script.

+#### State Management
+
+The tool now supports using S3 as a backend for Terraform state management. When running `prepare.sh`:
+
+1. You'll be asked if you want to use S3 for state management
+2. If yes, the script will:
+   - Check for an existing state bucket
+   - Create the bucket and DynamoDB table if they don't exist
+   - Configure Terraform to use the S3 backend
+   - Migrate any existing state
+
+Benefits of using the S3 backend:
+- State locking via DynamoDB
+- Encrypted state storage
+- State versioning
+- Team collaboration support
+
+#### Known Issues
+
+When using bare metal instances (e.g., `c5.metal`), you might encounter timeout errors during deployment:
+
+```
+│ Error: CML2 Provider Error
+│
+│   with module.ready.data.cml2_system.state,
+│   on modules/readyness/main.tf line 7, in data "cml2_system" "state":
+│    7: data "cml2_system" "state" {
+│
+│ ran into timeout (max 15m)
+```
+
+This is expected as metal instances take longer to start/stop. The provider has been configured with an extended timeout (15 minutes), but you may still see this error. The deployment will continue to work despite this error.
+
+#### Resource Naming and Prefixes
+
+The `prepare.sh` script handles all resource naming and prefixes:
+
+1. You'll be prompted to enter a prefix or use a randomly generated one
+2. This prefix will be used for all AWS resources (buckets, instances, etc.)
+3. The script automatically updates all relevant files with the new prefix
+4. No personal identifiers need to be stored in the repository
+
+Example resources that use the prefix:
+- S3 bucket: `<prefix>-aws-cml`
+- State bucket: `<prefix>-aws-cml-tfstate`
+- Instance names: `cml-dublin-<prefix>`
+
+The following fields in config.yml will be automatically updated:
+```yaml
+aws:
+  region:  # Set based on user input
+  availability_zone:  # Set based on region
+  bucket:  # Set to <prefix>-aws-cml
+common:
+  key_name:  # Set to cml-<region_city>-<prefix>
+```
+
+The prepare script will:
+1. Ask for your preferred AWS region
+2. Map the region to a city name based on location:
+   - EMEA: dublin (eu-west-1), london (eu-west-2), paris (eu-west-3), etc.
+   - US: virginia (us-east-1), ohio (us-east-2), california (us-west-1), etc.
+   - APAC: singapore (ap-southeast-1), sydney (ap-southeast-2), tokyo (ap-northeast-1), etc.
+3. Configure all region-specific settings automatically
+4. Use the region city in resource naming
+
+You can leave these fields empty in the repository as they will be populated by the prepare script.
+
 #### Managing secrets

 > [!WARNING]
@@ -83,9 +189,9 @@
 secret:
   manager: conjur
   secrets:
     app:
       username: admin
       # Example using Conjur
       path: example-org/example-project/secret/admin_password
 ```

 Refer to the `.envrc.example` file for examples to set up environment variables to use an external secrets manager.
@@ -99,7 +205,7 @@
 secret:
   manager: dummy
   secrets:
     app:
       username: admin
       # raw_secret: # Undefined
 ```
@@ -110,7 +216,7 @@
 The included default `config.yml` configures generated passwords for the following secrets:

 - App password (for the UI)
 - System password for the OS system administration user
 - Cluster secret when clustering is enabled

 Regardless of the secret manager in use or whether you use random passwords or not: You **must** provide a valid Smart Licensing token for the system to work, though.
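For the external secrets managers, credentials are taken from the environment. As a minimal sketch for Vault (illustrative only; `VAULT_ADDR` and `VAULT_TOKEN` are the standard HashiCorp Vault client variables, and `.envrc.example` lists what this tool chain actually expects):

```bash
# Standard Vault client environment variables (example values)
export VAULT_ADDR="https://vault.example.com:8200"
export VAULT_TOKEN="$(cat ~/.vault-token)"
```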
@@ -165,6 +271,81 @@ It is assumed that the CML cloud repository was cloned to the computer where Ter When installed, run `terraform init` to initialize Terraform. This will download the required providers and create the state files. +## Using sshuttle for VPN-like Access + +To access your CML instance and its internal networks, you can use sshuttle as a lightweight VPN alternative. This allows you to reach the CML web interface and any nodes in your labs directly from your local machine. + +### Installation + +```bash +# On Ubuntu/Debian +sudo apt-get install sshuttle + +# On macOS using Homebrew +brew install sshuttle + +# Using pip (all platforms) +pip install sshuttle +``` + +### Basic Usage + +To create a VPN-like connection to your CML instance: + +```bash +# Replace 10.0.0.0/16 with your CML instance's internal network range +sshuttle --dns -r admin@your-cml-instance 10.0.0.0/16 +``` + +### macOS Configuration + +On macOS, you'll need additional configuration due to the platform's security features: + +1. Copy the provided packet filter configuration: + ```bash + sudo cp extras/pf.sshuttle.conf /etc/pf.anchors/ + ``` + +2. Add the following line to `/etc/pf.conf` if it doesn't exist: + ``` + rdr-anchor "sshuttle" + ``` + +3. Load the configuration: + ```bash + sudo pfctl -f /etc/pf.conf + ``` + +The `pf.sshuttle.conf` file is required on macOS because: +- macOS uses the Packet Filter (PF) firewall +- sshuttle needs specific firewall rules to forward traffic +- The configuration allows: + - Traffic to the SSH tunnel port (2222) + - Forwarded traffic to/from your CML networks + - Proper NAT for return traffic + +### Usage Tips + +1. Run sshuttle in the background: + ```bash + sshuttle --dns -D -r admin@your-cml-instance 10.0.0.0/16 + ``` + +2. To stop the background process: + ```bash + pkill sshuttle + ``` + +3. For AWS deployments, use the bastion host: + ```bash + sshuttle --dns -r admin@your-bastion-host 10.0.0.0/16 + ``` + +Once connected, you can: +- Access the CML web interface using its private IP +- Connect directly to nodes in your labs +- Use DNS resolution for lab hostnames + ## Cloud specific instructions See the documentation directory for cloud specific instructions: @@ -226,4 +407,17 @@ All scripts are copied as they are including all comments which will require eve A potential solution to the data limit is to provide the scripts in storage by bundling them up into a tar file or similar, store the tar file in S3 and then only reference this file in the user-data. However, this hasn't been implemented, yet. -EOF +## Development + +### Git Hooks + +This repository uses git hooks to maintain code quality: + +- `pre-commit`: Automatically sanitizes configuration files to remove sensitive data +- `pre-push`: Prevents pushing sensitive files to public repositories + +To enable the hooks: +```bash +git config core.hooksPath .githooks +chmod +x .githooks/* +``` diff --git a/config.yml b/config.yml index 01833a2..f9fb92e 100644 --- a/config.yml +++ b/config.yml @@ -3,64 +3,70 @@ # Copyright (c) 2019-2024, Cisco Systems, Inc. # All rights reserved. # +# This is a template configuration file. Before using: +# 1. Run prepare.sh (or prepare.bat on Windows) to set up your environment +# 2. Uncomment and customize the settings below based on your needs +# 3. Never commit sensitive information like passwords or API keys +# # at this time, "aws" and "azure" are defined targets # make sure that you ran the prepare.sh / prepare.bat script! 
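+# As a minimal illustration, a customized AWS deployment could end up looking
+# like this after the prepare step (example values only; prepare.sh fills in
+# your own prefix and region):
+#
+#   target: aws
+#   aws:
+#     region: eu-west-1
+#     availability_zone: eu-west-1a
+#     bucket: mylab01-aws-cml
+#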
-target: aws
+# target: aws
 
 aws:
-  region: us-east-1
-  availability_zone: us-east-1a
-  bucket: bucket-name-goes-here
-  # flavor: c5.2xlarge
-  flavor: m5zn.metal
-  flavor_compute: m5zn.metal
-  profile: permission-profile-goes-here
+  # region: # Will be set by prepare.sh based on user input
+  # availability_zone: # Will be set by prepare.sh based on region
+  # bucket: # Will be set by prepare.sh to <prefix>-aws-cml
+  # flavor: c5.metal
+  # flavor_compute: c5.metal
+  # profile: s3-access-for-ec2
   #
   # The following two attributes are used in the aws-mini variant. They specify
   # the subnet and the security group ID the CML VM should use. They are NOT
   # used when using the regular AWS deployment option (non-mini).
-  subnet_id: ""
-  sg_id: ""
+  # subnet_id: ""
+  # sg_id: ""
   #
   # The following values are used by the regular AWS deployment option.
   # When specifying a VPC ID below then this prefix must exist on that VPC!
-  public_vpc_ipv4_cidr: 10.0.0.0/16
-  enable_ebs_encryption: false
+  # public_vpc_ipv4_cidr: 10.0.0.0/16
+  # enable_ebs_encryption: false
   #
   # Leave empty to create a custom VPC / Internet gateway, or provide the IDs
   # of the VPC / gateway to use, they must exist and properly associated.
   # also: an IPv6 CIDR prefix must be associated with the specified VPC
-  vpc_id: ""
-  gw_id: ""
+  # vpc_id: ""
+  # gw_id: ""
   #
   # Use spot instances, when available for the VMs forming the cluster
   spot_instances:
-    use_spot_for_controller: false
-    use_spot_for_computes: false
+    # use_spot_for_controller: false
+    # use_spot_for_computes: false
 
 azure:
-  resource_group: resource-group-name
-  size: Standard_D4d_v4
-  size_compute: unused_at_the_moment
-  storage_account: storage-account-name
-  container_name: container-name
+  # resource_group: resource-group-name
+  # size: Standard_D4d_v4
+  # size_compute: unused_at_the_moment
+  # storage_account: storage-account-name
+  # container_name: container-name
 
 common:
-  disk_size: 64
-  controller_hostname: cml-controller
-  key_name: ssh-key-name
-  allowed_ipv4_subnets: ["0.0.0.0/0"]
-  enable_patty: true
+  # disk_size: 64
+  # controller_hostname: cml-controller
+  # key_name: # Will be set by prepare.sh to cml-<region_city>-<prefix>
+  # List of allowed IPv4 subnets for inbound traffic
+  # allowed_ipv4_subnets:
+  #   - "10.0.0.0/8"
+  # enable_patty: true
 
 cluster:
-  enable_cluster: false
+  # enable_cluster: false
   # No longer used, see the secret manager section below
-  #secret: your-secret-password
-  allow_vms_on_controller: true
-  number_of_compute_nodes: 0
-  compute_hostname_prefix: cml-compute
-  compute_disk_size: 32
+  # secret: your-secret-password
+  # allow_vms_on_controller: true
+  # number_of_compute_nodes: 0
+  # compute_hostname_prefix: cml-compute
+  # compute_disk_size: 32
 
 secret:
   # At this time, 'vault', 'conjur' and 'dummy' are supported secrets managers.
@@ -70,7 +76,7 @@ secret:
   # https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/password
   #manager: vault
   #manager: conjur
-  manager: dummy
+  # manager: dummy
 
 
   conjur:
@@ -88,29 +94,29 @@ secret:
   # SmartLicense token is accessed using 'secrets.smartlicense_token.secret'.
   secrets:
     app:
       username: admin
       # Used with dummy secret manager. If unspecified, a random password will
       # be generated.
      # You need to escape special chars: #raw_secret: '\"!@$%'
-      #raw_secret: your-secret-password
+      raw_secret: <admin_password>
       # Path to secret, used with both Vault and Conjur:
       #path: example-org/example-project/admin_password
       # Used with Vault only:
       #field: secret
 
     sys:
-      username: sysadmin
+      # username: sysadmin
       # Used with dummy secret manager. If unspecified, a random password will
       # be generated.
-      #raw_secret: your-secret-password
+      # raw_secret: <sysadmin_password>
       # Path to secret, used with both Vault and Conjur:
       #path: example-org/example-project/sysadmin_password
       # Used with Vault only:
       #field: secret
 
     smartlicense_token:
       # Only used with dummy secret manager
-      raw_secret: your-smart-licensing-token
+      # raw_secret: <your-smart-licensing-token>
       # Path to secret, used with both Vault and Conjur:
       #path: example-org/example-project/smartlicense_token
       # Used with Vault only:
@@ -127,17 +133,17 @@
 
 app:
   # **No longer used, see the secret manager section above**
   #user: admin
   #pass: your-secret-password
 
-  software: cml2_2.7.2-26_amd64-29.pkg
+  # software: cml2_2.8.0-6_amd64-32.pkg
 
   # The list must have at least ONE element, this is what the dummy is for in
   # case 00- and 01- are commented out!
   customize:
     # - 00-patch_vmx.sh
-    - 99-dummy.sh
+    # - 99-dummy.sh
 
 license:
-  flavor: CML_Enterprise
+  flavor: CML_Personal
   # **No longer used, see the secret manager section above**
   #token: your-smart-licensing-token
   # Unless you have additional node licenses available, leave this at zero
@@ -151,47 +157,47 @@ license:
 
 refplat:
   definitions:
     - alpine
-    # - alpine-trex
-    # - alpine-wanem
-    # - asav
-    # - cat8000v
-    # - cat9000v-q200
-    # - cat9000v-uadp
-    # - cat-sdwan-edge
-    # - cat-sdwan-controller
-    # - cat-sdwan-manager
-    # - cat-sdwan-validator
-    # - cat-sdwan-vedge
-    # - csr1000v
-    # - desktop
+    - alpine-trex
+    - alpine-wanem
+    - asav
+    - cat8000v
+    - cat9000v-q200
+    - cat9000v-uadp
+    - cat-sdwan-edge
+    - cat-sdwan-controller
+    - cat-sdwan-manager
+    - cat-sdwan-validator
+    - cat-sdwan-vedge
+    - csr1000v
+    - desktop
     - iol-xe
     - ioll2-xe
     - iosv
     - iosvl2
-    # - iosxrv9000
-    # - nxosv9000
+    - iosxrv9000
+    - nxosv9000
     - server
     - ubuntu
   images:
     - alpine-3-19-1-base
-    # - alpine-3-19-1-trex
-    # - alpine-3-19-1-wanem
-    # - asav-9-20-2
-    # - cat8000v-17-13-01a
-    # - cat9000v-q200-17-12-01prd9
-    # - cat9000v-uadp-17-12-01prd9
-    # - cat-sdwan-edge-17-13-01a
-    # - cat-sdwan-controller-20-13-1
-    # - cat-sdwan-manager-20-13-1
-    # - cat-sdwan-validator-20-13-1
-    # - cat-sdwan-vedge-20-13-1
-    # - csr1000v-17-03-068a
-    # - desktop-3-19-1-xfce
+    - alpine-3-19-1-trex
+    - alpine-3-19-1-wanem
+    - asav-9-20-2
+    - cat8000v-17-13-01a
+    - cat9000v-q200-17-12-01prd9
+    - cat9000v-uadp-17-12-01prd9
+    - cat-sdwan-edge-17-13-01a
+    - cat-sdwan-controller-20-13-1
+    - cat-sdwan-manager-20-13-1
+    - cat-sdwan-validator-20-13-1
+    - cat-sdwan-vedge-20-13-1
+    - csr1000v-17-03-068a
+    - desktop-3-19-1-xfce
    - iol-xe-17-12-01
    - ioll2-xe-17-12-01
    - iosv-159-3-m8
    - iosvl2-2020
-    # - iosxrv9000-7-11-1
-    # - nxosv9300-10-4-2-f
+    - iosxrv9000-7-11-1
+    - nxosv9300-10-4-2-f
    - server-tcl-14-1
    - ubuntu-22-04-20240126
diff --git a/documentation/AWS.md b/documentation/AWS.md
index ccf38bd..8b80e39 100644
--- a/documentation/AWS.md
+++ b/documentation/AWS.md
@@ -571,7 +571,7 @@ Before destroying an instance using `terraform destroy` it is important to remov
 
 To remove the license using automation, a script is provided in `/provision/del.sh`.
The output from the deployment can be used, it looks like this:
 
 ```plain
 ssh -p1122 sysadmin@IP_ADDRESS_OF_CONTROLLER /provision/del.sh
 ```
 
 This requires all labs to be stopped (no running VMs allowed) prior to removing the license. It will only work as long as the provisioned usernames and passwords have not changed between deployment and destruction of the instance.
 
@@ -664,7 +664,7 @@
 Outputs:
 
 cml2info = {
   "address" = "18.194.38.215"
   "del" = "ssh -p1122 sysadmin@18.194.38.215 /provision/del.sh"
   "url" = "https://18.194.38.215"
   "version" = "2.5.1+build.10"
 }
 
 $
 
 As can be seen above, a public IPv4 address has been assigned to the instance which can be used to access it via SSH and the provided SSH key pair (if this does not connect right away then the system isn't ready yet and more waiting is needed):
 
 ```plain
 $ ssh -p1122 sysadmin@18.194.38.215
 The authenticity of host '[18.194.38.215]:1122 ([18.194.38.215]:1122)' can't be established.
 ED25519 key fingerprint is SHA256:dz7GcRGzcWiyHbPb++NyQykP9r7UoG0rNiACi5ft1lQ.
 This key is not known by any other names
 Are you sure you want to continue connecting (yes/no/[fingerprint])? yes
 Warning: Permanently added '[18.194.38.215]:1122' (ED25519) to the list of known hosts.
 Welcome to Ubuntu 20.04.6 LTS (GNU/Linux 5.15.0-1033-aws x86_64)
 [...]
 sysadmin@rschmied-aws-2023042001:~$
 ```
 
 At this point, the status of the system can be checked:
 
 ```plain
 sysadmin@rschmied-aws-2023042001:~$ systemctl status | head
 ● rschmied-aws-2023042001
     State: running
      Jobs: 0 queued
 ├─user.slice
 │ └─user-1001.slice
 │   ├─user@1001.service
 sysadmin@rschmied-aws-2023042001:~$ systemctl status virl2.target
 ● virl2.target - CML2 Network Simulation System
     Loaded: loaded (/lib/systemd/system/virl2.target; enabled; vendor preset: enabled)
     Active: active since Fri 2024-04-21 14:47:58 UTC; 2min 13s ago
 
 Warning: some journal files were not opened due to insufficient permissions.
 sysadmin@rschmied-aws-2023042001:~$
 ```
 
 The system is running and the VIRL2 target (CML) is active!
 
 Prior to stopping the instance, the licensing token must be removed via the UI.
 
 > The `del.sh` has no output if the command is successful.
 
 ```plain
 $ ssh -p1122 sysadmin@18.194.38.215 /provision/del.sh
 The authenticity of host '[18.194.38.215]:1122 ([18.194.38.215]:1122)' can't be established.
 ED25519 key fingerprint is SHA256:4QxgLv9zzKR5gJP4rWE41STdnAHufBYkTKBpp/VA+k8.
 This key is not known by any other names
@@ -748,7 +748,7 @@ Plan: 0 to add, 0 to change, 3 to destroy.
Changes to Outputs:
   - cml2info = {
       - address = "18.194.38.215"
       - del     = "ssh -p1122 sysadmin@18.194.38.215 /provision/del.sh"
       - url     = "https://18.194.38.215"
       - version = "2.5.1+build.10"
     } -> null
diff --git a/extras/pf.sshuttle.conf b/extras/pf.sshuttle.conf
new file mode 100644
index 0000000..5a630d5
--- /dev/null
+++ b/extras/pf.sshuttle.conf
@@ -0,0 +1,7 @@
+# Allow traffic to the SSM tunneled port
+pass in proto tcp from any to any port 2222
+pass out proto tcp from any to any port 2222
+
+# Allow forwarded traffic for your target subnet
+pass in proto tcp from 10.0.0.0/16 to any
+pass out proto tcp from any to 10.0.0.0/16
diff --git a/main.tf b/main.tf
index 1d36e4f..0e26e98 100644
--- a/main.tf
+++ b/main.tf
@@ -4,8 +4,13 @@
 # All rights reserved.
 #
 
+# Local variables for configuration processing
 locals {
+  # Load and decode the YAML configuration file
   raw_cfg = yamldecode(file(var.cfg_file))
+
+  # Merge configuration excluding secrets, then add processed secrets from the secrets module
+  # This ensures secrets are properly managed and not exposed in raw form
   cfg = merge(
     {
       for k, v in local.raw_cfg : k => v if k != "secret"
@@ -14,33 +19,48 @@ locals {
       secrets = module.secrets.secrets
     }
   )
+
+  # Process extra configuration variables if provided
+  # If cfg_extra_vars is a file path, read the file; otherwise use the value directly
   extras = var.cfg_extra_vars == null ? "" : (
     fileexists(var.cfg_extra_vars) ? file(var.cfg_extra_vars) : var.cfg_extra_vars
   )
 }
 
+# Secrets management module
+# Handles secure storage and retrieval of sensitive information like passwords and API keys
 module "secrets" {
   source = "./modules/secrets"
   cfg    = local.raw_cfg
 }
 
+# Deployment module
+# Manages the creation and configuration of CML infrastructure in the chosen cloud provider
 module "deploy" {
   source = "./modules/deploy"
   cfg    = local.cfg
   extras = local.extras
+  providers = {
+    cml2.controller = cml2.controller
+  }
 }
 
+# CML2 Provider Configuration
+# Sets up the connection to the CML controller using the deployed instance's public IP
 provider "cml2" {
+  alias          = "controller" # referenced as cml2.controller by the deploy and ready modules
   address        = "https://${module.deploy.public_ip}"
   username       = local.cfg.secrets.app.username
   password       = local.cfg.secrets.app.secret
-  skip_verify    = true
-  dynamic_config = true
+  skip_verify    = true # Skip SSL verification as CML may use self-signed certificates
+  dynamic_config = true # Allow dynamic configuration updates
 }
 
+# Readiness Check Module
+# Verifies that the CML instance is fully operational and ready to accept connections
 module "ready" {
   source = "./modules/readyness"
-  depends_on = [
-    module.deploy.public_ip
-  ]
+  providers = {
+    cml2 = cml2.controller
+  }
+  depends_on = [module.deploy] # Ensure deployment is complete before checking readiness
 }
diff --git a/modules/backend/main.tf b/modules/backend/main.tf
new file mode 100644
index 0000000..fc786ed
--- /dev/null
+++ b/modules/backend/main.tf
@@ -0,0 +1,68 @@
+variable "prefix" {
+  description = "Prefix for resource names"
+  type        = string
+}
+
+variable "region" {
+  description = "AWS region"
+  type        = string
+  default     = "eu-west-1"
+}
+
+# S3 bucket for Terraform state
+resource "aws_s3_bucket" "terraform_state" {
+  bucket = "${var.prefix}-aws-cml-tfstate"
+
+  lifecycle {
+    prevent_destroy = true
+  }
+
+  tags = {
+    Name = "CML-terraform-state-${var.prefix}"
+  }
+}
+
+# Enable versioning for state files
+resource "aws_s3_bucket_versioning" "terraform_state" {
+  bucket = aws_s3_bucket.terraform_state.id
+  versioning_configuration {
status = "Enabled" + } +} + +# Enable server-side encryption +resource "aws_s3_bucket_server_side_encryption_configuration" "terraform_state" { + bucket = aws_s3_bucket.terraform_state.id + + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" + } + } +} + +# Block all public access +resource "aws_s3_bucket_public_access_block" "terraform_state" { + bucket = aws_s3_bucket.terraform_state.id + + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +# DynamoDB table for state locking +resource "aws_dynamodb_table" "terraform_locks" { + name = "${var.prefix}-aws-cml-tfstate-lock" + billing_mode = "PAY_PER_REQUEST" + hash_key = "LockID" + + attribute { + name = "LockID" + type = "S" + } + + tags = { + Name = "CML-terraform-locks-${var.prefix}" + } +} diff --git a/modules/deploy/aws/main.tf b/modules/deploy/aws/main.tf index 302a63b..75106a6 100644 --- a/modules/deploy/aws/main.tf +++ b/modules/deploy/aws/main.tf @@ -31,6 +31,12 @@ locals { # Need to have this as it's referenced in the template (Azure specific) { sas_token = "undefined" } ) + ssh_config = { + enable_password_auth = true + enable_root_login = true + enable_service = true + enable_console = true + } } ) @@ -44,6 +50,12 @@ locals { # (Azure specific) { sas_token = "undefined" } ) + ssh_config = { + enable_password_auth = true + enable_root_login = true + enable_service = true + enable_console = true + } } )] @@ -51,17 +63,27 @@ locals { # reference platforms has no single quotes in the file names or keys (should # be reasonable, but you never know...) cloud_config = templatefile("${path.module}/../data/cloud-config.txt", { - vars = local.vars - cml_config = local.cml_config_controller - cfg = var.options.cfg - cml = var.options.cml - common = var.options.common - copyfile = var.options.copyfile - del = var.options.del - interface_fix = var.options.interface_fix - extras = var.options.extras - hostname = var.options.cfg.common.controller_hostname - path = path.module + vars = local.vars + cml_config = local.cml_config_controller + cfg = var.options.cfg + cml = var.options.cml + common = var.options.common + copyfile = var.options.copyfile + del = var.options.del + interface_fix = var.options.interface_fix + extras = var.options.extras + hostname = var.options.cfg.common.controller_hostname + path = path.module + additional_packages = <<-EOT + - amazon-cloudwatch-agent + EOT + runcmd = <<-EOT + - curl -O https://s3.amazonaws.com/amazoncloudwatch-agent/ubuntu/amd64/latest/amazon-cloudwatch-agent.deb + - dpkg -i amazon-cloudwatch-agent.deb + - /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c ssm:/AmazonCloudWatch-Config + - systemctl enable amazon-cloudwatch-agent + - systemctl start amazon-cloudwatch-agent + EOT }) cloud_config_compute = [for i in range(0, local.num_computes) : templatefile("${path.module}/../data/cloud-config.txt", { @@ -163,6 +185,9 @@ locals { "self" : false, } ] + + # Always create the endpoint + create_ec2_instance_connect = true } resource "aws_security_group" "sg_tf" { @@ -236,11 +261,69 @@ resource "aws_vpc" "main_vpc" { count = length(var.options.cfg.aws.vpc_id) > 0 ? 
0 : 1 cidr_block = var.options.cfg.aws.public_vpc_ipv4_cidr assign_generated_ipv6_cidr_block = true + enable_dns_hostnames = true + enable_dns_support = true tags = { Name = "CML-vpc-${var.options.rand_id}" } } +# SSM VPC Endpoints +resource "aws_vpc_endpoint" "ssm" { + vpc_id = local.main_vpc.id + service_name = "com.amazonaws.${var.options.cfg.aws.region}.ssm" + vpc_endpoint_type = "Interface" + subnet_ids = [aws_subnet.public_subnet.id] + security_group_ids = [aws_security_group.vpce_sg.id] + private_dns_enabled = true + tags = { + Name = "ssm-endpoint-${var.options.rand_id}" + } +} + +resource "aws_vpc_endpoint" "ssmmessages" { + vpc_id = local.main_vpc.id + service_name = "com.amazonaws.${var.options.cfg.aws.region}.ssmmessages" + vpc_endpoint_type = "Interface" + subnet_ids = [aws_subnet.public_subnet.id] + security_group_ids = [aws_security_group.vpce_sg.id] + private_dns_enabled = true + tags = { + Name = "ssmmessages-endpoint-${var.options.rand_id}" + } +} + +resource "aws_vpc_endpoint" "ec2messages" { + vpc_id = local.main_vpc.id + service_name = "com.amazonaws.${var.options.cfg.aws.region}.ec2messages" + vpc_endpoint_type = "Interface" + subnet_ids = [aws_subnet.public_subnet.id] + security_group_ids = [aws_security_group.vpce_sg.id] + private_dns_enabled = true + tags = { + Name = "ec2messages-endpoint-${var.options.rand_id}" + } +} + +# Security group for VPC endpoints +resource "aws_security_group" "vpce_sg" { + name = "vpce-sg-${var.options.rand_id}" + description = "Security group for VPC endpoints" + vpc_id = local.main_vpc.id + + ingress { + description = "HTTPS from VPC" + from_port = 443 + to_port = 443 + protocol = "tcp" + cidr_blocks = [var.options.cfg.aws.public_vpc_ipv4_cidr] + } + + tags = { + Name = "vpce-sg-${var.options.rand_id}" + } +} + #------------------- public subnet, IGW and routing --------------------------- resource "aws_internet_gateway" "public_igw" { count = length(var.options.cfg.aws.gw_id) > 0 ? 0 : 1 @@ -424,11 +507,15 @@ resource "aws_ec2_transit_gateway_multicast_group_member" "cml_compute_int" { resource "aws_instance" "cml_controller" { instance_type = var.options.cfg.aws.flavor ami = data.aws_ami.ubuntu.id - iam_instance_profile = var.options.cfg.aws.profile + iam_instance_profile = aws_iam_instance_profile.cml_instance_profile.name key_name = var.options.cfg.common.key_name - tags = { Name = "CML-controller-${var.options.rand_id}" } ebs_optimized = "true" depends_on = [aws_route_table_association.public_subnet] + + tags = { + Name = "CML-controller-${var.options.rand_id}" + } + dynamic "instance_market_options" { for_each = var.options.cfg.aws.spot_instances.use_spot_for_controller ? 
[1] : []
     content {
@@ -456,17 +543,27 @@
     }
   }
   user_data = data.cloudinit_config.cml_controller.rendered
+
+  timeouts {
+    create = "20m"
+    update = "20m"
+    delete = "20m"
+  }
 }
 
 resource "aws_instance" "cml_compute" {
   instance_type        = var.options.cfg.aws.flavor_compute
   ami                  = data.aws_ami.ubuntu.id
-  iam_instance_profile = var.options.cfg.aws.profile
+  iam_instance_profile = aws_iam_instance_profile.cml_instance_profile.name
   key_name             = var.options.cfg.common.key_name
-  tags                 = { Name = "CML-compute-${count.index + 1}-${var.options.rand_id}" }
   ebs_optimized        = "true"
   count                = local.num_computes
   depends_on           = [aws_instance.cml_controller, aws_route_table_association.compute_subnet_assoc]
+
+  tags = {
+    Name = "CML-compute-${count.index + 1}-${var.options.rand_id}"
+  }
+
   dynamic "instance_market_options" {
     for_each = var.options.cfg.aws.spot_instances.use_spot_for_computes ? [1] : []
     content {
@@ -491,6 +588,12 @@
     device_index = 1
   }
   user_data = data.cloudinit_config.cml_compute[count.index].rendered
+
+  timeouts {
+    create = "20m"
+    update = "20m"
+    delete = "20m"
+  }
 }
 
 data "aws_ami" "ubuntu" {
 
   filter {
     name   = "name"
-    values = ["ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-*"]
+    values = ["ubuntu/images-testing/*noble*"]
   }
 
   filter {
     name   = "virtualization-type"
     values = ["hvm"]
   }
 
-  owners = ["099720109477"] # Owner ID of Canonical
+  filter {
+    name   = "root-device-type"
+    values = ["ebs"]
+  }
+
+  filter {
+    name   = "architecture"
+    values = ["x86_64"]
+  }
+
+  filter {
+    name   = "state"
+    values = ["available"]
+  }
+
+  owners = ["099720109477"] # Canonical's AWS account ID
 }
 
 data "cloudinit_config" "cml_controller" {
@@ -532,3 +650,386 @@
     content = local.cloud_config_compute[count.index]
   }
 }
+
+# Security group for bastion
+resource "aws_security_group" "bastion_sg" {
+  name        = "bastion-sg-${var.options.rand_id}"
+  description = "Security group for bastion host"
+  vpc_id      = local.main_vpc.id
+
+  ingress {
+    description = "SSH from private networks (10.0.0.0/8)"
+    from_port   = 22
+    to_port     = 22
+    protocol    = "tcp"
+    cidr_blocks = ["10.0.0.0/8"]
+    # cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  ingress {
+    description = "All local traffic"
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["10.0.0.0/8"]
+  }
+
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  tags = {
+    Name = "bastion-sg-${var.options.rand_id}"
+  }
+}
+
+# Bastion host
+data "aws_ami" "amazon_linux_2023" {
+  most_recent = true
+  owners      = ["amazon"]
+
+  filter {
+    name   = "name"
+    values = ["al2023-ami-2023.*-x86_64"]
+  }
+}
+
+resource "aws_instance" "bastion" {
+  ami                    = data.aws_ami.amazon_linux_2023.id
+  instance_type          = "t3.medium"
+  subnet_id              = aws_subnet.public_subnet.id
+  vpc_security_group_ids = [aws_security_group.bastion_sg.id]
+  key_name               = var.options.cfg.common.key_name
+  iam_instance_profile   = aws_iam_instance_profile.cml_instance_profile.name
+
+  tags = {
+    Name = "CML-bastion-${var.options.rand_id}"
+  }
+}
+
+data "aws_iam_policy_document" "cloudwatch_policy" {
+  statement {
+    effect = "Allow"
+    actions = [
+      "cloudwatch:PutMetricData",
+      "ec2:DescribeVolumes",
+      "ec2:DescribeTags",
+      "logs:PutLogEvents",
+      "logs:DescribeLogStreams",
+      "logs:DescribeLogGroups",
+      "logs:CreateLogStream",
+      "logs:CreateLogGroup",
+      # SSM permissions
+      "ssm:GetParameter",
+      "ssm:GetParameters",
+      "ssm:GetParametersByPath",
"ssm:DescribeParameters", + "ssm:PutParameter", + "ssm:UpdateInstanceInformation", + "ssmmessages:CreateControlChannel", + "ssmmessages:CreateDataChannel", + "ssmmessages:OpenControlChannel", + "ssmmessages:OpenDataChannel", + "ec2messages:AcknowledgeMessage", + "ec2messages:DeleteMessage", + "ec2messages:FailMessage", + "ec2messages:GetEndpoint", + "ec2messages:GetMessages", + "ec2messages:SendReply" + ] + resources = ["*"] + } + + # Add new statement for S3 permissions + statement { + effect = "Allow" + actions = [ + "s3:ListBucket", + "s3:GetBucketLocation" + ] + resources = [ + "arn:aws:s3:::${var.options.cfg.aws.bucket}" + ] + } + + statement { + effect = "Allow" + actions = [ + "s3:GetObject", + "s3:GetObjectVersion" + ] + resources = [ + "arn:aws:s3:::${var.options.cfg.aws.bucket}/*" + ] + } + + statement { + effect = "Allow" + actions = [ + "ssm:StartAutomationExecution", + "ssm:GetAutomationExecution", + "ec2:StopInstances" + ] + resources = ["*"] + } +} + +# Create the IAM role and instance profile +resource "aws_iam_role" "cml_instance_role" { + name = "cml-instance-role-${var.options.rand_id}" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { + Service = "ec2.amazonaws.com" + } + } + ] + }) +} + +resource "aws_iam_role_policy" "cml_instance_policy" { + name = "cml-instance-policy-${var.options.rand_id}" + role = aws_iam_role.cml_instance_role.id + policy = data.aws_iam_policy_document.cloudwatch_policy.json +} + +# Attach AWS managed policy for SSM +resource "aws_iam_role_policy_attachment" "ssm_policy" { + role = aws_iam_role.cml_instance_role.name + policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" +} + +resource "aws_iam_instance_profile" "cml_instance_profile" { + name = "cml-instance-profile-${var.options.rand_id}" + role = aws_iam_role.cml_instance_role.name +} + +# Attach this policy to your instance role + +# Replace the VPC endpoint with EC2 Instance Connect endpoint +resource "aws_ec2_instance_connect_endpoint" "cml" { + subnet_id = aws_subnet.public_subnet.id + preserve_client_ip = true + security_group_ids = [aws_security_group.vpce_sg.id] + + tags = { + Name = "cml-ec2-connect-endpoint-${var.options.rand_id}" + } +} + +# SSM document for graceful shutdown with force stop fallback +resource "aws_ssm_document" "forced_shutdown" { + name = "ForceShutdownEC2-${var.options.rand_id}" + document_type = "Automation" + document_format = "JSON" + + content = jsonencode({ + schemaVersion = "0.3" + description = "Stop EC2 instance with graceful shutdown" + parameters = { + InstanceId = { + type = "String" + description = "Instance to stop" + } + } + mainSteps = [ + { + name = "gracefulStop" + action = "aws:executeAwsApi" + inputs = { + Service = "ec2" + Api = "StopInstances" + InstanceIds = [ + "{{ InstanceId }}" + ] + Force = false + } + onFailure = "step:forceStop" + }, + { + name = "forceStop" + action = "aws:executeAwsApi" + inputs = { + Service = "ec2" + Api = "StopInstances" + InstanceIds = [ + "{{ InstanceId }}" + ] + Force = true + } + } + ] + }) +} + +# Trigger SSM shutdown document for controller +resource "null_resource" "controller_shutdown" { + triggers = { + instance_id = aws_instance.cml_controller.id + document_name = aws_ssm_document.forced_shutdown.name + aws_region = var.options.cfg.aws.region + shutdown_script = <<-EOT + set -e + echo "Starting shutdown process for instance $INSTANCE_ID" + + # Start SSM automation + EXECUTION_ID=$(aws ssm 
start-automation-execution \ + --document-name "$DOC_NAME" \ + --parameters "InstanceId=$INSTANCE_ID" \ + --region $AWS_REGION \ + --query 'AutomationExecutionId' \ + --output text) + + echo "Started SSM automation with execution ID: $EXECUTION_ID" + + # Wait for instance to stop (timeout after 12 minutes) + timeout=720 + while [ $timeout -gt 0 ]; do + # Check SSM execution status + SSM_STATUS=$(aws ssm get-automation-execution \ + --automation-execution-id "$EXECUTION_ID" \ + --region $AWS_REGION \ + --query 'AutomationExecution.Status' \ + --output text) + + echo "SSM automation status: $SSM_STATUS" + + if [ "$SSM_STATUS" = "Failed" ]; then + echo "SSM automation failed, checking instance state directly" + elif [ "$SSM_STATUS" = "Success" ]; then + echo "SSM automation completed successfully" + fi + + # Check instance state + state=$(aws ec2 describe-instances \ + --instance-ids $INSTANCE_ID \ + --query 'Reservations[0].Instances[0].State.Name' \ + --output text \ + --region $AWS_REGION) + + echo "Instance state: $state" + + if [ "$state" = "stopped" ]; then + echo "Instance $INSTANCE_ID stopped successfully" + exit 0 + fi + + sleep 5 + timeout=$((timeout-5)) + echo "Waiting for instance to stop... ($timeout seconds remaining)" + done + + echo "ERROR: Timeout waiting for instance $INSTANCE_ID to stop" + echo "Final SSM automation status: $SSM_STATUS" + echo "Final instance state: $state" + exit 1 + EOT + } + + # Store values at creation time + provisioner "local-exec" { + command = "echo 'Resource created with ID: ${self.triggers.instance_id}'" + } + + # Use stored values at destroy time + provisioner "local-exec" { + when = destroy + command = <<-EOT + export DOC_NAME='${self.triggers.document_name}' + export INSTANCE_ID='${self.triggers.instance_id}' + export AWS_REGION='${self.triggers.aws_region}' + ${self.triggers.shutdown_script} + EOT + } +} + +# Trigger SSM shutdown document for compute nodes +resource "null_resource" "compute_shutdown" { + count = local.num_computes + + triggers = { + instance_id = aws_instance.cml_compute[count.index].id + document_name = aws_ssm_document.forced_shutdown.name + aws_region = var.options.cfg.aws.region + shutdown_script = <<-EOT + set -e + echo "Starting shutdown process for instance $INSTANCE_ID" + + # Start SSM automation + EXECUTION_ID=$(aws ssm start-automation-execution \ + --document-name "$DOC_NAME" \ + --parameters "InstanceId=$INSTANCE_ID" \ + --region $AWS_REGION \ + --query 'AutomationExecutionId' \ + --output text) + + echo "Started SSM automation with execution ID: $EXECUTION_ID" + + # Wait for instance to stop (timeout after 12 minutes) + timeout=720 + while [ $timeout -gt 0 ]; do + # Check SSM execution status + SSM_STATUS=$(aws ssm get-automation-execution \ + --automation-execution-id "$EXECUTION_ID" \ + --region $AWS_REGION \ + --query 'AutomationExecution.Status' \ + --output text) + + echo "SSM automation status: $SSM_STATUS" + + if [ "$SSM_STATUS" = "Failed" ]; then + echo "SSM automation failed, checking instance state directly" + elif [ "$SSM_STATUS" = "Success" ]; then + echo "SSM automation completed successfully" + fi + + # Check instance state + state=$(aws ec2 describe-instances \ + --instance-ids $INSTANCE_ID \ + --query 'Reservations[0].Instances[0].State.Name' \ + --output text \ + --region $AWS_REGION) + + echo "Instance state: $state" + + if [ "$state" = "stopped" ]; then + echo "Instance $INSTANCE_ID stopped successfully" + exit 0 + fi + + sleep 5 + timeout=$((timeout-5)) + echo "Waiting for instance to stop... 
($timeout seconds remaining)"
+      done
+
+      echo "ERROR: Timeout waiting for instance $INSTANCE_ID to stop"
+      echo "Final SSM automation status: $SSM_STATUS"
+      echo "Final instance state: $state"
+      exit 1
+    EOT
+  }
+
+  # Store values at creation time
+  provisioner "local-exec" {
+    command = "echo 'Resource created with ID: ${self.triggers.instance_id}'"
+  }
+
+  # Use stored values at destroy time
+  provisioner "local-exec" {
+    when    = destroy
+    command = <<-EOT
+      export DOC_NAME='${self.triggers.document_name}'
+      export INSTANCE_ID='${self.triggers.instance_id}'
+      export AWS_REGION='${self.triggers.aws_region}'
+      ${self.triggers.shutdown_script}
+    EOT
+  }
+}
diff --git a/modules/deploy/azure/main.tf b/modules/deploy/azure/main.tf
index e8b20a9..f44d7ee 100644
--- a/modules/deploy/azure/main.tf
+++ b/modules/deploy/azure/main.tf
@@ -215,12 +215,12 @@ resource "azurerm_linux_virtual_machine" "cml" {
   # boot_diagnostics {
   # }
 
   admin_username = "ubuntu"
   network_interface_ids = [
     azurerm_network_interface.cml.id,
   ]
 
   admin_ssh_key {
     username   = "ubuntu"
     public_key = data.azurerm_ssh_public_key.cml.public_key
     # public_key = file("~/.ssh/id_rsa.pub")
diff --git a/modules/deploy/config/backend.hcl b/modules/deploy/config/backend.hcl
new file mode 100644
index 0000000..33be9ff
--- /dev/null
+++ b/modules/deploy/config/backend.hcl
@@ -0,0 +1,5 @@
+bucket         = "taylaand-aws-cml-tfstate"
+key            = "terraform.tfstate"
+region         = "eu-west-1"
+dynamodb_table = "taylaand-aws-cml-tfstate-lock"
+encrypt        = true
diff --git a/modules/deploy/data/04-customize.sh b/modules/deploy/data/04-customize.sh
index 87e3da4..d75a850 100644
--- a/modules/deploy/data/04-customize.sh
+++ b/modules/deploy/data/04-customize.sh
@@ -25,14 +25,14 @@ from time import sleep
 from httpx import HTTPStatusError
 from virl2_client import ClientLibrary
 
 admin = os.getenv("CFG_APP_USER", "")
 password = os.getenv("CFG_APP_PASS", "")
 hostname = os.getenv("CFG_COMMON_HOSTNAME", "")
 
 attempts = 6
 while attempts > 0:
     try:
         client = ClientLibrary(f"https://{hostname}", admin, password, ssl_verify=False)
     except HTTPStatusError as exc:
         print(exc)
         sleep(10)
@@ -47,7 +47,7 @@ USER_COUNT = 20
 # create 20 users (and pod0 is for us to use, in total 21)
 
 # the below block is to remove users again, used for testing
 if False:
     for id in range(0, USER_COUNT + 1):
         user_id = client.user_management.user_id(f"pod{id}")
         client.user_management.delete_user(user_id)
diff --git a/modules/deploy/data/cml.sh b/modules/deploy/data/cml.sh
index e6d94ec..1888b68 100644
--- a/modules/deploy/data/cml.sh
+++ b/modules/deploy/data/cml.sh
@@ -16,7 +16,30 @@ source /provision/vars.sh
 
 function setup_pre_aws() {
     export AWS_DEFAULT_REGION=${CFG_AWS_REGION}
-    apt-get install -y awscli
+
+    echo "Installing AWS CLI..."
+    if ! apt-get install -y awscli; then
+        echo "APT installation of AWS CLI failed, installing AWS CLI v2..."
+
+        # Install required dependencies
+        apt-get install -y unzip curl
+
+        # Download and install AWS CLI v2
+        cd /tmp
+        curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
+        unzip -q awscliv2.zip
+        ./aws/install --bin-dir /usr/local/bin --install-dir /usr/local/aws-cli --update
+        rm -rf aws awscliv2.zip
+
+        # Verify installation
+        if ! 
aws --version; then + echo "Error: AWS CLI installation failed" + exit 1 + fi + echo "AWS CLI v2 installed successfully" + else + echo "AWS CLI installed via APT successfully" + fi } function setup_pre_azure() { @@ -26,7 +49,6 @@ function setup_pre_azure() { } function base_setup() { - # Check if this device is a controller if is_controller; then # copy node definitions and images to the instance @@ -60,74 +82,114 @@ function base_setup() { fi # copy CML distribution package from cloud storage into our instance, unpack & install + echo "Copying CML package from cloud storage..." copyfile ${CFG_APP_SOFTWARE} /provision/ - tar xvf /provision/${CFG_APP_SOFTWARE} --wildcards -C /tmp 'cml2*_amd64.deb' 'patty*_amd64.deb' 'iol-tools*_amd64.deb' - systemctl stop ssh - apt-get install -y /tmp/*.deb - # Fixing NetworkManager in netplan, and interface association in virl2-base-config.yml - /provision/interface_fix.py - systemctl restart network-manager - netplan apply - # Fix for the headless setup (tty remove as the cloud VM has none) - sed -i '/^Standard/ s/^/#/' /lib/systemd/system/virl2-initial-setup.service - touch /etc/.virl2_unconfigured - systemctl stop getty@tty1.service - echo "initial setup start: $(date +'%T.%N')" - systemctl enable --now virl2-initial-setup.service - echo "initial setup done: $(date +'%T.%N')" - - # this should not be needed in cloud!? - # systemctl start getty@tty1.service - - # We need to wait until the initial setup is done + + echo "Extracting CML package..." + if [ ! -f "/provision/${CFG_APP_SOFTWARE}" ]; then + echo "Error: CML package not found at /provision/${CFG_APP_SOFTWARE}" + exit 1 + fi + + # Create temp directory for package extraction + TEMP_DIR=$(mktemp -d) + tar xvf "/provision/${CFG_APP_SOFTWARE}" -C "$TEMP_DIR" + + # Find and install the packages + echo "Installing CML packages..." + DEB_FILES=$(find "$TEMP_DIR" -name "*.deb") + if [ -z "$DEB_FILES" ]; then + echo "Error: No .deb files found in the CML package" + exit 1 + fi + + # Stop SSH before installation + systemctl stop ssh || echo "Warning: Failed to stop SSH" + + # Install each package individually + for deb in $DEB_FILES; do + echo "Installing $deb..." + if ! apt-get install -y "$deb"; then + echo "Error: Failed to install $deb" + exit 1 + fi + done + + # Clean up temp directory + rm -rf "$TEMP_DIR" + + echo "Running interface fix..." + if [ -f /provision/interface_fix.py ]; then + /provision/interface_fix.py + else + echo "Warning: interface_fix.py not found" + fi + + # Check for NetworkManager + if systemctl list-unit-files | grep -q network-manager; then + echo "Restarting NetworkManager..." + systemctl restart network-manager || echo "Warning: Failed to restart NetworkManager" + else + echo "NetworkManager not found, setting up..." + setup_network_manager + fi + + # Check for virl2-initial-setup service + if [ -f /lib/systemd/system/virl2-initial-setup.service ]; then + echo "Configuring virl2-initial-setup..." + sed -i '/^Standard/ s/^/#/' /lib/systemd/system/virl2-initial-setup.service + touch /etc/.virl2_unconfigured + systemctl stop getty@tty1.service || echo "Warning: Failed to stop getty" + echo "initial setup start: $(date +'%T.%N')" + systemctl enable --now virl2-initial-setup.service + echo "initial setup done: $(date +'%T.%N')" + else + echo "Error: virl2-initial-setup.service not found. CML installation may have failed." 
+        echo "Contents of /provision:"
+        ls -la /provision/
+        echo "Contents of /tmp:"
+        ls -la /tmp/
+        exit 1
+    fi
+
+    # Wait for initial setup
     attempts=5
     while [ $attempts -gt 0 ]; do
         sleep 5
-        # substate=$(systemctl show --property=SubState --value virl2-initial-setup.service)
-        # if [ "$substate" = "exited" ]; then
         if [ ! -f /etc/.virl2_unconfigured ]; then
             echo "initial setup is done"
             break
         fi
-        echo "waiting for initial setup..."
+        echo "waiting for initial setup... ($attempts attempts remaining)"
         ((attempts--))
     done
     if [ $attempts -eq 0 ]; then
-        echo "initial setup did not finish in time... something went wrong!"
+        echo "Error: initial setup did not finish in time"
         exit 1
     fi
 
-    # for good measure, apply the network config again
+    # Apply network config and restart SSH
     netplan apply
     systemctl enable --now ssh.service
 
-    # clean up software .pkg / .deb packages
+    # Clean up
     rm -f /provision/*.pkg /provision/*.deb /tmp/*.deb
 
-    # disable bridge setup in the cloud instance (controller and computes)
-    # (this is a no-op with 2.7.1 as it skips bridge creation entirely)
-    /usr/local/bin/virl2-bridge-setup.py --delete
-    sed -i /usr/local/bin/virl2-bridge-setup.py -e '2iexit()'
-    # remove the CML specific netplan config
-    rm /etc/netplan/00-cml2-base.yaml
-    # apply to ensure gateway selection below works
+    # Disable bridge setup
+    if [ -f /usr/local/bin/virl2-bridge-setup.py ]; then
+        /usr/local/bin/virl2-bridge-setup.py --delete
+        sed -i /usr/local/bin/virl2-bridge-setup.py -e '2iexit()'
+    fi
+
+    # Remove CML netplan config
+    rm -f /etc/netplan/00-cml2-base.yaml
     netplan apply
 
-    # no PaTTY on computes
+    # Skip PaTTY on computes
     if ! is_controller; then
         return 0
     fi
-
-    # enable and configure PaTTY
-    if [ "${CFG_COMMON_ENABLE_PATTY}" = "true" ]; then
-        sleep 5 # wait for ip address acquisition
-        GWDEV=$(ip -json route | jq -r '.[]|select(.dst=="default")|(.metric|tostring)+"\t"+.dev' | sort | head -1 | cut -f2)
-        echo "OPTS=\"-bridge $GWDEV -poll 5\"" >>/etc/default/patty.env
-        sed -i '/^After/iWants=virl2-patty.service' /lib/systemd/system/virl2.target
-        systemctl daemon-reload
-        systemctl enable --now virl2-patty
-    fi
 }
 
 function cml_configure() {
@@ -151,7 +213,7 @@ function cml_configure() {
     chgrp ${CFG_SYS_USER} /provision/vars.sh
     chmod g+r /provision/vars.sh
 
     # Change the ownership of the del.sh script to the sysadmin user
     chown ${CFG_SYS_USER}.${CFG_SYS_USER} /provision/del.sh
 
     # Check if this device is a controller
@@ -274,6 +336,68 @@ fi
 
 if [ ! -f /tmp/PACKER_BUILD ]; then
     cml_configure ${CFG_TARGET}
     postprocess
-    # netplan apply
+    netplan apply
     # systemctl reboot
 fi
+
+# NetworkManager setup helper, called from base_setup
+function setup_network_manager() {
+    echo "Setting up NetworkManager..."
+
+    # Install NetworkManager if not present
+    if ! command -v NetworkManager >/dev/null 2>&1; then
+        echo "Installing NetworkManager..."
+        apt-get update && apt-get install -y network-manager
+    fi
+
+    # Check/Create systemd service file
+    NM_SERVICE="/etc/systemd/system/network-manager.service"
+    if [ ! -f "$NM_SERVICE" ]; then
+        echo "Creating NetworkManager service file..."
+        cat > "$NM_SERVICE" <<'EOF'
+[Unit]
+Description=Network Manager
+Documentation=man:NetworkManager(8)
+Wants=network.target
+After=network-pre.target dbus.service
+Before=network.target
+RequiresMountsFor=/var/run/NetworkManager
+
+[Service]
+Type=dbus
+BusName=org.freedesktop.NetworkManager
+ExecReload=/bin/kill -HUP $MAINPID
+ExecStart=/usr/sbin/NetworkManager --no-daemon
+Restart=on-failure
+CapabilityBoundingSet=CAP_NET_ADMIN CAP_DAC_OVERRIDE CAP_NET_RAW CAP_NET_BIND_SERVICE CAP_SETGID CAP_SETUID CAP_SYS_MODULE CAP_AUDIT_WRITE CAP_KILL CAP_SYS_CHROOT
+ProtectSystem=true
+ProtectHome=true
+
+[Install]
+WantedBy=multi-user.target
+Alias=dbus-org.freedesktop.NetworkManager.service
+Also=NetworkManager-dispatcher.service
+EOF
+    fi
+
+    # Reload systemd and start/enable NetworkManager
+    echo "Configuring NetworkManager service..."
+    systemctl daemon-reload
+    systemctl enable network-manager
+    systemctl start network-manager
+
+    # Verify NetworkManager status
+    echo "Checking NetworkManager status..."
+    if ! systemctl is-active --quiet network-manager; then
+        echo "Error: NetworkManager failed to start"
+        systemctl status network-manager
+        exit 1
+    fi
+
+    if ! systemctl is-enabled --quiet network-manager; then
+        echo "Error: NetworkManager not enabled"
+        exit 1
+    fi
+
+    echo "NetworkManager setup completed successfully"
+}
diff --git a/modules/deploy/data/del.sh b/modules/deploy/data/del.sh
index c52778b..3c9e857 100644
--- a/modules/deploy/data/del.sh
+++ b/modules/deploy/data/del.sh
@@ -6,7 +6,7 @@
 # All rights reserved.
 #
 #
 # NOTE: this only works as long as the admin user password wasn't changed
 # from the value which was originally provisioned.
 #
 set -x
diff --git a/modules/deploy/data/vars.sh b/modules/deploy/data/vars.sh
index 7014640..d402b9d 100644
--- a/modules/deploy/data/vars.sh
+++ b/modules/deploy/data/vars.sh
@@ -13,5 +13,4 @@ CFG_LICENSE_TOKEN="${cfg.secrets.smartlicense_token.secret}"
 CFG_SAS_TOKEN="${cfg.sas_token}"
 CFG_SYS_PASS="${cfg.secrets.sys.secret}"
 CFG_SYS_USER="${cfg.secrets.sys.username}"
-CFG_TARGET="${cfg.target}"
-
+CFG_TARGET="${cfg.target}"
diff --git a/modules/deploy/data/virl2-base-config.yml b/modules/deploy/data/virl2-base-config.yml
index 075d954..3759b26 100644
--- a/modules/deploy/data/virl2-base-config.yml
+++ b/modules/deploy/data/virl2-base-config.yml
@@ -1,4 +1,4 @@
 admins:
   controller:
     password: ${cfg.secrets.app.secret}
     username: ${cfg.secrets.app.username}
diff --git a/modules/deploy/main.tf b/modules/deploy/main.tf
index 6ba75f0..1f9eb93 100644
--- a/modules/deploy/main.tf
+++ b/modules/deploy/main.tf
@@ -21,3 +21,13 @@ locals {
   }
 }
 
+terraform {
+  required_providers {
+    cml2 = {
+      source                = "ciscodevnet/cml2"
+      version               = "~> 0.8.1"
+      configuration_aliases = [cml2.controller]
+    }
+  }
+}
+
diff --git a/modules/readyness/main.tf b/modules/readyness/main.tf
index c7ca5cf..e9b3921 100644
--- a/modules/readyness/main.tf
+++ b/modules/readyness/main.tf
@@ -5,7 +5,7 @@
 #
 
 data "cml2_system" "state" {
-  timeout       = "10m"
+  timeout       = "15m"
   ignore_errors = true
 }
 
@@ -13,3 +13,13 @@ data "cml2_system" "state" {
 # during the time the public IP of the AWS instance is known but not really
 # reachable resulting in various "gateway timeouts", "service unavailable" or
 # other, related errors. Especially in cases when going through a proxy.
+
+terraform {
+  required_providers {
+    cml2 = {
+      source                = "ciscodevnet/cml2"
+      version               = "~> 0.8.1"
+      configuration_aliases = [cml2]
+    }
+  }
+}
diff --git a/prepare.bat b/prepare.bat
index 1341219..8fd34d7 100644
--- a/prepare.bat
+++ b/prepare.bat
@@ -1,61 +1,265 @@
 @echo off
-rem
-rem This file is part of Cisco Modeling Labs
-rem Copyright (c) 2019-2024, Cisco Systems, Inc.
-rem All rights reserved.
-rem
+REM This file is part of Cisco Modeling Labs
+REM Copyright (c) 2019-2024, Cisco Systems, Inc.
+REM All rights reserved.
 
-goto start
+setlocal EnableDelayedExpansion
+
+REM Change to script directory
+cd /d "%~dp0"
+
+REM Jump over the subroutines below; without this the script would fall
+REM straight into :ask_yes_no and block waiting for input
+goto :get_prefix
 
 :ask_yes_no
-set /p "answer=%~1 (yes/no): "
-set "answer=%answer:~0,1%"
-if /i "%answer%"=="y" (
-    exit /b 1
-) else if /i "%answer%"=="n" (
-    exit /b 0
-) else (
-    echo Please answer yes or no.
-    goto :ask_yes_no
-)
+set "prompt=%~1"
+set "default=%~2"
+:ask_loop
+set /p "answer=%prompt% "
+if "!answer!"=="" set "answer=%default%"
+for %%A in (yes y true 1) do if /i "!answer!"=="%%A" exit /b 0
+for %%A in (no n false 0) do if /i "!answer!"=="%%A" exit /b 1
+echo Please answer yes or no.
+goto ask_loop
+
+:generate_random_prefix
+REM Generate random 8 character string (lowercase alphanumeric)
+set "prefix="
+set "chars=abcdefghijklmnopqrstuvwxyz0123456789"
+for /L %%i in (1,1,8) do (
+    set /a "rand=!random! %% 36"
+    for %%j in (!rand!) do set "prefix=!prefix!!chars:~%%j,1!"
+)
+exit /b
+
+:validate_prefix
+set "prefix=%~1"
+echo !prefix! | findstr /r "^[a-z0-9][a-z0-9-]*[a-z0-9]$" >nul
+if errorlevel 1 (
+    echo Error: Prefix must contain only lowercase letters, numbers, and hyphens
+    echo Must start and end with letter or number
+    exit /b 1
+)
+if "!prefix:~20!" neq "" (
+    echo Error: Prefix must be 20 characters or less
+    exit /b 1
+)
+exit /b 0
+
+:get_region_city
+set "region=%~1"
+if "%region%"=="eu-west-1" set "city=dublin" & exit /b
+if "%region%"=="eu-west-2" set "city=london" & exit /b
+if "%region%"=="eu-west-3" set "city=paris" & exit /b
+if "%region%"=="eu-central-1" set "city=frankfurt" & exit /b
+if "%region%"=="eu-central-2" set "city=zurich" & exit /b
+if "%region%"=="eu-south-1" set "city=milan" & exit /b
+if "%region%"=="eu-south-2" set "city=madrid" & exit /b
+if "%region%"=="eu-north-1" set "city=stockholm" & exit /b
+if "%region%"=="us-east-1" set "city=virginia" & exit /b
+if "%region%"=="us-east-2" set "city=ohio" & exit /b
+if "%region%"=="us-west-1" set "city=california" & exit /b
+if "%region%"=="us-west-2" set "city=oregon" & exit /b
+if "%region%"=="ap-east-1" set "city=hongkong" & exit /b
+if "%region%"=="ap-south-1" set "city=mumbai" & exit /b
+if "%region%"=="ap-south-2" set "city=hyderabad" & exit /b
+if "%region%"=="ap-northeast-1" set "city=tokyo" & exit /b
+if "%region%"=="ap-northeast-2" set "city=seoul" & exit /b
+if "%region%"=="ap-northeast-3" set "city=osaka" & exit /b
+if "%region%"=="ap-southeast-1" set "city=singapore" & exit /b
+if "%region%"=="ap-southeast-2" set "city=sydney" & exit /b
+if "%region%"=="ap-southeast-3" set "city=jakarta" & exit /b
+if "%region%"=="ap-southeast-4" set "city=melbourne" & exit /b
+set "city=unknown"
+exit /b
+
+REM Ask for and validate prefix
+:get_prefix
+set /p "PREFIX=Enter your prefix for AWS resources (random) [default: random]: "
+if "!PREFIX!"=="" (
+    call :generate_random_prefix
+    set "PREFIX=!prefix!"
+    echo Using random prefix: !PREFIX!
+)
+call :validate_prefix "!PREFIX!"
+if errorlevel 1 goto get_prefix
+
+echo Using prefix: !PREFIX!
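+
+REM Example: with prefix "lab01" and region eu-west-1, names such as
+REM "xyz-aws-cml" become "lab01-aws-cml" and "cml-paris-xyz" becomes "cml-dublin-lab01"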
+
+REM Ask for AWS region
+:get_region
+set /p "AWS_REGION=Enter AWS region (default: eu-west-1): "
+if "!AWS_REGION!"=="" set "AWS_REGION=eu-west-1"
+
+call :get_region_city "!AWS_REGION!"
+if "!city!"=="unknown" (
+    echo Unsupported region. Please choose from:
+    echo EMEA: eu-west-1/2/3, eu-central-1/2, eu-south-1/2, eu-north-1
+    echo US: us-east-1/2, us-west-1/2
+    echo APAC: ap-east-1, ap-south-1/2, ap-northeast-1/2/3, ap-southeast-1/2/3/4
+    goto get_region
+)
+
+set "REGION_CITY=!city!"
+echo Using AWS region: !AWS_REGION! (!REGION_CITY!)
+
+REM Create backup directory with timestamp
+for /f "tokens=2-4 delims=/ " %%a in ('date /t') do (
+    set "datestamp=%%c%%a%%b"
+)
+for /f "tokens=1-2 delims=: " %%a in ('time /t') do (
+    set "timestamp=%%a%%b"
+)
+set "BACKUP_DIR=backups_!datestamp!_!timestamp!"
+mkdir "!BACKUP_DIR!" 2>nul
+goto :do_aws_backups
+
+REM Function to update prefix in file using PowerShell (more reliable than
+REM batch for text processing); the command must stay on one line because
+REM caret continuations are not recognized inside a quoted string
+:update_prefix
+set "file=%~1"
+if exist "!file!" (
+    echo Updating !file!...
+    powershell -Command "$content = Get-Content -Path '%~1'; $content = $content -replace '([a-z0-9-]*)-aws-cml', '%PREFIX%-aws-cml'; $content = $content -replace 'cml-[a-z]*-([a-z0-9-]*)', 'cml-%REGION_CITY%-%PREFIX%'; Set-Content -Path '%~1' -Value $content"
+)
+exit /b
+
+REM Backup and update all relevant files
+:do_aws_backups
+echo Creating backups in !BACKUP_DIR!...
+for %%F in (
+    "config.yml"
+    "documentation\AWS.md"
+    "modules\deploy\aws\main.tf"
+    "modules\deploy\main.tf"
+    "variables.tf"
+) do (
+    if exist "%%~F" (
+        copy "%%~F" "!BACKUP_DIR!\%%~nxF.bak" >nul
+        call :update_prefix "%%~F"
+    )
+)
+
+echo Configuration updated with prefix: !PREFIX!
+echo Backups created in: !BACKUP_DIR!\
+
+REM Store the root directory
+set "ROOT_DIR=%CD%"
 
-:start
 cd modules\deploy
 
-call :ask_yes_no "Cloud - Enable AWS?"
-if errorlevel 1 (
-    echo Enabling AWS.
-    copy aws-on.t-f aws.tf
-) else (
-    echo Disabling AWS.
-    copy aws-off.t-f aws.tf
+REM Check if backend configuration exists
+if exist "config\backend.hcl" (
+    echo Initializing backend...
+    terraform init -migrate-state
 )
 
-call :ask_yes_no "Cloud - Enable Azure?"
-if errorlevel 1 (
-    echo Enabling Azure.
-    copy azure-on.t-f azure.tf
-) else (
-    echo Disabling Azure.
-    copy azure-off.t-f azure.tf
+cd "%ROOT_DIR%"
+
+REM Ask for and validate prefix for Azure
+:get_azure_prefix
+set /p "AZURE_PREFIX=Enter your prefix for Azure resources (random) [default: random]: "
+if "!AZURE_PREFIX!"=="" (
+    call :generate_random_prefix
+    set "AZURE_PREFIX=!prefix!"
+    echo Using random prefix: !AZURE_PREFIX!
 )
+call :validate_prefix "!AZURE_PREFIX!"
+if errorlevel 1 goto get_azure_prefix
 
-cd ..\..
-cd modules\secrets
+echo Using prefix for Azure: !AZURE_PREFIX!
 
-call :ask_yes_no "External Secrets Manager - Enable Conjur?"
-if errorlevel 1 (
-    echo Enabling Conjur.
-    copy conjur-on.t-f conjur.tf
-) else (
-    echo Disabling Conjur.
-    copy conjur-off.t-f conjur.tf
-)
+REM Ask for Azure region
+:get_azure_region
+set /p "AZURE_REGION=Enter Azure region (default: westus): "
+if "!AZURE_REGION!"=="" set "AZURE_REGION=westus"
+
+REM Create backup directory with timestamp for Azure
+for /f "tokens=2-4 delims=/ " %%a in ('date /t') do (
+    set "datestamp=%%c%%a%%b"
+)
-call :ask_yes_no "External Secrets Manager - Enable Vault?"
-if errorlevel 1 (
-    echo Enabling Vault.
-    copy vault-on.t-f vault.tf
-) else (
-    echo Disabling Vault.
-    copy vault-off.t-f vault.tf
+for /f "tokens=1-2 delims=: " %%a in ('time /t') do (
+    set "timestamp=%%a%%b"
+)
+set "AZURE_BACKUP_DIR=backups_azure_!datestamp!_!timestamp!"
+mkdir "!AZURE_BACKUP_DIR!" 2>nul
+goto :do_azure_backups
+
+REM Function to update prefix in file using PowerShell for Azure (kept on
+REM a single line for the same quoting reason as :update_prefix)
+:update_azure_prefix
+set "file=%~1"
+if exist "!file!" (
+    echo Updating !file!...
+    powershell -Command "$content = Get-Content -Path '%~1'; $content = $content -replace '([a-z0-9-]*)-azure-cml', '%AZURE_PREFIX%-azure-cml'; Set-Content -Path '%~1' -Value $content"
+)
+exit /b
+
+REM Backup and update all relevant files for Azure
+:do_azure_backups
+echo Creating backups in !AZURE_BACKUP_DIR!...
+for %%F in (
+    "modules\deploy\azure\main.tf"
+) do (
+    if exist "%%~F" (
+        copy "%%~F" "!AZURE_BACKUP_DIR!\%%~nxF.bak" >nul
+        call :update_azure_prefix "%%~F"
+    )
+)
+
+echo Configuration updated with prefix for Azure: !AZURE_PREFIX!
+echo Backups created in: !AZURE_BACKUP_DIR!\
+
+REM Ask for and validate prefix for GCP
+:get_gcp_prefix
+set /p "GCP_PREFIX=Enter your prefix for GCP resources (random) [default: random]: "
+if "!GCP_PREFIX!"=="" (
+    call :generate_random_prefix
+    set "GCP_PREFIX=!prefix!"
+    echo Using random prefix: !GCP_PREFIX!
+)
+call :validate_prefix "!GCP_PREFIX!"
+if errorlevel 1 goto get_gcp_prefix
+
+echo Using prefix for GCP: !GCP_PREFIX!
+
+REM Ask for GCP region
+:get_gcp_region
+set /p "GCP_REGION=Enter GCP region (default: us-central1): "
+if "!GCP_REGION!"=="" set "GCP_REGION=us-central1"
+
+REM Create backup directory with timestamp for GCP
+for /f "tokens=2-4 delims=/ " %%a in ('date /t') do (
+    set "datestamp=%%c%%a%%b"
+)
+for /f "tokens=1-2 delims=: " %%a in ('time /t') do (
+    set "timestamp=%%a%%b"
+)
+set "GCP_BACKUP_DIR=backups_gcp_!datestamp!_!timestamp!"
+mkdir "!GCP_BACKUP_DIR!" 2>nul
+goto :do_gcp_backups
+
+REM Function to update prefix in file using PowerShell for GCP
+:update_gcp_prefix
+set "file=%~1"
+if exist "!file!" (
+    echo Updating !file!...
+    powershell -Command "$content = Get-Content -Path '%~1'; $content = $content -replace '([a-z0-9-]*)-gcp-cml', '%GCP_PREFIX%-gcp-cml'; Set-Content -Path '%~1' -Value $content"
+)
+exit /b
+
+REM Backup and update all relevant files for GCP
+:do_gcp_backups
+echo Creating backups in !GCP_BACKUP_DIR!...
+for %%F in (
+    "modules\deploy\gcp\main.tf"
+) do (
+    if exist "%%~F" (
+        copy "%%~F" "!GCP_BACKUP_DIR!\%%~nxF.bak" >nul
+        call :update_gcp_prefix "%%~F"
+    )
+)
+
+echo Configuration updated with prefix for GCP: !GCP_PREFIX!
+echo Backups created in: !GCP_BACKUP_DIR!\
+
+endlocal
diff --git a/prepare.sh b/prepare.sh
index 645173a..eb427fd 100755
--- a/prepare.sh
+++ b/prepare.sh
@@ -8,9 +8,13 @@
 cd $(dirname $0)
 
 ask_yes_no() {
+    local prompt="$1"
+    local default="$2"
+
     while true; do
-        read -p "$1 (yes/no): " answer
-        answer=$(echo "$answer" | tr '[:upper:]' '[:lower:]')
+        # The prompt text already carries the (yes/no) suffix and default
+        read -p "$prompt " answer
+        answer=$(echo "${answer:-$default}" | tr '[:upper:]' '[:lower:]')
         case $answer in
             yes | y | true | 1)
                 return 0
@@ -25,18 +29,223 @@ ask_yes_no() {
     done
 }
 
+# Function to generate random prefix
+generate_random_prefix() {
+    # Generate random 8 character string (lowercase alphanumeric)
+    LC_ALL=C tr -dc 'a-z0-9' </dev/urandom | fold -w 8 | head -n 1
+}
+
+# Function to validate prefix
+validate_prefix() {
+    local prefix=$1
+    # Check for valid AWS resource naming (lowercase alphanumeric and hyphens)
+    if [[ !
$prefix =~ ^[a-z0-9][a-z0-9-]*[a-z0-9]$ ]]; then + echo "Error: Prefix must contain only lowercase letters, numbers, and hyphens" + echo " Must start and end with letter or number" + return 1 + fi + if [ ${#prefix} -gt 20 ]; then + echo "Error: Prefix must be 20 characters or less" + return 1 + fi + return 0 +} + +# Ask for and validate prefix +while true; do + read -p "Enter your prefix for AWS resources (random) [default: random]: " PREFIX + if [ -z "$PREFIX" ]; then + PREFIX=$(generate_random_prefix) + echo "Using random prefix: $PREFIX" + fi + if validate_prefix "$PREFIX"; then + break + fi +done + +echo "Using prefix: $PREFIX" + +# Function to map AWS region to city name +get_region_city() { + local region=$1 + case $region in + # EMEA Regions + "eu-west-1") + echo "dublin" + ;; + "eu-west-2") + echo "london" + ;; + "eu-west-3") + echo "paris" + ;; + "eu-central-1") + echo "frankfurt" + ;; + "eu-central-2") + echo "zurich" + ;; + "eu-south-1") + echo "milan" + ;; + "eu-south-2") + echo "madrid" + ;; + "eu-north-1") + echo "stockholm" + ;; + # US Regions + "us-east-1") + echo "virginia" + ;; + "us-east-2") + echo "ohio" + ;; + "us-west-1") + echo "california" + ;; + "us-west-2") + echo "oregon" + ;; + # APAC Regions + "ap-east-1") + echo "hongkong" + ;; + "ap-south-1") + echo "mumbai" + ;; + "ap-south-2") + echo "hyderabad" + ;; + "ap-northeast-1") + echo "tokyo" + ;; + "ap-northeast-2") + echo "seoul" + ;; + "ap-northeast-3") + echo "osaka" + ;; + "ap-southeast-1") + echo "singapore" + ;; + "ap-southeast-2") + echo "sydney" + ;; + "ap-southeast-3") + echo "jakarta" + ;; + "ap-southeast-4") + echo "melbourne" + ;; + *) + echo "unknown" + ;; + esac +} + +# Ask for AWS region +while true; do + read -p "Enter AWS region (default: eu-west-1): " AWS_REGION + AWS_REGION=${AWS_REGION:-eu-west-1} + + REGION_CITY=$(get_region_city "$AWS_REGION") + if [ "$REGION_CITY" = "unknown" ]; then + echo "Unsupported region. Please choose from:" + echo "EMEA: eu-west-1/2/3, eu-central-1/2, eu-south-1/2, eu-north-1" + echo "US: us-east-1/2, us-west-1/2" + echo "APAC: ap-east-1, ap-south-1/2, ap-northeast-1/2/3, ap-southeast-1/2/3/4" + continue + fi + break +done + +echo "Using AWS region: $AWS_REGION ($REGION_CITY)" + +# Function to update prefix in file +update_prefix() { + local file=$1 + if [ -f "$file" ]; then + echo "Updating $file..." + sed -i.bak \ + -e "s/\([a-z0-9-]*\)-aws-cml/${PREFIX}-aws-cml/g" \ + -e "s/cml-[a-z]*-\([a-z0-9-]*\)/cml-${REGION_CITY}-${PREFIX}/g" \ + "$file" + fi +} + +# Create backup directory +BACKUP_DIR="backups_$(date +%Y%m%d_%H%M%S)" +mkdir -p "$BACKUP_DIR" + +# Backup and update all relevant files +echo "Creating backups in $BACKUP_DIR..." +for file in \ + config.yml \ + documentation/AWS.md \ + modules/deploy/aws/main.tf \ + modules/deploy/main.tf \ + variables.tf; do + if [ -f "$file" ]; then + cp "$file" "$BACKUP_DIR/$(basename $file).bak" + update_prefix "$file" + fi +done + +echo "Configuration updated with prefix: $PREFIX" +echo "Backups created in: $BACKUP_DIR/" + +# Store the root directory +ROOT_DIR=$(pwd) + cd modules/deploy -if ask_yes_no "Cloud - Enable AWS?"; then +# Flag to track if S3 backend was requested +USE_S3_BACKEND=false + +# AWS enabled by default +if ask_yes_no "Cloud - Enable AWS? (yes/no) [default: yes]" "yes"; then echo "Enabling AWS." rm aws.tf ln -s aws-on.t-f aws.tf + + # Ask about S3 backend + if ask_yes_no "Do you want to use S3 for Terraform state backend? 
(yes/no) [default: no]" "no"; then + USE_S3_BACKEND=true + echo "Creating backend configuration..." + mkdir -p "$ROOT_DIR/config" + # Create backend.tf for initial setup + cat > "$ROOT_DIR/backend.tf" < "$ROOT_DIR/config/backend.hcl" </dev/null; then + echo "Found existing S3 bucket: $BUCKET_NAME" + else + echo "Creating S3 backend infrastructure..." + terraform init + terraform apply -target=module.backend -auto-approve + fi + + echo "Configuring Terraform to use S3 backend..." + # Create backend configuration + cat > "$ROOT_DIR/backend.tf" <' + + replacements = { + # Prefix replacements + config.get('prefix', ''): '', + + # AWS credentials and config + config.get('aws', {}).get('region', ''): '', + config.get('aws', {}).get('availability_zone', ''): '', + config.get('aws', {}).get('bucket', ''): '', + + # Secrets + config.get('secret', {}).get('secrets', {}).get('app', {}).get('raw_secret', ''): '<_password>', + config.get('secret', {}).get('secrets', {}).get('app', {}).get('secret', ''): '', + } + + # Add .virlrc replacements + replacements.update(virlrc_replacements) + + # Remove empty keys + return {k: v for k, v in replacements.items() if k} + +def clean_file(file_path, replacements): + """Clean a single file""" + try: + with open(file_path, 'r') as file: + content = file.read() + + # Apply replacements + for old, new in replacements.items(): + if old: # Skip empty strings + content = content.replace(str(old), str(new)) + + with open(file_path, 'w') as file: + file.write(content) + + print(f"Cleaned {file_path}") + except Exception as e: + print(f"Error cleaning {file_path}: {e}") + +def should_clean_file(file_path): + """Determine if file should be cleaned""" + # Files to clean + patterns = [ + r'.*\.yml$', + r'.*\.tf$', + r'.*\.tfvars$', + r'.*\.md$', + r'.*\.sh$', + r'.*\.py$', + r'.*\.hcl$', + r'.*\.virlrc$' # Add .virlrc files + ] + + # Files to skip + skip_patterns = [ + r'.*\.git/.*', + r'.*__pycache__/.*', + r'.*\.terraform/.*', + r'.*\.pytest_cache/.*' + ] + + file_str = str(file_path) + + # Skip if matches skip patterns + if any(re.match(pattern, file_str) for pattern in skip_patterns): + return + + # Clean if matches clean patterns + return any(re.match(pattern, file_str) for pattern in patterns) + +def main(): + """Main function""" + config = load_config() + if not config: + return + + replacements = get_replacements(config) + + # Get project root (parent of scripts directory) + project_root = Path(__file__).resolve().parent.parent + + # Walk through all files + for root, _, files in os.walk(project_root): + for file in files: + file_path = Path(root) / file + if should_clean_file(file_path): + clean_file(file_path, replacements) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/scripts/generate_virlrc.py b/scripts/generate_virlrc.py new file mode 100644 index 0000000..96c27bc --- /dev/null +++ b/scripts/generate_virlrc.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 + +import yaml +import os +import boto3 +import argparse +from pathlib import Path + +def get_private_ip(region=None): + """ + Get CML controller instance details. + + Args: + region (str, optional): AWS region to search in. Defaults to config.yml setting. 
+ + Returns: + dict: Instance details including private_ip, public_ip, and instance_id + """ + try: + # If region not provided, get from config + if not region: + with open('config.yml', 'r') as file: + config = yaml.safe_load(file) + region = config.get('aws', {}).get('region', 'eu-west-1') + + # Create EC2 client + ec2 = boto3.client('ec2', region_name=region) + + # Get instances with CML controller tag + response = ec2.describe_instances( + Filters=[ + { + 'Name': 'tag:Name', + 'Values': ['CML-controller*'] + }, + { + 'Name': 'instance-state-name', + 'Values': ['running'] + } + ] + ) + + # Get instance details + for reservation in response['Reservations']: + for instance in reservation['Instances']: + if 'PrivateIpAddress' in instance: + return { + 'private_ip': instance['PrivateIpAddress'], + 'public_ip': instance.get('PublicIpAddress', ''), + 'instance_id': instance['InstanceId'] + } + + print("No running CML controller instance found") + except Exception as e: + print(f"Error getting instance details: {e}") + return None + +def generate_virlrc(region=None): + """Generate .virlrc file from config.yml""" + try: + with open('config.yml', 'r') as file: + config = yaml.safe_load(file) + + # Get credentials from config + _user = config['secret']['secrets']['app']['username'] + _pass = config['secret']['secrets']['app'].get('raw_secret', '') + + instance_details = get_private_ip(region) + if not instance_details: + print("Error: Could not get private IP of CML controller instance") + return + + virlrc_content = f"""export VIRL_HOST={instance_details['private_ip']} +export VIRL_USERNAME={_user} +export VIRL_PASSWORD={_pass} +export CML_VERIFY_CERT= +export CML_BASTION_IP={instance_details['public_ip']} +export CML_BASTION_ID={instance_details['instance_id']} +""" + + # Write to .virlrc in project directory + script_dir = Path(__file__).resolve().parent.parent + virlrc_path = script_dir / '.virlrc' + with open(virlrc_path, 'w') as f: + f.write(virlrc_content) + + os.chmod(virlrc_path, 0o600) # Set secure permissions + print(f"Generated .virlrc at {virlrc_path}") + + except Exception as e: + print(f"Error generating .virlrc: {e}") + return + + return True + +def main(): + parser = argparse.ArgumentParser(description='Generate .virlrc file for CML') + parser.add_argument('--region', help='AWS region (defaults to config.yml)') + + args = parser.parse_args() + generate_virlrc(args.region) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/scripts/manage_cml_instances.py b/scripts/manage_cml_instances.py new file mode 100644 index 0000000..010dcef --- /dev/null +++ b/scripts/manage_cml_instances.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 + +import boto3 +import sys +import yaml +import argparse +import time +import subprocess +from botocore.exceptions import ClientError +from pathlib import Path + +def ask_yes_no(prompt): + """Ask user for yes/no confirmation""" + while True: + response = input(f"{prompt} (yes/no): ").lower() + if response in ['yes', 'y']: + return True + if response in ['no', 'n']: + return + +def load_config(): + """Load region from config.yml if it exists""" + try: + with open('config.yml', 'r') as file: + config = yaml.safe_load(file) + return config.get('aws', {}).get('region', None) + except FileNotFoundError: + return None + +def get_cml_instances(ec2_client): + """Get all CML instances in the region""" + instances = [] + try: + # Get instances with name starting with 'CML-' + response = ec2_client.describe_instances( + Filters=[ + { + 'Name': 
'tag:Name', + 'Values': ['CML-*'] + } + ] + ) + + for reservation in response['Reservations']: + for instance in reservation['Instances']: + instances.append(instance) + + return instances + except ClientError as e: + print(f"Error getting instances: {e}") + sys.exit(1) + +def get_instance_status(ec2_client, instance_id): + """Get detailed status of an instance""" + try: + response = ec2_client.describe_instances(InstanceIds=[instance_id]) + instance = response['Reservations'][0]['Instances'][0] + state = instance['State']['Name'] + + # Get instance name from tags + name = next((tag['Value'] for tag in instance.get('Tags', []) + if tag['Key'] == 'Name'), instance_id) + + return { + 'name': name, + 'state': state, + 'id': instance_id + } + except ClientError as e: + print(f"Error getting status for instance {instance_id}: {e}") + return None + +def print_status(instances_status, action): + """Print current status of instances""" + print("\nCurrent Status:") + print("-" * 60) + print(f"{'Instance Name':<30} {'Instance ID':<20} {'State':<15}") + print("-" * 60) + + for status in instances_status: + if status: + print(f"{status['name']:<30} {status['id']:<20} {status['state']:<15}") + print("-" * 60) + +def monitor_instances(ec2_client, instance_ids, action, timeout=7200): # 2 hour timeout + """Monitor instances until they reach desired state or timeout""" + start_time = time.time() + desired_state = 'stopped' if action.lower() == 'stop' else 'running' + + while True: + instances_status = [get_instance_status(ec2_client, id) for id in instance_ids] + print_status(instances_status, action) + + # Check if all instances reached desired state + all_done = all(status and status['state'] == desired_state + for status in instances_status) + + if all_done: + print(f"\nAll instances successfully {action}ed!") + return True + + # Check timeout + elapsed_time = time.time() - start_time + if elapsed_time > timeout: + print(f"\nTimeout after {timeout/60:.1f} minutes!") + return + + # Wait before next check + print(f"\nWaiting... 
(Elapsed time: {elapsed_time/60:.1f} minutes)")
+        time.sleep(30)
+
+def stop_running_labs():
+    """Stop all running labs using cmlutils"""
+    try:
+        # Check if .virlrc exists
+        if not Path.home().joinpath('.virlrc').exists():
+            print("Warning: .virlrc not found, skipping lab shutdown")
+            return False
+
+        print("Stopping all running labs...")
+        # First get list of labs
+        result = subprocess.run(['cml', 'ls', '--all'],
+                                capture_output=True, text=True)
+
+        if result.returncode != 0:
+            if "No labs found" in result.stderr:
+                print("No running labs found")
+                return True
+            else:
+                print(f"Error listing labs: {result.stderr}")
+                return False
+
+        # Parse lab IDs from output
+        labs = []
+        for line in result.stdout.splitlines()[3:-1]:  # Skip header and footer
+            if line.strip():
+                lab_id = line.split()[0]
+                if lab_id != 'ID':  # Skip header row
+                    labs.append(lab_id)
+
+        if not labs:
+            print("No running labs found")
+            return True
+
+        # Stop each lab; only report success once all labs are down
+        for lab_id in labs:
+            print(f"Stopping lab {lab_id}...")
+            result = subprocess.run(['cml', 'down', lab_id],
+                                    capture_output=True, text=True)
+            if result.returncode != 0:
+                print(f"Error stopping lab {lab_id}: {result.stderr}")
+                return False
+
+        print("Successfully stopped all labs")
+        return True
+
+    except Exception as e:
+        print(f"Error using cmlutils: {e}")
+        return False
+
+def manage_instances(action, region=None, timeout=7200):
+    """Start or stop CML instances in the specified region"""
+    if not region:
+        region = load_config()
+        if not region:
+            region = 'eu-west-1'
+
+    print(f"Managing instances in region: {region}")
+
+    ec2_client = boto3.client('ec2', region_name=region)
+    instances = get_cml_instances(ec2_client)
+
+    if not instances:
+        print(f"No CML instances found in region {region}")
+        return
+
+    instance_ids = [instance['InstanceId'] for instance in instances]
+
+    try:
+        if action.lower() == 'stop':
+            # Try to stop running labs first
+            if not stop_running_labs():
+                if not ask_yes_no("Failed to stop labs. Continue with instance shutdown?"):
+                    print("Aborting instance shutdown")
+                    return
+            print(f"Initiating stop for instances: {instance_ids}")
+            ec2_client.stop_instances(InstanceIds=instance_ids)
+        else:
+            print(f"Initiating start for instances: {instance_ids}")
+            ec2_client.start_instances(InstanceIds=instance_ids)
+
+        success = monitor_instances(ec2_client, instance_ids, action, timeout)
+        if not success:
+            print("Warning: Some instances did not reach desired state!")
+            sys.exit(1)
+
+    except ClientError as e:
+        print(f"Error {action}ing instances: {e}")
+        sys.exit(1)
+
+def main():
+    parser = argparse.ArgumentParser(description='Manage CML instances in AWS')
+    parser.add_argument('action', choices=['start', 'stop'],
+                        help='Action to perform on instances')
+    parser.add_argument('--region', help='AWS region (defaults to config.yml)')
+    parser.add_argument('--timeout', type=int, default=7200,
+                        help='Timeout in seconds (default: 7200)')
+
+    args = parser.parse_args()
+    manage_instances(args.action, args.region, args.timeout)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/ssm-tunnel-guide.md b/ssm-tunnel-guide.md
new file mode 100644
index 0000000..9c9f3a3
--- /dev/null
+++ b/ssm-tunnel-guide.md
@@ -0,0 +1,107 @@
+# SSM Tunneling with sshuttle on macOS
+
+## Overview
+This guide explains how to set up secure tunneling to AWS private subnets using AWS Systems Manager (SSM) and sshuttle on macOS. The solution uses packet filter (PF) rules to enable proper traffic forwarding.
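+
+In short: sshuttle captures traffic for the chosen subnets, pushes it over a
+local SSH connection to port 2222, and the SSM session forwards that port to
+SSH on an instance inside the VPC.
+
+Before starting, install the client-side tools. The package names below are
+the usual Homebrew ones and may differ in your environment:
+
+```bash
+# sshuttle provides the transparent forwarding; the Session Manager
+# plugin is required by "aws ssm start-session"
+brew install sshuttle
+brew install --cask session-manager-plugin
+
+# running the plugin without arguments prints a success message
+session-manager-plugin
+```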
+ +## Understanding Packet Filter (PF) +PF is macOS's network packet filter, inherited from OpenBSD. The configuration consists of: + +1. **Anchors**: Named rulesets that can be loaded/unloaded dynamically +2. **Rules**: Define how traffic should be handled +3. **Quick**: Keyword that stops rule processing when a match is found + +The rule syntax we use: +```text +pass [in|out] quick [proto protocol] from source to destination [keep state] + +- pass: Allow the traffic +- in/out: Traffic direction +- quick: Stop processing rules when matched +- proto: Specify protocol (tcp, udp, etc) +- keep state: Track connection state +``` + +## Setup Instructions + +### 1. Configure Packet Filter +Create a new anchor file for sshuttle: + +```bash +sudo tee /etc/pf.anchors/sshuttle << EOF +# Allow SSM tunnel traffic +pass in quick proto tcp from any to any port 2222 keep state +pass out quick proto tcp from any to any port 2222 keep state + +# Allow forwarded subnet traffic +pass in quick from 10.0.0.0/16 to any keep state +pass out quick from any to 10.0.0.0/16 keep state +EOF +``` + +These rules: +- Allow TCP traffic to/from port 2222 (SSM tunnel) +- Allow all traffic to/from the 10.0.0.0/16 subnet +- Use `quick` to ensure rule matching stops when found +- Use `keep state` to track connection states + +### 2. Update PF Configuration +Add the sshuttle anchor to `/etc/pf.conf`: + +```bash +# Add before the final load anchor line +anchor "sshuttle/*" +load anchor "sshuttle" from "/etc/pf.anchors/sshuttle" +``` + +### 3. Apply PF Rules +Reload the PF configuration: + +```bash +sudo pfctl -f /etc/pf.conf +``` + +### 4. Start SSM Port Forwarding +In terminal 1: +```bash +aws ssm start-session \ + --target i-XXXXXXXXXXXXX \ + --document-name AWS-StartPortForwardingSession \ + --parameters '{"portNumber":["22"],"localPortNumber":["2222"]}' +``` + +### 5. Start Sshuttle +In terminal 2: +```bash +sshuttle -r localhost:2222 10.0.0.0/16 -v +``` + +## Notes + +### Troubleshooting PF +Check PF status: +```bash +sudo pfctl -si +``` + +View loaded rules: +```bash +sudo pfctl -sr +``` + +View anchors: +```bash +sudo pfctl -sa +``` + +### Important Considerations +- Keep both SSM session and sshuttle running for the tunnel to work +- Adjust the subnet (10.0.0.0/16) to match your target VPC +- Port 2222 can be changed if needed +- SSM requires appropriate IAM permissions +- PF rules persist across reboots but need to be enabled + +### Security Notes +- SSM provides secure access without direct SSH exposure +- All traffic is encrypted through the SSM session +- PF rules are scoped to specific ports and subnets +- Connection states are tracked for better security diff --git a/terraform.tf b/terraform.tf index 39bae89..2237e94 100644 --- a/terraform.tf +++ b/terraform.tf @@ -7,9 +7,16 @@ terraform { required_providers { cml2 = { - source = "CiscoDevNet/cml2" - version = ">=0.6.2" + source = "ciscodevnet/cml2" + version = "~> 0.8.1" + } + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.0" } } - required_version = ">= 1.1.0" } diff --git a/upload-images-to-aws-macos.sh b/upload-images-to-aws-macos.sh new file mode 100755 index 0000000..49339fe --- /dev/null +++ b/upload-images-to-aws-macos.sh @@ -0,0 +1,168 @@ +#!/bin/bash + +# +# Copyright (c) 2019-2024, Cisco Systems, Inc. +# All rights reserved. 
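+#
+# Usage: upload-images-to-aws-macos.sh [bucketname] [image-directory] [pkg-pattern]
+# Requires the aws CLI and dialog (both installable via Homebrew)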
+#
+# macOS-optimized version of the CML image upload script
+#
+
+# Default settings; DEFAULT_BUCKET can be provided via the environment
+BUCKETNAME=${1:-$DEFAULT_BUCKET}
+# ISO variable may need to be adjusted to reflect where the images have been extracted to
+ISO=${2:-/var/lib/libvirt/images}
+PKG=${3:-cml2_*.pkg}
+
+function help() {
+    cmd=$(basename "$0")
+    cat <<EOF
+Usage: $cmd [bucketname] [image-directory] [pkg-pattern]
+
+Copies selected CML reference platform images (and optionally the CML
+software package) from the image directory into the given S3 bucket.
+EOF
+}
+
+# Check for required tools
+for tool in aws dialog; do
+    if ! command -v "$tool" &>/dev/null; then
+        echo "Error: $tool is required but not installed"
+        echo "Install using: brew install $tool"
+        exit 1
+    fi
+done
+
+# Validate AWS CLI configuration
+if ! aws sts get-caller-identity &>/dev/null; then
+    echo "Error: AWS CLI not configured. Please run 'aws configure' first"
+    exit 1
+fi
+
+# Check if bucket exists and is accessible
+if ! aws s3 ls "s3://${BUCKETNAME}" &>/dev/null; then
+    echo "Error: Cannot access bucket s3://${BUCKETNAME}"
+    echo "Please check bucket name and AWS permissions"
+    exit 1
+fi
+
+# Validate image directory
+if [ ! -d "$ISO" ]; then
+    echo "Error: Reference platform path \"$ISO\" does not exist!"
+    exit 1
+fi
+
+# Change to image directory
+cd "$ISO" || exit 1
+
+# Validate directory structure
+if [ ! -d "virl-base-images" ] || [ ! -d "node-definitions" ]; then
+    echo "Error: \"$ISO\" missing required directories (virl-base-images or node-definitions)"
+    exit 1
+fi
+
+# CTRL+C handler
+trap 'echo -e "\nOperation cancelled"; exit 1' INT
+
+# Look for CML package
+cmlpkg=$(find . -name "$PKG" -type f | sort | tail -1)
+if [ -n "$cmlpkg" ]; then
+    if ! dialog --title "Software PKG found, copy to Bucket?" \
+        --defaultno --yesno \
+        "$(basename "$cmlpkg")" 5 40; then
+        cmlpkg=""
+    fi
+fi
+
+# Build list of available images
+pushd "virl-base-images" &>/dev/null || exit 1
+options=$(find . -type f -name '*.yaml' -exec sh -c 'basename "{}"; echo "on"' \;)
+popd &>/dev/null || exit 1
+
+if [ -z "$options" ]; then
+    echo "Error: No image definitions found in $ISO/virl-base-images"
+    exit 1
+fi
+
+# Image selection dialog
+selection=$(dialog --stdout --no-items --separate-output --checklist \
+    "Select images to copy to AWS bucket \"${BUCKETNAME}\"" 0 60 20 $options)
+dialog_status=$?
+clear
+
+if [ $dialog_status -eq 255 ]; then
+    echo "Upload cancelled by user"
+    exit 1
+fi
+
+# Process node definitions (BSD sed on macOS lacks \s / \S, so the
+# POSIX character classes are used instead)
+declare -a nodedefs_keys
+declare -a nodedefs_values
+for imagedef in $selection; do
+    fullpath=$(find "$ISO" -name "$imagedef")
+    defname=$(sed -nE '/^node_definition/s/^.*:([[:space:]]+)?([^[:space:]]+)$/\2/p' "$fullpath")
+    nodedefs_keys+=("$defname")
+    nodedefs_values+=("1")
+done
+
+# Upload CML package if selected
+if [ -n "$cmlpkg" ]; then
+    dialog --progressbox "Upload software package to bucket" 20 70 < <(
+        aws s3 cp "$cmlpkg" "s3://${BUCKETNAME}/"
+    )
+fi
+
+target="s3://${BUCKETNAME}/refplat"
+
+# Upload node definitions (pattern before the file operands, since BSD
+# grep does not accept options after them)
+dialog --progressbox "Upload node definitions to bucket" 20 70 < <(
+    for nodedef in "${nodedefs_keys[@]}"; do
+        fname=$(grep -lE "^id:([[:space:]]+)?${nodedef}$" "$ISO"/node-definitions/*)
+        if [ -n "$fname" ]; then
+            aws s3 cp "$fname" "$target/node-definitions/"
+            if [ $? -ne 0 ]; then
+                echo "Error: Failed to upload node definition: $nodedef"
+                exit 1
+            fi
+        fi
+    done
+)
+
+# Upload image definitions and files
+dialog --progressbox "Upload images to bucket" 20 70 < <(
+    for imagedef in $selection; do
+        imagedir=$(find "$ISO" -name "$imagedef" -exec dirname {} \;)
+        if [ -n "$imagedir" ]; then
+            aws s3 cp --recursive "$imagedir" "$target/virl-base-images/${imagedir##*/}"
+            if [ $? -ne 0 ]; then
+                echo "Error: Failed to upload image: $imagedef"
+                exit 1
+            fi
+        fi
+    done
+)
+
+clear
+echo "Upload completed successfully!"
+echo "Bucket: s3://${BUCKETNAME}/refplat" +echo "Uploaded node definitions: ${#nodedefs_keys[@]}" +echo "Uploaded images: $(echo "$selection" | wc -l)" \ No newline at end of file diff --git a/upload-images-to-aws.sh b/upload-images-to-aws.sh index 446e54e..c1fc315 100755 --- a/upload-images-to-aws.sh +++ b/upload-images-to-aws.sh @@ -15,9 +15,9 @@ # AWS_ACCESS_KEY_ID=ABCD AWS_SECRET_ACCESS_KEY=EF1234 aws ec2 describe-instances # -DEFAULT_BUCKET="aws-cml-images" BUCKETNAME=${1:-$DEFAULT_BUCKET} +# ISO variable may need to be adjusted to reflect where the image have been extracted to ISO=${2:-/var/lib/libvirt/images} PKG=${3:-cml2_*.pkg} @@ -110,11 +110,13 @@ if [ $s -eq 255 ]; then exit 255 fi -declare -A nodedefs +declare -a nodedefs_keys +declare -a nodedefs_values for imagedef in $selection; do fullpath=$(find $ISO -name $imagedef) defname=$(sed -nE '/^node_definition/s/^.*:(\s+)?(\S+)$/\2/p' $fullpath) - nodedefs[$defname]="1" + nodedefs_keys+=("$defname") + nodedefs_values+=("1") done if [ -n "$cmlpkg" ]; then @@ -126,7 +128,7 @@ fi target="s3://${BUCKETNAME}/refplat" dialog --progressbox "Upload node definitions to bucket" 20 70 < <( - for nodedef in ${!nodedefs[@]}; do + for nodedef in "${nodedefs_keys[@]}"; do fname=$(grep -l $ISO/node-definitions/* -Ee "^id:(\s+)?${nodedef}$") aws s3 cp $fname $target/node-definitions/ s=$?