diff --git a/.github/workflows/ami.yml b/.github/workflows/ami.yml new file mode 100644 index 00000000..783c526d --- /dev/null +++ b/.github/workflows/ami.yml @@ -0,0 +1,28 @@ +name: Build and Publish AMI +on: + pull_request: +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + environment: images + steps: + - uses: DeterminateSystems/nix-installer-action@cd46bde16ab981b0a7b2dce0574509104543276e # v9 + - uses: DeterminateSystems/magic-nix-cache-action@eeabdb06718ac63a7021c6132129679a8e22d0c7 # v3 + - uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2 + with: + aws-region: eu-north-1 + role-to-assume: arn:aws:iam::${{ vars.AWS_ACCOUNT_ID }}:role/upload-ami + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + - name: build image + run: nix build .#legacyAmazonImage + - name: upload image with coldnsap + run: | + file="$(jq -r '.file' < ./result/nix-support/image-info.json)" + echo "starting coldsnap" + nix run --inputs-from . 
nixpkgs#coldsnap -- upload --no-progress "$file" + echo "finished coldsnap" + + diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 85e0d3dd..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: CI -on: - push: - branches: - - main - pull_request: - branches: - - main -permissions: - contents: read -jobs: - check: - runs-on: ${{ matrix.runs-on.labels }} - strategy: - matrix: - runs-on: - - labels: [ubuntu-latest] - system: x86_64-linux - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - uses: DeterminateSystems/nix-installer-action@cd46bde16ab981b0a7b2dce0574509104543276e # v9 - - uses: DeterminateSystems/magic-nix-cache-action@eeabdb06718ac63a7021c6132129679a8e22d0c7 # v3 - - run: nix build .#amazonImage -L --system ${{ matrix.runs-on.system }} - - run: nix flake check -L --system ${{ matrix.runs-on.system }} diff --git a/.github/workflows/upload-legacy-ami.yml b/.github/workflows/upload-legacy-ami.yml deleted file mode 100644 index 8cfec26e..00000000 --- a/.github/workflows/upload-legacy-ami.yml +++ /dev/null @@ -1,119 +0,0 @@ -name: Upload Legacy Amazon Image -permissions: - contents: read -on: - push: - branches: - - main - pull_request: - workflow_dispatch: - schedule: - - cron: '0 0 * * 0' -jobs: - upload-ami: - name: Upload Legacy Amazon Image - runs-on: ubuntu-latest - environment: images - permissions: - contents: read - id-token: write - strategy: - matrix: - system: - - x86_64-linux - - aarch64-linux - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - uses: DeterminateSystems/nix-installer-action@cd46bde16ab981b0a7b2dce0574509104543276e # v9 - - uses: DeterminateSystems/magic-nix-cache-action@eeabdb06718ac63a7021c6132129679a8e22d0c7 # v3 - # NOTE: We download the AMI from Hydra instead of building it ourselves - # because aarch64 is currently not supported by AWS EC2 and the legacy - # image builder requires 
nested virtualization. - - name: Download AMI from Hydra - id: download_ami - run: | - set -o pipefail - out=$(curl --location --silent --fail-with-body --header 'Accept: application/json' https://hydra.nixos.org/job/nixos/release-23.11/nixos.amazonImage.${{ matrix.system }}/latest-finished | jq --raw-output '.buildoutputs.out.path') - nix-store --realise "$out" --add-root ./result - echo "image_info=$out/nix-support/image-info.json" >> "$GITHUB_OUTPUT" - - - uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2 - with: - role-to-assume: arn:aws:iam::${{ vars.AWS_ACCOUNT_ID }}:role/upload-ami - aws-region: ${{ vars.AWS_REGION }} - - - name: Upload Smoke test AMI - id: upload_smoke_test_ami - run: | - image_info='${{ steps.download_ami.outputs.image_info }}' - images_bucket='${{ vars.IMAGES_BUCKET }}' - image_ids=$(nix run .#upload-ami -- \ - --image-info "$image_info" \ - --prefix "smoketest/" \ - --s3-bucket "$images_bucket") - echo "image_ids=$image_ids" >> "$GITHUB_OUTPUT" - - - name: Smoke test - id: smoke_test - # NOTE: make sure smoke test isn't cancelled. Such that instance gets cleaned up. - run: | - image_ids='${{ steps.upload_smoke_test_ami.outputs.image_ids }}' - image_id=$(echo "$image_ids" | jq -r '.["${{ vars.AWS_REGION }}"]') - run_id='${{ github.run_id }}' - nix run .#smoke-test -- --image-id "$image_id" - - - name: Clean up smoke test - if: ${{ cancelled() }} - run: | - image_ids='${{ steps.upload_smoke_test_ami.outputs.image_ids }}' - image_id=$(echo "$image_ids" | jq -r '.["${{ vars.AWS_REGION }}"]') - run_id='${{ github.run_id }}' - nix run .#smoke-test -- --image-id "$image_id" --cancel - - - # NOTE: We do not pass run-id as we're not building the image ourselves - # and we thus need to poll hydra periodically. Including the run-id would - # cause us to register the same snapshot as an image over and over again - # for each run. 
- - name: Upload AMIs to all available regions - if: github.ref == 'refs/heads/main' - run: | - image_info='${{ steps.download_ami.outputs.image_info }}' - images_bucket='${{ vars.IMAGES_BUCKET }}' - nix run .#upload-ami -- \ - --image-info "$image_info" \ - --prefix "nixos/" \ - --s3-bucket "$images_bucket" \ - --copy-to-regions \ - --public - - deploy-pages: - name: Deploy images page - if: github.ref == 'refs/heads/main' - runs-on: ubuntu-latest - needs: upload-ami - permissions: - contents: read - id-token: write - pages: write - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - uses: DeterminateSystems/nix-installer-action@cd46bde16ab981b0a7b2dce0574509104543276e # v9 - - uses: DeterminateSystems/magic-nix-cache-action@eeabdb06718ac63a7021c6132129679a8e22d0c7 # v3 - - uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.1 - with: - role-to-assume: arn:aws:iam::${{ vars.AWS_ACCOUNT_ID }}:role/github-pages - aws-region: ${{ vars.AWS_REGION }} - - name: Describe images - run: nix run .#describe-images > ./site/images.json - - name: Upload pages - uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3.0.1 - with: - path: ./site - - name: Deploy pages - uses: actions/deploy-pages@decdde0ac072f6dcbe43649d82d9c635fff5b4e4 # v4.0.4 - id: deployment - if: github.ref == 'refs/heads/main' diff --git a/flake.lock b/flake.lock index db54019b..071c1957 100644 --- a/flake.lock +++ b/flake.lock @@ -2,11 +2,11 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1713532629, - "narHash": "sha256-8iwNoSDOCKFnDF7f8XReiztpESA0GyFieKhWAaG7jrw=", + "lastModified": 1713706266, + "narHash": "sha256-AvFLoQ5SvxYjvMMiV9k+TSE1gkS6DbGCWZUpNiw4tAA=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "7f62671ffcb37436b3df7d6ae44dfdca9e5a069d", + "rev": "e8664fce9b0e956ebee7214276dd807eee585c22", "type": 
"github" }, "original": { diff --git a/flake.nix b/flake.nix index 74f952a9..66d709b0 100644 --- a/flake.nix +++ b/flake.nix @@ -19,12 +19,6 @@ amazonImage = ./modules/amazon-image.nix; mock-imds = ./modules/mock-imds.nix; - version = { config, ... }: { - system.stateVersion = config.system.nixos.release; - # NOTE: This will cause an image to be built per commit. - # system.nixos.versionSuffix = lib.mkForce - # ".${lib.substring 0 8 (nixpkgs.lastModifiedDate or nixpkgs.lastModified or "19700101")}.${nixpkgs.shortRev}.${lib.substring 0 8 (self.lastModifiedDate or self.lastModified or "19700101")}.${self.shortRev or "dirty"}"; - }; }; lib.supportedSystems = [ "aarch64-linux" "x86_64-linux" "aarch64-darwin" ]; @@ -52,7 +46,6 @@ modules = [ self.nixosModules.ec2-instance-connect self.nixosModules.amazonImage - self.nixosModules.version ]; }).config.system.build.amazonImage; legacyAmazonImage = (lib.nixosSystem { @@ -61,11 +54,11 @@ modules = [ self.nixosModules.legacyAmazonImage { - boot.loader.grub.enable = false; - boot.loader.systemd-boot.enable = true; + ec2.efi = true; + # amazonImage.sizeMB = "auto"; + amazonImage.sizeMB = "auto"; + amazonImage.format = "raw"; # coldsnap requires raw } - { ec2.efi = true; amazonImage.sizeMB = "auto"; } - self.nixosModules.version ]; }).config.system.build.amazonImage; @@ -89,7 +82,6 @@ node.specialArgs.selfPackages = self.packages.${system}; defaults = { name, ... 
}: { imports = [ - self.nixosModules.version self.nixosModules.amazonImage self.nixosModules.mock-imds ]; diff --git a/modules/amazon-image.nix b/modules/amazon-image.nix index 89cdc107..44e36e72 100644 --- a/modules/amazon-image.nix +++ b/modules/amazon-image.nix @@ -13,18 +13,19 @@ in pkgs.runCommand config.system.build.image.name { } '' mkdir -p $out mkdir -p $out/nix-support - ${pkgs.qemu-utils}/bin/qemu-img convert -f raw -O vpc ${config.system.build.image}/${config.image.repart.imageFile} $out/${config.image.repart.imageFileBasename}.vhd cat < $out/nix-support/image-info.json { "boot_mode": "uefi", + "format": "raw", "label": "${config.system.nixos.label}", "system": "${pkgs.stdenv.hostPlatform.system}", - "file": "$out/${config.image.repart.imageFileBasename}.vhd" + "file": "${config.system.build.image}/${config.image.repart.imageFile}" } EOF ''; - image.repart.name = "${config.system.nixos.distroId}-${config.system.nixos.label}-${pkgs.stdenv.hostPlatform.system}"; + image.repart.name = config.system.nixos.distroId; + image.repart.version = config.system.nixos.version; image.repart.partitions = { "00-esp" = { contents = { diff --git a/tf/iam_github_actions.tf b/tf/iam_github_actions.tf index a84082e6..f028a9d1 100644 --- a/tf/iam_github_actions.tf +++ b/tf/iam_github_actions.tf @@ -34,6 +34,17 @@ data "aws_iam_policy_document" "upload_ami" { ] resources = ["*"] } + statement { + effect = "Allow" + actions = [ + "ebs:StartSnapshot", + "ebs:PutSnapshotBlock", + "ebs:ListChangedBlocks", + "ebs:ListSnapshotBlocks", + "ebs:CompleteSnapshot", + ] + resources = ["arn:aws:ec2:*:*:snapshot/*"] + } statement { effect = "Allow" actions = [ diff --git a/upload-ami/default.nix b/upload-ami/default.nix index c4f16ae4..17bbb51e 100644 --- a/upload-ami/default.nix +++ b/upload-ami/default.nix @@ -1,6 +1,8 @@ { buildPythonApplication , python3Packages , lib +, coldsnap +, qemu-utils }: let @@ -37,7 +39,11 @@ buildPythonApplication { python3Packages.black ]; - 
propagatedBuildInputs = lib.flatten (map resolvePackages pyproject.project.dependencies); + + + propagatedBuildInputs = + lib.flatten (map resolvePackages pyproject.project.dependencies) + ++ [ coldsnap qemu-utils ]; checkPhase = '' mypy src diff --git a/upload-ami/pyproject.toml b/upload-ami/pyproject.toml index c483e56b..7cf32694 100644 --- a/upload-ami/pyproject.toml +++ b/upload-ami/pyproject.toml @@ -17,5 +17,6 @@ disable-image-block-public-access = "upload_ami.disable_image_block_public_acces enable-regions = "upload_ami.enable_regions:main" request-public-ami-quota-increase = "upload_ami.request_public_ami_quota_increase:main" describe-images = "upload_ami.describe_images:main" +upload-coldsnap = "upload_ami.upload_coldsnap:main" [tool.mypy] strict=true diff --git a/upload-ami/src/upload_ami/upload_ami.py b/upload-ami/src/upload_ami/upload_ami.py index ac53a0ba..9a85fef7 100644 --- a/upload-ami/src/upload_ami/upload_ami.py +++ b/upload-ami/src/upload_ami/upload_ami.py @@ -9,12 +9,15 @@ import botocore.exceptions from mypy_boto3_ec2.client import EC2Client -from mypy_boto3_ec2.literals import BootModeValuesType from mypy_boto3_ec2.type_defs import RegionTypeDef from mypy_boto3_s3.client import S3Client from concurrent.futures import ThreadPoolExecutor +from typing import TypedDict + +from mypy_boto3_ec2.literals import BootModeValuesType + class ImageInfo(TypedDict): file: str diff --git a/upload-ami/src/upload_ami/upload_coldsnap.py b/upload-ami/src/upload_ami/upload_coldsnap.py new file mode 100644 index 00000000..5af40f93 --- /dev/null +++ b/upload-ami/src/upload_ami/upload_coldsnap.py @@ -0,0 +1,161 @@ +import argparse +from email.mime import image +import json +import logging +from pathlib import Path +from posixpath import basename +from re import I, sub +from tempfile import mktemp +import tempfile +import boto3 +import subprocess +from typing import Literal, TypedDict +from mypy_boto3_ec2 import EC2Client +from mypy_boto3_ec2.literals import 
BootModeValuesType
+
+
+class ImageInfo(TypedDict):
+    """Shape of the image-info JSON document that main() loads."""
+
+    # Path to the disk image file on the local machine.
+    file: str
+    # Image label; joined with the prefix and system to form the AMI name.
+    label: str
+    # Nix system string; "x86_64-linux" and "aarch64-linux" are supported.
+    system: str
+    # Passed through unchanged as the BootMode of the registered AMI.
+    boot_mode: BootModeValuesType
+    # Disk image format; "raw" images are uploaded without conversion.
+    format: str
+
+
+def register_image_if_not_exists(
+    ec2: EC2Client,
+    image_name: str,
+    image_info: ImageInfo,
+    snapshot_id: str,
+    public: bool,
+) -> str:
+    """
+    Register image if it doesn't exist yet
+
+    This function is idempotent because image_name is unique
+
+    Looks up images owned by this account whose name equals ``image_name``.
+    If one exists its ID is reused; otherwise a new HVM image backed by
+    ``snapshot_id`` (gp3 root volume on /dev/xvda, ENA, IMDSv2 and SR-IOV
+    enabled) is registered. In both cases the function waits until the image
+    is available and, when ``public`` is true, grants launch permission to
+    the "all" group.
+
+    Returns the image ID.
+
+    Raises ``Exception`` when ``image_info["system"]`` is neither
+    "x86_64-linux" nor "aarch64-linux".
+    """
+    describe_images = ec2.describe_images(
+        Owners=["self"], Filters=[{"Name": "name", "Values": [image_name]}]
+    )
+    if len(describe_images["Images"]) != 0:
+        # The asserts also narrow the optional TypedDict keys for mypy.
+        assert len(describe_images["Images"]) == 1
+        assert "ImageId" in describe_images["Images"][0]
+        image_id = describe_images["Images"][0]["ImageId"]
+    else:
+        architecture: Literal["x86_64", "arm64"]
+        assert "system" in image_info
+        if image_info["system"] == "x86_64-linux":
+            architecture = "x86_64"
+        elif image_info["system"] == "aarch64-linux":
+            architecture = "arm64"
+        else:
+            raise Exception("Unknown system: " + image_info["system"])
+
+        logging.info(f"Registering image {image_name} with snapshot {snapshot_id}")
+
+        # TODO(arianvp): Not all instance types support TPM 2.0 yet. We should
+        # upload two images, one with and one without TPM 2.0 support.
+
+        # if architecture == "x86_64" and image_info["boot_mode"] == "uefi":
+        #    tpmsupport['TpmSupport'] = "v2.0"
+
+        register_image = ec2.register_image(
+            Name=image_name,
+            Architecture=architecture,
+            BootMode=image_info["boot_mode"],
+            BlockDeviceMappings=[
+                {
+                    "DeviceName": "/dev/xvda",
+                    "Ebs": {
+                        "SnapshotId": snapshot_id,
+                        "VolumeType": "gp3",
+                    },
+                }
+            ],
+            RootDeviceName="/dev/xvda",
+            VirtualizationType="hvm",
+            EnaSupport=True,
+            ImdsSupport="v2.0",
+            SriovNetSupport="simple",
+        )
+        image_id = register_image["ImageId"]
+
+    # Wait in both branches: a pre-existing image may still be pending.
+    ec2.get_waiter("image_available").wait(ImageIds=[image_id])
+    if public:
+        logging.info(f"Making {image_id} public")
+        ec2.modify_image_attribute(
+            ImageId=image_id,
+            Attribute="launchPermission",
+            LaunchPermission={"Add": [{"Group": "all"}]},
+        )
+    return image_id
+
+
+def _upload_snapshot(image_file: Path) -> str:
+    """Run ``coldsnap upload`` on a raw image and return the ID it prints."""
+    logging.info(f"Uploading {image_file} to ebs")
+    return (
+        subprocess.check_output(
+            [
+                "coldsnap",
+                "upload",
+                image_file,
+            ]
+        )
+        .decode()
+        .strip()
+    )
+
+
+def upload_coldsnap(
+    *,
+    image_info: ImageInfo,
+    prefix: str,
+) -> str:
+    """
+    Upload a disk image as an EBS snapshot and register it as a private AMI
+
+    An image whose ``format`` is "raw" is handed to coldsnap as-is. Any other
+    image (including image-info files without a ``format`` key) is first
+    converted to raw with qemu-img inside a scratch directory that is removed
+    once the upload has finished, even if it fails.
+
+    The AMI is named ``prefix + label + "-" + system``.
+
+    Returns the image ID.
+
+    Raises ``subprocess.CalledProcessError`` when qemu-img or coldsnap exits
+    with a non-zero status.
+    """
+    logging.info("Uploading image to coldsnap")
+
+    image_file = Path(image_info["file"])
+    # .get(): image-info files written before "format" existed lack the key.
+    if image_info.get("format") == "raw":
+        snapshot_id = _upload_snapshot(image_file)
+    else:
+        # Keep the scratch copy in the working directory (where the converted
+        # image used to be written) rather than a possibly RAM-backed /tmp.
+        with tempfile.TemporaryDirectory(dir=Path.cwd()) as scratch_dir:
+            image_file_raw = Path(scratch_dir) / image_file.with_suffix(".raw").name
+            logging.info(f"Converting {image_file} to {image_file_raw}")
+            subprocess.check_call(
+                ["qemu-img", "convert", "-O", "raw", image_file, image_file_raw]
+            )
+            # Upload before leaving the block: exiting deletes the raw copy.
+            snapshot_id = _upload_snapshot(image_file_raw)
+
+    logging.info(f"Waiting for snapshot {snapshot_id} to be available")
+    ec2 = boto3.client("ec2")
+    ec2.get_waiter("snapshot_completed").wait(SnapshotIds=[snapshot_id])
+
+    image_name = prefix + image_info["label"] + "-" + image_info["system"]
+    logging.info(f"Registering image {image_name}")
+    image_id = register_image_if_not_exists(
+        ec2=ec2,
+        image_name=image_name,
+        image_info=image_info,
+        snapshot_id=snapshot_id,
+        public=False,
+    )
+    return image_id
+
+
+def main() -> None:
+    """Parse the command line, upload the image and print the AMI ID."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--image-info", help="Path to image info", required=True)
+    parser.add_argument("--prefix", help="Prefix for image name", required=True)
+    parser.add_argument("--debug", action="store_true")
+
+    args = parser.parse_args()
+
+    if args.debug:
+        level = logging.DEBUG
+    else:
+        level = logging.INFO
+    logging.basicConfig(level=level)
+
+    image_info: ImageInfo
+    # Read as UTF-8 explicitly instead of relying on the locale default.
+    with open(args.image_info, encoding="utf-8") as f:
+        image_info = json.load(f)
+
+    print(
+        upload_coldsnap(
+            image_info=image_info,
+            prefix=args.prefix,
+        )
+    )