From 60e3d21067e9cee2c29eb30f50ddbea8794cf17b Mon Sep 17 00:00:00 2001
From: nicholasmhughes
Date: Tue, 1 Mar 2022 22:25:42 -0500
Subject: [PATCH 1/3] put some pre-commit checks in place

---
 .pre-commit-config.yaml |   55 ++
 EksCreationEngine.py    | 1640 ++++++++++++++++++---------------------
 README.md               |   33 +-
 docs/HOWTO.md           |    2 +-
 main.py                 |  509 ++++++------
 plugins/ECEDatadog.py   |   87 ++-
 plugins/ECEFalco.py     |  366 ++++-----
 plugins/ECESecurity.py  |  228 +++---
 plugins/__init__.py     |   35 +-
 requirements.txt        |   12 +-
 10 files changed, 1447 insertions(+), 1520 deletions(-)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..8d4d994
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,55 @@
+---
+minimum_pre_commit_version: 2.4.0
+repos:
+  # ----- Formatting ---------------------------------------------------------------------------->
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.0.1
+    hooks:
+      - id: trailing-whitespace   # Trims trailing whitespace.
+        args: [--markdown-linebreak-ext=md]
+      - id: mixed-line-ending     # Replaces or checks mixed line ending.
+        args: [--fix=lf]
+      - id: end-of-file-fixer     # Makes sure files end in a newline and only a newline.
+      - id: check-merge-conflict  # Check for files that contain merge conflict strings.
+      - id: check-ast             # Simply check whether files parse as valid python.
+
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v2.23.3
+    hooks:
+      - id: pyupgrade
+        name: Rewrite Code to be Py3.7+
+        args: [
+          --py37-plus
+        ]
+
+  - repo: https://github.com/asottile/reorder_python_imports
+    rev: v2.6.0
+    hooks:
+      - id: reorder-python-imports
+        args: [
+          --py37-plus,
+        ]
+
+  - repo: https://github.com/psf/black
+    rev: 21.7b0
+    hooks:
+      - id: black
+        args: [-l 100]
+
+  - repo: https://github.com/asottile/blacken-docs
+    rev: v1.10.0
+    hooks:
+      - id: blacken-docs
+        args: [--skip-errors]
+        files: ^docs/.*\.md$
+        additional_dependencies: [black==21.7b0]
+  # <---- Formatting -----------------------------------------------------------------------------
+
+  # ----- Security ------------------------------------------------------------------------------>
+  - repo: https://github.com/PyCQA/bandit
+    rev: "1.7.0"
+    hooks:
+      - id: bandit
+        name: Run bandit against the code base
+        args: [--silent, -lll]
+  # <---- Security -------------------------------------------------------------------------------
diff --git a/EksCreationEngine.py b/EksCreationEngine.py
index ea35bf1..6aa6a7c 100644
--- a/EksCreationEngine.py
+++ b/EksCreationEngine.py
@@ -1,72 +1,78 @@
-#This file is part of Lightspin EKS Creation Engine.
-#SPDX-License-Identifier: Apache-2.0
-
-#Licensed to the Apache Software Foundation (ASF) under one
-#or more contributor license agreements. See the NOTICE file
-#distributed with this work for additional information
-#regarding copyright ownership. The ASF licenses this file
-#to you under the Apache License, Version 2.0 (the
+# This file is part of Lightspin EKS Creation Engine.
+# SPDX-License-Identifier: Apache-2.0
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
 #'License'); you may not use this file except in compliance
-#with the License. You may obtain a copy of the License at
-
-#http://www.apache.org/licenses/LICENSE-2.0
-
-#Unless required by applicable law or agreed to in writing,
-#software distributed under the License is distributed on an
+# with the License. You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
 #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#KIND, either express or implied. See the License for the
-#specific language governing permissions and limitations
-#under the License.
-
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
 import base64
+import json
+import re
+import subprocess
 import sys
+import time
+from datetime import datetime
+
 import boto3
 import botocore.exceptions
-import json
-from datetime import datetime
-import time
-import subprocess
-import re
+
 from plugins.ECEDatadog import DatadogSetup
 from plugins.ECEFalco import FalcoSetup
 
 cache = list()
 
-class ClusterManager():
 
+
+class ClusterManager:
     def get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_architecture):
-        '''
+        """
        This function either receives an AMI ID from main.py or receives the default value of 'SSM' which is matched against
        the arguments `ami_os` and `ami_architecture` to dynamically pull the latest, stable AMI from SSM Public Parameters
-        '''
-        ssm = boto3.client('ssm')
+        """
+        ssm = boto3.client("ssm")
 
-        if ami_id == 'SSM':
+        if ami_id == "SSM":
             # Ubuntu 20.04 LTS
-            if ami_os == 'ubuntu':
+            if ami_os == "ubuntu":
                 # AMD64
-                if ami_architecture == 'amd64':
+                if ami_architecture == "amd64":
                     # /aws/service/canonical/ubuntu/eks/20.04/1.21/stable/current/amd64/hvm/ebs-gp2/ami-id
-                    publicParameter = str(f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id')
+                    publicParameter = str(
+                        f"/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id"
+                    )
                 # ARM64
                 else:
                     # /aws/service/canonical/ubuntu/eks/20.04/1.21/stable/current/arm64/hvm/ebs-gp2/ami-id
-                    publicParameter = str(f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id')
+                    publicParameter = str(
+                        f"/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id"
+                    )
             # Amazon Linux 2
             # Public Params search in the console is fucky, check here: https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html
             else:
                 # AMD64
-                if ami_architecture == 'amd64':
+                if ami_architecture == "amd64":
                     # /aws/service/eks/optimized-ami/1.21/amazon-linux-2/recommended/image_id
-                    publicParameter = str(f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2/recommended/image_id')
+                    publicParameter = str(
+                        f"/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2/recommended/image_id"
+                    )
                 # ARM64
                 else:
                     # /aws/service/eks/optimized-ami/1.21/amazon-linux-2-arm64/recommended/image_id
-                    publicParameter = str(f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2-arm64/recommended/image_id')
+                    publicParameter = str(
+                        f"/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2-arm64/recommended/image_id"
+                    )
 
         # retrieve the AMI ID and return it
         try:
-            amiId = ssm.get_parameter(Name=publicParameter)['Parameter']['Value']
+            amiId = ssm.get_parameter(Name=publicParameter)["Parameter"]["Value"]
         except Exception as e:
             raise e
         else:
@@ -77,88 +83,70 @@ def get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_arch
         del ssm
         del publicParameter
 
-        print(f'Your EKS Nodegroup AMI is {amiId}')
+        print(f"Your EKS Nodegroup AMI is {amiId}")
 
         return amiId
 
     def create_cluster_svc_role(cluster_role_name):
-        '''
+        """
         This function creates a Cluster Service Role for EKS, required for Cluster Creation
-        '''
-        iam = boto3.client('iam')
-        sts = boto3.client('sts')
-        acctId = sts.get_caller_identity()['Account']
+        """
+        iam = boto3.client("iam")
+        sts = boto3.client("sts")
+        acctId = sts.get_caller_identity()["Account"]
 
         # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging
-        createdBy = str(sts.get_caller_identity()['Arn'])
+        createdBy = str(sts.get_caller_identity()["Arn"])
         createdAt = str(datetime.utcnow())
 
         # Trust Policy for EKS
         trustPolicy = {
-            'Version': '2012-10-17',
-            'Statement': [
+            "Version": "2012-10-17",
+            "Statement": [
                 {
-                    'Effect': 'Allow',
-                    'Principal': {
-                        'Service': 'eks.amazonaws.com'
-                    },
-                    'Action': 'sts:AssumeRole'
+                    "Effect": "Allow",
+                    "Principal": {"Service": "eks.amazonaws.com"},
+                    "Action": "sts:AssumeRole",
                 }
-            ]
+            ],
         }
 
         try:
             r = iam.create_role(
-                Path='/',
+                Path="/",
                 RoleName=cluster_role_name,
                 AssumeRolePolicyDocument=json.dumps(trustPolicy),
-                Description='Allows access to other AWS service resources that are required to operate clusters managed by EKS',
+                Description="Allows access to other AWS service resources that are required to operate clusters managed by EKS",
                 MaxSessionDuration=3600,
                 Tags=[
-                    {
-                        'Key': 'Name',
-                        'Value': cluster_role_name
-                    },
-                    {
-                        'Key': 'CreatedBy',
-                        'Value': createdBy
-                    },
-                    {
-                        'Key': 'CreatedAt',
-                        'Value': createdAt
-                    },
-                    {
-                        'Key': 'CreatedWith',
-                        'Value': 'Lightspin ECE'
-                    }
-                ]
+                    {"Key": "Name", "Value": cluster_role_name},
+                    {"Key": "CreatedBy", "Value": createdBy},
+                    {"Key": "CreatedAt", "Value": createdAt},
+                    {"Key": "CreatedWith", "Value": "Lightspin ECE"},
+                ],
             )
             # Attach required Cluster Policy (AWS Managed) or get following error
             # botocore.errorfactory.InvalidParameterException: An error occurred (InvalidParameterException) when calling the CreateCluster operation: The provided role doesn't have the Amazon EKS Managed Policies associated with it. Please ensure the following policies [arn:aws:iam::aws:policy/AmazonEKSClusterPolicy] are attached
-            waiter = iam.get_waiter('role_exists')
+            waiter = iam.get_waiter("role_exists")
 
-            waiter.wait(
-                RoleName=cluster_role_name,
-                WaiterConfig={
-                    'Delay': 3,
-                    'MaxAttempts': 20
-                }
-            )
+            waiter.wait(RoleName=cluster_role_name, WaiterConfig={"Delay": 3, "MaxAttempts": 20})
 
             iam.attach_role_policy(
                 RoleName=cluster_role_name,
-                PolicyArn='arn:aws:iam::aws:policy/AmazonEKSClusterPolicy'
+                PolicyArn="arn:aws:iam::aws:policy/AmazonEKSClusterPolicy",
             )
 
-            roleArn = str(r['Role']['Arn'])
+            roleArn = str(r["Role"]["Arn"])
         except botocore.exceptions.ClientError as error:
             # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead
-            if error.response['Error']['Code'] == 'EntityAlreadyExists':
-                print(f'The supplied role name of {cluster_role_name} already exists, attempting to use it')
-                roleArn = f'arn:aws:iam::{acctId}:role/{cluster_role_name}'
+            if error.response["Error"]["Code"] == "EntityAlreadyExists":
+                print(
+                    f"The supplied role name of {cluster_role_name} already exists, attempting to use it"
+                )
+                roleArn = f"arn:aws:iam::{acctId}:role/{cluster_role_name}"
             else:
-                print(f'Error encountered: {error}')
+                print(f"Error encountered: {error}")
                 RollbackManager.rollback_from_cache(cache=cache)
         except botocore.exceptions.WaiterError as we:
-            print(f'Error encountered: {we}')
+            print(f"Error encountered: {we}")
             RollbackManager.rollback_from_cache(cache=cache)
 
         del iam
@@ -166,75 +154,62 @@ def create_cluster_svc_role(cluster_role_name):
         del sts
         del acctId
         del trustPolicy
 
-        print(f'Your cluster role ARN is {roleArn}')
+        print(f"Your cluster role ARN is {roleArn}")
 
         return roleArn
 
     def create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name):
-        '''
+        """
        Creates an IAM Policy that allows S3 GetObject permissions for use in the Nodegroup Role
-        '''
-        iam = boto3.client('iam')
-        sts = boto3.client('sts')
-        acctId = sts.get_caller_identity()['Account']
+        """
+        iam = boto3.client("iam")
+        sts = boto3.client("sts")
+        acctId = sts.get_caller_identity()["Account"]
 
         # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging
-        createdBy = str(sts.get_caller_identity()['Arn'])
+        createdBy = str(sts.get_caller_identity()["Arn"])
         createdAt = str(datetime.utcnow())
 
-        policyName = f'{nodegroup_role_name}Policy'
+        policyName = f"{nodegroup_role_name}Policy"
 
         iamPolicyDoc = {
-            'Version': '2012-10-17',
-            'Statement': [
+            "Version": "2012-10-17",
+            "Statement": [
                 {
-                    'Sid': 'GetObjectSid',
-                    'Effect': 'Allow',
-                    'Action': [
-                        's3:GetObjectAcl',
-                        's3:GetObject',
-                        's3:GetBucketAcl',
-                        's3:GetBucketLocation'
+                    "Sid": "GetObjectSid",
+                    "Effect": "Allow",
+                    "Action": [
+                        "s3:GetObjectAcl",
+                        "s3:GetObject",
+                        "s3:GetBucketAcl",
+                        "s3:GetBucketLocation",
                     ],
-                    'Resource': [
-                        f'arn:aws:s3:::{bucket_name}/*',
-                        f'arn:aws:s3:::{bucket_name}'
-                    ]
+                    "Resource": [f"arn:aws:s3:::{bucket_name}/*", f"arn:aws:s3:::{bucket_name}"],
                 }
-            ]
+            ],
         }
 
        try:
             r = iam.create_policy(
                 PolicyName=policyName,
-                Path='/',
+                Path="/",
                 PolicyDocument=json.dumps(iamPolicyDoc),
-                Description='Allows access to specific S3 buckets for node groups managed by EKS - Created by Lightspin ECE',
+                Description="Allows access to specific S3 buckets for node groups managed by EKS - Created by Lightspin ECE",
                 Tags=[
-                    {
-                        'Key': 'Name',
-                        'Value': policyName
-                    },
-                    {
-                        'Key': 'CreatedBy',
-                        'Value': createdBy
-                    },
-                    {
-                        'Key': 'CreatedAt',
-                        'Value': createdAt
-                    },
-                    {
-                        'Key': 'CreatedWith',
-                        'Value': 'Lightspin ECE'
-                    }
-                ]
+                    {"Key": "Name", "Value": policyName},
+                    {"Key": "CreatedBy", "Value": createdBy},
+                    {"Key": "CreatedAt", "Value": createdAt},
+                    {"Key": "CreatedWith", "Value": "Lightspin ECE"},
+                ],
             )
 
-            policyArn = str(r['Policy']['Arn'])
+            policyArn = str(r["Policy"]["Arn"])
         except botocore.exceptions.ClientError as error:
             # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead
             # we will assume it has the right permissions after all
-            if error.response['Error']['Code'] == 'EntityAlreadyExists':
-                print(f'The supplied role policy name of {policyName} already exists, attempting to use it')
-                policyArn = f'arn:aws:iam::{acctId}:policy/{policyName}'
+            if error.response["Error"]["Code"] == "EntityAlreadyExists":
+                print(
+                    f"The supplied role policy name of {policyName} already exists, attempting to use it"
+                )
+                policyArn = f"arn:aws:iam::{acctId}:policy/{policyName}"
             else:
-                print(f'Error encountered: {error}')
+                print(f"Error encountered: {error}")
                 RollbackManager.rollback_from_cache(cache=cache)
 
         del iam
@@ -243,110 +218,89 @@ def create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name):
         del sts
         del acctId
         del iamPolicyDoc
         del policyName
 
-        print(f'Your node group role policy ARN is {policyArn}')
+        print(f"Your node group role policy ARN is {policyArn}")
 
         return policyArn
 
     def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes):
-        '''
+        """
        This function creates a Nodegroup Service Role for EKS, which gives Nodes permissions to interact with AWS APIs.
        This function calls the `create_managed_nodegroup_s3_policy` function and passes the S3 Bucket name specified in main.py
        to allow your Nodegroup Role to communicate with the S3 bucket for bootstrapping purposes
-        '''
-        iam = boto3.client('iam')
-        sts = boto3.client('sts')
-        acctId = sts.get_caller_identity()['Account']
+        """
+        iam = boto3.client("iam")
+        sts = boto3.client("sts")
+        acctId = sts.get_caller_identity()["Account"]
 
         roleName = nodegroup_role_name
 
         # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging
-        createdBy = str(sts.get_caller_identity()['Arn'])
+        createdBy = str(sts.get_caller_identity()["Arn"])
         createdAt = str(datetime.utcnow())
 
         # Static list of required AWS Managed Policies for EKS Managed Nodegroup Roles
         # Adding SSM for SSM access as SSH Keypairs are not specified
         nodegroupAwsManagedPolicies = [
-            'arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy',
-            'arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly',
-            'arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy',
-            'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore'
+            "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy",
+            "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly",
+            "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy",
+            "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
         ]
 
         # Grab S3 Node Group policy from other Function & add to List if MDE is enabled
-        if mde_on_nodes == 'True':
-            s3PolicyArn = ClusterManager.create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name)
+        if mde_on_nodes == "True":
+            s3PolicyArn = ClusterManager.create_managed_nodegroup_s3_policy(
+                bucket_name, nodegroup_role_name
+            )
             nodegroupAwsManagedPolicies.append(s3PolicyArn)
 
         # Trust Policy for EKS NodeGroup Role trusts EC2
         trustPolicy = {
-            'Version': '2012-10-17',
-            'Statement': [
+            "Version": "2012-10-17",
+            "Statement": [
                 {
-                    'Effect': 'Allow',
-                    'Principal': {
-                        'Service': 'ec2.amazonaws.com'
-                    },
-                    'Action': 'sts:AssumeRole'
+                    "Effect": "Allow",
+                    "Principal": {"Service": "ec2.amazonaws.com"},
+                    "Action": "sts:AssumeRole",
                 }
-            ]
+            ],
         }
 
         try:
             r = iam.create_role(
-                Path='/',
+                Path="/",
                 RoleName=roleName,
                 AssumeRolePolicyDocument=json.dumps(trustPolicy),
-                Description='Allows access to other AWS service resources that are required to operate node groups managed by EKS',
+                Description="Allows access to other AWS service resources that are required to operate node groups managed by EKS",
                 MaxSessionDuration=3600,
                 Tags=[
-                    {
-                        'Key': 'Name',
-                        'Value': roleName
-                    },
-                    {
-                        'Key': 'CreatedBy',
-                        'Value': createdBy
-                    },
-                    {
-                        'Key': 'CreatedAt',
-                        'Value': createdAt
-                    },
-                    {
-                        'Key': 'CreatedWith',
-                        'Value': 'Lightspin ECE'
-                    }
-                ]
+                    {"Key": "Name", "Value": roleName},
+                    {"Key": "CreatedBy", "Value": createdBy},
+                    {"Key": "CreatedAt", "Value": createdAt},
+                    {"Key": "CreatedWith", "Value": "Lightspin ECE"},
+                ],
             )
 
-            roleArn = str(r['Role']['Arn'])
+            roleArn = str(r["Role"]["Arn"])
 
-            waiter = iam.get_waiter('role_exists')
-            waiter.wait(
-                RoleName=roleName,
-                WaiterConfig={
-                    'Delay': 3,
-                    'MaxAttempts': 20
-                }
-            )
+            waiter = iam.get_waiter("role_exists")
+            waiter.wait(RoleName=roleName, WaiterConfig={"Delay": 3, "MaxAttempts": 20})
         except botocore.exceptions.ClientError as error:
             # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead
             # we will assume it has the right permissions after all
-            if error.response['Error']['Code'] == 'EntityAlreadyExists':
-                print(f'The supplied role name of {roleName} already exists, attempting to use it')
-                roleArn = f'arn:aws:iam::{acctId}:role/{roleName}'
+            if error.response["Error"]["Code"] == "EntityAlreadyExists":
+                print(f"The supplied role name of {roleName} already exists, attempting to use it")
+                roleArn = f"arn:aws:iam::{acctId}:role/{roleName}"
             else:
-                print(f'Error encountered: {error}')
+                print(f"Error encountered: {error}")
                 RollbackManager.rollback_from_cache(cache=cache)
         except botocore.exceptions.WaiterError as we:
-            print(f'Error encountered: {we}')
+            print(f"Error encountered: {we}")
             RollbackManager.rollback_from_cache(cache=cache)
 
         # Loop through List of policies and attach Policies to Role, handle errors if already attached
         try:
             for policy in nodegroupAwsManagedPolicies:
-                iam.attach_role_policy(
-                    RoleName=roleName,
-                    PolicyArn=policy
-                )
+                iam.attach_role_policy(RoleName=roleName, PolicyArn=policy)
         except Exception as e:
-            print(f'Error encountered: {e}')
+            print(f"Error encountered: {e}")
             RollbackManager.rollback_from_cache(cache=cache)
 
         del iam
@@ -355,28 +309,28 @@ def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes
         del sts
         del acctId
         del trustPolicy
         del roleName
 
-        print(f'Your node group role ARN is {roleArn}')
+        print(f"Your node group role ARN is {roleArn}")
 
         return roleArn
 
     def cluster_security_group_factory(cluster_name, vpc_id, additional_ports):
-        '''
+        """
        This function creates a minimum necessary Security Group for your EKS Cluster based on AWS reccomendations https://docs.aws.amazon.com/eks/latest/userguide/sec-group-reqs.html
        this will also add permissions to ports TCP 2801 and TCP 8765 for FalcoSidekick and Falco Security, respectively, for At-Create or later
        configuration of Falco in a Cluster which provides real-time protection and event forwarding
-        '''
+        """
 
-        ec2 = boto3.client('ec2')
-        sts = boto3.client('sts')
+        ec2 = boto3.client("ec2")
+        sts = boto3.client("sts")
 
-        print(f'Setting up a Security Group for VPC {vpc_id} for EKS Cluster {cluster_name}')
+        print(f"Setting up a Security Group for VPC {vpc_id} for EKS Cluster {cluster_name}")
 
         # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging
-        createdBy = str(sts.get_caller_identity()['Arn'])
+        createdBy = str(sts.get_caller_identity()["Arn"])
         createdAt = str(datetime.utcnow())
 
         # Generate SG Name, passed to the create_security_group() method, and used for general messaging
-        sgName = str(f'{cluster_name}ClusterSG')
+        sgName = str(f"{cluster_name}ClusterSG")
 
         # Load constants of ports needed reccomended by AWS and needed by Falco/Falco Sidekick
         defaultPortSet = [53, 443, 2801, 8765, 10250]
@@ -385,7 +339,7 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports):
         for p in additional_ports:
             if int(p) not in defaultPortSet:
                 defaultPortSet.append(int(p))
-        
+
         # remove the list, it's not needed anymore
         del additional_ports
 
@@ -394,67 +348,52 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports):
 
         # Get CIDR information on the VPC
         try:
-            r = ec2.describe_vpcs(VpcIds=[vpc_id])['Vpcs'][0]
-            vpcMainCidr = str(r['CidrBlock'])
+            r = ec2.describe_vpcs(VpcIds=[vpc_id])["Vpcs"][0]
+            vpcMainCidr = str(r["CidrBlock"])
             allVpcCidrs.append(vpcMainCidr)
             # Loop additional CIDRs if they exist and are associated
-            for cidr in r['CidrBlockAssociationSet']:
-                if str(cidr['CidrBlockState']['State']) == 'associated':
-                    if str(cidr['CidrBlock']) not in allVpcCidrs:
-                        allVpcCidrs.append(str(cidr['CidrBlock']))
+            for cidr in r["CidrBlockAssociationSet"]:
+                if str(cidr["CidrBlockState"]["State"]) == "associated":
+                    if str(cidr["CidrBlock"]) not in allVpcCidrs:
+                        allVpcCidrs.append(str(cidr["CidrBlock"]))
         except KeyError as ke:
-            print(f'Error encountered: {ke}')
+            print(f"Error encountered: {ke}")
             RollbackManager.rollback_from_cache(cache=cache)
         except botocore.exceptions.ClientError as error:
-            print(f'Error encountered: {error}')
+            print(f"Error encountered: {error}")
             RollbackManager.rollback_from_cache(cache=cache)
 
         # All CIDRs collected and ports consolidated, Security Group creation starts now
         try:
             r = ec2.create_security_group(
-                Description=f'Security Group for EKS Cluster {cluster_name} - Created by {createdBy} using Lightspin ECE',
+                Description=f"Security Group for EKS Cluster {cluster_name} - Created by {createdBy} using Lightspin ECE",
                 GroupName=sgName,
                 VpcId=vpc_id,
                 TagSpecifications=[
                     {
-                        'ResourceType': 'security-group',
-                        'Tags': [
-                            {
-                                'Key': 'Name',
-                                'Value': sgName
-                            },
-                            {
-                                'Key': 'CreatedBy',
-                                'Value': createdBy
-                            },
-                            {
-                                'Key': 'CreatedAt',
-                                'Value': createdAt
-                            },
-                            {
-                                'Key': 'CreatedWith',
-                                'Value': 'Lightspin ECE'
-                            },
+                        "ResourceType": "security-group",
+                        "Tags": [
+                            {"Key": "Name", "Value": sgName},
+                            {"Key": "CreatedBy", "Value": createdBy},
+                            {"Key": "CreatedAt", "Value": createdAt},
+                            {"Key": "CreatedWith", "Value": "Lightspin ECE"},
                             # This tag is required per AWS Docs
                            # One, and only one, of the security groups associated to your nodes should have the following tag applied: kubernetes.io/cluster/cluster-name: owned
                            # For more information about tagging, see Working with tags using the console.
-                            {
-                                'Key': f'kubernetes.io/cluster/{cluster_name}',
-                                'Value': 'owned'
-                            }
-                        ]
+                            {"Key": f"kubernetes.io/cluster/{cluster_name}", "Value": "owned"},
+                        ],
                     }
-                ]
+                ],
             )
 
-            secGroupId = str(r['GroupId'])
+            secGroupId = str(r["GroupId"])
 
-            sgCache = {
-                'ClusterSecurityGroupId': secGroupId
-            }
+            sgCache = {"ClusterSecurityGroupId": secGroupId}
 
             cache.append(sgCache)
-            print(f'Added {sgName} ID {secGroupId} to Cache')
+            print(f"Added {sgName} ID {secGroupId} to Cache")
 
-            print(f'Authorizing ingress for Ports {defaultPortSet} for CIDRS {allVpcCidrs} for {sgName}')
+            print(
+                f"Authorizing ingress for Ports {defaultPortSet} for CIDRS {allVpcCidrs} for {sgName}"
+            )
 
             # Now start adding Inbound Rules per CIDR and per Port
             # Add conditional logic for port 53 (DNS) to create both TCP and UDP Rules
@@ -465,91 +404,67 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports):
                             GroupId=secGroupId,
                             IpPermissions=[
                                 {
-                                    'FromPort': int(port),
-                                    'ToPort': int(port),
-                                    'IpProtocol': 'tcp',
-                                    'IpRanges': [
+                                    "FromPort": int(port),
+                                    "ToPort": int(port),
+                                    "IpProtocol": "tcp",
+                                    "IpRanges": [
                                         {
-                                            'CidrIp': cidr,
-                                            'Description': f'Allow tcp {port} to {cidr}'
+                                            "CidrIp": cidr,
+                                            "Description": f"Allow tcp {port} to {cidr}",
                                         }
-                                    ]
+                                    ],
                                 },
                                 {
-                                    'FromPort': int(port),
-                                    'ToPort': int(port),
-                                    'IpProtocol': 'udp',
-                                    'IpRanges': [
+                                    "FromPort": int(port),
+                                    "ToPort": int(port),
+                                    "IpProtocol": "udp",
+                                    "IpRanges": [
                                         {
-                                            'CidrIp': cidr,
-                                            'Description': f'Allow udp {port} to {cidr}'
+                                            "CidrIp": cidr,
+                                            "Description": f"Allow udp {port} to {cidr}",
                                         }
-                                    ]
-                                }
+                                    ],
+                                },
                             ],
                             TagSpecifications=[
                                 {
-                                    'ResourceType': 'security-group-rule',
-                                    'Tags': [
-                                        {
-                                            'Key': 'Name',
-                                            'Value': f'{sgName}{cidr}{port}'
-                                        },
-                                        {
-                                            'Key': 'CreatedBy',
-                                            'Value': createdBy
-                                        },
-                                        {
-                                            'Key': 'CreatedAt',
-                                            'Value': createdAt
-                                        },
-                                        {
-                                            'Key': 'CreatedWith',
-                                            'Value': 'Lightspin ECE'
-                                        }
-                                    ]
+                                    "ResourceType": "security-group-rule",
+                                    "Tags": [
+                                        {"Key": "Name", "Value": f"{sgName}{cidr}{port}"},
+                                        {"Key": "CreatedBy", "Value": createdBy},
+                                        {"Key": "CreatedAt", "Value": createdAt},
+                                        {"Key": "CreatedWith", "Value": "Lightspin ECE"},
+                                    ],
                                 }
-                            ]
+                            ],
                         )
                     else:
                         ec2.authorize_security_group_ingress(
                             GroupId=secGroupId,
                             IpPermissions=[
                                 {
-                                    'FromPort': int(port),
-                                    'ToPort': int(port),
-                                    'IpProtocol': 'tcp',
-                                    'IpRanges': [
+                                    "FromPort": int(port),
+                                    "ToPort": int(port),
+                                    "IpProtocol": "tcp",
+                                    "IpRanges": [
                                         {
-                                            'CidrIp': cidr,
-                                            'Description': f'Allow tcp {port} to {cidr}'
+                                            "CidrIp": cidr,
+                                            "Description": f"Allow tcp {port} to {cidr}",
                                         }
-                                    ]
+                                    ],
                                 }
                             ],
                             TagSpecifications=[
                                 {
-                                    'ResourceType': 'security-group-rule',
-                                    'Tags': [
-                                        {
-                                            'Key': 'Name',
-                                            'Value': f'{sgName}{cidr}{port}'
-                                        },
-                                        {
-                                            'Key': 'CreatedBy',
-                                            'Value': createdBy
-                                        },
-                                        {
-                                            'Key': 'CreatedAt',
-                                            'Value': createdAt
-                                        },
-                                        {
-                                            'Key': 'CreatedWith',
-                                            'Value': 'Lightspin ECE'
-                                        }
-                                    ]
+                                    "ResourceType": "security-group-rule",
+                                    "Tags": [
+                                        {"Key": "Name", "Value": f"{sgName}{cidr}{port}"},
+                                        {"Key": "CreatedBy", "Value": createdBy},
+                                        {"Key": "CreatedAt", "Value": createdAt},
+                                        {"Key": "CreatedWith", "Value": "Lightspin ECE"},
+                                    ],
                                 }
-                            ]
+                            ],
                         )
 
             # Adding inbound rules per Port for the Security Group itself (talk to self for Node-Cluster Comms)
@@ -559,91 +474,67 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports):
                         GroupId=secGroupId,
                         IpPermissions=[
                             {
-                                'FromPort': int(port),
-                                'ToPort': int(port),
-                                'IpProtocol': 'tcp',
-                                'UserIdGroupPairs': [
+                                "FromPort": int(port),
+                                "ToPort": int(port),
+                                "IpProtocol": "tcp",
+                                "UserIdGroupPairs": [
                                     {
-                                        'Description': f'Allow tcp {port} to {secGroupId}',
-                                        'GroupId': secGroupId
+                                        "Description": f"Allow tcp {port} to {secGroupId}",
+                                        "GroupId": secGroupId,
                                     }
-                                ]
+                                ],
                             },
                             {
-                                'FromPort': int(port),
-                                'ToPort': int(port),
-                                'IpProtocol': 'udp',
-                                'UserIdGroupPairs': [
+                                "FromPort": int(port),
+                                "ToPort": int(port),
+                                "IpProtocol": "udp",
+                                "UserIdGroupPairs": [
                                     {
-                                        'Description': f'Allow udp {port} to {secGroupId}',
-                                        'GroupId': secGroupId
+                                        "Description": f"Allow udp {port} to {secGroupId}",
+                                        "GroupId": secGroupId,
                                     }
-                                ]
-                            }
+                                ],
+                            },
                         ],
                         TagSpecifications=[
                             {
-                                'ResourceType': 'security-group-rule',
-                                'Tags': [
-                                    {
-                                        'Key': 'Name',
-                                        'Value': f'{sgName}{secGroupId}{port}'
-                                    },
-                                    {
-                                        'Key': 'CreatedBy',
-                                        'Value': createdBy
-                                    },
-                                    {
-                                        'Key': 'CreatedAt',
-                                        'Value': createdAt
-                                    },
-                                    {
-                                        'Key': 'CreatedWith',
-                                        'Value': 'Lightspin ECE'
-                                    }
-                                ]
+                                "ResourceType": "security-group-rule",
+                                "Tags": [
+                                    {"Key": "Name", "Value": f"{sgName}{secGroupId}{port}"},
+                                    {"Key": "CreatedBy", "Value": createdBy},
+                                    {"Key": "CreatedAt", "Value": createdAt},
+                                    {"Key": "CreatedWith", "Value": "Lightspin ECE"},
+                                ],
                             }
-                        ]
+                        ],
                     )
                 else:
                     ec2.authorize_security_group_ingress(
                         GroupId=secGroupId,
                         IpPermissions=[
                             {
-                                'FromPort': int(port),
-                                'ToPort': int(port),
-                                'IpProtocol': 'tcp',
-                                'UserIdGroupPairs': [
+                                "FromPort": int(port),
+                                "ToPort": int(port),
+                                "IpProtocol": "tcp",
+                                "UserIdGroupPairs": [
                                     {
-                                        'Description': f'Allow tcp {port} to {secGroupId}',
-                                        'GroupId': secGroupId
+                                        "Description": f"Allow tcp {port} to {secGroupId}",
+                                        "GroupId": secGroupId,
                                     }
-                                ]
+                                ],
                             }
                         ],
                         TagSpecifications=[
                             {
-                                'ResourceType': 'security-group-rule',
-                                'Tags': [
-                                    {
-                                        'Key': 'Name',
-                                        'Value': f'{sgName}{secGroupId}{port}'
-                                    },
-                                    {
-                                        'Key': 'CreatedBy',
-                                        'Value': createdBy
-                                    },
-                                    {
-                                        'Key': 'CreatedAt',
-                                        'Value': createdAt
-                                    },
-                                    {
-                                        'Key': 'CreatedWith',
-                                        'Value': 'Lightspin ECE'
-                                    }
-                                ]
+                                "ResourceType": "security-group-rule",
+                                "Tags": [
+                                    {"Key": "Name", "Value": f"{sgName}{secGroupId}{port}"},
+                                    {"Key": "CreatedBy", "Value": createdBy},
+                                    {"Key": "CreatedAt", "Value": createdAt},
+                                    {"Key": "CreatedWith", "Value": "Lightspin ECE"},
+                                ],
                             }
-                        ]
+                        ],
                     )
 
             # Adding TCP 443 (HTTPS) from the internet which is required for patching and agent communications
@@ -651,120 +542,97 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports):
                 GroupId=secGroupId,
                 IpPermissions=[
                     {
-                        'FromPort': 443,
-                        'ToPort': 443,
-                        'IpProtocol': 'tcp',
-                        'IpRanges': [
-                            {
-                                'CidrIp': '0.0.0.0/0',
-                                'Description': f'Allow tcp 443 to Internet'
-                            }
-                        ]
+                        "FromPort": 443,
+                        "ToPort": 443,
+                        "IpProtocol": "tcp",
+                        "IpRanges": [
+                            {"CidrIp": "0.0.0.0/0", "Description": f"Allow tcp 443 to Internet"}
+                        ],
                     }
                 ],
                 TagSpecifications=[
                     {
-                        'ResourceType': 'security-group-rule',
-                        'Tags': [
-                            {
-                                'Key': 'Name',
-                                'Value': f'{sgName}Internet{port}'
-                            },
-                            {
-                                'Key': 'CreatedBy',
-                                'Value': createdBy
-                            },
-                            {
-                                'Key': 'CreatedAt',
-                                'Value': createdAt
-                            },
-                            {
-                                'Key': 'CreatedWith',
-                                'Value': 'Lightspin ECE'
-                            }
-                        ]
+                        "ResourceType": "security-group-rule",
+                        "Tags": [
+                            {"Key": "Name", "Value": f"{sgName}Internet{port}"},
+                            {"Key": "CreatedBy", "Value": createdBy},
+                            {"Key": "CreatedAt", "Value": createdAt},
+                            {"Key": "CreatedWith", "Value": "Lightspin ECE"},
+                        ],
                     }
-                ]
+                ],
             )
         except botocore.exceptions.ClientError as error:
-            print(f'Error encountered: {error}')
+            print(f"Error encountered: {error}")
             RollbackManager.rollback_from_cache(cache=cache)
 
-        print(f'Finished creating {sgName} and adding all required Rule Authorizations')
+        print(f"Finished creating {sgName} and adding all required Rule Authorizations")
 
         return secGroupId
 
     def encryption_key_factory(cluster_name):
-        '''
+        """
        This function is responsible for creating a KMS Key to use with EKS Secrets Envelope Encryption as well as Nodegroup (EC2) EBS Encryption
        we will attach a proper Key Policy later
-        '''
-        kms = boto3.client('kms')
-        sts = boto3.client('sts')
+        """
+        kms = boto3.client("kms")
+        sts = boto3.client("sts")
 
         # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging
         # STS is also used for the Account ID to interpolate ARNs which will be created later
-        createdBy = str(sts.get_caller_identity()['Arn'])
+        createdBy = str(sts.get_caller_identity()["Arn"])
         createdAt = str(datetime.utcnow())
 
-        print(f'Creating KMS CMK for encryption operations')
+        print(f"Creating KMS CMK for encryption operations")
 
         # The first time we create the Key we must not attach a policy as the Roles we need to give permission to do not exist yet (nodegroup & cluster IAM role)
         # it will attach a default policy that allows our entire AWS Account access - this is good so we can override it later
         try:
             kmsKeyArn = kms.create_key(
-                Description=f'Used for EKS Envelope Encryption and EBS Volume Encryption for EKS Cluster {cluster_name} - Created by Lightspin ECE',
+                Description=f"Used for EKS Envelope Encryption and EBS Volume Encryption for EKS Cluster {cluster_name} - Created by Lightspin ECE",
                 # Default values for AES-256/GCM Keys. Being verbose in case AWS ever changes the default values of these
-                KeyUsage='ENCRYPT_DECRYPT',
-                KeySpec='SYMMETRIC_DEFAULT',
-                Origin='AWS_KMS',
+                KeyUsage="ENCRYPT_DECRYPT",
+                KeySpec="SYMMETRIC_DEFAULT",
+                Origin="AWS_KMS",
                 Tags=[
-                    {
-                        'TagKey': 'Name',
-                        'TagValue': f'{cluster_name}-EKS-CMK'
-                    },
-                    {
-                        'TagKey': 'CreatedBy',
-                        'TagValue': createdBy
-                    },
-                    {
-                        'TagKey': 'CreatedAt',
-                        'TagValue': createdAt
-                    },
-                    {
-                        'TagKey': 'CreatedWith',
-                        'TagValue': 'Lightspin ECE'
-                    }
-                ]
-            )['KeyMetadata']['Arn']
+                    {"TagKey": "Name", "TagValue": f"{cluster_name}-EKS-CMK"},
+                    {"TagKey": "CreatedBy", "TagValue": createdBy},
+                    {"TagKey": "CreatedAt", "TagValue": createdAt},
+                    {"TagKey": "CreatedWith", "TagValue": "Lightspin ECE"},
+                ],
+            )["KeyMetadata"]["Arn"]
         except KeyError as ke:
-            print(f'Error encountered: {ke}')
+            print(f"Error encountered: {ke}")
             RollbackManager.rollback_from_cache(cache=cache)
         except botocore.exceptions.ParamValidationError as pe:
-            print(f'Error encountered: {pe}')
+            print(f"Error encountered: {pe}")
             RollbackManager.rollback_from_cache(cache=cache)
         except botocore.exceptions.ClientError as error:
-            print(f'Error encountered: {error}')
+            print(f"Error encountered: {error}")
             RollbackManager.rollback_from_cache(cache=cache)
 
         return kmsKeyArn
-
-    def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports):
-        '''
+
+    def create_cluster(
+        cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports
+    ):
+        """
        This function uses the EKS Boto3 Client to create a cluster, taking inputs from main.py to determing naming & Encryption
-        '''
-        eks = boto3.client('eks')
-        sts = boto3.client('sts')
+        """
+        eks = boto3.client("eks")
+        sts = boto3.client("sts")
 
         # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging
-        createdBy = str(sts.get_caller_identity()['Arn'])
+        createdBy = str(sts.get_caller_identity()["Arn"])
         createdAt = str(datetime.utcnow())
 
         # Call `create_cluster_svc_role` to create or re-use the EKS cluster service IAM role
         clusterRoleArn = ClusterManager.create_cluster_svc_role(cluster_role_name)
 
         # Call `cluster_security_group_factory` to create or re-use an EKS cluster security group that allows minimum necessary comms intra-VPC
-        securityGroupId = ClusterManager.cluster_security_group_factory(cluster_name, vpc_id, additional_ports)
+        securityGroupId = ClusterManager.cluster_security_group_factory(
+            cluster_name, vpc_id, additional_ports
+        )
 
         # Call `encryption_key_factory` to create a KMS Key ARN. Simple! (We'll add the Key Policy later)
         kmsKeyArn = ClusterManager.encryption_key_factory(cluster_name)
@@ -776,60 +644,51 @@ def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_i
                 version=str(kubernetes_version),
                 roleArn=clusterRoleArn,
                 resourcesVpcConfig={
-                    'subnetIds': subnet_ids,
-                    'securityGroupIds': [securityGroupId],
-                    'endpointPublicAccess': False,
-                    'endpointPrivateAccess': True
+                    "subnetIds": subnet_ids,
+                    "securityGroupIds": [securityGroupId],
+                    "endpointPublicAccess": False,
+                    "endpointPrivateAccess": True,
                 },
                 logging={
-                    'clusterLogging': [
-                        {
+                    "clusterLogging": [
+                        {
                             # all Logging types are enabled here
-                            'types': ['api','audit','authenticator','controllerManager','scheduler'],
-                            'enabled': True
+                            "types": [
+                                "api",
+                                "audit",
+                                "authenticator",
+                                "controllerManager",
+                                "scheduler",
+                            ],
+                            "enabled": True,
                         }
                     ]
                 },
-                encryptionConfig=[
-                    {
-                        'resources': [
-                            'secrets'
-                        ],
-                        'provider': {
-                            'keyArn': kmsKeyArn
-                        }
-                    }
-                ],
+                encryptionConfig=[{"resources": ["secrets"], "provider": {"keyArn": kmsKeyArn}}],
                 tags={
-                    'Name': cluster_name,
-                    'CreatedBy': createdBy,
-                    'CreatedAt': createdAt,
-                    'CreatedWith': 'Lightspin ECE'
-                }
+                    "Name": cluster_name,
+                    "CreatedBy": createdBy,
+                    "CreatedAt": createdAt,
+                    "CreatedWith": "Lightspin ECE",
+                },
             )
 
             # Establish provided EKS Waiter() for cluster to come up
             # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Waiter.ClusterActive
-            print(f'Waiting for your Cluster to come online')
+            print(f"Waiting for your Cluster to come online")
 
-            waiter = eks.get_waiter('cluster_active')
+            waiter = eks.get_waiter("cluster_active")
 
-            waiter.wait(
-                name=cluster_name,
-                WaiterConfig={
-                    'Delay': 30,
-                    'MaxAttempts': 40
-                }
-            )
+            waiter.wait(name=cluster_name, WaiterConfig={"Delay": 30, "MaxAttempts": 40})
 
-            finalClusterName = str(r['cluster']['name'])
+            finalClusterName = str(r["cluster"]["name"])
 
-            print(f'EKS Cluster {finalClusterName} is now live')
+            print(f"EKS Cluster {finalClusterName} is now live")
         except botocore.exceptions.ClientError as error:
-            print(f'Error encountered: {error}')
+            print(f"Error encountered: {error}")
             RollbackManager.rollback_from_cache(cache=cache)
         except botocore.exceptions.WaiterError as we:
-            print(f'Error encountered: {we}')
+            print(f"Error encountered: {we}")
             RollbackManager.rollback_from_cache(cache=cache)
 
         del eks
@@ -842,7 +701,7 @@ def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_i
 
         return finalClusterName, securityGroupId, kmsKeyArn, clusterRoleArn
 
     def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os):
-        '''
+        """
        This function generates EC2 UserData (in Base64) to be passed to the `create_launch_template` Function for creating a
        custom launch template that uses custom AMIs passed in main.py or defaults to the EKS-optimized AMI for Ubuntu 20.04LTS
        corresponding to the K8s verson used.
        This function parses the S3 Bucket from main.py which stores the MDE activation scripts, if that is configured.
@@ -852,24 +711,26 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os
 
        Details: https://aws.amazon.com/blogs/containers/introducing-launch-template-and-custom-ami-support-in-amazon-eks-managed-node-groups/
 
        WTF is `set -ex`? https://askubuntu.com/questions/346900/what-does-set-e-do
-        '''
-        eks = boto3.client('eks')
+        """
+        eks = boto3.client("eks")
 
-        print(f'Retrieving Certificate Authority and API Server URL information for bootstrap script')
+        print(
+            f"Retrieving Certificate Authority and API Server URL information for bootstrap script"
+        )
 
         # DescribeCluster and pull necessary values to set as env vars within the bootstrap
         c = eks.describe_cluster(name=cluster_name)
 
-        eksApiServerUrl = str(c['cluster']['endpoint'])
-        eksB64ClusterCa = str(c['cluster']['certificateAuthority']['data'])
+        eksApiServerUrl = str(c["cluster"]["endpoint"])
+        eksB64ClusterCa = str(c["cluster"]["certificateAuthority"]["data"])
 
         # Support for IMDSv2 Tokens for reaching metadata service
         # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html#instance-metadata-ex-7
         # MDE Installation Scripts: https://docs.microsoft.com/en-us/microsoft-365/security/defender-endpoint/linux-install-manually?view=o365-worldwide
-        if mde_on_nodes == 'True':
+        if mde_on_nodes == "True":
             # Ubuntu
-            if ami_os == 'ubuntu':
-                script = f'''#!/bin/bash
+            if ami_os == "ubuntu":
+                script = f"""#!/bin/bash
 set -ex
 B64_CLUSTER_CA={eksB64ClusterCa}
 API_SERVER_URL={eksApiServerUrl}
@@ -891,10 +752,10 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os
 TOKEN=$(curl -X PUT 'http://169.254.169.254/latest/api/token' -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600')
 INSTANCE_ID=$(curl -H 'X-aws-ec2-metadata-token: $TOKEN' -v http://169.254.169.254/latest/meta-data/instance-id)
 mdatp edr tag set --name GROUP --value $INSTANCE_ID
-                '''
+                """
             # Amazon Linux 2
             else:
-                script = f'''#!/bin/bash
+                script = f"""#!/bin/bash
 set -ex
 B64_CLUSTER_CA={eksB64ClusterCa}
 API_SERVER_URL={eksApiServerUrl}
@@ -912,31 +773,31 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os
 TOKEN=$(curl -X PUT 'http://169.254.169.254/latest/api/token' -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600')
 INSTANCE_ID=$(curl -H 'X-aws-ec2-metadata-token: $TOKEN' -v http://169.254.169.254/latest/meta-data/instance-id)
 mdatp edr tag set --name GROUP --value $INSTANCE_ID
-                '''
+                """
         else:
             # No need for MDE in this one, create a regular script
             # Ubuntu
-            if ami_os == 'ubuntu':
-                script = f'''#!/bin/bash
+            if ami_os == "ubuntu":
+                script = f"""#!/bin/bash
 set -ex
 B64_CLUSTER_CA={eksB64ClusterCa}
 API_SERVER_URL={eksApiServerUrl}
 /etc/eks/bootstrap.sh {cluster_name} --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL
 apt update
 apt upgrade -y
-                '''
+                """
             # Amazon Linux 2
             else:
-                script = f'''#!/bin/bash
+                script = f"""#!/bin/bash
 set -ex
 B64_CLUSTER_CA={eksB64ClusterCa}
 API_SERVER_URL={eksApiServerUrl}
 /etc/eks/bootstrap.sh {cluster_name} --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL
 yum update -y
-                '''
+                """
 
         # Base64 encode the bootstrap script
-        userData = base64.b64encode(script.encode()).decode('ascii')
+        userData = base64.b64encode(script.encode()).decode("ascii")
 
         del eks
         del c
@@ -945,143 +806,167 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os
 
         return userData
 
-    def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name, launch_template_name, kms_key_arn, securityGroupId, ebs_volume_size, instance_type, mde_on_nodes, ami_os, ami_architecture):
-        '''
+    def create_launch_template(
+        cluster_name,
+        kubernetes_version,
+        ami_id,
+        bucket_name,
+        launch_template_name,
+        kms_key_arn,
+        securityGroupId,
+        ebs_volume_size,
+        instance_type,
+        mde_on_nodes,
+        ami_os,
+        ami_architecture,
+    ):
+        """
        This function creates an EC2 Launch Template using encryption and AMI data supplied from main.py and passes it to the
        `builder` function where final EKS Nodegroup creation takes place
-        '''
+        """
         # This is for creating the Launch Template used by EKS to launch Managed Node Groups with a custom AMI & bootstrap script
-        ec2 = boto3.client('ec2')
-        sts = boto3.client('sts')
+        ec2 = boto3.client("ec2")
+        sts = boto3.client("sts")
 
         # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging
-        createdBy = str(sts.get_caller_identity()['Arn'])
+        createdBy = str(sts.get_caller_identity()["Arn"])
         createdAt = str(datetime.utcnow())
 
         # Pull latest AMI ID for EKS-optimized Ubuntu 20.04LTS for specified K8s Version in main.py
-        amiId = ClusterManager.get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_architecture)
+        amiId = ClusterManager.get_latest_eks_optimized_ubuntu(
+            kubernetes_version, ami_id, ami_os, ami_architecture
+        )
 
         # Retrieve Base64 metadata from bootstrap generation function - this will download and install MDE (MDATP) from files in the S3 bucket specified in main.py if --mde_on_nodes is true. Will use ami_os arguements to create different UserData as well
-        userData = ClusterManager.generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os)
+        userData = ClusterManager.generate_nodegroup_bootstrap(
+            bucket_name, cluster_name, mde_on_nodes, ami_os
+        )
 
         # For IMDSv2 - keeping this outside for eventual modification of hop limits?
         metadataOptions = {
-            'HttpTokens': 'required',
-            'HttpPutResponseHopLimit': 2,
-            'HttpEndpoint': 'enabled'
+            "HttpTokens": "required",
+            "HttpPutResponseHopLimit": 2,
+            "HttpEndpoint": "enabled",
         }
 
         try:
             r = ec2.create_launch_template(
                 DryRun=False,
                 LaunchTemplateName=launch_template_name,
-                VersionDescription=f'Created by the EKS Creation Engine on {createdAt}',
+                VersionDescription=f"Created by the EKS Creation Engine on {createdAt}",
                 LaunchTemplateData={
-                    'EbsOptimized': False,
-                    'BlockDeviceMappings': [
+                    "EbsOptimized": False,
+                    "BlockDeviceMappings": [
                         {
-                            'DeviceName': '/dev/sda1',
-                            'Ebs': {
-                                'Encrypted': True,
-                                'DeleteOnTermination': True,
-                                'KmsKeyId': kms_key_arn,
-                                'VolumeSize': int(ebs_volume_size),
-                                'VolumeType': 'gp2'
-                            }
+                            "DeviceName": "/dev/sda1",
+                            "Ebs": {
+                                "Encrypted": True,
+                                "DeleteOnTermination": True,
+                                "KmsKeyId": kms_key_arn,
+                                "VolumeSize": int(ebs_volume_size),
+                                "VolumeType": "gp2",
+                            },
                         }
                     ],
-                    'ImageId': amiId,
-                    'InstanceType': instance_type,
-                    'UserData': str(userData),
-                    'SecurityGroupIds': [securityGroupId],
-                    'MetadataOptions': metadataOptions,
-                    'TagSpecifications': [
+                    "ImageId": amiId,
+                    "InstanceType": instance_type,
+                    "UserData": str(userData),
+                    "SecurityGroupIds": [securityGroupId],
+                    "MetadataOptions": metadataOptions,
+                    "TagSpecifications": [
                         {
-                            'ResourceType': 'instance',
-                            'Tags': [
-                                {
-                                    'Key': 'Name',
-                                    'Value': str(f'{launch_template_name}Node')
-                                },
-                                {
-                                    'Key': 'CreatedBy',
-                                    'Value': createdBy
-                                },
-                                {
-                                    'Key': 'CreatedAt',
-                                    'Value': createdAt
-                                },
-                                {
-                                    'Key': 'CreatedWith',
-                                    'Value': 'Lightspin ECE'
-                                }
-                            ]
+                            "ResourceType": "instance",
+                            "Tags": [
+                                {"Key": "Name", "Value": str(f"{launch_template_name}Node")},
+                                {"Key": "CreatedBy", "Value": createdBy},
+                                {"Key": "CreatedAt", "Value": createdAt},
+                                {"Key": "CreatedWith", "Value": "Lightspin ECE"},
+                            ],
                         },
                         {
-                            'ResourceType': 'volume',
-                            'Tags': [
-                                {
-                                    'Key': 'Name',
-                                    'Value': str(f'{launch_template_name}Node')
-                                },
-                                {
-                                    'Key': 'CreatedBy',
-                                    'Value': createdBy
-                                },
-                                {
-                                    'Key': 'CreatedAt',
-                                    'Value': createdAt
-                                },
-                                {
-                                    'Key': 'CreatedWith',
-                                    'Value': 'Lightspin ECE'
-                                }
-                            ]
-                        }
-                    ]
-                }
+                            "ResourceType": "volume",
+                            "Tags": [
+                                {"Key": "Name", "Value": str(f"{launch_template_name}Node")},
+                                {"Key": "CreatedBy", "Value": createdBy},
+                                {"Key": "CreatedAt", "Value": createdAt},
+                                {"Key": "CreatedWith", "Value": "Lightspin ECE"},
+                            ],
+                        },
+                    ],
+                },
             )
 
-            launchTemplateId = str(r['LaunchTemplate']['LaunchTemplateId'])
+            launchTemplateId = str(r["LaunchTemplate"]["LaunchTemplateId"])
         except botocore.exceptions.ClientError as error:
-            print(f'Error encountered: {error}')
+            print(f"Error encountered: {error}")
             RollbackManager.rollback_from_cache(cache=cache)
         except Exception as e:
-            print(f'Error encountered: {e}')
+            print(f"Error encountered: {e}")
             RollbackManager.rollback_from_cache(cache=cache)
 
         return launchTemplateId
-
-    def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_type, cluster_name, cluster_role_name, nodegroup_name, nodegroup_role_name, launch_template_name, vpc_id, subnet_ids, node_count, mde_on_nodes, additional_ports, falco_bool, falco_sidekick_destination_type, falco_sidekick_destination, ami_os, ami_architecture, datadog_api_key, datadog_bool, addtl_auth_principals):
-        '''
+
+    def builder(
+        kubernetes_version,
+        bucket_name,
+        ebs_volume_size,
+        ami_id,
+        instance_type,
+        cluster_name,
+        cluster_role_name,
+        nodegroup_name,
+        nodegroup_role_name,
+        launch_template_name,
+        vpc_id,
+        subnet_ids,
+        node_count,
+        mde_on_nodes,
+        additional_ports,
+        falco_bool,
+        falco_sidekick_destination_type,
+        falco_sidekick_destination,
+        ami_os,
+        ami_architecture,
+        datadog_api_key,
+        datadog_bool,
+        addtl_auth_principals,
+    ):
+        """
        This function is the 'brain' that controls creation and calls the required functions to build infrastructure and services (EKS, EC2, IAM).
        This function also stores all required arguments into cache to facilitate rollbacks upon errors
-        '''
+        """
 
         # Write argument variables that are directly used for infrastructure creation to cache
         # Assemble names for Security Groups (these will be replicated everywhere but not passed around to minimize **kwargs bloat)
         cacheDict = {
-            'ClusterName': cluster_name,
-            'ClusterRoleName': cluster_role_name,
-            'NodegroupName': nodegroup_name,
-            'NodegroupRoleName': nodegroup_role_name,
-            'LaunchTemplateName': launch_template_name
+            "ClusterName": cluster_name,
+            "ClusterRoleName": cluster_role_name,
+            "NodegroupName": nodegroup_name,
+            "NodegroupRoleName": nodegroup_role_name,
+            "LaunchTemplateName": launch_template_name,
         }
         cache.append(cacheDict)
-        print(f'Cache loaded with necessary rollback variables.')
+        print(f"Cache loaded with necessary rollback variables.")
 
-        eks = boto3.client('eks')
-        sts = boto3.client('sts')
-        kms = boto3.client('kms')
-        iam = boto3.client('iam')
+        eks = boto3.client("eks")
+        sts = boto3.client("sts")
+        kms = boto3.client("kms")
+        iam = boto3.client("iam")
 
         # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging
-        createdBy = str(sts.get_caller_identity()['Arn'])
+        createdBy = str(sts.get_caller_identity()["Arn"])
         createdAt = str(datetime.utcnow())
-        acctId = str(sts.get_caller_identity()['Account'])
+        acctId = str(sts.get_caller_identity()["Account"])
 
         # Create an EKS Cluster by calling `create_cluster` - this will take the longest, and if it fails, then other infrastructure won't be created
         # the positional selectors are for when you return multiple values, they are bundled in a tuple, and have to be accessed in the order they're provided
-        callClusterManager = ClusterManager.create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports)
+        callClusterManager = ClusterManager.create_cluster(
+            cluster_name,
+            kubernetes_version,
+            cluster_role_name,
+            subnet_ids,
+            vpc_id,
+            additional_ports,
+        )
         clusterName = callClusterManager[0]
         securityGroupId = callClusterManager[1]
         kms_key_arn = callClusterManager[2]
@@ -1089,7 +974,9 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t
 
         # Passes the S3 Bucket name to the `create_managed_nodegroup_role` function which in turn passes it to the `create_managed_nodegroup_s3_policy`
         # function which allows your Nodegroups to pull artifacts from S3 as part of bootstrapping
-        nodegroupRoleArn = ClusterManager.create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes)
+        nodegroupRoleArn = ClusterManager.create_managed_nodegroup_role(
+            bucket_name, nodegroup_role_name, mde_on_nodes
+        )
 
         # Now we can attach our proper Key Policy to the KMS Key since we now have all Roles ready
@@ -1100,14 +987,16 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t
         # So this tries to create an ARN of the IAM Role you assumed (or transparently assumed if you are using WorkSpaces, Cloud9 or SSM Hybrid Activations or otherwise)
         # It is extremely stupid...YMMV for deletion
         # arn:aws:sts::ACCOUNT_ID:assumed-role/ROLE_NAME/ROLE_SESSION_NAME
-        seshRoleRegex = re.compile('assumed-role')
+        seshRoleRegex = re.compile("assumed-role")
         seshRoleCheck = seshRoleRegex.search(createdBy)
         # On match to Regex do stupid stuff >:(
         if seshRoleCheck:
-            print(f'Your ARN from STS AssumeRole {createdBy} matches a temporary Session ARN, attempting to find your upstream IAM Role')
-            roleNameSplit = createdBy.split('/')[1]
-            createdByRoleArn = f'arn:aws:iam::{acctId}:role/{roleNameSplit}'
-            print(f'Your Role ARN upstream to your session was determined as {createdByRoleArn}')
+            print(
+                f"Your ARN from STS AssumeRole {createdBy} matches a temporary Session ARN, attempting to find your upstream IAM Role"
+            )
+            roleNameSplit = createdBy.split("/")[1]
+            createdByRoleArn = f"arn:aws:iam::{acctId}:role/{roleNameSplit}"
+            print(f"Your Role ARN upstream to your session was determined as {createdByRoleArn}")
         else:
             # If you're not an assumed Role you're just a User or a Role and should be fine??
             createdByRoleArn = createdBy
 
         # Setup a modified version of the Default KMS Policy, eliminating some Conditional statements to allow Autoscaling, EKS, and EC2 to use the key and set Grants
         # First, attempt to create the SLR for the Autoscaling group if it does not exist, see: https://docs.aws.amazon.com/IAM/latest/UserGuide/using-service-linked-roles.html
         try:
-            r = iam.create_service_linked_role(AWSServiceName='autoscaling.amazonaws.com')
-            slrRole = str(r['Role']['RoleName'])
-            print(f'Created Service-linked Role for Autoscaling called {slrRole}')
+            r = iam.create_service_linked_role(AWSServiceName="autoscaling.amazonaws.com")
+            slrRole = str(r["Role"]["RoleName"])
+            print(f"Created Service-linked Role for Autoscaling called {slrRole}")
         except Exception as e:
-            if str(e) == 'An error occurred (InvalidInput) when calling the CreateServiceLinkedRole operation: Service role name AWSServiceRoleForAutoScaling has been taken in this account, please try a different suffix.':
+            if (
+                str(e)
+                == "An error occurred (InvalidInput) when calling the CreateServiceLinkedRole operation: Service role name AWSServiceRoleForAutoScaling has been taken in this account, please try a different suffix."
+            ):
                 pass
             else:
-                print(f'Error encountered: {e}')
+                print(f"Error encountered: {e}")
                 RollbackManager.rollback_from_cache(cache=cache)
 
         # Then check if there are any additional authorized principals specified for the cluster to add to the below static list of principals
@@ -1132,7 +1024,7 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t
             clusterRoleArn,
             nodegroupRoleArn,
             createdByRoleArn,
-            f'arn:aws:iam::{acctId}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling'
+            f"arn:aws:iam::{acctId}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling",
         ]
 
         # Check if additional AuthZ IAM Principals are even provided. If so, add them to the list if they're not there already
If so, add them to the list if they're not there already @@ -1142,87 +1034,86 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t kmsAuthZPrincipals.append(arn) keyPolicyJson = { - 'Version':'2012-10-17', - 'Id':'ecekeypolicy', - 'Statement': [ + "Version": "2012-10-17", + "Id": "ecekeypolicy", + "Statement": [ # full key usage by whoever creates the key { - 'Sid': 'Key Creator Admin', - 'Effect': 'Allow', - 'Principal': { - 'AWS': createdByRoleArn - }, - 'Action':'kms:*', - 'Resource':'*' + "Sid": "Key Creator Admin", + "Effect": "Allow", + "Principal": {"AWS": createdByRoleArn}, + "Action": "kms:*", + "Resource": "*", }, # This allows usage of the key by the Cluster & Nodegroup and aws-managed service principals # Creator is added throughout as well # AWS Auto Scaling service role is added per: https://docs.aws.amazon.com/autoscaling/ec2/userguide/key-policy-requirements-EBS-encryption.html { - 'Sid': 'Allow use of the key', - 'Effect': 'Allow', - 'Principal': { - 'AWS': kmsAuthZPrincipals, - 'Service': [ - 'autoscaling.amazonaws.com', - 'ec2.amazonaws.com' - ] + "Sid": "Allow use of the key", + "Effect": "Allow", + "Principal": { + "AWS": kmsAuthZPrincipals, + "Service": ["autoscaling.amazonaws.com", "ec2.amazonaws.com"], }, - 'Action': [ - 'kms:Encrypt', - 'kms:Decrypt', - 'kms:ReEncrypt*', - 'kms:GenerateDataKey*', - 'kms:DescribeKey' + "Action": [ + "kms:Encrypt", + "kms:Decrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey", ], - 'Resource': '*' + "Resource": "*", }, { - 'Sid': 'Allow attachment of persistent resources', - 'Effect': 'Allow', - 'Principal': { - 'AWS': kmsAuthZPrincipals, - 'Service': [ - 'autoscaling.amazonaws.com', - 'ec2.amazonaws.com' - ] + "Sid": "Allow attachment of persistent resources", + "Effect": "Allow", + "Principal": { + "AWS": kmsAuthZPrincipals, + "Service": ["autoscaling.amazonaws.com", "ec2.amazonaws.com"], }, - 'Action': [ - 'kms:CreateGrant', - 'kms:ListGrants', - 'kms:RevokeGrant' - ], - 'Resource': '*' - } - ] + "Action": ["kms:CreateGrant", "kms:ListGrants", "kms:RevokeGrant"], + "Resource": "*", + }, + ], } # For whatever reason, role propagation is a bit delayed with registration on the KMS Resource-based resource policy side # we will sleep for a few seconds on top of using waiters to make sure they propagate and avoid errors... 
-        print(f'Attaching Key Policy to KMS Key {kms_key_arn}')
+        print(f"Attaching Key Policy to KMS Key {kms_key_arn}")

        time.sleep(20)

        try:
            kms.put_key_policy(
-                KeyId=kms_key_arn,
-                PolicyName='default',
-                Policy=json.dumps(keyPolicyJson)
+                KeyId=kms_key_arn, PolicyName="default", Policy=json.dumps(keyPolicyJson)
            )
-            print(f'Key Policy attached to {kms_key_arn}')
+            print(f"Key Policy attached to {kms_key_arn}")
        except KeyError as ke:
-            print(f'Error encountered: {ke}')
+            print(f"Error encountered: {ke}")
            RollbackManager.rollback_from_cache(cache=cache)
        except botocore.exceptions.ParamValidationError as pe:
-            print(f'Error encountered: {pe}')
+            print(f"Error encountered: {pe}")
            RollbackManager.rollback_from_cache(cache=cache)
        except botocore.exceptions.ClientError as error:
-            print(f'Error encountered: {error}')
+            print(f"Error encountered: {error}")
            RollbackManager.rollback_from_cache(cache=cache)
-
+
        # Passes various arguments to the `create_launch_template` which returns a Launch Template ID (of the latest version) to pass to the Nodegroup creation payload
-        launchTemplateId = ClusterManager.create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name, launch_template_name, kms_key_arn, securityGroupId, ebs_volume_size, instance_type, mde_on_nodes, ami_os, ami_architecture)
+        launchTemplateId = ClusterManager.create_launch_template(
+            cluster_name,
+            kubernetes_version,
+            ami_id,
+            bucket_name,
+            launch_template_name,
+            kms_key_arn,
+            securityGroupId,
+            ebs_volume_size,
+            instance_type,
+            mde_on_nodes,
+            ami_os,
+            ami_architecture,
+        )

-        print(f'Creating Nodegroup {nodegroup_name} for Cluster {clusterName}')
+        print(f"Creating Nodegroup {nodegroup_name} for Cluster {clusterName}")

        # Create and launch the Nodegroup
        try:
@@ -1230,448 +1121,426 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t
                clusterName=clusterName,
                nodegroupName=nodegroup_name,
                scalingConfig={
-                    'minSize': int(node_count),
-                    'maxSize': int(node_count) * 2,
-                    'desiredSize': int(node_count)
+                    "minSize": int(node_count),
+                    "maxSize": int(node_count) * 2,
+                    "desiredSize": int(node_count),
                },
                nodeRole=nodegroupRoleArn,
                subnets=subnet_ids,
-                launchTemplate={
-                    'id': launchTemplateId
-                },
-                capacityType='ON_DEMAND',
+                launchTemplate={"id": launchTemplateId},
+                capacityType="ON_DEMAND",
                tags={
-                    'Name': nodegroup_name,
-                    'CreatedBy': createdBy,
-                    'CreatedAt': createdAt,
-                    'CreatedWith': 'Lightspin ECE'
-                }
+                    "Name": nodegroup_name,
+                    "CreatedBy": createdBy,
+                    "CreatedAt": createdAt,
+                    "CreatedWith": "Lightspin ECE",
+                },
            )
            # Await Nodegroups to come online
            # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Waiter.NodegroupActive
-            waiter = eks.get_waiter('nodegroup_active')
-            print(f'Awaiting EKS Nodegroup {nodegroup_name} to come online')
+            waiter = eks.get_waiter("nodegroup_active")
+            print(f"Awaiting EKS Nodegroup {nodegroup_name} to come online")
            waiter.wait(
                clusterName=clusterName,
                nodegroupName=nodegroup_name,
-                WaiterConfig={
-                    'Delay': 30,
-                    'MaxAttempts': 80
-                }
+                WaiterConfig={"Delay": 30, "MaxAttempts": 80},
            )
        except botocore.exceptions.ClientError as error:
-            print(f'Error encountered: {error}')
+            print(f"Error encountered: {error}")
            RollbackManager.rollback_from_cache(cache=cache)
        except botocore.exceptions.WaiterError as we:
-            print(f'Error encountered: {we}')
+            print(f"Error encountered: {we}")
            RollbackManager.rollback_from_cache(cache=cache)

-        print(f'Creation complete. Nodegroup {nodegroup_name} in Cluster {clusterName} is online')
+        print(f"Creation complete. Nodegroup {nodegroup_name} in Cluster {clusterName} is online")

        # Retrieve region for AWS CLI kubectl generation
        session = boto3.session.Session()
        awsRegion = session.region_name

        # Setup first time cluster connection with AWS CLI
-        updateKubeconfigCmd = f'aws eks update-kubeconfig --region {awsRegion} --name {clusterName}'
-        updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        print(updateKubeconfigProc.stdout.decode('utf-8'))
+        updateKubeconfigCmd = f"aws eks update-kubeconfig --region {awsRegion} --name {clusterName}"
+        updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, capture_output=True)
+        print(updateKubeconfigProc.stdout.decode("utf-8"))

        # If additional principals are required to be authorized, attempt to do so
        if addtl_auth_principals:
            for arn in addtl_auth_principals:
                # Split out the name part of the Role
-                addtlRoleName = str(arn.split('/')[1])
+                addtlRoleName = str(arn.split("/")[1])
                # Create a patch object to add into
-                newAuthZScript=f'''ROLE=" - rolearn: {arn}\\n username: {addtlRoleName}\\n groups:\\n - system:masters"
-                kubectl get -n kube-system configmap/aws-auth -o yaml | awk "/mapRoles: \|/{{print;print \\"$ROLE\\";next}}1" > /tmp/aws-auth-patch.yml
+                newAuthZScript = f"""ROLE=" - rolearn: {arn}\\n username: {addtlRoleName}\\n groups:\\n - system:masters"
+                kubectl get -n kube-system configmap/aws-auth -o yaml | awk "/mapRoles: \\|/{{print;print \\"$ROLE\\";next}}1" > /tmp/aws-auth-patch.yml
                kubectl patch configmap/aws-auth -n kube-system --patch "$(cat /tmp/aws-auth-patch.yml)"
-                '''
+                """

-                newAuthZScriptProc = subprocess.run(newAuthZScript, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                print(newAuthZScriptProc.stdout.decode('utf-8'))
+                newAuthZScriptProc = subprocess.run(newAuthZScript, shell=True, capture_output=True)
+                print(newAuthZScriptProc.stdout.decode("utf-8"))

-        '''
+        """
        Send a call into plugins.ECEFalco
-        '''
-        if falco_bool == 'True':
+        """
+        if falco_bool == "True":
            FalcoSetup.falco_initialization(
-                cluster_name=clusterName,
-                falco_mode='Create',
-                falco_sidekick_destination_type=falco_sidekick_destination_type,
+                cluster_name=clusterName,
+                falco_mode="Create",
+                falco_sidekick_destination_type=falco_sidekick_destination_type,
                falco_sidekick_destination=falco_sidekick_destination,
-                datadog_api_key=datadog_api_key
+                datadog_api_key=datadog_api_key,
            )

-        '''
+        """
        Send a call into plugins.ECEDatadog
-        '''
-        if datadog_bool == 'True':
+        """
+        if datadog_bool == "True":
            DatadogSetup.initialization(
-                cluster_name=clusterName,
-                datadog_mode='Create',
-                datadog_api_key=datadog_api_key
+                cluster_name=clusterName, datadog_mode="Create", datadog_api_key=datadog_api_key
            )

-'''
+
+
+"""
This Class handles all update tasks to the Clusters, such as version bumps to latest Kubernetes Versions
-'''
-class UpdateManager():
-
+"""
+
+
+class UpdateManager:
    def update_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version):
-        '''
+        """
        This function attempts to update existing Cluster and Nodegroup to a specified Kubernetes Version by invoking separate functions after a basic version match test
-        '''
+        """

-        eks = boto3.client('eks')
+        eks = boto3.client("eks")

        # Lookup EKS Cluster to see if specified K8s version from main.py matches, if so exit
        try:
-            existingClusterVersion = eks.describe_cluster(name=cluster_name)['cluster']['version']
+            existingClusterVersion = eks.describe_cluster(name=cluster_name)["cluster"]["version"]
            if existingClusterVersion == kubernetes_version:
-                print(f'EKS Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting')
+                print(
+                    f"EKS Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting"
+                )
                sys.exit(2)
            else:
-                print(f'EKS Cluster {cluster_name} is viable to update from Kubernetes version {existingClusterVersion} to {kubernetes_version}')
+                print(
+                    f"EKS Cluster {cluster_name} is viable to update from Kubernetes version {existingClusterVersion} to {kubernetes_version}"
+                )
        except botocore.exceptions.ClientError as error:
            # A 'ResourceNotFoundException' means the EKS Cluster does not exist, so abort
-            if error.response['Error']['Code'] == 'ResourceNotFoundException':
-                print(f'EKS Cluster {cluster_name} does not exist! Aborting')
+            if error.response["Error"]["Code"] == "ResourceNotFoundException":
+                print(f"EKS Cluster {cluster_name} does not exist! Aborting")
                sys.exit(2)
            else:
                raise error

        # Lookup EKS Nodegroup to see if specified K8s version from main.py matches, if so exit
        try:
-            existingNodegroupVersion = eks.describe_cluster(name=cluster_name,nodegroupName=nodegroup_name)['nodegroup']['version']
+            existingNodegroupVersion = eks.describe_nodegroup(
+                clusterName=cluster_name, nodegroupName=nodegroup_name
+            )["nodegroup"]["version"]
            if existingNodegroupVersion == kubernetes_version:
-                print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting')
+                print(
+                    f"EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting"
+                )
                sys.exit(2)
            else:
-                print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is viable to update from Kubernetes version {existingNodegroupVersion} to {kubernetes_version}')
+                print(
+                    f"EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is viable to update from Kubernetes version {existingNodegroupVersion} to {kubernetes_version}"
+                )
        except botocore.exceptions.ClientError as error:
            # A 'ResourceNotFoundException' means the EKS Nodegroup does not exist, so abort
-            if error.response['Error']['Code'] == 'ResourceNotFoundException':
-                print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} does not exist! Aborting')
+            if error.response["Error"]["Code"] == "ResourceNotFoundException":
+                print(
+                    f"EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} does not exist! Aborting"
+                )
                sys.exit(2)
            else:
                raise error

-        UpdateManager.update_nodegroup_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version)
+        UpdateManager.update_nodegroup_kubernetes_version(
+            cluster_name, nodegroup_name, kubernetes_version
+        )
        UpdateManager.update_cluster_kubernetes_version(cluster_name, kubernetes_version)

    def update_nodegroup_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version):
-        '''
+        """
        This function carries out the update and waiter for EKS Nodegroup K8s version bumps
-        '''
-        print(f'Updating Kubernetes version for EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}')
+        """
+        print(
+            f"Updating Kubernetes version for EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}"
+        )

-        eks = boto3.client('eks')
+        eks = boto3.client("eks")

        # Update the Nodegroup K8s version and parse the EKS Update ID for later use
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Client.update_nodegroup_version
        r = eks.update_nodegroup_version(
-            clusterName=cluster_name,
-            nodegroupName=nodegroup_name,
-            version=kubernetes_version
+            clusterName=cluster_name, nodegroupName=nodegroup_name, version=kubernetes_version
        )
-        updateId = str(r['update']['id'])
+        updateId = str(r["update"]["id"])

-        print(f'Monitoring EKS Update ID {updateId} for failure or success state.')
+        print(f"Monitoring EKS Update ID {updateId} for failure or success state.")

        # Use a `while True` loop and 15 second sleeps to watch the update progress of the cluster
        # Break the loop on Success, continue on 'InProgress', and exit code 2 on failures or cancellations
        while True:
            d = eks.describe_update(
-                name=cluster_name,
-                updateId=updateId,
-                nodegroupName=nodegroup_name
+                name=cluster_name, updateId=updateId, nodegroupName=nodegroup_name
            )
-            updateStatus = str(d['update']['status'])
+            updateStatus = str(d["update"]["status"])
            # if/else logic time
-            if updateStatus == 'Successful':
-                print(f'Nodegroup {nodegroup_name} in Cluster {cluster_name} has been successfully updated.')
+            if updateStatus == "Successful":
+                print(
+                    f"Nodegroup {nodegroup_name} in Cluster {cluster_name} has been successfully updated."
+                )
                break
-            elif updateStatus == 'Failed' or 'Cancelled':
-                errorMessage = str(d['update']['errors'])
-                print(f'Nodegroup {nodegroup_name} in Cluster {cluster_name} update has been cancelled or has failed!')
-                print(f'Error message: {errorMessage}')
+            elif updateStatus in ("Failed", "Cancelled"):
+                errorMessage = str(d["update"]["errors"])
+                print(
+                    f"Nodegroup {nodegroup_name} in Cluster {cluster_name} update has been cancelled or has failed!"
+                )
+                print(f"Error message: {errorMessage}")
                sys.exit(2)
            else:
-                print(f'Awaiting update status change for 15 more seconds...')
+                print(f"Awaiting update status change for 15 more seconds...")
                del d
                del updateStatus
                time.sleep(15)
                continue

    def update_cluster_kubernetes_version(cluster_name, kubernetes_version):
-        '''
+        """
        This function carries out the update and waiter for EKS Cluster K8s version bumps
-        '''
-        print(f'Updating Kubernetes version for EKS Cluster {cluster_name}')
+        """
+        print(f"Updating Kubernetes version for EKS Cluster {cluster_name}")

-        eks = boto3.client('eks')
+        eks = boto3.client("eks")

        # Update the Cluster K8s version and parse the EKS Update ID for later use
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Client.update_cluster_version
-        r = eks.update_nodegroup_version(
-            clusterName=cluster_name,
-            version=kubernetes_version
-        )
-        updateId = str(r['update']['id'])
+        r = eks.update_cluster_version(name=cluster_name, version=kubernetes_version)
+        updateId = str(r["update"]["id"])

-        print(f'Monitoring EKS Update ID {updateId} for failure or success state.')
+        print(f"Monitoring EKS Update ID {updateId} for failure or success state.")

        # Use a `while True` loop and 15 second sleeps to watch the update progress of the cluster
        # Break the loop on Success, continue on 'InProgress', and exit code 2 on failures or cancellations
        while True:
-            d = eks.describe_update(
-                name=cluster_name,
-                updateId=updateId
-            )
-            updateStatus = str(d['update']['status'])
+            d = eks.describe_update(name=cluster_name, updateId=updateId)
+            updateStatus = str(d["update"]["status"])
            # if/else logic time
-            if updateStatus == 'Successful':
-                print(f'Cluster {cluster_name} has been successfully updated.')
+            if updateStatus == "Successful":
+                print(f"Cluster {cluster_name} has been successfully updated.")
                break
-            elif updateStatus == 'Failed' or 'Cancelled':
-                errorMessage = str(d['update']['errors'])
-                print(f'Cluster {cluster_name} update has been cancelled or has failed!')
-                print(f'Error message: {errorMessage}')
+            elif updateStatus in ("Failed", "Cancelled"):
+                errorMessage = str(d["update"]["errors"])
+                print(f"Cluster {cluster_name} update has been cancelled or has failed!")
+                print(f"Error message: {errorMessage}")
                sys.exit(2)
            else:
-                print(f'Awaiting update status change for 15 more seconds...')
+                print(f"Awaiting update status change for 15 more seconds...")
                del d
                del updateStatus
                time.sleep(15)
                continue

-'''
+
+"""
Despite its name, this Class contains methods to conduct emergency deletions (rollback) from Cache as well as normal deletions from main.py commands
this is purely for Create mode, other Classes may have their own self-contained rollback mechanism
-'''
-class RollbackManager():
+"""
+

-    def scheduled_deletion(nodegroup_name, cluster_name, cluster_role_name, nodegroup_role_name, launch_template_name):
-        '''
+class RollbackManager:
+    def scheduled_deletion(
+        nodegroup_name, cluster_name, cluster_role_name, nodegroup_role_name, launch_template_name
+    ):
+        """
        This function performs a graceful, scheduled deletion of all resources - or attempts to at least
-        '''
-        eks = boto3.client('eks')
+        """
+        eks = boto3.client("eks")

-        print(f'Deletion command received. Attempting to delete all resources')
+        print(f"Deletion command received. Attempting to delete all resources")

        # Retrieve the Security Groups from the Cluster to delete, as they are not provided as arguments and cannot be guessed (ID's and all that...)
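+        # NOTE: the deletion order below is deliberate - the Nodegroup has to go before the
+        # Cluster, and the Security Groups go last because they cannot be deleted while ENIs
+        # from the Nodes still reference them.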
sgList = [] - for sg in eks.describe_cluster(name=cluster_name)['cluster']['resourcesVpcConfig']['securityGroupIds']: + for sg in eks.describe_cluster(name=cluster_name)["cluster"]["resourcesVpcConfig"][ + "securityGroupIds" + ]: sgList.append(sg) # First, attempt to delete Nodegroup - RollbackManager.delete_nodegroup( - nodegroup_name=nodegroup_name, - cluster_name=cluster_name - ) + RollbackManager.delete_nodegroup(nodegroup_name=nodegroup_name, cluster_name=cluster_name) # Then, try to find the Cluster KMS Key and attempt to delete it try: - kmsKeyArn= eks.describe_cluster(name=cluster_name)['cluster']['encryptionConfig'][0]['provider']['keyArn'] + kmsKeyArn = eks.describe_cluster(name=cluster_name)["cluster"]["encryptionConfig"][0][ + "provider" + ]["keyArn"] except Exception: kmsKeyArn = None - + if kmsKeyArn != None: - RollbackManager.delete_kms_key( - kms_key_arn=kmsKeyArn - ) + RollbackManager.delete_kms_key(kms_key_arn=kmsKeyArn) # Next, attempt to delete Cluster - RollbackManager.delete_cluster( - cluster_name=cluster_name - ) + RollbackManager.delete_cluster(cluster_name=cluster_name) # Next, attempt to delete all related IAM RollbackManager.delete_eks_iam( - cluster_role_name=cluster_role_name, - nodegroup_role_name=nodegroup_role_name + cluster_role_name=cluster_role_name, nodegroup_role_name=nodegroup_role_name ) # Next, attempt to delete the EC2 Launch Template - RollbackManager.delete_launch_template( - launch_template_name=launch_template_name - ) + RollbackManager.delete_launch_template(launch_template_name=launch_template_name) # Finally, loop the retrieved SGs and then delete them for sg in sgList: - print(f'Trying to delete EC2 Security Group {sg}') - RollbackManager.delete_security_groups( - cluster_security_group_id=sg - ) + print(f"Trying to delete EC2 Security Group {sg}") + RollbackManager.delete_security_groups(cluster_security_group_id=sg) - print(f'Deletion complete. Confirm resource deletion in Console in case of errors') + print(f"Deletion complete. Confirm resource deletion in Console in case of errors") def rollback_from_cache(cache): - ''' + """ This function is invoked during any error encountered during the creation process in the `ClusterManager` Class - a Cache is passed and any resource that would be created is attempted to be deleted as the failures can occur at any stage - ''' + """ - print(f'Error encountered! Rollback from cache initiated.') - eks = boto3.client('eks') + print(f"Error encountered! 
Rollback from cache initiated.')
+        print(f"Error encountered! Rollback from cache initiated.")
-        eks = boto3.client('eks')
+        eks = boto3.client("eks")

        # pull vars from Cache
-        nodegroupName = str(cache[0]['NodegroupName'])
-        clusterName = str(cache[0]['ClusterName'])
-        clusterRoleName = str(cache[0]['ClusterRoleName'])
-        nodegroupRoleName = str(cache[0]['NodegroupRoleName'])
-        launchTemplateName = str(cache[0]['LaunchTemplateName'])
-        clusterSgId = str(cache[1]['ClusterSecurityGroupId'])
+        nodegroupName = str(cache[0]["NodegroupName"])
+        clusterName = str(cache[0]["ClusterName"])
+        clusterRoleName = str(cache[0]["ClusterRoleName"])
+        nodegroupRoleName = str(cache[0]["NodegroupRoleName"])
+        launchTemplateName = str(cache[0]["LaunchTemplateName"])
+        clusterSgId = str(cache[1]["ClusterSecurityGroupId"])

        # First, attempt to delete Nodegroup
-        RollbackManager.delete_nodegroup(
-            nodegroup_name=nodegroupName,
-            cluster_name=clusterName
-        )
+        RollbackManager.delete_nodegroup(nodegroup_name=nodegroupName, cluster_name=clusterName)

        # Then, try to find the Cluster KMS Key and attempt to delete it
        try:
-            kmsKeyArn= eks.describe_cluster(name=clusterName)['cluster']['encryptionConfig'][0]['provider']['keyArn']
+            kmsKeyArn = eks.describe_cluster(name=clusterName)["cluster"]["encryptionConfig"][0][
+                "provider"
+            ]["keyArn"]
        except Exception:
            kmsKeyArn = None

        if kmsKeyArn != None:
-            RollbackManager.delete_kms_key(
-                kms_key_arn=kmsKeyArn
-            )
+            RollbackManager.delete_kms_key(kms_key_arn=kmsKeyArn)

        # Next, attempt to delete Cluster
-        RollbackManager.delete_cluster(
-            cluster_name=clusterName
-        )
+        RollbackManager.delete_cluster(cluster_name=clusterName)

        # Next, attempt to delete all related IAM
        RollbackManager.delete_eks_iam(
-            cluster_role_name=clusterRoleName,
-            nodegroup_role_name=nodegroupRoleName
+            cluster_role_name=clusterRoleName, nodegroup_role_name=nodegroupRoleName
        )

        # Next, attempt to delete the EC2 Launch Template
-        RollbackManager.delete_launch_template(
-            launch_template_name=launchTemplateName
-        )
+        RollbackManager.delete_launch_template(launch_template_name=launchTemplateName)

        # Finally, delete the Security Groups
-        RollbackManager.delete_security_groups(
-            cluster_security_group_id=clusterSgId
-        )
+        RollbackManager.delete_security_groups(cluster_security_group_id=clusterSgId)

-        print(f'Rollback complete. Confirm resource deletion in Console in case of errors')
+        print(f"Rollback complete. Confirm resource deletion in Console in case of errors")

        del cache
        sys.exit(2)

    def delete_nodegroup(cluster_name, nodegroup_name):
-        '''
+        """
        This function attempts to delete an EKS Nodegroup
-        '''
-        print(f'Attempting to delete EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}.')
+        """
+        print(f"Attempting to delete EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}.")

-        eks = boto3.client('eks')
+        eks = boto3.client("eks")

        try:
-            eks.delete_nodegroup(
-                clusterName=cluster_name,
-                nodegroupName=nodegroup_name
-            )
+            eks.delete_nodegroup(clusterName=cluster_name, nodegroupName=nodegroup_name)
        except botocore.exceptions.ClientError as error:
-            print(f'Rollback error encounter {error}')
+            print(f"Rollback error encountered: {error}")

        # Wait for the Nodegroup to be fully deleted before deleting the Cluster
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Waiter.NodegroupDeleted
-        print(f'Awaiting deletion of EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}.')
+        print(f"Awaiting deletion of EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}.")

-        waiter = eks.get_waiter('nodegroup_deleted')
+        waiter = eks.get_waiter("nodegroup_deleted")

        waiter.wait(
            clusterName=cluster_name,
            nodegroupName=nodegroup_name,
-            WaiterConfig={
-                'Delay': 30,
-                'MaxAttempts': 40
-            }
+            WaiterConfig={"Delay": 30, "MaxAttempts": 40},
        )

-        print(f'EKS Nodegroups rolled back.')
+        print(f"EKS Nodegroups rolled back.")

        del eks

    def delete_cluster(cluster_name):
-        '''
+        """
        This function attempts to delete an EKS Cluster
-        '''
-        print(f'Attempting to delete EKS Cluster {cluster_name}.')
+        """
+        print(f"Attempting to delete EKS Cluster {cluster_name}.")

-        eks = boto3.client('eks')
+        eks = boto3.client("eks")

        try:
-            eks.delete_cluster(
-                name=cluster_name
-            )
+            eks.delete_cluster(name=cluster_name)
        except botocore.exceptions.ClientError as error:
-            print(f'Rollback error encounter {error}')
+            print(f"Rollback error encountered: {error}")

        # Wait for the Cluster to be fully deleted before deleting the IAM Roles
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Waiter.ClusterDeleted
-        print(f'Awaiting deletion of EKS Cluster {cluster_name}.')
+        print(f"Awaiting deletion of EKS Cluster {cluster_name}.")

-        waiter = eks.get_waiter('cluster_deleted')
+        waiter = eks.get_waiter("cluster_deleted")

-        waiter.wait(
-            name=cluster_name,
-            WaiterConfig={
-                'Delay': 30,
-                'MaxAttempts': 123
-            }
-        )
+        waiter.wait(name=cluster_name, WaiterConfig={"Delay": 30, "MaxAttempts": 123})

-        print(f'EKS Clusters rolled back.')
+        print(f"EKS Clusters rolled back.")

        del eks

    def delete_eks_iam(cluster_role_name, nodegroup_role_name):
-        '''
+        """
        This function attempts to delete all related IAM entities for EKS (Cluster roles, Nodegroup roles, Nodegroup policies)
-        '''
-        print(f'Attempting to delete various IAM entities. IAM Roles {cluster_role_name} and {nodegroup_role_name} and IAM Policy {nodegroup_role_name}Policy.')
+        """
+        print(
+            f"Attempting to delete various IAM entities. IAM Roles {cluster_role_name} and {nodegroup_role_name} and IAM Policy {nodegroup_role_name}Policy."
+        )

-        iam = boto3.client('iam')
-        sts = boto3.client('sts')
-        account = sts.get_caller_identity()['Account']
+        iam = boto3.client("iam")
+        sts = boto3.client("sts")
+        account = sts.get_caller_identity()["Account"]

        # Assemble an IAM Policy ARN for nodegroup
-        nodegroupS3PolicyArn = f'arn:aws:iam::{account}:policy/{nodegroup_role_name}Policy'
+        nodegroupS3PolicyArn = f"arn:aws:iam::{account}:policy/{nodegroup_role_name}Policy"

        # Find and detach all policies from the Cluster Role
        try:
-            for policy in iam.list_attached_role_policies(RoleName=cluster_role_name)['AttachedPolicies']:
-                policyArn = str(policy['PolicyArn'])
-                iam.detach_role_policy(
-                    RoleName=cluster_role_name,
-                    PolicyArn=policyArn
-                )
+            for policy in iam.list_attached_role_policies(RoleName=cluster_role_name)[
+                "AttachedPolicies"
+            ]:
+                policyArn = str(policy["PolicyArn"])
+                iam.detach_role_policy(RoleName=cluster_role_name, PolicyArn=policyArn)
        except botocore.exceptions.ClientError as error:
-            print(f'Rollback error encounter {error}')
+            print(f"Rollback error encountered: {error}")

        # Detach all Policies from Nodegroup cluster
        try:
-            for policy in iam.list_attached_role_policies(RoleName=nodegroup_role_name)['AttachedPolicies']:
-                policyArn = str(policy['PolicyArn'])
-                iam.detach_role_policy(
-                    RoleName=nodegroup_role_name,
-                    PolicyArn=policyArn
-                )
+            for policy in iam.list_attached_role_policies(RoleName=nodegroup_role_name)[
+                "AttachedPolicies"
+            ]:
+                policyArn = str(policy["PolicyArn"])
+                iam.detach_role_policy(RoleName=nodegroup_role_name, PolicyArn=policyArn)
        except botocore.exceptions.ClientError as error:
-            print(f'Rollback error encounter {error}')
+            print(f"Rollback error encountered: {error}")

        try:
            iam.delete_policy(PolicyArn=nodegroupS3PolicyArn)
@@ -1681,15 +1550,14 @@ def delete_eks_iam(cluster_role_name, nodegroup_role_name):
        try:
            iam.delete_role(RoleName=cluster_role_name)
        except botocore.exceptions.ClientError as error:
-            print(f'Rollback error encounter {error}')
+            print(f"Rollback error encountered: {error}")

        try:
            iam.delete_role(RoleName=nodegroup_role_name)
        except botocore.exceptions.ClientError as error:
-            print(f'Rollback error encounter {error}')
-
+            print(f"Rollback error encountered: {error}")

-        print(f'IAM Roles and Policies rolled back.')
+        print(f"IAM Roles and Policies rolled back.")

        del iam
        del sts
@@ -1697,58 +1565,52 @@ def delete_eks_iam(cluster_role_name, nodegroup_role_name):
        del nodegroupS3PolicyArn

    def delete_launch_template(launch_template_name):
-        '''
+        """
        This function attempts to delete the EC2 Launch Template used for EKS Nodegroups
-        '''
-        print(f'Attempting to delete EC2 launch template {launch_template_name}.')
+        """
+        print(f"Attempting to delete EC2 launch template {launch_template_name}.")

-        ec2 = boto3.client('ec2')
+        ec2 = boto3.client("ec2")

        try:
-            ec2.delete_launch_template(
-                DryRun=False,
-                LaunchTemplateName=launch_template_name
-            )
+            ec2.delete_launch_template(DryRun=False, LaunchTemplateName=launch_template_name)
        except botocore.exceptions.ClientError as error:
-            print(f'Rollback error encounter {error}')
+            print(f"Rollback error encountered: {error}")

-        print(f'EC2 Launch Templates rolled back.')
+        print(f"EC2 Launch Templates rolled back.")

        del ec2

    def delete_security_groups(cluster_security_group_id):
-        '''
+        """
        This function attempts to delete the EC2 Security Groups used for EKS Clusters and Nodegroups
-        '''
-        print(f'Attempting to delete EC2 Security Group {cluster_security_group_id}')
+        """
+        print(f"Attempting to delete EC2 Security Group {cluster_security_group_id}")

-        ec2 = boto3.client('ec2')
+        ec2 = boto3.client("ec2")

        try:
            ec2.delete_security_group(GroupId=cluster_security_group_id)
        except botocore.exceptions.ClientError as error:
-            print(f'Rollback error encounter {error}')
+            print(f"Rollback error encountered: {error}")

-        print(f'Security Group rolled back')
+        print(f"Security Group rolled back")

        del ec2

    def delete_kms_key(kms_key_arn):
-        '''
+        """
        This function attempts to delete the KMS Key used for EKS Envelope Encryption
-        '''
-        print(f'Attempting to delete KMS Key ARN {kms_key_arn}')
+        """
+        print(f"Attempting to delete KMS Key ARN {kms_key_arn}")

-        kms = boto3.client('kms')
+        kms = boto3.client("kms")

        try:
-            kms.schedule_key_deletion(
-                KeyId=kms_key_arn,
-                PendingWindowInDays=7
-            )
+            kms.schedule_key_deletion(KeyId=kms_key_arn, PendingWindowInDays=7)
        except botocore.exceptions.ClientError as error:
-            print(f'Rollback error encounter {error}')
+            print(f"Rollback error encountered: {error}")

-        print(f'KMS Key rolled back')
+        print(f"KMS Key rolled back")

-        del kms
\ No newline at end of file
+        del kms
diff --git a/README.md b/README.md
index 5b49537..e72c98b 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

 The Amazon Elastic Kubernetes Service (EKS) Creation Engine (ECE) is a Python command-line program created by the Lightspin Office of the CISO to facilitate the creation and enablement of secure EKS Clusters, optionally further assured with continual Kubernetes Security Posture Management (KSPM), Runtime Protection, and Application Performance Monitoring (APM) capabilities.

-## What is this :eyes: :eyes: ??
+## What is this :eyes: :eyes: ??

 As stated above, the ECE is a Python utility to create a fully functioning EKS Cluster, complete with Nodegroups which are built off of EC2 Launch Templates as it was meant for creating EKS Nodegroups with custom AMIs with custom bootstrapping.

@@ -73,10 +73,39 @@ We are happy to take contributions from anywhere that will help expand this proj
 - Spot provider & Fargate Profile support for Nodegroups, and an option to not use Nodegroups
 - Create more Plugins for various utilities (e.g., Calico, OPA, NGINX Ingress Controller, etc.)

+### Basic Contributing Setup
+
+1. Fork the repository.
+2. Clone your fork and enter the `eks-creation-engine` directory.
+3. Get your Python things Python-y.
+
+```bash
+# Add upstream
+git remote add upstream https://github.com/lightspin-tech/eks-creation-engine.git
+
+# Create virtual env
+python3 -m venv .env --prompt ece
+
+# Enter virtual env
+source .env/bin/activate
+
+# Install ECE reqs
+pip3 install -r requirements.txt
+
+# Install pre-commit
+pip3 install pre-commit
+
+# Ensure pre-commit runs... pre... commit
+pre-commit install
+
+# Init the pre-commit env and run checks
+pre-commit run -a
+```
+
 ## Contact Us :telephone_receiver: :telephone_receiver:

 For more information, contact us at support@lightspin.io.

 ## License :eight_spoked_asterisk: :eight_spoked_asterisk:

-This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE).
\ No newline at end of file
+This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE).
diff --git a/docs/HOWTO.md b/docs/HOWTO.md
index 17317a5..1382f33 100644
--- a/docs/HOWTO.md
+++ b/docs/HOWTO.md
@@ -347,4 +347,4 @@ For more information, contact us at support@lightspin.io.
## License :eight_spoked_asterisk: :eight_spoked_asterisk: -This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE). \ No newline at end of file +This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE). diff --git a/main.py b/main.py index dd00a00..cc94451 100644 --- a/main.py +++ b/main.py @@ -1,154 +1,164 @@ -#This file is part of Lightspin EKS Creation Engine. -#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an -#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. - +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
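+# main.py is the CLI entry point: it parses the arguments defined below and dispatches each
+# --mode to a "preflight check" that validates required inputs before any AWS calls are made.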
+import argparse
 import json
-import sys
 import re
+import subprocess
+import sys
+
 import boto3
 import botocore
-import argparse
-import subprocess
-from art import text2art
 import termcolor
-from clint.textui import colored, puts
-from EksCreationEngine import ClusterManager, UpdateManager, RollbackManager
+from art import text2art
+from clint.textui import colored
+from clint.textui import puts
+
+from EksCreationEngine import ClusterManager
+from EksCreationEngine import RollbackManager
+from EksCreationEngine import UpdateManager
 from plugins.ECEDatadog import DatadogSetup
 from plugins.ECEFalco import FalcoSetup
 from plugins.ECESecurity import SecurityAssessment

+
 def print_logo():
    textArt = text2art("EKS CREATION ENGINE")
-    print(termcolor.colored(textArt, 'red'))
+    print(termcolor.colored(textArt, "red"))
    puts(colored.red("CREATED BY THE LIGHTSPIN OFFICE OF THE CISO"))
-    puts(colored.red("For more information about Lightspin reach out to support@lightspin.io or visit us at https://lightspin.io"))
+    puts(
+        colored.red(
+            "For more information about Lightspin reach out to support@lightspin.io or visit us at https://lightspin.io"
+        )
+    )
+

 def stay_dangerous():
    textArt = text2art("STAY DANGEROUS")
-    print(termcolor.colored(textArt, 'red'))
+    print(termcolor.colored(textArt, "red"))
    puts(colored.red("With Love, the Lightspin Office of the CISO"))

+
 def create_preflight_check():
-    '''
+    """
    This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before attempting to execute them.
-    '''
+    """
    print_logo()

-    eks = boto3.client('eks')
+    eks = boto3.client("eks")

    # Conditional check to ensure that AMI ID (if provided) matches regex
    amiId = args.ami_id
-    if amiId != 'SSM':
+    if amiId != "SSM":
        # AMI Regex
-        amiRegex = re.compile('^(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\.){3}[0-9]{1,3})(?:\s*,\s*(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\.){3}[0-9]{1,3}))*$')
+        amiRegex = re.compile(
+            "^(?:(?:ami)(?:-[a-zA-Z0-9]+)?\\b|(?:[0-9]{1,3}\\.){3}[0-9]{1,3})(?:\\s*,\\s*(?:(?:ami)(?:-[a-zA-Z0-9]+)?\\b|(?:[0-9]{1,3}\\.){3}[0-9]{1,3}))*$"
+        )
        # Attempt to match
        amiRegexCheck = amiRegex.search(amiId)
        if not amiRegexCheck:
-            print(f'Improperly AMI ID provided, does not match regex, check value and submit request again')
+            print(
+                f"Improper AMI ID provided, does not match regex, check value and submit request again"
+            )
            sys.exit(2)

    # Check if an EKS Cluster exists for provided name
    try:
-        eks.describe_cluster(
-            name=clusterName
-        )
+        eks.describe_cluster(name=clusterName)
    except botocore.exceptions.ClientError as error:
        # A "ResourceNotFoundException" means the cluster doesn't exist - which is what we want
-        if error.response['Error']['Code'] == 'ResourceNotFoundException':
+        if error.response["Error"]["Code"] == "ResourceNotFoundException":
            pass
        else:
-            print(f'An EKS Cluster with the name {clusterName} already exists. Please specify another name and try again')
+            print(
+                f"An EKS Cluster with the name {clusterName} already exists. Please specify another name and try again"
+            )
            sys.exit(2)

    # Check if an EKS Nodegroup exists for provided name
    try:
-        eks.describe_nodegroup(
-            clusterName=clusterName,
-            nodegroupName=nodegroupName
-        )
+        eks.describe_nodegroup(clusterName=clusterName, nodegroupName=nodegroupName)
    except botocore.exceptions.ClientError as error:
        # A "ResourceNotFoundException" means the cluster/nodegroup doesn't exist - which is what we want
-        if error.response['Error']['Code'] == 'ResourceNotFoundException':
+        if error.response["Error"]["Code"] == "ResourceNotFoundException":
            pass
        else:
-            print(f'An EKS Nodegroup with the name {nodegroupName} already exists. Please specify another name and try again')
+            print(
+                f"An EKS Nodegroup with the name {nodegroupName} already exists. Please specify another name and try again"
+            )
            sys.exit(2)

    # Check for a provided VPC
    if vpcId == None:
-        print(f'VPC ID is required for cluster creation. Please specify a VPC ID and try again.')
+        print(f"VPC ID is required for cluster creation. Please specify a VPC ID and try again.")
        sys.exit(2)

    # Check for non-empty lists for Subnets
    if args.subnets:
        pass
    else:
-        print(f'Subnets need to be specified for cluster creation')
+        print(f"Subnets need to be specified for cluster creation")
        sys.exit(2)

    # Ensure an S3 Bucket was provided if MDE installation is true
-    if installMdeOnNodes == 'True':
+    if installMdeOnNodes == "True":
        if bucketName == None:
-            print(f'S3 Bucket name was not provided. Please provide a valid S3 Bucket and try again')
+            print(
+                f"S3 Bucket name was not provided. Please provide a valid S3 Bucket and try again"
+            )
            sys.exit(2)

    # Ensure a Datadog API key is provided if Datadog installation is true
-    if datadogBool == 'True':
+    if datadogBool == "True":
        if datadogApiKey == None:
-            print(f'Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again.')
+            print(
+                f"Datadog setup was specified but a Datadog API key was not provided. Please provide a valid API key and try again."
+            )
            sys.exit(2)

    # Print out creation specification - in the future this will be a "state file" for the cluster
    specDict = {
-        'K8sVersion': k8sVersion,
-        'S3BucketName': bucketName,
-        'EBSVolumeSize': ebsVolumeSize,
-        'AmiId': amiId,
-        'InstanceType': instanceType,
-        'ClusterName': clusterName,
-        'ClusterRoleName': clusterRoleName,
-        'NodegroupName': nodegroupName,
-        'NodegroupRoleName': nodegroupRoleName,
-        'LaunchTemplateName': launchTemplateName,
-        'VpcId': vpcId,
-        'SubnetIds': subnetIds,
-        'NodeCount': eksNodeCount,
-        'MDEOnNodes?': installMdeOnNodes,
-        'AdditionalPorts': additionalPorts,
-        'InstallFalco?': falcoBool,
-        'FalcoDestinationType': falcoDestType,
-        'FalcoDestination': falcoDest,
-        'AmiOperatingSystem': amiOs,
-        'AmiArhcitecture': amiArchitecture,
-        'DatadogApiKey': datadogApiKey,
-        'InstallDatadog?': datadogBool,
-        'AdditionalAuthorizedPrincipals': additionalAuthZPrincipals
+        "K8sVersion": k8sVersion,
+        "S3BucketName": bucketName,
+        "EBSVolumeSize": ebsVolumeSize,
+        "AmiId": amiId,
+        "InstanceType": instanceType,
+        "ClusterName": clusterName,
+        "ClusterRoleName": clusterRoleName,
+        "NodegroupName": nodegroupName,
+        "NodegroupRoleName": nodegroupRoleName,
+        "LaunchTemplateName": launchTemplateName,
+        "VpcId": vpcId,
+        "SubnetIds": subnetIds,
+        "NodeCount": eksNodeCount,
+        "MDEOnNodes?": installMdeOnNodes,
+        "AdditionalPorts": additionalPorts,
+        "InstallFalco?": falcoBool,
+        "FalcoDestinationType": falcoDestType,
+        "FalcoDestination": falcoDest,
+        "AmiOperatingSystem": amiOs,
+        "AmiArchitecture": amiArchitecture,
+        "DatadogApiKey": datadogApiKey,
+        "InstallDatadog?": datadogBool,
+        "AdditionalAuthorizedPrincipals": additionalAuthZPrincipals,
    }
-    print(f'The following attributes are set for your EKS Cluster')
-    print(
-        json.dumps(
-            specDict,
-            indent=4
-        )
-    )
+    print(f"The following attributes are set for your EKS Cluster")
+    print(json.dumps(specDict, indent=4))

    # TODO: Save state?
    del specDict
@@ -176,11 +186,12 @@ def create_preflight_check():
        ami_architecture=amiArchitecture,
        datadog_api_key=datadogApiKey,
        datadog_bool=datadogBool,
-        addtl_auth_principals=additionalAuthZPrincipals
+        addtl_auth_principals=additionalAuthZPrincipals,
    )

    stay_dangerous()

+
 def delete_preflight_check():

    print_logo()
@@ -190,162 +201,165 @@ def delete_preflight_check():
        cluster_role_name=clusterRoleName,
        nodegroup_name=nodegroupName,
        nodegroup_role_name=nodegroupRoleName,
-        launch_template_name=launchTemplateName
+        launch_template_name=launchTemplateName,
    )

    stay_dangerous()

+
 def update_preflight_check():

    print_logo()

    # Call the `update_kubernetes_version` function and attempt to version bump K8s of Clusters & Nodes
    UpdateManager.update_kubernetes_version(
-        cluster_name=clusterName,
-        kubernetes_version=k8sVersion,
-        nodegroup_name=nodegroupName
+        cluster_name=clusterName, kubernetes_version=k8sVersion, nodegroup_name=nodegroupName
    )

    stay_dangerous()

+
 def assessment_preflight_check():
-    '''
+    """
    This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before attempting to execute them.
-    '''
+    """
    print_logo()

-    eks = boto3.client('eks')
+    eks = boto3.client("eks")

    # Check if an EKS Cluster exists for provided name
    try:
-        eks.describe_cluster(
-            name=clusterName
-        )
+        eks.describe_cluster(name=clusterName)
    except botocore.exceptions.ClientError as error:
        # A "ResourceNotFoundException" means the cluster doesn't exist, so abort
-        if error.response['Error']['Code'] == 'ResourceNotFoundException':
-            print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again')
+        if error.response["Error"]["Code"] == "ResourceNotFoundException":
+            print(
+                f"An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again"
+            )
            sys.exit(2)
        else:
            pass

-    print(f'Downloading latest Kube-bench EKS config YAML')
+    print(f"Downloading latest Kube-bench EKS config YAML")

-    url = 'https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job-eks.yaml'
-    wgetCommand = f'wget {url}'
-    subProc = subprocess.run(wgetCommand, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    print(subProc.stderr.decode('utf-8'))
+    url = "https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job-eks.yaml"
+    wgetCommand = f"wget {url}"
+    subProc = subprocess.run(wgetCommand, shell=True, capture_output=True)
+    print(subProc.stderr.decode("utf-8"))

-    print(f'Installing Trivy from source script for v0.24')
+    print(f"Installing Trivy from source script for v0.24")

    # TODO: Continual updates of Trivy version https://aquasecurity.github.io/trivy
-    trivyCmd = 'curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin v0.24.0'
-    trivyProc = subprocess.run(trivyCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    print(trivyProc.stdout.decode('utf-8'))
+    trivyCmd = "curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin v0.24.0"
+    trivyProc = subprocess.run(trivyCmd, shell=True, capture_output=True)
+    print(trivyProc.stdout.decode("utf-8"))

-    SecurityAssessment.start_assessment(
-        cluster_name=clusterName
-    )
+    SecurityAssessment.start_assessment(cluster_name=clusterName)

    stay_dangerous()

+
 def setup_falco_preflight_check():
-    '''
+    """
    This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before attempting to execute them.
-    '''
+    """
    print_logo()

-    eks = boto3.client('eks')
+    eks = boto3.client("eks")

    # Check if an EKS Cluster exists for provided name
    try:
-        eks.describe_cluster(
-            name=clusterName
-        )
+        eks.describe_cluster(name=clusterName)
    except botocore.exceptions.ClientError as error:
        # A "ResourceNotFoundException" means the cluster doesn't exist, so abort
-        if error.response['Error']['Code'] == 'ResourceNotFoundException':
-            print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again')
+        if error.response["Error"]["Code"] == "ResourceNotFoundException":
+            print(
+                f"An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again"
+            )
            sys.exit(2)
        else:
            pass
-
-    if mode == 'SetupFalco':
-        if falcoDestType == 'Slack' or falcoDestType == 'Teams':
+
+    if mode == "SetupFalco":
+        if falcoDestType == "Slack" or falcoDestType == "Teams":
            if falcoDest == None:
-                print(f'No destination was provided for "--falco_sidekick_destination_type", please try again.')
+                print(
+                    f'No destination was provided via "--falco_sidekick_destination", please try again.'
+                )
                sys.exit(2)
-        elif falcoDestType == 'Datadog':
+        elif falcoDestType == "Datadog":
            if datadogApiKey == None:
-                print(f'Datadog destination for Falco was specified but a Datadog API was not provided. Please provide a valid API key and try again.')
-                sys.exit(2)
+                print(
+                    f"Datadog destination for Falco was specified but a Datadog API key was not provided. Please provide a valid API key and try again."
+                )
+                sys.exit(2)

        FalcoSetup.falco_initialization(
            cluster_name=clusterName,
-            falco_mode='Create',
+            falco_mode="Create",
            falco_sidekick_destination_type=falcoDestType,
-            falco_sidekick_destination=falcoDest
+            falco_sidekick_destination=falcoDest,
        )

        stay_dangerous()
-    elif mode == 'RemoveFalco':
+    elif mode == "RemoveFalco":
        FalcoSetup.falco_initialization(
            cluster_name=clusterName,
-            falco_mode='Delete',
+            falco_mode="Delete",
            falco_sidekick_destination_type=falcoDestType,
            falco_sidekick_destination=falcoDest,
-            datadog_api_key=datadogApiKey
+            datadog_api_key=datadogApiKey,
        )

        stay_dangerous()
    else:
-        print(f'Somehow, an incompatible mode detected for Falco, please try again.')
+        print(f"Somehow, an incompatible mode was detected for Falco, please try again.")
        sys.exit(2)

+
 def setup_datadog_preflight_check():
-    '''
+    """
    This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before attempting to execute them.
-    '''
+    """
    print_logo()

-    eks = boto3.client('eks')
+    eks = boto3.client("eks")

    # Check if an EKS Cluster exists for provided name
    try:
-        eks.describe_cluster(
-            name=clusterName
-        )
+        eks.describe_cluster(name=clusterName)
    except botocore.exceptions.ClientError as error:
        # A "ResourceNotFoundException" means the cluster doesn't exist, so abort
-        if error.response['Error']['Code'] == 'ResourceNotFoundException':
-            print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again')
+        if error.response["Error"]["Code"] == "ResourceNotFoundException":
+            print(
+                f"An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again"
+            )
            sys.exit(2)
        else:
            pass

-    if mode == 'SetupDatadog':
+    if mode == "SetupDatadog":
        if datadogApiKey == None:
-            print(f'Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again.')
+            print(
+                f"Datadog setup was specified but a Datadog API key was not provided. Please provide a valid API key and try again."
+            )
            sys.exit(2)

        # Datadoggy time!
        DatadogSetup.initialization(
-            cluster_name=clusterName,
-            datadog_mode='Setup',
-            datadog_api_key=datadogApiKey
+            cluster_name=clusterName, datadog_mode="Setup", datadog_api_key=datadogApiKey
        )
-    elif mode == 'RemoveDatadog':
+    elif mode == "RemoveDatadog":
        # Bye Datadoggy time!
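+        # NOTE: removal assumes the Helm release name "datadog-agent" that Setup/Create installs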
        DatadogSetup.initialization(
-            cluster_name=clusterName,
-            datadog_mode='Remove',
-            datadog_api_key=datadogApiKey
+            cluster_name=clusterName, datadog_mode="Remove", datadog_api_key=datadogApiKey
        )
    else:
-        print(f'Somehow, an incompatible mode detected for Datadog, please try again.')
+        print(f"Somehow, an incompatible mode was detected for Datadog, please try again.")
        sys.exit(2)

    stay_dangerous()

+
 if __name__ == "__main__":
-    # Feed all of the arguments
-    '''
+    # Feed all of the arguments
+    """
    >> argparse argument | **kwargs <<
    --profile | profile
    --mode | mode
@@ -372,193 +386,202 @@ def setup_datadog_preflight_check():
    --datadog | datadog_bool
    --datadog_api_key | datadog_api_key
    --addtl_auth_principals | addtl_auth_principals
-    '''
+    """

    parser = argparse.ArgumentParser()
    # --profile
    parser.add_argument(
-        '--profile',
-        help='Specify Profile name if multiple profiles are used',
+        "--profile",
+        help="Specify Profile name if multiple profiles are used",
        required=False,
-        default=[]
+        default=[],
    )
    # --mode
    parser.add_argument(
-        '--mode',
-        help='Create, Destory or Update an existing Cluster. Updates limited to K8s Version bump. Destroy attempts to delete everything that this utility creates. Assessment will attempt to run various K8s security tools. SetupFalco will attempt to install Falco on existing Clusters. RemoveFalco will attempt to rollback SetupFalco deployments. SetupDatadog will attempt to install DataDog on existing Cluster. RemoveDatadog will attempt to rollback SetupDatadog deployments - defaults to Create',
+        "--mode",
+        help="Create, Destroy or Update an existing Cluster. Updates limited to K8s Version bump. Destroy attempts to delete everything that this utility creates. Assessment will attempt to run various K8s security tools. SetupFalco will attempt to install Falco on existing Clusters. RemoveFalco will attempt to rollback SetupFalco deployments. SetupDatadog will attempt to install Datadog on existing Cluster. RemoveDatadog will attempt to rollback SetupDatadog deployments - defaults to Create",
        required=False,
-        choices=['Create', 'Destroy', 'Update', 'Assessment', 'SetupFalco', 'RemoveFalco', 'SetupDatadog', 'RemoveDatadog'],
-        default='Create'
+        choices=[
+            "Create",
+            "Destroy",
+            "Update",
+            "Assessment",
+            "SetupFalco",
+            "RemoveFalco",
+            "SetupDatadog",
+            "RemoveDatadog",
+        ],
+        default="Create",
    )
    # --k8s_version
    parser.add_argument(
-        '--k8s_version',
-        help='Version of K8s to use for EKS - defaults to 1.21 as of 13 JAN 2022 - used for Create and Update',
+        "--k8s_version",
+        help="Version of K8s to use for EKS - defaults to 1.21 as of 13 JAN 2022 - used for Create and Update",
        required=False,
-        default='1.21'
+        default="1.21",
    )
    # --s3_bucket_name
    parser.add_argument(
-        '--s3_bucket_name',
-        help='S3 Bucket with required artifacts for EKS to access for bootstrapping if --mde_on_nodes=True - used for Create',
+        "--s3_bucket_name",
+        help="S3 Bucket with required artifacts for EKS to access for bootstrapping if --mde_on_nodes=True - used for Create",
        required=False,
-        default=None
+        default=None,
    )
    # --ebs_volume_size
    parser.add_argument(
-        '--ebs_volume_size',
-        help='EBS volume size (in GB) for EKS nodegroup EC2 launch template - used for Create',
+        "--ebs_volume_size",
+        help="EBS volume size (in GB) for EKS nodegroup EC2 launch template - used for Create",
        required=False,
-        default='20'
+        default="20",
    )
    # --ami
    parser.add_argument(
-        '--ami_id',
+        "--ami_id",
        help='Custom AMI ID for EKS nodegroup EC2 launch template. 
Defaults to "SSM" which tells the program to use an SSM-derived image for your K8s version matching --ami_os and --ami_architecture - used for Create', required=False, - default='SSM' + default="SSM", ) # --instance_type parser.add_argument( - '--instance_type', - help='EC2 Instance type for EKS nodegroup EC2 launch template', + "--instance_type", + help="EC2 Instance type for EKS nodegroup EC2 launch template", required=False, - default='t3.medium' + default="t3.medium", ) # --cluster_name parser.add_argument( - '--cluster_name', - help='Name for your EKS Cluster - used for Create, Delete and Update', + "--cluster_name", + help="Name for your EKS Cluster - used for Create, Delete and Update", required=False, - default='LightspinECECluster' + default="LightspinECECluster", ) # --cluster_role_name parser.add_argument( - '--cluster_role_name', - help='Name for your EKS Cluster Service IAM Role', + "--cluster_role_name", + help="Name for your EKS Cluster Service IAM Role", required=False, - default='ClusterServiceRoleForEKS' + default="ClusterServiceRoleForEKS", ) # --nodegroup_name parser.add_argument( - '--nodegroup_name', - help='Name for your EKS Nodegroup - used for Create, Delete and Update', + "--nodegroup_name", + help="Name for your EKS Nodegroup - used for Create, Delete and Update", required=False, - default='LightspinECENodegroup' + default="LightspinECENodegroup", ) # --nodegroup_role_name parser.add_argument( - '--nodegroup_role_name', - help='Name for your EKS Nodegroup Service IAM Role (also given to policy)', + "--nodegroup_role_name", + help="Name for your EKS Nodegroup Service IAM Role (also given to policy)", required=False, - default='NodegroupServiceRoleForEKS' + default="NodegroupServiceRoleForEKS", ) # --launch_template_name parser.add_argument( - '--launch_template_name', - help='Name for your Nodegroup EC2 launch template - used for Create and Delete', + "--launch_template_name", + help="Name for your Nodegroup EC2 launch template - used for Create and Delete", required=False, - default='LightspinECECustomEKSAMI' + default="LightspinECECustomEKSAMI", ) # --vpcid parser.add_argument( - '--vpcid', - help='VPC ID to launch EKS Cluster and Nodegroups into', + "--vpcid", + help="VPC ID to launch EKS Cluster and Nodegroups into", required=False, - default=None + default=None, ) # --subnets # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python parser.add_argument( - '--subnets', - nargs='+', - help='Subnets to launch EKS Cluster and Nodegroups into - provide subnet IDs separated by spaces only', - required=False + "--subnets", + nargs="+", + help="Subnets to launch EKS Cluster and Nodegroups into - provide subnet IDs separated by spaces only", + required=False, ) # --node_count parser.add_argument( - '--node_count', - help='Amount of Nodes (EC2 instances) in EKS Nodegroup, will be used for min and desired values with 2 times for max - default 2', + "--node_count", + help="Amount of Nodes (EC2 instances) in EKS Nodegroup, will be used for min and desired values with 2 times for max - default 2", required=False, - default='2' + default="2", ) # --mde_on_nodes parser.add_argument( - '--mde_on_nodes', - help='Whether or not to install MDE on EKS Nodes via bootstrap - requires S3 Bucket and install scripts if true - defaults to False', + "--mde_on_nodes", + help="Whether or not to install MDE on EKS Nodes via bootstrap - requires S3 Bucket and install scripts if true - defaults to False", required=False, - choices=['True', 
'False'], - default='False' + choices=["True", "False"], + default="False", ) # --additional_ports # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python parser.add_argument( - '--additional_ports', - nargs='+', - help='Additional application ports which need to be allowed in EKS Security Groups - 443, 53, 8765, 2801, and 10250 already included', - required=False + "--additional_ports", + nargs="+", + help="Additional application ports which need to be allowed in EKS Security Groups - 443, 53, 8765, 2801, and 10250 already included", + required=False, ) # --falco parser.add_argument( - '--falco', - help='For CREATE Mode, this flag specifies if you want to install and configure Falco on your Clusters - defaults to False', + "--falco", + help="For CREATE Mode, this flag specifies if you want to install and configure Falco on your Clusters - defaults to False", required=False, - choices=['True', 'False'], - default='False' + choices=["True", "False"], + default="False", ) # --falco_sidekick_destination_type parser.add_argument( - '--falco_sidekick_destination_type', - help='The output location for Falco Sidekick to send Falco alerts to. Defaults to SNS which also creates a new Topic unless a Destination is provided', + "--falco_sidekick_destination_type", + help="The output location for Falco Sidekick to send Falco alerts to. Defaults to SNS which also creates a new Topic unless a Destination is provided", required=False, - choices=['SNS', 'Slack', 'Teams', 'Datadog'], - default='SNS' + choices=["SNS", "Slack", "Teams", "Datadog"], + default="SNS", ) # --falco_sidekick_destination parser.add_argument( - '--falco_sidekick_destination', - help='The logical location matching the Sidekick Destination Type to forward Falco alerts. E.g., ARN, Webhook URL, Datadog URL, etc.', + "--falco_sidekick_destination", + help="The logical location matching the Sidekick Destination Type to forward Falco alerts. E.g., ARN, Webhook URL, Datadog URL, etc.", required=False, - default=None + default=None, ) # --ami_os parser.add_argument( - '--ami_os', + "--ami_os", help='If using "SSM" for --ami use this argument to specify what OS you want to use (alas = Amazon Linux 2, ubuntu = Ubuntu 20.04) - defaults to ubuntu', required=False, - choices=['alas', 'ubuntu'], - default='ubuntu' + choices=["alas", "ubuntu"], + default="ubuntu", ) # --ami_architecture parser.add_argument( - '--ami_architecture', + "--ami_architecture", help='If using "SSM" for --ami use this argument to specify what architecture you want to use - defaults to amd64', required=False, - choices=['amd64', 'arm64'], - default='amd64' + choices=["amd64", "arm64"], + default="amd64", ) # --datadog parser.add_argument( - '--datadog', - help='For CREATE Mode, this flag specifies if you want to install and configure Datadog APM on your Clusters - defaults to False', + "--datadog", + help="For CREATE Mode, this flag specifies if you want to install and configure Datadog APM on your Clusters - defaults to False", required=False, - choices=['True', 'False'], - default='False' + choices=["True", "False"], + default="False", ) # --datadog_api_key parser.add_argument( - '--datadog_api_key', - help='Datadog API Key. This is used for setting up Datadog with Create and SetupDatadog Modes as well as Datadog integration for FalcoSidekick', + "--datadog_api_key", + help="Datadog API Key. 
This is used for setting up Datadog with Create and SetupDatadog Modes as well as Datadog integration for FalcoSidekick",
        required=False,
-        default=None
+        default=None,
    )
    # addtl_auth_principals
    # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python
    parser.add_argument(
-        '--addtl_auth_principals',
-        nargs='+',
-        help='Additional IAM Role ARNs to authorized as system:masters',
-        required=False
+        "--addtl_auth_principals",
+        nargs="+",
+        help="Additional IAM Role ARNs to authorize as system:masters",
+        required=False,
    )

    args = parser.parse_args()
@@ -593,22 +616,22 @@ def setup_datadog_preflight_check():

    # This calls the creation function to create all needed IAM policies, roles and EC2/EKS infrastructure
    # will check if some infrastructure exists first to avoid needless exit later
-    if mode == 'Create':
+    if mode == "Create":
        create_preflight_check()
-    elif mode == 'Destroy':
+    elif mode == "Destroy":
        delete_preflight_check()
-    elif mode == 'Update':
+    elif mode == "Update":
        update_preflight_check()
-    elif mode == 'Assessment':
+    elif mode == "Assessment":
        assessment_preflight_check()
-    elif mode == 'SetupFalco':
+    elif mode == "SetupFalco":
        setup_falco_preflight_check()
-    elif mode == 'RemoveFalco':
+    elif mode == "RemoveFalco":
        setup_falco_preflight_check()
-    elif mode == 'SetupDatadog':
+    elif mode == "SetupDatadog":
        setup_datadog_preflight_check()
-    elif mode == 'RemoveDatadog':
+    elif mode == "RemoveDatadog":
        setup_datadog_preflight_check()
    else:
-        print(f'Somehow you provided an unexpected arguement, exiting!')
-        sys.exit(2)
\ No newline at end of file
+        print(f"Somehow you provided an unexpected argument, exiting!")
+        sys.exit(2)
diff --git a/plugins/ECEDatadog.py b/plugins/ECEDatadog.py
index 8caba57..1f0cdd4 100644
--- a/plugins/ECEDatadog.py
+++ b/plugins/ECEDatadog.py
@@ -1,68 +1,71 @@
-#This file is part of Lightspin EKS Creation Engine.
-#SPDX-License-Identifier: Apache-2.0
-
-#Licensed to the Apache Software Foundation (ASF) under one
-#or more contributor license agreements. See the NOTICE file
-#distributed with this work for additional information
-#regarding copyright ownership. The ASF licenses this file
-#to you under the Apache License, Version 2.0 (the
+# This file is part of Lightspin EKS Creation Engine.
+# SPDX-License-Identifier: Apache-2.0
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
 #'License'); you may not use this file except in compliance
-#with the License. You may obtain a copy of the License at
-
-#http://www.apache.org/licenses/LICENSE-2.0
-
-#Unless required by applicable law or agreed to in writing,
-#software distributed under the License is distributed on an
+# with the License. You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
 #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#KIND, either express or implied. See the License for the
-#specific language governing permissions and limitations
-#under the License.
-
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
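+# NOTE: this plugin shells out to Helm via subprocess, so a working `helm` binary and a
+# kubeconfig context pointing at the target cluster are assumed on the host running ECE.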
import subprocess -''' +""" This Class manages deployment of Datadog onto an EKS Cluster and rollbacks / manual deletions -''' -class DatadogSetup(): +""" + +class DatadogSetup: def initialization(cluster_name, datadog_mode, datadog_api_key): - ''' + """ This function controls initialization of the DatadogSetup Class. It will control installs, deletions, and rollbacks - ''' + """ - if datadog_mode == 'Setup': - print(f'Setting up Datadog on EKS Cluster {cluster_name}') + if datadog_mode == "Setup": + print(f"Setting up Datadog on EKS Cluster {cluster_name}") DatadogSetup.install_datadog(datadog_api_key) else: - print(f'Rolling back Datadog from EKS Cluster {cluster_name}') + print(f"Rolling back Datadog from EKS Cluster {cluster_name}") DatadogSetup.uninstall_datadog() def install_datadog(datadog_api_key): - ''' + """ This function adds and updates existing Datadog Charts and applies the Chart to your EKS Cluster - ''' + """ # Use subprocess to add Datadog Charts using Helm - print(f'Adding Datadog Helm Charts') - datadogHelmChartAddCmd = 'helm repo add datadog https://helm.datadoghq.com && helm repo update' - datadogHelmChartAddSubprocess = subprocess.run(datadogHelmChartAddCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - datadogHelmChartAddMsg = str(datadogHelmChartAddSubprocess.stdout.decode('utf-8')) + print(f"Adding Datadog Helm Charts") + datadogHelmChartAddCmd = ( + "helm repo add datadog https://helm.datadoghq.com && helm repo update" + ) + datadogHelmChartAddSubprocess = subprocess.run( + datadogHelmChartAddCmd, shell=True, capture_output=True + ) + datadogHelmChartAddMsg = str(datadogHelmChartAddSubprocess.stdout.decode("utf-8")) print(datadogHelmChartAddMsg) # Use subprocess to configure Datadog per initiation arguments from main.py - print(f'Installing Datadog') - installDatadogCmd = f'helm install datadog-agent --set targetSystem=linux --set datadog.apiKey={datadog_api_key} datadog/datadog' - installDatadogSubprocess = subprocess.run(installDatadogCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - installDatadogMsg = str(installDatadogSubprocess.stdout.decode('utf-8')) + print(f"Installing Datadog") + installDatadogCmd = f"helm install datadog-agent --set targetSystem=linux --set datadog.apiKey={datadog_api_key} datadog/datadog" + installDatadogSubprocess = subprocess.run( + installDatadogCmd, shell=True, capture_output=True + ) + installDatadogMsg = str(installDatadogSubprocess.stdout.decode("utf-8")) print(installDatadogMsg) def uninstall_datadog(): - ''' + """ This function uninstalls Datadog from your EKS Cluster - ''' + """ # Uninstall Datadog from EKS - datadogRemoveCmd = 'helm uninstall datadog-agent' - datadogRemoveSubprocess = subprocess.run(datadogRemoveCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - datadogRemoveMsg = str(datadogRemoveSubprocess.stdout.decode('utf-8')) - print(datadogRemoveMsg) \ No newline at end of file + datadogRemoveCmd = "helm uninstall datadog-agent" + datadogRemoveSubprocess = subprocess.run(datadogRemoveCmd, shell=True, capture_output=True) + datadogRemoveMsg = str(datadogRemoveSubprocess.stdout.decode("utf-8")) + print(datadogRemoveMsg) diff --git a/plugins/ECEFalco.py b/plugins/ECEFalco.py index c148cda..07870ae 100644 --- a/plugins/ECEFalco.py +++ b/plugins/ECEFalco.py @@ -1,195 +1,176 @@ -#This file is part of Lightspin EKS Creation Engine. -#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. 
See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. - +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import json +import subprocess import sys +from datetime import datetime + import boto3 import botocore.exceptions -import json -from datetime import datetime -import subprocess -''' +""" This Class manages an end-to-end deployment of Falco and FalcoSidekick to EKS using Helm. This class can be called from ClusterManager (if flag is set) or called independently to setup Falco -''' -class FalcoSetup(): - - def falco_initialization(cluster_name, falco_mode, falco_sidekick_destination_type, falco_sidekick_destination, datadog_api_key): - ''' +""" + + +class FalcoSetup: + def falco_initialization( + cluster_name, + falco_mode, + falco_sidekick_destination_type, + falco_sidekick_destination, + datadog_api_key, + ): + """ This function handles configuration of Falco and FalcoSidekick on a Cluster, whether in-line of an ECE Create or ECE SetupFalco `--mode` from main.py Depending on the destination configuration and mode, this function will either schedule deletion or creation of additional infrastructure and issue Helm commands to your cluster - ''' - print(f'Setting up Falco on {cluster_name}') + """ + print(f"Setting up Falco on {cluster_name}") - if falco_mode == 'Create': - print(f'Setting up Falco on running Nodes for {cluster_name}') - if falco_sidekick_destination_type == 'SNS': + if falco_mode == "Create": + print(f"Setting up Falco on running Nodes for {cluster_name}") + if falco_sidekick_destination_type == "SNS": # Create EKS Client - eks = boto3.client('eks') + eks = boto3.client("eks") # Create an empty list of Role ARNs to append Cluster & Node Roles to send to different functions roleArns = [] # First, since we may have to work with existing Clusters that ECE did not setup, find all Nodegroups for the cluster and retrieve their Role ARNs # and add them to the static list above if they are not already there - for nodegroup in eks.list_nodegroups(clusterName=cluster_name)['nodegroups']: + for nodegroup in eks.list_nodegroups(clusterName=cluster_name)["nodegroups"]: nodeRoleArn = eks.describe_nodegroup( - clusterName=cluster_name, - nodegroupName=nodegroup - 
)['nodegroup']['nodeRole'] + clusterName=cluster_name, nodegroupName=nodegroup + )["nodegroup"]["nodeRole"] if nodeRoleArn not in roleArns: roleArns.append(nodeRoleArn) # Retrieve Cluster Role Arn - clusterRoleArn = eks.describe_cluster(name=cluster_name)['cluster']['roleArn'] + clusterRoleArn = eks.describe_cluster(name=cluster_name)["cluster"]["roleArn"] if clusterRoleArn not in roleArns: roleArns.append(clusterRoleArn) topicArn = FalcoSetup.falco_sidekick_sns_iam_generator( cluster_name=cluster_name, falco_sidekick_destination=falco_sidekick_destination, - role_arns=roleArns + role_arns=roleArns, ) # Install Falco # All commands for FalcoSidekick come from the Helm vars spec in the chart GitHub repo: https://github.com/falcosecurity/charts/tree/master/falcosidekick#configuration - falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.aws.sns.topicarn={topicArn}' - FalcoSetup.install_falco( - falco_install_command=falcoHelmCmd + falcoHelmCmd = f"helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.aws.sns.topicarn={topicArn}" + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) + elif falco_sidekick_destination_type == "Slack": + print( + f"Configuring Falco and FalcoSidekick to send runtime alerts to Slack Webhook {falco_sidekick_destination}" ) - elif falco_sidekick_destination_type == 'Slack': - print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Slack Webhook {falco_sidekick_destination}') - + # Install Falco - falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.slack.webhookurl={falco_sidekick_destination}' - FalcoSetup.install_falco( - falco_install_command=falcoHelmCmd + falcoHelmCmd = f"helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.slack.webhookurl={falco_sidekick_destination}" + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) + elif falco_sidekick_destination_type == "Teams": + print( + f"Configuring Falco and FalcoSidekick to send runtime alerts to Teams Webhook {falco_sidekick_destination}" ) - elif falco_sidekick_destination_type == 'Teams': - print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Teams Webhook {falco_sidekick_destination}') # Install Falco - falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.teams.webhookurl={falco_sidekick_destination}' - FalcoSetup.install_falco( - falco_install_command=falcoHelmCmd + falcoHelmCmd = f"helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.teams.webhookurl={falco_sidekick_destination}" + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) + elif falco_sidekick_destination_type == "Datadog": + print( + f"Configuring Falco and FalcoSidekick to send runtime alerts to Datadog Host {falco_sidekick_destination}" ) - elif falco_sidekick_destination_type == 'Datadog': - print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Datadog Host {falco_sidekick_destination}') # Install Falco - falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set 
falcosidekick.webui.enabled=false --set falcosidekick.config.datadog.host={falco_sidekick_destination} --set falcosidekick.config.datadog.apikey={datadog_api_key}' - FalcoSetup.install_falco( - falco_install_command=falcoHelmCmd - ) + falcoHelmCmd = f"helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.datadog.host={falco_sidekick_destination} --set falcosidekick.config.datadog.apikey={datadog_api_key}" + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) else: - print(f'Unsupported destination type provided, exiting') + print(f"Unsupported destination type provided, exiting") sys.exit(2) else: - print(f'Rolling back Falco on running Nodes for {cluster_name}') + print(f"Rolling back Falco on running Nodes for {cluster_name}") FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, role_arns): - ''' + """ This function will create IAM Policies to attach to the Roles of EKS Clusters and Nodegroups being boostrapped for Falco if they are configured to send messages to SNS. It will invoke `falco_sidekick_sns_creation` to receive the Topic ARN for the SNS Topic (if one is not provided) - ''' - print(f'Configuring {cluster_name} Cluster and Nodegroup IAM Roles to communicate with SNS') + """ + print(f"Configuring {cluster_name} Cluster and Nodegroup IAM Roles to communicate with SNS") - sts = boto3.client('sts') - iam = boto3.client('iam') + sts = boto3.client("sts") + iam = boto3.client("iam") # If the value for 'falco_sidekick_destination' is None, that means a SNS topic was not provided and needs to be setup if falco_sidekick_destination == None: topicArn = FalcoSetup.falco_sidekick_sns_creation( - cluster_name=cluster_name, - role_arns=role_arns + cluster_name=cluster_name, role_arns=role_arns ) else: topicArn = falco_sidekick_destination # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging - createdBy = str(sts.get_caller_identity()['Arn']) + createdBy = str(sts.get_caller_identity()["Arn"]) createdAt = str(datetime.utcnow()) # AWS Account ID - acctId = str(sts.get_caller_identity()['Account']) + acctId = str(sts.get_caller_identity()["Account"]) # Bring in the list of Role ARNs to append Cluster & Node Roles into for attaching policies to roleArns = role_arns # Create the IAM Policy for SNS iamPolicyDoc = { - 'Version': '2012-10-17', - 'Statement': [ + "Version": "2012-10-17", + "Statement": [ { - 'Sid': 'Snssid', - 'Effect': 'Allow', - 'Action': [ - 'sns:Publish', - 'sns:GetTopicAttributes', - 'sns:ListTopics' - ], - 'Resource': [topicArn] + "Sid": "Snssid", + "Effect": "Allow", + "Action": ["sns:Publish", "sns:GetTopicAttributes", "sns:ListTopics"], + "Resource": [topicArn], } - ] + ], } - policyName = f'{cluster_name}FalcoSidekick-SNSPublishPolicy' + policyName = f"{cluster_name}FalcoSidekick-SNSPublishPolicy" try: iam.create_policy( PolicyName=policyName, - Path='/', + Path="/", PolicyDocument=json.dumps(iamPolicyDoc), - Description=f'Allows EKS Cluster {cluster_name} and Nodegroups to send Falco alerts to SNS - Created by Lightspin ECE', + Description=f"Allows EKS Cluster {cluster_name} and Nodegroups to send Falco alerts to SNS - Created by Lightspin ECE", Tags=[ - { - 'Key': 'Name', - 'Value': policyName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin 
ECE' - } - ] + {"Key": "Name", "Value": policyName}, + {"Key": "CreatedBy", "Value": createdBy}, + {"Key": "CreatedAt", "Value": createdAt}, + {"Key": "CreatedWith", "Value": "Lightspin ECE"}, + ], ) - policyArn = f'arn:aws:iam::{acctId}:policy/{policyName}' + policyArn = f"arn:aws:iam::{acctId}:policy/{policyName}" except botocore.exceptions.ClientError as error: - print(f'Error encountered: {error}') + print(f"Error encountered: {error}") FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) for role in roleArns: - roleName = role.split('/')[1] + roleName = role.split("/")[1] try: - iam.attach_role_policy( - RoleName=roleName, - PolicyArn=policyArn - ) + iam.attach_role_policy(RoleName=roleName, PolicyArn=policyArn) except botocore.exceptions.ClientError as error: - print(f'Error encountered: {error}') + print(f"Error encountered: {error}") FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) del sts @@ -203,97 +184,75 @@ def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, r return topicArn def falco_sidekick_sns_creation(cluster_name, role_arns): - ''' + """ This function creates an SNS Topic and Topic Policy for use as a FalcoSidekick destination if a valid ARN is not provided for the 'SNS' destination type and returns the Topic Arn to the `falco_sidekick_sns_iam_generator()` function which this function is called from. Otherwise, this function does nothing if not called from `falco_initialization` - ''' - print(f'Creating SNS Topic to send Falco alerts to for {cluster_name}') + """ + print(f"Creating SNS Topic to send Falco alerts to for {cluster_name}") - sns = boto3.client('sns') - sts = boto3.client('sts') + sns = boto3.client("sns") + sts = boto3.client("sts") # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging - createdBy = str(sts.get_caller_identity()['Arn']) + createdBy = str(sts.get_caller_identity()["Arn"]) createdAt = str(datetime.utcnow()) # AWS Account ID - acctId = str(sts.get_caller_identity()['Account']) + acctId = str(sts.get_caller_identity()["Account"]) # Bring in the list of Role ARNs to append Cluster & Node Roles into for adding Principal permissions to the SNS Topic Policy roleArns = role_arns # Create an SNS Topic # NOTE: In the future, need to add KMS along with EncryptionCreationEngine https://docs.aws.amazon.com/sns/latest/dg/sns-key-management.html - topicName = f'{cluster_name}-FalcoSidekickTopic' + topicName = f"{cluster_name}-FalcoSidekickTopic" try: topicArn = sns.create_topic( Name=topicName, - Attributes={ - 'DisplayName': topicName - }, + Attributes={"DisplayName": topicName}, Tags=[ - { - 'Key': 'Name', - 'Value': topicName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] - )['TopicArn'] + {"Key": "Name", "Value": topicName}, + {"Key": "CreatedBy", "Value": createdBy}, + {"Key": "CreatedAt", "Value": createdAt}, + {"Key": "CreatedWith", "Value": "Lightspin ECE"}, + ], + )["TopicArn"] except botocore.exceptions.ClientError as error: - print(f'Error encountered: {error}') + print(f"Error encountered: {error}") FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) # Create a SNS Topic Policy Doc to pass in as an SNS Attribute topicPolicyJson = { - 'Version':'2008-10-17', - 'Id':'ecepolicy', - 'Statement':[ + "Version": "2008-10-17", + "Id": "ecepolicy", + "Statement": [ { - 'Sid':'ecesid-pub', - 'Effect':'Allow', - 'Principal':{ - 'AWS': 
roleArns - }, - 'Action':['SNS:Publish'], - 'Resource': topicArn + "Sid": "ecesid-pub", + "Effect": "Allow", + "Principal": {"AWS": roleArns}, + "Action": ["SNS:Publish"], + "Resource": topicArn, }, { - 'Sid':'ecesid-sub', - 'Effect':'Allow', - 'Principal':{ - 'AWS':'*' - }, - 'Action':['SNS:Subscribe'], - 'Resource': topicArn, - 'Condition':{ - 'StringEquals':{ - 'AWS:SourceOwner': acctId - } - } - } - ] + "Sid": "ecesid-sub", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": ["SNS:Subscribe"], + "Resource": topicArn, + "Condition": {"StringEquals": {"AWS:SourceOwner": acctId}}, + }, + ], } try: sns.set_topic_attributes( TopicArn=topicArn, - AttributeName='Policy', - AttributeValue=json.dumps(topicPolicyJson) + AttributeName="Policy", + AttributeValue=json.dumps(topicPolicyJson), ) except botocore.exceptions.ClientError as error: - print(f'Error encountered: {error}') + print(f"Error encountered: {error}") FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) del topicName @@ -306,60 +265,59 @@ def falco_sidekick_sns_creation(cluster_name, role_arns): return topicArn def install_falco(falco_install_command): - ''' + """ This function receives the final installation command from `falco_initialization` and will attempt to add the latest Falco Security Helm Charts and install Falco onto a new namespace on your EKS Cluster - ''' - print(f'Installing Falco and FalcoSidekick') + """ + print(f"Installing Falco and FalcoSidekick") # Use subprocess to add Falco Charts using Helm - print(f'Adding Falco Helm Charts') - falcoHelmChartAddCmd = 'helm repo add falcosecurity https://falcosecurity.github.io/charts && helm repo update' - falcoHelmChartAddSubprocess = subprocess.run(falcoHelmChartAddCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - falcoHelmChartAddMsg = str(falcoHelmChartAddSubprocess.stdout.decode('utf-8')) + print(f"Adding Falco Helm Charts") + falcoHelmChartAddCmd = ( + "helm repo add falcosecurity https://falcosecurity.github.io/charts && helm repo update" + ) + falcoHelmChartAddSubprocess = subprocess.run( + falcoHelmChartAddCmd, shell=True, capture_output=True + ) + falcoHelmChartAddMsg = str(falcoHelmChartAddSubprocess.stdout.decode("utf-8")) print(falcoHelmChartAddMsg) # Use subprocess to configure Falco and FalcoSidekick per initiation arguments from main.py - print(f'Installing Falco and FalcoSidekick') + print(f"Installing Falco and FalcoSidekick") installFalcoCmd = falco_install_command - installFalcoSubprocess = subprocess.run(installFalcoCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - installFalcoMsg = str(installFalcoSubprocess.stdout.decode('utf-8')) + installFalcoSubprocess = subprocess.run(installFalcoCmd, shell=True, capture_output=True) + installFalcoMsg = str(installFalcoSubprocess.stdout.decode("utf-8")) print(installFalcoMsg) # Rollback and manual deletes starts here def falco_setup_rollback(cluster_name): - ''' + """ This function handles rollback of resources just for Falco - ''' + """ - sts = boto3.client('sts') - acctId = str(sts.get_caller_identity()['Account']) - iam = boto3.client('iam') - sns = boto3.client('sns') + sts = boto3.client("sts") + acctId = str(sts.get_caller_identity()["Account"]) + iam = boto3.client("iam") + sns = boto3.client("sns") # Retrieve region for AWS CLI kubectl generation session = boto3.session.Session() awsRegion = session.region_name # Create & Stage Policy & SNS Names (only used if ECE created them) - policyName = f'{cluster_name}FalcoSidekick-SNSPublishPolicy' - policyArn = 
f'arn:aws:iam::{acctId}:policy/{policyName}' - topicName = f'{cluster_name}-FalcoSidekickTopic' - topicArn = f'arn:aws:sns:{awsRegion}:{acctId}:{topicName}' + policyName = f"{cluster_name}FalcoSidekick-SNSPublishPolicy" + policyArn = f"arn:aws:iam::{acctId}:policy/{policyName}" + topicName = f"{cluster_name}-FalcoSidekickTopic" + topicArn = f"arn:aws:sns:{awsRegion}:{acctId}:{topicName}" # If an IAM Policy for SNS was created, attempt to detach it before deletion try: rolesAttachedToPolicy = iam.list_entities_for_policy( - PolicyArn=policyArn, - EntityFilter='Role', - PolicyUsageFilter='PermissionsPolicy' - )['PolicyRoles'] + PolicyArn=policyArn, EntityFilter="Role", PolicyUsageFilter="PermissionsPolicy" + )["PolicyRoles"] if rolesAttachedToPolicy: for role in rolesAttachedToPolicy: - roleName = str(role['RoleName']) - iam.detach_role_policy( - RoleName=roleName, - PolicyArn=policyArn - ) + roleName = str(role["RoleName"]) + iam.detach_role_policy(RoleName=roleName, PolicyArn=policyArn) except botocore.exceptions.ClientError as error: print(error) except KeyError as ke: @@ -367,25 +325,23 @@ def falco_setup_rollback(cluster_name): # If an IAM Policy for SNS was created, attempt to delete it try: - iam.delete_policy( - PolicyArn=policyArn - ) - print(f'Falco SNS Policy {policyArn} deleted') + iam.delete_policy(PolicyArn=policyArn) + print(f"Falco SNS Policy {policyArn} deleted") except botocore.exceptions.ClientError as error: print(error) # If an SNS Topic was created, attempt to delete it try: sns.delete_topic(TopicArn=topicArn) - print(f'Falco SNS Topic {topicArn} deleted') + print(f"Falco SNS Topic {topicArn} deleted") except botocore.exceptions.ClientError as error: print(error) # Uninstall Falco from EKS - falcoRemoveCmd = 'helm uninstall falco' - falcoRemoveSubprocess = subprocess.run(falcoRemoveCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - falcoRemoveMsg = str(falcoRemoveSubprocess.stdout.decode('utf-8')) + falcoRemoveCmd = "helm uninstall falco" + falcoRemoveSubprocess = subprocess.run(falcoRemoveCmd, shell=True, capture_output=True) + falcoRemoveMsg = str(falcoRemoveSubprocess.stdout.decode("utf-8")) print(falcoRemoveMsg) - print(f'Falco rollback complete.') - sys.exit(2) \ No newline at end of file + print(f"Falco rollback complete.") + sys.exit(2) diff --git a/plugins/ECESecurity.py b/plugins/ECESecurity.py index 6958254..85279da 100644 --- a/plugins/ECESecurity.py +++ b/plugins/ECESecurity.py @@ -1,103 +1,105 @@ -#This file is part of Lightspin EKS Creation Engine. -#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. - -import boto3 +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. import json -import time -import subprocess import re +import subprocess +import time -''' +import boto3 + +""" This Class manages various security assessment functions - such as running and saving Kube-bench CIS benchmarking and Trivy container scanning -''' -class SecurityAssessment(): +""" + +class SecurityAssessment: def start_assessment(cluster_name): - ''' + """ This function serves as the 'brain' of the security assessment. It will modify the Kubeconfig and attempt to run the other assessments it will also consolidate all findings in a SARIF JSON format for consumption in downstream tools - ''' - print(f'Starting security assessments for EKS Cluster {cluster_name}') + """ + print(f"Starting security assessments for EKS Cluster {cluster_name}") # Retrieve region for AWS CLI kubectl generation session = boto3.session.Session() awsRegion = session.region_name - updateKubeconfigCmd = f'aws eks update-kubeconfig --region {awsRegion} --name {cluster_name}' - updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - print(updateKubeconfigProc.stdout.decode('utf-8')) + updateKubeconfigCmd = ( + f"aws eks update-kubeconfig --region {awsRegion} --name {cluster_name}" + ) + updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, capture_output=True) + print(updateKubeconfigProc.stdout.decode("utf-8")) trivySarif = SecurityAssessment.run_trivy() kubebenchSarif = SecurityAssessment.run_kube_bench(cluster_name) - print(f'Security assessments completed, starting SARIF consolidation.') + print(f"Security assessments completed, starting SARIF consolidation.") sarifBase = { - '$schema': 'https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json', - 'version': '2.1.0', - 'runs': [] + "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", + "version": "2.1.0", + "runs": [], } for runs in trivySarif: - sarifBase['runs'].append(runs) + sarifBase["runs"].append(runs) for runs in kubebenchSarif: - sarifBase['runs'].append(runs) + sarifBase["runs"].append(runs) - with open('./ECE_SecurityAssessment.sarif', 'w') as jsonfile: + with open("./ECE_SecurityAssessment.sarif", "w") as jsonfile: json.dump(sarifBase, jsonfile, indent=4, default=str) - print(f'Assessments completed and SARIF document created successfully as "ECE_SecurityAssessment.sarif".') + print( + f'Assessments completed and SARIF document created successfully as "ECE_SecurityAssessment.sarif".' 
+        )

     def run_trivy():
-        '''
+        """
         This function will run Trivy container vuln scanning against all running Containers in your Cluster and generate a report
-        '''
+        """
         # Create empty lists to contain unique values for reporting
         uniqueContainers = []
         trivyFindings = []

-        print(f'Running Trivy')
-
+        print(f"Running Trivy")
+
         # Retrieve a list of all running Containers and create a unique list of them to pass to Trivy for scanning
-        print(f'Retrieving list of all running Containers from your EKS Cluster')
+        print(f"Retrieving list of all running Containers from your EKS Cluster")
         command = 'kubectl get pods --all-namespaces -o json | jq --raw-output ".items[].spec.containers[].image"'
-        sub = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        sub = subprocess.run(command, shell=True, capture_output=True)
         # pull list of container URIs from kubectl
-        strList = str(sub.stdout.decode('utf-8'))
+        strList = str(sub.stdout.decode("utf-8"))
         # split by newline, as that is how it is returned
-        splitter = strList.split('\n')
+        splitter = strList.split("\n")
         # Read the newly created list (created by `.split()`) and write unique names to List, ignoring the stray whitespace
         for i in splitter:
             if i not in uniqueContainers:
-                if i == '':
+                if i == "":
                     pass
                 else:
                     uniqueContainers.append(i)
-
+
         totalUniques = str(len(uniqueContainers))
-        if totalUniques == '1':
-            print(f'Trivy will scan {totalUniques} unique container image')
+        if totalUniques == "1":
+            print(f"Trivy will scan {totalUniques} unique container image")
         else:
-            print(f'Trivy will scan {totalUniques} unique container images')
+            print(f"Trivy will scan {totalUniques} unique container images")
         # mem clean up
         del splitter
         del strList
@@ -105,55 +107,55 @@ def run_trivy():
     # loop the list of unique container URIs and write the vulns to a new list
     for c in uniqueContainers:
        # passing '--quiet' will ensure the setup text from Trivy scanning does not make it into the JSON and corrupt it
-        trivyScanCmd = f'trivy --quiet image --format sarif {c}'
-        trivyScanSubprocess = subprocess.run(trivyScanCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        trivyStdout = str(trivyScanSubprocess.stdout.decode('utf-8'))
+        trivyScanCmd = f"trivy --quiet image --format sarif {c}"
+        trivyScanSubprocess = subprocess.run(trivyScanCmd, shell=True, capture_output=True)
+        trivyStdout = str(trivyScanSubprocess.stdout.decode("utf-8"))
         # load JSON object from stdout
         jsonItem = json.loads(trivyStdout)
         # loop the list of vulns
-        print(f'Finished scanning Image URI {c}')
-        for v in jsonItem['runs']:
+        print(f"Finished scanning Image URI {c}")
+        for v in jsonItem["runs"]:
             trivyFindings.append(v)
         del v
         del c

-        print(f'Completed Trivy scans of all unique running Containers in your Cluster')
+        print(f"Completed Trivy scans of all unique running Containers in your Cluster")

         return trivyFindings

     def run_kube_bench(cluster_name):
-        '''
+        """
         This function will run Kube-bench EKS CIS benchmark against your cluster and generate a report
-        '''
+        """

-        print(f'Running Kube-bench')
+        print(f"Running Kube-bench")

         # Create an empty list to hold normalized JSON findings once Kube-bench is converted
         findings = []

         # The SARIF JSON schema requires a URI for the 'artifact' location - which will point to the Cluster Endpoint
-        eks = boto3.client('eks')
-        clusterEndpoint = eks.describe_cluster(name=cluster_name)['cluster']['endpoint']
+        eks = boto3.client("eks")
+        clusterEndpoint = eks.describe_cluster(name=cluster_name)["cluster"]["endpoint"]
         del eks

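The Job scheduling below applies the job-eks.yaml manifest and then polls `kubectl wait` inside a `while True` loop until the completion message appears, so a Job that never completes would spin forever. A bounded variant (a hypothetical sketch, not part of this patch) might look like:

    import subprocess
    import time

    def wait_for_job(job_name: str, attempts: int = 60) -> bool:
        # Poll `kubectl wait` with a short timeout; give up after `attempts` tries.
        cmd = f"kubectl wait --for=condition=complete job/{job_name} --timeout=2s"
        for _ in range(attempts):
            proc = subprocess.run(cmd, shell=True, capture_output=True, text=True)
            if "condition met" in proc.stdout:
                return True
            time.sleep(2)
        return False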
        # Schedule the Job onto your EKS Cluster
-        command = 'kubectl apply -f job-eks.yaml'
-        runJobSubproc = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        print(runJobSubproc.stdout.decode('utf-8'))
+        command = "kubectl apply -f job-eks.yaml"
+        runJobSubproc = subprocess.run(command, shell=True, capture_output=True)
+        print(runJobSubproc.stdout.decode("utf-8"))

         time.sleep(1.5)

         # Wait for Job to complete - use a short timeout to force a message to be piped sooner
         # https://stackoverflow.com/questions/63632084/kubectl-wait-for-a-pod-to-complete
-        jobWaitCmd = 'kubectl wait --for=condition=complete job/kube-bench --timeout=2s'
+        jobWaitCmd = "kubectl wait --for=condition=complete job/kube-bench --timeout=2s"

         # Really bad Regex hack to exit the `while True` loop - fuzzy match the stdout message
-        completionRegex = re.compile('job.batch/kube-bench condition met')
+        completionRegex = re.compile("job.batch/kube-bench condition met")

         while True:
-            jobWaitSubproc = subprocess.run(jobWaitCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-            jobWaitMessage = str(jobWaitSubproc.stdout.decode('utf-8'))
+            jobWaitSubproc = subprocess.run(jobWaitCmd, shell=True, capture_output=True)
+            jobWaitMessage = str(jobWaitSubproc.stdout.decode("utf-8"))
             completionRegexCheck = completionRegex.search(jobWaitMessage)
             if completionRegexCheck:
-                print(f'Kube-bench Job completed! {jobWaitMessage}')
+                print(f"Kube-bench Job completed! {jobWaitMessage}")
                 break
             else:
                 time.sleep(2)
@@ -161,69 +163,67 @@ def run_kube_bench(cluster_name):

     # `getPodCmd` used Kubectl to get pod names in all namespaces (-A). cut -d/ -f2 command is to split by the '/' and get the name
     # grep is used to ensure the right pod name is pulled as it always ends with a random 5 character hex (ex. kube-bench-z6r4b)
-    getPodCmd = 'kubectl get pods -o name -A | cut -d/ -f2 | grep kube-bench'
-    getPodSubproc = subprocess.run(getPodCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    getPodCmd = "kubectl get pods -o name -A | cut -d/ -f2 | grep kube-bench"
+    getPodSubproc = subprocess.run(getPodCmd, shell=True, capture_output=True)
     # decoding adds newline or blank spaces - attempt to trim them
-    kubebenchPodName = str(getPodSubproc.stdout.decode('utf-8')).replace('\n', '').replace(' ', '')
+    kubebenchPodName = (
+        str(getPodSubproc.stdout.decode("utf-8")).replace("\n", "").replace(" ", "")
+    )

     # Pull logs from Job - this is the actual results of the job
-    getLogsCmd = f'kubectl logs {kubebenchPodName}'
-    getLogsSubproc = subprocess.run(getLogsCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    getLogsStdout = str(getLogsSubproc.stdout.decode('utf-8'))
+    getLogsCmd = f"kubectl logs {kubebenchPodName}"
+    getLogsSubproc = subprocess.run(getLogsCmd, shell=True, capture_output=True)
+    getLogsStdout = str(getLogsSubproc.stdout.decode("utf-8"))

     # Split the block of text from STDOUT by newline delimiters to create a new list
-    splitter = getLogsStdout.split('\n')
+    splitter = getLogsStdout.split("\n")

     # Use regex to match the Kube-Bench findings, they always start with a '[' which contains info such as '[PASS]'.
We then match anything with 2 periods # as Kube-bench outputs 'headers' such as 3 or 3.1 - we want results such as '[PASS] 3.1.3 Ensure that the kubelet configuration file has permissions set to 644 or more restrictive (Manual)' # this is a horrible way to do it....but it works - kubeBenchResultRegex = re.compile('^\[.*\..*\..*') + kubeBenchResultRegex = re.compile(r"^\[.*\..*\..*") for line in splitter: kubeBenchRegexCheck = kubeBenchResultRegex.search(line) if kubeBenchRegexCheck: # Once we find a match, split at the closing bracket and perform small transformations - splitFinding = line.split('] ') + splitFinding = line.split("] ") # Handle the pass/fail/warn labels - if splitFinding[0] == '[PASS': - findingStatus = 'Passed' - elif splitFinding[0] == '[WARN': - findingStatus = 'Warning' + if splitFinding[0] == "[PASS": + findingStatus = "Passed" + elif splitFinding[0] == "[WARN": + findingStatus = "Warning" else: - findingStatus = 'Failed' + findingStatus = "Failed" # Create a new dict of the findings that will match a SARIF JSON 'run' # https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md run = { - 'tool':{ - 'driver':{ - 'name':'Kube-bench', - 'semanticVersion': '0.6.6', - 'informationUri': 'https://github.com/aquasecurity/kube-bench', - 'organization': 'Aqua Security', - 'fullDescription': { - 'text': 'kube-bench is a tool that checks whether Kubernetes is deployed securely by running the checks documented in the CIS Kubernetes Benchmark.' - } + "tool": { + "driver": { + "name": "Kube-bench", + "semanticVersion": "0.6.6", + "informationUri": "https://github.com/aquasecurity/kube-bench", + "organization": "Aqua Security", + "fullDescription": { + "text": "kube-bench is a tool that checks whether Kubernetes is deployed securely by running the checks documented in the CIS Kubernetes Benchmark." + }, } }, - 'results':[ + "results": [ { - 'ruleId': splitFinding[1], - 'message':{ - 'text': findingStatus - }, - 'locations':[ + "ruleId": splitFinding[1], + "message": {"text": findingStatus}, + "locations": [ { - 'physicalLocation':{ - 'artifactLocation':{ - 'uri': clusterEndpoint, - 'description': { - 'text': cluster_name - } + "physicalLocation": { + "artifactLocation": { + "uri": clusterEndpoint, + "description": {"text": cluster_name}, } } } - ] + ], } ], - 'columnKind':'utf16CodeUnits' + "columnKind": "utf16CodeUnits", } findings.append(run) else: @@ -232,11 +232,13 @@ def run_kube_bench(cluster_name): del splitter # Delete the job from the EKS Cluster - deleteKubebenchJobCmd = 'kubectl delete -f job-eks.yaml' - deleteKubebenchJobSubproc = subprocess.run(deleteKubebenchJobCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - deleteKubebenchJobStdout = str(deleteKubebenchJobSubproc.stdout.decode('utf-8')) - print(f'{deleteKubebenchJobStdout}') + deleteKubebenchJobCmd = "kubectl delete -f job-eks.yaml" + deleteKubebenchJobSubproc = subprocess.run( + deleteKubebenchJobCmd, shell=True, capture_output=True + ) + deleteKubebenchJobStdout = str(deleteKubebenchJobSubproc.stdout.decode("utf-8")) + print(f"{deleteKubebenchJobStdout}") - print(f'Completed Kube-bench assessment of EKS Cluster {cluster_name}') + print(f"Completed Kube-bench assessment of EKS Cluster {cluster_name}") - return findings \ No newline at end of file + return findings diff --git a/plugins/__init__.py b/plugins/__init__.py index 8e8bdd2..da288a2 100644 --- a/plugins/__init__.py +++ b/plugins/__init__.py @@ -1,19 +1,16 @@ -#This file is part of Lightspin EKS Creation Engine. 
-#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an -#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. \ No newline at end of file +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/requirements.txt b/requirements.txt index 2e1b37f..53bd91a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -awscli -boto3 -art -termcolor -clint -urllib3 \ No newline at end of file +art>=5.4,<5.5 +awscli>=1.22.65,<1.23.0 +boto3>=1.21.10,<1.22.0 +clint>=0.5.1,<0.6.0 +termcolor>=1.1.0,<1.2.0 +urllib3>=1.26.8,<1.27.0 From 9b73baca888e026404ce8cf13b21d28ba6c09d15 Mon Sep 17 00:00:00 2001 From: nicholasmhughes Date: Wed, 2 Mar 2022 11:11:10 -0500 Subject: [PATCH 2/3] Revert "put some pre-commit checks in place" This reverts commit 60e3d21067e9cee2c29eb30f50ddbea8794cf17b. --- .pre-commit-config.yaml | 55 -- EksCreationEngine.py | 1640 +++++++++++++++++++++------------------ README.md | 33 +- docs/HOWTO.md | 2 +- main.py | 509 ++++++------ plugins/ECEDatadog.py | 87 +-- plugins/ECEFalco.py | 366 +++++---- plugins/ECESecurity.py | 228 +++--- plugins/__init__.py | 35 +- requirements.txt | 12 +- 10 files changed, 1520 insertions(+), 1447 deletions(-) delete mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 8d4d994..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,55 +0,0 @@ ---- -minimum_pre_commit_version: 2.4.0 -repos: - # ----- Formatting ----------------------------------------------------------------------------> - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 - hooks: - - id: trailing-whitespace # Trims trailing whitespace. - args: [--markdown-linebreak-ext=md] - - id: mixed-line-ending # Replaces or checks mixed line ending. - args: [--fix=lf] - - id: end-of-file-fixer # Makes sure files end in a newline and only a newline. - - id: check-merge-conflict # Check for files that contain merge conflict strings. 
- - id: check-ast # Simply check whether files parse as valid python. - - - repo: https://github.com/asottile/pyupgrade - rev: v2.23.3 - hooks: - - id: pyupgrade - name: Rewrite Code to be Py3.7+ - args: [ - --py37-plus - ] - - - repo: https://github.com/asottile/reorder_python_imports - rev: v2.6.0 - hooks: - - id: reorder-python-imports - args: [ - --py37-plus, - ] - - - repo: https://github.com/psf/black - rev: 21.7b0 - hooks: - - id: black - args: [-l 100] - - - repo: https://github.com/asottile/blacken-docs - rev: v1.10.0 - hooks: - - id: blacken-docs - args: [--skip-errors] - files: ^docs/.*\.md$ - additional_dependencies: [black==21.7b0] - # <---- Formatting ----------------------------------------------------------------------------- - - # ----- Security ------------------------------------------------------------------------------> - - repo: https://github.com/PyCQA/bandit - rev: "1.7.0" - hooks: - - id: bandit - name: Run bandit against the code base - args: [--silent, -lll] - # <---- Security ------------------------------------------------------------------------------- diff --git a/EksCreationEngine.py b/EksCreationEngine.py index 6aa6a7c..ea35bf1 100644 --- a/EksCreationEngine.py +++ b/EksCreationEngine.py @@ -1,78 +1,72 @@ -# This file is part of Lightspin EKS Creation Engine. -# SPDX-License-Identifier: Apache-2.0 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the +#This file is part of Lightspin EKS Creation Engine. +#SPDX-License-Identifier: Apache-2.0 + +#Licensed to the Apache Software Foundation (ASF) under one +#or more contributor license agreements. See the NOTICE file +#distributed with this work for additional information +#regarding copyright ownership. The ASF licenses this file +#to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an +#with the License. You may obtain a copy of the License at + +#http://www.apache.org/licenses/LICENSE-2.0 + +#Unless required by applicable law or agreed to in writing, +#software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. +#KIND, either express or implied. See the License for the +#specific language governing permissions and limitations +#under the License. 
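The import block this hunk restores is the pre-black ordering; patch 1 had grouped it per the reorder-python-imports hook shown in the deleted config above (standard library first, then third party, then local packages, each group alphabetized and separated by a blank line), i.e. the style of the lines being removed here:

    # Style enforced by reorder-python-imports in patch 1 (removed by this revert):
    import base64          # stdlib, alphabetical
    import sys

    import boto3           # third party, after a blank line

    from plugins.ECEFalco import FalcoSetup  # local imports last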
+ import base64 -import json -import re -import subprocess import sys -import time -from datetime import datetime - import boto3 import botocore.exceptions - +import json +from datetime import datetime +import time +import subprocess +import re from plugins.ECEDatadog import DatadogSetup from plugins.ECEFalco import FalcoSetup cache = list() +class ClusterManager(): -class ClusterManager: def get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_architecture): - """ + ''' This function either receives an AMI ID from main.py or receives the default value of 'SSM' which is matched against the arguments `ami_os` and `ami_architecture` to dynamically pull the latest, stable AMI from SSM Public Parameters. - """ - ssm = boto3.client("ssm") + ''' + ssm = boto3.client('ssm') - if ami_id == "SSM": + if ami_id == 'SSM': # Ubuntu 20.04 LTS - if ami_os == "ubuntu": + if ami_os == 'ubuntu': # AMD64 - if ami_architecture == "amd64": + if ami_architecture == 'amd64': # /aws/service/canonical/ubuntu/eks/20.04/1.21/stable/current/amd64/hvm/ebs-gp2/ami-id - publicParameter = str( - f"/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id" - ) + publicParameter = str(f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id') # ARM64 else: # /aws/service/canonical/ubuntu/eks/20.04/1.21/stable/current/arm64/hvm/ebs-gp2/ami-id - publicParameter = str( - f"/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id" - ) + publicParameter = str(f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id') # Amazon Linux 2 # Public Params search in the console is fucky, check here: https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html else: # AMD64 - if ami_architecture == "amd64": + if ami_architecture == 'amd64': # /aws/service/eks/optimized-ami/1.21/amazon-linux-2/recommended/image_id - publicParameter = str( - f"/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2/recommended/image_id" - ) + publicParameter = str(f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2/recommended/image_id') # ARM64 else: # /aws/service/eks/optimized-ami/1.21/amazon-linux-2-arm64/recommended/image_id - publicParameter = str( - f"/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2-arm64/recommended/image_id" - ) + publicParameter = str(f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2-arm64/recommended/image_id') # retrieve the AMI ID and return it try: - amiId = ssm.get_parameter(Name=publicParameter)["Parameter"]["Value"] + amiId = ssm.get_parameter(Name=publicParameter)['Parameter']['Value'] except Exception as e: raise e else: @@ -83,70 +77,88 @@ def get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_arch del ssm del publicParameter - print(f"Your EKS Nodegroup AMI is {amiId}") + print(f'Your EKS Nodegroup AMI is {amiId}') return amiId def create_cluster_svc_role(cluster_role_name): - """ + ''' This function creates a Cluster Service Role for EKS, required for Cluster Creation - """ - iam = boto3.client("iam") - sts = boto3.client("sts") - acctId = sts.get_caller_identity()["Account"] + ''' + iam = boto3.client('iam') + sts = boto3.client('sts') + acctId = sts.get_caller_identity()['Account'] # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt 
information for tagging - createdBy = str(sts.get_caller_identity()["Arn"]) + createdBy = str(sts.get_caller_identity()['Arn']) createdAt = str(datetime.utcnow()) # Trust Policy for EKS trustPolicy = { - "Version": "2012-10-17", - "Statement": [ + 'Version': '2012-10-17', + 'Statement': [ { - "Effect": "Allow", - "Principal": {"Service": "eks.amazonaws.com"}, - "Action": "sts:AssumeRole", + 'Effect': 'Allow', + 'Principal': { + 'Service': 'eks.amazonaws.com' + }, + 'Action': 'sts:AssumeRole' } - ], + ] } try: r = iam.create_role( - Path="/", + Path='/', RoleName=cluster_role_name, AssumeRolePolicyDocument=json.dumps(trustPolicy), - Description="Allows access to other AWS service resources that are required to operate clusters managed by EKS", + Description='Allows access to other AWS service resources that are required to operate clusters managed by EKS', MaxSessionDuration=3600, Tags=[ - {"Key": "Name", "Value": cluster_role_name}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], + { + 'Key': 'Name', + 'Value': cluster_role_name + }, + { + 'Key': 'CreatedBy', + 'Value': createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] ) # Attach required Cluster Policy (AWS Managed) or get following error # botocore.errorfactory.InvalidParameterException: An error occurred (InvalidParameterException) when calling the CreateCluster operation: The provided role doesn't have the Amazon EKS Managed Policies associated with it. Please ensure the following policies [arn:aws:iam::aws:policy/AmazonEKSClusterPolicy] are attached - waiter = iam.get_waiter("role_exists") + waiter = iam.get_waiter('role_exists') - waiter.wait(RoleName=cluster_role_name, WaiterConfig={"Delay": 3, "MaxAttempts": 20}) + waiter.wait( + RoleName=cluster_role_name, + WaiterConfig={ + 'Delay': 3, + 'MaxAttempts': 20 + } + ) iam.attach_role_policy( RoleName=cluster_role_name, - PolicyArn="arn:aws:iam::aws:policy/AmazonEKSClusterPolicy", + PolicyArn='arn:aws:iam::aws:policy/AmazonEKSClusterPolicy' ) - roleArn = str(r["Role"]["Arn"]) + roleArn = str(r['Role']['Arn']) except botocore.exceptions.ClientError as error: # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead - if error.response["Error"]["Code"] == "EntityAlreadyExists": - print( - f"The supplied role name of {cluster_role_name} already exists, attempting to use it" - ) - roleArn = f"arn:aws:iam::{acctId}:role/{cluster_role_name}" + if error.response['Error']['Code'] == 'EntityAlreadyExists': + print(f'The supplied role name of {cluster_role_name} already exists, attempting to use it') + roleArn = f'arn:aws:iam::{acctId}:role/{cluster_role_name}' else: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') RollbackManager.rollback_from_cache(cache=cache) except botocore.exceptions.WaiterError as we: - print(f"Error encountered: {we}") + print(f'Error encountered: {we}') RollbackManager.rollback_from_cache(cache=cache) del iam @@ -154,62 +166,75 @@ def create_cluster_svc_role(cluster_role_name): del acctId del trustPolicy - print(f"Your cluster role ARN is {roleArn}") + print(f'Your cluster role ARN is {roleArn}') return roleArn def create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name): - """ + ''' Creates an IAM Policy that allows S3 GetObject permissions for use in the Nodegroup Role - """ - iam = 
boto3.client("iam") - sts = boto3.client("sts") - acctId = sts.get_caller_identity()["Account"] + ''' + iam = boto3.client('iam') + sts = boto3.client('sts') + acctId = sts.get_caller_identity()['Account'] # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging - createdBy = str(sts.get_caller_identity()["Arn"]) + createdBy = str(sts.get_caller_identity()['Arn']) createdAt = str(datetime.utcnow()) - policyName = f"{nodegroup_role_name}Policy" + policyName = f'{nodegroup_role_name}Policy' iamPolicyDoc = { - "Version": "2012-10-17", - "Statement": [ + 'Version': '2012-10-17', + 'Statement': [ { - "Sid": "GetObjectSid", - "Effect": "Allow", - "Action": [ - "s3:GetObjectAcl", - "s3:GetObject", - "s3:GetBucketAcl", - "s3:GetBucketLocation", + 'Sid': 'GetObjectSid', + 'Effect': 'Allow', + 'Action': [ + 's3:GetObjectAcl', + 's3:GetObject', + 's3:GetBucketAcl', + 's3:GetBucketLocation' ], - "Resource": [f"arn:aws:s3:::{bucket_name}/*", f"arn:aws:s3:::{bucket_name}"], + 'Resource': [ + f'arn:aws:s3:::{bucket_name}/*', + f'arn:aws:s3:::{bucket_name}' + ] } - ], + ] } try: r = iam.create_policy( PolicyName=policyName, - Path="/", + Path='/', PolicyDocument=json.dumps(iamPolicyDoc), - Description="Allows access to specific S3 buckets for node groups managed by EKS - Created by Lightspin ECE", + Description='Allows access to specific S3 buckets for node groups managed by EKS - Created by Lightspin ECE', Tags=[ - {"Key": "Name", "Value": policyName}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], + { + 'Key': 'Name', + 'Value': policyName + }, + { + 'Key': 'CreatedBy', + 'Value': createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] ) - policyArn = str(r["Policy"]["Arn"]) + policyArn = str(r['Policy']['Arn']) except botocore.exceptions.ClientError as error: # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead # we will assume it has the right permissions after all - if error.response["Error"]["Code"] == "EntityAlreadyExists": - print( - f"The supplied role policy name of {policyName} already exists, attempting to use it" - ) - policyArn = f"arn:aws:iam::{acctId}:policy/{policyName}" + if error.response['Error']['Code'] == 'EntityAlreadyExists': + print(f'The supplied role policy name of {policyName} already exists, attempting to use it') + policyArn = f'arn:aws:iam::{acctId}:policy/{policyName}' else: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') RollbackManager.rollback_from_cache(cache=cache) del iam @@ -218,89 +243,110 @@ def create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name): del iamPolicyDoc del policyName - print(f"Your node group role policy ARN is {policyArn}") + print(f'Your node group role policy ARN is {policyArn}') return policyArn def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes): - """ + ''' This function creates a Nodegroup Service Role for EKS, which gives Nodes permissions to interact with AWS APIs. 
This function calls the `create_managed_nodegroup_s3_policy` function and passes the S3 Bucket name specified in main.py to allow your Nodegroup Role to communicate with the S3 bucket for bootstrapping purposes - """ - iam = boto3.client("iam") - sts = boto3.client("sts") - acctId = sts.get_caller_identity()["Account"] + ''' + iam = boto3.client('iam') + sts = boto3.client('sts') + acctId = sts.get_caller_identity()['Account'] roleName = nodegroup_role_name # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging - createdBy = str(sts.get_caller_identity()["Arn"]) + createdBy = str(sts.get_caller_identity()['Arn']) createdAt = str(datetime.utcnow()) # Static list of required AWS Managed Policies for EKS Managed Nodegroup Roles # Adding SSM for SSM access as SSH Keypairs are not specified nodegroupAwsManagedPolicies = [ - "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy", - "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly", - "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy", - "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore", + 'arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy', + 'arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly', + 'arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy', + 'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore' ] # Grab S3 Node Group policy from other Function & add to List if MDE is enabled - if mde_on_nodes == "True": - s3PolicyArn = ClusterManager.create_managed_nodegroup_s3_policy( - bucket_name, nodegroup_role_name - ) + if mde_on_nodes == 'True': + s3PolicyArn = ClusterManager.create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name) nodegroupAwsManagedPolicies.append(s3PolicyArn) # Trust Policy for EKS NodeGroup Role trusts EC2 trustPolicy = { - "Version": "2012-10-17", - "Statement": [ + 'Version': '2012-10-17', + 'Statement': [ { - "Effect": "Allow", - "Principal": {"Service": "ec2.amazonaws.com"}, - "Action": "sts:AssumeRole", + 'Effect': 'Allow', + 'Principal': { + 'Service': 'ec2.amazonaws.com' + }, + 'Action': 'sts:AssumeRole' } - ], + ] } try: r = iam.create_role( - Path="/", + Path='/', RoleName=roleName, AssumeRolePolicyDocument=json.dumps(trustPolicy), - Description="Allows access to other AWS service resources that are required to operate node groups managed by EKS", + Description='Allows access to other AWS service resources that are required to operate node groups managed by EKS', MaxSessionDuration=3600, Tags=[ - {"Key": "Name", "Value": roleName}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], + { + 'Key': 'Name', + 'Value': roleName + }, + { + 'Key': 'CreatedBy', + 'Value': createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] ) - roleArn = str(r["Role"]["Arn"]) + roleArn = str(r['Role']['Arn']) - waiter = iam.get_waiter("role_exists") - waiter.wait(RoleName=roleName, WaiterConfig={"Delay": 3, "MaxAttempts": 20}) + waiter = iam.get_waiter('role_exists') + waiter.wait( + RoleName=roleName, + WaiterConfig={ + 'Delay': 3, + 'MaxAttempts': 20 + } + ) except botocore.exceptions.ClientError as error: # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead # we will assume it has the right permissions after all - if error.response["Error"]["Code"] == "EntityAlreadyExists": - print(f"The supplied role name of {roleName} already exists, 
attempting to use it") - roleArn = f"arn:aws:iam::{acctId}:role/{roleName}" + if error.response['Error']['Code'] == 'EntityAlreadyExists': + print(f'The supplied role name of {roleName} already exists, attempting to use it') + roleArn = f'arn:aws:iam::{acctId}:role/{roleName}' else: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') RollbackManager.rollback_from_cache(cache=cache) except botocore.exceptions.WaiterError as we: - print(f"Error encountered: {we}") + print(f'Error encountered: {we}') RollbackManager.rollback_from_cache(cache=cache) # Loop through List of policies and attach Policies to Role, handle errors if already attached try: for policy in nodegroupAwsManagedPolicies: - iam.attach_role_policy(RoleName=roleName, PolicyArn=policy) + iam.attach_role_policy( + RoleName=roleName, + PolicyArn=policy + ) except Exception as e: - print(f"Error encountered: {e}") + print(f'Error encountered: {e}') RollbackManager.rollback_from_cache(cache=cache) del iam @@ -309,28 +355,28 @@ def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes del trustPolicy del roleName - print(f"Your node group role ARN is {roleArn}") + print(f'Your node group role ARN is {roleArn}') return roleArn def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): - """ + ''' This function creates a minimum necessary Security Group for your EKS Cluster based on AWS reccomendations https://docs.aws.amazon.com/eks/latest/userguide/sec-group-reqs.html this will also add permissions to ports TCP 2801 and TCP 8765 for FalcoSidekick and Falco Security, respectively, for At-Create or later configuration of Falco in a Cluster which provides real-time protection and event forwarding - """ + ''' - ec2 = boto3.client("ec2") - sts = boto3.client("sts") + ec2 = boto3.client('ec2') + sts = boto3.client('sts') - print(f"Setting up a Security Group for VPC {vpc_id} for EKS Cluster {cluster_name}") + print(f'Setting up a Security Group for VPC {vpc_id} for EKS Cluster {cluster_name}') # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging - createdBy = str(sts.get_caller_identity()["Arn"]) + createdBy = str(sts.get_caller_identity()['Arn']) createdAt = str(datetime.utcnow()) # Generate SG Name, passed to the create_security_group() method, and used for general messaging - sgName = str(f"{cluster_name}ClusterSG") + sgName = str(f'{cluster_name}ClusterSG') # Load constants of ports needed reccomended by AWS and needed by Falco/Falco Sidekick defaultPortSet = [53, 443, 2801, 8765, 10250] @@ -339,7 +385,7 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): for p in additional_ports: if int(p) not in defaultPortSet: defaultPortSet.append(int(p)) - + # remove the list, it's not needed anymore del additional_ports @@ -348,52 +394,67 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): # Get CIDR information on the VPC try: - r = ec2.describe_vpcs(VpcIds=[vpc_id])["Vpcs"][0] - vpcMainCidr = str(r["CidrBlock"]) + r = ec2.describe_vpcs(VpcIds=[vpc_id])['Vpcs'][0] + vpcMainCidr = str(r['CidrBlock']) allVpcCidrs.append(vpcMainCidr) # Loop additional CIDRs if they exist and are associated - for cidr in r["CidrBlockAssociationSet"]: - if str(cidr["CidrBlockState"]["State"]) == "associated": - if str(cidr["CidrBlock"]) not in allVpcCidrs: - allVpcCidrs.append(str(cidr["CidrBlock"])) + for cidr in r['CidrBlockAssociationSet']: + if str(cidr['CidrBlockState']['State']) == 'associated': 
+ if str(cidr['CidrBlock']) not in allVpcCidrs: + allVpcCidrs.append(str(cidr['CidrBlock'])) except KeyError as ke: - print(f"Error encountered: {ke}") + print(f'Error encountered: {ke}') RollbackManager.rollback_from_cache(cache=cache) except botocore.exceptions.ClientError as error: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') RollbackManager.rollback_from_cache(cache=cache) # All CIDRs collected and ports consolidated, Security Group creation starts now try: r = ec2.create_security_group( - Description=f"Security Group for EKS Cluster {cluster_name} - Created by {createdBy} using Lightspin ECE", + Description=f'Security Group for EKS Cluster {cluster_name} - Created by {createdBy} using Lightspin ECE', GroupName=sgName, VpcId=vpc_id, TagSpecifications=[ { - "ResourceType": "security-group", - "Tags": [ - {"Key": "Name", "Value": sgName}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, + 'ResourceType': 'security-group', + 'Tags': [ + { + 'Key': 'Name', + 'Value': sgName + }, + { + 'Key': 'CreatedBy', + 'Value': createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + }, # This tag is required per AWS Docs # One, and only one, of the security groups associated to your nodes should have the following tag applied: For more information about tagging, see Working with tags using the console. kubernetes.io/cluster/cluster-name: owned - {"Key": f"kubernetes.io/cluster/{cluster_name}", "Value": "owned"}, - ], + { + 'Key': f'kubernetes.io/cluster/{cluster_name}', + 'Value': 'owned' + } + ] } - ], + ] ) - secGroupId = str(r["GroupId"]) + secGroupId = str(r['GroupId']) - sgCache = {"ClusterSecurityGroupId": secGroupId} + sgCache = { + 'ClusterSecurityGroupId': secGroupId + } cache.append(sgCache) - print(f"Added {sgName} ID {secGroupId} to Cache") - print( - f"Authorizing ingress for Ports {defaultPortSet} for CIDRS {allVpcCidrs} for {sgName}" - ) + print(f'Added {sgName} ID {secGroupId} to Cache') + print(f'Authorizing ingress for Ports {defaultPortSet} for CIDRS {allVpcCidrs} for {sgName}') # Now start adding Inbound Rules per CIDR and per Port # Add conditional logic for port 53 (DNS) to create both TCP and UDP Rules @@ -404,67 +465,91 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): GroupId=secGroupId, IpPermissions=[ { - "FromPort": int(port), - "ToPort": int(port), - "IpProtocol": "tcp", - "IpRanges": [ + 'FromPort': int(port), + 'ToPort': int(port), + 'IpProtocol': 'tcp', + 'IpRanges': [ { - "CidrIp": cidr, - "Description": f"Allow tcp {port} to {cidr}", + 'CidrIp': cidr, + 'Description': f'Allow tcp {port} to {cidr}' } - ], + ] }, { - "FromPort": int(port), - "ToPort": int(port), - "IpProtocol": "udp", - "IpRanges": [ + 'FromPort': int(port), + 'ToPort': int(port), + 'IpProtocol': 'udp', + 'IpRanges': [ { - "CidrIp": cidr, - "Description": f"Allow udp {port} to {cidr}", + 'CidrIp': cidr, + 'Description': f'Allow udp {port} to {cidr}' } - ], - }, + ] + } ], TagSpecifications=[ { - "ResourceType": "security-group-rule", - "Tags": [ - {"Key": "Name", "Value": f"{sgName}{cidr}{port}"}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], + 'ResourceType': 'security-group-rule', + 'Tags': [ + { + 'Key': 'Name', + 'Value': f'{sgName}{cidr}{port}' + }, + { + 'Key': 'CreatedBy', + 'Value': 
createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] } - ], + ] ) else: ec2.authorize_security_group_ingress( GroupId=secGroupId, IpPermissions=[ { - "FromPort": int(port), - "ToPort": int(port), - "IpProtocol": "tcp", - "IpRanges": [ + 'FromPort': int(port), + 'ToPort': int(port), + 'IpProtocol': 'tcp', + 'IpRanges': [ { - "CidrIp": cidr, - "Description": f"Allow tcp {port} to {cidr}", + 'CidrIp': cidr, + 'Description': f'Allow tcp {port} to {cidr}' } - ], + ] } ], TagSpecifications=[ { - "ResourceType": "security-group-rule", - "Tags": [ - {"Key": "Name", "Value": f"{sgName}{cidr}{port}"}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], + 'ResourceType': 'security-group-rule', + 'Tags': [ + { + 'Key': 'Name', + 'Value': f'{sgName}{cidr}{port}' + }, + { + 'Key': 'CreatedBy', + 'Value': createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] } - ], + ] ) # Adding inbound rules per Port for the Security Group itself (talk to self for Node-Cluster Comms) @@ -474,67 +559,91 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): GroupId=secGroupId, IpPermissions=[ { - "FromPort": int(port), - "ToPort": int(port), - "IpProtocol": "tcp", - "UserIdGroupPairs": [ + 'FromPort': int(port), + 'ToPort': int(port), + 'IpProtocol': 'tcp', + 'UserIdGroupPairs': [ { - "Description": f"Allow tcp {port} to {secGroupId}", - "GroupId": secGroupId, + 'Description': f'Allow tcp {port} to {secGroupId}', + 'GroupId': secGroupId } - ], + ] }, { - "FromPort": int(port), - "ToPort": int(port), - "IpProtocol": "udp", - "UserIdGroupPairs": [ + 'FromPort': int(port), + 'ToPort': int(port), + 'IpProtocol': 'udp', + 'UserIdGroupPairs': [ { - "Description": f"Allow udp {port} to {secGroupId}", - "GroupId": secGroupId, + 'Description': f'Allow udp {port} to {secGroupId}', + 'GroupId': secGroupId } - ], - }, + ] + } ], TagSpecifications=[ { - "ResourceType": "security-group-rule", - "Tags": [ - {"Key": "Name", "Value": f"{sgName}{secGroupId}{port}"}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], + 'ResourceType': 'security-group-rule', + 'Tags': [ + { + 'Key': 'Name', + 'Value': f'{sgName}{secGroupId}{port}' + }, + { + 'Key': 'CreatedBy', + 'Value': createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] } - ], + ] ) else: ec2.authorize_security_group_ingress( GroupId=secGroupId, IpPermissions=[ { - "FromPort": int(port), - "ToPort": int(port), - "IpProtocol": "tcp", - "UserIdGroupPairs": [ + 'FromPort': int(port), + 'ToPort': int(port), + 'IpProtocol': 'tcp', + 'UserIdGroupPairs': [ { - "Description": f"Allow tcp {port} to {secGroupId}", - "GroupId": secGroupId, + 'Description': f'Allow tcp {port} to {secGroupId}', + 'GroupId': secGroupId } - ], + ] } ], TagSpecifications=[ { - "ResourceType": "security-group-rule", - "Tags": [ - {"Key": "Name", "Value": f"{sgName}{secGroupId}{port}"}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], + 'ResourceType': 'security-group-rule', + 'Tags': [ + { + 'Key': 'Name', + 'Value': f'{sgName}{secGroupId}{port}' + }, + { + 'Key': 'CreatedBy', + 'Value': 
createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] } - ], + ] ) # Adding TCP 443 (HTTPS) from the internet which is required for patching and agent communications @@ -542,97 +651,120 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): GroupId=secGroupId, IpPermissions=[ { - "FromPort": 443, - "ToPort": 443, - "IpProtocol": "tcp", - "IpRanges": [ - {"CidrIp": "0.0.0.0/0", "Description": f"Allow tcp 443 to Internet"} - ], + 'FromPort': 443, + 'ToPort': 443, + 'IpProtocol': 'tcp', + 'IpRanges': [ + { + 'CidrIp': '0.0.0.0/0', + 'Description': f'Allow tcp 443 to Internet' + } + ] } ], TagSpecifications=[ { - "ResourceType": "security-group-rule", - "Tags": [ - {"Key": "Name", "Value": f"{sgName}Internet{port}"}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], + 'ResourceType': 'security-group-rule', + 'Tags': [ + { + 'Key': 'Name', + 'Value': f'{sgName}Internet{port}' + }, + { + 'Key': 'CreatedBy', + 'Value': createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] } - ], + ] ) except botocore.exceptions.ClientError as error: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') RollbackManager.rollback_from_cache(cache=cache) - print(f"Finished creating {sgName} and adding all required Rule Authorizations") + print(f'Finished creating {sgName} and adding all required Rule Authorizations') return secGroupId def encryption_key_factory(cluster_name): - """ + ''' This function is responsible for creating a KMS Key to use with EKS Secrets Envelope Encryption as well as Nodegroup (EC2) EBS Encryption we will attach a proper Key Policy later - """ - kms = boto3.client("kms") - sts = boto3.client("sts") + ''' + kms = boto3.client('kms') + sts = boto3.client('sts') # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging # STS is also used for the Account ID to interpolate ARNs which will be created later - createdBy = str(sts.get_caller_identity()["Arn"]) + createdBy = str(sts.get_caller_identity()['Arn']) createdAt = str(datetime.utcnow()) - print(f"Creating KMS CMK for encryption operations") + print(f'Creating KMS CMK for encryption operations') # The first time we create the Key we must not attach a policy as the Roles we need to give permission to do not exist yet (nodegroup & cluster IAM role) # it will attach a default policy that allows our entire AWS Account access - this is good so we can override it later try: kmsKeyArn = kms.create_key( - Description=f"Used for EKS Envelope Encryption and EBS Volume Encryption for EKS Cluster {cluster_name} - Created by Lightspin ECE", + Description=f'Used for EKS Envelope Encryption and EBS Volume Encryption for EKS Cluster {cluster_name} - Created by Lightspin ECE', # Default values for AES-256/GCM Keys. 
Being verbose in case AWS ever changes the default values of these
-                KeyUsage="ENCRYPT_DECRYPT",
-                KeySpec="SYMMETRIC_DEFAULT",
-                Origin="AWS_KMS",
+                KeyUsage='ENCRYPT_DECRYPT',
+                KeySpec='SYMMETRIC_DEFAULT',
+                Origin='AWS_KMS',
                Tags=[
-                    {"TagKey": "Name", "TagValue": f"{cluster_name}-EKS-CMK"},
-                    {"TagKey": "CreatedBy", "TagValue": createdBy},
-                    {"TagKey": "CreatedAt", "TagValue": createdAt},
-                    {"TagKey": "CreatedWith", "TagValue": "Lightspin ECE"},
-                ],
-            )["KeyMetadata"]["Arn"]
+                    {
+                        'TagKey': 'Name',
+                        'TagValue': f'{cluster_name}-EKS-CMK'
+                    },
+                    {
+                        'TagKey': 'CreatedBy',
+                        'TagValue': createdBy
+                    },
+                    {
+                        'TagKey': 'CreatedAt',
+                        'TagValue': createdAt
+                    },
+                    {
+                        'TagKey': 'CreatedWith',
+                        'TagValue': 'Lightspin ECE'
+                    }
+                ]
+            )['KeyMetadata']['Arn']
        except KeyError as ke:
-            print(f"Error encountered: {ke}")
+            print(f'Error encountered: {ke}')
            RollbackManager.rollback_from_cache(cache=cache)
        except botocore.exceptions.ParamValidationError as pe:
-            print(f"Error encountered: {pe}")
+            print(f'Error encountered: {pe}')
            RollbackManager.rollback_from_cache(cache=cache)
        except botocore.exceptions.ClientError as error:
-            print(f"Error encountered: {error}")
+            print(f'Error encountered: {error}')
            RollbackManager.rollback_from_cache(cache=cache)

        return kmsKeyArn
-
-    def create_cluster(
-        cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports
-    ):
-        """
-        This function uses the EKS Boto3 Client to create a cluster, taking inputs from main.py to determing naming & Encryption
-        """
-        eks = boto3.client("eks")
-        sts = boto3.client("sts")
+
+    def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports):
+        '''
+        This function uses the EKS Boto3 Client to create a cluster, taking inputs from main.py to determine naming & Encryption
+        '''
+        eks = boto3.client('eks')
+        sts = boto3.client('sts')

        # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging
-        createdBy = str(sts.get_caller_identity()["Arn"])
+        createdBy = str(sts.get_caller_identity()['Arn'])
        createdAt = str(datetime.utcnow())

        # Call `create_cluster_svc_role` to create or re-use the EKS cluster service IAM role
        clusterRoleArn = ClusterManager.create_cluster_svc_role(cluster_role_name)

        # Call `cluster_security_group_factory` to create or re-use an EKS cluster security group that allows minimum necessary comms intra-VPC
-        securityGroupId = ClusterManager.cluster_security_group_factory(
-            cluster_name, vpc_id, additional_ports
-        )
+        securityGroupId = ClusterManager.cluster_security_group_factory(cluster_name, vpc_id, additional_ports)

        # Call `encryption_key_factory` to create a KMS Key ARN. Simple! 
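The `create_cluster` call that follows locks the API endpoint to private-only access, enables all five control-plane log types, and wires the CMK into envelope encryption of Secrets. A small read-back check along these lines can confirm those settings after creation; the helper and its name are illustrative, not part of the engine:

```python
import boto3


def verify_cluster_hardening(cluster_name):
    '''Read back an EKS cluster and confirm the hardened settings applied at create time'''
    eks = boto3.client('eks')
    cluster = eks.describe_cluster(name=cluster_name)['cluster']
    vpc_config = cluster['resourcesVpcConfig']
    return {
        'private_endpoint_only': vpc_config['endpointPrivateAccess'] and not vpc_config['endpointPublicAccess'],
        'secrets_envelope_encrypted': bool(cluster.get('encryptionConfig')),
        'all_log_types_enabled': any(
            entry['enabled'] and len(entry['types']) == 5
            for entry in cluster['logging']['clusterLogging']
        )
    }
```

Each flag maps directly onto an argument visible in the `create_cluster` payload below.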
(We'll add the Key Policy later) kmsKeyArn = ClusterManager.encryption_key_factory(cluster_name) @@ -644,51 +776,60 @@ def create_cluster( version=str(kubernetes_version), roleArn=clusterRoleArn, resourcesVpcConfig={ - "subnetIds": subnet_ids, - "securityGroupIds": [securityGroupId], - "endpointPublicAccess": False, - "endpointPrivateAccess": True, + 'subnetIds': subnet_ids, + 'securityGroupIds': [securityGroupId], + 'endpointPublicAccess': False, + 'endpointPrivateAccess': True }, logging={ - "clusterLogging": [ - { + 'clusterLogging': [ + { # all Logging types are enabled here - "types": [ - "api", - "audit", - "authenticator", - "controllerManager", - "scheduler", - ], - "enabled": True, + 'types': ['api','audit','authenticator','controllerManager','scheduler'], + 'enabled': True } ] }, - encryptionConfig=[{"resources": ["secrets"], "provider": {"keyArn": kmsKeyArn}}], + encryptionConfig=[ + { + 'resources': [ + 'secrets' + ], + 'provider': { + 'keyArn': kmsKeyArn + } + } + ], tags={ - "Name": cluster_name, - "CreatedBy": createdBy, - "CreatedAt": createdAt, - "CreatedWith": "Lightspin ECE", - }, + 'Name': cluster_name, + 'CreatedBy': createdBy, + 'CreatedAt': createdAt, + 'CreatedWith': 'Lightspin ECE' + } ) # Establish provided EKS Waiter() for cluster to come up # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Waiter.ClusterActive - print(f"Waiting for your Cluster to come online") + print(f'Waiting for your Cluster to come online') - waiter = eks.get_waiter("cluster_active") + waiter = eks.get_waiter('cluster_active') - waiter.wait(name=cluster_name, WaiterConfig={"Delay": 30, "MaxAttempts": 40}) + waiter.wait( + name=cluster_name, + WaiterConfig={ + 'Delay': 30, + 'MaxAttempts': 40 + } + ) - finalClusterName = str(r["cluster"]["name"]) + finalClusterName = str(r['cluster']['name']) - print(f"EKS Cluster {finalClusterName} is now live") + print(f'EKS Cluster {finalClusterName} is now live') except botocore.exceptions.ClientError as error: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') RollbackManager.rollback_from_cache(cache=cache) except botocore.exceptions.WaiterError as we: - print(f"Error encountered: {we}") + print(f'Error encountered: {we}') RollbackManager.rollback_from_cache(cache=cache) del eks @@ -701,7 +842,7 @@ def create_cluster( return finalClusterName, securityGroupId, kmsKeyArn, clusterRoleArn def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os): - """ + ''' This function generates EC2 UserData (in Base64) to be passed to the `create_launch_template` Function for creating a custom launch template that uses custom AMIs passed in main.py or defaults to the EKS-optimized AMI for Ubuntu 20.04LTS corresponding to the K8s verson used. This function parses the S3 Bucket from main.py which stores the MDE activation scripts, if that is configured. @@ -711,26 +852,24 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os Details: https://aws.amazon.com/blogs/containers/introducing-launch-template-and-custom-ami-support-in-amazon-eks-managed-node-groups/ WTF is `set -ex`? 
https://askubuntu.com/questions/346900/what-does-set-e-do - """ - eks = boto3.client("eks") + ''' + eks = boto3.client('eks') - print( - f"Retrieving Certificate Authority and API Server URL information for bootstrap script" - ) + print(f'Retrieving Certificate Authority and API Server URL information for bootstrap script') # DescribeCluster and pull necessary values to set as env vars within the bootstrap c = eks.describe_cluster(name=cluster_name) - eksApiServerUrl = str(c["cluster"]["endpoint"]) - eksB64ClusterCa = str(c["cluster"]["certificateAuthority"]["data"]) + eksApiServerUrl = str(c['cluster']['endpoint']) + eksB64ClusterCa = str(c['cluster']['certificateAuthority']['data']) # Support for IMDSv2 Tokens for reaching metadata service # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html#instance-metadata-ex-7 # MDE Installation Scripts: https://docs.microsoft.com/en-us/microsoft-365/security/defender-endpoint/linux-install-manually?view=o365-worldwide - if mde_on_nodes == "True": + if mde_on_nodes == 'True': # Ubuntu - if ami_os == "ubuntu": - script = f"""#!/bin/bash + if ami_os == 'ubuntu': + script = f'''#!/bin/bash set -ex B64_CLUSTER_CA={eksB64ClusterCa} API_SERVER_URL={eksApiServerUrl} @@ -752,10 +891,10 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os TOKEN=$(curl -X PUT 'http://169.254.169.254/latest/api/token' -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600') INSTANCE_ID=$(curl -H 'X-aws-ec2-metadata-token: $TOKEN' -v http://169.254.169.254/latest/meta-data/instance-id) mdatp edr tag set --name GROUP --value $INSTANCE_ID - """ + ''' # Amazon Linux 2 else: - script = f"""#!/bin/bash + script = f'''#!/bin/bash set -ex B64_CLUSTER_CA={eksB64ClusterCa} API_SERVER_URL={eksApiServerUrl} @@ -773,31 +912,31 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os TOKEN=$(curl -X PUT 'http://169.254.169.254/latest/api/token' -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600') INSTANCE_ID=$(curl -H 'X-aws-ec2-metadata-token: $TOKEN' -v http://169.254.169.254/latest/meta-data/instance-id) mdatp edr tag set --name GROUP --value $INSTANCE_ID - """ + ''' else: # No need for MDE in this one, create a regular script # Ubuntu - if ami_os == "ubuntu": - script = f"""#!/bin/bash + if ami_os == 'ubuntu': + script = f'''#!/bin/bash set -ex B64_CLUSTER_CA={eksB64ClusterCa} API_SERVER_URL={eksApiServerUrl} /etc/eks/bootstrap.sh {cluster_name} --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL apt update apt upgrade -y - """ + ''' # Amazon Linux 2 else: - script = f"""#!/bin/bash + script = f'''#!/bin/bash set -ex B64_CLUSTER_CA={eksB64ClusterCa} API_SERVER_URL={eksApiServerUrl} /etc/eks/bootstrap.sh {cluster_name} --b64-cluster-ca $B64_CLUSTER_CA --apiserver-endpoint $API_SERVER_URL yum update -y - """ + ''' # Base64 encode the bootstrap script - userData = base64.b64encode(script.encode()).decode("ascii") + userData = base64.b64encode(script.encode()).decode('ascii') del eks del c @@ -806,167 +945,143 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os return userData - def create_launch_template( - cluster_name, - kubernetes_version, - ami_id, - bucket_name, - launch_template_name, - kms_key_arn, - securityGroupId, - ebs_volume_size, - instance_type, - mde_on_nodes, - ami_os, - ami_architecture, - ): - """ + def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name, launch_template_name, kms_key_arn, securityGroupId, 
ebs_volume_size, instance_type, mde_on_nodes, ami_os, ami_architecture): + ''' This function creates an EC2 Launch Template using encryption and AMI data supplied from main.py and passes it to the `builder` function where final EKS Nodegroup creation takes place - """ + ''' # This is for creating the Launch Template used by EKS to launch Managed Node Groups with a custom AMI & bootstrap script - ec2 = boto3.client("ec2") - sts = boto3.client("sts") + ec2 = boto3.client('ec2') + sts = boto3.client('sts') # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging - createdBy = str(sts.get_caller_identity()["Arn"]) + createdBy = str(sts.get_caller_identity()['Arn']) createdAt = str(datetime.utcnow()) # Pull latest AMI ID for EKS-optimized Ubuntu 20.04LTS for specified K8s Version in main.py - amiId = ClusterManager.get_latest_eks_optimized_ubuntu( - kubernetes_version, ami_id, ami_os, ami_architecture - ) + amiId = ClusterManager.get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_architecture) # Retrieve Base64 metadata from bootstrap generation function - this will download and install MDE (MDATP) from files in the S3 bucket specified in main.py if --mde_on_nodes is true. Will use ami_os arguements to create different UserData as well - userData = ClusterManager.generate_nodegroup_bootstrap( - bucket_name, cluster_name, mde_on_nodes, ami_os - ) + userData = ClusterManager.generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os) # For IMDSv2 - keeping this outside for eventual modification of hop limits? metadataOptions = { - "HttpTokens": "required", - "HttpPutResponseHopLimit": 2, - "HttpEndpoint": "enabled", + 'HttpTokens': 'required', + 'HttpPutResponseHopLimit': 2, + 'HttpEndpoint': 'enabled' } try: r = ec2.create_launch_template( DryRun=False, LaunchTemplateName=launch_template_name, - VersionDescription=f"Created by the EKS Creation Engine on {createdAt}", + VersionDescription=f'Created by the EKS Creation Engine on {createdAt}', LaunchTemplateData={ - "EbsOptimized": False, - "BlockDeviceMappings": [ + 'EbsOptimized': False, + 'BlockDeviceMappings': [ { - "DeviceName": "/dev/sda1", - "Ebs": { - "Encrypted": True, - "DeleteOnTermination": True, - "KmsKeyId": kms_key_arn, - "VolumeSize": int(ebs_volume_size), - "VolumeType": "gp2", - }, + 'DeviceName': '/dev/sda1', + 'Ebs': { + 'Encrypted': True, + 'DeleteOnTermination': True, + 'KmsKeyId': kms_key_arn, + 'VolumeSize': int(ebs_volume_size), + 'VolumeType': 'gp2' + } } ], - "ImageId": amiId, - "InstanceType": instance_type, - "UserData": str(userData), - "SecurityGroupIds": [securityGroupId], - "MetadataOptions": metadataOptions, - "TagSpecifications": [ + 'ImageId': amiId, + 'InstanceType': instance_type, + 'UserData': str(userData), + 'SecurityGroupIds': [securityGroupId], + 'MetadataOptions': metadataOptions, + 'TagSpecifications': [ { - "ResourceType": "instance", - "Tags": [ - {"Key": "Name", "Value": str(f"{launch_template_name}Node")}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], + 'ResourceType': 'instance', + 'Tags': [ + { + 'Key': 'Name', + 'Value': str(f'{launch_template_name}Node') + }, + { + 'Key': 'CreatedBy', + 'Value': createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] }, { - "ResourceType": "volume", - "Tags": [ - {"Key": "Name", "Value": 
str(f"{launch_template_name}Node")}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], - }, - ], - }, + 'ResourceType': 'volume', + 'Tags': [ + { + 'Key': 'Name', + 'Value': str(f'{launch_template_name}Node') + }, + { + 'Key': 'CreatedBy', + 'Value': createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] + } + ] + } ) - launchTemplateId = str(r["LaunchTemplate"]["LaunchTemplateId"]) + launchTemplateId = str(r['LaunchTemplate']['LaunchTemplateId']) except botocore.exceptions.ClientError as error: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') RollbackManager.rollback_from_cache(cache=cache) except Exception as e: - print(f"Error encountered: {e}") + print(f'Error encountered: {e}') RollbackManager.rollback_from_cache(cache=cache) return launchTemplateId - - def builder( - kubernetes_version, - bucket_name, - ebs_volume_size, - ami_id, - instance_type, - cluster_name, - cluster_role_name, - nodegroup_name, - nodegroup_role_name, - launch_template_name, - vpc_id, - subnet_ids, - node_count, - mde_on_nodes, - additional_ports, - falco_bool, - falco_sidekick_destination_type, - falco_sidekick_destination, - ami_os, - ami_architecture, - datadog_api_key, - datadog_bool, - addtl_auth_principals, - ): - """ + + def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_type, cluster_name, cluster_role_name, nodegroup_name, nodegroup_role_name, launch_template_name, vpc_id, subnet_ids, node_count, mde_on_nodes, additional_ports, falco_bool, falco_sidekick_destination_type, falco_sidekick_destination, ami_os, ami_architecture, datadog_api_key, datadog_bool, addtl_auth_principals): + ''' This function is the 'brain' that controls creation and calls the required functions to build infrastructure and services (EKS, EC2, IAM). 
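The cache contract here is simple: record identifying names into a module-level list the moment they are chosen, so a failure at any later stage can be unwound. A stripped-down sketch of the pattern (every name below is hypothetical):

```python
cache = []


def build():
    # Record identifiers before any risky AWS call is made
    cache.append({'ClusterName': 'demo-cluster', 'NodegroupName': 'demo-nodes'})
    try:
        raise RuntimeError('simulated mid-build failure')
    except RuntimeError:
        rollback_from_cache(cache)


def rollback_from_cache(entries):
    # Unwind in reverse creation order, best-effort deleting whatever was recorded
    for entry in reversed(entries):
        for resource_type, name in entry.items():
            print(f'would delete {resource_type}: {name}')


build()
```

Caching names rather than returned ARNs means the rollback still has something to work with even when a creation call fails before returning an identifier.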
This function also stores all required arguments into cache to facilitate rollbacks upon errors - """ + ''' # Write argument variables that are directly used for infrastructure creation to cache # Assemble names for Security Groups (these will be replicated everywhere but not passed around to minimize **kwargs bloat) cacheDict = { - "ClusterName": cluster_name, - "ClusterRoleName": cluster_role_name, - "NodegroupName": nodegroup_name, - "NodegroupRoleName": nodegroup_role_name, - "LaunchTemplateName": launch_template_name, + 'ClusterName': cluster_name, + 'ClusterRoleName': cluster_role_name, + 'NodegroupName': nodegroup_name, + 'NodegroupRoleName': nodegroup_role_name, + 'LaunchTemplateName': launch_template_name } cache.append(cacheDict) - print(f"Cache loaded with necessary rollback variables.") + print(f'Cache loaded with necessary rollback variables.') - eks = boto3.client("eks") - sts = boto3.client("sts") - kms = boto3.client("kms") - iam = boto3.client("iam") + eks = boto3.client('eks') + sts = boto3.client('sts') + kms = boto3.client('kms') + iam = boto3.client('iam') # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging - createdBy = str(sts.get_caller_identity()["Arn"]) + createdBy = str(sts.get_caller_identity()['Arn']) createdAt = str(datetime.utcnow()) - acctId = str(sts.get_caller_identity()["Account"]) + acctId = str(sts.get_caller_identity()['Account']) # Create an EKS Cluster by calling `create_cluster` - this will take the longest, and if it fails, then other infrastructure won't be created # the positional selectors are for when you return multiple values, they are bundled in a tuple, and have to be accessed in the order they're provided - callClusterManager = ClusterManager.create_cluster( - cluster_name, - kubernetes_version, - cluster_role_name, - subnet_ids, - vpc_id, - additional_ports, - ) + callClusterManager = ClusterManager.create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports) clusterName = callClusterManager[0] securityGroupId = callClusterManager[1] kms_key_arn = callClusterManager[2] @@ -974,9 +1089,7 @@ def builder( # Passes the S3 Bucket name to the `create_managed_nodegroup_role` function which in turn passes it to the `create_managed_nodegroup_s3_policy` # function which allows your Nodegroups to pull artifacts from S3 as part of bootstrapping - nodegroupRoleArn = ClusterManager.create_managed_nodegroup_role( - bucket_name, nodegroup_role_name, mde_on_nodes - ) + nodegroupRoleArn = ClusterManager.create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes) # Now we can attach our proper Key Policy to the KMS Key since we now have all Roles ready @@ -987,16 +1100,14 @@ def builder( # So this tries to create an ARN of the IAM Role you assumed (or transparently assumed if you are using WorkSpaces, Cloud9 or SSM Hybrid Activations or otherwise) # It is extremely stupid...YMMV for deletion # arn:aws:sts::ACCOUNT_ID:assumed-role/ROLE_NAME/ROLE_SESSION_NAME - seshRoleRegex = re.compile("assumed-role") + seshRoleRegex = re.compile('assumed-role') seshRoleCheck = seshRoleRegex.search(createdBy) # On match to Regex do stupid stuff >:( if seshRoleCheck: - print( - f"Your ARN from STS AssumeRole {createdBy} matches a temporary Session ARN, attempting to find your upstream IAM Role" - ) - roleNameSplit = createdBy.split("/")[1] - createdByRoleArn = f"arn:aws:iam::{acctId}:role/{roleNameSplit}" - print(f"Your Role ARN upstream to your session was 
determined as {createdByRoleArn}") + print(f'Your ARN from STS AssumeRole {createdBy} matches a temporary Session ARN, attempting to find your upstream IAM Role') + roleNameSplit = createdBy.split('/')[1] + createdByRoleArn = f'arn:aws:iam::{acctId}:role/{roleNameSplit}' + print(f'Your Role ARN upstream to your session was determined as {createdByRoleArn}') else: # If you're not an assumed Role you're just a User or a Role and should be fine?? createdByRoleArn = createdBy @@ -1004,17 +1115,14 @@ def builder( # Setup a modified version of the Default KMS Policy, eliminating some Conditional statements to allow Autoscaling, EKS, and EC2 to use the key and set Grants # First, attempt to create the SLR for the Autoscaling group if it does not exist, see: https://docs.aws.amazon.com/IAM/latest/UserGuide/using-service-linked-roles.html try: - r = iam.create_service_linked_role(AWSServiceName="autoscaling.amazonaws.com") - slrRole = str(r["Role"]["RoleName"]) - print(f"Created Service-linked Role for Autoscaling called {slrRole}") + r = iam.create_service_linked_role(AWSServiceName='autoscaling.amazonaws.com') + slrRole = str(r['Role']['RoleName']) + print(f'Created Service-linked Role for Autoscaling called {slrRole}') except Exception as e: - if ( - str(e) - == "An error occurred (InvalidInput) when calling the CreateServiceLinkedRole operation: Service role name AWSServiceRoleForAutoScaling has been taken in this account, please try a different suffix." - ): + if str(e) == 'An error occurred (InvalidInput) when calling the CreateServiceLinkedRole operation: Service role name AWSServiceRoleForAutoScaling has been taken in this account, please try a different suffix.': pass else: - print(f"Error encountered: {e}") + print(f'Error encountered: {e}') RollbackManager.rollback_from_cache(cache=cache) # Then check if there are any additional authorized principals specified for the cluster to add to the below static list of principals @@ -1024,7 +1132,7 @@ def builder( clusterRoleArn, nodegroupRoleArn, createdByRoleArn, - f"arn:aws:iam::{acctId}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling", + f'arn:aws:iam::{acctId}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling' ] # Check if additional AuthZ IAM Principals are even provided. 
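The assumed-role unwrapping above relies on STS session ARNs having a fixed shape; pulled out on its own, with a usage line at the end, the conversion looks roughly like this (helper name is illustrative):

```python
import re


def session_arn_to_role_arn(caller_arn, account_id):
    '''Map an STS assumed-role session ARN back to the underlying IAM role ARN'''
    # arn:aws:sts::111122223333:assumed-role/MyRole/my-session -> arn:aws:iam::111122223333:role/MyRole
    if re.search('assumed-role', caller_arn):
        role_name = caller_arn.split('/')[1]
        return f'arn:aws:iam::{account_id}:role/{role_name}'
    # Plain IAM user or role ARNs pass through unchanged
    return caller_arn


print(session_arn_to_role_arn('arn:aws:sts::111122223333:assumed-role/MyRole/my-session', '111122223333'))
```

Splitting on `/` drops any path component from the role name, which is exactly the 'YMMV' caveat the comments above are hinting at.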
If so, add them to the list if they're not there already @@ -1034,86 +1142,87 @@ def builder( kmsAuthZPrincipals.append(arn) keyPolicyJson = { - "Version": "2012-10-17", - "Id": "ecekeypolicy", - "Statement": [ + 'Version':'2012-10-17', + 'Id':'ecekeypolicy', + 'Statement': [ # full key usage by whoever creates the key { - "Sid": "Key Creator Admin", - "Effect": "Allow", - "Principal": {"AWS": createdByRoleArn}, - "Action": "kms:*", - "Resource": "*", + 'Sid': 'Key Creator Admin', + 'Effect': 'Allow', + 'Principal': { + 'AWS': createdByRoleArn + }, + 'Action':'kms:*', + 'Resource':'*' }, # This allows usage of the key by the Cluster & Nodegroup and aws-managed service principals # Creator is added throughout as well # AWS Auto Scaling service role is added per: https://docs.aws.amazon.com/autoscaling/ec2/userguide/key-policy-requirements-EBS-encryption.html { - "Sid": "Allow use of the key", - "Effect": "Allow", - "Principal": { - "AWS": kmsAuthZPrincipals, - "Service": ["autoscaling.amazonaws.com", "ec2.amazonaws.com"], + 'Sid': 'Allow use of the key', + 'Effect': 'Allow', + 'Principal': { + 'AWS': kmsAuthZPrincipals, + 'Service': [ + 'autoscaling.amazonaws.com', + 'ec2.amazonaws.com' + ] }, - "Action": [ - "kms:Encrypt", - "kms:Decrypt", - "kms:ReEncrypt*", - "kms:GenerateDataKey*", - "kms:DescribeKey", + 'Action': [ + 'kms:Encrypt', + 'kms:Decrypt', + 'kms:ReEncrypt*', + 'kms:GenerateDataKey*', + 'kms:DescribeKey' ], - "Resource": "*", + 'Resource': '*' }, { - "Sid": "Allow attachment of persistent resources", - "Effect": "Allow", - "Principal": { - "AWS": kmsAuthZPrincipals, - "Service": ["autoscaling.amazonaws.com", "ec2.amazonaws.com"], + 'Sid': 'Allow attachment of persistent resources', + 'Effect': 'Allow', + 'Principal': { + 'AWS': kmsAuthZPrincipals, + 'Service': [ + 'autoscaling.amazonaws.com', + 'ec2.amazonaws.com' + ] }, - "Action": ["kms:CreateGrant", "kms:ListGrants", "kms:RevokeGrant"], - "Resource": "*", - }, - ], + 'Action': [ + 'kms:CreateGrant', + 'kms:ListGrants', + 'kms:RevokeGrant' + ], + 'Resource': '*' + } + ] } # For whatever reason, role propagation is a bit delayed with registration on the KMS Resource-based resource policy side # we will sleep for a few seconds on top of using waiters to make sure they propagate and avoid errors... 
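An alternative to the fixed sleep (not what the engine does, but a common pattern) is to retry `PutKeyPolicy` and key the retry off the specific propagation failure: KMS typically rejects a policy that references a not-yet-propagated principal with `MalformedPolicyDocumentException`. A hedged sketch, with illustrative parameter names:

```python
import json
import time

import boto3
import botocore.exceptions


def put_key_policy_with_retry(kms_key_arn, key_policy, attempts=6, delay=5):
    '''Retry PutKeyPolicy until newly created IAM principals propagate'''
    kms = boto3.client('kms')
    for attempt in range(1, attempts + 1):
        try:
            kms.put_key_policy(
                KeyId=kms_key_arn,
                PolicyName='default',
                Policy=json.dumps(key_policy)
            )
            return
        except botocore.exceptions.ClientError as error:
            # KMS raises this when it cannot yet resolve a principal named in the policy
            if error.response['Error']['Code'] == 'MalformedPolicyDocumentException' and attempt != attempts:
                time.sleep(delay * attempt)
                continue
            raise
```

The linear backoff is arbitrary; the point is to key the retry off the propagation error itself rather than a wall-clock guess.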
- print(f"Attaching Key Policy to KMS Key {kms_key_arn}") + print(f'Attaching Key Policy to KMS Key {kms_key_arn}') time.sleep(20) try: kms.put_key_policy( - KeyId=kms_key_arn, PolicyName="default", Policy=json.dumps(keyPolicyJson) + KeyId=kms_key_arn, + PolicyName='default', + Policy=json.dumps(keyPolicyJson) ) - print(f"Key Policy attached to {kms_key_arn}") + print(f'Key Policy attached to {kms_key_arn}') except KeyError as ke: - print(f"Error encountered: {ke}") + print(f'Error encountered: {ke}') RollbackManager.rollback_from_cache(cache=cache) except botocore.exceptions.ParamValidationError as pe: - print(f"Error encountered: {pe}") + print(f'Error encountered: {pe}') RollbackManager.rollback_from_cache(cache=cache) except botocore.exceptions.ClientError as error: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') RollbackManager.rollback_from_cache(cache=cache) - + # Passes various arguements to the `create_launch_template` which returns a Launch Template ID (of the latest version) to pass to the Nodegroup creation payload - launchTemplateId = ClusterManager.create_launch_template( - cluster_name, - kubernetes_version, - ami_id, - bucket_name, - launch_template_name, - kms_key_arn, - securityGroupId, - ebs_volume_size, - instance_type, - mde_on_nodes, - ami_os, - ami_architecture, - ) + launchTemplateId = ClusterManager.create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name, launch_template_name, kms_key_arn, securityGroupId, ebs_volume_size, instance_type, mde_on_nodes, ami_os, ami_architecture) - print(f"Creating Nodegroup {nodegroup_name} for Cluster {clusterName}") + print(f'Creating Nodegroup {nodegroup_name} for Cluster {clusterName}') # Create and launch the Nodegroup try: @@ -1121,426 +1230,448 @@ def builder( clusterName=clusterName, nodegroupName=nodegroup_name, scalingConfig={ - "minSize": int(node_count), - "maxSize": int(node_count) * 2, - "desiredSize": int(node_count), + 'minSize': int(node_count), + 'maxSize': int(node_count) * 2, + 'desiredSize': int(node_count) }, nodeRole=nodegroupRoleArn, subnets=subnet_ids, - launchTemplate={"id": launchTemplateId}, - capacityType="ON_DEMAND", - tags={ - "Name": nodegroup_name, - "CreatedBy": createdBy, - "CreatedAt": createdAt, - "CreatedWith": "Lightspin ECE", + launchTemplate={ + 'id': launchTemplateId }, + capacityType='ON_DEMAND', + tags={ + 'Name': nodegroup_name, + 'CreatedBy': createdBy, + 'CreatedAt': createdAt, + 'CreatedWith': 'Lightspin ECE' + } ) # Await Nodegroups to come online # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Waiter.NodegroupActive - waiter = eks.get_waiter("nodegroup_active") - print(f"Awaiting EKS Nodegroup {nodegroup_name} to come online") + waiter = eks.get_waiter('nodegroup_active') + print(f'Awaiting EKS Nodegroup {nodegroup_name} to come online') waiter.wait( clusterName=clusterName, nodegroupName=nodegroup_name, - WaiterConfig={"Delay": 30, "MaxAttempts": 80}, + WaiterConfig={ + 'Delay': 30, + 'MaxAttempts': 80 + } ) except botocore.exceptions.ClientError as error: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') RollbackManager.rollback_from_cache(cache=cache) except botocore.exceptions.WaiterError as we: - print(f"Error encountered: {we}") + print(f'Error encountered: {we}') RollbackManager.rollback_from_cache(cache=cache) - print(f"Creation complete. Nodegroup {nodegroup_name} in Cluster {clusterName} is online") + print(f'Creation complete. 
Nodegroup {nodegroup_name} in Cluster {clusterName} is online') # Retrieve region for AWS CLI kubectl generation session = boto3.session.Session() awsRegion = session.region_name # Setup first time cluster connection with AWS CLI - updateKubeconfigCmd = f"aws eks update-kubeconfig --region {awsRegion} --name {clusterName}" - updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, capture_output=True) - print(updateKubeconfigProc.stdout.decode("utf-8")) + updateKubeconfigCmd = f'aws eks update-kubeconfig --region {awsRegion} --name {clusterName}' + updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + print(updateKubeconfigProc.stdout.decode('utf-8')) # If additional principals are required to be authorized, attempt to do so if addtl_auth_principals: for arn in addtl_auth_principals: # Split out the name part of the Role - addtlRoleName = str(arn.split("/")[1]) + addtlRoleName = str(arn.split('/')[1]) # Create a patch object to add into - newAuthZScript = f"""ROLE=" - rolearn: {arn}\\n username: {addtlRoleName}\\n groups:\\n - system:masters" - kubectl get -n kube-system configmap/aws-auth -o yaml | awk "/mapRoles: \\|/{{print;print \\"$ROLE\\";next}}1" > /tmp/aws-auth-patch.yml + newAuthZScript=f'''ROLE=" - rolearn: {arn}\\n username: {addtlRoleName}\\n groups:\\n - system:masters" + kubectl get -n kube-system configmap/aws-auth -o yaml | awk "/mapRoles: \|/{{print;print \\"$ROLE\\";next}}1" > /tmp/aws-auth-patch.yml kubectl patch configmap/aws-auth -n kube-system --patch "$(cat /tmp/aws-auth-patch.yml)" - """ + ''' - newAuthZScriptProc = subprocess.run(newAuthZScript, shell=True, capture_output=True) - print(newAuthZScriptProc.stdout.decode("utf-8")) + newAuthZScriptProc = subprocess.run(newAuthZScript, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + print(newAuthZScriptProc.stdout.decode('utf-8')) - """ + ''' Send a call into plugins.ECEFalco - """ - if falco_bool == "True": + ''' + if falco_bool == 'True': FalcoSetup.falco_initialization( - cluster_name=clusterName, - falco_mode="Create", - falco_sidekick_destination_type=falco_sidekick_destination_type, + cluster_name=clusterName, + falco_mode='Create', + falco_sidekick_destination_type=falco_sidekick_destination_type, falco_sidekick_destination=falco_sidekick_destination, - datadog_api_key=datadog_api_key, + datadog_api_key=datadog_api_key ) - """ + ''' Send a call into plugins.ECEDatadog - """ - if datadog_bool == "True": + ''' + if datadog_bool == 'True': DatadogSetup.initialization( - cluster_name=clusterName, datadog_mode="Create", datadog_api_key=datadog_api_key + cluster_name=clusterName, + datadog_mode='Create', + datadog_api_key=datadog_api_key ) - - -""" +''' This Class handles all update tasks to the Clusters, such as version bumps to latest Kubenertes Versions -""" - - -class UpdateManager: +''' +class UpdateManager(): + def update_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version): - """ + ''' This function attempts to update existing Cluster and Nodegroup to a specified Kubernetes Version by invoking separate functions after a basic version match test - """ + ''' - eks = boto3.client("eks") + eks = boto3.client('eks') # Lookup EKS Cluster to see if specified K8s version from main.py matches, if so exit try: - existingClusterVersion = eks.describe_cluster(name=cluster_name)["cluster"]["version"] + existingClusterVersion = eks.describe_cluster(name=cluster_name)['cluster']['version'] if existingClusterVersion == 
kubernetes_version:
-                print(
-                    f"EKS Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting"
-                )
+                print(f'EKS Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting')
                sys.exit(2)
            else:
-                print(
-                    f"EKS Cluster {cluster_name} is viable to update from Kubernetes version {existingClusterVersion} to {kubernetes_version}"
-                )
+                print(f'EKS Cluster {cluster_name} is viable to update from Kubernetes version {existingClusterVersion} to {kubernetes_version}')
        except botocore.exceptions.ClientError as error:
-            # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead
-            if error.response["Error"]["Code"] == "ResourceNotFoundException":
-                print(f"EKS Cluster {cluster_name} does not exist! Aborting")
+            # A 'ResourceNotFoundException' here means the Cluster itself does not exist, so abort
+            if error.response['Error']['Code'] == 'ResourceNotFoundException':
+                print(f'EKS Cluster {cluster_name} does not exist! Aborting')
                sys.exit(2)
            else:
                raise error

        # Lookup EKS Nodegroup to see if specified K8s version from main.py matches, if so exit
        try:
-            existingNodegroupVersion = eks.describe_cluster(
-                name=cluster_name, nodegroupName=nodegroup_name
-            )["nodegroup"]["version"]
+            # DescribeCluster does not take a nodegroup name; DescribeNodegroup is the correct API here
+            existingNodegroupVersion = eks.describe_nodegroup(clusterName=cluster_name, nodegroupName=nodegroup_name)['nodegroup']['version']
            if existingNodegroupVersion == kubernetes_version:
-                print(
-                    f"EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting"
-                )
+                print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting')
                sys.exit(2)
            else:
-                print(
-                    f"EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is viable to update from Kubernetes version {existingNodegroupVersion} to {kubernetes_version}"
-                )
+                print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is viable to update from Kubernetes version {existingNodegroupVersion} to {kubernetes_version}')
        except botocore.exceptions.ClientError as error:
-            # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead
-            if error.response["Error"]["Code"] == "ResourceNotFoundException":
-                print(
-                    f"EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} does not exist! Aborting"
-                )
+            # A 'ResourceNotFoundException' here means the Nodegroup does not exist, so abort
+            if error.response['Error']['Code'] == 'ResourceNotFoundException':
+                print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} does not exist! 
Aborting') sys.exit(2) else: raise error - UpdateManager.update_nodegroup_kubernetes_version( - cluster_name, nodegroup_name, kubernetes_version - ) + UpdateManager.update_nodegroup_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version) UpdateManager.update_cluster_kubernetes_version(cluster_name, kubernetes_version) def update_nodegroup_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version): - """ + ''' This function carries out the update and waiter for EKS Nodegroup K8s version bumps - """ - print( - f"Updating Kubernetes version for EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}" - ) + ''' + print(f'Updating Kubernetes version for EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}') - eks = boto3.client("eks") + eks = boto3.client('eks') # Update the Nodegroup K8s version and parse the EKS Update ID for later use # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Client.update_nodegroup_version r = eks.update_nodegroup_version( - clusterName=cluster_name, nodegroupName=nodegroup_name, version=kubernetes_version + clusterName=cluster_name, + nodegroupName=nodegroup_name, + version=kubernetes_version ) - updateId = str(r["update"]["id"]) + updateId = str(r['update']['id']) - print(f"Monitoring EKS Update ID {updateId} for failure or success state.") + print(f'Monitoring EKS Update ID {updateId} for failure or success state.') # Use a `while True` loop and 15 second sleeps to watch the update progress of the cluster # Break the loop on Success, continue on 'InProgress', and exit code 2 on failures or cancellations while True: d = eks.describe_update( - name=cluster_name, updateId=updateId, nodegroupName=nodegroup_name + name=cluster_name, + updateId=updateId, + nodegroupName=nodegroup_name ) - updateStatus = str(d["update"]["status"]) + updateStatus = str(d['update']['status']) # if/else logic time - if updateStatus == "Successful": - print( - f"Nodegroup {nodegroup_name} in Cluster {cluster_name} has been successfully updated." - ) + if updateStatus == 'Successful': + print(f'Nodegroup {nodegroup_name} in Cluster {cluster_name} has been successfully updated.') break - elif updateStatus == "Failed" or "Cancelled": - errorMessage = str(d["update"]["errors"]) - print( - f"Nodegroup {nodegroup_name} in Cluster {cluster_name} update has been cancelled or has failed!" 
-                )
-                print(f"Error message: {errorMessage}")
+            # `updateStatus == 'Failed' or 'Cancelled'` is always truthy, so test membership instead
+            elif updateStatus in ('Failed', 'Cancelled'):
+                errorMessage = str(d['update']['errors'])
+                print(f'Nodegroup {nodegroup_name} in Cluster {cluster_name} update has been cancelled or has failed!')
+                print(f'Error message: {errorMessage}')
                sys.exit(2)
            else:
-                print(f"Awaiting update status change for 15 more seconds...")
+                print(f'Awaiting update status change for 15 more seconds...')
                del d
                del updateStatus
                time.sleep(15)
                continue

    def update_cluster_kubernetes_version(cluster_name, kubernetes_version):
-        """
+        '''
        This function carries out the update and waiter for EKS Cluster K8s version bumps
-        """
-        print(f"Updating Kubernetes version for EKS Cluster {cluster_name}")
+        '''
+        print(f'Updating Kubernetes version for EKS Cluster {cluster_name}')

-        eks = boto3.client("eks")
+        eks = boto3.client('eks')

-        # Update the Nodegroup K8s version and parse the EKS Update ID for later use
-        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Client.update_nodegroup_version
-        r = eks.update_nodegroup_version(clusterName=cluster_name, version=kubernetes_version)
-        updateId = str(r["update"]["id"])
+        # Update the Cluster K8s version and parse the EKS Update ID for later use
+        # UpdateClusterVersion (not UpdateNodegroupVersion) targets the control plane, and it takes `name` rather than `clusterName`
+        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Client.update_cluster_version
+        r = eks.update_cluster_version(
+            name=cluster_name,
+            version=kubernetes_version
+        )
+        updateId = str(r['update']['id'])

-        print(f"Monitoring EKS Update ID {updateId} for failure or success state.")
+        print(f'Monitoring EKS Update ID {updateId} for failure or success state.')

        # Use a `while True` loop and 15 second sleeps to watch the update progress of the cluster
        # Break the loop on Success, continue on 'InProgress', and exit code 2 on failures or cancellations
        while True:
-            d = eks.describe_update(name=cluster_name, updateId=updateId)
-            updateStatus = str(d["update"]["status"])
+            d = eks.describe_update(
+                name=cluster_name,
+                updateId=updateId
+            )
+            updateStatus = str(d['update']['status'])
            # if/else logic time
-            if updateStatus == "Successful":
-                print(f"Cluster {cluster_name} has been successfully updated.")
+            if updateStatus == 'Successful':
+                print(f'Cluster {cluster_name} has been successfully updated.')
                break
-            elif updateStatus == "Failed" or "Cancelled":
-                errorMessage = str(d["update"]["errors"])
-                print(f"Cluster {cluster_name} update has been cancelled or has failed!")
-                print(f"Error message: {errorMessage}")
+            elif updateStatus in ('Failed', 'Cancelled'):
+                errorMessage = str(d['update']['errors'])
+                print(f'Cluster {cluster_name} update has been cancelled or has failed!')
+                print(f'Error message: {errorMessage}')
                sys.exit(2)
            else:
-                print(f"Awaiting update status change for 15 more seconds...")
+                print(f'Awaiting update status change for 15 more seconds...')
                del d
                del updateStatus
                time.sleep(15)
                continue

-
-"""
+'''
-Despite it's name, this Class contains methods to conduct emergency deletions (rollback) from Cache as well as normal deletions from main.py commands
+Despite its name, this Class contains methods to conduct emergency deletions (rollback) from Cache as well as normal deletions from main.py commands
this is purely for Create mode, other Classes may have their own self-contained rollback mechanism
-"""
-
+'''
+class RollbackManager():

-class RollbackManager:
-    def scheduled_deletion(
-        nodegroup_name, cluster_name, cluster_role_name, nodegroup_role_name, launch_template_name
-    ):
-        """
+    def scheduled_deletion(nodegroup_name, cluster_name, cluster_role_name, nodegroup_role_name, launch_template_name):
+        '''
        This function performs a graceful, scheduled deletion of all resources - or attempts to at least
-        """
-        eks = boto3.client("eks")
+        '''
+        eks = boto3.client('eks')

-        print(f"Deletion command received. 
Attempting to delete all resources") + print(f'Deletion command received. Attempting to delete all resources') # Retrieve the Security Groups from the Cluster to delete, as they are not provided as arguments and cannot be guessed (ID's and all that...) sgList = [] - for sg in eks.describe_cluster(name=cluster_name)["cluster"]["resourcesVpcConfig"][ - "securityGroupIds" - ]: + for sg in eks.describe_cluster(name=cluster_name)['cluster']['resourcesVpcConfig']['securityGroupIds']: sgList.append(sg) # First, attempt to delete Nodegroup - RollbackManager.delete_nodegroup(nodegroup_name=nodegroup_name, cluster_name=cluster_name) + RollbackManager.delete_nodegroup( + nodegroup_name=nodegroup_name, + cluster_name=cluster_name + ) # Then, try to find the Cluster KMS Key and attempt to delete it try: - kmsKeyArn = eks.describe_cluster(name=cluster_name)["cluster"]["encryptionConfig"][0][ - "provider" - ]["keyArn"] + kmsKeyArn= eks.describe_cluster(name=cluster_name)['cluster']['encryptionConfig'][0]['provider']['keyArn'] except Exception: kmsKeyArn = None - + if kmsKeyArn != None: - RollbackManager.delete_kms_key(kms_key_arn=kmsKeyArn) + RollbackManager.delete_kms_key( + kms_key_arn=kmsKeyArn + ) # Next, attempt to delete Cluster - RollbackManager.delete_cluster(cluster_name=cluster_name) + RollbackManager.delete_cluster( + cluster_name=cluster_name + ) # Next, attempt to delete all related IAM RollbackManager.delete_eks_iam( - cluster_role_name=cluster_role_name, nodegroup_role_name=nodegroup_role_name + cluster_role_name=cluster_role_name, + nodegroup_role_name=nodegroup_role_name ) # Next, attempt to delete the EC2 Launch Template - RollbackManager.delete_launch_template(launch_template_name=launch_template_name) + RollbackManager.delete_launch_template( + launch_template_name=launch_template_name + ) # Finally, loop the retrieved SGs and then delete them for sg in sgList: - print(f"Trying to delete EC2 Security Group {sg}") - RollbackManager.delete_security_groups(cluster_security_group_id=sg) + print(f'Trying to delete EC2 Security Group {sg}') + RollbackManager.delete_security_groups( + cluster_security_group_id=sg + ) - print(f"Deletion complete. Confirm resource deletion in Console in case of errors") + print(f'Deletion complete. Confirm resource deletion in Console in case of errors') def rollback_from_cache(cache): - """ + ''' This function is invoked during any error encountered during the creation process in the `ClusterManager` Class - a Cache is passed and any resource that would be created is attempted to be deleted as the failures can occur at any stage - """ + ''' - print(f"Error encountered! Rollback from cache initiated.") - eks = boto3.client("eks") + print(f'Error encountered! 
Rollback from cache initiated.') + eks = boto3.client('eks') # pull vars from Cache - nodegroupName = str(cache[0]["NodegroupName"]) - clusterName = str(cache[0]["ClusterName"]) - clusterRoleName = str(cache[0]["ClusterRoleName"]) - nodegroupRoleName = str(cache[0]["NodegroupRoleName"]) - launchTemplateName = str(cache[0]["LaunchTemplateName"]) - clusterSgId = str(cache[1]["ClusterSecurityGroupId"]) + nodegroupName = str(cache[0]['NodegroupName']) + clusterName = str(cache[0]['ClusterName']) + clusterRoleName = str(cache[0]['ClusterRoleName']) + nodegroupRoleName = str(cache[0]['NodegroupRoleName']) + launchTemplateName = str(cache[0]['LaunchTemplateName']) + clusterSgId = str(cache[1]['ClusterSecurityGroupId']) # First, attempt to delete Nodegroup - RollbackManager.delete_nodegroup(nodegroup_name=nodegroupName, cluster_name=clusterName) + RollbackManager.delete_nodegroup( + nodegroup_name=nodegroupName, + cluster_name=clusterName + ) # Then, try to find the Cluster KMS Key and attempt to delete it try: - kmsKeyArn = eks.describe_cluster(name=clusterName)["cluster"]["encryptionConfig"][0][ - "provider" - ]["keyArn"] + kmsKeyArn= eks.describe_cluster(name=clusterName)['cluster']['encryptionConfig'][0]['provider']['keyArn'] except Exception: kmsKeyArn = None if kmsKeyArn != None: - RollbackManager.delete_kms_key(kms_key_arn=kmsKeyArn) + RollbackManager.delete_kms_key( + kms_key_arn=kmsKeyArn + ) # Next, attempt to delete Cluster - RollbackManager.delete_cluster(cluster_name=clusterName) + RollbackManager.delete_cluster( + cluster_name=clusterName + ) # Next, attempt to delete all related IAM RollbackManager.delete_eks_iam( - cluster_role_name=clusterRoleName, nodegroup_role_name=nodegroupRoleName + cluster_role_name=clusterRoleName, + nodegroup_role_name=nodegroupRoleName ) # Next, attempt to delete the EC2 Launch Template - RollbackManager.delete_launch_template(launch_template_name=launchTemplateName) + RollbackManager.delete_launch_template( + launch_template_name=launchTemplateName + ) # Finally, delete the Security Groups - RollbackManager.delete_security_groups(cluster_security_group_id=clusterSgId) + RollbackManager.delete_security_groups( + cluster_security_group_id=clusterSgId + ) - print(f"Rollback complete. Confirm resource deletion in Console in case of errors") + print(f'Rollback complete. 
Confirm resource deletion in Console in case of errors')
 
     del cache
     sys.exit(2)
 
 def delete_nodegroup(cluster_name, nodegroup_name):
-    """
+    '''
     This function attempts to delete an EKS Nodegroup
-    """
-    print(f"Attempting to delete EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}.")
+    '''
+    print(f'Attempting to delete EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}.')
 
-    eks = boto3.client("eks")
+    eks = boto3.client('eks')
 
     try:
-        eks.delete_nodegroup(clusterName=cluster_name, nodegroupName=nodegroup_name)
+        eks.delete_nodegroup(
+            clusterName=cluster_name,
+            nodegroupName=nodegroup_name
+        )
     except botocore.exceptions.ClientError as error:
-        print(f"Rollback error encounter {error}")
+        print(f'Rollback error encountered: {error}')
 
     # Wait for the Nodegroup to be fully deleted before deleting the Cluster
     # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Waiter.NodegroupDeleted
-    print(f"Awaiting deletion of EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}.")
+    print(f'Awaiting deletion of EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}.')
 
-    waiter = eks.get_waiter("nodegroup_deleted")
+    waiter = eks.get_waiter('nodegroup_deleted')
 
     waiter.wait(
         clusterName=cluster_name,
         nodegroupName=nodegroup_name,
-        WaiterConfig={"Delay": 30, "MaxAttempts": 40},
+        WaiterConfig={
+            'Delay': 30,
+            'MaxAttempts': 40
+        }
     )
 
-    print(f"EKS Nodegroups rolled back.")
+    print(f'EKS Nodegroups rolled back.')
 
     del eks
 
 def delete_cluster(cluster_name):
-    """
+    '''
     This function attempts to delete an EKS Cluster
-    """
-    print(f"Attempting to delete EKS Cluster {cluster_name}.")
+    '''
+    print(f'Attempting to delete EKS Cluster {cluster_name}.')
 
-    eks = boto3.client("eks")
+    eks = boto3.client('eks')
 
     try:
-        eks.delete_cluster(name=cluster_name)
+        eks.delete_cluster(
+            name=cluster_name
+        )
     except botocore.exceptions.ClientError as error:
-        print(f"Rollback error encounter {error}")
+        print(f'Rollback error encountered: {error}')
 
     # Wait for the Cluster to be fully deleted before deleting the IAM Roles
     # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Waiter.ClusterDeleted
-    print(f"Awaiting deletion of EKS Cluster {cluster_name}.")
+    print(f'Awaiting deletion of EKS Cluster {cluster_name}.')
 
-    waiter = eks.get_waiter("cluster_deleted")
+    waiter = eks.get_waiter('cluster_deleted')
 
-    waiter.wait(name=cluster_name, WaiterConfig={"Delay": 30, "MaxAttempts": 123})
+    waiter.wait(
+        name=cluster_name,
+        WaiterConfig={
+            'Delay': 30,
+            'MaxAttempts': 123
+        }
+    )
 
-    print(f"EKS Clusters rolled back.")
+    print(f'EKS Clusters rolled back.')
 
     del eks
 
 def delete_eks_iam(cluster_role_name, nodegroup_role_name):
-    """
+    '''
     This function attempts to delete all related IAM entities for EKS (Cluster roles, Nodegroup roles, Nodegroup policies)
-    """
-    print(
-        f"Attempting to delete various IAM entities. IAM Roles {cluster_role_name} and {nodegroup_role_name} and IAM Policy {nodegroup_role_name}Policy."
-    )
+    '''
+    print(f'Attempting to delete various IAM entities. IAM Roles {cluster_role_name} and {nodegroup_role_name} and IAM Policy {nodegroup_role_name}Policy.')
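     # NOTE: the order below matters - IAM will not delete a customer managed policy
     # or a role while attachments remain (the API raises a DeleteConflict error),
     # so policies are detached from both roles before any delete calls are issued.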
 
-    iam = boto3.client("iam")
-    sts = boto3.client("sts")
-    account = sts.get_caller_identity()["Account"]
+    iam = boto3.client('iam')
+    sts = boto3.client('sts')
+    account = sts.get_caller_identity()['Account']
 
     # Assemble an IAM Policy ARN for nodegroup
-    nodegroupS3PolicyArn = f"arn:aws:iam::{account}:policy/{nodegroup_role_name}Policy"
+    nodegroupS3PolicyArn = f'arn:aws:iam::{account}:policy/{nodegroup_role_name}Policy'
 
     # Find and detach all policies from the Cluster Role
     try:
-        for policy in iam.list_attached_role_policies(RoleName=cluster_role_name)[
-            "AttachedPolicies"
-        ]:
-            policyArn = str(policy["PolicyArn"])
-            iam.detach_role_policy(RoleName=cluster_role_name, PolicyArn=policyArn)
+        for policy in iam.list_attached_role_policies(RoleName=cluster_role_name)['AttachedPolicies']:
+            policyArn = str(policy['PolicyArn'])
+            iam.detach_role_policy(
+                RoleName=cluster_role_name,
+                PolicyArn=policyArn
+            )
     except botocore.exceptions.ClientError as error:
-        print(f"Rollback error encounter {error}")
+        print(f'Rollback error encountered: {error}')
 
     # Detach all Policies from Nodegroup cluster
     try:
-        for policy in iam.list_attached_role_policies(RoleName=nodegroup_role_name)[
-            "AttachedPolicies"
-        ]:
-            policyArn = str(policy["PolicyArn"])
-            iam.detach_role_policy(RoleName=nodegroup_role_name, PolicyArn=policyArn)
+        for policy in iam.list_attached_role_policies(RoleName=nodegroup_role_name)['AttachedPolicies']:
+            policyArn = str(policy['PolicyArn'])
+            iam.detach_role_policy(
+                RoleName=nodegroup_role_name,
+                PolicyArn=policyArn
+            )
     except botocore.exceptions.ClientError as error:
-        print(f"Rollback error encounter {error}")
+        print(f'Rollback error encountered: {error}')
 
     try:
         iam.delete_policy(PolicyArn=nodegroupS3PolicyArn)
@@ -1550,14 +1681,15 @@ def delete_eks_iam(cluster_role_name, nodegroup_role_name):
     try:
         iam.delete_role(RoleName=cluster_role_name)
     except botocore.exceptions.ClientError as error:
-        print(f"Rollback error encounter {error}")
+        print(f'Rollback error encountered: {error}')
 
     try:
         iam.delete_role(RoleName=nodegroup_role_name)
     except botocore.exceptions.ClientError as error:
-        print(f"Rollback error encounter {error}")
+        print(f'Rollback error encountered: {error}')
+
 
-    print(f"IAM Roles and Policies rolled back.")
+    print(f'IAM Roles and Policies rolled back.')
 
     del iam
     del sts
@@ -1565,52 +1697,58 @@ def delete_eks_iam(cluster_role_name, nodegroup_role_name):
     del nodegroupS3PolicyArn
 
 def delete_launch_template(launch_template_name):
-    """
+    '''
     This function attempts to delete the EC2 Launch Template used for EKS Nodegroups
-    """
-    print(f"Attempting to delete EC2 launch template {launch_template_name}.")
+    '''
+    print(f'Attempting to delete EC2 launch template {launch_template_name}.')
 
-    ec2 = boto3.client("ec2")
+    ec2 = boto3.client('ec2')
 
     try:
-        ec2.delete_launch_template(DryRun=False, LaunchTemplateName=launch_template_name)
+        ec2.delete_launch_template(
+            DryRun=False,
+            LaunchTemplateName=launch_template_name
+        )
    except botocore.exceptions.ClientError as error:
-        print(f"Rollback error encounter {error}")
+        print(f'Rollback error encountered: {error}')
 
-    print(f"EC2 Launch Templates rolled back.")
+    print(f'EC2 Launch Templates rolled back.')
 
     del ec2
 
 def delete_security_groups(cluster_security_group_id):
-    """
+    '''
     This function attempts to delete the EC2 Security Groups used for EKS Clusters and Nodegroups
-    """
-    print(f"Attempting to delete EC2 Security Group {cluster_security_group_id}")
+    '''
+    print(f'Attempting to delete EC2 Security Group {cluster_security_group_id}')
 
-    ec2 = boto3.client("ec2")
+    ec2 = boto3.client('ec2')
 
     try:
         ec2.delete_security_group(GroupId=cluster_security_group_id)
     except botocore.exceptions.ClientError as error:
-        print(f"Rollback error encounter {error}")
+        print(f'Rollback error encountered: {error}')
 
-    print(f"Security Group rolled back")
+    print(f'Security Group rolled back')
 
     del ec2
 
 def delete_kms_key(kms_key_arn):
-    """
+    '''
     This function attempts to delete the KMS Key used for EKS Envelope Encryption
-    """
-    print(f"Attempting to delete KMS Key ARN {kms_key_arn}")
+    '''
+    print(f'Attempting to delete KMS Key ARN {kms_key_arn}')
 
-    kms = boto3.client("kms")
+    kms = boto3.client('kms')
 
     try:
-        kms.schedule_key_deletion(KeyId=kms_key_arn, PendingWindowInDays=7)
+        kms.schedule_key_deletion(
+            KeyId=kms_key_arn,
+            PendingWindowInDays=7
+        )
     except botocore.exceptions.ClientError as error:
-        print(f"Rollback error encounter {error}")
+        print(f'Rollback error encountered: {error}')
 
-    print(f"KMS Key rolled back")
+    print(f'KMS Key rolled back')
 
-    del kms
+    del kms
\ No newline at end of file
diff --git a/README.md b/README.md
index e72c98b..5b49537 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 The Amazon Elastic Kubernetes Service (EKS) Creation Engine (ECE) is a Python command-line program created by the Lightspin Office of the CISO to facilitate the creation and enablement of secure EKS Clusters, optionally further assured with continual Kubernetes Security Posture Management (KSPM), Runtime Protection, and Application Performance Monitoring (APM) capabilities.
 
-## What is this :eyes: :eyes: ??
+## What is this :eyes: :eyes: ?? 
 
 As stated above, the ECE is a Python utility to create a fully functioning EKS Cluster, complete with Nodegroups which are built off of EC2 Launch Templates as it was meant for creating EKS Nodegroups with custom AMIs with custom bootstrapping.
 
@@ -73,39 +73,10 @@ We are happy to take contributions from anywhere that will help expand this proj
 - Spot provider & Fargate Profile support for Nodegroups, and an option to not use Nodegroups
 - Create more Plugins for various utilities (e.g., Calico, OPA, NGINX Ingress Controller, etc.)
 
-### Basic Contributing Setup
-
-1. Fork the repository.
-2. Clone your fork and enter the `eks-creation-engine` directory.
-3. Get your Python things Python-y.
-
-```bash
-# Add upstream
-git remote add upstream https://github.com/lightspin-tech/eks-creation-engine.git
-
-# Create virtual env
-pip3 -m venv .env --prompt ece
-
-# Enter virtual env
-source .env/bin/activate
-
-# Install ECE reqs
-pip3 install -r requirements.txt
-
-# Install pre-commit
-pip3 install pre-commit
-
-# Ensure pre-commit runs... pre... commit
-pre-commit install
-
-# Init the pre-commit env and run checks
-pre-commit run -a
-```
-
 ## Contact Us :telephone_receiver: :telephone_receiver:
 
 For more information, contact us at support@lightspin.io.
 
 ## License :eight_spoked_asterisk: :eight_spoked_asterisk:
 
-This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE).
+This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE).
\ No newline at end of file
diff --git a/docs/HOWTO.md b/docs/HOWTO.md
index 1382f33..17317a5 100644
--- a/docs/HOWTO.md
+++ b/docs/HOWTO.md
@@ -347,4 +347,4 @@ For more information, contact us at support@lightspin.io.
## License :eight_spoked_asterisk: :eight_spoked_asterisk: -This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE). +This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE). \ No newline at end of file diff --git a/main.py b/main.py index cc94451..dd00a00 100644 --- a/main.py +++ b/main.py @@ -1,164 +1,154 @@ -# This file is part of Lightspin EKS Creation Engine. -# SPDX-License-Identifier: Apache-2.0 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import argparse +#This file is part of Lightspin EKS Creation Engine. +#SPDX-License-Identifier: Apache-2.0 + +#Licensed to the Apache Software Foundation (ASF) under one +#or more contributor license agreements. See the NOTICE file +#distributed with this work for additional information +#regarding copyright ownership. The ASF licenses this file +#to you under the Apache License, Version 2.0 (the +#"License"); you may not use this file except in compliance +#with the License. You may obtain a copy of the License at + +#http://www.apache.org/licenses/LICENSE-2.0 + +#Unless required by applicable law or agreed to in writing, +#software distributed under the License is distributed on an +#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +#KIND, either express or implied. See the License for the +#specific language governing permissions and limitations +#under the License. 
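A minimal sketch of a Create-mode invocation of this CLI, using flag names and defaults taken from the argparse definitions further down in this file; the VPC and subnet IDs are placeholders, not values from this patch:

```bash
# Hypothetical identifiers - substitute a real VPC and subnets
python3 main.py --mode Create \
  --cluster_name LightspinECECluster \
  --vpcid vpc-0123456789abcdef0 \
  --subnets subnet-0aaaaaaaaaaaaaaaa subnet-0bbbbbbbbbbbbbbbb
```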
+
 import json
-import re
-import subprocess
 import sys
-
+import re
 import boto3
 import botocore
-import termcolor
+import argparse
+import subprocess
 from art import text2art
-from clint.textui import colored
-from clint.textui import puts
-
-from EksCreationEngine import ClusterManager
-from EksCreationEngine import RollbackManager
-from EksCreationEngine import UpdateManager
+import termcolor
+from clint.textui import colored, puts
+from EksCreationEngine import ClusterManager, UpdateManager, RollbackManager
 from plugins.ECEDatadog import DatadogSetup
 from plugins.ECEFalco import FalcoSetup
 from plugins.ECESecurity import SecurityAssessment
 
-
 def print_logo():
     textArt = text2art("EKS CREATION ENGINE")
-    print(termcolor.colored(textArt, "red"))
+    print(termcolor.colored(textArt, 'red'))
     puts(colored.red("CREATED BY THE LIGHTSPIN OFFICE OF THE CISO"))
-    puts(
-        colored.red(
-            "For more information about Lightspin reach out to support@lightspin.io or visit us at https://lightspin.io"
-        )
-    )
-
+    puts(colored.red("For more information about Lightspin reach out to support@lightspin.io or visit us at https://lightspin.io"))
 
 def stay_dangerous():
     textArt = text2art("STAY DANGEROUS")
-    print(termcolor.colored(textArt, "red"))
+    print(termcolor.colored(textArt, 'red'))
     puts(colored.red("With Love, the Lightspin Office of the CISO"))
 
-
 def create_preflight_check():
-    """
+    '''
     This function conducts a "preflight check" to ensure that required arguments are provided for the specified
     "Mode" before attempting to execute them.
-    """
+    '''
     print_logo()
 
-    eks = boto3.client("eks")
+    eks = boto3.client('eks')
 
     # Conditional check to ensure that AMI ID (if provided) matches regex
     amiId = args.ami_id
 
-    if amiId != "SSM":
+    if amiId != 'SSM':
         # AMI Regex - raw string so \b and \s are treated as regex escapes, not string escapes
-        amiRegex = re.compile(
-            "^(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\\.){3}[0-9]{1,3})(?:\\s*,\\s*(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\\.){3}[0-9]{1,3}))*$"
-        )
+        amiRegex = re.compile(r'^(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\.){3}[0-9]{1,3})(?:\s*,\s*(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\.){3}[0-9]{1,3}))*$')
         # Attempt to match
         amiRegexCheck = amiRegex.search(amiId)
         if not amiRegexCheck:
-            print(
-                f"Improperly AMI ID provided, does not match regex, check value and submit request again"
-            )
+            print(f'Improper AMI ID provided, does not match regex, check value and submit request again')
             sys.exit(2)
 
     # Check if an EKS Cluster exists for provided name
     try:
-        eks.describe_cluster(name=clusterName)
+        eks.describe_cluster(
+            name=clusterName
+        )
     except botocore.exceptions.ClientError as error:
         # If we get a "ResourceNotFoundException" error it means the cluster doesn't exist - which is what we want
-        if error.response["Error"]["Code"] == "ResourceNotFoundException":
+        if error.response['Error']['Code'] == 'ResourceNotFoundException':
             pass
         else:
-            print(
-                f"An EKS Cluster with the name {clusterName} already exists. Please specify another name and try again"
-            )
+            print(f'An EKS Cluster with the name {clusterName} already exists. Please specify another name and try again')
             sys.exit(2)
-
+    
     # Check if an EKS Nodegroup exists for provided name
     try:
-        eks.describe_nodegroup(clusterName=clusterName, nodegroupName=nodegroupName)
+        eks.describe_nodegroup(
+            clusterName=clusterName,
+            nodegroupName=nodegroupName
+        )
     except botocore.exceptions.ClientError as error:
         # If we get a "ResourceNotFoundException" error it means the cluster/nodegroup doesn't exist - which is what we want
-        if error.response["Error"]["Code"] == "ResourceNotFoundException":
+        if error.response['Error']['Code'] == 'ResourceNotFoundException':
             pass
         else:
-            print(
-                f"An EKS Nodegroup with the name {nodegroupName} already exists. Please specify another name and try again"
-            )
+            print(f'An EKS Nodegroup with the name {nodegroupName} already exists. Please specify another name and try again')
             sys.exit(2)
-
+    
     # Check for a provided VPC
     if vpcId == None:
-        print(f"VPC ID is required for cluster creation. Please specify a VPC ID and try again.")
+        print(f'VPC ID is required for cluster creation. Please specify a VPC ID and try again.')
         sys.exit(2)
 
     # Check for non-empty lists for Subnets
     if args.subnets:
         pass
     else:
-        print(f"Subnets need to be specified for cluster creation")
+        print(f'Subnets need to be specified for cluster creation')
         sys.exit(2)
 
     # Ensure a S3 Bucket was provided if MDE installation is true
-    if installMdeOnNodes == "True":
+    if installMdeOnNodes == 'True':
         if bucketName == None:
-            print(
-                f"S3 Bucket name was not provided. Please provide a valid S3 Bucket and try again"
-            )
+            print(f'S3 Bucket name was not provided. Please provide a valid S3 Bucket and try again')
             sys.exit(2)
 
     # Ensure a Datadog API key is provided if Datadog installation is true
-    if datadogBool == "True":
+    if datadogBool == 'True':
         if datadogApiKey == None:
-            print(
-                f"Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again."
-            )
+            print(f'Datadog setup was specified but a Datadog API key was not provided. Please provide a valid API key and try again.')
             sys.exit(2)
 
     # Print out creation specification - in the future this will be a "state file" for the cluster
     specDict = {
-        "K8sVersion": k8sVersion,
-        "S3BucketName": bucketName,
-        "EBSVolumeSize": ebsVolumeSize,
-        "AmiId": amiId,
-        "InstanceType": instanceType,
-        "ClusterName": clusterName,
-        "ClusterRoleName": clusterRoleName,
-        "NodegroupName": nodegroupName,
-        "NodegroupRoleName": nodegroupRoleName,
-        "LaunchTemplateName": launchTemplateName,
-        "VpcId": vpcId,
-        "SubnetIds": subnetIds,
-        "NodeCount": eksNodeCount,
-        "MDEOnNodes?": installMdeOnNodes,
-        "AdditionalPorts": additionalPorts,
-        "InstallFalco?": falcoBool,
-        "FalcoDestinationType": falcoDestType,
-        "FalcoDestination": falcoDest,
-        "AmiOperatingSystem": amiOs,
-        "AmiArhcitecture": amiArchitecture,
-        "DatadogApiKey": datadogApiKey,
-        "InstallDatadog?": datadogBool,
-        "AdditionalAuthorizedPrincipals": additionalAuthZPrincipals,
+        'K8sVersion': k8sVersion,
+        'S3BucketName': bucketName,
+        'EBSVolumeSize': ebsVolumeSize,
+        'AmiId': amiId,
+        'InstanceType': instanceType,
+        'ClusterName': clusterName,
+        'ClusterRoleName': clusterRoleName,
+        'NodegroupName': nodegroupName,
+        'NodegroupRoleName': nodegroupRoleName,
+        'LaunchTemplateName': launchTemplateName,
+        'VpcId': vpcId,
+        'SubnetIds': subnetIds,
+        'NodeCount': eksNodeCount,
+        'MDEOnNodes?': installMdeOnNodes,
+        'AdditionalPorts': additionalPorts,
+        'InstallFalco?': falcoBool,
+        'FalcoDestinationType': falcoDestType,
+        'FalcoDestination': falcoDest,
+        'AmiOperatingSystem': amiOs,
+        'AmiArchitecture': amiArchitecture,
+        'DatadogApiKey': datadogApiKey,
+        'InstallDatadog?': datadogBool,
+        'AdditionalAuthorizedPrincipals': additionalAuthZPrincipals
     }
 
-    print(f"The following attributes are set for your EKS Cluster")
-    print(json.dumps(specDict, indent=4))
+    print(f'The following attributes are set for your EKS Cluster')
+    print(
+        json.dumps(
+            specDict,
+            indent=4
+        )
+    )
 
     # TODO: Save state?
     del specDict
@@ -186,12 +176,11 @@ def create_preflight_check():
         ami_architecture=amiArchitecture,
         datadog_api_key=datadogApiKey,
         datadog_bool=datadogBool,
-        addtl_auth_principals=additionalAuthZPrincipals,
+        addtl_auth_principals=additionalAuthZPrincipals
     )
 
     stay_dangerous()
 
-
 def delete_preflight_check():
     print_logo()
 
@@ -201,165 +190,162 @@ def delete_preflight_check():
         cluster_role_name=clusterRoleName,
         nodegroup_name=nodegroupName,
         nodegroup_role_name=nodegroupRoleName,
-        launch_template_name=launchTemplateName,
+        launch_template_name=launchTemplateName
     )
 
     stay_dangerous()
 
-
 def update_preflight_check():
     print_logo()
 
     # Call the `update_kubernetes_version` function and attempt to version bump K8s of Clusters & Nodes
     UpdateManager.update_kubernetes_version(
-        cluster_name=clusterName, kubernetes_version=k8sVersion, nodegroup_name=nodegroupName
+        cluster_name=clusterName,
+        kubernetes_version=k8sVersion,
+        nodegroup_name=nodegroupName
    )
 
     stay_dangerous()
 
-
 def assessment_preflight_check():
-    """
+    '''
     This function conducts a "preflight check" to ensure that required arguments are provided for the specified
     "Mode" before attempting to execute them.
-    """
+    '''
     print_logo()
 
-    eks = boto3.client("eks")
+    eks = boto3.client('eks')
 
     # Check if an EKS Cluster exists for provided name
     try:
-        eks.describe_cluster(name=clusterName)
+        eks.describe_cluster(
+            name=clusterName
+        )
     except botocore.exceptions.ClientError as error:
         # A "ResourceNotFoundException" error means the cluster doesn't exist - a problem here, since an assessment needs an existing cluster
-        if error.response["Error"]["Code"] == "ResourceNotFoundException":
-            print(
-                f"An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again"
-            )
+        if error.response['Error']['Code'] == 'ResourceNotFoundException':
+            print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again')
             sys.exit(2)
         else:
             pass
 
-    print(f"Downloading latest Kube-bench EKS config YAML")
+    print(f'Downloading latest Kube-bench EKS config YAML')
 
-    url = "https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job-eks.yaml"
-    wgetCommand = f"wget {url}"
-    subProc = subprocess.run(wgetCommand, shell=True, capture_output=True)
-    print(subProc.stderr.decode("utf-8"))
+    url = 'https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job-eks.yaml'
+    wgetCommand = f'wget {url}'
+    subProc = subprocess.run(wgetCommand, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    print(subProc.stderr.decode('utf-8'))
 
-    print(f"Installing Trivy from source script for v0.24")
+    print(f'Installing Trivy from source script for v0.24')
     # TODO: Continual updates of Trivy version https://aquasecurity.github.io/trivy
-    trivyCmd = "curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin v0.24.0"
-    trivyProc = subprocess.run(trivyCmd, shell=True, capture_output=True)
-    print(trivyProc.stdout.decode("utf-8"))
+    trivyCmd = 'curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin v0.24.0'
+    trivyProc = subprocess.run(trivyCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    print(trivyProc.stdout.decode('utf-8'))
 
-    SecurityAssessment.start_assessment(cluster_name=clusterName)
+    SecurityAssessment.start_assessment(
+        cluster_name=clusterName
+    )
 
     stay_dangerous()
 
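The two shell steps this assessment preflight wraps with `subprocess` can also be run standalone; the commands below are taken verbatim from the function above (Trivy is pinned there to v0.24.0):

```bash
# Fetch the kube-bench EKS job spec and install Trivy v0.24.0
wget https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job-eks.yaml
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin v0.24.0
```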
- """ + ''' print_logo() - eks = boto3.client("eks") + eks = boto3.client('eks') # Check if an EKS Cluster exists for provided name try: - eks.describe_cluster(name=clusterName) + eks.describe_cluster( + name=clusterName + ) except botocore.exceptions.ClientError as error: # If we have an "ResourceNotFoundException" error it means the cluster doesnt exist - which is what we want - if error.response["Error"]["Code"] == "ResourceNotFoundException": - print( - f"An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again" - ) + if error.response['Error']['Code'] == 'ResourceNotFoundException': + print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again') sys.exit(2) else: pass - print(f"Downloading latest Kube-bench EKS config YAML") + print(f'Downloading latest Kube-bench EKS config YAML') - url = "https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job-eks.yaml" - wgetCommand = f"wget {url}" - subProc = subprocess.run(wgetCommand, shell=True, capture_output=True) - print(subProc.stderr.decode("utf-8")) + url = 'https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job-eks.yaml' + wgetCommand = f'wget {url}' + subProc = subprocess.run(wgetCommand, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + print(subProc.stderr.decode('utf-8')) - print(f"Installing Trivy from source script for v0.24") + print(f'Installing Trivy from source script for v0.24') # TODO: Continual updates of Trivy version https://aquasecurity.github.io/trivy - trivyCmd = "curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin v0.24.0" - trivyProc = subprocess.run(trivyCmd, shell=True, capture_output=True) - print(trivyProc.stdout.decode("utf-8")) + trivyCmd = 'curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin v0.24.0' + trivyProc = subprocess.run(trivyCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + print(trivyProc.stdout.decode('utf-8')) - SecurityAssessment.start_assessment(cluster_name=clusterName) + SecurityAssessment.start_assessment( + cluster_name=clusterName + ) stay_dangerous() - def setup_falco_preflight_check(): - """ + ''' This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before attempting to execute them. - """ + ''' print_logo() - eks = boto3.client("eks") + eks = boto3.client('eks') # Check if an EKS Cluster exists for provided name try: - eks.describe_cluster(name=clusterName) + eks.describe_cluster( + name=clusterName + ) except botocore.exceptions.ClientError as error: # If we have an "ResourceNotFoundException" error it means the cluster doesnt exist - which is what we want - if error.response["Error"]["Code"] == "ResourceNotFoundException": - print( - f"An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again" - ) + if error.response['Error']['Code'] == 'ResourceNotFoundException': + print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again') sys.exit(2) else: pass - - if mode == "SetupFalco": - if falcoDestType == "Slack" or falcoDestType == "Teams": + + if mode == 'SetupFalco': + if falcoDestType == 'Slack' or falcoDestType == 'Teams': if falcoDest == None: - print( - f'No destination was provided for "--falco_sidekick_destination_type", please try again.' 
-                )
+                print(f'No destination was provided for "--falco_sidekick_destination_type", please try again.')
                 sys.exit(2)
-        elif falcoDestType == "Datadog":
+        elif falcoDestType == 'Datadog':
             if datadogApiKey == None:
-                print(
-                    f"Datadog destination for Falco was specified but a Datadog API was not provided. Please provide a valid API key and try again."
-                )
-                sys.exit(2)
+                print(f'Datadog destination for Falco was specified but a Datadog API key was not provided. Please provide a valid API key and try again.')
+                sys.exit(2)
 
         FalcoSetup.falco_initialization(
             cluster_name=clusterName,
-            falco_mode="Create",
+            falco_mode='Create',
             falco_sidekick_destination_type=falcoDestType,
-            falco_sidekick_destination=falcoDest,
+            falco_sidekick_destination=falcoDest
         )
 
         stay_dangerous()
-    elif mode == "RemoveFalco":
+    elif mode == 'RemoveFalco':
         FalcoSetup.falco_initialization(
             cluster_name=clusterName,
-            falco_mode="Delete",
+            falco_mode='Delete',
             falco_sidekick_destination_type=falcoDestType,
             falco_sidekick_destination=falcoDest,
-            datadog_api_key=datadogApiKey,
+            datadog_api_key=datadogApiKey
        )
 
         stay_dangerous()
     else:
-        print(f"Somehow, an incompatible mode detected for Falco, please try again.")
+        print(f'Somehow, an incompatible mode was detected for Falco, please try again.')
         sys.exit(2)
 
-
 def setup_datadog_preflight_check():
-    """
+    '''
     This function conducts a "preflight check" to ensure that required arguments are provided for the specified
     "Mode" before attempting to execute them.
-    """
+    '''
     print_logo()
 
-    eks = boto3.client("eks")
+    eks = boto3.client('eks')
 
     # Check if an EKS Cluster exists for provided name
     try:
-        eks.describe_cluster(name=clusterName)
+        eks.describe_cluster(
+            name=clusterName
+        )
     except botocore.exceptions.ClientError as error:
         # A "ResourceNotFoundException" error means the cluster doesn't exist - a problem here, since Datadog setup needs an existing cluster
-        if error.response["Error"]["Code"] == "ResourceNotFoundException":
-            print(
-                f"An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again"
-            )
+        if error.response['Error']['Code'] == 'ResourceNotFoundException':
+            print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again')
             sys.exit(2)
         else:
             pass
 
-    if mode == "SetupDatadog":
+    if mode == 'SetupDatadog':
         if datadogApiKey == None:
-            print(
-                f"Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again."
-            )
+            print(f'Datadog setup was specified but a Datadog API key was not provided. Please provide a valid API key and try again.')
             sys.exit(2)
 
         # Datadoggy time!
         DatadogSetup.initialization(
-            cluster_name=clusterName, datadog_mode="Setup", datadog_api_key=datadogApiKey
+            cluster_name=clusterName,
+            datadog_mode='Setup',
+            datadog_api_key=datadogApiKey
        )
-    elif mode == "RemoveDatadog":
+    elif mode == 'RemoveDatadog':
         # Bye Datadoggy time!
         DatadogSetup.initialization(
-            cluster_name=clusterName, datadog_mode="Remove", datadog_api_key=datadogApiKey
+            cluster_name=clusterName,
+            datadog_mode='Remove',
+            datadog_api_key=datadogApiKey
         )
     else:
-        print(f"Somehow, an incompatible mode detected for Datadog, please try again.")
+        print(f'Somehow, an incompatible mode was detected for Datadog, please try again.')
         sys.exit(2)
 
     stay_dangerous()
 
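A sketch of driving these Datadog modes from the command line, assuming the `--mode` choices and flags registered in the parser below; the cluster name matches the parser default, and the API key value is a placeholder:

```bash
python3 main.py --mode SetupDatadog --cluster_name LightspinECECluster --datadog_api_key <DD_API_KEY>
python3 main.py --mode RemoveDatadog --cluster_name LightspinECECluster --datadog_api_key <DD_API_KEY>
```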
-
 if __name__ == "__main__":
-    # Feed all of the arguments
-    """
+    # Feed all of the arguments 
+    '''
     >> argparse argument | **kwargs <<
     --profile | profile
     --mode | mode
@@ -386,202 +372,193 @@ def setup_datadog_preflight_check():
     --datadog | datadog_bool
     --datadog_api_key | datadog_api_key
     --addtl_auth_principals | addtl_auth_principals
-    """
+    '''
 
     parser = argparse.ArgumentParser()
 
     # --profile
     parser.add_argument(
-        "--profile",
-        help="Specify Profile name if multiple profiles are used",
+        '--profile',
+        help='Specify Profile name if multiple profiles are used',
         required=False,
-        default=[],
+        default=[]
     )
     # --mode
     parser.add_argument(
-        "--mode",
-        help="Create, Destory or Update an existing Cluster. Updates limited to K8s Version bump. Destroy attempts to delete everything that this utility creates. Assessment will attempt to run various K8s security tools. SetupFalco will attempt to install Falco on existing Clusters. RemoveFalco will attempt to rollback SetupFalco deployments. SetupDatadog will attempt to install DataDog on existing Cluster. RemoveDatadog will attempt to rollback SetupDatadog deployments - defaults to Create",
+        '--mode',
+        help='Create, Destroy or Update an existing Cluster. Updates limited to K8s Version bump. Destroy attempts to delete everything that this utility creates. Assessment will attempt to run various K8s security tools. SetupFalco will attempt to install Falco on existing Clusters. RemoveFalco will attempt to rollback SetupFalco deployments. SetupDatadog will attempt to install DataDog on existing Cluster. RemoveDatadog will attempt to rollback SetupDatadog deployments - defaults to Create',
         required=False,
-        choices=[
-            "Create",
-            "Destroy",
-            "Update",
-            "Assessment",
-            "SetupFalco",
-            "RemoveFalco",
-            "SetupDatadog",
-            "RemoveDatadog",
-        ],
-        default="Create",
+        choices=['Create', 'Destroy', 'Update', 'Assessment', 'SetupFalco', 'RemoveFalco', 'SetupDatadog', 'RemoveDatadog'],
+        default='Create'
    )
     # --k8s_version
     parser.add_argument(
-        "--k8s_version",
-        help="Version of K8s to use for EKS - defaults to 1.21 as of 13 JAN 2022 - used for Create and Update",
+        '--k8s_version',
+        help='Version of K8s to use for EKS - defaults to 1.21 as of 13 JAN 2022 - used for Create and Update',
         required=False,
-        default="1.21",
+        default='1.21'
     )
     # --s3_bucket_name
     parser.add_argument(
-        "--s3_bucket_name",
-        help="S3 Bucket with required artifacts for EKS to access for bootstrapping if --mde_on_nodes=True - used for Create",
+        '--s3_bucket_name',
+        help='S3 Bucket with required artifacts for EKS to access for bootstrapping if --mde_on_nodes=True - used for Create',
         required=False,
-        default=None,
+        default=None
     )
     # --ebs_volume_size
     parser.add_argument(
-        "--ebs_volume_size",
-        help="EBS volume size (in GB) for EKS nodegroup EC2 launch template - used for Create",
+        '--ebs_volume_size',
+        help='EBS volume size (in GB) for EKS nodegroup EC2 launch template - used for Create',
         required=False,
-        default="20",
+        default='20'
     )
     # --ami
     parser.add_argument(
-        "--ami_id",
+        '--ami_id',
         help='Custom AMI ID for EKS nodegroup EC2 launch template. 
Defaults to "SSM" which tells the program to use an SSM-derived image for your K8s version matching --ami_os and --ami_architecture - used for Create', required=False, - default="SSM", + default='SSM' ) # --instance_type parser.add_argument( - "--instance_type", - help="EC2 Instance type for EKS nodegroup EC2 launch template", + '--instance_type', + help='EC2 Instance type for EKS nodegroup EC2 launch template', required=False, - default="t3.medium", + default='t3.medium' ) # --cluster_name parser.add_argument( - "--cluster_name", - help="Name for your EKS Cluster - used for Create, Delete and Update", + '--cluster_name', + help='Name for your EKS Cluster - used for Create, Delete and Update', required=False, - default="LightspinECECluster", + default='LightspinECECluster' ) # --cluster_role_name parser.add_argument( - "--cluster_role_name", - help="Name for your EKS Cluster Service IAM Role", + '--cluster_role_name', + help='Name for your EKS Cluster Service IAM Role', required=False, - default="ClusterServiceRoleForEKS", + default='ClusterServiceRoleForEKS' ) # --nodegroup_name parser.add_argument( - "--nodegroup_name", - help="Name for your EKS Nodegroup - used for Create, Delete and Update", + '--nodegroup_name', + help='Name for your EKS Nodegroup - used for Create, Delete and Update', required=False, - default="LightspinECENodegroup", + default='LightspinECENodegroup' ) # --nodegroup_role_name parser.add_argument( - "--nodegroup_role_name", - help="Name for your EKS Nodegroup Service IAM Role (also given to policy)", + '--nodegroup_role_name', + help='Name for your EKS Nodegroup Service IAM Role (also given to policy)', required=False, - default="NodegroupServiceRoleForEKS", + default='NodegroupServiceRoleForEKS' ) # --launch_template_name parser.add_argument( - "--launch_template_name", - help="Name for your Nodegroup EC2 launch template - used for Create and Delete", + '--launch_template_name', + help='Name for your Nodegroup EC2 launch template - used for Create and Delete', required=False, - default="LightspinECECustomEKSAMI", + default='LightspinECECustomEKSAMI' ) # --vpcid parser.add_argument( - "--vpcid", - help="VPC ID to launch EKS Cluster and Nodegroups into", + '--vpcid', + help='VPC ID to launch EKS Cluster and Nodegroups into', required=False, - default=None, + default=None ) # --subnets # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python parser.add_argument( - "--subnets", - nargs="+", - help="Subnets to launch EKS Cluster and Nodegroups into - provide subnet IDs separated by spaces only", - required=False, + '--subnets', + nargs='+', + help='Subnets to launch EKS Cluster and Nodegroups into - provide subnet IDs separated by spaces only', + required=False ) # --node_count parser.add_argument( - "--node_count", - help="Amount of Nodes (EC2 instances) in EKS Nodegroup, will be used for min and desired values with 2 times for max - default 2", + '--node_count', + help='Amount of Nodes (EC2 instances) in EKS Nodegroup, will be used for min and desired values with 2 times for max - default 2', required=False, - default="2", + default='2' ) # --mde_on_nodes parser.add_argument( - "--mde_on_nodes", - help="Whether or not to install MDE on EKS Nodes via bootstrap - requires S3 Bucket and install scripts if true - defaults to False", + '--mde_on_nodes', + help='Whether or not to install MDE on EKS Nodes via bootstrap - requires S3 Bucket and install scripts if true - defaults to False', required=False, - choices=["True", 
"False"], - default="False", + choices=['True', 'False'], + default='False' ) # --additional_ports # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python parser.add_argument( - "--additional_ports", - nargs="+", - help="Additional application ports which need to be allowed in EKS Security Groups - 443, 53, 8765, 2801, and 10250 already included", - required=False, + '--additional_ports', + nargs='+', + help='Additional application ports which need to be allowed in EKS Security Groups - 443, 53, 8765, 2801, and 10250 already included', + required=False ) # --falco parser.add_argument( - "--falco", - help="For CREATE Mode, this flag specifies if you want to install and configure Falco on your Clusters - defaults to False", + '--falco', + help='For CREATE Mode, this flag specifies if you want to install and configure Falco on your Clusters - defaults to False', required=False, - choices=["True", "False"], - default="False", + choices=['True', 'False'], + default='False' ) # --falco_sidekick_destination_type parser.add_argument( - "--falco_sidekick_destination_type", - help="The output location for Falco Sidekick to send Falco alerts to. Defaults to SNS which also creates a new Topic unless a Destination is provided", + '--falco_sidekick_destination_type', + help='The output location for Falco Sidekick to send Falco alerts to. Defaults to SNS which also creates a new Topic unless a Destination is provided', required=False, - choices=["SNS", "Slack", "Teams", "Datadog"], - default="SNS", + choices=['SNS', 'Slack', 'Teams', 'Datadog'], + default='SNS' ) # --falco_sidekick_destination parser.add_argument( - "--falco_sidekick_destination", - help="The logical location matching the Sidekick Destination Type to forward Falco alerts. E.g., ARN, Webhook URL, Datadog URL, etc.", + '--falco_sidekick_destination', + help='The logical location matching the Sidekick Destination Type to forward Falco alerts. E.g., ARN, Webhook URL, Datadog URL, etc.', required=False, - default=None, + default=None ) # --ami_os parser.add_argument( - "--ami_os", + '--ami_os', help='If using "SSM" for --ami use this argument to specify what OS you want to use (alas = Amazon Linux 2, ubuntu = Ubuntu 20.04) - defaults to ubuntu', required=False, - choices=["alas", "ubuntu"], - default="ubuntu", + choices=['alas', 'ubuntu'], + default='ubuntu' ) # --ami_architecture parser.add_argument( - "--ami_architecture", + '--ami_architecture', help='If using "SSM" for --ami use this argument to specify what architecture you want to use - defaults to amd64', required=False, - choices=["amd64", "arm64"], - default="amd64", + choices=['amd64', 'arm64'], + default='amd64' ) # --datadog parser.add_argument( - "--datadog", - help="For CREATE Mode, this flag specifies if you want to install and configure Datadog APM on your Clusters - defaults to False", + '--datadog', + help='For CREATE Mode, this flag specifies if you want to install and configure Datadog APM on your Clusters - defaults to False', required=False, - choices=["True", "False"], - default="False", + choices=['True', 'False'], + default='False' ) # --datadog_api_key parser.add_argument( - "--datadog_api_key", - help="Datadog API Key. This is used for setting up Datadog with Create and SetupDatadog Modes as well as Datadog integration for FalcoSidekick", + '--datadog_api_key', + help='Datadog API Key. 
This is used for setting up Datadog with Create and SetupDatadog Modes as well as Datadog integration for FalcoSidekick',
         required=False,
-        default=None,
+        default=None
     )
     # addtl_auth_principals
     # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python
     parser.add_argument(
-        "--addtl_auth_principals",
-        nargs="+",
-        help="Additional IAM Role ARNs to authorized as system:masters",
-        required=False,
+        '--addtl_auth_principals',
+        nargs='+',
+        help='Additional IAM Role ARNs to authorize as system:masters',
+        required=False
     )
 
     args = parser.parse_args()
@@ -616,22 +593,22 @@ def setup_datadog_preflight_check():
 
     # This calls the creation function to create all needed IAM policies, roles and EC2/EKS infrastructure
     # will check if some infrastructure exists first to avoid needless exit later
-    if mode == "Create":
+    if mode == 'Create':
         create_preflight_check()
-    elif mode == "Destroy":
+    elif mode == 'Destroy':
         delete_preflight_check()
-    elif mode == "Update":
+    elif mode == 'Update':
         update_preflight_check()
-    elif mode == "Assessment":
+    elif mode == 'Assessment':
         assessment_preflight_check()
-    elif mode == "SetupFalco":
+    elif mode == 'SetupFalco':
         setup_falco_preflight_check()
-    elif mode == "RemoveFalco":
+    elif mode == 'RemoveFalco':
         setup_falco_preflight_check()
-    elif mode == "SetupDatadog":
+    elif mode == 'SetupDatadog':
         setup_datadog_preflight_check()
-    elif mode == "RemoveDatadog":
+    elif mode == 'RemoveDatadog':
         setup_datadog_preflight_check()
     else:
-        print(f"Somehow you provided an unexpected arguement, exiting!")
-        sys.exit(2)
+        print(f'Somehow you provided an unexpected argument, exiting!')
+        sys.exit(2)
\ No newline at end of file
diff --git a/plugins/ECEDatadog.py b/plugins/ECEDatadog.py
index 1f0cdd4..8caba57 100644
--- a/plugins/ECEDatadog.py
+++ b/plugins/ECEDatadog.py
@@ -1,71 +1,68 @@
-# This file is part of Lightspin EKS Creation Engine.
-# SPDX-License-Identifier: Apache-2.0
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
+#This file is part of Lightspin EKS Creation Engine.
+#SPDX-License-Identifier: Apache-2.0
+
+#Licensed to the Apache Software Foundation (ASF) under one
+#or more contributor license agreements. See the NOTICE file
+#distributed with this work for additional information
+#regarding copyright ownership. The ASF licenses this file
+#to you under the Apache License, Version 2.0 (the
 #'License'); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
+#with the License. You may obtain a copy of the License at
+
+#http://www.apache.org/licenses/LICENSE-2.0
+
+#Unless required by applicable law or agreed to in writing,
+#software distributed under the License is distributed on an
 #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
+#KIND, either express or implied. See the License for the
+#specific language governing permissions and limitations
+#under the License. 
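For reference while reading this plugin, the Helm operations it shells out to (visible verbatim in the functions below) reduce to the following; the API key value is a placeholder:

```bash
helm repo add datadog https://helm.datadoghq.com && helm repo update
helm install datadog-agent --set targetSystem=linux --set datadog.apiKey=<DD_API_KEY> datadog/datadog
helm uninstall datadog-agent
```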
+ import subprocess -""" +''' This Class manages deployment of Datadog onto an EKS Cluster and rollbacks / manual deletions -""" - +''' +class DatadogSetup(): -class DatadogSetup: def initialization(cluster_name, datadog_mode, datadog_api_key): - """ + ''' This function controls initialization of the DatadogSetup Class. It will control installs, deletions, and rollbacks - """ + ''' - if datadog_mode == "Setup": - print(f"Setting up Datadog on EKS Cluster {cluster_name}") + if datadog_mode == 'Setup': + print(f'Setting up Datadog on EKS Cluster {cluster_name}') DatadogSetup.install_datadog(datadog_api_key) else: - print(f"Rolling back Datadog from EKS Cluster {cluster_name}") + print(f'Rolling back Datadog from EKS Cluster {cluster_name}') DatadogSetup.uninstall_datadog() def install_datadog(datadog_api_key): - """ + ''' This function adds and updates existing Datadog Charts and applies the Chart to your EKS Cluster - """ + ''' # Use subprocess to add Datadog Charts using Helm - print(f"Adding Datadog Helm Charts") - datadogHelmChartAddCmd = ( - "helm repo add datadog https://helm.datadoghq.com && helm repo update" - ) - datadogHelmChartAddSubprocess = subprocess.run( - datadogHelmChartAddCmd, shell=True, capture_output=True - ) - datadogHelmChartAddMsg = str(datadogHelmChartAddSubprocess.stdout.decode("utf-8")) + print(f'Adding Datadog Helm Charts') + datadogHelmChartAddCmd = 'helm repo add datadog https://helm.datadoghq.com && helm repo update' + datadogHelmChartAddSubprocess = subprocess.run(datadogHelmChartAddCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + datadogHelmChartAddMsg = str(datadogHelmChartAddSubprocess.stdout.decode('utf-8')) print(datadogHelmChartAddMsg) # Use subprocess to configure Datadog per initiation arguments from main.py - print(f"Installing Datadog") - installDatadogCmd = f"helm install datadog-agent --set targetSystem=linux --set datadog.apiKey={datadog_api_key} datadog/datadog" - installDatadogSubprocess = subprocess.run( - installDatadogCmd, shell=True, capture_output=True - ) - installDatadogMsg = str(installDatadogSubprocess.stdout.decode("utf-8")) + print(f'Installing Datadog') + installDatadogCmd = f'helm install datadog-agent --set targetSystem=linux --set datadog.apiKey={datadog_api_key} datadog/datadog' + installDatadogSubprocess = subprocess.run(installDatadogCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + installDatadogMsg = str(installDatadogSubprocess.stdout.decode('utf-8')) print(installDatadogMsg) def uninstall_datadog(): - """ + ''' This function uninstalls Datadog from your EKS Cluster - """ + ''' # Uninstall Datadog from EKS - datadogRemoveCmd = "helm uninstall datadog-agent" - datadogRemoveSubprocess = subprocess.run(datadogRemoveCmd, shell=True, capture_output=True) - datadogRemoveMsg = str(datadogRemoveSubprocess.stdout.decode("utf-8")) - print(datadogRemoveMsg) + datadogRemoveCmd = 'helm uninstall datadog-agent' + datadogRemoveSubprocess = subprocess.run(datadogRemoveCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + datadogRemoveMsg = str(datadogRemoveSubprocess.stdout.decode('utf-8')) + print(datadogRemoveMsg) \ No newline at end of file diff --git a/plugins/ECEFalco.py b/plugins/ECEFalco.py index 07870ae..c148cda 100644 --- a/plugins/ECEFalco.py +++ b/plugins/ECEFalco.py @@ -1,176 +1,195 @@ -# This file is part of Lightspin EKS Creation Engine. -# SPDX-License-Identifier: Apache-2.0 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the +#This file is part of Lightspin EKS Creation Engine. +#SPDX-License-Identifier: Apache-2.0 + +#Licensed to the Apache Software Foundation (ASF) under one +#or more contributor license agreements. See the NOTICE file +#distributed with this work for additional information +#regarding copyright ownership. The ASF licenses this file +#to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an +#with the License. You may obtain a copy of the License at + +#http://www.apache.org/licenses/LICENSE-2.0 + +#Unless required by applicable law or agreed to in writing, +#software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import json -import subprocess -import sys -from datetime import datetime +#KIND, either express or implied. See the License for the +#specific language governing permissions and limitations +#under the License. +import sys import boto3 import botocore.exceptions +import json +from datetime import datetime +import subprocess -""" +''' This Class manages an end-to-end deployment of Falco and FalcoSidekick to EKS using Helm. This class can be called from ClusterManager (if flag is set) or called independently to setup Falco -""" - - -class FalcoSetup: - def falco_initialization( - cluster_name, - falco_mode, - falco_sidekick_destination_type, - falco_sidekick_destination, - datadog_api_key, - ): - """ +''' +class FalcoSetup(): + + def falco_initialization(cluster_name, falco_mode, falco_sidekick_destination_type, falco_sidekick_destination, datadog_api_key): + ''' This function handles configuration of Falco and FalcoSidekick on a Cluster, whether in-line of an ECE Create or ECE SetupFalco `--mode` from main.py Depending on the destination configuration and mode, this function will either schedule deletion or creation of additional infrastructure and issue Helm commands to your cluster - """ - print(f"Setting up Falco on {cluster_name}") + ''' + print(f'Setting up Falco on {cluster_name}') - if falco_mode == "Create": - print(f"Setting up Falco on running Nodes for {cluster_name}") - if falco_sidekick_destination_type == "SNS": + if falco_mode == 'Create': + print(f'Setting up Falco on running Nodes for {cluster_name}') + if falco_sidekick_destination_type == 'SNS': # Create EKS Client - eks = boto3.client("eks") + eks = boto3.client('eks') # Create an empty list of Role ARNs to append Cluster & Node Roles to send to different functions roleArns = [] # First, since we may have to work with existing Clusters that ECE did not setup, find all Nodegroups for the cluster and retrieve their Role ARNs # and add them to the static list above if they are not already there - for nodegroup in eks.list_nodegroups(clusterName=cluster_name)["nodegroups"]: + for nodegroup in eks.list_nodegroups(clusterName=cluster_name)['nodegroups']: nodeRoleArn = eks.describe_nodegroup( - clusterName=cluster_name, nodegroupName=nodegroup - 
)["nodegroup"]["nodeRole"] + clusterName=cluster_name, + nodegroupName=nodegroup + )['nodegroup']['nodeRole'] if nodeRoleArn not in roleArns: roleArns.append(nodeRoleArn) # Retrieve Cluster Role Arn - clusterRoleArn = eks.describe_cluster(name=cluster_name)["cluster"]["roleArn"] + clusterRoleArn = eks.describe_cluster(name=cluster_name)['cluster']['roleArn'] if clusterRoleArn not in roleArns: roleArns.append(clusterRoleArn) topicArn = FalcoSetup.falco_sidekick_sns_iam_generator( cluster_name=cluster_name, falco_sidekick_destination=falco_sidekick_destination, - role_arns=roleArns, + role_arns=roleArns ) # Install Falco # All commands for FalcoSidekick come from the Helm vars spec in the chart GitHub repo: https://github.com/falcosecurity/charts/tree/master/falcosidekick#configuration - falcoHelmCmd = f"helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.aws.sns.topicarn={topicArn}" - FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) - elif falco_sidekick_destination_type == "Slack": - print( - f"Configuring Falco and FalcoSidekick to send runtime alerts to Slack Webhook {falco_sidekick_destination}" + falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.aws.sns.topicarn={topicArn}' + FalcoSetup.install_falco( + falco_install_command=falcoHelmCmd ) - + elif falco_sidekick_destination_type == 'Slack': + print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Slack Webhook {falco_sidekick_destination}') + # Install Falco - falcoHelmCmd = f"helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.slack.webhookurl={falco_sidekick_destination}" - FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) - elif falco_sidekick_destination_type == "Teams": - print( - f"Configuring Falco and FalcoSidekick to send runtime alerts to Teams Webhook {falco_sidekick_destination}" + falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.slack.webhookurl={falco_sidekick_destination}' + FalcoSetup.install_falco( + falco_install_command=falcoHelmCmd ) + elif falco_sidekick_destination_type == 'Teams': + print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Teams Webhook {falco_sidekick_destination}') # Install Falco - falcoHelmCmd = f"helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.teams.webhookurl={falco_sidekick_destination}" - FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) - elif falco_sidekick_destination_type == "Datadog": - print( - f"Configuring Falco and FalcoSidekick to send runtime alerts to Datadog Host {falco_sidekick_destination}" + falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.teams.webhookurl={falco_sidekick_destination}' + FalcoSetup.install_falco( + falco_install_command=falcoHelmCmd ) + elif falco_sidekick_destination_type == 'Datadog': + print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Datadog Host {falco_sidekick_destination}') # Install Falco - falcoHelmCmd = f"helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set 
falcosidekick.webui.enabled=false --set falcosidekick.config.datadog.host={falco_sidekick_destination} --set falcosidekick.config.datadog.apikey={datadog_api_key}" - FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) + falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.datadog.host={falco_sidekick_destination} --set falcosidekick.config.datadog.apikey={datadog_api_key}' + FalcoSetup.install_falco( + falco_install_command=falcoHelmCmd + ) else: - print(f"Unsupported destination type provided, exiting") + print(f'Unsupported destination type provided, exiting') sys.exit(2) else: - print(f"Rolling back Falco on running Nodes for {cluster_name}") + print(f'Rolling back Falco on running Nodes for {cluster_name}') FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, role_arns): - """ + ''' This function will create IAM Policies to attach to the Roles of EKS Clusters and Nodegroups being boostrapped for Falco if they are configured to send messages to SNS. It will invoke `falco_sidekick_sns_creation` to receive the Topic ARN for the SNS Topic (if one is not provided) - """ - print(f"Configuring {cluster_name} Cluster and Nodegroup IAM Roles to communicate with SNS") + ''' + print(f'Configuring {cluster_name} Cluster and Nodegroup IAM Roles to communicate with SNS') - sts = boto3.client("sts") - iam = boto3.client("iam") + sts = boto3.client('sts') + iam = boto3.client('iam') # If the value for 'falco_sidekick_destination' is None, that means a SNS topic was not provided and needs to be setup if falco_sidekick_destination == None: topicArn = FalcoSetup.falco_sidekick_sns_creation( - cluster_name=cluster_name, role_arns=role_arns + cluster_name=cluster_name, + role_arns=role_arns ) else: topicArn = falco_sidekick_destination # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging - createdBy = str(sts.get_caller_identity()["Arn"]) + createdBy = str(sts.get_caller_identity()['Arn']) createdAt = str(datetime.utcnow()) # AWS Account ID - acctId = str(sts.get_caller_identity()["Account"]) + acctId = str(sts.get_caller_identity()['Account']) # Bring in the list of Role ARNs to append Cluster & Node Roles into for attaching policies to roleArns = role_arns # Create the IAM Policy for SNS iamPolicyDoc = { - "Version": "2012-10-17", - "Statement": [ + 'Version': '2012-10-17', + 'Statement': [ { - "Sid": "Snssid", - "Effect": "Allow", - "Action": ["sns:Publish", "sns:GetTopicAttributes", "sns:ListTopics"], - "Resource": [topicArn], + 'Sid': 'Snssid', + 'Effect': 'Allow', + 'Action': [ + 'sns:Publish', + 'sns:GetTopicAttributes', + 'sns:ListTopics' + ], + 'Resource': [topicArn] } - ], + ] } - policyName = f"{cluster_name}FalcoSidekick-SNSPublishPolicy" + policyName = f'{cluster_name}FalcoSidekick-SNSPublishPolicy' try: iam.create_policy( PolicyName=policyName, - Path="/", + Path='/', PolicyDocument=json.dumps(iamPolicyDoc), - Description=f"Allows EKS Cluster {cluster_name} and Nodegroups to send Falco alerts to SNS - Created by Lightspin ECE", + Description=f'Allows EKS Cluster {cluster_name} and Nodegroups to send Falco alerts to SNS - Created by Lightspin ECE', Tags=[ - {"Key": "Name", "Value": policyName}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], + { + 'Key': 
'Name', + 'Value': policyName + }, + { + 'Key': 'CreatedBy', + 'Value': createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] ) - policyArn = f"arn:aws:iam::{acctId}:policy/{policyName}" + policyArn = f'arn:aws:iam::{acctId}:policy/{policyName}' except botocore.exceptions.ClientError as error: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) for role in roleArns: - roleName = role.split("/")[1] + roleName = role.split('/')[1] try: - iam.attach_role_policy(RoleName=roleName, PolicyArn=policyArn) + iam.attach_role_policy( + RoleName=roleName, + PolicyArn=policyArn + ) except botocore.exceptions.ClientError as error: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) del sts @@ -184,75 +203,97 @@ def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, r return topicArn def falco_sidekick_sns_creation(cluster_name, role_arns): - """ + ''' This function creates an SNS Topic and Topic Policy for use as a FalcoSidekick destination if a valid ARN is not provided for the 'SNS' destination type and returns the Topic Arn to the `falco_sidekick_sns_iam_generator()` function which this function is called from. Otherwise, this function does nothing if not called from `falco_initialization` - """ - print(f"Creating SNS Topic to send Falco alerts to for {cluster_name}") + ''' + print(f'Creating SNS Topic to send Falco alerts to for {cluster_name}') - sns = boto3.client("sns") - sts = boto3.client("sts") + sns = boto3.client('sns') + sts = boto3.client('sts') # Use STS GetCallerIdentity and Datetime to generate CreatedBy and CreatedAt information for tagging - createdBy = str(sts.get_caller_identity()["Arn"]) + createdBy = str(sts.get_caller_identity()['Arn']) createdAt = str(datetime.utcnow()) # AWS Account ID - acctId = str(sts.get_caller_identity()["Account"]) + acctId = str(sts.get_caller_identity()['Account']) # Bring in the list of Role ARNs to append Cluster & Node Roles into for adding Principal permissions to the SNS Topic Policy roleArns = role_arns # Create an SNS Topic # NOTE: In the future, need to add KMS along with EncryptionCreationEngine https://docs.aws.amazon.com/sns/latest/dg/sns-key-management.html - topicName = f"{cluster_name}-FalcoSidekickTopic" + topicName = f'{cluster_name}-FalcoSidekickTopic' try: topicArn = sns.create_topic( Name=topicName, - Attributes={"DisplayName": topicName}, + Attributes={ + 'DisplayName': topicName + }, Tags=[ - {"Key": "Name", "Value": topicName}, - {"Key": "CreatedBy", "Value": createdBy}, - {"Key": "CreatedAt", "Value": createdAt}, - {"Key": "CreatedWith", "Value": "Lightspin ECE"}, - ], - )["TopicArn"] + { + 'Key': 'Name', + 'Value': topicName + }, + { + 'Key': 'CreatedBy', + 'Value': createdBy + }, + { + 'Key': 'CreatedAt', + 'Value': createdAt + }, + { + 'Key': 'CreatedWith', + 'Value': 'Lightspin ECE' + } + ] + )['TopicArn'] except botocore.exceptions.ClientError as error: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) # Create a SNS Topic Policy Doc to pass in as an SNS Attribute topicPolicyJson = { - "Version": "2008-10-17", - "Id": "ecepolicy", - "Statement": [ + 'Version':'2008-10-17', + 'Id':'ecepolicy', + 'Statement':[ { - "Sid": "ecesid-pub", - "Effect": "Allow", - "Principal": {"AWS": 
roleArns}, - "Action": ["SNS:Publish"], - "Resource": topicArn, + 'Sid':'ecesid-pub', + 'Effect':'Allow', + 'Principal':{ + 'AWS': roleArns + }, + 'Action':['SNS:Publish'], + 'Resource': topicArn }, { - "Sid": "ecesid-sub", - "Effect": "Allow", - "Principal": {"AWS": "*"}, - "Action": ["SNS:Subscribe"], - "Resource": topicArn, - "Condition": {"StringEquals": {"AWS:SourceOwner": acctId}}, - }, - ], + 'Sid':'ecesid-sub', + 'Effect':'Allow', + 'Principal':{ + 'AWS':'*' + }, + 'Action':['SNS:Subscribe'], + 'Resource': topicArn, + 'Condition':{ + 'StringEquals':{ + 'AWS:SourceOwner': acctId + } + } + } + ] } try: sns.set_topic_attributes( TopicArn=topicArn, - AttributeName="Policy", - AttributeValue=json.dumps(topicPolicyJson), + AttributeName='Policy', + AttributeValue=json.dumps(topicPolicyJson) ) except botocore.exceptions.ClientError as error: - print(f"Error encountered: {error}") + print(f'Error encountered: {error}') FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) del topicName @@ -265,59 +306,60 @@ def falco_sidekick_sns_creation(cluster_name, role_arns): return topicArn def install_falco(falco_install_command): - """ + ''' This function receives the final installation command from `falco_initialization` and will attempt to add the latest Falco Security Helm Charts and install Falco onto a new namespace on your EKS Cluster - """ - print(f"Installing Falco and FalcoSidekick") + ''' + print(f'Installing Falco and FalcoSidekick') # Use subprocess to add Falco Charts using Helm - print(f"Adding Falco Helm Charts") - falcoHelmChartAddCmd = ( - "helm repo add falcosecurity https://falcosecurity.github.io/charts && helm repo update" - ) - falcoHelmChartAddSubprocess = subprocess.run( - falcoHelmChartAddCmd, shell=True, capture_output=True - ) - falcoHelmChartAddMsg = str(falcoHelmChartAddSubprocess.stdout.decode("utf-8")) + print(f'Adding Falco Helm Charts') + falcoHelmChartAddCmd = 'helm repo add falcosecurity https://falcosecurity.github.io/charts && helm repo update' + falcoHelmChartAddSubprocess = subprocess.run(falcoHelmChartAddCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + falcoHelmChartAddMsg = str(falcoHelmChartAddSubprocess.stdout.decode('utf-8')) print(falcoHelmChartAddMsg) # Use subprocess to configure Falco and FalcoSidekick per initiation arguments from main.py - print(f"Installing Falco and FalcoSidekick") + print(f'Installing Falco and FalcoSidekick') installFalcoCmd = falco_install_command - installFalcoSubprocess = subprocess.run(installFalcoCmd, shell=True, capture_output=True) - installFalcoMsg = str(installFalcoSubprocess.stdout.decode("utf-8")) + installFalcoSubprocess = subprocess.run(installFalcoCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + installFalcoMsg = str(installFalcoSubprocess.stdout.decode('utf-8')) print(installFalcoMsg) # Rollback and manual deletes starts here def falco_setup_rollback(cluster_name): - """ + ''' This function handles rollback of resources just for Falco - """ + ''' - sts = boto3.client("sts") - acctId = str(sts.get_caller_identity()["Account"]) - iam = boto3.client("iam") - sns = boto3.client("sns") + sts = boto3.client('sts') + acctId = str(sts.get_caller_identity()['Account']) + iam = boto3.client('iam') + sns = boto3.client('sns') # Retrieve region for AWS CLI kubectl generation session = boto3.session.Session() awsRegion = session.region_name # Create & Stage Policy & SNS Names (only used if ECE created them) - policyName = f"{cluster_name}FalcoSidekick-SNSPublishPolicy" - policyArn = 
f"arn:aws:iam::{acctId}:policy/{policyName}" - topicName = f"{cluster_name}-FalcoSidekickTopic" - topicArn = f"arn:aws:sns:{awsRegion}:{acctId}:{topicName}" + policyName = f'{cluster_name}FalcoSidekick-SNSPublishPolicy' + policyArn = f'arn:aws:iam::{acctId}:policy/{policyName}' + topicName = f'{cluster_name}-FalcoSidekickTopic' + topicArn = f'arn:aws:sns:{awsRegion}:{acctId}:{topicName}' # If an IAM Policy for SNS was created, attempt to detach it before deletion try: rolesAttachedToPolicy = iam.list_entities_for_policy( - PolicyArn=policyArn, EntityFilter="Role", PolicyUsageFilter="PermissionsPolicy" - )["PolicyRoles"] + PolicyArn=policyArn, + EntityFilter='Role', + PolicyUsageFilter='PermissionsPolicy' + )['PolicyRoles'] if rolesAttachedToPolicy: for role in rolesAttachedToPolicy: - roleName = str(role["RoleName"]) - iam.detach_role_policy(RoleName=roleName, PolicyArn=policyArn) + roleName = str(role['RoleName']) + iam.detach_role_policy( + RoleName=roleName, + PolicyArn=policyArn + ) except botocore.exceptions.ClientError as error: print(error) except KeyError as ke: @@ -325,23 +367,25 @@ def falco_setup_rollback(cluster_name): # If an IAM Policy for SNS was created, attempt to delete it try: - iam.delete_policy(PolicyArn=policyArn) - print(f"Falco SNS Policy {policyArn} deleted") + iam.delete_policy( + PolicyArn=policyArn + ) + print(f'Falco SNS Policy {policyArn} deleted') except botocore.exceptions.ClientError as error: print(error) # If an SNS Topic was created, attempt to delete it try: sns.delete_topic(TopicArn=topicArn) - print(f"Falco SNS Topic {topicArn} deleted") + print(f'Falco SNS Topic {topicArn} deleted') except botocore.exceptions.ClientError as error: print(error) # Uninstall Falco from EKS - falcoRemoveCmd = "helm uninstall falco" - falcoRemoveSubprocess = subprocess.run(falcoRemoveCmd, shell=True, capture_output=True) - falcoRemoveMsg = str(falcoRemoveSubprocess.stdout.decode("utf-8")) + falcoRemoveCmd = 'helm uninstall falco' + falcoRemoveSubprocess = subprocess.run(falcoRemoveCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + falcoRemoveMsg = str(falcoRemoveSubprocess.stdout.decode('utf-8')) print(falcoRemoveMsg) - print(f"Falco rollback complete.") - sys.exit(2) + print(f'Falco rollback complete.') + sys.exit(2) \ No newline at end of file diff --git a/plugins/ECESecurity.py b/plugins/ECESecurity.py index 85279da..6958254 100644 --- a/plugins/ECESecurity.py +++ b/plugins/ECESecurity.py @@ -1,105 +1,103 @@ -# This file is part of Lightspin EKS Creation Engine. -# SPDX-License-Identifier: Apache-2.0 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the +#This file is part of Lightspin EKS Creation Engine. +#SPDX-License-Identifier: Apache-2.0 + +#Licensed to the Apache Software Foundation (ASF) under one +#or more contributor license agreements. See the NOTICE file +#distributed with this work for additional information +#regarding copyright ownership. The ASF licenses this file +#to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an +#with the License. You may obtain a copy of the License at + +#http://www.apache.org/licenses/LICENSE-2.0 + +#Unless required by applicable law or agreed to in writing, +#software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import json -import re -import subprocess -import time +#KIND, either express or implied. See the License for the +#specific language governing permissions and limitations +#under the License. import boto3 +import json +import time +import subprocess +import re -""" +''' This Class manages various security assessment functions - such as running and saving Kube-bench CIS benchmarking and Trivy container scanning -""" +''' +class SecurityAssessment(): - -class SecurityAssessment: def start_assessment(cluster_name): - """ + ''' This function serves as the 'brain' of the security assessment. It will modify the Kubeconfig and attempt to run the other assessments it will also consolidate all findings in a SARIF JSON format for consumption in downstream tools - """ - print(f"Starting security assessments for EKS Cluster {cluster_name}") + ''' + print(f'Starting security assessments for EKS Cluster {cluster_name}') # Retrieve region for AWS CLI kubectl generation session = boto3.session.Session() awsRegion = session.region_name - updateKubeconfigCmd = ( - f"aws eks update-kubeconfig --region {awsRegion} --name {cluster_name}" - ) - updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, capture_output=True) - print(updateKubeconfigProc.stdout.decode("utf-8")) + updateKubeconfigCmd = f'aws eks update-kubeconfig --region {awsRegion} --name {cluster_name}' + updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + print(updateKubeconfigProc.stdout.decode('utf-8')) trivySarif = SecurityAssessment.run_trivy() kubebenchSarif = SecurityAssessment.run_kube_bench(cluster_name) - print(f"Security assessments completed, starting SARIF consolidation.") + print(f'Security assessments completed, starting SARIF consolidation.') sarifBase = { - "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", - "version": "2.1.0", - "runs": [], + '$schema': 'https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json', + 'version': '2.1.0', + 'runs': [] } for runs in trivySarif: - sarifBase["runs"].append(runs) + sarifBase['runs'].append(runs) for runs in kubebenchSarif: - sarifBase["runs"].append(runs) + sarifBase['runs'].append(runs) - with open("./ECE_SecurityAssessment.sarif", "w") as jsonfile: + with open('./ECE_SecurityAssessment.sarif', 'w') as jsonfile: json.dump(sarifBase, jsonfile, indent=4, default=str) - print( - f'Assessments completed and SARIF document created successfully as "ECE_SecurityAssessment.sarif".' 
- ) + print(f'Assessments completed and SARIF document created successfully as "ECE_SecurityAssessment.sarif".') def run_trivy(): - """ + ''' This function will run Trivy container vuln scanning against all running Containers in your Cluster and generate a report - """ + ''' # Create empty lists to contain unique values for reporting uniqueContainers = [] trivyFindings = [] - print(f"Running Trivy") - + print(f'Running Trivy') + # Retrieve a list of all running Containers and create a unique list of them to pass to Trivy for scanning - print(f"Retrieving list of all running Containers from your EKS Cluster") + print(f'Retrieving list of all running Containers from your EKS Cluster') command = 'kubectl get pods --all-namespaces -o json | jq --raw-output ".items[].spec.containers[].image"' - sub = subprocess.run(command, shell=True, capture_output=True) + sub = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) # pull list of container URIs from kubectl - strList = str(sub.stdout.decode("utf-8")) + strList = str(sub.stdout.decode('utf-8')) # split by newline, as that is how it is retruned - splitter = strList.split("\n") + splitter = strList.split('\n') # Read the newly created list (created by `.split()`) and write unique names to List, ignoring the stray whitespace for i in splitter: if i not in uniqueContainers: - if i == "": + if i == '': pass else: uniqueContainers.append(i) - + totalUniques = str(len(uniqueContainers)) - if totalUniques == "1": - print(f"Trivy will scan {totalUniques} unique container image") + if totalUniques == '1': + print(f'Trivy will scan {totalUniques} unique container image') else: - print(f"Trivy will scan {totalUniques} unique container images") + print(f'Trivy will scan {totalUniques} unique container images') # mem clean up del splitter del strList @@ -107,55 +105,55 @@ def run_trivy(): # loop the list of unique container URIs and write the vulns to a new list for c in uniqueContainers: # passing '--quiet' will ensure the setup text from Trivy scanning does not make it into the JSON and corrupt it - trivyScanCmd = f"trivy --quiet image --format sarif {c}" - trivyScanSubprocess = subprocess.run(trivyScanCmd, shell=True, capture_output=True) - trivyStdout = str(trivyScanSubprocess.stdout.decode("utf-8")) + trivyScanCmd = f'trivy --quiet image --format sarif {c}' + trivyScanSubprocess = subprocess.run(trivyScanCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + trivyStdout = str(trivyScanSubprocess.stdout.decode('utf-8')) # load JSON object from stdout jsonItem = json.loads(trivyStdout) # loop the list of vulns - print(f"Finished scanning Image URI {c}") - for v in jsonItem["runs"]: + print(f'Finished scanning Image URI {c}') + for v in jsonItem['runs']: trivyFindings.append(v) del v del c - print(f"Completed Trivy scans of all unique running Containers in your Cluster") + print(f'Completed Trivy scans of all unique running Containers in your Cluster') return trivyFindings def run_kube_bench(cluster_name): - """ + ''' This function will run Kube-bench EKS CIS benchmark against your cluster and generate a report - """ + ''' - print(f"Running Kube-bench") + print(f'Running Kube-bench') # Create an empty list to hold normalized JSON findings once Kube-bench is converted findings = [] # The SARIF JSON schema requires a URI for the 'artifact' location - which will point to the Cluster Endpoint - eks = boto3.client("eks") - clusterEndpoint = eks.describe_cluster(name=cluster_name)["cluster"]["endpoint"] + eks = 
boto3.client('eks') + clusterEndpoint = eks.describe_cluster(name=cluster_name)['cluster']['endpoint'] del eks # Schedule the Job onto your EKS Cluster - command = "kubectl apply -f job-eks.yaml" - runJobSubproc = subprocess.run(command, shell=True, capture_output=True) - print(runJobSubproc.stdout.decode("utf-8")) + command = 'kubectl apply -f job-eks.yaml' + runJobSubproc = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + print(runJobSubproc.stdout.decode('utf-8')) time.sleep(1.5) # Wait for Job to complete - use a short timeout to force a message to be piped sooner # https://stackoverflow.com/questions/63632084/kubectl-wait-for-a-pod-to-complete - jobWaitCmd = "kubectl wait --for=condition=complete job/kube-bench --timeout=2s" + jobWaitCmd = 'kubectl wait --for=condition=complete job/kube-bench --timeout=2s' # Really bad Regex hack to exit the `while True` loop - fuzzy match the stdout message - completionRegex = re.compile("job.batch/kube-bench condition met") + completionRegex = re.compile('job.batch/kube-bench condition met') while True: - jobWaitSubproc = subprocess.run(jobWaitCmd, shell=True, capture_output=True) - jobWaitMessage = str(jobWaitSubproc.stdout.decode("utf-8")) + jobWaitSubproc = subprocess.run(jobWaitCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + jobWaitMessage = str(jobWaitSubproc.stdout.decode('utf-8')) completionRegexCheck = completionRegex.search(jobWaitMessage) if completionRegexCheck: - print(f"Kube-bench Job completed! {jobWaitMessage}") + print(f'Kube-bench Job completed! {jobWaitMessage}') break else: time.sleep(2) @@ -163,67 +161,69 @@ def run_kube_bench(cluster_name): # `getPodCmd` used Kubectl to get pod names in all namespaces (-A). cut -d/ -f2 command is to split by the '/' and get the name # grep is used to ensure the right pod name is pulled as it always ends with a random 5 character hex (ex. kube-bench-z6r4b) - getPodCmd = "kubectl get pods -o name -A | cut -d/ -f2 | grep kube-bench" - getPodSubproc = subprocess.run(getPodCmd, shell=True, capture_output=True) + getPodCmd = 'kubectl get pods -o name -A | cut -d/ -f2 | grep kube-bench' + getPodSubproc = subprocess.run(getPodCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) # decoding adds newline or blank spaces - attempt to trim them - kubebenchPodName = ( - str(getPodSubproc.stdout.decode("utf-8")).replace("\n", "").replace(" ", "") - ) + kubebenchPodName = str(getPodSubproc.stdout.decode('utf-8')).replace('\n', '').replace(' ', '') # Pull logs from Job - this is the actual results of the job - getLogsCmd = f"kubectl logs {kubebenchPodName}" - getLogsSubproc = subprocess.run(getLogsCmd, shell=True, capture_output=True) - getLogsStdout = str(getLogsSubproc.stdout.decode("utf-8")) + getLogsCmd = f'kubectl logs {kubebenchPodName}' + getLogsSubproc = subprocess.run(getLogsCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + getLogsStdout = str(getLogsSubproc.stdout.decode('utf-8')) # Split the block of text from STDOUT by newline delimiters to create a new list - splitter = getLogsStdout.split("\n") + splitter = getLogsStdout.split('\n') # Use regex to match the Kube-Bench findings, they always start with a '[' which contains info such as '[PASS]'. 
We then match anything with 2 periods # as Kube-bench outputs 'headers' such as 3 or 3.1 - we want results such as '[PASS] 3.1.3 Ensure that the kubelet configuration file has permissions set to 644 or more restrictive (Manual)' # this is a horrible way to do it....but it works - kubeBenchResultRegex = re.compile(r"^\[.*\..*\..*") + kubeBenchResultRegex = re.compile('^\[.*\..*\..*') for line in splitter: kubeBenchRegexCheck = kubeBenchResultRegex.search(line) if kubeBenchRegexCheck: # Once we find a match, split at the closing bracket and perform small transformations - splitFinding = line.split("] ") + splitFinding = line.split('] ') # Handle the pass/fail/warn labels - if splitFinding[0] == "[PASS": - findingStatus = "Passed" - elif splitFinding[0] == "[WARN": - findingStatus = "Warning" + if splitFinding[0] == '[PASS': + findingStatus = 'Passed' + elif splitFinding[0] == '[WARN': + findingStatus = 'Warning' else: - findingStatus = "Failed" + findingStatus = 'Failed' # Create a new dict of the findings that will match a SARIF JSON 'run' # https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md run = { - "tool": { - "driver": { - "name": "Kube-bench", - "semanticVersion": "0.6.6", - "informationUri": "https://github.com/aquasecurity/kube-bench", - "organization": "Aqua Security", - "fullDescription": { - "text": "kube-bench is a tool that checks whether Kubernetes is deployed securely by running the checks documented in the CIS Kubernetes Benchmark." - }, + 'tool':{ + 'driver':{ + 'name':'Kube-bench', + 'semanticVersion': '0.6.6', + 'informationUri': 'https://github.com/aquasecurity/kube-bench', + 'organization': 'Aqua Security', + 'fullDescription': { + 'text': 'kube-bench is a tool that checks whether Kubernetes is deployed securely by running the checks documented in the CIS Kubernetes Benchmark.' + } } }, - "results": [ + 'results':[ { - "ruleId": splitFinding[1], - "message": {"text": findingStatus}, - "locations": [ + 'ruleId': splitFinding[1], + 'message':{ + 'text': findingStatus + }, + 'locations':[ { - "physicalLocation": { - "artifactLocation": { - "uri": clusterEndpoint, - "description": {"text": cluster_name}, + 'physicalLocation':{ + 'artifactLocation':{ + 'uri': clusterEndpoint, + 'description': { + 'text': cluster_name + } } } } - ], + ] } ], - "columnKind": "utf16CodeUnits", + 'columnKind':'utf16CodeUnits' } findings.append(run) else: @@ -232,13 +232,11 @@ def run_kube_bench(cluster_name): del splitter # Delete the job from the EKS Cluster - deleteKubebenchJobCmd = "kubectl delete -f job-eks.yaml" - deleteKubebenchJobSubproc = subprocess.run( - deleteKubebenchJobCmd, shell=True, capture_output=True - ) - deleteKubebenchJobStdout = str(deleteKubebenchJobSubproc.stdout.decode("utf-8")) - print(f"{deleteKubebenchJobStdout}") + deleteKubebenchJobCmd = 'kubectl delete -f job-eks.yaml' + deleteKubebenchJobSubproc = subprocess.run(deleteKubebenchJobCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + deleteKubebenchJobStdout = str(deleteKubebenchJobSubproc.stdout.decode('utf-8')) + print(f'{deleteKubebenchJobStdout}') - print(f"Completed Kube-bench assessment of EKS Cluster {cluster_name}") + print(f'Completed Kube-bench assessment of EKS Cluster {cluster_name}') - return findings + return findings \ No newline at end of file diff --git a/plugins/__init__.py b/plugins/__init__.py index da288a2..8e8bdd2 100644 --- a/plugins/__init__.py +++ b/plugins/__init__.py @@ -1,16 +1,19 @@ -# This file is part of Lightspin EKS Creation Engine. 
-# SPDX-License-Identifier: Apache-2.0 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. +#This file is part of Lightspin EKS Creation Engine. +#SPDX-License-Identifier: Apache-2.0 + +#Licensed to the Apache Software Foundation (ASF) under one +#or more contributor license agreements. See the NOTICE file +#distributed with this work for additional information +#regarding copyright ownership. The ASF licenses this file +#to you under the Apache License, Version 2.0 (the +#"License"); you may not use this file except in compliance +#with the License. You may obtain a copy of the License at + +#http://www.apache.org/licenses/LICENSE-2.0 + +#Unless required by applicable law or agreed to in writing, +#software distributed under the License is distributed on an +#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +#KIND, either express or implied. See the License for the +#specific language governing permissions and limitations +#under the License. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 53bd91a..2e1b37f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -art>=5.4,<5.5 -awscli>=1.22.65,<1.23.0 -boto3>=1.21.10,<1.22.0 -clint>=0.5.1,<0.6.0 -termcolor>=1.1.0,<1.2.0 -urllib3>=1.26.8,<1.27.0 +awscli +boto3 +art +termcolor +clint +urllib3 \ No newline at end of file From 98e06be0524d93d4d572c0a1338b98886137d1a9 Mon Sep 17 00:00:00 2001 From: nicholasmhughes Date: Wed, 2 Mar 2022 11:16:35 -0500 Subject: [PATCH 3/3] put some pre-commit checks in place, retaining quoting --- .pre-commit-config.yaml | 55 +++ EksCreationEngine.py | 876 +++++++++++++++++----------------------- README.md | 33 +- docs/HOWTO.md | 2 +- main.py | 247 ++++++----- plugins/ECEDatadog.py | 51 +-- plugins/ECEFalco.py | 218 ++++------ plugins/ECESecurity.py | 112 ++--- plugins/__init__.py | 35 +- requirements.txt | 12 +- 10 files changed, 784 insertions(+), 857 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..b9642b0 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,55 @@ +--- +minimum_pre_commit_version: 2.4.0 +repos: + # ----- Formatting ----------------------------------------------------------------------------> + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: trailing-whitespace # Trims trailing whitespace. + args: [--markdown-linebreak-ext=md] + - id: mixed-line-ending # Replaces or checks mixed line ending. + args: [--fix=lf] + - id: end-of-file-fixer # Makes sure files end in a newline and only a newline. + - id: check-merge-conflict # Check for files that contain merge conflict strings. + - id: check-ast # Simply check whether files parse as valid python. 
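For reference, once this config is committed, the hooks above can be exercised locally against the whole tree before any commit; a minimal sketch, assuming the pre-commit package itself is installed in the environment:

import subprocess

def run_precommit_hooks() -> int:
    '''Run every configured pre-commit hook against all tracked files.'''
    proc = subprocess.run(
        ['pre-commit', 'run', '--all-files'],
        capture_output=True,
        text=True,
    )
    # A non-zero return code means at least one hook failed or rewrote a file
    print(proc.stdout)
    return proc.returncode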
+ + - repo: https://github.com/asottile/pyupgrade + rev: v2.23.3 + hooks: + - id: pyupgrade + name: Rewrite Code to be Py3.7+ + args: [ + --py37-plus + ] + + - repo: https://github.com/asottile/reorder_python_imports + rev: v2.6.0 + hooks: + - id: reorder-python-imports + args: [ + --py37-plus, + ] + + - repo: https://github.com/psf/black + rev: 21.7b0 + hooks: + - id: black + args: [-l 100, -S] + + - repo: https://github.com/asottile/blacken-docs + rev: v1.10.0 + hooks: + - id: blacken-docs + args: [--skip-errors] + files: ^docs/.*\.md$ + additional_dependencies: [black==21.7b0] + # <---- Formatting ----------------------------------------------------------------------------- + + # ----- Security ------------------------------------------------------------------------------> + - repo: https://github.com/PyCQA/bandit + rev: "1.7.0" + hooks: + - id: bandit + name: Run bandit against the code base + args: [--silent, -lll] + # <---- Security ------------------------------------------------------------------------------- diff --git a/EksCreationEngine.py b/EksCreationEngine.py index ea35bf1..fb0819d 100644 --- a/EksCreationEngine.py +++ b/EksCreationEngine.py @@ -1,39 +1,37 @@ -#This file is part of Lightspin EKS Creation Engine. -#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. - +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
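For context, the reordered import block below is the layout the reorder-python-imports hook enforces: one import per line, grouped into standard-library, third-party, and first-party sections separated by blank lines, with each group alphabetized. An illustrative sketch of the convention, using names that appear in this codebase:

# Standard library modules form the first group
import json
import subprocess

# Installed third-party packages form the second group
import boto3

# First-party (project-local) modules come last
from plugins.ECEFalco import FalcoSetup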
import base64 +import json +import re +import subprocess import sys +import time +from datetime import datetime + import boto3 import botocore.exceptions -import json -from datetime import datetime -import time -import subprocess -import re + from plugins.ECEDatadog import DatadogSetup from plugins.ECEFalco import FalcoSetup cache = list() -class ClusterManager(): +class ClusterManager: def get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_architecture): ''' This function either receives an AMI ID from main.py or receives the default value of 'SSM' which is matched against the arguments @@ -47,22 +45,30 @@ def get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_arch # AMD64 if ami_architecture == 'amd64': # /aws/service/canonical/ubuntu/eks/20.04/1.21/stable/current/amd64/hvm/ebs-gp2/ami-id - publicParameter = str(f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id') + publicParameter = str( + f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id' + ) # ARM64 else: # /aws/service/canonical/ubuntu/eks/20.04/1.21/stable/current/arm64/hvm/ebs-gp2/ami-id - publicParameter = str(f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id') + publicParameter = str( + f'/aws/service/canonical/{ami_os}/eks/20.04/{kubernetes_version}/stable/current/{ami_architecture}/hvm/ebs-gp2/ami-id' + ) # Amazon Linux 2 # Public Params search in the console is fucky, check here: https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html else: # AMD64 if ami_architecture == 'amd64': # /aws/service/eks/optimized-ami/1.21/amazon-linux-2/recommended/image_id - publicParameter = str(f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2/recommended/image_id') + publicParameter = str( + f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2/recommended/image_id' + ) # ARM64 else: # /aws/service/eks/optimized-ami/1.21/amazon-linux-2-arm64/recommended/image_id - publicParameter = str(f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2-arm64/recommended/image_id') + publicParameter = str( + f'/aws/service/eks/optimized-ami/{kubernetes_version}/amazon-linux-2-arm64/recommended/image_id' + ) # retrieve the AMI ID and return it try: @@ -98,12 +104,10 @@ def create_cluster_svc_role(cluster_role_name): 'Statement': [ { 'Effect': 'Allow', - 'Principal': { - 'Service': 'eks.amazonaws.com' - }, - 'Action': 'sts:AssumeRole' + 'Principal': {'Service': 'eks.amazonaws.com'}, + 'Action': 'sts:AssumeRole', } - ] + ], } try: @@ -114,45 +118,29 @@ def create_cluster_svc_role(cluster_role_name): Description='Allows access to other AWS service resources that are required to operate clusters managed by EKS', MaxSessionDuration=3600, Tags=[ - { - 'Key': 'Name', - 'Value': cluster_role_name - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': cluster_role_name}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], ) # Attach required Cluster Policy (AWS Managed) or get following error # botocore.errorfactory.InvalidParameterException: An error occurred (InvalidParameterException) when calling the CreateCluster operation: The 
provided role doesn't have the Amazon EKS Managed Policies associated with it. Please ensure the following policies [arn:aws:iam::aws:policy/AmazonEKSClusterPolicy] are attached waiter = iam.get_waiter('role_exists') - waiter.wait( - RoleName=cluster_role_name, - WaiterConfig={ - 'Delay': 3, - 'MaxAttempts': 20 - } - ) + waiter.wait(RoleName=cluster_role_name, WaiterConfig={'Delay': 3, 'MaxAttempts': 20}) iam.attach_role_policy( RoleName=cluster_role_name, - PolicyArn='arn:aws:iam::aws:policy/AmazonEKSClusterPolicy' + PolicyArn='arn:aws:iam::aws:policy/AmazonEKSClusterPolicy', ) roleArn = str(r['Role']['Arn']) except botocore.exceptions.ClientError as error: # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead if error.response['Error']['Code'] == 'EntityAlreadyExists': - print(f'The supplied role name of {cluster_role_name} already exists, attempting to use it') + print( + f'The supplied role name of {cluster_role_name} already exists, attempting to use it' + ) roleArn = f'arn:aws:iam::{acctId}:role/{cluster_role_name}' else: print(f'Error encountered: {error}') @@ -191,14 +179,11 @@ def create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name): 's3:GetObjectAcl', 's3:GetObject', 's3:GetBucketAcl', - 's3:GetBucketLocation' + 's3:GetBucketLocation', ], - 'Resource': [ - f'arn:aws:s3:::{bucket_name}/*', - f'arn:aws:s3:::{bucket_name}' - ] + 'Resource': [f'arn:aws:s3:::{bucket_name}/*', f'arn:aws:s3:::{bucket_name}'], } - ] + ], } try: @@ -208,30 +193,20 @@ def create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name): PolicyDocument=json.dumps(iamPolicyDoc), Description='Allows access to specific S3 buckets for node groups managed by EKS - Created by Lightspin ECE', Tags=[ - { - 'Key': 'Name', - 'Value': policyName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': policyName}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], ) policyArn = str(r['Policy']['Arn']) except botocore.exceptions.ClientError as error: # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead # we will assume it has the right permissions after all if error.response['Error']['Code'] == 'EntityAlreadyExists': - print(f'The supplied role policy name of {policyName} already exists, attempting to use it') + print( + f'The supplied role policy name of {policyName} already exists, attempting to use it' + ) policyArn = f'arn:aws:iam::{acctId}:policy/{policyName}' else: print(f'Error encountered: {error}') @@ -266,12 +241,14 @@ def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes 'arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy', 'arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly', 'arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy', - 'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore' + 'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore', ] # Grab S3 Node Group policy from other Function & add to List if MDE is enabled if mde_on_nodes == 'True': - s3PolicyArn = ClusterManager.create_managed_nodegroup_s3_policy(bucket_name, nodegroup_role_name) + s3PolicyArn = ClusterManager.create_managed_nodegroup_s3_policy( + bucket_name, nodegroup_role_name + ) nodegroupAwsManagedPolicies.append(s3PolicyArn) # Trust 
Policy for EKS NodeGroup Role trusts EC2 @@ -280,12 +257,10 @@ def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes 'Statement': [ { 'Effect': 'Allow', - 'Principal': { - 'Service': 'ec2.amazonaws.com' - }, - 'Action': 'sts:AssumeRole' + 'Principal': {'Service': 'ec2.amazonaws.com'}, + 'Action': 'sts:AssumeRole', } - ] + ], } try: @@ -296,34 +271,16 @@ def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes Description='Allows access to other AWS service resources that are required to operate node groups managed by EKS', MaxSessionDuration=3600, Tags=[ - { - 'Key': 'Name', - 'Value': roleName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': roleName}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], ) roleArn = str(r['Role']['Arn']) waiter = iam.get_waiter('role_exists') - waiter.wait( - RoleName=roleName, - WaiterConfig={ - 'Delay': 3, - 'MaxAttempts': 20 - } - ) + waiter.wait(RoleName=roleName, WaiterConfig={'Delay': 3, 'MaxAttempts': 20}) except botocore.exceptions.ClientError as error: # If we have an 'EntityAlreadyExists' error it means a Role of the same name exists, we can try to use it instead @@ -341,10 +298,7 @@ def create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes # Loop through List of policies and attach Policies to Role, handle errors if already attached try: for policy in nodegroupAwsManagedPolicies: - iam.attach_role_policy( - RoleName=roleName, - PolicyArn=policy - ) + iam.attach_role_policy(RoleName=roleName, PolicyArn=policy) except Exception as e: print(f'Error encountered: {e}') RollbackManager.rollback_from_cache(cache=cache) @@ -385,7 +339,7 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): for p in additional_ports: if int(p) not in defaultPortSet: defaultPortSet.append(int(p)) - + # remove the list, it's not needed anymore del additional_ports @@ -420,41 +374,26 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): { 'ResourceType': 'security-group', 'Tags': [ - { - 'Key': 'Name', - 'Value': sgName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - }, + {'Key': 'Name', 'Value': sgName}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, # This tag is required per AWS Docs # One, and only one, of the security groups associated to your nodes should have the following tag applied: For more information about tagging, see Working with tags using the console. 
kubernetes.io/cluster/cluster-name: owned - { - 'Key': f'kubernetes.io/cluster/{cluster_name}', - 'Value': 'owned' - } - ] + {'Key': f'kubernetes.io/cluster/{cluster_name}', 'Value': 'owned'}, + ], } - ] + ], ) secGroupId = str(r['GroupId']) - sgCache = { - 'ClusterSecurityGroupId': secGroupId - } + sgCache = {'ClusterSecurityGroupId': secGroupId} cache.append(sgCache) print(f'Added {sgName} ID {secGroupId} to Cache') - print(f'Authorizing ingress for Ports {defaultPortSet} for CIDRS {allVpcCidrs} for {sgName}') + print( + f'Authorizing ingress for Ports {defaultPortSet} for CIDRS {allVpcCidrs} for {sgName}' + ) # Now start adding Inbound Rules per CIDR and per Port # Add conditional logic for port 53 (DNS) to create both TCP and UDP Rules @@ -471,9 +410,9 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'IpRanges': [ { 'CidrIp': cidr, - 'Description': f'Allow tcp {port} to {cidr}' + 'Description': f'Allow tcp {port} to {cidr}', } - ] + ], }, { 'FromPort': int(port), @@ -482,34 +421,22 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'IpRanges': [ { 'CidrIp': cidr, - 'Description': f'Allow udp {port} to {cidr}' + 'Description': f'Allow udp {port} to {cidr}', } - ] - } + ], + }, ], TagSpecifications=[ { 'ResourceType': 'security-group-rule', 'Tags': [ - { - 'Key': 'Name', - 'Value': f'{sgName}{cidr}{port}' - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': f'{sgName}{cidr}{port}'}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], } - ] + ], ) else: ec2.authorize_security_group_ingress( @@ -522,34 +449,22 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'IpRanges': [ { 'CidrIp': cidr, - 'Description': f'Allow tcp {port} to {cidr}' + 'Description': f'Allow tcp {port} to {cidr}', } - ] + ], } ], TagSpecifications=[ { 'ResourceType': 'security-group-rule', 'Tags': [ - { - 'Key': 'Name', - 'Value': f'{sgName}{cidr}{port}' - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': f'{sgName}{cidr}{port}'}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], } - ] + ], ) # Adding inbound rules per Port for the Security Group itself (talk to self for Node-Cluster Comms) @@ -565,9 +480,9 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'UserIdGroupPairs': [ { 'Description': f'Allow tcp {port} to {secGroupId}', - 'GroupId': secGroupId + 'GroupId': secGroupId, } - ] + ], }, { 'FromPort': int(port), @@ -576,34 +491,22 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'UserIdGroupPairs': [ { 'Description': f'Allow udp {port} to {secGroupId}', - 'GroupId': secGroupId + 'GroupId': secGroupId, } - ] - } + ], + }, ], TagSpecifications=[ { 'ResourceType': 'security-group-rule', 'Tags': [ - { - 'Key': 'Name', - 'Value': f'{sgName}{secGroupId}{port}' - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': f'{sgName}{secGroupId}{port}'}, + {'Key': 
'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], } - ] + ], ) else: ec2.authorize_security_group_ingress( @@ -612,38 +515,26 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): { 'FromPort': int(port), 'ToPort': int(port), - 'IpProtocol': 'tcp', + 'IpProtocol': 'tcp', 'UserIdGroupPairs': [ { 'Description': f'Allow tcp {port} to {secGroupId}', - 'GroupId': secGroupId + 'GroupId': secGroupId, } - ] + ], } ], TagSpecifications=[ { 'ResourceType': 'security-group-rule', 'Tags': [ - { - 'Key': 'Name', - 'Value': f'{sgName}{secGroupId}{port}' - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': f'{sgName}{secGroupId}{port}'}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], } - ] + ], ) # Adding TCP 443 (HTTPS) from the internet which is required for patching and agent communications @@ -655,36 +546,21 @@ def cluster_security_group_factory(cluster_name, vpc_id, additional_ports): 'ToPort': 443, 'IpProtocol': 'tcp', 'IpRanges': [ - { - 'CidrIp': '0.0.0.0/0', - 'Description': f'Allow tcp 443 to Internet' - } - ] + {'CidrIp': '0.0.0.0/0', 'Description': f'Allow tcp 443 to Internet'} + ], } ], TagSpecifications=[ { 'ResourceType': 'security-group-rule', 'Tags': [ - { - 'Key': 'Name', - 'Value': f'{sgName}Internet{port}' - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': f'{sgName}Internet{port}'}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], } - ] + ], ) except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') @@ -719,23 +595,11 @@ def encryption_key_factory(cluster_name): KeySpec='SYMMETRIC_DEFAULT', Origin='AWS_KMS', Tags=[ - { - 'TagKey': 'Name', - 'TagValue': f'{cluster_name}-EKS-CMK' - }, - { - 'TagKey': 'CreatedBy', - 'TagValue': createdBy - }, - { - 'TagKey': 'CreatedAt', - 'TagValue': createdAt - }, - { - 'TagKey': 'CreatedWith', - 'TagValue': 'Lightspin ECE' - } - ] + {'TagKey': 'Name', 'TagValue': f'{cluster_name}-EKS-CMK'}, + {'TagKey': 'CreatedBy', 'TagValue': createdBy}, + {'TagKey': 'CreatedAt', 'TagValue': createdAt}, + {'TagKey': 'CreatedWith', 'TagValue': 'Lightspin ECE'}, + ], )['KeyMetadata']['Arn'] except KeyError as ke: print(f'Error encountered: {ke}') @@ -748,8 +612,10 @@ def encryption_key_factory(cluster_name): RollbackManager.rollback_from_cache(cache=cache) return kmsKeyArn - - def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports): + + def create_cluster( + cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports + ): ''' This function uses the EKS Boto3 Client to create a cluster, taking inputs from main.py to determing naming & Encryption ''' @@ -764,7 +630,9 @@ def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_i clusterRoleArn = ClusterManager.create_cluster_svc_role(cluster_role_name) # Call `cluster_security_group_factory` to create or re-use an EKS cluster security group that allows minimum necessary comms intra-VPC - 
securityGroupId = ClusterManager.cluster_security_group_factory(cluster_name, vpc_id, additional_ports) + securityGroupId = ClusterManager.cluster_security_group_factory( + cluster_name, vpc_id, additional_ports + ) # Call `encryption_key_factory` to create a KMS Key ARN. Simple! (We'll add the Key Policy later) kmsKeyArn = ClusterManager.encryption_key_factory(cluster_name) @@ -779,33 +647,30 @@ def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_i 'subnetIds': subnet_ids, 'securityGroupIds': [securityGroupId], 'endpointPublicAccess': False, - 'endpointPrivateAccess': True + 'endpointPrivateAccess': True, }, logging={ 'clusterLogging': [ - { + { # all Logging types are enabled here - 'types': ['api','audit','authenticator','controllerManager','scheduler'], - 'enabled': True + 'types': [ + 'api', + 'audit', + 'authenticator', + 'controllerManager', + 'scheduler', + ], + 'enabled': True, } ] }, - encryptionConfig=[ - { - 'resources': [ - 'secrets' - ], - 'provider': { - 'keyArn': kmsKeyArn - } - } - ], + encryptionConfig=[{'resources': ['secrets'], 'provider': {'keyArn': kmsKeyArn}}], tags={ 'Name': cluster_name, 'CreatedBy': createdBy, 'CreatedAt': createdAt, - 'CreatedWith': 'Lightspin ECE' - } + 'CreatedWith': 'Lightspin ECE', + }, ) # Establish provided EKS Waiter() for cluster to come up @@ -814,13 +679,7 @@ def create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_i waiter = eks.get_waiter('cluster_active') - waiter.wait( - name=cluster_name, - WaiterConfig={ - 'Delay': 30, - 'MaxAttempts': 40 - } - ) + waiter.wait(name=cluster_name, WaiterConfig={'Delay': 30, 'MaxAttempts': 40}) finalClusterName = str(r['cluster']['name']) @@ -855,7 +714,9 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os ''' eks = boto3.client('eks') - print(f'Retrieving Certificate Authority and API Server URL information for bootstrap script') + print( + f'Retrieving Certificate Authority and API Server URL information for bootstrap script' + ) # DescribeCluster and pull necessary values to set as env vars within the bootstrap c = eks.describe_cluster(name=cluster_name) @@ -945,7 +806,20 @@ def generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os return userData - def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name, launch_template_name, kms_key_arn, securityGroupId, ebs_volume_size, instance_type, mde_on_nodes, ami_os, ami_architecture): + def create_launch_template( + cluster_name, + kubernetes_version, + ami_id, + bucket_name, + launch_template_name, + kms_key_arn, + securityGroupId, + ebs_volume_size, + instance_type, + mde_on_nodes, + ami_os, + ami_architecture, + ): ''' This function creates an EC2 Launch Template using encryption and AMI data supplied from main.py and passes it to the `builder` function where final EKS Nodegroup creation takes place @@ -958,16 +832,20 @@ def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name createdAt = str(datetime.utcnow()) # Pull latest AMI ID for EKS-optimized Ubuntu 20.04LTS for specified K8s Version in main.py - amiId = ClusterManager.get_latest_eks_optimized_ubuntu(kubernetes_version, ami_id, ami_os, ami_architecture) + amiId = ClusterManager.get_latest_eks_optimized_ubuntu( + kubernetes_version, ami_id, ami_os, ami_architecture + ) # Retrieve Base64 metadata from bootstrap generation function - this will download and install MDE (MDATP) from files in the S3 bucket specified in main.py if --mde_on_nodes is 
true. Will use ami_os arguements to create different UserData as well - userData = ClusterManager.generate_nodegroup_bootstrap(bucket_name, cluster_name, mde_on_nodes, ami_os) + userData = ClusterManager.generate_nodegroup_bootstrap( + bucket_name, cluster_name, mde_on_nodes, ami_os + ) # For IMDSv2 - keeping this outside for eventual modification of hop limits? metadataOptions = { 'HttpTokens': 'required', 'HttpPutResponseHopLimit': 2, - 'HttpEndpoint': 'enabled' + 'HttpEndpoint': 'enabled', } try: @@ -985,8 +863,8 @@ def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name 'DeleteOnTermination': True, 'KmsKeyId': kms_key_arn, 'VolumeSize': int(ebs_volume_size), - 'VolumeType': 'gp2' - } + 'VolumeType': 'gp2', + }, } ], 'ImageId': amiId, @@ -998,47 +876,23 @@ def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name { 'ResourceType': 'instance', 'Tags': [ - { - 'Key': 'Name', - 'Value': str(f'{launch_template_name}Node') - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': str(f'{launch_template_name}Node')}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], }, { 'ResourceType': 'volume', 'Tags': [ - { - 'Key': 'Name', - 'Value': str(f'{launch_template_name}Node') - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] - } - ] - } + {'Key': 'Name', 'Value': str(f'{launch_template_name}Node')}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], + }, + ], + }, ) launchTemplateId = str(r['LaunchTemplate']['LaunchTemplateId']) @@ -1050,8 +904,32 @@ def create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name RollbackManager.rollback_from_cache(cache=cache) return launchTemplateId - - def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_type, cluster_name, cluster_role_name, nodegroup_name, nodegroup_role_name, launch_template_name, vpc_id, subnet_ids, node_count, mde_on_nodes, additional_ports, falco_bool, falco_sidekick_destination_type, falco_sidekick_destination, ami_os, ami_architecture, datadog_api_key, datadog_bool, addtl_auth_principals): + + def builder( + kubernetes_version, + bucket_name, + ebs_volume_size, + ami_id, + instance_type, + cluster_name, + cluster_role_name, + nodegroup_name, + nodegroup_role_name, + launch_template_name, + vpc_id, + subnet_ids, + node_count, + mde_on_nodes, + additional_ports, + falco_bool, + falco_sidekick_destination_type, + falco_sidekick_destination, + ami_os, + ami_architecture, + datadog_api_key, + datadog_bool, + addtl_auth_principals, + ): ''' This function is the 'brain' that controls creation and calls the required functions to build infrastructure and services (EKS, EC2, IAM). 
This function also stores all required arguments into cache to facilitate rollbacks upon errors @@ -1064,7 +942,7 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t 'ClusterRoleName': cluster_role_name, 'NodegroupName': nodegroup_name, 'NodegroupRoleName': nodegroup_role_name, - 'LaunchTemplateName': launch_template_name + 'LaunchTemplateName': launch_template_name, } cache.append(cacheDict) @@ -1081,7 +959,14 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t # Create an EKS Cluster by calling `create_cluster` - this will take the longest, and if it fails, then other infrastructure won't be created # the positional selectors are for when you return multiple values, they are bundled in a tuple, and have to be accessed in the order they're provided - callClusterManager = ClusterManager.create_cluster(cluster_name, kubernetes_version, cluster_role_name, subnet_ids, vpc_id, additional_ports) + callClusterManager = ClusterManager.create_cluster( + cluster_name, + kubernetes_version, + cluster_role_name, + subnet_ids, + vpc_id, + additional_ports, + ) clusterName = callClusterManager[0] securityGroupId = callClusterManager[1] kms_key_arn = callClusterManager[2] @@ -1089,7 +974,9 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t # Passes the S3 Bucket name to the `create_managed_nodegroup_role` function which in turn passes it to the `create_managed_nodegroup_s3_policy` # function which allows your Nodegroups to pull artifacts from S3 as part of bootstrapping - nodegroupRoleArn = ClusterManager.create_managed_nodegroup_role(bucket_name, nodegroup_role_name, mde_on_nodes) + nodegroupRoleArn = ClusterManager.create_managed_nodegroup_role( + bucket_name, nodegroup_role_name, mde_on_nodes + ) # Now we can attach our proper Key Policy to the KMS Key since we now have all Roles ready @@ -1104,7 +991,9 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t seshRoleCheck = seshRoleRegex.search(createdBy) # On match to Regex do stupid stuff >:( if seshRoleCheck: - print(f'Your ARN from STS AssumeRole {createdBy} matches a temporary Session ARN, attempting to find your upstream IAM Role') + print( + f'Your ARN from STS AssumeRole {createdBy} matches a temporary Session ARN, attempting to find your upstream IAM Role' + ) roleNameSplit = createdBy.split('/')[1] createdByRoleArn = f'arn:aws:iam::{acctId}:role/{roleNameSplit}' print(f'Your Role ARN upstream to your session was determined as {createdByRoleArn}') @@ -1119,7 +1008,10 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t slrRole = str(r['Role']['RoleName']) print(f'Created Service-linked Role for Autoscaling called {slrRole}') except Exception as e: - if str(e) == 'An error occurred (InvalidInput) when calling the CreateServiceLinkedRole operation: Service role name AWSServiceRoleForAutoScaling has been taken in this account, please try a different suffix.': + if ( + str(e) + == 'An error occurred (InvalidInput) when calling the CreateServiceLinkedRole operation: Service role name AWSServiceRoleForAutoScaling has been taken in this account, please try a different suffix.' 
+ ): pass else: print(f'Error encountered: {e}') @@ -1132,7 +1024,7 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t clusterRoleArn, nodegroupRoleArn, createdByRoleArn, - f'arn:aws:iam::{acctId}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling' + f'arn:aws:iam::{acctId}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling', ] # Check if additional AuthZ IAM Principals are even provided. If so, add them to the list if they're not there already @@ -1142,18 +1034,16 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t kmsAuthZPrincipals.append(arn) keyPolicyJson = { - 'Version':'2012-10-17', - 'Id':'ecekeypolicy', + 'Version': '2012-10-17', + 'Id': 'ecekeypolicy', 'Statement': [ # full key usage by whoever creates the key { 'Sid': 'Key Creator Admin', 'Effect': 'Allow', - 'Principal': { - 'AWS': createdByRoleArn - }, - 'Action':'kms:*', - 'Resource':'*' + 'Principal': {'AWS': createdByRoleArn}, + 'Action': 'kms:*', + 'Resource': '*', }, # This allows usage of the key by the Cluster & Nodegroup and aws-managed service principals # Creator is added throughout as well @@ -1163,38 +1053,28 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t 'Effect': 'Allow', 'Principal': { 'AWS': kmsAuthZPrincipals, - 'Service': [ - 'autoscaling.amazonaws.com', - 'ec2.amazonaws.com' - ] + 'Service': ['autoscaling.amazonaws.com', 'ec2.amazonaws.com'], }, 'Action': [ 'kms:Encrypt', 'kms:Decrypt', 'kms:ReEncrypt*', 'kms:GenerateDataKey*', - 'kms:DescribeKey' + 'kms:DescribeKey', ], - 'Resource': '*' + 'Resource': '*', }, { 'Sid': 'Allow attachment of persistent resources', 'Effect': 'Allow', 'Principal': { 'AWS': kmsAuthZPrincipals, - 'Service': [ - 'autoscaling.amazonaws.com', - 'ec2.amazonaws.com' - ] + 'Service': ['autoscaling.amazonaws.com', 'ec2.amazonaws.com'], }, - 'Action': [ - 'kms:CreateGrant', - 'kms:ListGrants', - 'kms:RevokeGrant' - ], - 'Resource': '*' - } - ] + 'Action': ['kms:CreateGrant', 'kms:ListGrants', 'kms:RevokeGrant'], + 'Resource': '*', + }, + ], } # For whatever reason, role propagation is a bit delayed with registration on the KMS Resource-based resource policy side @@ -1204,9 +1084,7 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t try: kms.put_key_policy( - KeyId=kms_key_arn, - PolicyName='default', - Policy=json.dumps(keyPolicyJson) + KeyId=kms_key_arn, PolicyName='default', Policy=json.dumps(keyPolicyJson) ) print(f'Key Policy attached to {kms_key_arn}') except KeyError as ke: @@ -1218,9 +1096,22 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') RollbackManager.rollback_from_cache(cache=cache) - + # Passes various arguements to the `create_launch_template` which returns a Launch Template ID (of the latest version) to pass to the Nodegroup creation payload - launchTemplateId = ClusterManager.create_launch_template(cluster_name, kubernetes_version, ami_id, bucket_name, launch_template_name, kms_key_arn, securityGroupId, ebs_volume_size, instance_type, mde_on_nodes, ami_os, ami_architecture) + launchTemplateId = ClusterManager.create_launch_template( + cluster_name, + kubernetes_version, + ami_id, + bucket_name, + launch_template_name, + kms_key_arn, + securityGroupId, + ebs_volume_size, + instance_type, + mde_on_nodes, + ami_os, + ami_architecture, + ) print(f'Creating Nodegroup 
{nodegroup_name} for Cluster {clusterName}') @@ -1232,20 +1123,18 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t scalingConfig={ 'minSize': int(node_count), 'maxSize': int(node_count) * 2, - 'desiredSize': int(node_count) + 'desiredSize': int(node_count), }, nodeRole=nodegroupRoleArn, subnets=subnet_ids, - launchTemplate={ - 'id': launchTemplateId - }, + launchTemplate={'id': launchTemplateId}, capacityType='ON_DEMAND', tags={ 'Name': nodegroup_name, 'CreatedBy': createdBy, 'CreatedAt': createdAt, - 'CreatedWith': 'Lightspin ECE' - } + 'CreatedWith': 'Lightspin ECE', + }, ) # Await Nodegroups to come online @@ -1256,10 +1145,7 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t waiter.wait( clusterName=clusterName, nodegroupName=nodegroup_name, - WaiterConfig={ - 'Delay': 30, - 'MaxAttempts': 80 - } + WaiterConfig={'Delay': 30, 'MaxAttempts': 80}, ) except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') @@ -1276,7 +1162,7 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t # Setup first time cluster connection with AWS CLI updateKubeconfigCmd = f'aws eks update-kubeconfig --region {awsRegion} --name {clusterName}' - updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, capture_output=True) print(updateKubeconfigProc.stdout.decode('utf-8')) # If additional principals are required to be authorized, attempt to do so @@ -1285,12 +1171,12 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t # Split out the name part of the Role addtlRoleName = str(arn.split('/')[1]) # Create a patch object to add into - newAuthZScript=f'''ROLE=" - rolearn: {arn}\\n username: {addtlRoleName}\\n groups:\\n - system:masters" - kubectl get -n kube-system configmap/aws-auth -o yaml | awk "/mapRoles: \|/{{print;print \\"$ROLE\\";next}}1" > /tmp/aws-auth-patch.yml + newAuthZScript = f'''ROLE=" - rolearn: {arn}\\n username: {addtlRoleName}\\n groups:\\n - system:masters" + kubectl get -n kube-system configmap/aws-auth -o yaml | awk "/mapRoles: \\|/{{print;print \\"$ROLE\\";next}}1" > /tmp/aws-auth-patch.yml kubectl patch configmap/aws-auth -n kube-system --patch "$(cat /tmp/aws-auth-patch.yml)" ''' - newAuthZScriptProc = subprocess.run(newAuthZScript, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + newAuthZScriptProc = subprocess.run(newAuthZScript, shell=True, capture_output=True) print(newAuthZScriptProc.stdout.decode('utf-8')) ''' @@ -1298,26 +1184,27 @@ def builder(kubernetes_version, bucket_name, ebs_volume_size, ami_id, instance_t ''' if falco_bool == 'True': FalcoSetup.falco_initialization( - cluster_name=clusterName, + cluster_name=clusterName, falco_mode='Create', - falco_sidekick_destination_type=falco_sidekick_destination_type, + falco_sidekick_destination_type=falco_sidekick_destination_type, falco_sidekick_destination=falco_sidekick_destination, - datadog_api_key=datadog_api_key + datadog_api_key=datadog_api_key, ) ''' Send a call into plugins.ECEDatadog ''' if datadog_bool == 'True': DatadogSetup.initialization( - cluster_name=clusterName, - datadog_mode='Create', - datadog_api_key=datadog_api_key + cluster_name=clusterName, datadog_mode='Create', datadog_api_key=datadog_api_key ) + + ''' This Class handles all update tasks to the Clusters, such as version bumps to latest Kubenertes Versions ''' 
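Before the UpdateManager hunks below: the aws-auth authorization step assembled above packs YAML, `awk`, and `kubectl` into one shell string, so it is worth seeing in isolation. A minimal sketch of the same idea, assuming `kubectl` already points at the new cluster; `role_arn` is a hypothetical input:

```python
# Sketch: append a mapRoles entry to the aws-auth ConfigMap so an extra IAM role
# is authorized as system:masters, mirroring the awk pipeline used above.
import subprocess


def authorize_role(role_arn):
    role_name = role_arn.split('/')[1]
    script = (
        f'ROLE="    - rolearn: {role_arn}\\n      username: {role_name}'
        '\\n      groups:\\n        - system:masters"\n'
        'kubectl get -n kube-system configmap/aws-auth -o yaml | '
        'awk "/mapRoles: \\|/{print;print \\"$ROLE\\";next}1" > /tmp/aws-auth-patch.yml\n'
        'kubectl patch configmap/aws-auth -n kube-system --patch "$(cat /tmp/aws-auth-patch.yml)"'
    )
    proc = subprocess.run(script, shell=True, capture_output=True)
    print(proc.stdout.decode('utf-8'))
```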
-class UpdateManager():
-
+
+
+class UpdateManager:
     def update_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version):
         '''
         This function attempts to update existing Cluster and Nodegroup to a specified Kubernetes Version by invoking
@@ -1330,10 +1217,14 @@ def update_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version):
         try:
             existingClusterVersion = eks.describe_cluster(name=cluster_name)['cluster']['version']
             if existingClusterVersion == kubernetes_version:
-                print(f'EKS Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting')
+                print(
+                    f'EKS Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting'
+                )
                 sys.exit(2)
             else:
-                print(f'EKS Cluster {cluster_name} is viable to update from Kubernetes version {existingClusterVersion} to {kubernetes_version}')
+                print(
+                    f'EKS Cluster {cluster_name} is viable to update from Kubernetes version {existingClusterVersion} to {kubernetes_version}'
+                )
         except botocore.exceptions.ClientError as error:
             # A 'ResourceNotFoundException' error means the Cluster does not exist, so there is nothing to update
             if error.response['Error']['Code'] == 'ResourceNotFoundException':
@@ -1344,22 +1235,32 @@ def update_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version):
 
         # Lookup EKS Nodegroup to see if specified K8s version from main.py matches, if so exit
         try:
-            existingNodegroupVersion = eks.describe_cluster(name=cluster_name,nodegroupName=nodegroup_name)['nodegroup']['version']
+            existingNodegroupVersion = eks.describe_nodegroup(
+                clusterName=cluster_name, nodegroupName=nodegroup_name
+            )['nodegroup']['version']
             if existingNodegroupVersion == kubernetes_version:
-                print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting')
+                print(
+                    f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is already at Kubernetes version {kubernetes_version}! Aborting'
+                )
                 sys.exit(2)
             else:
-                print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is viable to update from Kubernetes version {existingNodegroupVersion} to {kubernetes_version}')
+                print(
+                    f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} is viable to update from Kubernetes version {existingNodegroupVersion} to {kubernetes_version}'
+                )
         except botocore.exceptions.ClientError as error:
             # A 'ResourceNotFoundException' error means the Nodegroup does not exist, so there is nothing to update
             if error.response['Error']['Code'] == 'ResourceNotFoundException':
-                print(f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} does not exist! Aborting')
+                print(
+                    f'EKS Nodegroup {nodegroup_name} in Cluster {cluster_name} does not exist!
Aborting'
+                )
                 sys.exit(2)
             else:
                 raise error
 
-        UpdateManager.update_nodegroup_kubernetes_version(cluster_name, nodegroup_name, kubernetes_version)
+        UpdateManager.update_nodegroup_kubernetes_version(
+            cluster_name, nodegroup_name, kubernetes_version
+        )
         UpdateManager.update_cluster_kubernetes_version(cluster_name, kubernetes_version)
 
@@ -1367,16 +1268,16 @@ def update_nodegroup_kubernetes_version(cluster_name, nodegroup_name, kubernetes
         '''
         This function carries out the update and waiter for EKS Nodegroup K8s version bumps
         '''
-        print(f'Updating Kubernetes version for EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}')
+        print(
+            f'Updating Kubernetes version for EKS Nodegroup {nodegroup_name} in EKS Cluster {cluster_name}'
+        )
 
         eks = boto3.client('eks')
 
         # Update the Nodegroup K8s version and parse the EKS Update ID for later use
         # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Client.update_nodegroup_version
         r = eks.update_nodegroup_version(
-            clusterName=cluster_name,
-            nodegroupName=nodegroup_name,
-            version=kubernetes_version
+            clusterName=cluster_name, nodegroupName=nodegroup_name, version=kubernetes_version
         )
         updateId = str(r['update']['id'])
@@ -1386,18 +1287,20 @@ def update_nodegroup_kubernetes_version(cluster_name, nodegroup_name, kubernetes
         # Break the loop on Success, continue on 'InProgress', and exit code 2 on failures or cancellations
         while True:
             d = eks.describe_update(
-                name=cluster_name,
-                updateId=updateId,
-                nodegroupName=nodegroup_name
+                name=cluster_name, updateId=updateId, nodegroupName=nodegroup_name
             )
             updateStatus = str(d['update']['status'])
             # if/else logic time
             if updateStatus == 'Successful':
-                print(f'Nodegroup {nodegroup_name} in Cluster {cluster_name} has been successfully updated.')
+                print(
+                    f'Nodegroup {nodegroup_name} in Cluster {cluster_name} has been successfully updated.'
+                )
                 break
-            elif updateStatus == 'Failed' or 'Cancelled':
+            elif updateStatus in ('Failed', 'Cancelled'):
                 errorMessage = str(d['update']['errors'])
-                print(f'Nodegroup {nodegroup_name} in Cluster {cluster_name} update has been cancelled or has failed!')
+                print(
+                    f'Nodegroup {nodegroup_name} in Cluster {cluster_name} update has been cancelled or has failed!'
+                )
                 print(f'Error message: {errorMessage}')
                 sys.exit(2)
             else:
@@ -1417,10 +1320,7 @@ def update_cluster_kubernetes_version(cluster_name, kubernetes_version):
 
         # Update the Cluster K8s version and parse the EKS Update ID for later use
         # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/eks.html#EKS.Client.update_cluster_version
-        r = eks.update_nodegroup_version(
-            clusterName=cluster_name,
-            version=kubernetes_version
-        )
+        r = eks.update_cluster_version(name=cluster_name, version=kubernetes_version)
         updateId = str(r['update']['id'])
 
         print(f'Monitoring EKS Update ID {updateId} for failure or success state.')
@@ -1428,10 +1328,7 @@ def update_cluster_kubernetes_version(cluster_name, kubernetes_version):
         # Use a `while True` loop and 15 second sleeps to watch the update progress of the cluster
         # Break the loop on Success, continue on 'InProgress', and exit code 2 on failures or cancellations
         while True:
-            d = eks.describe_update(
-                name=cluster_name,
-                updateId=updateId
-            )
+            d = eks.describe_update(name=cluster_name, updateId=updateId)
             updateStatus = str(d['update']['status'])
             # if/else logic time
             if updateStatus == 'Successful':
@@ -1449,13 +1346,17 @@ def update_cluster_kubernetes_version(cluster_name, kubernetes_version):
             time.sleep(15)
             continue
+
 '''
 Despite its name, this Class contains methods to conduct emergency deletions (rollback) from Cache as well as normal deletions from main.py commands
 this is purely for Create mode, other Classes may have their own self-contained rollback mechanism
 '''
-class RollbackManager():
-    def scheduled_deletion(nodegroup_name, cluster_name, cluster_role_name, nodegroup_role_name, launch_template_name):
+
+class RollbackManager:
+    def scheduled_deletion(
+        nodegroup_name, cluster_name, cluster_role_name, nodegroup_role_name, launch_template_name
+    ):
         '''
         This function performs a graceful, scheduled deletion of all resources - or attempts to at least
         '''
@@ -1465,48 +1366,40 @@ def scheduled_deletion(nodegrou
 
         # Retrieve the Security Groups from the Cluster to delete, as they are not provided as arguments and cannot be guessed (IDs and all that...)
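In isolation, the lookup that comment describes amounts to a couple of `describe_cluster` reads; a minimal sketch under default AWS credentials, with `'MyCluster'` as a hypothetical name (the real flow below reuses its `cluster_name` argument):

```python
# Sketch: recover the security group IDs and KMS key ARN attached to a cluster,
# since neither is passed in as an argument to the deletion flow.
import boto3

eks = boto3.client('eks')

cluster = eks.describe_cluster(name='MyCluster')['cluster']
sg_ids = list(cluster['resourcesVpcConfig'].get('securityGroupIds', []))

# encryptionConfig is absent when secrets encryption was never enabled
try:
    kms_key_arn = cluster['encryptionConfig'][0]['provider']['keyArn']
except (KeyError, IndexError):
    kms_key_arn = None
```

The same look-up-then-best-effort-delete pattern repeats for the KMS key just below.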
sgList = [] - for sg in eks.describe_cluster(name=cluster_name)['cluster']['resourcesVpcConfig']['securityGroupIds']: + for sg in eks.describe_cluster(name=cluster_name)['cluster']['resourcesVpcConfig'][ + 'securityGroupIds' + ]: sgList.append(sg) # First, attempt to delete Nodegroup - RollbackManager.delete_nodegroup( - nodegroup_name=nodegroup_name, - cluster_name=cluster_name - ) + RollbackManager.delete_nodegroup(nodegroup_name=nodegroup_name, cluster_name=cluster_name) # Then, try to find the Cluster KMS Key and attempt to delete it try: - kmsKeyArn= eks.describe_cluster(name=cluster_name)['cluster']['encryptionConfig'][0]['provider']['keyArn'] + kmsKeyArn = eks.describe_cluster(name=cluster_name)['cluster']['encryptionConfig'][0][ + 'provider' + ]['keyArn'] except Exception: kmsKeyArn = None - + if kmsKeyArn != None: - RollbackManager.delete_kms_key( - kms_key_arn=kmsKeyArn - ) + RollbackManager.delete_kms_key(kms_key_arn=kmsKeyArn) # Next, attempt to delete Cluster - RollbackManager.delete_cluster( - cluster_name=cluster_name - ) + RollbackManager.delete_cluster(cluster_name=cluster_name) # Next, attempt to delete all related IAM RollbackManager.delete_eks_iam( - cluster_role_name=cluster_role_name, - nodegroup_role_name=nodegroup_role_name + cluster_role_name=cluster_role_name, nodegroup_role_name=nodegroup_role_name ) # Next, attempt to delete the EC2 Launch Template - RollbackManager.delete_launch_template( - launch_template_name=launch_template_name - ) + RollbackManager.delete_launch_template(launch_template_name=launch_template_name) # Finally, loop the retrieved SGs and then delete them for sg in sgList: print(f'Trying to delete EC2 Security Group {sg}') - RollbackManager.delete_security_groups( - cluster_security_group_id=sg - ) + RollbackManager.delete_security_groups(cluster_security_group_id=sg) print(f'Deletion complete. 
Confirm resource deletion in Console in case of errors') @@ -1528,42 +1421,32 @@ def rollback_from_cache(cache): clusterSgId = str(cache[1]['ClusterSecurityGroupId']) # First, attempt to delete Nodegroup - RollbackManager.delete_nodegroup( - nodegroup_name=nodegroupName, - cluster_name=clusterName - ) + RollbackManager.delete_nodegroup(nodegroup_name=nodegroupName, cluster_name=clusterName) # Then, try to find the Cluster KMS Key and attempt to delete it try: - kmsKeyArn= eks.describe_cluster(name=clusterName)['cluster']['encryptionConfig'][0]['provider']['keyArn'] + kmsKeyArn = eks.describe_cluster(name=clusterName)['cluster']['encryptionConfig'][0][ + 'provider' + ]['keyArn'] except Exception: kmsKeyArn = None if kmsKeyArn != None: - RollbackManager.delete_kms_key( - kms_key_arn=kmsKeyArn - ) + RollbackManager.delete_kms_key(kms_key_arn=kmsKeyArn) # Next, attempt to delete Cluster - RollbackManager.delete_cluster( - cluster_name=clusterName - ) + RollbackManager.delete_cluster(cluster_name=clusterName) # Next, attempt to delete all related IAM RollbackManager.delete_eks_iam( - cluster_role_name=clusterRoleName, - nodegroup_role_name=nodegroupRoleName + cluster_role_name=clusterRoleName, nodegroup_role_name=nodegroupRoleName ) # Next, attempt to delete the EC2 Launch Template - RollbackManager.delete_launch_template( - launch_template_name=launchTemplateName - ) + RollbackManager.delete_launch_template(launch_template_name=launchTemplateName) # Finally, delete the Security Groups - RollbackManager.delete_security_groups( - cluster_security_group_id=clusterSgId - ) + RollbackManager.delete_security_groups(cluster_security_group_id=clusterSgId) print(f'Rollback complete. Confirm resource deletion in Console in case of errors') @@ -1579,10 +1462,7 @@ def delete_nodegroup(cluster_name, nodegroup_name): eks = boto3.client('eks') try: - eks.delete_nodegroup( - clusterName=cluster_name, - nodegroupName=nodegroup_name - ) + eks.delete_nodegroup(clusterName=cluster_name, nodegroupName=nodegroup_name) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') @@ -1595,10 +1475,7 @@ def delete_nodegroup(cluster_name, nodegroup_name): waiter.wait( clusterName=cluster_name, nodegroupName=nodegroup_name, - WaiterConfig={ - 'Delay': 30, - 'MaxAttempts': 40 - } + WaiterConfig={'Delay': 30, 'MaxAttempts': 40}, ) print(f'EKS Nodegroups rolled back.') @@ -1614,9 +1491,7 @@ def delete_cluster(cluster_name): eks = boto3.client('eks') try: - eks.delete_cluster( - name=cluster_name - ) + eks.delete_cluster(name=cluster_name) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') @@ -1626,13 +1501,7 @@ def delete_cluster(cluster_name): waiter = eks.get_waiter('cluster_deleted') - waiter.wait( - name=cluster_name, - WaiterConfig={ - 'Delay': 30, - 'MaxAttempts': 123 - } - ) + waiter.wait(name=cluster_name, WaiterConfig={'Delay': 30, 'MaxAttempts': 123}) print(f'EKS Clusters rolled back.') @@ -1642,7 +1511,9 @@ def delete_eks_iam(cluster_role_name, nodegroup_role_name): ''' This function attempts to delete all related IAM entities for EKS (Cluster roles, Nodegroup roles, Nodegroup policies) ''' - print(f'Attempting to delete various IAM entities. IAM Roles {cluster_role_name} and {nodegroup_role_name} and IAM Policy {nodegroup_role_name}Policy.') + print( + f'Attempting to delete various IAM entities. IAM Roles {cluster_role_name} and {nodegroup_role_name} and IAM Policy {nodegroup_role_name}Policy.' 
+ ) iam = boto3.client('iam') sts = boto3.client('sts') @@ -1653,23 +1524,21 @@ def delete_eks_iam(cluster_role_name, nodegroup_role_name): # Find and detach all policies from the Cluster Role try: - for policy in iam.list_attached_role_policies(RoleName=cluster_role_name)['AttachedPolicies']: + for policy in iam.list_attached_role_policies(RoleName=cluster_role_name)[ + 'AttachedPolicies' + ]: policyArn = str(policy['PolicyArn']) - iam.detach_role_policy( - RoleName=cluster_role_name, - PolicyArn=policyArn - ) + iam.detach_role_policy(RoleName=cluster_role_name, PolicyArn=policyArn) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') # Detach all Policies from Nodegroup cluster try: - for policy in iam.list_attached_role_policies(RoleName=nodegroup_role_name)['AttachedPolicies']: + for policy in iam.list_attached_role_policies(RoleName=nodegroup_role_name)[ + 'AttachedPolicies' + ]: policyArn = str(policy['PolicyArn']) - iam.detach_role_policy( - RoleName=nodegroup_role_name, - PolicyArn=policyArn - ) + iam.detach_role_policy(RoleName=nodegroup_role_name, PolicyArn=policyArn) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') @@ -1687,7 +1556,6 @@ def delete_eks_iam(cluster_role_name, nodegroup_role_name): iam.delete_role(RoleName=nodegroup_role_name) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') - print(f'IAM Roles and Policies rolled back.') @@ -1705,10 +1573,7 @@ def delete_launch_template(launch_template_name): ec2 = boto3.client('ec2') try: - ec2.delete_launch_template( - DryRun=False, - LaunchTemplateName=launch_template_name - ) + ec2.delete_launch_template(DryRun=False, LaunchTemplateName=launch_template_name) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') @@ -1742,13 +1607,10 @@ def delete_kms_key(kms_key_arn): kms = boto3.client('kms') try: - kms.schedule_key_deletion( - KeyId=kms_key_arn, - PendingWindowInDays=7 - ) + kms.schedule_key_deletion(KeyId=kms_key_arn, PendingWindowInDays=7) except botocore.exceptions.ClientError as error: print(f'Rollback error encounter {error}') print(f'KMS Key rolled back') - del kms \ No newline at end of file + del kms diff --git a/README.md b/README.md index 5b49537..e72c98b 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ The Amazon Elastic Kubernetes Service (EKS) Creation Engine (ECE) is a Python command-line program created by the Lightspin Office of the CISO to facilitate the creation and enablement of secure EKS Clusters, optionally further assured with continual Kubernetes Security Posture Management (KSPM), Runtime Protection, and Application Performance Monitoring (APM) capabilities. -## What is this :eyes: :eyes: ?? +## What is this :eyes: :eyes: ?? As stated above, the ECE is a Python utility to create a fully functioning EKS Cluster, complete with Nodegroups which are built off of EC2 Launch Templates as it was meant for creating EKS Nodegroups with custom AMIs with custom bootstrapping. @@ -73,10 +73,39 @@ We are happy to take contributions from anywhere that will help expand this proj - Spot provider & Fargate Profile support for Nodegroups, and an option to not use Nodegroups - Create more Plugins for various utilities (e.g., Calico, OPA, NGINX Ingress Controller, etc.) +### Basic Contributing Setup + +1. Fork the repository. +2. Clone your fork and enter the `eks-creation-engine` directory. +3. Get your Python things Python-y. 
+
+```bash
+# Add upstream
+git remote add upstream https://github.com/lightspin-tech/eks-creation-engine.git
+
+# Create virtual env
+python3 -m venv .env --prompt ece
+
+# Enter virtual env
+source .env/bin/activate
+
+# Install ECE reqs
+pip3 install -r requirements.txt
+
+# Install pre-commit
+pip3 install pre-commit
+
+# Ensure pre-commit runs... pre... commit
+pre-commit install
+
+# Init the pre-commit env and run checks
+pre-commit run -a
+```
+
 ## Contact Us :telephone_receiver: :telephone_receiver:
 
 For more information, contact us at support@lightspin.io.
 
 ## License :eight_spoked_asterisk: :eight_spoked_asterisk:
 
-This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE).
\ No newline at end of file
+This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE).
diff --git a/docs/HOWTO.md b/docs/HOWTO.md
index 17317a5..1382f33 100644
--- a/docs/HOWTO.md
+++ b/docs/HOWTO.md
@@ -347,4 +347,4 @@ For more information, contact us at support@lightspin.io.
 
 ## License :eight_spoked_asterisk: :eight_spoked_asterisk:
 
-This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE).
\ No newline at end of file
+This repository is available under the [Apache License 2.0](https://github.com/lightspin-tech/eks-creation-engine/blob/main/LICENSE).
diff --git a/main.py b/main.py
index dd00a00..3925de7 100644
--- a/main.py
+++ b/main.py
@@ -1,49 +1,57 @@
-#This file is part of Lightspin EKS Creation Engine.
-#SPDX-License-Identifier: Apache-2.0
-
-#Licensed to the Apache Software Foundation (ASF) under one
-#or more contributor license agreements. See the NOTICE file
-#distributed with this work for additional information
-#regarding copyright ownership. The ASF licenses this file
-#to you under the Apache License, Version 2.0 (the
-#"License"); you may not use this file except in compliance
-#with the License. You may obtain a copy of the License at
-
-#http://www.apache.org/licenses/LICENSE-2.0
-
-#Unless required by applicable law or agreed to in writing,
-#software distributed under the License is distributed on an
-#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#KIND, either express or implied. See the License for the
-#specific language governing permissions and limitations
-#under the License.
-
+# This file is part of Lightspin EKS Creation Engine.
+# SPDX-License-Identifier: Apache-2.0
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
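The import hunk below shows `reorder-python-imports` at work: every `from`-import is split onto its own line and modules are sorted within groups. For contributors adding imports later, a small sketch of the grouping convention it enforces (module names here are illustrative):

```python
# Ordering enforced by reorder-python-imports: standard library first,
# then third-party packages, then first-party project modules,
# one import per line, with a blank line between groups.
import json
import sys

import boto3

from EksCreationEngine import ClusterManager
```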
+import argparse
 import json
-import sys
 import re
+import subprocess
+import sys
+
 import boto3
 import botocore
-import argparse
-import subprocess
-from art import text2art
 import termcolor
-from clint.textui import colored, puts
-from EksCreationEngine import ClusterManager, UpdateManager, RollbackManager
+from art import text2art
+from clint.textui import colored
+from clint.textui import puts
+
+from EksCreationEngine import ClusterManager
+from EksCreationEngine import RollbackManager
+from EksCreationEngine import UpdateManager
 from plugins.ECEDatadog import DatadogSetup
 from plugins.ECEFalco import FalcoSetup
 from plugins.ECESecurity import SecurityAssessment
 
+
 def print_logo():
     textArt = text2art("EKS CREATION ENGINE")
     print(termcolor.colored(textArt, 'red'))
     puts(colored.red("CREATED BY THE LIGHTSPIN OFFICE OF THE CISO"))
-    puts(colored.red("For more information about Lightspin reach out to support@lightspin.io or visit us at https://lightspin.io"))
+    puts(
+        colored.red(
+            "For more information about Lightspin reach out to support@lightspin.io or visit us at https://lightspin.io"
+        )
+    )
+
 
 def stay_dangerous():
     textArt = text2art("STAY DANGEROUS")
     print(termcolor.colored(textArt, 'red'))
     puts(colored.red("With Love, the Lightspin Office of the CISO"))
 
+
 def create_preflight_check():
     '''
     This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before
@@ -57,40 +65,43 @@ def create_preflight_check():
     amiId = args.ami_id
     if amiId != 'SSM':
         # AMI Regex
-        amiRegex = re.compile('^(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\.){3}[0-9]{1,3})(?:\s*,\s*(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\.){3}[0-9]{1,3}))*$')
+        amiRegex = re.compile(
+            r'^(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\.){3}[0-9]{1,3})(?:\s*,\s*(?:(?:ami)(?:-[a-zA-Z0-9]+)?\b|(?:[0-9]{1,3}\.){3}[0-9]{1,3}))*$'
+        )
         # Attempt to match
         amiRegexCheck = amiRegex.search(amiId)
         if not amiRegexCheck:
-            print(f'Improperly AMI ID provided, does not match regex, check value and submit request again')
+            print(
+                f'Improper AMI ID provided, it does not match the expected pattern, check the value and submit the request again'
+            )
             sys.exit(2)
 
     # Check if an EKS Cluster exists for provided name
     try:
-        eks.describe_cluster(
-            name=clusterName
-        )
+        eks.describe_cluster(name=clusterName)
     except botocore.exceptions.ClientError as error:
         # If we have a "ResourceNotFoundException" error it means the cluster doesn't exist - which is what we want
         if error.response['Error']['Code'] == 'ResourceNotFoundException':
             pass
         else:
-            print(f'An EKS Cluster with the name {clusterName} already exists. Please specify another name and try again')
+            print(
+                f'An EKS Cluster with the name {clusterName} already exists. Please specify another name and try again'
+            )
             sys.exit(2)
-
+
     # Check if an EKS Nodegroup exists for provided name
     try:
-        eks.describe_nodegroup(
-            clusterName=clusterName,
-            nodegroupName=nodegroupName
-        )
+        eks.describe_nodegroup(clusterName=clusterName, nodegroupName=nodegroupName)
     except botocore.exceptions.ClientError as error:
         # If we have a "ResourceNotFoundException" error it means the cluster/nodegroup doesn't exist - which is what we want
         if error.response['Error']['Code'] == 'ResourceNotFoundException':
             pass
         else:
-            print(f'An EKS Nodegroup with the name {nodegroupName} already exists. Please specify another name and try again')
+            print(
+                f'An EKS Nodegroup with the name {nodegroupName} already exists.
Please specify another name and try again' + ) sys.exit(2) - + # Check for a provided VPC if vpcId == None: print(f'VPC ID is required for cluster creation. Please specify a VPC ID and try again.') @@ -106,13 +117,17 @@ def create_preflight_check(): # Ensure a S3 Bucket was provided if MDE installation is true if installMdeOnNodes == 'True': if bucketName == None: - print(f'S3 Bucket name was not provided. Please provide a valid S3 Bucket and try again') + print( + f'S3 Bucket name was not provided. Please provide a valid S3 Bucket and try again' + ) sys.exit(2) # Ensure a Datadog API key is provided if Datadog installation is true if datadogBool == 'True': if datadogApiKey == None: - print(f'Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again.') + print( + f'Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again.' + ) sys.exit(2) # Print out creation specification - in the future this will be a "state file" for the cluster @@ -139,16 +154,11 @@ def create_preflight_check(): 'AmiArhcitecture': amiArchitecture, 'DatadogApiKey': datadogApiKey, 'InstallDatadog?': datadogBool, - 'AdditionalAuthorizedPrincipals': additionalAuthZPrincipals + 'AdditionalAuthorizedPrincipals': additionalAuthZPrincipals, } print(f'The following attributes are set for your EKS Cluster') - print( - json.dumps( - specDict, - indent=4 - ) - ) + print(json.dumps(specDict, indent=4)) # TODO: Save state? del specDict @@ -176,11 +186,12 @@ def create_preflight_check(): ami_architecture=amiArchitecture, datadog_api_key=datadogApiKey, datadog_bool=datadogBool, - addtl_auth_principals=additionalAuthZPrincipals + addtl_auth_principals=additionalAuthZPrincipals, ) stay_dangerous() + def delete_preflight_check(): print_logo() @@ -190,23 +201,23 @@ def delete_preflight_check(): cluster_role_name=clusterRoleName, nodegroup_name=nodegroupName, nodegroup_role_name=nodegroupRoleName, - launch_template_name=launchTemplateName + launch_template_name=launchTemplateName, ) stay_dangerous() + def update_preflight_check(): print_logo() # Call the `update_kubernetes_version` function and attempt to version bump K8s of Clusters & Nodes UpdateManager.update_kubernetes_version( - cluster_name=clusterName, - kubernetes_version=k8sVersion, - nodegroup_name=nodegroupName + cluster_name=clusterName, kubernetes_version=k8sVersion, nodegroup_name=nodegroupName ) stay_dangerous() + def assessment_preflight_check(): ''' This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before @@ -217,13 +228,13 @@ def assessment_preflight_check(): eks = boto3.client('eks') # Check if an EKS Cluster exists for provided name try: - eks.describe_cluster( - name=clusterName - ) + eks.describe_cluster(name=clusterName) except botocore.exceptions.ClientError as error: # If we have an "ResourceNotFoundException" error it means the cluster doesnt exist - which is what we want if error.response['Error']['Code'] == 'ResourceNotFoundException': - print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again') + print( + f'An EKS Cluster with the name {clusterName} does not exist. 
Please specify another name and try again' + ) sys.exit(2) else: pass @@ -232,21 +243,20 @@ def assessment_preflight_check(): url = 'https://raw.githubusercontent.com/aquasecurity/kube-bench/main/job-eks.yaml' wgetCommand = f'wget {url}' - subProc = subprocess.run(wgetCommand, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + subProc = subprocess.run(wgetCommand, shell=True, capture_output=True) print(subProc.stderr.decode('utf-8')) print(f'Installing Trivy from source script for v0.24') # TODO: Continual updates of Trivy version https://aquasecurity.github.io/trivy trivyCmd = 'curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sudo sh -s -- -b /usr/local/bin v0.24.0' - trivyProc = subprocess.run(trivyCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + trivyProc = subprocess.run(trivyCmd, shell=True, capture_output=True) print(trivyProc.stdout.decode('utf-8')) - SecurityAssessment.start_assessment( - cluster_name=clusterName - ) + SecurityAssessment.start_assessment(cluster_name=clusterName) stay_dangerous() + def setup_falco_preflight_check(): ''' This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before @@ -258,32 +268,36 @@ def setup_falco_preflight_check(): eks = boto3.client('eks') # Check if an EKS Cluster exists for provided name try: - eks.describe_cluster( - name=clusterName - ) + eks.describe_cluster(name=clusterName) except botocore.exceptions.ClientError as error: # If we have an "ResourceNotFoundException" error it means the cluster doesnt exist - which is what we want if error.response['Error']['Code'] == 'ResourceNotFoundException': - print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again') + print( + f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again' + ) sys.exit(2) else: pass - + if mode == 'SetupFalco': if falcoDestType == 'Slack' or falcoDestType == 'Teams': if falcoDest == None: - print(f'No destination was provided for "--falco_sidekick_destination_type", please try again.') + print( + f'No destination was provided for "--falco_sidekick_destination_type", please try again.' + ) sys.exit(2) elif falcoDestType == 'Datadog': if datadogApiKey == None: - print(f'Datadog destination for Falco was specified but a Datadog API was not provided. Please provide a valid API key and try again.') - sys.exit(2) + print( + f'Datadog destination for Falco was specified but a Datadog API was not provided. Please provide a valid API key and try again.' 
+ ) + sys.exit(2) FalcoSetup.falco_initialization( cluster_name=clusterName, falco_mode='Create', falco_sidekick_destination_type=falcoDestType, - falco_sidekick_destination=falcoDest + falco_sidekick_destination=falcoDest, ) stay_dangerous() elif mode == 'RemoveFalco': @@ -292,13 +306,14 @@ def setup_falco_preflight_check(): falco_mode='Delete', falco_sidekick_destination_type=falcoDestType, falco_sidekick_destination=falcoDest, - datadog_api_key=datadogApiKey + datadog_api_key=datadogApiKey, ) stay_dangerous() else: print(f'Somehow, an incompatible mode detected for Falco, please try again.') sys.exit(2) + def setup_datadog_preflight_check(): ''' This function conducts a "preflight check" to ensure that required arguments are provided for the specified "Mode" before @@ -309,33 +324,31 @@ def setup_datadog_preflight_check(): eks = boto3.client('eks') # Check if an EKS Cluster exists for provided name try: - eks.describe_cluster( - name=clusterName - ) + eks.describe_cluster(name=clusterName) except botocore.exceptions.ClientError as error: # If we have an "ResourceNotFoundException" error it means the cluster doesnt exist - which is what we want if error.response['Error']['Code'] == 'ResourceNotFoundException': - print(f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again') + print( + f'An EKS Cluster with the name {clusterName} does not exist. Please specify another name and try again' + ) sys.exit(2) else: pass if mode == 'SetupDatadog': if datadogApiKey == None: - print(f'Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again.') + print( + f'Datadog setup was specified but a Datadog API was not provided. Please provide a valid API key and try again.' + ) sys.exit(2) # Datadoggy time! DatadogSetup.initialization( - cluster_name=clusterName, - datadog_mode='Setup', - datadog_api_key=datadogApiKey + cluster_name=clusterName, datadog_mode='Setup', datadog_api_key=datadogApiKey ) elif mode == 'RemoveDatadog': # Bye Datadoggy time! DatadogSetup.initialization( - cluster_name=clusterName, - datadog_mode='Remove', - datadog_api_key=datadogApiKey + cluster_name=clusterName, datadog_mode='Remove', datadog_api_key=datadogApiKey ) else: print(f'Somehow, an incompatible mode detected for Datadog, please try again.') @@ -343,8 +356,9 @@ def setup_datadog_preflight_check(): stay_dangerous() + if __name__ == "__main__": - # Feed all of the arguments + # Feed all of the arguments ''' >> argparse argument | **kwargs << --profile | profile @@ -380,92 +394,101 @@ def setup_datadog_preflight_check(): '--profile', help='Specify Profile name if multiple profiles are used', required=False, - default=[] + default=[], ) # --mode parser.add_argument( '--mode', help='Create, Destory or Update an existing Cluster. Updates limited to K8s Version bump. Destroy attempts to delete everything that this utility creates. Assessment will attempt to run various K8s security tools. SetupFalco will attempt to install Falco on existing Clusters. RemoveFalco will attempt to rollback SetupFalco deployments. SetupDatadog will attempt to install DataDog on existing Cluster. 
RemoveDatadog will attempt to rollback SetupDatadog deployments - defaults to Create', required=False, - choices=['Create', 'Destroy', 'Update', 'Assessment', 'SetupFalco', 'RemoveFalco', 'SetupDatadog', 'RemoveDatadog'], - default='Create' + choices=[ + 'Create', + 'Destroy', + 'Update', + 'Assessment', + 'SetupFalco', + 'RemoveFalco', + 'SetupDatadog', + 'RemoveDatadog', + ], + default='Create', ) # --k8s_version parser.add_argument( '--k8s_version', help='Version of K8s to use for EKS - defaults to 1.21 as of 13 JAN 2022 - used for Create and Update', required=False, - default='1.21' + default='1.21', ) # --s3_bucket_name parser.add_argument( '--s3_bucket_name', help='S3 Bucket with required artifacts for EKS to access for bootstrapping if --mde_on_nodes=True - used for Create', required=False, - default=None + default=None, ) # --ebs_volume_size parser.add_argument( '--ebs_volume_size', help='EBS volume size (in GB) for EKS nodegroup EC2 launch template - used for Create', required=False, - default='20' + default='20', ) # --ami parser.add_argument( '--ami_id', help='Custom AMI ID for EKS nodegroup EC2 launch template. Defaults to "SSM" which tells the program to use an SSM-derived image for your K8s version matching --ami_os and --ami_architecture - used for Create', required=False, - default='SSM' + default='SSM', ) # --instance_type parser.add_argument( '--instance_type', help='EC2 Instance type for EKS nodegroup EC2 launch template', required=False, - default='t3.medium' + default='t3.medium', ) # --cluster_name parser.add_argument( '--cluster_name', help='Name for your EKS Cluster - used for Create, Delete and Update', required=False, - default='LightspinECECluster' + default='LightspinECECluster', ) # --cluster_role_name parser.add_argument( '--cluster_role_name', help='Name for your EKS Cluster Service IAM Role', required=False, - default='ClusterServiceRoleForEKS' + default='ClusterServiceRoleForEKS', ) # --nodegroup_name parser.add_argument( '--nodegroup_name', help='Name for your EKS Nodegroup - used for Create, Delete and Update', required=False, - default='LightspinECENodegroup' + default='LightspinECENodegroup', ) # --nodegroup_role_name parser.add_argument( '--nodegroup_role_name', help='Name for your EKS Nodegroup Service IAM Role (also given to policy)', required=False, - default='NodegroupServiceRoleForEKS' + default='NodegroupServiceRoleForEKS', ) # --launch_template_name parser.add_argument( '--launch_template_name', help='Name for your Nodegroup EC2 launch template - used for Create and Delete', required=False, - default='LightspinECECustomEKSAMI' + default='LightspinECECustomEKSAMI', ) # --vpcid parser.add_argument( '--vpcid', help='VPC ID to launch EKS Cluster and Nodegroups into', required=False, - default=None + default=None, ) # --subnets # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python @@ -473,14 +496,14 @@ def setup_datadog_preflight_check(): '--subnets', nargs='+', help='Subnets to launch EKS Cluster and Nodegroups into - provide subnet IDs separated by spaces only', - required=False + required=False, ) # --node_count parser.add_argument( '--node_count', help='Amount of Nodes (EC2 instances) in EKS Nodegroup, will be used for min and desired values with 2 times for max - default 2', required=False, - default='2' + default='2', ) # --mde_on_nodes parser.add_argument( @@ -488,7 +511,7 @@ def setup_datadog_preflight_check(): help='Whether or not to install MDE on EKS Nodes via bootstrap - requires S3 
Bucket and install scripts if true - defaults to False', required=False, choices=['True', 'False'], - default='False' + default='False', ) # --additional_ports # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python @@ -496,7 +519,7 @@ def setup_datadog_preflight_check(): '--additional_ports', nargs='+', help='Additional application ports which need to be allowed in EKS Security Groups - 443, 53, 8765, 2801, and 10250 already included', - required=False + required=False, ) # --falco parser.add_argument( @@ -504,7 +527,7 @@ def setup_datadog_preflight_check(): help='For CREATE Mode, this flag specifies if you want to install and configure Falco on your Clusters - defaults to False', required=False, choices=['True', 'False'], - default='False' + default='False', ) # --falco_sidekick_destination_type parser.add_argument( @@ -512,14 +535,14 @@ def setup_datadog_preflight_check(): help='The output location for Falco Sidekick to send Falco alerts to. Defaults to SNS which also creates a new Topic unless a Destination is provided', required=False, choices=['SNS', 'Slack', 'Teams', 'Datadog'], - default='SNS' + default='SNS', ) # --falco_sidekick_destination parser.add_argument( '--falco_sidekick_destination', help='The logical location matching the Sidekick Destination Type to forward Falco alerts. E.g., ARN, Webhook URL, Datadog URL, etc.', required=False, - default=None + default=None, ) # --ami_os parser.add_argument( @@ -527,7 +550,7 @@ def setup_datadog_preflight_check(): help='If using "SSM" for --ami use this argument to specify what OS you want to use (alas = Amazon Linux 2, ubuntu = Ubuntu 20.04) - defaults to ubuntu', required=False, choices=['alas', 'ubuntu'], - default='ubuntu' + default='ubuntu', ) # --ami_architecture parser.add_argument( @@ -535,7 +558,7 @@ def setup_datadog_preflight_check(): help='If using "SSM" for --ami use this argument to specify what architecture you want to use - defaults to amd64', required=False, choices=['amd64', 'arm64'], - default='amd64' + default='amd64', ) # --datadog parser.add_argument( @@ -543,14 +566,14 @@ def setup_datadog_preflight_check(): help='For CREATE Mode, this flag specifies if you want to install and configure Datadog APM on your Clusters - defaults to False', required=False, choices=['True', 'False'], - default='False' + default='False', ) # --datadog_api_key parser.add_argument( '--datadog_api_key', help='Datadog API Key. This is used for setting up Datadog with Create and SetupDatadog Modes as well as Datadog integration for FalcoSidekick', required=False, - default=None + default=None, ) # addtl_auth_principals # for help https://www.kite.com/python/answers/how-to-pass-a-list-as-an-argument-using-argparse-in-python @@ -558,7 +581,7 @@ def setup_datadog_preflight_check(): '--addtl_auth_principals', nargs='+', help='Additional IAM Role ARNs to authorized as system:masters', - required=False + required=False, ) args = parser.parse_args() @@ -611,4 +634,4 @@ def setup_datadog_preflight_check(): setup_datadog_preflight_check() else: print(f'Somehow you provided an unexpected arguement, exiting!') - sys.exit(2) \ No newline at end of file + sys.exit(2) diff --git a/plugins/ECEDatadog.py b/plugins/ECEDatadog.py index 8caba57..65108e2 100644 --- a/plugins/ECEDatadog.py +++ b/plugins/ECEDatadog.py @@ -1,30 +1,27 @@ -#This file is part of Lightspin EKS Creation Engine. 
-#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. - +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. import subprocess ''' This Class manages deployment of Datadog onto an EKS Cluster and rollbacks / manual deletions ''' -class DatadogSetup(): + +class DatadogSetup: def initialization(cluster_name, datadog_mode, datadog_api_key): ''' This function controls initialization of the DatadogSetup Class. 
It will control installs, deletions, and rollbacks @@ -44,15 +41,21 @@ def install_datadog(datadog_api_key): # Use subprocess to add Datadog Charts using Helm print(f'Adding Datadog Helm Charts') - datadogHelmChartAddCmd = 'helm repo add datadog https://helm.datadoghq.com && helm repo update' - datadogHelmChartAddSubprocess = subprocess.run(datadogHelmChartAddCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + datadogHelmChartAddCmd = ( + 'helm repo add datadog https://helm.datadoghq.com && helm repo update' + ) + datadogHelmChartAddSubprocess = subprocess.run( + datadogHelmChartAddCmd, shell=True, capture_output=True + ) datadogHelmChartAddMsg = str(datadogHelmChartAddSubprocess.stdout.decode('utf-8')) print(datadogHelmChartAddMsg) # Use subprocess to configure Datadog per initiation arguments from main.py print(f'Installing Datadog') installDatadogCmd = f'helm install datadog-agent --set targetSystem=linux --set datadog.apiKey={datadog_api_key} datadog/datadog' - installDatadogSubprocess = subprocess.run(installDatadogCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + installDatadogSubprocess = subprocess.run( + installDatadogCmd, shell=True, capture_output=True + ) installDatadogMsg = str(installDatadogSubprocess.stdout.decode('utf-8')) print(installDatadogMsg) @@ -63,6 +66,6 @@ def uninstall_datadog(): # Uninstall Datadog from EKS datadogRemoveCmd = 'helm uninstall datadog-agent' - datadogRemoveSubprocess = subprocess.run(datadogRemoveCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + datadogRemoveSubprocess = subprocess.run(datadogRemoveCmd, shell=True, capture_output=True) datadogRemoveMsg = str(datadogRemoveSubprocess.stdout.decode('utf-8')) - print(datadogRemoveMsg) \ No newline at end of file + print(datadogRemoveMsg) diff --git a/plugins/ECEFalco.py b/plugins/ECEFalco.py index c148cda..5b838c2 100644 --- a/plugins/ECEFalco.py +++ b/plugins/ECEFalco.py @@ -1,36 +1,40 @@ -#This file is part of Lightspin EKS Creation Engine. -#SPDX-License-Identifier: Apache-2.0 - -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the +# This file is part of Lightspin EKS Creation Engine. +# SPDX-License-Identifier: Apache-2.0 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the #'License'); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -#http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, -#software distributed under the License is distributed on an +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -#KIND, either express or implied. See the License for the -#specific language governing permissions and limitations -#under the License. - +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +import json +import subprocess import sys +from datetime import datetime + import boto3 import botocore.exceptions -import json -from datetime import datetime -import subprocess ''' This Class manages an end-to-end deployment of Falco and FalcoSidekick to EKS using Helm. This class can be called from ClusterManager (if flag is set) or called independently to setup Falco ''' -class FalcoSetup(): - def falco_initialization(cluster_name, falco_mode, falco_sidekick_destination_type, falco_sidekick_destination, datadog_api_key): + +class FalcoSetup: + def falco_initialization( + cluster_name, + falco_mode, + falco_sidekick_destination_type, + falco_sidekick_destination, + datadog_api_key, + ): ''' This function handles configuration of Falco and FalcoSidekick on a Cluster, whether in-line of an ECE Create or ECE SetupFalco `--mode` from main.py Depending on the destination configuration and mode, this function will either schedule deletion or creation of additional infrastructure and issue Helm @@ -51,8 +55,7 @@ def falco_initialization(cluster_name, falco_mode, falco_sidekick_destination_ty # and add them to the static list above if they are not already there for nodegroup in eks.list_nodegroups(clusterName=cluster_name)['nodegroups']: nodeRoleArn = eks.describe_nodegroup( - clusterName=cluster_name, - nodegroupName=nodegroup + clusterName=cluster_name, nodegroupName=nodegroup )['nodegroup']['nodeRole'] if nodeRoleArn not in roleArns: roleArns.append(nodeRoleArn) @@ -65,37 +68,35 @@ def falco_initialization(cluster_name, falco_mode, falco_sidekick_destination_ty topicArn = FalcoSetup.falco_sidekick_sns_iam_generator( cluster_name=cluster_name, falco_sidekick_destination=falco_sidekick_destination, - role_arns=roleArns + role_arns=roleArns, ) # Install Falco # All commands for FalcoSidekick come from the Helm vars spec in the chart GitHub repo: https://github.com/falcosecurity/charts/tree/master/falcosidekick#configuration falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.aws.sns.topicarn={topicArn}' - FalcoSetup.install_falco( - falco_install_command=falcoHelmCmd - ) + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) elif falco_sidekick_destination_type == 'Slack': - print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Slack Webhook {falco_sidekick_destination}') - + print( + f'Configuring Falco and FalcoSidekick to send runtime alerts to Slack Webhook {falco_sidekick_destination}' + ) + # Install Falco falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.slack.webhookurl={falco_sidekick_destination}' - FalcoSetup.install_falco( - falco_install_command=falcoHelmCmd - ) + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) elif falco_sidekick_destination_type == 'Teams': - print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Teams Webhook {falco_sidekick_destination}') + print( + f'Configuring Falco and FalcoSidekick to send runtime alerts to Teams Webhook {falco_sidekick_destination}' + ) # Install Falco falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.teams.webhookurl={falco_sidekick_destination}' - FalcoSetup.install_falco( 
- falco_install_command=falcoHelmCmd - ) + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) elif falco_sidekick_destination_type == 'Datadog': - print(f'Configuring Falco and FalcoSidekick to send runtime alerts to Datadog Host {falco_sidekick_destination}') + print( + f'Configuring Falco and FalcoSidekick to send runtime alerts to Datadog Host {falco_sidekick_destination}' + ) # Install Falco falcoHelmCmd = f'helm install falco falcosecurity/falco --set falcosidekick.enabled=true --set falcosidekick.webui.enabled=false --set falcosidekick.config.datadog.host={falco_sidekick_destination} --set falcosidekick.config.datadog.apikey={datadog_api_key}' - FalcoSetup.install_falco( - falco_install_command=falcoHelmCmd - ) + FalcoSetup.install_falco(falco_install_command=falcoHelmCmd) else: print(f'Unsupported destination type provided, exiting') sys.exit(2) @@ -116,8 +117,7 @@ def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, r # If the value for 'falco_sidekick_destination' is None, that means a SNS topic was not provided and needs to be setup if falco_sidekick_destination == None: topicArn = FalcoSetup.falco_sidekick_sns_creation( - cluster_name=cluster_name, - role_arns=role_arns + cluster_name=cluster_name, role_arns=role_arns ) else: topicArn = falco_sidekick_destination @@ -139,14 +139,10 @@ def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, r { 'Sid': 'Snssid', 'Effect': 'Allow', - 'Action': [ - 'sns:Publish', - 'sns:GetTopicAttributes', - 'sns:ListTopics' - ], - 'Resource': [topicArn] + 'Action': ['sns:Publish', 'sns:GetTopicAttributes', 'sns:ListTopics'], + 'Resource': [topicArn], } - ] + ], } policyName = f'{cluster_name}FalcoSidekick-SNSPublishPolicy' @@ -158,23 +154,11 @@ def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, r PolicyDocument=json.dumps(iamPolicyDoc), Description=f'Allows EKS Cluster {cluster_name} and Nodegroups to send Falco alerts to SNS - Created by Lightspin ECE', Tags=[ - { - 'Key': 'Name', - 'Value': policyName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': policyName}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 'CreatedWith', 'Value': 'Lightspin ECE'}, + ], ) policyArn = f'arn:aws:iam::{acctId}:policy/{policyName}' except botocore.exceptions.ClientError as error: @@ -184,10 +168,7 @@ def falco_sidekick_sns_iam_generator(cluster_name, falco_sidekick_destination, r for role in roleArns: roleName = role.split('/')[1] try: - iam.attach_role_policy( - RoleName=roleName, - PolicyArn=policyArn - ) + iam.attach_role_policy(RoleName=roleName, PolicyArn=policyArn) except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') FalcoSetup.falco_setup_rollback(cluster_name=cluster_name) @@ -229,27 +210,13 @@ def falco_sidekick_sns_creation(cluster_name, role_arns): try: topicArn = sns.create_topic( Name=topicName, - Attributes={ - 'DisplayName': topicName - }, + Attributes={'DisplayName': topicName}, Tags=[ - { - 'Key': 'Name', - 'Value': topicName - }, - { - 'Key': 'CreatedBy', - 'Value': createdBy - }, - { - 'Key': 'CreatedAt', - 'Value': createdAt - }, - { - 'Key': 'CreatedWith', - 'Value': 'Lightspin ECE' - } - ] + {'Key': 'Name', 'Value': topicName}, + {'Key': 'CreatedBy', 'Value': createdBy}, + {'Key': 'CreatedAt', 'Value': createdAt}, + {'Key': 
'CreatedWith', 'Value': 'Lightspin ECE'}, + ], )['TopicArn'] except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') @@ -257,40 +224,32 @@ def falco_sidekick_sns_creation(cluster_name, role_arns): # Create a SNS Topic Policy Doc to pass in as an SNS Attribute topicPolicyJson = { - 'Version':'2008-10-17', - 'Id':'ecepolicy', - 'Statement':[ + 'Version': '2008-10-17', + 'Id': 'ecepolicy', + 'Statement': [ { - 'Sid':'ecesid-pub', - 'Effect':'Allow', - 'Principal':{ - 'AWS': roleArns - }, - 'Action':['SNS:Publish'], - 'Resource': topicArn + 'Sid': 'ecesid-pub', + 'Effect': 'Allow', + 'Principal': {'AWS': roleArns}, + 'Action': ['SNS:Publish'], + 'Resource': topicArn, }, { - 'Sid':'ecesid-sub', - 'Effect':'Allow', - 'Principal':{ - 'AWS':'*' - }, - 'Action':['SNS:Subscribe'], + 'Sid': 'ecesid-sub', + 'Effect': 'Allow', + 'Principal': {'AWS': '*'}, + 'Action': ['SNS:Subscribe'], 'Resource': topicArn, - 'Condition':{ - 'StringEquals':{ - 'AWS:SourceOwner': acctId - } - } - } - ] + 'Condition': {'StringEquals': {'AWS:SourceOwner': acctId}}, + }, + ], } try: sns.set_topic_attributes( TopicArn=topicArn, AttributeName='Policy', - AttributeValue=json.dumps(topicPolicyJson) + AttributeValue=json.dumps(topicPolicyJson), ) except botocore.exceptions.ClientError as error: print(f'Error encountered: {error}') @@ -314,15 +273,19 @@ def install_falco(falco_install_command): # Use subprocess to add Falco Charts using Helm print(f'Adding Falco Helm Charts') - falcoHelmChartAddCmd = 'helm repo add falcosecurity https://falcosecurity.github.io/charts && helm repo update' - falcoHelmChartAddSubprocess = subprocess.run(falcoHelmChartAddCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + falcoHelmChartAddCmd = ( + 'helm repo add falcosecurity https://falcosecurity.github.io/charts && helm repo update' + ) + falcoHelmChartAddSubprocess = subprocess.run( + falcoHelmChartAddCmd, shell=True, capture_output=True + ) falcoHelmChartAddMsg = str(falcoHelmChartAddSubprocess.stdout.decode('utf-8')) print(falcoHelmChartAddMsg) # Use subprocess to configure Falco and FalcoSidekick per initiation arguments from main.py print(f'Installing Falco and FalcoSidekick') installFalcoCmd = falco_install_command - installFalcoSubprocess = subprocess.run(installFalcoCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + installFalcoSubprocess = subprocess.run(installFalcoCmd, shell=True, capture_output=True) installFalcoMsg = str(installFalcoSubprocess.stdout.decode('utf-8')) print(installFalcoMsg) @@ -349,17 +312,12 @@ def falco_setup_rollback(cluster_name): # If an IAM Policy for SNS was created, attempt to detach it before deletion try: rolesAttachedToPolicy = iam.list_entities_for_policy( - PolicyArn=policyArn, - EntityFilter='Role', - PolicyUsageFilter='PermissionsPolicy' + PolicyArn=policyArn, EntityFilter='Role', PolicyUsageFilter='PermissionsPolicy' )['PolicyRoles'] if rolesAttachedToPolicy: for role in rolesAttachedToPolicy: roleName = str(role['RoleName']) - iam.detach_role_policy( - RoleName=roleName, - PolicyArn=policyArn - ) + iam.detach_role_policy(RoleName=roleName, PolicyArn=policyArn) except botocore.exceptions.ClientError as error: print(error) except KeyError as ke: @@ -367,9 +325,7 @@ def falco_setup_rollback(cluster_name): # If an IAM Policy for SNS was created, attempt to delete it try: - iam.delete_policy( - PolicyArn=policyArn - ) + iam.delete_policy(PolicyArn=policyArn) print(f'Falco SNS Policy {policyArn} deleted') except botocore.exceptions.ClientError as 
@@ -383,9 +339,9 @@ def falco_setup_rollback(cluster_name):
 
         # Uninstall Falco from EKS
         falcoRemoveCmd = 'helm uninstall falco'
-        falcoRemoveSubprocess = subprocess.run(falcoRemoveCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        falcoRemoveSubprocess = subprocess.run(falcoRemoveCmd, shell=True, capture_output=True)
         falcoRemoveMsg = str(falcoRemoveSubprocess.stdout.decode('utf-8'))
         print(falcoRemoveMsg)
 
         print(f'Falco rollback complete.')
-        sys.exit(2)
\ No newline at end of file
+        sys.exit(2)
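# --- Editorial aside (illustrative sketch, not part of the diff) -------------
# The change repeated throughout this patch swaps the verbose
# `stdout=subprocess.PIPE, stderr=subprocess.PIPE` pair for
# `capture_output=True`, which subprocess.run() has accepted as shorthand for
# exactly those two keyword arguments since Python 3.7. The behaviour is
# identical (POSIX shell assumed for the echo below):
import subprocess

verbose = subprocess.run('echo hello', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
terse = subprocess.run('echo hello', shell=True, capture_output=True)
assert verbose.stdout == terse.stdout == b'hello\n'
# ------------------------------------------------------------------------------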
diff --git a/plugins/ECESecurity.py b/plugins/ECESecurity.py
index 6958254..40f2b54 100644
--- a/plugins/ECESecurity.py
+++ b/plugins/ECESecurity.py
@@ -1,34 +1,32 @@
-#This file is part of Lightspin EKS Creation Engine.
-#SPDX-License-Identifier: Apache-2.0
-
-#Licensed to the Apache Software Foundation (ASF) under one
-#or more contributor license agreements. See the NOTICE file
-#distributed with this work for additional information
-#regarding copyright ownership. The ASF licenses this file
-#to you under the Apache License, Version 2.0 (the
+# This file is part of Lightspin EKS Creation Engine.
+# SPDX-License-Identifier: Apache-2.0
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
 #'License'); you may not use this file except in compliance
-#with the License. You may obtain a copy of the License at
-
-#http://www.apache.org/licenses/LICENSE-2.0
-
-#Unless required by applicable law or agreed to in writing,
-#software distributed under the License is distributed on an
+# with the License. You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
 #'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#KIND, either express or implied. See the License for the
-#specific language governing permissions and limitations
-#under the License.
-
-import boto3
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
 import json
-import time
-import subprocess
 import re
+import subprocess
+import time
+
+import boto3
 
 '''
 This class manages various security assessment functions - such as running and saving Kube-bench CIS benchmarking and Trivy container scanning
 '''
-class SecurityAssessment():
+
+class SecurityAssessment:
     def start_assessment(cluster_name):
         '''
         This function serves as the 'brain' of the security assessment.
         It will modify the Kubeconfig and attempt to run the other assessments
@@ -40,8 +38,10 @@ def start_assessment(cluster_name):
         session = boto3.session.Session()
         awsRegion = session.region_name
 
-        updateKubeconfigCmd = f'aws eks update-kubeconfig --region {awsRegion} --name {cluster_name}'
-        updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        updateKubeconfigCmd = (
+            f'aws eks update-kubeconfig --region {awsRegion} --name {cluster_name}'
+        )
+        updateKubeconfigProc = subprocess.run(updateKubeconfigCmd, shell=True, capture_output=True)
         print(updateKubeconfigProc.stdout.decode('utf-8'))
 
         trivySarif = SecurityAssessment.run_trivy()
@@ -52,7 +52,7 @@ def start_assessment(cluster_name):
         sarifBase = {
             '$schema': 'https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json',
             'version': '2.1.0',
-            'runs': []
+            'runs': [],
         }
 
         for runs in trivySarif:
@@ -64,7 +64,9 @@ def start_assessment(cluster_name):
         with open('./ECE_SecurityAssessment.sarif', 'w') as jsonfile:
             json.dump(sarifBase, jsonfile, indent=4, default=str)
 
-        print(f'Assessments completed and SARIF document created successfully as "ECE_SecurityAssessment.sarif".')
+        print(
+            f'Assessments completed and SARIF document created successfully as "ECE_SecurityAssessment.sarif".'
+        )
 
     def run_trivy():
         '''
@@ -75,12 +77,12 @@
         trivyFindings = []
 
         print(f'Running Trivy')
-        
+
         # Retrieve a list of all running Containers and create a unique list of them to pass to Trivy for scanning
         print(f'Retrieving list of all running Containers from your EKS Cluster')
 
         command = 'kubectl get pods --all-namespaces -o json | jq --raw-output ".items[].spec.containers[].image"'
-        sub = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        sub = subprocess.run(command, shell=True, capture_output=True)
         # pull list of container URIs from kubectl
         strList = str(sub.stdout.decode('utf-8'))
         # split by newline, as that is how it is returned
@@ -92,7 +94,7 @@ def run_trivy():
                 pass
             else:
                 uniqueContainers.append(i)
-        
+
         totalUniques = str(len(uniqueContainers))
         if totalUniques == '1':
             print(f'Trivy will scan {totalUniques} unique container image')
@@ -106,7 +108,7 @@ def run_trivy():
         for c in uniqueContainers:
             # passing '--quiet' will ensure the setup text from Trivy scanning does not make it into the JSON and corrupt it
             trivyScanCmd = f'trivy --quiet image --format sarif {c}'
-            trivyScanSubprocess = subprocess.run(trivyScanCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            trivyScanSubprocess = subprocess.run(trivyScanCmd, shell=True, capture_output=True)
             trivyStdout = str(trivyScanSubprocess.stdout.decode('utf-8'))
             # load JSON object from stdout
             jsonItem = json.loads(trivyStdout)
@@ -139,7 +141,7 @@ def run_kube_bench(cluster_name):
 
         # Schedule the Job onto your EKS Cluster
         command = 'kubectl apply -f job-eks.yaml'
-        runJobSubproc = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        runJobSubproc = subprocess.run(command, shell=True, capture_output=True)
         print(runJobSubproc.stdout.decode('utf-8'))
         time.sleep(1.5)
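# --- Editorial aside (illustrative sketch, not part of the diff) -------------
# start_assessment() earlier in this file assembles a single SARIF 2.1.0
# document by concatenating each tool's 'runs' entries under a shared header.
# Reduced to a hypothetical helper (merge_sarif_runs is not a function in this
# repository):
import json


def merge_sarif_runs(output_path, *run_lists):
    sarif_doc = {
        '$schema': 'https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json',
        'version': '2.1.0',
        'runs': [],
    }
    for run_list in run_lists:
        sarif_doc['runs'].extend(run_list)  # one 'run' object per tool invocation
    with open(output_path, 'w') as jsonfile:
        json.dump(sarif_doc, jsonfile, indent=4, default=str)


# usage: merge_sarif_runs('ECE_SecurityAssessment.sarif', trivy_runs, kube_bench_runs)
# ------------------------------------------------------------------------------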
@@ -149,7 +151,7 @@
         # Really bad Regex hack to exit the `while True` loop - fuzzy match the stdout message
         completionRegex = re.compile('job.batch/kube-bench condition met')
         while True:
-            jobWaitSubproc = subprocess.run(jobWaitCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            jobWaitSubproc = subprocess.run(jobWaitCmd, shell=True, capture_output=True)
             jobWaitMessage = str(jobWaitSubproc.stdout.decode('utf-8'))
             completionRegexCheck = completionRegex.search(jobWaitMessage)
             if completionRegexCheck:
@@ -162,13 +164,15 @@
         # `getPodCmd` used Kubectl to get pod names in all namespaces (-A). cut -d/ -f2 command is to split by the '/' and get the name
         # grep is used to ensure the right pod name is pulled as it always ends with a random 5 character hex (ex. kube-bench-z6r4b)
         getPodCmd = 'kubectl get pods -o name -A | cut -d/ -f2 | grep kube-bench'
-        getPodSubproc = subprocess.run(getPodCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        getPodSubproc = subprocess.run(getPodCmd, shell=True, capture_output=True)
         # decoding adds newline or blank spaces - attempt to trim them
-        kubebenchPodName = str(getPodSubproc.stdout.decode('utf-8')).replace('\n', '').replace(' ', '')
+        kubebenchPodName = (
+            str(getPodSubproc.stdout.decode('utf-8')).replace('\n', '').replace(' ', '')
+        )
 
         # Pull logs from Job - this is the actual results of the job
         getLogsCmd = f'kubectl logs {kubebenchPodName}'
-        getLogsSubproc = subprocess.run(getLogsCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        getLogsSubproc = subprocess.run(getLogsCmd, shell=True, capture_output=True)
         getLogsStdout = str(getLogsSubproc.stdout.decode('utf-8'))
         # Split the block of text from STDOUT by newline delimiters to create a new list
         splitter = getLogsStdout.split('\n')
@@ -176,7 +180,7 @@
         # Use regex to match the Kube-Bench findings, they always start with a '[' which contains info such as '[PASS]'. We then match anything with 2 periods
         # as Kube-bench outputs 'headers' such as 3 or 3.1 - we want results such as '[PASS] 3.1.3 Ensure that the kubelet configuration file has permissions set to 644 or more restrictive (Manual)'
         # this is a horrible way to do it....but it works
-        kubeBenchResultRegex = re.compile('^\[.*\..*\..*')
+        kubeBenchResultRegex = re.compile(r'^\[.*\..*\..*')
         for line in splitter:
             kubeBenchRegexCheck = kubeBenchResultRegex.search(line)
             if kubeBenchRegexCheck:
@@ -192,38 +196,34 @@
                 # Create a new dict of the findings that will match a SARIF JSON 'run'
                 # https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md
                 run = {
-                    'tool':{
-                        'driver':{
-                            'name':'Kube-bench',
+                    'tool': {
+                        'driver': {
+                            'name': 'Kube-bench',
                             'semanticVersion': '0.6.6',
                             'informationUri': 'https://github.com/aquasecurity/kube-bench',
                             'organization': 'Aqua Security',
                             'fullDescription': {
                                 'text': 'kube-bench is a tool that checks whether Kubernetes is deployed securely by running the checks documented in the CIS Kubernetes Benchmark.'
-                            }
+                            },
                         }
                     },
-                    'results':[
+                    'results': [
                         {
                             'ruleId': splitFinding[1],
-                            'message':{
-                                'text': findingStatus
-                            },
-                            'locations':[
+                            'message': {'text': findingStatus},
+                            'locations': [
                                 {
-                                    'physicalLocation':{
-                                        'artifactLocation':{
-                                            'uri': clusterEndpoint,
-                                            'description': {
-                                                'text': cluster_name
-                                            }
+                                    'physicalLocation': {
+                                        'artifactLocation': {
+                                            'uri': clusterEndpoint,
+                                            'description': {'text': cluster_name},
                                         }
                                     }
                                 }
-                            ]
+                            ],
                         }
                     ],
-                    'columnKind':'utf16CodeUnits'
+                    'columnKind': 'utf16CodeUnits',
                 }
                 findings.append(run)
             else:
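# --- Editorial aside (illustrative note, not part of the diff) ---------------
# The raw-string change above (re.compile(r'^\[.*\..*\..*')) matters because
# '\[' and '\.' are not recognized string escapes: in a plain literal they
# currently fall through unchanged, but Python has warned about them since 3.6
# (DeprecationWarning, a SyntaxWarning in newer releases) and they are slated
# to become errors. The raw string matches exactly the same lines:
import re

kube_bench_result = re.compile(r'^\[.*\..*\..*')
assert kube_bench_result.search(
    '[PASS] 3.1.3 Ensure that the kubelet configuration file has permissions set to 644 or more restrictive (Manual)'
)
assert not kube_bench_result.search('[INFO] 3 Worker Node Security Configuration')
# ------------------------------------------------------------------------------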
@@ -233,10 +233,12 @@
 
         # Delete the job from the EKS Cluster
         deleteKubebenchJobCmd = 'kubectl delete -f job-eks.yaml'
-        deleteKubebenchJobSubproc = subprocess.run(deleteKubebenchJobCmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        deleteKubebenchJobSubproc = subprocess.run(
+            deleteKubebenchJobCmd, shell=True, capture_output=True
+        )
         deleteKubebenchJobStdout = str(deleteKubebenchJobSubproc.stdout.decode('utf-8'))
         print(f'{deleteKubebenchJobStdout}')
 
         print(f'Completed Kube-bench assessment of EKS Cluster {cluster_name}')
 
-        return findings
\ No newline at end of file
+        return findings
diff --git a/plugins/__init__.py b/plugins/__init__.py
index 8e8bdd2..da288a2 100644
--- a/plugins/__init__.py
+++ b/plugins/__init__.py
@@ -1,19 +1,16 @@
-#This file is part of Lightspin EKS Creation Engine.
-#SPDX-License-Identifier: Apache-2.0
-
-#Licensed to the Apache Software Foundation (ASF) under one
-#or more contributor license agreements. See the NOTICE file
-#distributed with this work for additional information
-#regarding copyright ownership. The ASF licenses this file
-#to you under the Apache License, Version 2.0 (the
-#"License"); you may not use this file except in compliance
-#with the License. You may obtain a copy of the License at
-
-#http://www.apache.org/licenses/LICENSE-2.0
-
-#Unless required by applicable law or agreed to in writing,
-#software distributed under the License is distributed on an
-#"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#KIND, either express or implied. See the License for the
-#specific language governing permissions and limitations
-#under the License.
\ No newline at end of file
+# This file is part of Lightspin EKS Creation Engine.
+# SPDX-License-Identifier: Apache-2.0
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/requirements.txt b/requirements.txt
index 2e1b37f..53bd91a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
-awscli
-boto3
-art
-termcolor
-clint
-urllib3
\ No newline at end of file
+art>=5.4,<5.5
+awscli>=1.22.65,<1.23.0
+boto3>=1.21.10,<1.22.0
+clint>=0.5.1,<0.6.0
+termcolor>=1.1.0,<1.2.0
+urllib3>=1.26.8,<1.27.0
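# --- Editorial aside (illustrative note, not part of the diff) ---------------
# The requirements pins above use explicit '>=lower,<next-minor' windows. Under
# PEP 440 each pair is equivalent to a compatible-release specifier, which can
# be verified with the third-party `packaging` library (an assumption -- it is
# not among this project's dependencies):
from packaging.specifiers import SpecifierSet

compatible = SpecifierSet('~=1.21.10')  # same window as '>=1.21.10,<1.22.0'
assert '1.21.99' in compatible
assert '1.22.0' not in compatible
# ------------------------------------------------------------------------------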