diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..0008d33 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +venv/ +*.pyc +__pycache__ +.git +.env +*.egg-info +dist/ +build/ diff --git a/.gitignore b/.gitignore index e3db042..65d4606 100644 --- a/.gitignore +++ b/.gitignore @@ -204,5 +204,4 @@ cython_debug/ models/ configs/ .pdm-python -.magemaker_configs .env.save \ No newline at end of file diff --git a/Dockerfile-server b/Dockerfile-server index 79bb9a6..6351096 100644 --- a/Dockerfile-server +++ b/Dockerfile-server @@ -1,22 +1,55 @@ -# Use an official Python runtime as a parent image -FROM python:3.12-slim +FROM python:3.11-slim -# Set environment variables -ENV PYTHONDONTWRITEBYTECODE 1 -ENV PYTHONUNBUFFERED 1 +# Install system dependencies +RUN apt-get update && apt-get install -y \ + git \ + curl \ + build-essential \ + unzip \ + nano \ + vim \ + gnupg \ + lsb-release \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* -# Set the working directory in the container +# Set working directory WORKDIR /app -# Copy the project files to the working directory -COPY . . +# Copy your magemaker package +COPY . /app/ -# Install PDM and use it to install dependencies -RUN pip install --no-cache-dir pdm \ - && pdm install --no-interactive +# Install package and dependencies +RUN pip install --no-cache-dir -e . 
-# Expose port 8000 to the outside world -EXPOSE 8000 +# Install AWS CLI +RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ + && unzip awscliv2.zip \ + && ./aws/install \ + && rm awscliv2.zip \ + && rm -rf aws -# Run uvicorn when the container launches -CMD ["pdm", "run", "uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"] +# Install Google Cloud SDK +RUN curl https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-458.0.0-linux-x86_64.tar.gz -o google-cloud-sdk.tar.gz \ + && tar -xf google-cloud-sdk.tar.gz \ + && ./google-cloud-sdk/install.sh --quiet \ + && rm google-cloud-sdk.tar.gz + +# Install Azure CLI +RUN mkdir -p /etc/apt/keyrings && \ + curl -sLS https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor | tee /etc/apt/keyrings/microsoft.gpg > /dev/null && \ + chmod go+r /etc/apt/keyrings/microsoft.gpg && \ + echo "deb [arch=`dpkg --print-architecture` signed-by=/etc/apt/keyrings/microsoft.gpg] https://packages.microsoft.com/repos/azure-cli/ $(lsb_release -cs) main" | tee /etc/apt/sources.list.d/azure-cli.list && \ + apt-get update && \ + apt-get install -y azure-cli && \ + rm -rf /var/lib/apt/lists/* + +# Add Google Cloud SDK to PATH +ENV PATH="${PATH}:/app/google-cloud-sdk/bin" + +# Copy and setup entrypoint +COPY magemaker/docker/entrypoint.sh /usr/local/bin/ +RUN chmod +x /usr/local/bin/entrypoint.sh + +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD ["bash"] \ No newline at end of file diff --git a/magemaker/docker/entrypoint.sh b/magemaker/docker/entrypoint.sh new file mode 100755 index 0000000..b291f38 --- /dev/null +++ b/magemaker/docker/entrypoint.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -e + +# Setup AWS credentials if mounted +if [ -f "/root/.aws/credentials" ]; then + export AWS_SHARED_CREDENTIALS_FILE="/root/.aws/credentials" +fi + +# Setup GCP credentials if mounted +if [ -f "/root/.config/gcloud/application_default_credentials.json" ]; then + export 
GOOGLE_APPLICATION_CREDENTIALS="/root/.config/gcloud/application_default_credentials.json" +fi + +exec "$@" \ No newline at end of file diff --git a/magemaker/sagemaker/create_model.py b/magemaker/sagemaker/create_model.py deleted file mode 100644 index 8612d33..0000000 --- a/magemaker/sagemaker/create_model.py +++ /dev/null @@ -1,218 +0,0 @@ -import json -from dotenv import dotenv_values -from rich.table import Table -from sagemaker import image_uris, model_uris, script_uris -from sagemaker.huggingface import get_huggingface_llm_image_uri -from sagemaker.huggingface.model import HuggingFaceModel -from sagemaker.jumpstart.model import JumpStartModel -from sagemaker.jumpstart.estimator import JumpStartEstimator -from sagemaker.model import Model -from sagemaker.predictor import Predictor -from sagemaker.s3 import S3Uploader -from magemaker.config import write_config -from magemaker.schemas.model import Model, ModelSource -from magemaker.schemas.deployment import Deployment -from magemaker.session import session, sagemaker_session -from magemaker.console import console -from magemaker.utils.aws_utils import construct_s3_uri, is_s3_uri -from magemaker.utils.rich_utils import print_error, print_success -from magemaker.utils.model_utils import get_unique_endpoint_name, get_model_and_task -from magemaker.huggingface import HuggingFaceTask -from magemaker.huggingface.hf_hub_api import get_hf_task - - - -def deploy_huggingface_model_to_sagemaker(deployment, model): - HUGGING_FACE_HUB_TOKEN = dotenv_values(".env").get("HUGGING_FACE_HUB_KEY") - SAGEMAKER_ROLE = dotenv_values(".env").get("SAGEMAKER_ROLE") - - region_name = session.region_name - task = get_hf_task(model) - model.task = task - env = { - 'HF_MODEL_ID': model.id, - 'HF_TASK': task, - } - - if HUGGING_FACE_HUB_TOKEN is not None: - env['HUGGING_FACE_HUB_TOKEN'] = HUGGING_FACE_HUB_TOKEN - - image_uri = None - if deployment.num_gpus: - env['SM_NUM_GPUS'] = json.dumps(deployment.num_gpus) - - if 
deployment.quantization: - env['HF_MODEL_QUANTIZE'] = deployment.quantization - - if task == HuggingFaceTask.TextGeneration: - # use TGI imageq if llm. - image_uri = get_huggingface_llm_image_uri( - "huggingface", - version="1.4.2" - ) - - huggingface_model = HuggingFaceModel( - env=env, - role=SAGEMAKER_ROLE, - transformers_version="4.37", - pytorch_version="2.1", - py_version="py310", - image_uri=image_uri - ) - - endpoint_name = get_unique_endpoint_name( - model.id, deployment.endpoint_name) - - deployment.endpoint_name = endpoint_name - - console.log( - "Deploying model to AWS. [magenta]This may take up to 10 minutes for very large models.[/magenta] See full logs here:") - console.print( - f"https://{region_name}.console.aws.amazon.com/cloudwatch/home#logsV2:log-groups/log-group/$252Faws$252Fsagemaker$252FEndpoints$252F{endpoint_name}") - - with console.status("[bold green]Deploying model...") as status: - table = Table(show_header=False, header_style="magenta") - table.add_column("Resource", style="dim") - table.add_column("Value", style="blue") - table.add_row("model", model.id) - table.add_row("EC2 instance type", deployment.instance_type) - table.add_row("Number of instances", str( - deployment.instance_count)) - table.add_row("task", task) - console.print(table) - - try: - predictor = huggingface_model.deploy( - initial_instance_count=deployment.instance_count, - instance_type=deployment.instance_type, - endpoint_name=endpoint_name, - ) - except Exception: - console.print_exception() - quit() - - print_success( - f"{model.id} is now up and running at the endpoint [blue]{predictor.endpoint_name}") - - write_config(deployment, model) - return predictor - -def deploy_huggingface_model_to_vertexai(deployment, model): - pass - - - -def deploy_custom_huggingface_model(deployment: Deployment, model: Model): - SAGEMAKER_ROLE = dotenv_values(".env").get("SAGEMAKER_ROLE") - - region_name = session.region_name - if model.location is None: - print_error("Missing model 
source location.") - return - - s3_path = model.location - if not is_s3_uri(model.location): - # Local file. Upload to s3 before deploying - bucket = sagemaker_session.default_bucket() - s3_path = construct_s3_uri(bucket, f"models/{model.id}") - with console.status(f"[bold green]Uploading custom {model.id} model to S3 at {s3_path}...") as status: - try: - s3_path = S3Uploader.upload( - model.location, s3_path) - except Exception: - print_error("[red] Model failed to upload to S3") - - endpoint_name = get_unique_endpoint_name( - model.id, deployment.endpoint_name) - - deployment.endpoint_name = endpoint_name - model.task = get_model_and_task(model.id)['task'] - - console.log( - "Deploying model to AWS. [magenta]This may take up to 10 minutes for very large models.[/magenta] See full logs here:") - console.print( - f"https://{region_name}.console.aws.amazon.com/cloudwatch/home#logsV2:log-groups/log-group/$252Faws$252Fsagemaker$252FEndpoints$252F{endpoint_name}") - - # create Hugging Face Model Class - huggingface_model = HuggingFaceModel( - # path to your trained sagemaker model - model_data=s3_path, - role=SAGEMAKER_ROLE, # iam role with permissions to create an Endpoint - transformers_version="4.37", - pytorch_version="2.1", - py_version="py310", - ) - - with console.status("[bold green]Deploying model...") as status: - table = Table(show_header=False, header_style="magenta") - table.add_column("Resource", style="dim") - table.add_column("Value", style="blue") - table.add_row("S3 Path", s3_path) - table.add_row("EC2 instance type", deployment.instance_type) - table.add_row("Number of instances", str( - deployment.instance_count)) - console.print(table) - - try: - predictor = huggingface_model.deploy( - initial_instance_count=deployment.instance_count, - instance_type=deployment.instance_type, - endpoint_name=endpoint_name - ) - except Exception: - console.print_exception() - quit() - - print_success( - f"Custom {model.id} is now up and running at the endpoint 
[blue]{predictor.endpoint_name}") - - write_config(deployment, model) - return predictor - - -def create_and_deploy_jumpstart_model(deployment: Deployment, model: Model): - SAGEMAKER_ROLE = dotenv_values(".env").get("SAGEMAKER_ROLE") - - region_name = session.region_name - endpoint_name = get_unique_endpoint_name( - model.id, deployment.endpoint_name) - deployment.endpoint_name = endpoint_name - model.task = get_model_and_task(model.id)['task'] - - console.log( - "Deploying model to AWS. [magenta]This may take up to 10 minutes for very large models.[/magenta] See full logs here:") - - console.print( - f"https://{region_name}.console.aws.amazon.com/cloudwatch/home#logsV2:log-groups/log-group/$252Faws$252Fsagemaker$252FEndpoints$252F{endpoint_name}") - - with console.status("[bold green]Deploying model...") as status: - table = Table(show_header=False, header_style="magenta") - table.add_column("Resource", style="dim") - table.add_column("Value", style="blue") - table.add_row("model", model.id) - table.add_row("EC2 instance type", deployment.instance_type) - table.add_row("Number of instances", str( - deployment.instance_count)) - console.print(table) - - jumpstart_model = JumpStartModel( - model_id=model.id, instance_type=deployment.instance_type, role=SAGEMAKER_ROLE) - - # Attempt to deploy to AWS - try: - predictor = jumpstart_model.deploy( - initial_instance_count=deployment.instance_count, - instance_type=deployment.instance_type, - endpoint_name=endpoint_name, - accept_eula=True - ) - pass - except Exception: - console.print_exception() - quit() - - write_config(deployment, model) - print_success( - f"{model.id} is now up and running at the endpoint [blue]{predictor.endpoint_name}") - - return predictor diff --git a/magemaker/sagemaker/delete_model.py b/magemaker/sagemaker/delete_model.py deleted file mode 100644 index 7c10078..0000000 --- a/magemaker/sagemaker/delete_model.py +++ /dev/null @@ -1,17 +0,0 @@ -import boto3 -from rich import print -from 
magemaker.utils.rich_utils import print_success -from typing import List - - -def delete_sagemaker_model(endpoint_names: List[str] = None): - sagemaker_client = boto3.client('sagemaker') - - if len(endpoint_names) == 0: - print_success("No Endpoints to delete!") - return - - # Add validation / error handling - for endpoint in endpoint_names: - print(f"Deleting [blue]{endpoint}") - sagemaker_client.delete_endpoint(EndpointName=endpoint) diff --git a/magemaker/sagemaker/fine_tune_model.py b/magemaker/sagemaker/fine_tune_model.py deleted file mode 100644 index 4fd597d..0000000 --- a/magemaker/sagemaker/fine_tune_model.py +++ /dev/null @@ -1,119 +0,0 @@ -import logging -import os -import sagemaker -from botocore.exceptions import ClientError -from datasets import load_dataset -from rich import print -from rich.table import Table -from sagemaker.jumpstart.estimator import JumpStartEstimator -from magemaker.console import console -from magemaker.schemas.model import Model, ModelSource -from magemaker.schemas.training import Training -from magemaker.session import sagemaker_session -from magemaker.utils.aws_utils import is_s3_uri -from magemaker.utils.rich_utils import print_success, print_error -from transformers import AutoTokenizer - -from dotenv import load_dotenv -load_dotenv() - - -def prep_hf_data(s3_bucket: str, dataset_name_or_path: str, model: Model): - train_dataset, test_dataset = load_dataset( - dataset_name_or_path, split=["train", "test"]) - tokenizer = AutoTokenizer.from_pretrained(model.id) - - def tokenize(batch): - return tokenizer(batch["text"], padding="max_length", truncation=True) - - # tokenize train and test datasets - train_dataset = train_dataset.map(tokenize, batched=True) - test_dataset = test_dataset.map(tokenize, batched=True) - - # set dataset format for PyTorch - train_dataset = train_dataset.rename_column("label", "labels") - train_dataset.set_format( - "torch", columns=["input_ids", "attention_mask", "labels"]) - test_dataset = 
test_dataset.rename_column("label", "labels") - test_dataset.set_format( - "torch", columns=["input_ids", "attention_mask", "labels"]) - - # save train_dataset to s3 - training_input_path = f's3://{s3_bucket}/datasets/train' - train_dataset.save_to_disk(training_input_path) - - # save test_dataset to s3 - test_input_path = f's3://{s3_bucket}/datasets/test' - test_dataset.save_to_disk(test_input_path) - - return training_input_path, test_input_path - - -def train_model(training: Training, model: Model, estimator): - # TODO: Accept hf datasets or local paths to upload to s3 - if not is_s3_uri(training.training_input_path): - raise Exception("Training data needs to be uploaded to s3") - - # TODO: Implement training, validation, and test split or accept a directory of files - training_dataset_s3_path = training.training_input_path - - table = Table(show_header=False, header_style="magenta") - table.add_column("Resource", style="dim") - table.add_column("Value", style="blue") - table.add_row("model", model.id) - table.add_row("model_version", model.version) - table.add_row("base_model_uri", estimator.model_uri) - table.add_row("image_uri", estimator.image_uri) - table.add_row("EC2 instance type", training.instance_type) - table.add_row("Number of instances", str(training.instance_count)) - console.print(table) - - estimator.fit({"training": training_dataset_s3_path}) - - predictor = estimator.deploy( - initial_instance_count=training.instance_count, instance_type=training.instance_type) - - print_success( - f"Trained model {model.id} is now up and running at the endpoint [blue]{predictor.endpoint_name}") - - -def fine_tune_model(training: Training, model: Model): - SAGEMAKER_ROLE = os.environ.get("SAGEMAKER_ROLE") - - estimator = None - match model.source: - case ModelSource.Sagemaker: - hyperparameters = get_hyperparameters_for_model(training, model) - estimator = JumpStartEstimator( - model_id=model.id, - model_version=model.version, - 
instance_type=training.instance_type, - instance_count=training.instance_count, - output_path=training.output_path, - environment={"accept_eula": "true"}, - role=SAGEMAKER_ROLE, - sagemaker_session=sagemaker_session, - hyperparameters=hyperparameters - ) - case ModelSource.HuggingFace: - raise NotImplementedError - case ModelSource.Custom: - raise NotImplementedError - - try: - print_success("Enqueuing training job") - res = train_model(training, model, estimator) - except ClientError as e: - logging.error(e) - print_error("Training job enqueue fail") - return False - - -def get_hyperparameters_for_model(training: Training, model: Model): - hyperparameters = sagemaker.hyperparameters.retrieve_default( - model_id=model.id, model_version=model.version) - - if training.hyperparameters is not None: - hyperparameters.update( - (k, v) for k, v in training.hyperparameters.model_dump().items() if v is not None) - return hyperparameters diff --git a/magemaker/sagemaker/query_endpoint.py b/magemaker/sagemaker/query_endpoint.py deleted file mode 100644 index 8c425a4..0000000 --- a/magemaker/sagemaker/query_endpoint.py +++ /dev/null @@ -1,205 +0,0 @@ -import boto3 -import json -import inquirer -from InquirerPy import prompt -from sagemaker.huggingface.model import HuggingFacePredictor -from magemaker.config import ModelDeployment -from magemaker.console import console -from magemaker.sagemaker import SagemakerTask -from magemaker.huggingface import HuggingFaceTask -from magemaker.utils.model_utils import get_model_and_task, is_sagemaker_model, get_text_generation_hyperpameters -from magemaker.utils.rich_utils import print_error -from magemaker.schemas.deployment import Deployment -from magemaker.schemas.model import Model -from magemaker.schemas.query import Query -from magemaker.session import sagemaker_session -from typing import Dict, Tuple, Optional - - -def make_query_request(endpoint_name: str, query: Query, config: Tuple[Deployment, Model]): - if 
is_sagemaker_model(endpoint_name, config): - return query_sagemaker_endpoint(endpoint_name, query, config) - else: - return query_hugging_face_endpoint(endpoint_name, query, config) - - -def parse_response(query_response): - model_predictions = json.loads(query_response['Body'].read()) - probabilities, labels, predicted_label = model_predictions[ - 'probabilities'], model_predictions['labels'], model_predictions['predicted_label'] - return probabilities, labels, predicted_label - - -def query_hugging_face_endpoint(endpoint_name: str, user_query: Query, config: Tuple[Deployment, Model]): - task = get_model_and_task(endpoint_name, config)['task'] - predictor = HuggingFacePredictor(endpoint_name=endpoint_name, - sagemaker_session=sagemaker_session) - - query = user_query.query - context = user_query.context - - input = {"inputs": query} - if task is not None and task == HuggingFaceTask.QuestionAnswering: - if context is None: - questions = [{ - "type": "input", "message": "What context would you like to provide?:", "name": "context"}] - answers = prompt(questions) - context = answers.get('context', '') - - if not context: - raise Exception("Must provide context for question-answering") - - input = {} - input['context'] = answers['context'] - input['question'] = query - - if task is not None and task == HuggingFaceTask.TextGeneration: - parameters = get_text_generation_hyperpameters(config, user_query) - input['parameters'] = parameters - - if task is not None and task == HuggingFaceTask.ZeroShotClassification: - if context is None: - questions = [ - inquirer.Text('labels', - message="What labels would you like to use? 
(comma separated values)?", - ) - ] - answers = inquirer.prompt(questions) - context = answers.get('labels', '') - - if not context: - raise Exception( - "Must provide labels for zero shot text classification") - - labels = context.split(',') - input = json.dumps({ - "sequences": query, - "candidate_labels": labels - }) - - try: - result = predictor.predict(input) - except Exception: - console.print_exception() - quit() - - print(result) - return result - - -def query_sagemaker_endpoint(endpoint_name: str, user_query: Query, config: Tuple[Deployment, Model]): - client = boto3.client('runtime.sagemaker') - task = get_model_and_task(endpoint_name, config)['task'] - - if task not in [ - SagemakerTask.ExtractiveQuestionAnswering, - SagemakerTask.TextClassification, - SagemakerTask.SentenceSimilarity, - SagemakerTask.SentencePairClassification, - SagemakerTask.Summarization, - SagemakerTask.NamedEntityRecognition, - SagemakerTask.TextEmbedding, - SagemakerTask.TcEmbedding, - SagemakerTask.TextGeneration, - SagemakerTask.TextGeneration1, - SagemakerTask.TextGeneration2, - SagemakerTask.Translation, - SagemakerTask.FillMask, - SagemakerTask.ZeroShotTextClassification - ]: - print_error(""" -Querying this model type inside of Model Manager isn’t yet supported. -You can query it directly through the API endpoint - see here for documentation on how to do this: -https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html - """) - raise Exception("Unsupported") - - # MIME content type varies per deployment - content_type = "application/x-text" - accept_type = "application/json;verbose" - - # Depending on the task, input needs to be formatted differently. - # e.g. 
question-answering needs to have {question: , context: } - query = user_query.query - context = user_query.context - input = query.encode("utf-8") - match task: - case SagemakerTask.ExtractiveQuestionAnswering: - if context is None: - questions = [ - { - 'type': 'input', - 'name': 'context', - 'message': "What context would you like to provide?", - } - ] - answers = prompt(questions) - context = answers.get("context", '') - - if not context: - raise Exception("Must provide context for question-answering") - - content_type = "application/list-text" - input = json.dumps([query, context]).encode("utf-8") - - case SagemakerTask.SentencePairClassification: - if context is None: - questions = [ - inquirer.Text('context', - message="What sentence would you like to compare against?", - ) - ] - answers = inquirer.prompt(questions) - context = answers.get("context", '') - if not context: - raise Exception( - "Must provide a second sentence for sentence pair classification") - - content_type = "application/list-text" - input = json.dumps([query, context]).encode("utf-8") - case SagemakerTask.ZeroShotTextClassification: - if context is None: - questions = [ - inquirer.Text('labels', - message="What labels would you like to use? 
(comma separated values)?", - ) - ] - answers = inquirer.prompt(questions) - context = answers.get('labels', '') - - if not context: - raise Exception( - "must provide labels for zero shot text classification") - labels = context.split(',') - - content_type = "application/json" - input = json.dumps({ - "sequences": query, - "candidate_labels": labels, - }).encode("utf-8") - case SagemakerTask.TextGeneration: - parameters = get_text_generation_hyperpameters(config, user_query) - input = json.dumps({ - "inputs": query, - "parameters": parameters, - }).encode("utf-8") - content_type = "application/json" - - try: - response = client.invoke_endpoint( - EndpointName=endpoint_name, ContentType=content_type, Body=input, Accept=accept_type) - except Exception: - console.print_exception() - quit() - - model_predictions = json.loads(response['Body'].read()) - print(model_predictions) - return model_predictions - - -def test(endpoint_name: str): - text1 = 'astonishing ... ( frames ) profound ethical and philosophical questions in the form of dazzling pop entertainment' - text2 = 'simply stupid , irrelevant and deeply , truly , bottomlessly cynical ' - - for text in [text1, text2]: - query_sagemaker_endpoint(endpoint_name, text.encode('utf-8')) diff --git a/pyproject.toml b/pyproject.toml index 5a0b4a5..34ab6c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,8 @@ dependencies = [ 'azure-identity==1.19.0', 'azure-mgmt-resource==23.2.0', 'marshmallow==3.23.2', + 'click>=8.0.0', + 'docker>=6.1.0', ] requires-python = ">=3.11" readme = "README.md" @@ -45,7 +47,7 @@ distribution = true package-dir = "magemaker" [tool.pdm.build] -includes = ["magemaker", "magemaker/scripts/preflight.sh", "magemaker/scripts/setup_role.sh"] # Include setup.sh in the package distribution +includes = ["magemaker", "magemaker/scripts/preflight.sh", "magemaker/scripts/setup_role.sh", "magemaker/docker/Dockerfile", "magemaker/docker/entrypoint.sh"] # Include setup.sh in the package 
distribution [tool.pdm.dev-dependencies]