diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml
index ff6c82a..c1070c4 100644
--- a/.github/workflows/profile.yml
+++ b/.github/workflows/profile.yml
@@ -1,26 +1,94 @@
name: profiling-online-endpoints
on:
workflow_dispatch:
+ inputs:
+ SKU_LIST:
+        description: 'Define the list of SKUs in the format ["sku:num_concurrent_requests", "sku:num_concurrent_requests"]'
+ required: true
+ default: '["Standard_F2s_v2:1", "Standard_F4s_v2:2"]'
jobs:
- build:
+ create_profiler_compute:
runs-on: ubuntu-latest
+ outputs:
+ PROFILER_COMPUTE_NAME: ${{ steps.set_profiler_compute_info.outputs.PROFILER_COMPUTE_NAME }}
steps:
- - name: check out repo
+ - name: Azure login
+ uses: azure/login@v1
+ with:
+ creds: ${{secrets.AZURE_CREDENTIALS}}
+ - name: Install dependencies
+ run: |
+ az extension add -n ml -y
+ sudo apt-get update -y && sudo apt-get install jq
+ - name: Setup az environment
+ run: |
+ az config set defaults.workspace=${{secrets.AML_WORKSPACE}}
+ az config set defaults.group=${{secrets.RESOURCE_GROUP}}
+ az account set -s ${{secrets.SUBSCRIPTION_ID}}
+ - name: Set profiler compute info
+ id: set_profiler_compute_info
+ run: |
+ export PROFILER_COMPUTE_NAME=profilingTest
+ echo PROFILER_COMPUTE_NAME=$PROFILER_COMPUTE_NAME >> $GITHUB_ENV
+ echo PROFILER_COMPUTE_SIZE=Standard_F4s_v2 >> $GITHUB_ENV
+ echo "::set-output name=PROFILER_COMPUTE_NAME::$PROFILER_COMPUTE_NAME"
+ - name: Check out repo
uses: actions/checkout@v2
- - name: azure login
+ - name: Create profiling compute
+ run: bash -x profiling/create-profiling-compute.sh
+ env:
+ PROFILER_COMPUTE_NAME: ${{ env.PROFILER_COMPUTE_NAME }}
+ PROFILER_COMPUTE_SIZE: ${{ env.PROFILER_COMPUTE_SIZE }}
+ working-directory: code
+
+ profiling:
+ runs-on: ubuntu-latest
+ needs: create_profiler_compute
+ strategy:
+ fail-fast: false
+ matrix:
+ sku_connection_pair: ${{ fromJson(github.event.inputs.SKU_LIST) }}
+ steps:
+ - name: Check out repo
+ uses: actions/checkout@v2
+ - name: Azure login
uses: azure/login@v1
with:
creds: ${{secrets.AZURE_CREDENTIALS}}
- - name: install dependencies
+ - name: Install dependencies
run: |
- az extension add -n ml -y
- apt-get update -y && apt-get install jq
- - name: setup
+ az extension add -n ml -y
+ sudo apt-get update -y && sudo apt-get install jq
+ - name: Setup az environment
run: |
- az config set defaults.workspace=${{secrets.AML_WORKSPACE}}
- az config set defaults.group=${{secrets.RESOURCE_GROUP}}
- az account set -s ${{secrets.SUBSCRIPTION_ID}}
- - name: run job
+ az config set defaults.workspace=${{secrets.AML_WORKSPACE}}
+ az config set defaults.group=${{secrets.RESOURCE_GROUP}}
+ az account set -s ${{secrets.SUBSCRIPTION_ID}}
+ - name: Generate unique online-endpoint name and online-deployment name
+ run: |
+ export ENDPOINT_NAME=endpt-`echo $RANDOM`
+ echo ENDPOINT_NAME=$ENDPOINT_NAME >> $GITHUB_ENV
+ echo DEPLOYMENT_NAME=$ENDPOINT_NAME-dep >> $GITHUB_ENV
+ - name: Create online-endpoint and online-deployment
+ run: bash -x profiling/create-online-endpoint.sh
+ env:
+ ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }}
+ DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }}
+ SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }}
+ working-directory: code
+ - name: Run profiling job
run: bash -x profiling/how-to-profile-online-endpoint.sh
+ env:
+ ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }}
+ DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }}
+ SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }}
+ PROFILER_COMPUTE_NAME: ${{ needs.create_profiler_compute.outputs.PROFILER_COMPUTE_NAME }}
+ working-directory: code
+ - name: Delete online-endpoint and online-deployment
+ run: bash -x profiling/delete-online-endpoint.sh
+ env:
+ ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }}
working-directory: code
+
+
\ No newline at end of file
diff --git a/code/online-endpoint/blue-deployment.yml b/code/online-endpoint/blue-deployment-tmpl.yml
similarity index 64%
rename from code/online-endpoint/blue-deployment.yml
rename to code/online-endpoint/blue-deployment-tmpl.yml
index 874aebb..44622aa 100644
--- a/code/online-endpoint/blue-deployment.yml
+++ b/code/online-endpoint/blue-deployment-tmpl.yml
@@ -2,13 +2,15 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.sch
name: blue
endpoint_name: my-endpoint
model:
- local_path: model-1/model/sklearn_regression_model.pkl
+ path: model-1/model/
code_configuration:
- code:
- local_path: model-1/onlinescoring/
+ code: model-1/onlinescoring/
scoring_script: score.py
environment:
conda_file: model-1/environment/conda.yml
image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1
-instance_type: Standard_F2s_v2
+instance_type: <% COMPUTER_SIZE %>
instance_count: 1
+request_settings:
+ request_timeout_ms: 3000
+ max_concurrent_requests_per_instance: 1024
diff --git a/code/online-endpoint/model-1/environment/conda.yml b/code/online-endpoint/model-1/environment/conda.yml
index c23bd70..97f5beb 100644
--- a/code/online-endpoint/model-1/environment/conda.yml
+++ b/code/online-endpoint/model-1/environment/conda.yml
@@ -8,6 +8,5 @@ dependencies:
- scikit-learn=0.24.2
- scipy=1.7.1
- pip:
- - azureml-defaults==1.33.0
- - inference-schema[numpy-support]==1.3.0
+ - azureml-defaults==1.38.0
- joblib==1.0.1
diff --git a/code/online-endpoint/model-1/onlinescoring/score.py b/code/online-endpoint/model-1/onlinescoring/score.py
index ac565f7..5d5c3a7 100644
--- a/code/online-endpoint/model-1/onlinescoring/score.py
+++ b/code/online-endpoint/model-1/onlinescoring/score.py
@@ -13,8 +13,9 @@ def init():
global model
# AZUREML_MODEL_DIR is an environment variable created during deployment.
# It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
+ # Please provide your model's folder name if there is one
model_path = os.path.join(
- os.getenv("AZUREML_MODEL_DIR"), "sklearn_regression_model.pkl"
+ os.getenv("AZUREML_MODEL_DIR"), "model/sklearn_regression_model.pkl"
)
# deserialize the model file back into a sklearn model
model = joblib.load(model_path)
@@ -27,7 +28,7 @@ def run(raw_data):
In the example we extract the data from the json input and call the scikit-learn model's predict()
method and return the result back
"""
- logging.info("Request received")
+ logging.info("model 1: request received")
data = json.loads(raw_data)["data"]
data = numpy.array(data)
result = model.predict(data)
diff --git a/code/online-endpoint/model-1/onlinescoring/score_managedidentity.py b/code/online-endpoint/model-1/onlinescoring/score_managedidentity.py
index c1704fa..18eea27 100644
--- a/code/online-endpoint/model-1/onlinescoring/score_managedidentity.py
+++ b/code/online-endpoint/model-1/onlinescoring/score_managedidentity.py
@@ -56,8 +56,9 @@ def init():
# AZUREML_MODEL_DIR is an environment variable created during deployment.
# It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
# For multiple models, it points to the folder containing all deployed models (./azureml-models)
+ # Please provide your model's folder name if there is one
model_path = os.path.join(
- os.getenv("AZUREML_MODEL_DIR"), "sklearn_regression_model.pkl"
+ os.getenv("AZUREML_MODEL_DIR"), "model/sklearn_regression_model.pkl"
)
# deserialize the model file back into a sklearn model
model = joblib.load(model_path)
diff --git a/code/online-endpoint/model-2/environment/conda.yml b/code/online-endpoint/model-2/environment/conda.yml
index 30ba157..87af935 100644
--- a/code/online-endpoint/model-2/environment/conda.yml
+++ b/code/online-endpoint/model-2/environment/conda.yml
@@ -8,6 +8,5 @@ dependencies:
- scikit-learn=0.24.2
- scipy=1.7.1
- pip:
- - azureml-defaults==1.33.0
- - inference-schema[numpy-support]==1.3.0
+ - azureml-defaults==1.38.0
- joblib==1.0.1
\ No newline at end of file
diff --git a/code/online-endpoint/model-2/onlinescoring/score.py b/code/online-endpoint/model-2/onlinescoring/score.py
index ac565f7..e248af3 100644
--- a/code/online-endpoint/model-2/onlinescoring/score.py
+++ b/code/online-endpoint/model-2/onlinescoring/score.py
@@ -13,8 +13,9 @@ def init():
global model
# AZUREML_MODEL_DIR is an environment variable created during deployment.
# It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
+ # Please provide your model's folder name if there is one
model_path = os.path.join(
- os.getenv("AZUREML_MODEL_DIR"), "sklearn_regression_model.pkl"
+ os.getenv("AZUREML_MODEL_DIR"), "model/sklearn_regression_model.pkl"
)
# deserialize the model file back into a sklearn model
model = joblib.load(model_path)
@@ -27,9 +28,14 @@ def run(raw_data):
In the example we extract the data from the json input and call the scikit-learn model's predict()
method and return the result back
"""
- logging.info("Request received")
- data = json.loads(raw_data)["data"]
- data = numpy.array(data)
- result = model.predict(data)
+ logging.info("model 2: request received")
+ result = [0.5, 0.5]
logging.info("Request processed")
- return result.tolist()
+ # return hardcoded result so that it is easy to validate safe rollout scenario: https://docs.microsoft.com/en-us/azure/machine-learning/how-to-safely-rollout-managed-endpoints
+ return result
+
+ # actual scoring logic for reference:
+ # data = json.loads(raw_data)["data"]
+ # data = numpy.array(data)
+ # result = model.predict(data)
+ # return result.tolist()
diff --git a/code/profiling/create-online-endpoint.sh b/code/profiling/create-online-endpoint.sh
new file mode 100644
index 0000000..9313e33
--- /dev/null
+++ b/code/profiling/create-online-endpoint.sh
@@ -0,0 +1,34 @@
+#
+export SKU_CONNECTION_PAIR=${SKU_CONNECTION_PAIR}
+export ENDPOINT_NAME=${ENDPOINT_NAME}
+export DEPLOYMENT_NAME=${DEPLOYMENT_NAME}
+export DEPLOYMENT_COMPUTER_SIZE=`echo $SKU_CONNECTION_PAIR | awk -F: '{print $1}'`
+# the compute size (VM SKU) for the online-deployment
+#
+
+#
+echo "Creating Endpoint $ENDPOINT_NAME of size $DEPLOYMENT_COMPUTER_SIZE..."
+sed -e "s/<% COMPUTER_SIZE %>/$DEPLOYMENT_COMPUTER_SIZE/g" online-endpoint/blue-deployment-tmpl.yml > online-endpoint/${DEPLOYMENT_NAME}.yml
+az ml online-endpoint create --name $ENDPOINT_NAME -f online-endpoint/endpoint.yml
+az ml online-deployment create --name $DEPLOYMENT_NAME --endpoint $ENDPOINT_NAME -f online-endpoint/${DEPLOYMENT_NAME}.yml --all-traffic
+#
+
+#
+endpoint_status=`az ml online-endpoint show -n $ENDPOINT_NAME --query "provisioning_state" -o tsv`
+echo $endpoint_status
+if [[ $endpoint_status == "Succeeded" ]]; then
+ echo "Endpoint $ENDPOINT_NAME created successfully"
+else
+ echo "Endpoint $ENDPOINT_NAME creation failed"
+ exit 1
+fi
+
+deploy_status=`az ml online-deployment show --name $DEPLOYMENT_NAME --endpoint-name $ENDPOINT_NAME --query "provisioning_state" -o tsv`
+echo $deploy_status
+if [[ $deploy_status == "Succeeded" ]]; then
+ echo "Deployment $DEPLOYMENT_NAME completed successfully"
+else
+ echo "Deployment $DEPLOYMENT_NAME failed"
+ exit 1
+fi
+#
\ No newline at end of file
diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh
new file mode 100644
index 0000000..b3f8c9f
--- /dev/null
+++ b/code/profiling/create-profiling-compute.sh
@@ -0,0 +1,30 @@
+#
+export PROFILER_COMPUTE_NAME="${PROFILER_COMPUTE_NAME}" # the compute name for hosting the profiler
+export PROFILER_COMPUTE_SIZE="${PROFILER_COMPUTE_SIZE}" # the compute size for hosting the profiler
+#
+
+#
+# skip compute creation if compute exists already
+az ml compute show --name $PROFILER_COMPUTE_NAME
+if [[ $? -eq 0 ]]; then echo "compute $PROFILER_COMPUTE_NAME exists already, will skip creation and role assignment." && exit 0; fi
+
+echo "Creating Compute $PROFILER_COMPUTE_NAME ..."
+az ml compute create --name $PROFILER_COMPUTE_NAME --size $PROFILER_COMPUTE_SIZE --identity-type SystemAssigned --type amlcompute --max-instances 3
+
+# check compute status
+compute_status=`az ml compute show --name $PROFILER_COMPUTE_NAME --query "provisioning_state" -o tsv`
+echo $compute_status
+if [[ $compute_status == "Succeeded" ]]; then
+ echo "Compute $PROFILER_COMPUTE_NAME created successfully"
+else
+ echo "Compute $PROFILER_COMPUTE_NAME creation failed"
+ exit 1
+fi
+
+# create role assignment for accessing workspace resources
+compute_info=`az ml compute show --name $PROFILER_COMPUTE_NAME --query '{"id": id, "identity_object_id": identity.principal_id}' -o json`
+workspace_resource_id=`echo $compute_info | jq -r '.id' | sed 's/\(.*\)\/computes\/.*/\1/'`
+identity_object_id=`echo $compute_info | jq -r '.identity_object_id'`
+az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id
+if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi
+#
\ No newline at end of file
diff --git a/code/profiling/delete-online-endpoint.sh b/code/profiling/delete-online-endpoint.sh
new file mode 100644
index 0000000..c07fa3b
--- /dev/null
+++ b/code/profiling/delete-online-endpoint.sh
@@ -0,0 +1,7 @@
+#
+export ENDPOINT_NAME=${ENDPOINT_NAME}
+#
+
+#
+az ml online-endpoint delete --name $ENDPOINT_NAME -y
+#
\ No newline at end of file
diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh
index b459257..06e6945 100644
--- a/code/profiling/how-to-profile-online-endpoint.sh
+++ b/code/profiling/how-to-profile-online-endpoint.sh
@@ -11,83 +11,26 @@
## 7. az configure --defaults group= workspace=
#
-export ENDPOINT_NAME=""
-export DEPLOYMENT_NAME=""
-export PROFILING_TOOL="" # allowed values: wrk, wrk2 and labench
-export PROFILER_COMPUTE_NAME=""
-export PROFILER_COMPUTE_SIZE="" # required only when compute does not exist already
+export ENDPOINT_NAME="${ENDPOINT_NAME}"
+export DEPLOYMENT_NAME="${DEPLOYMENT_NAME}"
+export SKU_CONNECTION_PAIR=${SKU_CONNECTION_PAIR}
+export PROFILING_TOOL=wrk # allowed values: wrk, wrk2 and labench
+export PROFILER_COMPUTE_NAME="${PROFILER_COMPUTE_NAME}" # the compute name for hosting the profiler
export DURATION="" # time for running the profiling tool (duration for each wrk call or labench call), default value is 300s
-export CONNECTIONS="" # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set
+export CONNECTIONS=`echo $SKU_CONNECTION_PAIR | awk -F: '{print $2}'` # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set
export THREAD="" # for wrk and wrk2 only, no. of threads allocated for the profiling tool, default value is 1
export TARGET_RPS="" # for labench and wrk2 only, target rps for the profiling tool, default value is 50
export CLIENTS="" # for labench only, no. of clients for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set
export TIMEOUT="" # for labench only, timeout for each request, default value is 10s
#
-export ENDPOINT_NAME=endpt-`echo $RANDOM`
-export DEPLOYMENT_NAME=blue
-export PROFILING_TOOL=wrk
-export PROFILER_COMPUTE_NAME=profilingTest # the compute name for hosting the profiler
-export PROFILER_COMPUTE_SIZE=Standard_F4s_v2 # the compute size for hosting the profiler
-
-#
-echo "Creating Endpoint $ENDPOINT_NAME ..."
-az ml online-endpoint create --name $ENDPOINT_NAME -f online-endpoint/endpoint.yml
-az ml online-deployment create --name $DEPLOYMENT_NAME --endpoint $ENDPOINT_NAME -f online-endpoint/blue-deployment.yml --all-traffic
-#
-
-#
-endpoint_status=`az ml online-endpoint show -n $ENDPOINT_NAME --query "provisioning_state" -o tsv`
-echo $endpoint_status
-if [[ $endpoint_status == "Succeeded" ]]; then
- echo "Endpoint $ENDPOINT_NAME created successfully"
-else
- echo "Endpoint $ENDPOINT_NAME creation failed"
- exit 1
-fi
-
-deploy_status=`az ml online-deployment show --name $DEPLOYMENT_NAME --endpoint-name $ENDPOINT_NAME --query "provisioning_state" -o tsv`
-echo $deploy_status
-if [[ $deploy_status == "Succeeded" ]]; then
- echo "Deployment $DEPLOYMENT_NAME completed successfully"
-else
- echo "Deployment $DEPLOYMENT_NAME failed"
- exit 1
-fi
-#
-
-#
-echo "Creating Compute $PROFILER_COMPUTE_NAME ..."
-az ml compute create --name $PROFILER_COMPUTE_NAME --size $PROFILER_COMPUTE_SIZE --identity-type SystemAssigned --type amlcompute
-
-# check compute status
-compute_status=`az ml compute show --name $PROFILER_COMPUTE_NAME --query "provisioning_state" -o tsv`
-echo $compute_status
-if [[ $compute_status == "Succeeded" ]]; then
- echo "Compute $PROFILER_COMPUTE_NAME created successfully"
-else
- echo "Compute $PROFILER_COMPUTE_NAME creation failed"
- exit 1
-fi
-
-# create role assignment for acessing workspace resources
-compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv`
-workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'`
-access_token=`az account get-access-token --query accessToken -o tsv`
-compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"`
-if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi
-identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"`
-az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id
-if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi
-#
-
-#
+#
default_datastore_info=`az ml datastore show --name workspaceblobstore -o json`
account_name=`echo $default_datastore_info | jq '.account_name' | sed "s/\"//g"`
container_name=`echo $default_datastore_info | jq '.container_name' | sed "s/\"//g"`
connection_string=`az storage account show-connection-string --name $account_name -o tsv`
-az storage blob upload --container-name $container_name/profiling_payloads --name payload.txt --file endpoints/online/profiling/payload.txt --connection-string $connection_string
-#
+az storage blob upload --container-name $container_name/profiling_payloads --name ${ENDPOINT_NAME}_payload.txt --file profiling/payload.txt --connection-string $connection_string
+#
#
# please specify environment variable "IDENTITY_ACCESS_TOKEN" when working with ml compute with no appropriate MSI attached
@@ -102,11 +45,12 @@ sed \
-e "s/<% TIMEOUT %>/$TIMEOUT/g" \
-e "s/<% THREAD %>/$THREAD/g" \
-e "s/<% COMPUTE_NAME %>/$PROFILER_COMPUTE_NAME/g" \
- profiling/profiling_job_tmpl.yml > profiling_job.yml
+ -e "s/<% SKU_CONNECTION_PAIR %>/$SKU_CONNECTION_PAIR/g" \
+ profiling/profiling_job_tmpl.yml > ${ENDPOINT_NAME}_profiling_job.yml
#
#
-run_id=$(az ml job create -f profiling_job.yml --query name -o tsv)
+run_id=$(az ml job create -f ${ENDPOINT_NAME}_profiling_job.yml --query name -o tsv)
#
#
@@ -121,8 +65,4 @@ sleep 10
#
az ml job download --name $run_id --download-path report_$run_id
echo "Job result has been downloaded to dir report_$run_id"
-#
-
-#
-az ml online-endpoint delete --name $ENDPOINT_NAME -y
-#
\ No newline at end of file
+#
\ No newline at end of file
diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml
index db738bf..45eb576 100644
--- a/code/profiling/profiling_job_tmpl.yml
+++ b/code/profiling/profiling_job_tmpl.yml
@@ -1,9 +1,10 @@
$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
command: >
- profiling.sh -p {inputs.payload}
+ python -m online_endpoints_model_profiler --payload_path ${{inputs.payload}}
experiment_name: profiling-job
+display_name: <% SKU_CONNECTION_PAIR %>
environment:
- image: docker.io/rachyong/profilers:latest
+ image: mcr.microsoft.com/azureml/online-endpoints-model-profiler:latest
environment_variables:
ONLINE_ENDPOINT: "<% ENDPOINT_NAME %>"
DEPLOYMENT: "<% DEPLOYMENT_NAME %>"
@@ -17,4 +18,5 @@ environment_variables:
compute: "azureml:<% COMPUTE_NAME %>"
inputs:
payload:
- file: azureml://datastores/workspaceblobstore/paths/profiling_payloads/payload.txt
+ type: uri_file
+ path: azureml://datastores/workspaceblobstore/paths/profiling_payloads/<% ENDPOINT_NAME %>_payload.txt
\ No newline at end of file