Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
4674162
modify workflow code
xy223 Nov 18, 2021
37943ac
add parallel jobs
xy223 Nov 19, 2021
0971b40
fix pipeline
xy223 Nov 19, 2021
94024a1
uncomment necessary codes
xy223 Nov 19, 2021
df79cd6
fix profile on onlineEndpoint pipeline
xy223 Nov 30, 2021
e6c29cd
set sku list from input
xy223 Nov 30, 2021
f59f424
comment to pass tests
xy223 Nov 30, 2021
607a9c6
uncomment
xy223 Nov 30, 2021
5af6ffa
split profiling process
xy223 Dec 2, 2021
103110f
comment codes to pass actions
xy223 Dec 2, 2021
ffd0c2a
fix ci pipeline
xy223 Dec 2, 2021
e4cc1e7
add default value for max_concurrent_requests_per_instance
xy223 Dec 2, 2021
19e813e
change the # of max_concurrent_requests_per_instance
xy223 Dec 2, 2021
7137ebf
set default value for CONNECTIONS
xy223 Dec 2, 2021
203d17f
change sku_list to sku_connection_pair
xy223 Dec 3, 2021
7aa9d2d
update profiling image version for test only
xy223 Dec 3, 2021
e79aef9
change profiling image version for testing purpose
xy223 Dec 6, 2021
0a016fd
update profiling image version for testing purpose only
xy223 Dec 7, 2021
c6ee4b3
uncomment necessary codes
xy223 Dec 7, 2021
886bce6
make creating of profiler compute a separate step
xy223 Jan 18, 2022
9d7c1d3
fix workflow error
xy223 Jan 18, 2022
7aefc7c
fix workflow error
xy223 Jan 18, 2022
22fca3b
fix workflow error
xy223 Jan 18, 2022
587b713
update for testing purpose only
xy223 Jan 18, 2022
64b82c1
fix workflow errors
xy223 Jan 18, 2022
df05a8f
fix workflow errors
xy223 Jan 18, 2022
fd66b8c
fix workflow errors
xy223 Jan 18, 2022
95fa6f4
fix workflow errors
xy223 Jan 18, 2022
0dd4e01
fix workflow errors
xy223 Jan 18, 2022
2ed3fb8
fix workflow errors
xy223 Jan 18, 2022
b72030d
uncomment codes
xy223 Jan 18, 2022
1b2e3b7
make online-endpoint deletion into separated step
xy223 Jan 24, 2022
de6e9fc
add tags to jobs
xy223 Feb 15, 2022
6b27afd
add job display name
xy223 Feb 15, 2022
95bf051
fix pipeline
xy223 Feb 15, 2022
eee3b9c
modify profilers image
xy223 Feb 17, 2022
07082d7
update image name
xy223 Feb 25, 2022
d58de8b
modify profiling job starting command
xy223 Mar 1, 2022
c70b0ce
uncomment the part for creating compute role assignments
xy223 Mar 1, 2022
6907e57
update yaml schema for new cli
xy223 Mar 16, 2022
32a9d8c
modify profiling job template
xy223 Apr 14, 2022
a57d9ad
comment for test purpose
xy223 Apr 14, 2022
625969b
uncomment code
xy223 Apr 15, 2022
7a765a2
fix code error
xy223 Apr 15, 2022
abff912
fix code error
xy223 Apr 15, 2022
d504a6a
add code for testing purpose only
xy223 Apr 15, 2022
71d4881
add code for testing purpose only
xy223 Apr 15, 2022
71d8853
add code for testing purpose only
xy223 Apr 15, 2022
20915ef
change create-compute script
xy223 Apr 15, 2022
6294e69
modify for testing purpose
xy223 Apr 15, 2022
34ea3ee
uncomment code
xy223 Jun 7, 2022
12c78a7
update profiling template
xy223 Jun 7, 2022
950cc3f
add profiling template change
xy223 Jun 9, 2022
0de197e
skip compute creation if compute exists already
xy223 Jun 9, 2022
3b65e1c
fix deployment files
xy223 Jun 9, 2022
ae57b40
update online-endpoint and online-deployment file
xy223 Jun 9, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 79 additions & 11 deletions .github/workflows/profile.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,94 @@
name: profiling-online-endpoints
on:
workflow_dispatch:
inputs:
SKU_LIST:
description: 'Define the list of skus in the format of ["sku:num_concurrent_requests", "sku:num_concurrent_requests"]'
required: true
default: '["Standard_F2s_v2:1", "Standard_F4s_v2:2"]'
jobs:
build:
create_profiler_compute:
runs-on: ubuntu-latest
outputs:
PROFILER_COMPUTE_NAME: ${{ steps.set_profiler_compute_info.outputs.PROFILER_COMPUTE_NAME }}
steps:
- name: check out repo
- name: Azure login
uses: azure/login@v1
with:
creds: ${{secrets.AZURE_CREDENTIALS}}
- name: Install dependencies
run: |
az extension add -n ml -y
sudo apt-get update -y && sudo apt-get install jq
- name: Setup az environment
run: |
az config set defaults.workspace=${{secrets.AML_WORKSPACE}}
az config set defaults.group=${{secrets.RESOURCE_GROUP}}
az account set -s ${{secrets.SUBSCRIPTION_ID}}
- name: Set profiler compute info
id: set_profiler_compute_info
run: |
export PROFILER_COMPUTE_NAME=profilingTest
echo PROFILER_COMPUTE_NAME=$PROFILER_COMPUTE_NAME >> $GITHUB_ENV
echo PROFILER_COMPUTE_SIZE=Standard_F4s_v2 >> $GITHUB_ENV
echo "::set-output name=PROFILER_COMPUTE_NAME::$PROFILER_COMPUTE_NAME"
- name: Check out repo
uses: actions/checkout@v2
- name: azure login
- name: Create profiling compute
run: bash -x profiling/create-profiling-compute.sh
env:
PROFILER_COMPUTE_NAME: ${{ env.PROFILER_COMPUTE_NAME }}
PROFILER_COMPUTE_SIZE: ${{ env.PROFILER_COMPUTE_SIZE }}
working-directory: code

profiling:
runs-on: ubuntu-latest
needs: create_profiler_compute
strategy:
fail-fast: false
matrix:
sku_connection_pair: ${{ fromJson(github.event.inputs.SKU_LIST) }}
steps:
- name: Check out repo
uses: actions/checkout@v2
- name: Azure login
uses: azure/login@v1
with:
creds: ${{secrets.AZURE_CREDENTIALS}}
- name: install dependencies
- name: Install dependencies
run: |
az extension add -n ml -y
apt-get update -y && apt-get install jq
- name: setup
az extension add -n ml -y
sudo apt-get update -y && sudo apt-get install jq
- name: Setup az environment
run: |
az config set defaults.workspace=${{secrets.AML_WORKSPACE}}
az config set defaults.group=${{secrets.RESOURCE_GROUP}}
az account set -s ${{secrets.SUBSCRIPTION_ID}}
- name: run job
az config set defaults.workspace=${{secrets.AML_WORKSPACE}}
az config set defaults.group=${{secrets.RESOURCE_GROUP}}
az account set -s ${{secrets.SUBSCRIPTION_ID}}
- name: Generate unique online-endpoint name and online-deployment name
run: |
export ENDPOINT_NAME=endpt-`echo $RANDOM`
echo ENDPOINT_NAME=$ENDPOINT_NAME >> $GITHUB_ENV
echo DEPLOYMENT_NAME=$ENDPOINT_NAME-dep >> $GITHUB_ENV
- name: Create online-endpoint and online-deployment
run: bash -x profiling/create-online-endpoint.sh
env:
ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }}
DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }}
SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }}
working-directory: code
- name: Run profiling job
run: bash -x profiling/how-to-profile-online-endpoint.sh
env:
ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }}
DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }}
SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }}
PROFILER_COMPUTE_NAME: ${{ needs.create_profiler_compute.outputs.PROFILER_COMPUTE_NAME }}
working-directory: code
- name: Delete online-endpoint and online-deployment
run: bash -x profiling/delete-online-endpoint.sh
env:
ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }}
working-directory: code



Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.sch
name: blue
endpoint_name: my-endpoint
model:
local_path: model-1/model/sklearn_regression_model.pkl
path: model-1/model/
code_configuration:
code:
local_path: model-1/onlinescoring/
code: model-1/onlinescoring/
scoring_script: score.py
environment:
conda_file: model-1/environment/conda.yml
image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1
instance_type: Standard_F2s_v2
instance_type: <% COMPUTER_SIZE %>
instance_count: 1
request_settings:
request_timeout_ms: 3000
max_concurrent_requests_per_instance: 1024
3 changes: 1 addition & 2 deletions code/online-endpoint/model-1/environment/conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@ dependencies:
- scikit-learn=0.24.2
- scipy=1.7.1
- pip:
- azureml-defaults==1.33.0
- inference-schema[numpy-support]==1.3.0
- azureml-defaults==1.38.0
- joblib==1.0.1
5 changes: 3 additions & 2 deletions code/online-endpoint/model-1/onlinescoring/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ def init():
global model
# AZUREML_MODEL_DIR is an environment variable created during deployment.
# It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
# Please provide your model's folder name if there is one
model_path = os.path.join(
os.getenv("AZUREML_MODEL_DIR"), "sklearn_regression_model.pkl"
os.getenv("AZUREML_MODEL_DIR"), "model/sklearn_regression_model.pkl"
)
# deserialize the model file back into a sklearn model
model = joblib.load(model_path)
Expand All @@ -27,7 +28,7 @@ def run(raw_data):
In the example we extract the data from the json input and call the scikit-learn model's predict()
method and return the result back
"""
logging.info("Request received")
logging.info("model 1: request received")
data = json.loads(raw_data)["data"]
data = numpy.array(data)
result = model.predict(data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ def init():
# AZUREML_MODEL_DIR is an environment variable created during deployment.
# It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
# For multiple models, it points to the folder containing all deployed models (./azureml-models)
# Please provide your model's folder name if there is one
model_path = os.path.join(
os.getenv("AZUREML_MODEL_DIR"), "sklearn_regression_model.pkl"
os.getenv("AZUREML_MODEL_DIR"), "model/sklearn_regression_model.pkl"
)
# deserialize the model file back into a sklearn model
model = joblib.load(model_path)
Expand Down
3 changes: 1 addition & 2 deletions code/online-endpoint/model-2/environment/conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@ dependencies:
- scikit-learn=0.24.2
- scipy=1.7.1
- pip:
- azureml-defaults==1.33.0
- inference-schema[numpy-support]==1.3.0
- azureml-defaults==1.38.0
- joblib==1.0.1
18 changes: 12 additions & 6 deletions code/online-endpoint/model-2/onlinescoring/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ def init():
global model
# AZUREML_MODEL_DIR is an environment variable created during deployment.
# It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
# Please provide your model's folder name if there is one
model_path = os.path.join(
os.getenv("AZUREML_MODEL_DIR"), "sklearn_regression_model.pkl"
os.getenv("AZUREML_MODEL_DIR"), "model/sklearn_regression_model.pkl"
)
# deserialize the model file back into a sklearn model
model = joblib.load(model_path)
Expand All @@ -27,9 +28,14 @@ def run(raw_data):
In the example we extract the data from the json input and call the scikit-learn model's predict()
method and return the result back
"""
logging.info("Request received")
data = json.loads(raw_data)["data"]
data = numpy.array(data)
result = model.predict(data)
logging.info("model 2: request received")
result = [0.5, 0.5]
logging.info("Request processed")
return result.tolist()
# return hardcoded result so that it is easy to validate safe rollout scenario: https://docs.microsoft.com/en-us/azure/machine-learning/how-to-safely-rollout-managed-endpoints
return result

# actual scoring logic for reference:
# data = json.loads(raw_data)["data"]
# data = numpy.array(data)
# result = model.predict(data)
# return result.tolist()
34 changes: 34 additions & 0 deletions code/profiling/create-online-endpoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# <set_variables>
# Inputs come from the workflow environment:
#   SKU_CONNECTION_PAIR - "sku:num_concurrent_requests", e.g. "Standard_F2s_v2:1"
#   ENDPOINT_NAME       - unique online-endpoint name generated per matrix job
#   DEPLOYMENT_NAME     - online-deployment name derived from the endpoint name
export SKU_CONNECTION_PAIR="${SKU_CONNECTION_PAIR}"
export ENDPOINT_NAME="${ENDPOINT_NAME}"
export DEPLOYMENT_NAME="${DEPLOYMENT_NAME}"
# the computer size for the online-deployment: the sku part before the first ':'
export DEPLOYMENT_COMPUTER_SIZE=$(echo "$SKU_CONNECTION_PAIR" | awk -F: '{print $1}')
# </set_variables>

# <create_endpoint>
echo "Creating Endpoint $ENDPOINT_NAME of size $DEPLOYMENT_COMPUTER_SIZE..."
# render a per-deployment yaml from the template by substituting the VM size placeholder
sed -e "s/<% COMPUTER_SIZE %>/$DEPLOYMENT_COMPUTER_SIZE/g" online-endpoint/blue-deployment-tmpl.yml > "online-endpoint/${DEPLOYMENT_NAME}.yml"
az ml online-endpoint create --name "$ENDPOINT_NAME" -f online-endpoint/endpoint.yml
az ml online-deployment create --name "$DEPLOYMENT_NAME" --endpoint "$ENDPOINT_NAME" -f "online-endpoint/${DEPLOYMENT_NAME}.yml" --all-traffic
# </create_endpoint>

# <check_endpoint_Status>
# fail the job unless both the endpoint and the deployment reached "Succeeded"
endpoint_status=$(az ml online-endpoint show -n "$ENDPOINT_NAME" --query "provisioning_state" -o tsv)
echo "$endpoint_status"
if [[ $endpoint_status == "Succeeded" ]]; then
  echo "Endpoint $ENDPOINT_NAME created successfully"
else
  echo "Endpoint $ENDPOINT_NAME creation failed"
  exit 1
fi

deploy_status=$(az ml online-deployment show --name "$DEPLOYMENT_NAME" --endpoint-name "$ENDPOINT_NAME" --query "provisioning_state" -o tsv)
echo "$deploy_status"
if [[ $deploy_status == "Succeeded" ]]; then
  echo "Deployment $DEPLOYMENT_NAME completed successfully"
else
  echo "Deployment $DEPLOYMENT_NAME failed"
  exit 1
fi
# </check_endpoint_Status>
30 changes: 30 additions & 0 deletions code/profiling/create-profiling-compute.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# <set_variables>
export PROFILER_COMPUTE_NAME="${PROFILER_COMPUTE_NAME}" # the compute name for hosting the profiler
export PROFILER_COMPUTE_SIZE="${PROFILER_COMPUTE_SIZE}" # the compute size for hosting the profiler
# </set_variables>

# <create_compute_cluster_for_hosting_the_profiler>
# skip compute creation if compute exists already
if az ml compute show --name "$PROFILER_COMPUTE_NAME"; then
  echo "compute $PROFILER_COMPUTE_NAME exists already, will skip creation and role assignment."
  exit 0
fi

echo "Creating Compute $PROFILER_COMPUTE_NAME ..."
az ml compute create --name "$PROFILER_COMPUTE_NAME" --size "$PROFILER_COMPUTE_SIZE" --identity-type SystemAssigned --type amlcompute --max-instances 3

# check compute status: fail the job unless provisioning reached "Succeeded"
compute_status=$(az ml compute show --name "$PROFILER_COMPUTE_NAME" --query "provisioning_state" -o tsv)
echo "$compute_status"
if [[ $compute_status == "Succeeded" ]]; then
  echo "Compute $PROFILER_COMPUTE_NAME created successfully"
else
  echo "Compute $PROFILER_COMPUTE_NAME creation failed"
  exit 1
fi

# create role assignment for accessing workspace resources:
# grant the compute's system-assigned identity Contributor on the workspace
# (the workspace resource id is the compute id with the trailing /computes/<name> stripped)
compute_info=$(az ml compute show --name "$PROFILER_COMPUTE_NAME" --query '{"id": id, "identity_object_id": identity.principal_id}' -o json)
workspace_resource_id=$(echo "$compute_info" | jq -r '.id' | sed 's/\(.*\)\/computes\/.*/\1/')
identity_object_id=$(echo "$compute_info" | jq -r '.identity_object_id')
if ! az role assignment create --role Contributor --assignee-object-id "$identity_object_id" --scope "$workspace_resource_id"; then
  echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME"
  exit 1
fi
# </create_compute_cluster_for_hosting_the_profiler>
7 changes: 7 additions & 0 deletions code/profiling/delete-online-endpoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# <set_variables>
export ENDPOINT_NAME="${ENDPOINT_NAME}" # name of the online-endpoint to remove
# </set_variables>

# <delete_endpoint>
# -y answers the confirmation prompt so the cleanup step runs unattended
az ml online-endpoint delete --name "$ENDPOINT_NAME" -y
# </delete_endpoint>
Loading