From 467416299b9ed8929ac29ec8d31219ce75a242d3 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 18 Nov 2021 20:18:19 +0800 Subject: [PATCH 01/56] modify workflow code --- .github/workflows/profile.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index ff6c82a..67fbe2d 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -14,7 +14,7 @@ jobs: - name: install dependencies run: | az extension add -n ml -y - apt-get update -y && apt-get install jq + sudo apt-get update -y && sudo apt-get install jq - name: setup run: | az config set defaults.workspace=${{secrets.AML_WORKSPACE}} From 37943ac5179a27d239fff28a15a075b1ea75a56e Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 19 Nov 2021 12:01:28 +0800 Subject: [PATCH 02/56] add parallel jobs --- .github/workflows/profile.yml | 6 +++++ ...eployment.yml => blue-deployment-tmpl.yml} | 2 +- .../how-to-profile-online-endpoint.sh | 22 ++++++++++--------- 3 files changed, 19 insertions(+), 11 deletions(-) rename code/online-endpoint/{blue-deployment.yml => blue-deployment-tmpl.yml} (92%) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index 67fbe2d..e81f0c6 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -4,6 +4,10 @@ on: jobs: build: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + deployment_computer_size: ['Standard_F2s_v2', 'Standard_F4s_v2'] steps: - name: check out repo uses: actions/checkout@v2 @@ -22,5 +26,7 @@ jobs: az account set -s ${{secrets.SUBSCRIPTION_ID}} - name: run job run: bash -x profiling/how-to-profile-online-endpoint.sh + env: + DEPLOYMENT_COMPUTER_SIZE: ${{matrix.deployment_computer_size}} working-directory: code \ No newline at end of file diff --git a/code/online-endpoint/blue-deployment.yml b/code/online-endpoint/blue-deployment-tmpl.yml similarity index 92% rename from code/online-endpoint/blue-deployment.yml rename to code/online-endpoint/blue-deployment-tmpl.yml index 874aebb..32be647 100644 --- a/code/online-endpoint/blue-deployment.yml +++ b/code/online-endpoint/blue-deployment-tmpl.yml @@ -10,5 +10,5 @@ code_configuration: environment: conda_file: model-1/environment/conda.yml image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1 -instance_type: Standard_F2s_v2 +instance_type: <% COMPUTER_SIZE %> instance_count: 1 diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index b459257..f3c1507 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -13,6 +13,7 @@ # export ENDPOINT_NAME="" export DEPLOYMENT_NAME="" +export DEPLOYMENT_COMPUTER_SIZE="${DEPLOYMENT_COMPUTER_SIZE:-Standard_F2s_v2}" # the computer size for the online-deployment export PROFILING_TOOL="" # allowed values: wrk, wrk2 and labench export PROFILER_COMPUTE_NAME="" export PROFILER_COMPUTE_SIZE="" # required only when compute does not exist already @@ -25,13 +26,14 @@ export TIMEOUT="" # for labench only, timeout for each request, default value is # export ENDPOINT_NAME=endpt-`echo $RANDOM` -export DEPLOYMENT_NAME=blue +export DEPLOYMENT_NAME=${ENDPOINT_NAME}-dep export PROFILING_TOOL=wrk export PROFILER_COMPUTE_NAME=profilingTest # the compute name for hosting the profiler export PROFILER_COMPUTE_SIZE=Standard_F4s_v2 # the compute size for hosting the profiler # -echo "Creating Endpoint $ENDPOINT_NAME ..." +echo "Creating Endpoint $ENDPOINT_NAME of size $DEPLOYMENT_COMPUTER_SIZE..." +sed -e "s/<% COMPUTER_SIZE %>/$DEPLOYMENT_COMPUTER_SIZE/g" online-endpoint/blue-deployment-tmpl.yml > online-endpoint/blue-deployment.yml az ml online-endpoint create --name $ENDPOINT_NAME -f online-endpoint/endpoint.yml az ml online-deployment create --name $DEPLOYMENT_NAME --endpoint $ENDPOINT_NAME -f online-endpoint/blue-deployment.yml --all-traffic # @@ -71,14 +73,14 @@ else fi # create role assignment for acessing workspace resources -compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -access_token=`az account get-access-token --query accessToken -o tsv` -compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +# access_token=`az account get-access-token --query accessToken -o tsv` +# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # # From 0971b401876f74a788f6be93348eb7a4a4f4f582 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 19 Nov 2021 15:03:42 +0800 Subject: [PATCH 03/56] fix pipeline --- code/profiling/how-to-profile-online-endpoint.sh | 10 +++++----- code/profiling/profiling_job_tmpl.yml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index f3c1507..ad7c059 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -33,9 +33,9 @@ export PROFILER_COMPUTE_SIZE=Standard_F4s_v2 # the compute size for hosting the # echo "Creating Endpoint $ENDPOINT_NAME of size $DEPLOYMENT_COMPUTER_SIZE..." -sed -e "s/<% COMPUTER_SIZE %>/$DEPLOYMENT_COMPUTER_SIZE/g" online-endpoint/blue-deployment-tmpl.yml > online-endpoint/blue-deployment.yml +sed -e "s/<% COMPUTER_SIZE %>/$DEPLOYMENT_COMPUTER_SIZE/g" online-endpoint/blue-deployment-tmpl.yml > online-endpoint/${DEPLOYMENT_NAME}.yml az ml online-endpoint create --name $ENDPOINT_NAME -f online-endpoint/endpoint.yml -az ml online-deployment create --name $DEPLOYMENT_NAME --endpoint $ENDPOINT_NAME -f online-endpoint/blue-deployment.yml --all-traffic +az ml online-deployment create --name $DEPLOYMENT_NAME --endpoint $ENDPOINT_NAME -f online-endpoint/${DEPLOYMENT_NAME}.yml --all-traffic # # @@ -88,7 +88,7 @@ default_datastore_info=`az ml datastore show --name workspaceblobstore -o json` account_name=`echo $default_datastore_info | jq '.account_name' | sed "s/\"//g"` container_name=`echo $default_datastore_info | jq '.container_name' | sed "s/\"//g"` connection_string=`az storage account show-connection-string --name $account_name -o tsv` -az storage blob upload --container-name $container_name/profiling_payloads --name payload.txt --file endpoints/online/profiling/payload.txt --connection-string $connection_string +az storage blob upload --container-name $container_name/profiling_payloads --name ${ENDPOINT_NAME}_payload.txt --file profiling/payload.txt --connection-string $connection_string # # @@ -104,11 +104,11 @@ sed \ -e "s/<% TIMEOUT %>/$TIMEOUT/g" \ -e "s/<% THREAD %>/$THREAD/g" \ -e "s/<% COMPUTE_NAME %>/$PROFILER_COMPUTE_NAME/g" \ - profiling/profiling_job_tmpl.yml > profiling_job.yml + profiling/profiling_job_tmpl.yml > ${ENDPOINT_NAME}_profiling_job.yml # # -run_id=$(az ml job create -f profiling_job.yml --query name -o tsv) +run_id=$(az ml job create -f ${ENDPOINT_NAME}_profiling_job.yml --query name -o tsv) # # diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index db738bf..5b83287 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -17,4 +17,4 @@ environment_variables: compute: "azureml:<% COMPUTE_NAME %>" inputs: payload: - file: azureml://datastores/workspaceblobstore/paths/profiling_payloads/payload.txt + file: azureml://datastores/workspaceblobstore/paths/profiling_payloads/<% ENDPOINT_NAME %>_payload.txt From 94024a19f47b73c6421c627b5a099ad019ce4392 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 19 Nov 2021 15:41:55 +0800 Subject: [PATCH 04/56] uncomment necessary codes --- code/profiling/how-to-profile-online-endpoint.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index ad7c059..704f382 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -73,14 +73,14 @@ else fi # create role assignment for acessing workspace resources -# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -# access_token=`az account get-access-token --query accessToken -o tsv` -# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +access_token=`az account get-access-token --query accessToken -o tsv` +compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # # From df79cd61b38f0346c8443405119b9cce5acdb788 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 30 Nov 2021 16:11:14 +0800 Subject: [PATCH 05/56] fix profile on onlineEndpoint pipeline --- .github/workflows/profile.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index e81f0c6..b91e9f3 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -1,13 +1,18 @@ name: profiling-online-endpoints on: workflow_dispatch: + inputs: + SKU_LIST: + description: 'Define the list of skus in the format of [Standard_F2s_v2, Standard_F4s_v2]' + required: true + default: '[Standard_F2s_v2, Standard_F4s_v2]' jobs: build: runs-on: ubuntu-latest strategy: fail-fast: false matrix: - deployment_computer_size: ['Standard_F2s_v2', 'Standard_F4s_v2'] + deployment_computer_size: ${{ fromJson(github.event.inputs.SKU_LIST) }} steps: - name: check out repo uses: actions/checkout@v2 From e6c29cde47c5e9fdce712203dd0987b4e279eb17 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 30 Nov 2021 16:13:15 +0800 Subject: [PATCH 06/56] set sku list from input --- .github/workflows/profile.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index b91e9f3..fe3ec54 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -3,9 +3,9 @@ on: workflow_dispatch: inputs: SKU_LIST: - description: 'Define the list of skus in the format of [Standard_F2s_v2, Standard_F4s_v2]' + description: 'Define the list of skus in the format of ["Standard_F2s_v2", "Standard_F4s_v2"]' required: true - default: '[Standard_F2s_v2, Standard_F4s_v2]' + default: '["Standard_F2s_v2", "Standard_F4s_v2"]' jobs: build: runs-on: ubuntu-latest From f59f424e33ef36771fefe348fd143690c12b3f7f Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 30 Nov 2021 16:34:06 +0800 Subject: [PATCH 07/56] comment to pass tests --- code/profiling/how-to-profile-online-endpoint.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 704f382..ad7c059 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -73,14 +73,14 @@ else fi # create role assignment for acessing workspace resources -compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -access_token=`az account get-access-token --query accessToken -o tsv` -compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +# access_token=`az account get-access-token --query accessToken -o tsv` +# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # # From 607a9c660588cc7517ad8f76af15f0cb86910cd9 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 30 Nov 2021 17:00:53 +0800 Subject: [PATCH 08/56] uncomment --- code/profiling/how-to-profile-online-endpoint.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index ad7c059..704f382 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -73,14 +73,14 @@ else fi # create role assignment for acessing workspace resources -# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -# access_token=`az account get-access-token --query accessToken -o tsv` -# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +access_token=`az account get-access-token --query accessToken -o tsv` +compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # # From 5af6ffac8a47fc7b07f7f6fb1486070f355b4fa9 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 2 Dec 2021 15:05:54 +0800 Subject: [PATCH 09/56] split profiling process --- .github/workflows/profile.yml | 36 +++++++++++++------ code/profiling/create-online-endpoint.sh | 32 +++++++++++++++++ .../how-to-profile-online-endpoint.sh | 33 ++--------------- 3 files changed, 59 insertions(+), 42 deletions(-) create mode 100644 code/profiling/create-online-endpoint.sh diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index fe3ec54..3d225d0 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -7,31 +7,45 @@ on: required: true default: '["Standard_F2s_v2", "Standard_F4s_v2"]' jobs: - build: + profiling: runs-on: ubuntu-latest strategy: fail-fast: false matrix: deployment_computer_size: ${{ fromJson(github.event.inputs.SKU_LIST) }} steps: - - name: check out repo + - name: Check out repo uses: actions/checkout@v2 - name: azure login uses: azure/login@v1 with: creds: ${{secrets.AZURE_CREDENTIALS}} - - name: install dependencies + - name: Install dependencies run: | - az extension add -n ml -y - sudo apt-get update -y && sudo apt-get install jq - - name: setup + az extension add -n ml -y + sudo apt-get update -y && sudo apt-get install jq + - name: Setup az environment run: | - az config set defaults.workspace=${{secrets.AML_WORKSPACE}} - az config set defaults.group=${{secrets.RESOURCE_GROUP}} - az account set -s ${{secrets.SUBSCRIPTION_ID}} - - name: run job - run: bash -x profiling/how-to-profile-online-endpoint.sh + az config set defaults.workspace=${{secrets.AML_WORKSPACE}} + az config set defaults.group=${{secrets.RESOURCE_GROUP}} + az account set -s ${{secrets.SUBSCRIPTION_ID}} + - name: Generate unique online-endpoint name and online-deployment name + run: | + echo ENDPOINT_NAME=endpt-`echo $RANDOM` >> $GITHUB_ENV + echo DEPLOYMENT_NAME=$ENDPOINT_NAME-dep >> $GITHUB_ENV + - name: Create online-endpoint and online-deployment + run: bash -x profiling/create-online-endpoint.sh env: + ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} + DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} DEPLOYMENT_COMPUTER_SIZE: ${{matrix.deployment_computer_size}} working-directory: code + - name: Run profiling job + run: bash -x profiling/how-to-profile-online-endpoint.sh + env: + ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} + DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} + working-directory: code + + \ No newline at end of file diff --git a/code/profiling/create-online-endpoint.sh b/code/profiling/create-online-endpoint.sh new file mode 100644 index 0000000..729bcbb --- /dev/null +++ b/code/profiling/create-online-endpoint.sh @@ -0,0 +1,32 @@ +# +export ENDPOINT_NAME=${ENDPOINT_NAME} +export DEPLOYMENT_NAME="${DEPLOYMENT_NAME}" +export DEPLOYMENT_COMPUTER_SIZE="${DEPLOYMENT_COMPUTER_SIZE:-Standard_F2s_v2}" # the computer size for the online-deployment +# + +# +echo "Creating Endpoint $ENDPOINT_NAME of size $DEPLOYMENT_COMPUTER_SIZE..." +sed -e "s/<% COMPUTER_SIZE %>/$DEPLOYMENT_COMPUTER_SIZE/g" online-endpoint/blue-deployment-tmpl.yml > online-endpoint/${DEPLOYMENT_NAME}.yml +az ml online-endpoint create --name $ENDPOINT_NAME -f online-endpoint/endpoint.yml +az ml online-deployment create --name $DEPLOYMENT_NAME --endpoint $ENDPOINT_NAME -f online-endpoint/${DEPLOYMENT_NAME}.yml --all-traffic +# + +# +endpoint_status=`az ml online-endpoint show -n $ENDPOINT_NAME --query "provisioning_state" -o tsv` +echo $endpoint_status +if [[ $endpoint_status == "Succeeded" ]]; then + echo "Endpoint $ENDPOINT_NAME created successfully" +else + echo "Endpoint $ENDPOINT_NAME creation failed" + exit 1 +fi + +deploy_status=`az ml online-deployment show --name $DEPLOYMENT_NAME --endpoint-name $ENDPOINT_NAME --query "provisioning_state" -o tsv` +echo $deploy_status +if [[ $deploy_status == "Succeeded" ]]; then + echo "Deployment $DEPLOYMENT_NAME completed successfully" +else + echo "Deployment $DEPLOYMENT_NAME failed" + exit 1 +fi +# \ No newline at end of file diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 704f382..4ef169a 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -11,8 +11,8 @@ ## 7. az configure --defaults group= workspace= # -export ENDPOINT_NAME="" -export DEPLOYMENT_NAME="" +export ENDPOINT_NAME="${ENDPOINT_NAME}" +export DEPLOYMENT_NAME="${DEPLOYMENT_NAME}" export DEPLOYMENT_COMPUTER_SIZE="${DEPLOYMENT_COMPUTER_SIZE:-Standard_F2s_v2}" # the computer size for the online-deployment export PROFILING_TOOL="" # allowed values: wrk, wrk2 and labench export PROFILER_COMPUTE_NAME="" @@ -25,39 +25,10 @@ export CLIENTS="" # for labench only, no. of clients for the profiling tool, def export TIMEOUT="" # for labench only, timeout for each request, default value is 10s # -export ENDPOINT_NAME=endpt-`echo $RANDOM` -export DEPLOYMENT_NAME=${ENDPOINT_NAME}-dep export PROFILING_TOOL=wrk export PROFILER_COMPUTE_NAME=profilingTest # the compute name for hosting the profiler export PROFILER_COMPUTE_SIZE=Standard_F4s_v2 # the compute size for hosting the profiler -# -echo "Creating Endpoint $ENDPOINT_NAME of size $DEPLOYMENT_COMPUTER_SIZE..." -sed -e "s/<% COMPUTER_SIZE %>/$DEPLOYMENT_COMPUTER_SIZE/g" online-endpoint/blue-deployment-tmpl.yml > online-endpoint/${DEPLOYMENT_NAME}.yml -az ml online-endpoint create --name $ENDPOINT_NAME -f online-endpoint/endpoint.yml -az ml online-deployment create --name $DEPLOYMENT_NAME --endpoint $ENDPOINT_NAME -f online-endpoint/${DEPLOYMENT_NAME}.yml --all-traffic -# - -# -endpoint_status=`az ml online-endpoint show -n $ENDPOINT_NAME --query "provisioning_state" -o tsv` -echo $endpoint_status -if [[ $endpoint_status == "Succeeded" ]]; then - echo "Endpoint $ENDPOINT_NAME created successfully" -else - echo "Endpoint $ENDPOINT_NAME creation failed" - exit 1 -fi - -deploy_status=`az ml online-deployment show --name $DEPLOYMENT_NAME --endpoint-name $ENDPOINT_NAME --query "provisioning_state" -o tsv` -echo $deploy_status -if [[ $deploy_status == "Succeeded" ]]; then - echo "Deployment $DEPLOYMENT_NAME completed successfully" -else - echo "Deployment $DEPLOYMENT_NAME failed" - exit 1 -fi -# - # echo "Creating Compute $PROFILER_COMPUTE_NAME ..." az ml compute create --name $PROFILER_COMPUTE_NAME --size $PROFILER_COMPUTE_SIZE --identity-type SystemAssigned --type amlcompute From 103110f71d6317304f8867d5ee6a85e6ed2fe39c Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 2 Dec 2021 15:07:03 +0800 Subject: [PATCH 10/56] comment codes to pass actions --- .../how-to-profile-online-endpoint.sh | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 4ef169a..a231a43 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -44,23 +44,23 @@ else fi # create role assignment for acessing workspace resources -compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -access_token=`az account get-access-token --query accessToken -o tsv` -compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +# access_token=`az account get-access-token --query accessToken -o tsv` +# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # -# +# default_datastore_info=`az ml datastore show --name workspaceblobstore -o json` account_name=`echo $default_datastore_info | jq '.account_name' | sed "s/\"//g"` container_name=`echo $default_datastore_info | jq '.container_name' | sed "s/\"//g"` connection_string=`az storage account show-connection-string --name $account_name -o tsv` az storage blob upload --container-name $container_name/profiling_payloads --name ${ENDPOINT_NAME}_payload.txt --file profiling/payload.txt --connection-string $connection_string -# +# # # please specify environment variable "IDENTITY_ACCESS_TOKEN" when working with ml compute with no appropriate MSI attached From ffd0c2ae82e8e18f94afaf85e4c5ad1063fb436a Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 2 Dec 2021 15:13:52 +0800 Subject: [PATCH 11/56] fix ci pipeline --- .github/workflows/profile.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index 3d225d0..c84ffa7 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -16,7 +16,7 @@ jobs: steps: - name: Check out repo uses: actions/checkout@v2 - - name: azure login + - name: Azure login uses: azure/login@v1 with: creds: ${{secrets.AZURE_CREDENTIALS}} @@ -31,7 +31,8 @@ jobs: az account set -s ${{secrets.SUBSCRIPTION_ID}} - name: Generate unique online-endpoint name and online-deployment name run: | - echo ENDPOINT_NAME=endpt-`echo $RANDOM` >> $GITHUB_ENV + export ENDPOINT_NAME=endpt-`echo $RANDOM` + echo ENDPOINT_NAME=$ENDPOINT_NAME >> $GITHUB_ENV echo DEPLOYMENT_NAME=$ENDPOINT_NAME-dep >> $GITHUB_ENV - name: Create online-endpoint and online-deployment run: bash -x profiling/create-online-endpoint.sh From e4cc1e786c815f152035445b9da003a4c0b84da1 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 2 Dec 2021 17:35:28 +0800 Subject: [PATCH 12/56] add default value for max_concurrent_requests_per_instance --- code/online-endpoint/blue-deployment-tmpl.yml | 3 +++ code/profiling/how-to-profile-online-endpoint.sh | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/code/online-endpoint/blue-deployment-tmpl.yml b/code/online-endpoint/blue-deployment-tmpl.yml index 32be647..88f22a9 100644 --- a/code/online-endpoint/blue-deployment-tmpl.yml +++ b/code/online-endpoint/blue-deployment-tmpl.yml @@ -12,3 +12,6 @@ environment: image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1 instance_type: <% COMPUTER_SIZE %> instance_count: 1 +request_settings: + request_timeout_ms: 3000 + max_concurrent_requests_per_instance: 4294967295 diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index a231a43..399c67d 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -18,7 +18,7 @@ export PROFILING_TOOL="" # allowed values: wrk, wrk2 and labench export PROFILER_COMPUTE_NAME="" export PROFILER_COMPUTE_SIZE="" # required only when compute does not exist already export DURATION="" # time for running the profiling tool (duration for each wrk call or labench call), default value is 300s -export CONNECTIONS="" # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set +export CONNECTIONS="${CONNECTIONS}" # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set export THREAD="" # for wrk and wrk2 only, no. of threads allocated for the profiling tool, default value is 1 export TARGET_RPS="" # for labench and wrk2 only, target rps for the profiling tool, default value is 50 export CLIENTS="" # for labench only, no. of clients for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set From 19e813e851a96d0596b0a7dd658dee4b1977e26d Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 2 Dec 2021 22:01:57 +0800 Subject: [PATCH 13/56] change the # of max_concurrent_requests_per_instance --- code/online-endpoint/blue-deployment-tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/online-endpoint/blue-deployment-tmpl.yml b/code/online-endpoint/blue-deployment-tmpl.yml index 88f22a9..39c891f 100644 --- a/code/online-endpoint/blue-deployment-tmpl.yml +++ b/code/online-endpoint/blue-deployment-tmpl.yml @@ -14,4 +14,4 @@ instance_type: <% COMPUTER_SIZE %> instance_count: 1 request_settings: request_timeout_ms: 3000 - max_concurrent_requests_per_instance: 4294967295 + max_concurrent_requests_per_instance: 1024 From 7137ebfaa5a8df5b6b17f87151f18b2dfe0fd2f7 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 2 Dec 2021 22:04:59 +0800 Subject: [PATCH 14/56] set default value for CONNECTIONS --- code/profiling/how-to-profile-online-endpoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 399c67d..30aa5a1 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -18,7 +18,7 @@ export PROFILING_TOOL="" # allowed values: wrk, wrk2 and labench export PROFILER_COMPUTE_NAME="" export PROFILER_COMPUTE_SIZE="" # required only when compute does not exist already export DURATION="" # time for running the profiling tool (duration for each wrk call or labench call), default value is 300s -export CONNECTIONS="${CONNECTIONS}" # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set +export CONNECTIONS="${CONNECTIONS:-1}" # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set export THREAD="" # for wrk and wrk2 only, no. of threads allocated for the profiling tool, default value is 1 export TARGET_RPS="" # for labench and wrk2 only, target rps for the profiling tool, default value is 50 export CLIENTS="" # for labench only, no. of clients for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set From 203d17fb0ed3b0721f8ef13d409164d5d55c1214 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 3 Dec 2021 16:15:56 +0800 Subject: [PATCH 15/56] change sku_list to sku_connection_pair --- .github/workflows/profile.yml | 9 +++++---- code/profiling/create-online-endpoint.sh | 6 ++++-- code/profiling/how-to-profile-online-endpoint.sh | 4 ++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index c84ffa7..b4119fa 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -3,16 +3,16 @@ on: workflow_dispatch: inputs: SKU_LIST: - description: 'Define the list of skus in the format of ["Standard_F2s_v2", "Standard_F4s_v2"]' + description: 'Define the list of skus in the format of ["sku:num_concurrent_requests", "sku:num_concurrent_requests"]' required: true - default: '["Standard_F2s_v2", "Standard_F4s_v2"]' + default: '["Standard_F2s_v2:1", "Standard_F4s_v2:2"]' jobs: profiling: runs-on: ubuntu-latest strategy: fail-fast: false matrix: - deployment_computer_size: ${{ fromJson(github.event.inputs.SKU_LIST) }} + sku_connection_pair: ${{ fromJson(github.event.inputs.SKU_LIST) }} steps: - name: Check out repo uses: actions/checkout@v2 @@ -39,13 +39,14 @@ jobs: env: ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} - DEPLOYMENT_COMPUTER_SIZE: ${{matrix.deployment_computer_size}} + SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} working-directory: code - name: Run profiling job run: bash -x profiling/how-to-profile-online-endpoint.sh env: ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} + SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} working-directory: code diff --git a/code/profiling/create-online-endpoint.sh b/code/profiling/create-online-endpoint.sh index 729bcbb..9313e33 100644 --- a/code/profiling/create-online-endpoint.sh +++ b/code/profiling/create-online-endpoint.sh @@ -1,7 +1,9 @@ # +export SKU_CONNECTION_PAIR=${SKU_CONNECTION_PAIR} export ENDPOINT_NAME=${ENDPOINT_NAME} -export DEPLOYMENT_NAME="${DEPLOYMENT_NAME}" -export DEPLOYMENT_COMPUTER_SIZE="${DEPLOYMENT_COMPUTER_SIZE:-Standard_F2s_v2}" # the computer size for the online-deployment +export DEPLOYMENT_NAME=${DEPLOYMENT_NAME} +export DEPLOYMENT_COMPUTER_SIZE=`echo $SKU_CONNECTION_PAIR | awk -F: '{print $1}'` +# the computer size for the online-deployment # # diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 30aa5a1..35da536 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -13,12 +13,12 @@ # export ENDPOINT_NAME="${ENDPOINT_NAME}" export DEPLOYMENT_NAME="${DEPLOYMENT_NAME}" -export DEPLOYMENT_COMPUTER_SIZE="${DEPLOYMENT_COMPUTER_SIZE:-Standard_F2s_v2}" # the computer size for the online-deployment +export SKU_CONNECTION_PAIR=${SKU_CONNECTION_PAIR} export PROFILING_TOOL="" # allowed values: wrk, wrk2 and labench export PROFILER_COMPUTE_NAME="" export PROFILER_COMPUTE_SIZE="" # required only when compute does not exist already export DURATION="" # time for running the profiling tool (duration for each wrk call or labench call), default value is 300s -export CONNECTIONS="${CONNECTIONS:-1}" # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set +export CONNECTIONS=`echo $SKU_CONNECTION_PAIR | awk -F: '{print $2}'` # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set export THREAD="" # for wrk and wrk2 only, no. of threads allocated for the profiling tool, default value is 1 export TARGET_RPS="" # for labench and wrk2 only, target rps for the profiling tool, default value is 50 export CLIENTS="" # for labench only, no. of clients for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set From 7aa9d2d5f46df87f9d89a98d24a65164841282e8 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 3 Dec 2021 18:03:31 +0800 Subject: [PATCH 16/56] update profiling image version for test only --- code/profiling/profiling_job_tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index 5b83287..7641f13 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -3,7 +3,7 @@ command: > profiling.sh -p {inputs.payload} experiment_name: profiling-job environment: - image: docker.io/rachyong/profilers:latest + image: docker.io/rachyong/profilers:20211203.1 environment_variables: ONLINE_ENDPOINT: "<% ENDPOINT_NAME %>" DEPLOYMENT: "<% DEPLOYMENT_NAME %>" From e79aef9a814d78621ebcfc08aedf8c485f72af42 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Mon, 6 Dec 2021 17:19:02 +0800 Subject: [PATCH 17/56] change profiling image version for testing purpose --- code/profiling/profiling_job_tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index 7641f13..3fdab82 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -3,7 +3,7 @@ command: > profiling.sh -p {inputs.payload} experiment_name: profiling-job environment: - image: docker.io/rachyong/profilers:20211203.1 + image: docker.io/rachyong/profilers:20211206.27 environment_variables: ONLINE_ENDPOINT: "<% ENDPOINT_NAME %>" DEPLOYMENT: "<% DEPLOYMENT_NAME %>" From 0a016fdeba06bf518bdd833629268ba9323d63b6 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 7 Dec 2021 14:37:46 +0800 Subject: [PATCH 18/56] update profiling image version for testing purpose only --- code/profiling/profiling_job_tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index 3fdab82..aa06a9f 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -3,7 +3,7 @@ command: > profiling.sh -p {inputs.payload} experiment_name: profiling-job environment: - image: docker.io/rachyong/profilers:20211206.27 + image: docker.io/rachyong/profilers:20211207.28 environment_variables: ONLINE_ENDPOINT: "<% ENDPOINT_NAME %>" DEPLOYMENT: "<% DEPLOYMENT_NAME %>" From c6ee4b30bfc46484a2734218b1af4729d17a1488 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 7 Dec 2021 16:34:21 +0800 Subject: [PATCH 19/56] uncomment necessary codes --- code/profiling/how-to-profile-online-endpoint.sh | 16 ++++++++-------- code/profiling/profiling_job_tmpl.yml | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 35da536..a0b86e8 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -44,14 +44,14 @@ else fi # create role assignment for acessing workspace resources -# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -# access_token=`az account get-access-token --query accessToken -o tsv` -# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +access_token=`az account get-access-token --query accessToken -o tsv` +compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # # diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index aa06a9f..5b83287 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -3,7 +3,7 @@ command: > profiling.sh -p {inputs.payload} experiment_name: profiling-job environment: - image: docker.io/rachyong/profilers:20211207.28 + image: docker.io/rachyong/profilers:latest environment_variables: ONLINE_ENDPOINT: "<% ENDPOINT_NAME %>" DEPLOYMENT: "<% DEPLOYMENT_NAME %>" From 886bce6088c6d3e33b25932e9606fe0f39448058 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 11:36:11 +0800 Subject: [PATCH 20/56] make creating of profiler compute a separate step --- .github/workflows/profile.yml | 40 +++++++++++++++---- code/profiling/create-profiling-compute.sh | 29 ++++++++++++++ .../how-to-profile-online-endpoint.sh | 29 +------------- 3 files changed, 63 insertions(+), 35 deletions(-) create mode 100644 code/profiling/create-profiling-compute.sh diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index b4119fa..eabe1b4 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -7,15 +7,9 @@ on: required: true default: '["Standard_F2s_v2:1", "Standard_F4s_v2:2"]' jobs: - profiling: + setup: runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - sku_connection_pair: ${{ fromJson(github.event.inputs.SKU_LIST) }} steps: - - name: Check out repo - uses: actions/checkout@v2 - name: Azure login uses: azure/login@v1 with: @@ -29,6 +23,37 @@ jobs: az config set defaults.workspace=${{secrets.AML_WORKSPACE}} az config set defaults.group=${{secrets.RESOURCE_GROUP}} az account set -s ${{secrets.SUBSCRIPTION_ID}} + + create_profiling_compute: + runs-on: ubuntu-latest + outputs: + PROFILER_COMPUTE_NAME: ${{ steps.set_profiler_compute_info.outputs.PROFILER_COMPUTE_NAME }} + steps: + - name: Set profiler compute info + id: set_profiler_compute_info + run: | + echo PROFILER_COMPUTE_NAME=profilingTest >> $GITHUB_ENV + echo PROFILER_COMPUTE_SIZE=Standard_F4s_v2 >> $GITHUB_ENV + Write-Output "::set-output name=PROFILER_COMPUTE_NAME::$PROFILER_COMPUTE_NAME" + - name: Check out repo + uses: actions/checkout@v2 + - name: Create profiling compute + run: bash -x profiling/create-profiling-compute.sh + env: + PROFILER_COMPUTE_NAME: ${{ env.PROFILER_COMPUTE_NAME }} + PROFILER_COMPUTE_SIZE: ${{ env.PROFILER_COMPUTE_SIZE }} + working-directory: code + + profiling: + runs-on: ubuntu-latest + needs: create_profiling_compute + strategy: + fail-fast: false + matrix: + sku_connection_pair: ${{ fromJson(github.event.inputs.SKU_LIST) }} + steps: + - name: Check out repo + uses: actions/checkout@v2 - name: Generate unique online-endpoint name and online-deployment name run: | export ENDPOINT_NAME=endpt-`echo $RANDOM` @@ -47,6 +72,7 @@ jobs: ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} + PROFILER_COMPUTE_NAME: ${{needs.create_profiling_compute.outputs.PROFILER_COMPUTE_NAME}} working-directory: code diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh new file mode 100644 index 0000000..83d3bb1 --- /dev/null +++ b/code/profiling/create-profiling-compute.sh @@ -0,0 +1,29 @@ +# +export PROFILER_COMPUTE_NAME="${PROFILER_COMPUTE_NAME}" # the compute name for hosting the profiler +export PROFILER_COMPUTE_SIZE="${PROFILER_COMPUTE_SIZE}" # the compute size for hosting the profiler +# + +# +echo "Creating Compute $PROFILER_COMPUTE_NAME ..." +az ml compute create --name $PROFILER_COMPUTE_NAME --size $PROFILER_COMPUTE_SIZE --identity-type SystemAssigned --type amlcompute + +# check compute status +compute_status=`az ml compute show --name $PROFILER_COMPUTE_NAME --query "provisioning_state" -o tsv` +echo $compute_status +if [[ $compute_status == "Succeeded" ]]; then + echo "Compute $PROFILER_COMPUTE_NAME created successfully" +else + echo "Compute $PROFILER_COMPUTE_NAME creation failed" + exit 1 +fi + +# create role assignment for acessing workspace resources +# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +# access_token=`az account get-access-token --query accessToken -o tsv` +# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# \ No newline at end of file diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index a0b86e8..02c692d 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -16,7 +16,6 @@ export DEPLOYMENT_NAME="${DEPLOYMENT_NAME}" export SKU_CONNECTION_PAIR=${SKU_CONNECTION_PAIR} export PROFILING_TOOL="" # allowed values: wrk, wrk2 and labench export PROFILER_COMPUTE_NAME="" -export PROFILER_COMPUTE_SIZE="" # required only when compute does not exist already export DURATION="" # time for running the profiling tool (duration for each wrk call or labench call), default value is 300s export CONNECTIONS=`echo $SKU_CONNECTION_PAIR | awk -F: '{print $2}'` # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set export THREAD="" # for wrk and wrk2 only, no. of threads allocated for the profiling tool, default value is 1 @@ -26,33 +25,7 @@ export TIMEOUT="" # for labench only, timeout for each request, default value is # export PROFILING_TOOL=wrk -export PROFILER_COMPUTE_NAME=profilingTest # the compute name for hosting the profiler -export PROFILER_COMPUTE_SIZE=Standard_F4s_v2 # the compute size for hosting the profiler - -# -echo "Creating Compute $PROFILER_COMPUTE_NAME ..." -az ml compute create --name $PROFILER_COMPUTE_NAME --size $PROFILER_COMPUTE_SIZE --identity-type SystemAssigned --type amlcompute - -# check compute status -compute_status=`az ml compute show --name $PROFILER_COMPUTE_NAME --query "provisioning_state" -o tsv` -echo $compute_status -if [[ $compute_status == "Succeeded" ]]; then - echo "Compute $PROFILER_COMPUTE_NAME created successfully" -else - echo "Compute $PROFILER_COMPUTE_NAME creation failed" - exit 1 -fi - -# create role assignment for acessing workspace resources -compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -access_token=`az account get-access-token --query accessToken -o tsv` -compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -# +export PROFILER_COMPUTE_NAME="${PROFILER_COMPUTE_NAME}" # the compute name for hosting the profiler # default_datastore_info=`az ml datastore show --name workspaceblobstore -o json` From 9d7c1d314f47d5a1e2f81d610117e88bf66d5b88 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 11:44:04 +0800 Subject: [PATCH 21/56] fix workflow error --- .github/workflows/profile.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index eabe1b4..2aa02c2 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -26,6 +26,7 @@ jobs: create_profiling_compute: runs-on: ubuntu-latest + needs: setup outputs: PROFILER_COMPUTE_NAME: ${{ steps.set_profiler_compute_info.outputs.PROFILER_COMPUTE_NAME }} steps: @@ -34,7 +35,7 @@ jobs: run: | echo PROFILER_COMPUTE_NAME=profilingTest >> $GITHUB_ENV echo PROFILER_COMPUTE_SIZE=Standard_F4s_v2 >> $GITHUB_ENV - Write-Output "::set-output name=PROFILER_COMPUTE_NAME::$PROFILER_COMPUTE_NAME" + echo "::set-output name=PROFILER_COMPUTE_NAME::$PROFILER_COMPUTE_NAME" - name: Check out repo uses: actions/checkout@v2 - name: Create profiling compute From 7aefc7c89c16bfbd90b1ac448cc392d4c7b7ac37 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 11:50:55 +0800 Subject: [PATCH 22/56] fix workflow error --- .github/workflows/profile.yml | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index 2aa02c2..16164e3 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -7,8 +7,10 @@ on: required: true default: '["Standard_F2s_v2:1", "Standard_F4s_v2:2"]' jobs: - setup: + create_profiling_compute: runs-on: ubuntu-latest + outputs: + PROFILER_COMPUTE_NAME: ${{ steps.set_profiler_compute_info.outputs.PROFILER_COMPUTE_NAME }} steps: - name: Azure login uses: azure/login@v1 @@ -23,13 +25,6 @@ jobs: az config set defaults.workspace=${{secrets.AML_WORKSPACE}} az config set defaults.group=${{secrets.RESOURCE_GROUP}} az account set -s ${{secrets.SUBSCRIPTION_ID}} - - create_profiling_compute: - runs-on: ubuntu-latest - needs: setup - outputs: - PROFILER_COMPUTE_NAME: ${{ steps.set_profiler_compute_info.outputs.PROFILER_COMPUTE_NAME }} - steps: - name: Set profiler compute info id: set_profiler_compute_info run: | @@ -55,6 +50,19 @@ jobs: steps: - name: Check out repo uses: actions/checkout@v2 + - name: Azure login + uses: azure/login@v1 + with: + creds: ${{secrets.AZURE_CREDENTIALS}} + - name: Install dependencies + run: | + az extension add -n ml -y + sudo apt-get update -y && sudo apt-get install jq + - name: Setup az environment + run: | + az config set defaults.workspace=${{secrets.AML_WORKSPACE}} + az config set defaults.group=${{secrets.RESOURCE_GROUP}} + az account set -s ${{secrets.SUBSCRIPTION_ID}} - name: Generate unique online-endpoint name and online-deployment name run: | export ENDPOINT_NAME=endpt-`echo $RANDOM` From 22fca3bfd62c18455e92bf8803245f4dac102551 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 11:53:41 +0800 Subject: [PATCH 23/56] fix workflow error --- .github/workflows/profile.yml | 4 ++-- code/profiling/create-profiling-compute.sh | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index 16164e3..a7fa9f0 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -7,7 +7,7 @@ on: required: true default: '["Standard_F2s_v2:1", "Standard_F4s_v2:2"]' jobs: - create_profiling_compute: + create_profiler_compute: runs-on: ubuntu-latest outputs: PROFILER_COMPUTE_NAME: ${{ steps.set_profiler_compute_info.outputs.PROFILER_COMPUTE_NAME }} @@ -42,7 +42,7 @@ jobs: profiling: runs-on: ubuntu-latest - needs: create_profiling_compute + needs: create_profiler_compute strategy: fail-fast: false matrix: diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index 83d3bb1..b4a8b36 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,12 +18,12 @@ else fi # create role assignment for acessing workspace resources -# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -# access_token=`az account get-access-token --query accessToken -o tsv` -# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +access_token=`az account get-access-token --query accessToken -o tsv` +compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file From 587b71396dad958cae8ff4787e9fecceb3417996 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 15:10:55 +0800 Subject: [PATCH 24/56] update for testing purpose only --- code/profiling/create-profiling-compute.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index b4a8b36..83d3bb1 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,12 +18,12 @@ else fi # create role assignment for acessing workspace resources -compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -access_token=`az account get-access-token --query accessToken -o tsv` -compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +# access_token=`az account get-access-token --query accessToken -o tsv` +# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file From 64b82c13abc48cfe035bac78072deb0a7ea76515 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 15:51:17 +0800 Subject: [PATCH 25/56] fix workflow errors --- code/profiling/how-to-profile-online-endpoint.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 02c692d..017cde9 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -14,8 +14,8 @@ export ENDPOINT_NAME="${ENDPOINT_NAME}" export DEPLOYMENT_NAME="${DEPLOYMENT_NAME}" export SKU_CONNECTION_PAIR=${SKU_CONNECTION_PAIR} -export PROFILING_TOOL="" # allowed values: wrk, wrk2 and labench -export PROFILER_COMPUTE_NAME="" +export PROFILING_TOOL=wrk # allowed values: wrk, wrk2 and labench +export PROFILER_COMPUTE_NAME="${PROFILER_COMPUTE_NAME}" # the compute name for hosting the profiler export DURATION="" # time for running the profiling tool (duration for each wrk call or labench call), default value is 300s export CONNECTIONS=`echo $SKU_CONNECTION_PAIR | awk -F: '{print $2}'` # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set export THREAD="" # for wrk and wrk2 only, no. of threads allocated for the profiling tool, default value is 1 @@ -24,9 +24,6 @@ export CLIENTS="" # for labench only, no. of clients for the profiling tool, def export TIMEOUT="" # for labench only, timeout for each request, default value is 10s # -export PROFILING_TOOL=wrk -export PROFILER_COMPUTE_NAME="${PROFILER_COMPUTE_NAME}" # the compute name for hosting the profiler - # default_datastore_info=`az ml datastore show --name workspaceblobstore -o json` account_name=`echo $default_datastore_info | jq '.account_name' | sed "s/\"//g"` From df05a8fff48bff7bf3d69c4fbcf39cf38e1b3a3d Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 16:18:10 +0800 Subject: [PATCH 26/56] fix workflow errors --- .github/workflows/profile.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index a7fa9f0..e2c585a 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -81,7 +81,7 @@ jobs: ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} - PROFILER_COMPUTE_NAME: ${{needs.create_profiling_compute.outputs.PROFILER_COMPUTE_NAME}} + PROFILER_COMPUTE_NAME: ${{ jobs.create_profiler_compute.outputs.PROFILER_COMPUTE_NAME }} working-directory: code From fd66b8c578165537fe464fdbe3769aef22404d8b Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 16:19:12 +0800 Subject: [PATCH 27/56] fix workflow errors --- .github/workflows/profile.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index e2c585a..bb19441 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -81,7 +81,7 @@ jobs: ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} - PROFILER_COMPUTE_NAME: ${{ jobs.create_profiler_compute.outputs.PROFILER_COMPUTE_NAME }} + PROFILER_COMPUTE_NAME: ${{ needs.create_profiler_compute.outputs.PROFILER_COMPUTE_NAME }} working-directory: code From 95fa6f4100bd83136b1384c45d1975f27002ae49 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 18:07:28 +0800 Subject: [PATCH 28/56] fix workflow errors --- .github/workflows/profile.yml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index bb19441..3941a63 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -68,20 +68,21 @@ jobs: export ENDPOINT_NAME=endpt-`echo $RANDOM` echo ENDPOINT_NAME=$ENDPOINT_NAME >> $GITHUB_ENV echo DEPLOYMENT_NAME=$ENDPOINT_NAME-dep >> $GITHUB_ENV - - name: Create online-endpoint and online-deployment - run: bash -x profiling/create-online-endpoint.sh - env: - ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} - DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} - SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} - working-directory: code + echo PROFILER_COMPUTE_NAME=${{ needs.create_profiler_compute.outputs.PROFILER_COMPUTE_NAME }} >> $GITHUB_ENV + # - name: Create online-endpoint and online-deployment + # run: bash -x profiling/create-online-endpoint.sh + # env: + # ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} + # DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} + # SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} + # working-directory: code - name: Run profiling job run: bash -x profiling/how-to-profile-online-endpoint.sh env: ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} - PROFILER_COMPUTE_NAME: ${{ needs.create_profiler_compute.outputs.PROFILER_COMPUTE_NAME }} + PROFILER_COMPUTE_NAME: ${{ env.PROFILER_COMPUTE_NAME }} working-directory: code From 0dd4e01e559be00db105bf06e6673b83102d5b45 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 18:14:26 +0800 Subject: [PATCH 29/56] fix workflow errors --- .github/workflows/profile.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index 3941a63..afd87a1 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -28,7 +28,8 @@ jobs: - name: Set profiler compute info id: set_profiler_compute_info run: | - echo PROFILER_COMPUTE_NAME=profilingTest >> $GITHUB_ENV + export PROFILER_COMPUTE_NAME=profilingTest + echo PROFILER_COMPUTE_NAME=$PROFILER_COMPUTE_NAME >> $GITHUB_ENV echo PROFILER_COMPUTE_SIZE=Standard_F4s_v2 >> $GITHUB_ENV echo "::set-output name=PROFILER_COMPUTE_NAME::$PROFILER_COMPUTE_NAME" - name: Check out repo @@ -68,7 +69,6 @@ jobs: export ENDPOINT_NAME=endpt-`echo $RANDOM` echo ENDPOINT_NAME=$ENDPOINT_NAME >> $GITHUB_ENV echo DEPLOYMENT_NAME=$ENDPOINT_NAME-dep >> $GITHUB_ENV - echo PROFILER_COMPUTE_NAME=${{ needs.create_profiler_compute.outputs.PROFILER_COMPUTE_NAME }} >> $GITHUB_ENV # - name: Create online-endpoint and online-deployment # run: bash -x profiling/create-online-endpoint.sh # env: @@ -82,7 +82,7 @@ jobs: ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} - PROFILER_COMPUTE_NAME: ${{ env.PROFILER_COMPUTE_NAME }} + PROFILER_COMPUTE_NAME: ${{ needs.create_profiler_compute.outputs.PROFILER_COMPUTE_NAME }} working-directory: code From 2ed3fb851f8b4a89cf401f715b157442853189a5 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 18:24:02 +0800 Subject: [PATCH 30/56] fix workflow errors --- .github/workflows/profile.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index afd87a1..c418b02 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -69,13 +69,13 @@ jobs: export ENDPOINT_NAME=endpt-`echo $RANDOM` echo ENDPOINT_NAME=$ENDPOINT_NAME >> $GITHUB_ENV echo DEPLOYMENT_NAME=$ENDPOINT_NAME-dep >> $GITHUB_ENV - # - name: Create online-endpoint and online-deployment - # run: bash -x profiling/create-online-endpoint.sh - # env: - # ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} - # DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} - # SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} - # working-directory: code + - name: Create online-endpoint and online-deployment + run: bash -x profiling/create-online-endpoint.sh + env: + ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} + DEPLOYMENT_NAME: ${{ env.DEPLOYMENT_NAME }} + SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} + working-directory: code - name: Run profiling job run: bash -x profiling/how-to-profile-online-endpoint.sh env: From b72030dc6557153ac325907eeee30de59ef9ab59 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 18 Jan 2022 22:36:29 +0800 Subject: [PATCH 31/56] uncomment codes --- code/profiling/create-profiling-compute.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index 83d3bb1..b4a8b36 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,12 +18,12 @@ else fi # create role assignment for acessing workspace resources -# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -# access_token=`az account get-access-token --query accessToken -o tsv` -# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +access_token=`az account get-access-token --query accessToken -o tsv` +compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file From 1b2e3b7146234263b523c9d0ded0ec061392498f Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Mon, 24 Jan 2022 22:45:09 +0800 Subject: [PATCH 32/56] make online-endpoint deletion into separated step --- .github/workflows/profile.yml | 5 +++++ code/profiling/create-profiling-compute.sh | 16 ++++++++-------- code/profiling/delete-online-endpoint.sh | 7 +++++++ code/profiling/how-to-profile-online-endpoint.sh | 6 +----- 4 files changed, 21 insertions(+), 13 deletions(-) create mode 100644 code/profiling/delete-online-endpoint.sh diff --git a/.github/workflows/profile.yml b/.github/workflows/profile.yml index c418b02..c1070c4 100644 --- a/.github/workflows/profile.yml +++ b/.github/workflows/profile.yml @@ -84,6 +84,11 @@ jobs: SKU_CONNECTION_PAIR: ${{ matrix.sku_connection_pair }} PROFILER_COMPUTE_NAME: ${{ needs.create_profiler_compute.outputs.PROFILER_COMPUTE_NAME }} working-directory: code + - name: Delete online-endpoint and online-deployment + run: bash -x profiling/delete-online-endpoint.sh + env: + ENDPOINT_NAME: ${{ env.ENDPOINT_NAME }} + working-directory: code \ No newline at end of file diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index b4a8b36..83d3bb1 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,12 +18,12 @@ else fi # create role assignment for acessing workspace resources -compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -access_token=`az account get-access-token --query accessToken -o tsv` -compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +# access_token=`az account get-access-token --query accessToken -o tsv` +# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file diff --git a/code/profiling/delete-online-endpoint.sh b/code/profiling/delete-online-endpoint.sh new file mode 100644 index 0000000..c07fa3b --- /dev/null +++ b/code/profiling/delete-online-endpoint.sh @@ -0,0 +1,7 @@ +# +export ENDPOINT_NAME=${ENDPOINT_NAME} +# + +# +az ml online-endpoint delete --name $ENDPOINT_NAME -y +# \ No newline at end of file diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 017cde9..0340b4d 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -64,8 +64,4 @@ sleep 10 # az ml job download --name $run_id --download-path report_$run_id echo "Job result has been downloaded to dir report_$run_id" -# - -# -az ml online-endpoint delete --name $ENDPOINT_NAME -y -# \ No newline at end of file +# \ No newline at end of file From de6e9fc270560ab8a7b1184a2bdc2d0cc2021ae5 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 15 Feb 2022 17:51:04 +0800 Subject: [PATCH 33/56] add tags to jobs --- code/profiling/how-to-profile-online-endpoint.sh | 1 + code/profiling/profiling_job_tmpl.yml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 0340b4d..9c707c0 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -45,6 +45,7 @@ sed \ -e "s/<% TIMEOUT %>/$TIMEOUT/g" \ -e "s/<% THREAD %>/$THREAD/g" \ -e "s/<% COMPUTE_NAME %>/$PROFILER_COMPUTE_NAME/g" \ + -e "s/<% SKU_CONNECTION_PAIR %>/$SKU_CONNECTION_PAIR/g" profiling/profiling_job_tmpl.yml > ${ENDPOINT_NAME}_profiling_job.yml # diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index 5b83287..d01d88d 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -18,3 +18,5 @@ compute: "azureml:<% COMPUTE_NAME %>" inputs: payload: file: azureml://datastores/workspaceblobstore/paths/profiling_payloads/<% ENDPOINT_NAME %>_payload.txt +tags: + sku_conn: "<% SKU_CONNECTION_PAIR %>" From 6b27afd8ccbc25f1a57a12bd421b60ef0d4dae36 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 15 Feb 2022 17:55:05 +0800 Subject: [PATCH 34/56] add job display name --- code/profiling/profiling_job_tmpl.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index d01d88d..a0772e2 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -2,6 +2,7 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: > profiling.sh -p {inputs.payload} experiment_name: profiling-job +display_name: <% SKU_CONNECTION_PAIR %> environment: image: docker.io/rachyong/profilers:latest environment_variables: @@ -17,6 +18,4 @@ environment_variables: compute: "azureml:<% COMPUTE_NAME %>" inputs: payload: - file: azureml://datastores/workspaceblobstore/paths/profiling_payloads/<% ENDPOINT_NAME %>_payload.txt -tags: - sku_conn: "<% SKU_CONNECTION_PAIR %>" + file: azureml://datastores/workspaceblobstore/paths/profiling_payloads/<% ENDPOINT_NAME %>_payload.txt \ No newline at end of file From 95bf0512a774fa37ce20971e42d8e0584bd0563e Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 15 Feb 2022 22:54:27 +0800 Subject: [PATCH 35/56] fix pipeline --- code/profiling/how-to-profile-online-endpoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 9c707c0..06e6945 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -45,7 +45,7 @@ sed \ -e "s/<% TIMEOUT %>/$TIMEOUT/g" \ -e "s/<% THREAD %>/$THREAD/g" \ -e "s/<% COMPUTE_NAME %>/$PROFILER_COMPUTE_NAME/g" \ - -e "s/<% SKU_CONNECTION_PAIR %>/$SKU_CONNECTION_PAIR/g" + -e "s/<% SKU_CONNECTION_PAIR %>/$SKU_CONNECTION_PAIR/g" \ profiling/profiling_job_tmpl.yml > ${ENDPOINT_NAME}_profiling_job.yml # From eee3b9c2cd3494f68a89d7669a04b28f48a9946d Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 17 Feb 2022 16:32:43 +0800 Subject: [PATCH 36/56] modify profilers image --- code/profiling/profiling_job_tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index a0772e2..43207b0 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -4,7 +4,7 @@ command: > experiment_name: profiling-job display_name: <% SKU_CONNECTION_PAIR %> environment: - image: docker.io/rachyong/profilers:latest + image: mcr.microsoft.com/azureml/profilers:latest environment_variables: ONLINE_ENDPOINT: "<% ENDPOINT_NAME %>" DEPLOYMENT: "<% DEPLOYMENT_NAME %>" From 07082d7aeca2fd2ca4e2fd14a5e67bd17f3fd933 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 25 Feb 2022 16:36:22 +0800 Subject: [PATCH 37/56] update image name --- code/profiling/profiling_job_tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index 43207b0..bb5fe64 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -4,7 +4,7 @@ command: > experiment_name: profiling-job display_name: <% SKU_CONNECTION_PAIR %> environment: - image: mcr.microsoft.com/azureml/profilers:latest + image: mcr.microsoft.com/azureml/online-endpoints-model-profiler:latest environment_variables: ONLINE_ENDPOINT: "<% ENDPOINT_NAME %>" DEPLOYMENT: "<% DEPLOYMENT_NAME %>" From d58de8b4866bc8949a8c1af56349d60919eaecba Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 1 Mar 2022 15:00:50 +0800 Subject: [PATCH 38/56] modify profiling job starting command --- code/profiling/profiling_job_tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index bb5fe64..b20d511 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: > - profiling.sh -p {inputs.payload} + cd /profiling-service && python profile.py ${{inputs.payload}} experiment_name: profiling-job display_name: <% SKU_CONNECTION_PAIR %> environment: From c70b0cea9161118ec0b639b1ee28f5b11b963136 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Tue, 1 Mar 2022 16:18:48 +0800 Subject: [PATCH 39/56] uncomment the part for creating compute role assignments --- code/profiling/create-profiling-compute.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index 83d3bb1..b4a8b36 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,12 +18,12 @@ else fi # create role assignment for acessing workspace resources -# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -# access_token=`az account get-access-token --query accessToken -o tsv` -# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +access_token=`az account get-access-token --query accessToken -o tsv` +compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file From 6907e5706bd17caf24739eb9cbc7f02bb1aa315a Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Wed, 16 Mar 2022 16:19:05 +0800 Subject: [PATCH 40/56] update yaml schema for new cli --- code/online-endpoint/blue-deployment-tmpl.yml | 5 ++--- code/profiling/profiling_job_tmpl.yml | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/code/online-endpoint/blue-deployment-tmpl.yml b/code/online-endpoint/blue-deployment-tmpl.yml index 39c891f..d3c328a 100644 --- a/code/online-endpoint/blue-deployment-tmpl.yml +++ b/code/online-endpoint/blue-deployment-tmpl.yml @@ -2,10 +2,9 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.sch name: blue endpoint_name: my-endpoint model: - local_path: model-1/model/sklearn_regression_model.pkl + path: model-1/model/sklearn_regression_model.pkl code_configuration: - code: - local_path: model-1/onlinescoring/ + code: model-1/onlinescoring/ scoring_script: score.py environment: conda_file: model-1/environment/conda.yml diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index b20d511..3e90c42 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -18,4 +18,5 @@ environment_variables: compute: "azureml:<% COMPUTE_NAME %>" inputs: payload: - file: azureml://datastores/workspaceblobstore/paths/profiling_payloads/<% ENDPOINT_NAME %>_payload.txt \ No newline at end of file + type: uri_file + path: azureml://datastores/workspaceblobstore/paths/profiling_payloads/<% ENDPOINT_NAME %>_payload.txt \ No newline at end of file From 32a9d8cdd64738ca3d2d34281f1cda2aedb6ff56 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 14 Apr 2022 21:05:17 +0800 Subject: [PATCH 41/56] modify profiling job template --- code/profiling/profiling_job_tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index 3e90c42..90a91a9 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: > - cd /profiling-service && python profile.py ${{inputs.payload}} + python -m online_endpoints_model_profiler ${{inputs.payload}} experiment_name: profiling-job display_name: <% SKU_CONNECTION_PAIR %> environment: From a57d9ad27e870bf036e2c50b3e2a0ea40f957fba Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 14 Apr 2022 21:10:28 +0800 Subject: [PATCH 42/56] comment for test purpose --- code/profiling/create-profiling-compute.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index b4a8b36..83d3bb1 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,12 +18,12 @@ else fi # create role assignment for acessing workspace resources -compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -access_token=`az account get-access-token --query accessToken -o tsv` -compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +# access_token=`az account get-access-token --query accessToken -o tsv` +# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file From 625969b56c1644dcc473060783df3d176083bae8 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 15 Apr 2022 10:50:45 +0800 Subject: [PATCH 43/56] uncomment code --- code/profiling/create-profiling-compute.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index 83d3bb1..b4a8b36 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,12 +18,12 @@ else fi # create role assignment for acessing workspace resources -# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -# access_token=`az account get-access-token --query accessToken -o tsv` -# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +access_token=`az account get-access-token --query accessToken -o tsv` +compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file From 7a765a27640e6810897a53b1d3ff9ba794d36e06 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 15 Apr 2022 11:00:28 +0800 Subject: [PATCH 44/56] fix code error --- code/profiling/create-profiling-compute.sh | 16 ++++++++-------- code/profiling/how-to-profile-online-endpoint.sh | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index b4a8b36..83d3bb1 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,12 +18,12 @@ else fi # create role assignment for acessing workspace resources -compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -access_token=`az account get-access-token --query accessToken -o tsv` -compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +# access_token=`az account get-access-token --query accessToken -o tsv` +# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 06e6945..2148258 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -18,7 +18,7 @@ export PROFILING_TOOL=wrk # allowed values: wrk, wrk2 and labench export PROFILER_COMPUTE_NAME="${PROFILER_COMPUTE_NAME}" # the compute name for hosting the profiler export DURATION="" # time for running the profiling tool (duration for each wrk call or labench call), default value is 300s export CONNECTIONS=`echo $SKU_CONNECTION_PAIR | awk -F: '{print $2}'` # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set -export THREAD="" # for wrk and wrk2 only, no. of threads allocated for the profiling tool, default value is 1 +export THREAD=1 # for wrk and wrk2 only, no. of threads allocated for the profiling tool, default value is 1 export TARGET_RPS="" # for labench and wrk2 only, target rps for the profiling tool, default value is 50 export CLIENTS="" # for labench only, no. of clients for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set export TIMEOUT="" # for labench only, timeout for each request, default value is 10s From abff912a661cf7e51ffb6ad7e92fecdeb52c499f Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 15 Apr 2022 11:21:35 +0800 Subject: [PATCH 45/56] fix code error --- code/profiling/how-to-profile-online-endpoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index 2148258..d499933 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -16,7 +16,7 @@ export DEPLOYMENT_NAME="${DEPLOYMENT_NAME}" export SKU_CONNECTION_PAIR=${SKU_CONNECTION_PAIR} export PROFILING_TOOL=wrk # allowed values: wrk, wrk2 and labench export PROFILER_COMPUTE_NAME="${PROFILER_COMPUTE_NAME}" # the compute name for hosting the profiler -export DURATION="" # time for running the profiling tool (duration for each wrk call or labench call), default value is 300s +export DURATION="300s" # time for running the profiling tool (duration for each wrk call or labench call), default value is 300s export CONNECTIONS=`echo $SKU_CONNECTION_PAIR | awk -F: '{print $2}'` # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set export THREAD=1 # for wrk and wrk2 only, no. of threads allocated for the profiling tool, default value is 1 export TARGET_RPS="" # for labench and wrk2 only, target rps for the profiling tool, default value is 50 From d504a6a0a6b1e92e2a954df6a2fc5d716a034b6f Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 15 Apr 2022 11:40:20 +0800 Subject: [PATCH 46/56] add code for testing purpose only --- code/profiling/how-to-profile-online-endpoint.sh | 4 ++-- code/profiling/profiling_job_tmpl.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/code/profiling/how-to-profile-online-endpoint.sh b/code/profiling/how-to-profile-online-endpoint.sh index d499933..06e6945 100644 --- a/code/profiling/how-to-profile-online-endpoint.sh +++ b/code/profiling/how-to-profile-online-endpoint.sh @@ -16,9 +16,9 @@ export DEPLOYMENT_NAME="${DEPLOYMENT_NAME}" export SKU_CONNECTION_PAIR=${SKU_CONNECTION_PAIR} export PROFILING_TOOL=wrk # allowed values: wrk, wrk2 and labench export PROFILER_COMPUTE_NAME="${PROFILER_COMPUTE_NAME}" # the compute name for hosting the profiler -export DURATION="300s" # time for running the profiling tool (duration for each wrk call or labench call), default value is 300s +export DURATION="" # time for running the profiling tool (duration for each wrk call or labench call), default value is 300s export CONNECTIONS=`echo $SKU_CONNECTION_PAIR | awk -F: '{print $2}'` # for wrk and wrk2 only, no. of connections for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set -export THREAD=1 # for wrk and wrk2 only, no. of threads allocated for the profiling tool, default value is 1 +export THREAD="" # for wrk and wrk2 only, no. of threads allocated for the profiling tool, default value is 1 export TARGET_RPS="" # for labench and wrk2 only, target rps for the profiling tool, default value is 50 export CLIENTS="" # for labench only, no. of clients for the profiling tool, default value is set to be the same as the no. of workers, or 1 if no. of workers is not set export TIMEOUT="" # for labench only, timeout for each request, default value is 10s diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index 90a91a9..e67c258 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -4,7 +4,7 @@ command: > experiment_name: profiling-job display_name: <% SKU_CONNECTION_PAIR %> environment: - image: mcr.microsoft.com/azureml/online-endpoints-model-profiler:latest + image: rachyong/profilers:20220415.1 environment_variables: ONLINE_ENDPOINT: "<% ENDPOINT_NAME %>" DEPLOYMENT: "<% DEPLOYMENT_NAME %>" From 71d48812dbfa4f282def607316164e2f6a31cf99 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 15 Apr 2022 12:20:31 +0800 Subject: [PATCH 47/56] add code for testing purpose only --- code/profiling/create-profiling-compute.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index 83d3bb1..d87731e 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -5,7 +5,7 @@ export PROFILER_COMPUTE_SIZE="${PROFILER_COMPUTE_SIZE}" # the compute size for h # echo "Creating Compute $PROFILER_COMPUTE_NAME ..." -az ml compute create --name $PROFILER_COMPUTE_NAME --size $PROFILER_COMPUTE_SIZE --identity-type SystemAssigned --type amlcompute +az ml compute create --name $PROFILER_COMPUTE_NAME --size $PROFILER_COMPUTE_SIZE --identity-type SystemAssigned --type amlcompute --max-instances 3 # check compute status compute_status=`az ml compute show --name $PROFILER_COMPUTE_NAME --query "provisioning_state" -o tsv` From 71d8853918430cc85e6e65b630cabef984a1ad6a Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 15 Apr 2022 14:07:02 +0800 Subject: [PATCH 48/56] add code for testing purpose only --- code/profiling/create-profiling-compute.sh | 16 ++++++++-------- code/profiling/profiling_job_tmpl.yml | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index d87731e..3c35d2e 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,12 +18,12 @@ else fi # create role assignment for acessing workspace resources -# compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -# workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -# access_token=`az account get-access-token --query accessToken -o tsv` -# compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -# if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -# identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` -# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` +workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` +access_token=`az account get-access-token --query accessToken -o tsv` +compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` +if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index e67c258..90a91a9 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -4,7 +4,7 @@ command: > experiment_name: profiling-job display_name: <% SKU_CONNECTION_PAIR %> environment: - image: rachyong/profilers:20220415.1 + image: mcr.microsoft.com/azureml/online-endpoints-model-profiler:latest environment_variables: ONLINE_ENDPOINT: "<% ENDPOINT_NAME %>" DEPLOYMENT: "<% DEPLOYMENT_NAME %>" From 20915efdb39cc820a8110107ff5054a34d709e89 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 15 Apr 2022 21:59:49 +0800 Subject: [PATCH 49/56] change create-compute script --- code/profiling/create-profiling-compute.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index 3c35d2e..21c8d90 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,12 +18,9 @@ else fi # create role assignment for acessing workspace resources -compute_resource_id=`az ml compute show --name $PROFILER_COMPUTE_NAME --query id -o tsv` -workspace_resource_id=`echo $compute_resource_id | sed 's/\(.*\)\/computes\/.*/\1/'` -access_token=`az account get-access-token --query accessToken -o tsv` -compute_info=`curl https://management.azure.com$compute_resource_id?api-version=2021-03-01-preview -H "Content-Type: application/json" -H "Authorization: Bearer $access_token"` -if [[ $? -ne 0 ]]; then echo "Failed to get info for compute $PROFILER_COMPUTE_NAME" && exit 1; fi -identity_object_id=`echo $compute_info | jq '.identity.principalId' | sed "s/\"//g"` +compute_info=`az ml compute show --name $PROFILER_COMPUTE_NAME --query '{"id": id, "identity_object_id": identity.principal_id}' -o json` +workspace_resource_id=`echo $compute_info | jq -r '.id' | sed 's/\(.*\)\/computes\/.*/\1/'` +identity_object_id=`echo $compute_info | jq -r '.identity_object_id'` az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file From 6294e696c3c631f121eda7da6e474802b001cb57 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Fri, 15 Apr 2022 22:01:37 +0800 Subject: [PATCH 50/56] modify for testing purpose --- code/profiling/create-profiling-compute.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index 21c8d90..319cc33 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,9 +18,9 @@ else fi # create role assignment for acessing workspace resources -compute_info=`az ml compute show --name $PROFILER_COMPUTE_NAME --query '{"id": id, "identity_object_id": identity.principal_id}' -o json` -workspace_resource_id=`echo $compute_info | jq -r '.id' | sed 's/\(.*\)\/computes\/.*/\1/'` -identity_object_id=`echo $compute_info | jq -r '.identity_object_id'` -az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +# compute_info=`az ml compute show --name $PROFILER_COMPUTE_NAME --query '{"id": id, "identity_object_id": identity.principal_id}' -o json` +# workspace_resource_id=`echo $compute_info | jq -r '.id' | sed 's/\(.*\)\/computes\/.*/\1/'` +# identity_object_id=`echo $compute_info | jq -r '.identity_object_id'` +# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file From 34ea3ee3c706f1b4901d7242df76d32f953a6cc7 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Wed, 8 Jun 2022 00:31:37 +0800 Subject: [PATCH 51/56] uncomment code --- code/profiling/create-profiling-compute.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index 319cc33..21c8d90 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -18,9 +18,9 @@ else fi # create role assignment for acessing workspace resources -# compute_info=`az ml compute show --name $PROFILER_COMPUTE_NAME --query '{"id": id, "identity_object_id": identity.principal_id}' -o json` -# workspace_resource_id=`echo $compute_info | jq -r '.id' | sed 's/\(.*\)\/computes\/.*/\1/'` -# identity_object_id=`echo $compute_info | jq -r '.identity_object_id'` -# az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id -# if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi +compute_info=`az ml compute show --name $PROFILER_COMPUTE_NAME --query '{"id": id, "identity_object_id": identity.principal_id}' -o json` +workspace_resource_id=`echo $compute_info | jq -r '.id' | sed 's/\(.*\)\/computes\/.*/\1/'` +identity_object_id=`echo $compute_info | jq -r '.identity_object_id'` +az role assignment create --role Contributor --assignee-object-id $identity_object_id --scope $workspace_resource_id +if [[ $? -ne 0 ]]; then echo "Failed to create role assignment for compute $PROFILER_COMPUTE_NAME" && exit 1; fi # \ No newline at end of file From 12c78a77e2e63a56e7daf48e9f15178511b7eb9d Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Wed, 8 Jun 2022 00:34:27 +0800 Subject: [PATCH 52/56] update profiling template --- code/profiling/profiling_job_tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index 90a91a9..3e90c42 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: > - python -m online_endpoints_model_profiler ${{inputs.payload}} + cd /profiling-service && python profile.py ${{inputs.payload}} experiment_name: profiling-job display_name: <% SKU_CONNECTION_PAIR %> environment: From 950cc3fc5dd07cb491e3ae7269581b67af96352e Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 9 Jun 2022 15:09:26 +0800 Subject: [PATCH 53/56] add profiling template change --- code/profiling/profiling_job_tmpl.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/profiling/profiling_job_tmpl.yml b/code/profiling/profiling_job_tmpl.yml index 3e90c42..45eb576 100644 --- a/code/profiling/profiling_job_tmpl.yml +++ b/code/profiling/profiling_job_tmpl.yml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: > - cd /profiling-service && python profile.py ${{inputs.payload}} + python -m online_endpoints_model_profiler --payload_path ${{inputs.payload}} experiment_name: profiling-job display_name: <% SKU_CONNECTION_PAIR %> environment: From 0de197e7ddf7666c272ced5acf1b56e55eca3be5 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 9 Jun 2022 15:22:14 +0800 Subject: [PATCH 54/56] skip compute creation if compute exists already --- code/profiling/create-profiling-compute.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/code/profiling/create-profiling-compute.sh b/code/profiling/create-profiling-compute.sh index 21c8d90..b3f8c9f 100644 --- a/code/profiling/create-profiling-compute.sh +++ b/code/profiling/create-profiling-compute.sh @@ -4,6 +4,10 @@ export PROFILER_COMPUTE_SIZE="${PROFILER_COMPUTE_SIZE}" # the compute size for h # # +# skip compute creation if compute exists already +az ml compute show --name $PROFILER_COMPUTE_NAME +if [[ $? -eq 0 ]]; then echo "compute $PROFILER_COMPUTE_NAME exists already, will skip creation and role assignment." && exit 0; fi + echo "Creating Compute $PROFILER_COMPUTE_NAME ..." az ml compute create --name $PROFILER_COMPUTE_NAME --size $PROFILER_COMPUTE_SIZE --identity-type SystemAssigned --type amlcompute --max-instances 3 From 3b65e1c65739d9d4dc0ade44ac9334b48147150c Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 9 Jun 2022 21:35:59 +0800 Subject: [PATCH 55/56] fix deployment files --- code/online-endpoint/blue-deployment-tmpl.yml | 2 +- code/online-endpoint/model-1/environment/conda.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/code/online-endpoint/blue-deployment-tmpl.yml b/code/online-endpoint/blue-deployment-tmpl.yml index d3c328a..f0c627d 100644 --- a/code/online-endpoint/blue-deployment-tmpl.yml +++ b/code/online-endpoint/blue-deployment-tmpl.yml @@ -8,7 +8,7 @@ code_configuration: scoring_script: score.py environment: conda_file: model-1/environment/conda.yml - image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1 + image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest instance_type: <% COMPUTER_SIZE %> instance_count: 1 request_settings: diff --git a/code/online-endpoint/model-1/environment/conda.yml b/code/online-endpoint/model-1/environment/conda.yml index c23bd70..1f12d22 100644 --- a/code/online-endpoint/model-1/environment/conda.yml +++ b/code/online-endpoint/model-1/environment/conda.yml @@ -8,6 +8,6 @@ dependencies: - scikit-learn=0.24.2 - scipy=1.7.1 - pip: - - azureml-defaults==1.33.0 + - azureml-defaults==1.38.0 - inference-schema[numpy-support]==1.3.0 - - joblib==1.0.1 + - joblib==1.0.1 \ No newline at end of file From ae57b40972977ece89a50f2a89a86e0fa3060494 Mon Sep 17 00:00:00 2001 From: Xin Yong Date: Thu, 9 Jun 2022 22:04:23 +0800 Subject: [PATCH 56/56] update online-endpoint and online-deployment file --- code/online-endpoint/blue-deployment-tmpl.yml | 4 ++-- .../model-1/environment/conda.yml | 3 +-- .../model-1/onlinescoring/score.py | 5 +++-- .../onlinescoring/score_managedidentity.py | 3 ++- .../model-2/environment/conda.yml | 3 +-- .../model-2/onlinescoring/score.py | 18 ++++++++++++------ 6 files changed, 21 insertions(+), 15 deletions(-) diff --git a/code/online-endpoint/blue-deployment-tmpl.yml b/code/online-endpoint/blue-deployment-tmpl.yml index f0c627d..44622aa 100644 --- a/code/online-endpoint/blue-deployment-tmpl.yml +++ b/code/online-endpoint/blue-deployment-tmpl.yml @@ -2,13 +2,13 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.sch name: blue endpoint_name: my-endpoint model: - path: model-1/model/sklearn_regression_model.pkl + path: model-1/model/ code_configuration: code: model-1/onlinescoring/ scoring_script: score.py environment: conda_file: model-1/environment/conda.yml - image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest + image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210727.v1 instance_type: <% COMPUTER_SIZE %> instance_count: 1 request_settings: diff --git a/code/online-endpoint/model-1/environment/conda.yml b/code/online-endpoint/model-1/environment/conda.yml index 1f12d22..97f5beb 100644 --- a/code/online-endpoint/model-1/environment/conda.yml +++ b/code/online-endpoint/model-1/environment/conda.yml @@ -9,5 +9,4 @@ dependencies: - scipy=1.7.1 - pip: - azureml-defaults==1.38.0 - - inference-schema[numpy-support]==1.3.0 - - joblib==1.0.1 \ No newline at end of file + - joblib==1.0.1 diff --git a/code/online-endpoint/model-1/onlinescoring/score.py b/code/online-endpoint/model-1/onlinescoring/score.py index ac565f7..5d5c3a7 100644 --- a/code/online-endpoint/model-1/onlinescoring/score.py +++ b/code/online-endpoint/model-1/onlinescoring/score.py @@ -13,8 +13,9 @@ def init(): global model # AZUREML_MODEL_DIR is an environment variable created during deployment. # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION) + # Please provide your model's folder name if there is one model_path = os.path.join( - os.getenv("AZUREML_MODEL_DIR"), "sklearn_regression_model.pkl" + os.getenv("AZUREML_MODEL_DIR"), "model/sklearn_regression_model.pkl" ) # deserialize the model file back into a sklearn model model = joblib.load(model_path) @@ -27,7 +28,7 @@ def run(raw_data): In the example we extract the data from the json input and call the scikit-learn model's predict() method and return the result back """ - logging.info("Request received") + logging.info("model 1: request received") data = json.loads(raw_data)["data"] data = numpy.array(data) result = model.predict(data) diff --git a/code/online-endpoint/model-1/onlinescoring/score_managedidentity.py b/code/online-endpoint/model-1/onlinescoring/score_managedidentity.py index c1704fa..18eea27 100644 --- a/code/online-endpoint/model-1/onlinescoring/score_managedidentity.py +++ b/code/online-endpoint/model-1/onlinescoring/score_managedidentity.py @@ -56,8 +56,9 @@ def init(): # AZUREML_MODEL_DIR is an environment variable created during deployment. # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION) # For multiple models, it points to the folder containing all deployed models (./azureml-models) + # Please provide your model's folder name if there is one model_path = os.path.join( - os.getenv("AZUREML_MODEL_DIR"), "sklearn_regression_model.pkl" + os.getenv("AZUREML_MODEL_DIR"), "model/sklearn_regression_model.pkl" ) # deserialize the model file back into a sklearn model model = joblib.load(model_path) diff --git a/code/online-endpoint/model-2/environment/conda.yml b/code/online-endpoint/model-2/environment/conda.yml index 30ba157..87af935 100644 --- a/code/online-endpoint/model-2/environment/conda.yml +++ b/code/online-endpoint/model-2/environment/conda.yml @@ -8,6 +8,5 @@ dependencies: - scikit-learn=0.24.2 - scipy=1.7.1 - pip: - - azureml-defaults==1.33.0 - - inference-schema[numpy-support]==1.3.0 + - azureml-defaults==1.38.0 - joblib==1.0.1 \ No newline at end of file diff --git a/code/online-endpoint/model-2/onlinescoring/score.py b/code/online-endpoint/model-2/onlinescoring/score.py index ac565f7..e248af3 100644 --- a/code/online-endpoint/model-2/onlinescoring/score.py +++ b/code/online-endpoint/model-2/onlinescoring/score.py @@ -13,8 +13,9 @@ def init(): global model # AZUREML_MODEL_DIR is an environment variable created during deployment. # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION) + # Please provide your model's folder name if there is one model_path = os.path.join( - os.getenv("AZUREML_MODEL_DIR"), "sklearn_regression_model.pkl" + os.getenv("AZUREML_MODEL_DIR"), "model/sklearn_regression_model.pkl" ) # deserialize the model file back into a sklearn model model = joblib.load(model_path) @@ -27,9 +28,14 @@ def run(raw_data): In the example we extract the data from the json input and call the scikit-learn model's predict() method and return the result back """ - logging.info("Request received") - data = json.loads(raw_data)["data"] - data = numpy.array(data) - result = model.predict(data) + logging.info("model 2: request received") + result = [0.5, 0.5] logging.info("Request processed") - return result.tolist() + # return hardcoded result so that it is easy to validate safe rollout scenario: https://docs.microsoft.com/en-us/azure/machine-learning/how-to-safely-rollout-managed-endpoints + return result + + # actual scoring logic for reference: + # data = json.loads(raw_data)["data"] + # data = numpy.array(data) + # result = model.predict(data) + # return result.tolist()