From d319d440ce02e831f48cf0ef8a573a95058fe557 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Tue, 17 Feb 2026 12:46:48 -0600 Subject: [PATCH 1/7] Initial commit --- .github/workflows/rocm-ci-lvl3.yml | 12 ++++++++++++ .github/workflows/rocm-ci.yml | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 .github/workflows/rocm-ci-lvl3.yml diff --git a/.github/workflows/rocm-ci-lvl3.yml b/.github/workflows/rocm-ci-lvl3.yml new file mode 100644 index 000000000..c82ab9b3d --- /dev/null +++ b/.github/workflows/rocm-ci-lvl3.yml @@ -0,0 +1,12 @@ +name: TransformerEngine CI Level 3 + +on: + pull_request: + types: [ labeled, synchronize, reopened ] + +jobs: + build_and_test: + if: ${{ contains(github.event.pull_request.labels.*.name, 'Level 3') }} + uses: ./.github/workflows/rocm-ci.yml + with: + test_level: '3' diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml index b8a08bfca..a755b11b0 100644 --- a/.github/workflows/rocm-ci.yml +++ b/.github/workflows/rocm-ci.yml @@ -15,6 +15,22 @@ on: - 'dev' - 'release_v1.**_rocm' - 'release_v2.**_rocm' + workflow_call: + inputs: + test_level: + description: 'Test Level (1-3)' + required: false + default: '1' + type: string + docker_image_override: + description: 'Manual Docker Image (Leave empty to use config file value)' + required: false + type: string + test_config_from_source: + description: 'DEBUG: Use config.json from current source branch instead of dev' + required: false + default: false + type: boolean workflow_dispatch: inputs: test_level: From 3ab6711aa556ffccfa63f5936e7e5321da634c18 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Tue, 17 Feb 2026 12:50:41 -0600 Subject: [PATCH 2/7] Disabled lvl 1 flow when lvl 3 is used --- .github/workflows/rocm-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml index a755b11b0..c77918c0d 100644 --- a/.github/workflows/rocm-ci.yml +++ b/.github/workflows/rocm-ci.yml @@ -52,6 
+52,7 @@ concurrency: jobs: build_and_test: + if: ${{ !(github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'Level 3')) }} name: Build and Test on GPU (${{ matrix.runner }}) timeout-minutes: 720 runs-on: ${{ matrix.runner }} From 591fb7c3dce5ecfcc978369bfc2696c2c41932ac Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Tue, 17 Feb 2026 13:04:35 -0600 Subject: [PATCH 3/7] Made workflows mutually exclusive --- .github/workflows/rocm-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml index c77918c0d..b918d8467 100644 --- a/.github/workflows/rocm-ci.yml +++ b/.github/workflows/rocm-ci.yml @@ -52,7 +52,7 @@ concurrency: jobs: build_and_test: - if: ${{ !(github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'Level 3')) }} + if: ${{ inputs.test_level == '3' || !(github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'Level 3')) }} name: Build and Test on GPU (${{ matrix.runner }}) timeout-minutes: 720 runs-on: ${{ matrix.runner }} From 9e1db70f5ba12adb383f54986ad265086ed87270 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Wed, 18 Feb 2026 10:22:27 -0600 Subject: [PATCH 4/7] Updated with general label-driven level system --- .github/workflows/rocm-ci-dispatch.yml | 114 +++++++++++++++++++++++++ .github/workflows/rocm-ci-lvl3.yml | 12 --- .github/workflows/rocm-ci.yml | 17 +--- 3 files changed, 117 insertions(+), 26 deletions(-) create mode 100644 .github/workflows/rocm-ci-dispatch.yml delete mode 100644 .github/workflows/rocm-ci-lvl3.yml diff --git a/.github/workflows/rocm-ci-dispatch.yml b/.github/workflows/rocm-ci-dispatch.yml new file mode 100644 index 000000000..e7f5ae29f --- /dev/null +++ b/.github/workflows/rocm-ci-dispatch.yml @@ -0,0 +1,114 @@ +name: TransformerEngine CI Dispatch + +on: + pull_request: + branches: + - 'dev' + - 'release_v1.**_rocm' + - 'release_v2.**_rocm' + 
types: [ labeled, unlabeled, synchronize, reopened ] + +permissions: + contents: read + actions: write + +jobs: + determine_level: + runs-on: ubuntu-latest + outputs: + test_level: ${{ steps.set_level.outputs.test_level }} + has_level: ${{ steps.set_level.outputs.has_level }} + removed_level: ${{ steps.set_level.outputs.removed_level }} + removed_was_highest: ${{ steps.set_level.outputs.removed_was_highest }} + steps: + - name: Determine CI level from labels + id: set_level + uses: actions/github-script@v7 + with: + script: | + const labels = (context.payload.pull_request.labels || []) + .map(label => label.name.toLowerCase()); + + const removedLabel = (context.payload.label?.name || '').toLowerCase(); + let removedLevel = 0; + if (removedLabel === 'ci-level 3') removedLevel = 3; + else if (removedLabel === 'ci-level 2') removedLevel = 2; + else if (removedLabel === 'ci-level 1') removedLevel = 1; + + let level = ''; + if (labels.includes('ci-level 3')) level = '3'; + else if (labels.includes('ci-level 2')) level = '2'; + else if (labels.includes('ci-level 1')) level = '1'; + + const currentLevel = level ? Number(level) : 0; + const removedWasHighest = removedLevel > 0 && removedLevel > currentLevel; + + core.setOutput('test_level', level); + core.setOutput('has_level', level ? 'true' : 'false'); + core.setOutput('removed_level', String(removedLevel)); + core.setOutput('removed_was_highest', removedWasHighest ? 
'true' : 'false'); + + cancel_others: + if: ${{ needs.determine_level.outputs.has_level == 'true' }} + needs: determine_level + runs-on: ubuntu-latest + steps: + - name: Cancel queued/in-progress runs + uses: actions/github-script@v7 + env: + CURRENT_LEVEL: ${{ needs.determine_level.outputs.test_level }} + with: + script: | + const prNumber = context.payload.pull_request.number; + const owner = context.repo.owner; + const repo = context.repo.repo; + const currentRunId = context.runId; + const currentLevel = Number(process.env.CURRENT_LEVEL || 0); + const action = context.payload.action; + const addedLabel = (context.payload.label?.name || '').toLowerCase(); + let addedLevel = 0; + if (addedLabel === 'ci-level 3') addedLevel = 3; + else if (addedLabel === 'ci-level 2') addedLevel = 2; + else if (addedLabel === 'ci-level 1') addedLevel = 1; + + const runs = await github.rest.actions.listWorkflowRuns({ + owner, + repo, + workflow_id: 'rocm-ci-dispatch.yml', + event: 'pull_request', + per_page: 100, + }); + + const toCancel = runs.data.workflow_runs.filter(run => { + const prMatch = (run.pull_requests || []).some(pr => pr.number === prNumber); + const active = run.status === 'queued' || run.status === 'in_progress'; + if (action === 'synchronize') { + return prMatch && active && run.id !== currentRunId; + } + const title = run.display_title || ''; + const match = title.match(/CI Level (\d+)/i); + const runLevel = match ? 
Number(match[1]) : 0; + if (action === 'labeled') { + const isLowerThanAdded = runLevel > 0 && runLevel < addedLevel; + return prMatch && active && isLowerThanAdded && run.id !== currentRunId; + } + const isLower = runLevel > 0 && runLevel < currentLevel; + return prMatch && active && isLower && run.id !== currentRunId; + }); + + for (const run of toCancel) { + core.info(`Canceling dispatch run ${run.id} for PR #${prNumber}`); + await github.rest.actions.cancelWorkflowRun({ + owner, + repo, + run_id: run.id, + }); + } + + dispatch: + if: ${{ (github.event.action != 'unlabeled' && needs.determine_level.outputs.has_level == 'true') || (github.event.action == 'unlabeled' && needs.determine_level.outputs.removed_was_highest == 'true' && needs.determine_level.outputs.has_level == 'true') }} + needs: [determine_level, cancel_others] + name: CI Level ${{ needs.determine_level.outputs.test_level }} + uses: ./.github/workflows/rocm-ci.yml + with: + test_level: ${{ needs.determine_level.outputs.test_level }} diff --git a/.github/workflows/rocm-ci-lvl3.yml b/.github/workflows/rocm-ci-lvl3.yml deleted file mode 100644 index c82ab9b3d..000000000 --- a/.github/workflows/rocm-ci-lvl3.yml +++ /dev/null @@ -1,12 +0,0 @@ -name: TransformerEngine CI Level 3 - -on: - pull_request: - types: [ labeled, synchronize, reopened ] - -jobs: - build_and_test: - if: ${{ contains(github.event.pull_request.labels.*.name, 'Level 3') }} - uses: ./.github/workflows/rocm-ci.yml - with: - test_level: '3' diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml index b918d8467..a679c89f5 100644 --- a/.github/workflows/rocm-ci.yml +++ b/.github/workflows/rocm-ci.yml @@ -5,16 +5,6 @@ name: TransformerEngine CI on: - push: - branches: - - 'dev' - - 'release_v1.*_rocm' - - 'release_v2.*_rocm' - pull_request: - branches: - - 'dev' - - 'release_v1.**_rocm' - - 'release_v2.**_rocm' workflow_call: inputs: test_level: @@ -47,13 +37,12 @@ on: default: false concurrency: - group: ${{ 
github.workflow }}-${{ github.ref }} - cancel-in-progress: true + group: ${{ github.event_name == 'workflow_call' && format('{0}-{1}-{2}', github.workflow, github.ref, github.run_id) || format('{0}-{1}', github.workflow, github.ref) }} + cancel-in-progress: ${{ github.event_name != 'workflow_call' }} jobs: build_and_test: - if: ${{ inputs.test_level == '3' || !(github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'Level 3')) }} - name: Build and Test on GPU (${{ matrix.runner }}) + name: Build and Test on GPU (${{ matrix.runner }}) - Level ${{ inputs.test_level || '1' }} timeout-minutes: 720 runs-on: ${{ matrix.runner }} strategy: From b3a515ebbd4aca27b5e019dfbf88bd9e62d510ff Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Wed, 18 Feb 2026 12:03:42 -0600 Subject: [PATCH 5/7] Restore automatic testing on dev/release push --- .github/workflows/rocm-ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml index a679c89f5..9d07b76fd 100644 --- a/.github/workflows/rocm-ci.yml +++ b/.github/workflows/rocm-ci.yml @@ -5,6 +5,11 @@ name: TransformerEngine CI on: + push: + branches: + - 'dev' + - 'release_v1.*_rocm' + - 'release_v2.*_rocm' workflow_call: inputs: test_level: From e757f41f52f9e80ed694a75422311e65b4043b09 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Mon, 23 Feb 2026 12:38:18 -0600 Subject: [PATCH 6/7] Added inline comments, updated workflow names --- .github/workflows/rocm-ci-dispatch.yml | 17 ++++++++++++++++- .github/workflows/rocm-ci.yml | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rocm-ci-dispatch.yml b/.github/workflows/rocm-ci-dispatch.yml index e7f5ae29f..86a6db085 100644 --- a/.github/workflows/rocm-ci-dispatch.yml +++ b/.github/workflows/rocm-ci-dispatch.yml @@ -1,4 +1,4 @@ -name: TransformerEngine CI Dispatch +name: PR Automatic CI on: pull_request: @@ -29,20 +29,24 @@ jobs: const labels = 
(context.payload.pull_request.labels || []) .map(label => label.name.toLowerCase()); + // Determine if a CI level label was removed and what level it was const removedLabel = (context.payload.label?.name || '').toLowerCase(); let removedLevel = 0; if (removedLabel === 'ci-level 3') removedLevel = 3; else if (removedLabel === 'ci-level 2') removedLevel = 2; else if (removedLabel === 'ci-level 1') removedLevel = 1; + // Determine the current highest CI level from remaining labels let level = ''; if (labels.includes('ci-level 3')) level = '3'; else if (labels.includes('ci-level 2')) level = '2'; else if (labels.includes('ci-level 1')) level = '1'; + // Check if the removed level was higher than the current level const currentLevel = level ? Number(level) : 0; const removedWasHighest = removedLevel > 0 && removedLevel > currentLevel; + // Set outputs for use in later jobs core.setOutput('test_level', level); core.setOutput('has_level', level ? 'true' : 'false'); core.setOutput('removed_level', String(removedLevel)); @@ -59,18 +63,23 @@ jobs: CURRENT_LEVEL: ${{ needs.determine_level.outputs.test_level }} with: script: | + // Cancel other queued or in-progress runs for the same PR: every such run on + // synchronize, otherwise only runs with a lower CI level const prNumber = context.payload.pull_request.number; const owner = context.repo.owner; const repo = context.repo.repo; const currentRunId = context.runId; const currentLevel = Number(process.env.CURRENT_LEVEL || 0); const action = context.payload.action; + + // If a label was added, determine its level to compare against other runs const addedLabel = (context.payload.label?.name || '').toLowerCase(); let addedLevel = 0; if (addedLabel === 'ci-level 3') addedLevel = 3; else if (addedLabel === 'ci-level 2') addedLevel = 2; else if (addedLabel === 'ci-level 1') addedLevel = 1; + // Fetch up to 100 pull_request-triggered runs of this workflow (filtered to this PR below) const runs = await github.rest.actions.listWorkflowRuns({ owner, repo, @@ -79,23 +88,29 @@ jobs: per_page: 100, }); + // Filter 
runs to find those that are for the same PR, + // are queued or in progress, and have a lower CI level const toCancel = runs.data.workflow_runs.filter(run => { const prMatch = (run.pull_requests || []).some(pr => pr.number === prNumber); const active = run.status === 'queued' || run.status === 'in_progress'; if (action === 'synchronize') { return prMatch && active && run.id !== currentRunId; } + // For every other action, compare CI levels parsed from the run's display title (no "CI Level N" in the title means level 0) const title = run.display_title || ''; const match = title.match(/CI Level (\d+)/i); const runLevel = match ? Number(match[1]) : 0; + // If a label was added, cancel runs with a lower level than the added label if (action === 'labeled') { const isLowerThanAdded = runLevel > 0 && runLevel < addedLevel; return prMatch && active && isLowerThanAdded && run.id !== currentRunId; } + // Otherwise (label removed or PR reopened), cancel runs with a lower level than the current level const isLower = runLevel > 0 && runLevel < currentLevel; return prMatch && active && isLower && run.id !== currentRunId; }); + // Cancel the identified runs for (const run of toCancel) { core.info(`Canceling dispatch run ${run.id} for PR #${prNumber}`); await github.rest.actions.cancelWorkflowRun({ diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml index 9d07b76fd..af5902b90 100644 --- a/.github/workflows/rocm-ci.yml +++ b/.github/workflows/rocm-ci.yml @@ -2,7 +2,7 @@ # # See LICENSE for license information. 
-name: TransformerEngine CI +name: Test and Build Branch on: push: From 54d72b885b9a858ac3cc76edcd85eb0e48c50037 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Mon, 23 Feb 2026 12:45:01 -0600 Subject: [PATCH 7/7] Added clarification on dispatch --- .github/workflows/rocm-ci-dispatch.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/rocm-ci-dispatch.yml b/.github/workflows/rocm-ci-dispatch.yml index 86a6db085..2f846ff86 100644 --- a/.github/workflows/rocm-ci-dispatch.yml +++ b/.github/workflows/rocm-ci-dispatch.yml @@ -121,7 +121,13 @@ jobs: } dispatch: - if: ${{ (github.event.action != 'unlabeled' && needs.determine_level.outputs.has_level == 'true') || (github.event.action == 'unlabeled' && needs.determine_level.outputs.removed_was_highest == 'true' && needs.determine_level.outputs.has_level == 'true') }} + # Run this job if there is a valid level to test (has_level == true), + # and any of the following are true: + # - A label was added (any label, not only a CI-level label) + # - A commit was pushed + # - The PR was reopened + # - The highest level label was removed (a lower level label remains) + if: ${{ needs.determine_level.outputs.has_level == 'true' && ((github.event.action != 'unlabeled') || (github.event.action == 'unlabeled' && needs.determine_level.outputs.removed_was_highest == 'true')) }} needs: [determine_level, cancel_others] name: CI Level ${{ needs.determine_level.outputs.test_level }} uses: ./.github/workflows/rocm-ci.yml