Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
bd7ca9a
👷 replace monitor checks by logs query
thomas-lebeau Nov 19, 2025
06a759e
✅ add unit test for deploy-prod-dc script
thomas-lebeau Nov 21, 2025
931ed9e
fixup! 👷 replace monitor checks by logs query
thomas-lebeau Dec 18, 2025
855e546
♻️ refactor check-monitors scripts to query by orgs id and message
thomas-lebeau Dec 19, 2025
54cf67e
🛠️ update error handling in check-monitors script to log missing data…
thomas-lebeau Dec 22, 2025
1f23c10
👷 refactor deployment scripts to remove hardcoded list of DCs and sites
thomas-lebeau Nov 20, 2025
b805742
♻️ replace ddtool with runtime-metadata-service API for datacenter fe…
thomas-lebeau Jan 7, 2026
db6bc46
✨ add test script for datacenter API in CI
thomas-lebeau Jan 7, 2026
6a534b9
🔍 add debug log for fetched datacenters in getAllDatacentersMetadata …
thomas-lebeau Jan 7, 2026
455dad7
fix datacenter response types
thomas-lebeau Jan 7, 2026
9019c76
fix: add selector to RMS call
thomas-lebeau Jan 7, 2026
eba4073
refactor: update deploy-prod-dc script to skip monitor checks for gov…
thomas-lebeau Jan 7, 2026
fca0209
fix: ts errors
thomas-lebeau Jan 7, 2026
38ec673
fix formating
thomas-lebeau Jan 7, 2026
eae7dc1
remove test script
thomas-lebeau Jan 7, 2026
c142ae7
fix: missing root -> gov renaming
thomas-lebeau Jan 7, 2026
f710785
feat: Deploy to private regions
thomas-lebeau Jan 7, 2026
deb6f86
Merge remote-tracking branch 'origin/main' into thomas.lebeau/no-hard…
thomas-lebeau Jan 8, 2026
454dac6
fix merge conflicts
thomas-lebeau Jan 8, 2026
2a815da
fix: tidy up
thomas-lebeau Jan 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 25 additions & 16 deletions .gitlab/deploy-auto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ stages:
- VERSION=$(node -p -e "require('./lerna.json').version")
- yarn
- yarn build:bundle
- node ./scripts/deploy/deploy-prod-dc.ts v${VERSION%%.*} $UPLOAD_PATH --check-telemetry-errors
- node ./scripts/deploy/deploy-prod-dc.ts v${VERSION%%.*} $DATACENTER --check-telemetry-errors

step-1_deploy-prod-minor-dcs:
when: manual
Expand All @@ -26,38 +26,47 @@ step-1_deploy-prod-minor-dcs:
- .base-configuration
- .deploy-prod
variables:
UPLOAD_PATH: minor-dcs
DATACENTER: minor-dcs

step-2_deploy-prod-eu1:
step-2_deploy-prod-private-regions:
needs:
- step-1_deploy-prod-minor-dcs
extends:
- .base-configuration
- .deploy-prod
variables:
UPLOAD_PATH: eu1
DATACENTER: private-regions

step-3_deploy-prod-us1:
step-3_deploy-prod-eu1:
needs:
- step-2_deploy-prod-eu1
- step-2_deploy-prod-private-regions
extends:
- .base-configuration
- .deploy-prod
variables:
UPLOAD_PATH: us1
DATACENTER: eu1

step-4_deploy-prod-gov:
step-4_deploy-prod-us1:
needs:
- step-3_deploy-prod-us1
- step-3_deploy-prod-eu1
extends:
- .base-configuration
- .deploy-prod
variables:
UPLOAD_PATH: root
DATACENTER: us1

step-5_publish-npm:
step-5_deploy-prod-gov:
needs:
- step-4_deploy-prod-gov
- step-4_deploy-prod-us1
extends:
- .base-configuration
- .deploy-prod
variables:
DATACENTER: gov

step-6_publish-npm:
needs:
- step-5_deploy-prod-gov
stage: deploy
extends:
- .base-configuration
Expand All @@ -66,9 +75,9 @@ step-5_publish-npm:
- yarn
- node ./scripts/deploy/publish-npm.ts

step-6_publish-developer-extension:
step-7_publish-developer-extension:
needs:
- step-5_publish-npm
- step-6_publish-npm
stage: deploy
extends:
- .base-configuration
Expand All @@ -77,9 +86,9 @@ step-6_publish-developer-extension:
- yarn
- node ./scripts/deploy/publish-developer-extension.ts

step-7_create-github-release:
step-8_create-github-release:
needs:
- step-6_publish-developer-extension
- step-7_publish-developer-extension
stage: deploy
extends:
- .base-configuration
Expand Down
31 changes: 19 additions & 12 deletions .gitlab/deploy-manual.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,44 @@ stages:
- VERSION=$(node -p -e "require('./lerna.json').version")
- yarn
- yarn build:bundle
- node ./scripts/deploy/deploy-prod-dc.ts v${VERSION%%.*} $UPLOAD_PATH --no-check-telemetry-errors
- node ./scripts/deploy/deploy-prod-dc.ts v${VERSION%%.*} $DATACENTER --no-check-telemetry-errors

step-1_deploy-prod-minor-dcs:
extends:
- .base-configuration
- .deploy-prod
variables:
UPLOAD_PATH: minor-dcs
DATACENTER: minor-dcs

step-2_deploy-prod-eu1:
step-2_deploy-prod-private-regions:
extends:
- .base-configuration
- .deploy-prod
variables:
UPLOAD_PATH: eu1
DATACENTER: private-regions

step-3_deploy-prod-us1:
step-3_deploy-prod-eu1:
extends:
- .base-configuration
- .deploy-prod
variables:
UPLOAD_PATH: us1
DATACENTER: eu1

step-4_deploy-prod-gov:
step-4_deploy-prod-us1:
extends:
- .base-configuration
- .deploy-prod
variables:
UPLOAD_PATH: root
DATACENTER: us1

step-5_publish-npm:
step-5_deploy-prod-gov:
extends:
- .base-configuration
- .deploy-prod
variables:
DATACENTER: gov

step-6_publish-npm:
stage: deploy
extends:
- .base-configuration
Expand All @@ -59,7 +66,7 @@ step-5_publish-npm:
- yarn
- node ./scripts/deploy/publish-npm.ts

step-6_publish-developer-extension:
step-7_publish-developer-extension:
stage: deploy
extends:
- .base-configuration
Expand All @@ -69,7 +76,7 @@ step-6_publish-developer-extension:
- yarn
- node ./scripts/deploy/publish-developer-extension.ts

step-7_create-github-release:
step-8_create-github-release:
stage: deploy
extends:
- .base-configuration
Expand All @@ -80,7 +87,7 @@ step-7_create-github-release:
- node scripts/release/create-github-release.ts

# This step is used to deploy the SDK to a new datacenter.
# the `UPLOAD_PATH` variable needs to be provided as an argument when starting the manual job
# the `DATACENTER` variable needs to be provided as an argument when starting the manual job
optional_step-deploy-to-new-datacenter:
extends:
- .base-configuration
Expand Down
41 changes: 26 additions & 15 deletions scripts/deploy/deploy-prod-dc.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,23 @@ import path from 'node:path'
import { beforeEach, before, describe, it, mock, type Mock } from 'node:test'
import { browserSdkVersion } from '../lib/browserSdkVersion.ts'
import type { CommandDetail } from './lib/testHelpers.ts'
import { mockModule, mockCommandImplementation } from './lib/testHelpers.ts'
import { mockModule, mockCommandImplementation, mockFetchHandlingError } from './lib/testHelpers.ts'

const currentBrowserSdkVersionMajor = browserSdkVersion.split('.')[0]

describe('deploy-prod-dc', () => {
const commandMock = mock.fn()
const checkTelemetryErrorsMock: Mock<(datacenters: string[], version: string) => Promise<void>> = mock.fn()
const fetchHandlingErrorMock = mock.fn()

let commands: CommandDetail[]
let checkTelemetryErrorsCalls: Array<{ version: string; datacenters: string[] }>

before(async () => {
mockFetchHandlingError(fetchHandlingErrorMock)
await mockModule(path.resolve(import.meta.dirname, '../lib/command.ts'), { command: commandMock })
await mockModule(path.resolve(import.meta.dirname, '../lib/executionUtils.ts'), {
fetchHandlingError: fetchHandlingErrorMock,
timeout: () => Promise.resolve(),
})
await mockModule(path.resolve(import.meta.dirname, './lib/checkTelemetryErrors.ts'), {
Expand Down Expand Up @@ -62,31 +65,39 @@ describe('deploy-prod-dc', () => {
])
})

it('should only check telemetry errors before deploying if the upload path is root', async () => {
await runScript('./deploy-prod-dc.ts', 'v6', 'root', '--check-telemetry-errors')
it('should deploy all minor datacenters', async () => {
await runScript('./deploy-prod-dc.ts', 'v6', 'minor-dcs', '--no-check-telemetry-errors')

// Should only call checkTelemetryErrors once (no gating for root)
assert.strictEqual(checkTelemetryErrorsCalls.length, 1)
assert.deepEqual(checkTelemetryErrorsCalls[0], {
version: `${currentBrowserSdkVersionMajor}.*`,
datacenters: ['root'],
})
// Should not call checkTelemetryErrors when --no-check-telemetry-errors is used
assert.strictEqual(checkTelemetryErrorsCalls.length, 0)

assert.deepEqual(commands, [
{ command: 'node ./scripts/deploy/deploy.ts prod v6 root' },
{ command: 'node ./scripts/deploy/upload-source-maps.ts v6 root' },
{ command: 'node ./scripts/deploy/deploy.ts prod v6 ap1,ap2,us3,us5' },
{ command: 'node ./scripts/deploy/upload-source-maps.ts v6 ap1,ap2,us3,us5' },
])
})

it('should deploy all minor datacenters', async () => {
await runScript('./deploy-prod-dc.ts', 'v6', 'minor-dcs', '--no-check-telemetry-errors')
it('should deploy all private regions', async () => {
await runScript('./deploy-prod-dc.ts', 'v6', 'private-regions', '--no-check-telemetry-errors')

// Should not call checkTelemetryErrors when --no-check-telemetry-errors is used
assert.strictEqual(checkTelemetryErrorsCalls.length, 0)

assert.deepEqual(commands, [
{ command: 'node ./scripts/deploy/deploy.ts prod v6 us3,us5,ap1,ap2,prtest00' },
{ command: 'node ./scripts/deploy/upload-source-maps.ts v6 us3,us5,ap1,ap2,prtest00' },
{ command: 'node ./scripts/deploy/deploy.ts prod v6 prtest00,prtest01' },
{ command: 'node ./scripts/deploy/upload-source-maps.ts v6 prtest00,prtest01' },
])
})

it('should deploy gov datacenters to the root upload path and skip all telemetry error checks', async () => {
await runScript('./deploy-prod-dc.ts', 'v6', 'gov', '--check-telemetry-errors')

// gov datacenters should not be checked for telemetry errors
assert.strictEqual(checkTelemetryErrorsCalls.length, 0)

assert.deepEqual(commands, [
{ command: 'node ./scripts/deploy/deploy.ts prod v6 root' },
{ command: 'node ./scripts/deploy/upload-source-maps.ts v6 root' },
])
})
})
Expand Down
63 changes: 41 additions & 22 deletions scripts/deploy/deploy-prod-dc.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { parseArgs } from 'node:util'
import { printLog, runMain, timeout } from '../lib/executionUtils.ts'
import { command } from '../lib/command.ts'
import { siteByDatacenter } from '../lib/datacenter.ts'
import { getAllMinorDcs, getAllPrivateDcs } from '../lib/datacenter.ts'
import { browserSdkVersion } from '../lib/browserSdkVersion.ts'
import { checkTelemetryErrors } from './lib/checkTelemetryErrors.ts'

Expand All @@ -14,22 +14,13 @@ const ONE_MINUTE_IN_SECOND = 60
const GATE_DURATION = 30 * ONE_MINUTE_IN_SECOND
const GATE_INTERVAL = ONE_MINUTE_IN_SECOND

// Major DCs are the ones that are deployed last.
// They have their own step jobs in `deploy-manual.yml` and `deploy-auto.yml`.
const MAJOR_DCS = ['root', 'us1', 'eu1']

// Minor DCs are all the DCs from `siteByDatacenter` that are not in `MAJOR_DCS`.
function getAllMinorDcs(): string[] {
return Object.keys(siteByDatacenter).filter((dc) => !MAJOR_DCS.includes(dc))
}

if (!process.env.NODE_TEST_CONTEXT) {
runMain(() => main(...process.argv.slice(2)))
}

export async function main(...args: string[]): Promise<void> {
const {
values: { 'check-telemetry-errors': shouldCheckTelemetryErrors },
values: { 'check-telemetry-errors': checkTelemetryErrorsFlag },
positionals,
} = parseArgs({
args,
Expand All @@ -44,32 +35,60 @@ export async function main(...args: string[]): Promise<void> {
})

const version = positionals[0]
const uploadPath = positionals[1] === 'minor-dcs' ? getAllMinorDcs().join(',') : positionals[1]
const datacenters = await getDatacenters(positionals[1])

if (!uploadPath) {
throw new Error('UPLOAD_PATH argument is required')
if (!datacenters) {
throw new Error('DATACENTER argument is required')
}

// Skip all telemetry error checks for gov datacenter deployments
const shouldCheckTelemetryErrors = checkTelemetryErrorsFlag && !datacenters.every((dc) => dc === 'gov')

if (shouldCheckTelemetryErrors) {
// Make sure system is in a good state before deploying
const currentBrowserSdkVersionMajor = browserSdkVersion.split('.')[0]
await checkTelemetryErrors(uploadPath.split(','), `${currentBrowserSdkVersionMajor}.*`)
await checkTelemetryErrors(datacenters, `${currentBrowserSdkVersionMajor}.*`)
}

command`node ./scripts/deploy/deploy.ts prod ${version} ${uploadPath}`.withLogs().run()
command`node ./scripts/deploy/upload-source-maps.ts ${version} ${uploadPath}`.withLogs().run()
const uploadPathTypes = toDatacenterUploadPathType(datacenters).join(',')

command`node ./scripts/deploy/deploy.ts prod ${version} ${uploadPathTypes}`.withLogs().run()
command`node ./scripts/deploy/upload-source-maps.ts ${version} ${uploadPathTypes}`.withLogs().run()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thought: there is a bit of back-and-forth between datacenters and "upload path types": in deploy-prod-dc, we convert datacenters to path types, and in upload-source-maps we convert path types back to datacenters. Maybe we can improve this in the future.

Also, upload-source-maps.ts is only used here, so we could imagine changing this script to a function (same for deploy.ts).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these two scripts are also used directly in .gitlab.yml for deploying to staging, canary and next major version.

I agree, though, that this is a bit of a mess. I have attempted to clean it up to make it less confusing (differentiating datacenter from uploadPath: they are sometimes the same, but not always). There is certainly room for more improvement, but I'd rather leave that for later.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these two scripts are also used directly in .gitlab.yml for deploying to staging, canary and next major version.

Oooh my editor grep is skipping dotfiles, damn :D


if (shouldCheckTelemetryErrors && uploadPath !== 'root') {
await gateTelemetryErrors(uploadPath)
if (shouldCheckTelemetryErrors) {
await gateTelemetryErrors(datacenters)
}
}

async function gateTelemetryErrors(uploadPath: string): Promise<void> {
printLog(`Check telemetry errors for ${uploadPath} during ${GATE_DURATION / ONE_MINUTE_IN_SECOND} minutes`)
async function gateTelemetryErrors(datacenters: string[]): Promise<void> {
printLog(`Check telemetry errors for ${datacenters.join(',')} during ${GATE_DURATION / ONE_MINUTE_IN_SECOND} minutes`)
for (let i = 0; i < GATE_DURATION; i += GATE_INTERVAL) {
await checkTelemetryErrors(uploadPath.split(','), browserSdkVersion)
await checkTelemetryErrors(datacenters, browserSdkVersion)
process.stdout.write('.') // progress indicator
await timeout(GATE_INTERVAL * 1000)
}

printLog() // new line
}

/**
 * Resolves the DATACENTER command-line argument into the list of datacenter names to deploy to.
 *
 * - `minor-dcs` resolves to whatever `getAllMinorDcs()` returns.
 * - `private-regions` resolves to whatever `getAllPrivateDcs()` returns.
 * - Any other value is treated as a comma-separated list of datacenter names.
 *
 * @param datacenterGroup - a group alias or a comma-separated list of datacenter names
 * @returns the datacenter names, without blank entries
 * @throws Error when the argument is missing or does not contain any datacenter name
 */
async function getDatacenters(datacenterGroup: string | undefined): Promise<string[]> {
  if (datacenterGroup === 'minor-dcs') {
    return await getAllMinorDcs()
  }

  if (datacenterGroup === 'private-regions') {
    return await getAllPrivateDcs()
  }

  // The value comes straight from the positional CLI arguments, so it can be missing at runtime.
  // Validate it here: calling `.split()` on `undefined` would throw an unhelpful TypeError, and
  // `''.split(',')` would yield `['']`, which callers cannot distinguish from a real datacenter.
  const datacenters = (datacenterGroup ?? '')
    .split(',')
    .map((datacenter) => datacenter.trim())
    .filter((datacenter) => datacenter !== '')

  if (datacenters.length === 0) {
    throw new Error('DATACENTER argument is required')
  }

  return datacenters
}

/**
 * Converts datacenter names into the upload path types passed to the deploy scripts.
 *
 * The `gov` datacenter maps to the `root` upload path; every other name is passed through as-is.
 *
 * @param datacenters - datacenter names
 * @returns one upload path type per input datacenter, in the same order
 */
function toDatacenterUploadPathType(datacenters: string[]): string[] {
  const uploadPathTypes: string[] = []

  for (const name of datacenters) {
    uploadPathTypes.push(name === 'gov' ? 'root' : name)
  }

  return uploadPathTypes
}
15 changes: 15 additions & 0 deletions scripts/deploy/lib/checkTelemetryErrors.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,21 @@ describe('check-telemetry-errors', () => {
fetchHandlingError: fetchHandlingErrorMock,
})

await mockModule(path.resolve(import.meta.dirname, '../../lib/datacenter.ts'), {
getSite: (datacenter: string) => {
const siteByDatacenter: Record<string, string> = {
us1: 'datadoghq.com',
eu1: 'datadoghq.eu',
us3: 'us3.datadoghq.com',
us5: 'us5.datadoghq.com',
ap1: 'ap1.datadoghq.com',
ap2: 'ap2.datadoghq.com',
prtest00: 'prtest00.datad0g.com',
}
return Promise.resolve(siteByDatacenter[datacenter])
},
})

checkTelemetryErrors = (await import('./checkTelemetryErrors.ts')).checkTelemetryErrors
})

Expand Down
4 changes: 2 additions & 2 deletions scripts/deploy/lib/checkTelemetryErrors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/
import { printLog, fetchHandlingError } from '../../lib/executionUtils.ts'
import { getTelemetryOrgApiKey, getTelemetryOrgApplicationKey } from '../../lib/secrets.ts'
import { siteByDatacenter } from '../../lib/datacenter.ts'
import { getSite } from '../../lib/datacenter.ts'

const TIME_WINDOW_IN_MINUTES = 5

Expand Down Expand Up @@ -41,7 +41,7 @@ export async function checkTelemetryErrors(datacenters: string[], version: strin
const queries = getQueries(version)

for (const datacenter of datacenters) {
const site = siteByDatacenter[datacenter]
const site = await getSite(datacenter)

if (!site) {
printLog(`No site is configured for datacenter ${datacenter}. skipping...`)
Expand Down
Loading