From 70c4b2190adaf92b31e5696d85ce5858aea7110a Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Mon, 12 Jan 2026 09:23:03 -0500 Subject: [PATCH 1/4] Add Kanopy cronjob deployment for GitHub metrics collection - Add Dockerfile to containerize the Node.js application - Add cronjobs.yml with weekly schedule and persistent volume for state tracking - Add .drone.yml CI/CD pipeline for build and deployment - Add check-last-run.js to prevent duplicate runs within 14-day window - Update index.js to integrate smart 14-day check logic - Update README.md with deployment documentation and monitoring instructions The cronjob runs weekly but only collects metrics if 14+ days have passed, ensuring no data gaps while preventing duplicate collection. --- github-metrics/.drone.yml | 95 +++++++++++++++++++++++ github-metrics/Dockerfile | 29 +++++++ github-metrics/README.md | 126 ++++++++++++++++++++++++++++++- github-metrics/check-last-run.js | 99 ++++++++++++++++++++++++ github-metrics/cronjobs.yml | 35 +++++++++ github-metrics/index.js | 27 ++++++- github-metrics/package-lock.json | 1 + 7 files changed, 406 insertions(+), 6 deletions(-) create mode 100644 github-metrics/.drone.yml create mode 100644 github-metrics/Dockerfile create mode 100644 github-metrics/check-last-run.js create mode 100644 github-metrics/cronjobs.yml diff --git a/github-metrics/.drone.yml b/github-metrics/.drone.yml new file mode 100644 index 0000000..fce213e --- /dev/null +++ b/github-metrics/.drone.yml @@ -0,0 +1,95 @@ +--- +kind: pipeline +type: kubernetes +name: github-metrics-test + +trigger: + branch: + - main + event: + - push # Runs when PR is merged to main + - pull_request # Also runs on PRs for early feedback + +steps: + - name: check-changes + image: alpine/git + commands: + - | + # Check if any files in github-metrics/ directory changed + if [ "$DRONE_BUILD_EVENT" = "pull_request" ]; then + # For PRs, compare against target branch + git diff --name-only origin/$DRONE_TARGET_BRANCH...HEAD | grep 
-q "^github-metrics/" && echo "Changes detected" || (echo "No changes in github-metrics/, skipping" && exit 78) + else + # For pushes, compare against previous commit + git diff --name-only $DRONE_COMMIT_BEFORE $DRONE_COMMIT_AFTER | grep -q "^github-metrics/" && echo "Changes detected" || (echo "No changes in github-metrics/, skipping" && exit 78) + fi + + - name: test + image: node:20-alpine + commands: + - cd github-metrics + - npm ci + - node --version + - npm --version + - echo "Validating package.json and dependencies..." + +--- +depends_on: ['github-metrics-test'] +kind: pipeline +type: kubernetes +name: github-metrics-build + +trigger: + branch: + - main + event: + - push + - tag + +steps: + # Builds and publishes Docker image for production + - name: publish-production + image: plugins/kaniko-ecr + settings: + create_repository: true + registry: 795250896452.dkr.ecr.us-east-1.amazonaws.com + repo: docs/github-metrics + tags: + - git-${DRONE_COMMIT_SHA:0:7} + - latest + access_key: + from_secret: ecr_access_key + secret_key: + from_secret: ecr_secret_key + context: github-metrics + dockerfile: github-metrics/Dockerfile + +--- +depends_on: ['github-metrics-build'] +kind: pipeline +type: kubernetes +name: github-metrics-deploy + +trigger: + branch: + - main + event: + - push + - tag + +steps: + # Deploys cronjob to production using Helm + - name: deploy-production + image: quay.io/mongodb/drone-helm:v3 + settings: + chart: mongodb/cronjobs + chart_version: 1.21.2 + add_repos: [mongodb=https://10gen.github.io/helm-charts] + namespace: docs + release: github-metrics + values: image.tag=git-${DRONE_COMMIT_SHA:0:7},image.repository=795250896452.dkr.ecr.us-east-1.amazonaws.com/docs/github-metrics + values_files: ['github-metrics/cronjobs.yml'] + api_server: https://api.prod.corp.mongodb.com + kubernetes_token: + from_secret: kubernetes_token + diff --git a/github-metrics/Dockerfile b/github-metrics/Dockerfile new file mode 100644 index 0000000..e4bd3f0 --- 
/dev/null +++ b/github-metrics/Dockerfile @@ -0,0 +1,29 @@ +FROM node:20-alpine + +# Set working directory +WORKDIR /app + +# Copy package files first (for better Docker layer caching) +COPY package.json package-lock.json ./ + +# Install dependencies (use ci for reproducible builds) +RUN npm ci --only=production + +# Copy the rest of the application files +COPY . . + +# Create a non-root user for security best practices +RUN addgroup -g 1001 -S nodejs && \ + adduser -S nodejs -u 1001 && \ + chown -R nodejs:nodejs /app + +# Switch to non-root user +USER nodejs + +# Set NODE_ENV to production +ENV NODE_ENV=production + +# Command to run the application +# This will be executed by the Kubernetes CronJob +CMD ["node", "index.js"] + diff --git a/github-metrics/README.md b/github-metrics/README.md index 3dd81eb..7ec914f 100644 --- a/github-metrics/README.md +++ b/github-metrics/README.md @@ -2,9 +2,9 @@ This directory contains tooling to enable us to track various GitHub project metrics programmatically. -Currently, it contains a PoC for a simple pipeline to pull metrics from GitHub into MongoDB Atlas. +This tool runs as a Kubernetes CronJob on Kanopy, automatically collecting metrics from GitHub every 14 days and storing them in MongoDB Atlas. -Planned future work: +Planned future work: - Add logic to work with pulled maintenance metrics once available in the test repo - Set up Atlas Charts to visualize the data @@ -119,7 +119,7 @@ For this project, as a MongoDB org member, you must also auth your PAT with SSO. npm install ``` -3. **Run the utility** +3. **Manually run the utility** From the root of the directory, run the following command to run the utility: @@ -132,3 +132,123 @@ For this project, as a MongoDB org member, you must also auth your PAT with SSO. 
``` A document was inserted into mongodb_docs-notebooks with the _id: 678197a0ffe1539ff213bd86 ``` + +## Automated Deployment (Kanopy CronJob) + +This tool is deployed as a Kubernetes CronJob on Kanopy that runs automatically every 14 days. + +### Deployment Architecture + +The deployment consists of three main components: + +1. **Dockerfile**: Containerizes the Node.js application +2. **cronjobs.yml**: Helm values file that configures the CronJob schedule and resources +3. **.drone.yml**: CI/CD pipeline that builds, publishes, and deploys the application + +### CronJob Schedule + +The cronjob is **scheduled to run weekly on Mondays at 8:00 AM UTC** (`0 8 * * 1`), but the application includes smart logic to prevent running too frequently: + +- The cronjob triggers every Monday +- The application checks if 14 days have passed since the last successful run +- If less than 14 days have passed, the job exits early without collecting metrics +- If 14 days or more have passed, it collects metrics and updates the timestamp + +The last run timestamp is stored in a persistent volume (`/data/last-run.json`) that survives between cronjob executions. + +### Required Kubernetes Secrets + +The cronjob requires two Kubernetes secrets to be created in the `docs` namespace: + +1. **github-token**: Contains the GitHub Personal Access Token + ```bash + kubectl create secret generic github-token \ + --from-literal=GITHUB_TOKEN='your-github-token' \ + -n docs + ``` + +2. **atlas-connection-string**: Contains the MongoDB Atlas connection string + ```bash + kubectl create secret generic atlas-connection-string \ + --from-literal=ATLAS_CONNECTION_STRING='your-connection-string' \ + -n docs + ``` + +> **Note**: These secrets should already exist in the production environment. Contact the DevOps team if you need to create or update them. + +### Deployment Process + +The deployment is fully automated via Drone CI/CD: + +1. 
**Test Pipeline** (`github-metrics-test`): + - Checks if files in `github-metrics/` directory changed + - Validates dependencies with `npm ci` + - Runs on pull requests and pushes to main + +2. **Build Pipeline** (`github-metrics-build`): + - Builds Docker image using Kaniko + - Publishes to ECR: `795250896452.dkr.ecr.us-east-1.amazonaws.com/docs/github-metrics` + - Tags with git commit SHA and `latest` + +3. **Deploy Pipeline** (`github-metrics-deploy`): + - Deploys to production Kanopy cluster using Helm + - Uses the `mongodb/cronjobs` chart (version 1.21.2) + - Deploys to the `docs` namespace + +### Manual Deployment + +To manually trigger a deployment: + +1. Push changes to the `main` branch +2. Drone will automatically run the test, build, and deploy pipelines + +### Manually Triggering the CronJob + +To manually run the cronjob outside of its schedule: + +```bash +# Find the cronjob +kubectl get cronjobs -n docs + +# Create a one-time job from the cronjob +kubectl create job --from=cronjob/github-metrics-collection \ + github-metrics-manual-$(date +%s) -n docs + +# Check the job status +kubectl get jobs -n docs + +# View logs +kubectl logs -n docs job/github-metrics-manual-<timestamp> +``` + +### Monitoring + +To check the status of the cronjob: + +```bash +# View cronjob details +kubectl get cronjob github-metrics-collection -n docs + +# View recent job runs +kubectl get jobs -n docs | grep github-metrics + +# View logs from the most recent run +kubectl logs -n docs -l job-name=<job-name> + +# Check the last run timestamp (requires exec into a pod) +kubectl exec -n docs <pod-name> -- cat /data/last-run.json +``` + +The logs will show whether the job ran or was skipped: +- `⏭️ Skipping run - only X days since last run (need 14)` - Job skipped, not enough time passed +- `✅ Proceeding with run - X days since last run` - Job is collecting metrics + +### Configuration Changes + +To modify the cronjob configuration: + +1. 
**Change schedule**: Edit `cronjobs.yml` and update the `schedule` field +2. **Change resources**: Edit `cronjobs.yml` and update the `resources` section +3. **Change repositories tracked**: Edit `repo-details.json` + +After making changes, commit and push to the `main` branch. Drone will automatically deploy the updates. diff --git a/github-metrics/check-last-run.js b/github-metrics/check-last-run.js new file mode 100644 index 0000000..ca83cd4 --- /dev/null +++ b/github-metrics/check-last-run.js @@ -0,0 +1,99 @@ +import fs from 'fs'; +import path from 'path'; + +// Path to the state file (mounted from persistent volume) +const STATE_FILE_PATH = process.env.STATE_FILE_PATH || '/data/last-run.json'; + +// Minimum days between runs +const MIN_DAYS_BETWEEN_RUNS = parseInt(process.env.MIN_DAYS_BETWEEN_RUNS || '14', 10); + +/** + * Check if enough time has passed since the last run + * @returns {boolean} true if should run, false if should skip + */ +export function shouldRun() { + try { + // Check if state file exists + if (!fs.existsSync(STATE_FILE_PATH)) { + console.log('No previous run found. 
Running for the first time.'); + return true; + } + + // Read the last run timestamp + const stateData = JSON.parse(fs.readFileSync(STATE_FILE_PATH, 'utf8')); + const lastRunTime = new Date(stateData.lastRun); + const now = new Date(); + + // Calculate days since last run + const daysSinceLastRun = (now - lastRunTime) / (1000 * 60 * 60 * 24); + + console.log(`Last run: ${lastRunTime.toISOString()}`); + console.log(`Days since last run: ${daysSinceLastRun.toFixed(2)}`); + console.log(`Minimum days required: ${MIN_DAYS_BETWEEN_RUNS}`); + + if (daysSinceLastRun < MIN_DAYS_BETWEEN_RUNS) { + console.log(`⏭️ Skipping run - only ${daysSinceLastRun.toFixed(2)} days since last run (need ${MIN_DAYS_BETWEEN_RUNS})`); + return false; + } + + console.log(`✅ Proceeding with run - ${daysSinceLastRun.toFixed(2)} days since last run`); + return true; + + } catch (error) { + console.error('Error checking last run time:', error.message); + console.log('Proceeding with run due to error reading state file'); + return true; // Run if we can't read the state file + } +} + +/** + * Update the state file with the current timestamp + */ +export function updateLastRun() { + try { + const now = new Date(); + const stateData = { + lastRun: now.toISOString(), + timestamp: now.getTime() + }; + + // Ensure the directory exists + const dir = path.dirname(STATE_FILE_PATH); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + + // Write the state file + fs.writeFileSync(STATE_FILE_PATH, JSON.stringify(stateData, null, 2), 'utf8'); + console.log(`✅ Updated last run timestamp: ${now.toISOString()}`); + + } catch (error) { + console.error('Error updating last run time:', error.message); + // Don't throw - we don't want to fail the job just because we can't write the state file + } +} + +/** + * Get the last run information + * @returns {Object|null} Object with lastRun date and timestamp, or null if no previous run + */ +export function getLastRunInfo() { + try { + if 
(!fs.existsSync(STATE_FILE_PATH)) { + return null; + } + + const stateData = JSON.parse(fs.readFileSync(STATE_FILE_PATH, 'utf8')); + return { + lastRun: new Date(stateData.lastRun), + timestamp: stateData.timestamp + }; + + } catch (error) { + console.error('Error reading last run info:', error.message); + return null; + } +} + +export { MIN_DAYS_BETWEEN_RUNS }; + diff --git a/github-metrics/cronjobs.yml b/github-metrics/cronjobs.yml new file mode 100644 index 0000000..b0e757a --- /dev/null +++ b/github-metrics/cronjobs.yml @@ -0,0 +1,35 @@ +--- +# `image` can be skipped if the values are being set in your .drone.yml file +image: + repository: 795250896452.dkr.ecr.us-east-1.amazonaws.com/docs/github-metrics + tag: latest + +# global secrets are references to k8s Secrets +globalEnvSecrets: + GITHUB_TOKEN: github-token + ATLAS_CONNECTION_STRING: atlas-connection-string + +cronJobs: +- name: github-metrics-collection + # Run weekly on Mondays at 8am UTC + # The application checks if it ran in the last 14 days and skips if so + # Cron format: minute hour day-of-month month day-of-week + # 0 = Sunday, 1 = Monday, etc. + schedule: "0 8 * * 1" + command: + - node + - index.js + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + # Persistent volume to store last run timestamp + persistence: + enabled: true + storageClass: "standard" + accessMode: ReadWriteOnce + size: 1Gi + mountPath: /data diff --git a/github-metrics/index.js b/github-metrics/index.js index 0616e92..9220960 100644 --- a/github-metrics/index.js +++ b/github-metrics/index.js @@ -2,6 +2,7 @@ import { readFile } from 'fs/promises'; import { getGitHubMetrics } from "./get-github-metrics.js"; import { addMetricsToAtlas } from "./write-to-db.js"; import { RepoDetails } from './RepoDetails.js'; // Import the RepoDetails class +import { shouldRun, updateLastRun } from './check-last-run.js'; /* To change which repos to track metrics for, update the `repo-details.json` file. 
To track metrics for a new repo, add a new entry with the owner and repo name. @@ -36,13 +37,33 @@ async function processRepos() { } await addMetricsToAtlas(metricsDocs); + + // Update the last run timestamp after successful completion + updateLastRun(); } catch (error) { console.error('Error processing repos:', error); + throw error; // Re-throw to be caught by main handler } } -// Call the function -processRepos().catch(error => { - console.error('Fatal error:', error); +// Main execution +async function main() { + console.log('🚀 GitHub Metrics Collection Starting...'); + + // Check if enough time has passed since last run + if (!shouldRun()) { + console.log('⏭️ Exiting - not enough time has passed since last run'); + process.exit(0); + } + + // Process repos and collect metrics + await processRepos(); + + console.log('✅ GitHub Metrics Collection Complete'); +} + +// Call the main function +main().catch(error => { + console.error('❌ Fatal error:', error); process.exit(1); }); diff --git a/github-metrics/package-lock.json b/github-metrics/package-lock.json index f52d4b0..fc1bfbe 100644 --- a/github-metrics/package-lock.json +++ b/github-metrics/package-lock.json @@ -134,6 +134,7 @@ "resolved": "https://registry.npmjs.org/@octokit/core/-/core-7.0.5.tgz", "integrity": "sha512-t54CUOsFMappY1Jbzb7fetWeO0n6K0k/4+/ZpkS+3Joz8I4VcvY9OiEBFRYISqaI2fq5sCiPtAjRDOzVYG8m+Q==", "license": "MIT", + "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.2", From d8c121cc0c6d642624c7ea30966add9dc3333239 Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Mon, 12 Jan 2026 09:29:55 -0500 Subject: [PATCH 2/4] Simplify Drone pipeline to single pipeline with sequential steps - Combine test, build, and deploy into one pipeline - Remove unnecessary pipeline dependencies - Simplify change detection logic - Fix typo: DevOps -> DevDocs team in README --- github-metrics/.drone.yml | 134 ++++++++++++++------------------------ github-metrics/README.md | 2 +- 
2 files changed, 49 insertions(+), 87 deletions(-) diff --git a/github-metrics/.drone.yml b/github-metrics/.drone.yml index fce213e..81726dc 100644 --- a/github-metrics/.drone.yml +++ b/github-metrics/.drone.yml @@ -1,95 +1,57 @@ --- kind: pipeline type: kubernetes -name: github-metrics-test +name: github-metrics trigger: branch: - - main + - main event: - - push # Runs when PR is merged to main - - pull_request # Also runs on PRs for early feedback + - push steps: - - name: check-changes - image: alpine/git - commands: - - | - # Check if any files in github-metrics/ directory changed - if [ "$DRONE_BUILD_EVENT" = "pull_request" ]; then - # For PRs, compare against target branch - git diff --name-only origin/$DRONE_TARGET_BRANCH...HEAD | grep -q "^github-metrics/" && echo "Changes detected" || (echo "No changes in github-metrics/, skipping" && exit 78) - else - # For pushes, compare against previous commit - git diff --name-only $DRONE_COMMIT_BEFORE $DRONE_COMMIT_AFTER | grep -q "^github-metrics/" && echo "Changes detected" || (echo "No changes in github-metrics/, skipping" && exit 78) - fi - - - name: test - image: node:20-alpine - commands: - - cd github-metrics - - npm ci - - node --version - - npm --version - - echo "Validating package.json and dependencies..." 
- ---- -depends_on: ['github-metrics-test'] -kind: pipeline -type: kubernetes -name: github-metrics-build - -trigger: - branch: - - main - event: - - push - - tag - -steps: - # Builds and publishes Docker image for production - - name: publish-production - image: plugins/kaniko-ecr - settings: - create_repository: true - registry: 795250896452.dkr.ecr.us-east-1.amazonaws.com - repo: docs/github-metrics - tags: - - git-${DRONE_COMMIT_SHA:0:7} - - latest - access_key: - from_secret: ecr_access_key - secret_key: - from_secret: ecr_secret_key - context: github-metrics - dockerfile: github-metrics/Dockerfile - ---- -depends_on: ['github-metrics-build'] -kind: pipeline -type: kubernetes -name: github-metrics-deploy - -trigger: - branch: - - main - event: - - push - - tag - -steps: - # Deploys cronjob to production using Helm - - name: deploy-production - image: quay.io/mongodb/drone-helm:v3 - settings: - chart: mongodb/cronjobs - chart_version: 1.21.2 - add_repos: [mongodb=https://10gen.github.io/helm-charts] - namespace: docs - release: github-metrics - values: image.tag=git-${DRONE_COMMIT_SHA:0:7},image.repository=795250896452.dkr.ecr.us-east-1.amazonaws.com/docs/github-metrics - values_files: ['github-metrics/cronjobs.yml'] - api_server: https://api.prod.corp.mongodb.com - kubernetes_token: - from_secret: kubernetes_token - +- name: check-changes + image: alpine/git + commands: + - | + # Check if any files in github-metrics/ directory changed + git diff --name-only $DRONE_COMMIT_BEFORE $DRONE_COMMIT_AFTER | grep -q "^github-metrics/" && echo "Changes detected" || (echo "No changes in github-metrics/, skipping" && exit 78) + +- name: test + image: node:20-alpine + commands: + - cd github-metrics + - npm ci + - node --version + - npm --version + - echo "Validating package.json and dependencies..." 
+ +- name: publish + image: plugins/kaniko-ecr + settings: + create_repository: true + registry: 795250896452.dkr.ecr.us-east-1.amazonaws.com + repo: docs/github-metrics + tags: + - git-${DRONE_COMMIT_SHA:0:7} + - latest + access_key: + from_secret: ecr_access_key + secret_key: + from_secret: ecr_secret_key + context: github-metrics + dockerfile: github-metrics/Dockerfile + +- name: deploy + image: quay.io/mongodb/drone-helm:v3 + settings: + chart: mongodb/cronjobs + chart_version: 1.21.2 + add_repos: [ mongodb=https://10gen.github.io/helm-charts ] + namespace: docs + release: github-metrics + values: image.tag=git-${DRONE_COMMIT_SHA:0:7},image.repository=795250896452.dkr.ecr.us-east-1.amazonaws.com/docs/github-metrics + values_files: [ 'github-metrics/cronjobs.yml' ] + api_server: https://api.prod.corp.mongodb.com + kubernetes_token: + from_secret: kubernetes_token diff --git a/github-metrics/README.md b/github-metrics/README.md index 7ec914f..bcabb48 100644 --- a/github-metrics/README.md +++ b/github-metrics/README.md @@ -174,7 +174,7 @@ The cronjob requires two Kubernetes secrets to be created in the `docs` namespac -n docs ``` -> **Note**: These secrets should already exist in the production environment. Contact the DevOps team if you need to create or update them. +> **Note**: These secrets should already exist in the production environment. Contact the DevDocs team if you need to create or update them. 
### Deployment Process From 2c6bd435cdaf6ca4fb35c507fe0e2d125a358cec Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Mon, 12 Jan 2026 11:37:12 -0500 Subject: [PATCH 3/4] apply dachary feedback --- github-metrics/.drone.yml | 20 +++++++++++++++++++ github-metrics/README.md | 41 +++++++-------------------------------- 2 files changed, 27 insertions(+), 34 deletions(-) diff --git a/github-metrics/.drone.yml b/github-metrics/.drone.yml index 81726dc..2fe65c2 100644 --- a/github-metrics/.drone.yml +++ b/github-metrics/.drone.yml @@ -55,3 +55,23 @@ steps: api_server: https://api.prod.corp.mongodb.com kubernetes_token: from_secret: kubernetes_token + +- name: notify-slack + image: alpine/curl + environment: + SLACK_WEBHOOK: + from_secret: slack_webhook_url + commands: + - | + if [ "$DRONE_BUILD_STATUS" = "success" ]; then + STATUS_MSG="✅ *GitHub Metrics CronJob Deploy Succeeded*" + else + STATUS_MSG="❌ *GitHub Metrics CronJob Deploy Failed*" + fi + curl -X POST -H 'Content-type: application/json' \ + --data "{\"text\": \"$STATUS_MSG\n*Repo:* $DRONE_REPO_NAME\n*Branch:* $DRONE_BRANCH\n*Commit:* ${DRONE_COMMIT_SHA:0:7}\n*Author:* $DRONE_COMMIT_AUTHOR\n*Build:* <$DRONE_BUILD_LINK|#$DRONE_BUILD_NUMBER>\"}" \ + "$SLACK_WEBHOOK" + when: + status: + - success + - failure diff --git a/github-metrics/README.md b/github-metrics/README.md index bcabb48..7387ab5 100644 --- a/github-metrics/README.md +++ b/github-metrics/README.md @@ -156,44 +156,17 @@ The cronjob is **scheduled to run weekly on Mondays at 8:00 AM UTC** (`0 8 * * 1 The last run timestamp is stored in a persistent volume (`/data/last-run.json`) that survives between cronjob executions. -### Required Kubernetes Secrets - -The cronjob requires two Kubernetes secrets to be created in the `docs` namespace: - -1. **github-token**: Contains the GitHub Personal Access Token - ```bash - kubectl create secret generic github-token \ - --from-literal=GITHUB_TOKEN='your-github-token' \ - -n docs - ``` - -2. 
**atlas-connection-string**: Contains the MongoDB Atlas connection string - ```bash - kubectl create secret generic atlas-connection-string \ - --from-literal=ATLAS_CONNECTION_STRING='your-connection-string' \ - -n docs - ``` - -> **Note**: These secrets should already exist in the production environment. Contact the DevDocs team if you need to create or update them. - ### Deployment Process -The deployment is fully automated via Drone CI/CD: - -1. **Test Pipeline** (`github-metrics-test`): - - Checks if files in `github-metrics/` directory changed - - Validates dependencies with `npm ci` - - Runs on pull requests and pushes to main +The deployment is fully automated via Drone CI/CD with the following steps: -2. **Build Pipeline** (`github-metrics-build`): - - Builds Docker image using Kaniko - - Publishes to ECR: `795250896452.dkr.ecr.us-east-1.amazonaws.com/docs/github-metrics` - - Tags with git commit SHA and `latest` +1. **Check Changes**: Verifies if files in `github-metrics/` directory changed +2. **Test**: Validates dependencies with `npm ci` +3. **Build**: Builds Docker image using Kaniko and publishes to ECR +4. **Deploy**: Deploys to production Kanopy cluster using Helm +5. **Notify**: Sends Slack notification on success or failure -3. **Deploy Pipeline** (`github-metrics-deploy`): - - Deploys to production Kanopy cluster using Helm - - Uses the `mongodb/cronjobs` chart (version 1.21.2) - - Deploys to the `docs` namespace +The pipeline only runs on pushes to the `main` branch and skips if no github-metrics files changed. 
### Manual Deployment From a90dcae750a78d3e39b8834f7b62c2e7e47f228f Mon Sep 17 00:00:00 2001 From: Cory Bullinger Date: Thu, 22 Jan 2026 08:27:08 -0500 Subject: [PATCH 4/4] Apply Dachary and Kyle feedback --- github-metrics/README.md | 29 ++++++++++++++++++-------- github-metrics/check-last-run.js | 35 ++++++++------------------------ github-metrics/index.js | 4 ++-- 3 files changed, 30 insertions(+), 38 deletions(-) diff --git a/github-metrics/README.md b/github-metrics/README.md index 7387ab5..ef65d82 100644 --- a/github-metrics/README.md +++ b/github-metrics/README.md @@ -2,7 +2,7 @@ This directory contains tooling to enable us to track various GitHub project metrics programmatically. -This tool runs as a Kubernetes CronJob on Kanopy, automatically collecting metrics from GitHub every 14 days and storing them in MongoDB Atlas. +This tool runs as a Kubernetes CronJob on Kanopy, automatically collecting metrics from GitHub approximately every 13-14 days and storing them in MongoDB Atlas. Planned future work: @@ -14,7 +14,7 @@ Planned future work: ### Get metrics from GitHub This is a simple PoC that uses [octokit](https://github.com/octokit/octokit.js) to get the following data out of GitHub -for a given repository over a trailing 14 day period: +for a given repository over a trailing 14-day period: - Views - Unique Views @@ -24,7 +24,7 @@ for a given repository over a trailing 14 day period: - Top 10 referral sources - Top 10 paths/destinations in the repo -The intent is to also get the following maintenance-related stats for a given repository over a trailing 14 day period: +The intent is to also get the following maintenance-related stats for a given repository over a trailing 14-day period: - Code frequency - Commit count @@ -135,7 +135,7 @@ For this project, as a MongoDB org member, you must also auth your PAT with SSO. ## Automated Deployment (Kanopy CronJob) -This tool is deployed as a Kubernetes CronJob on Kanopy that runs automatically every 14 days. 
+This tool is deployed as a Kubernetes CronJob on Kanopy that runs automatically approximately every 13-14 days. ### Deployment Architecture @@ -150,12 +150,23 @@ The deployment consists of three main components: The cronjob is **scheduled to run weekly on Mondays at 8:00 AM UTC** (`0 8 * * 1`), but the application includes smart logic to prevent running too frequently: - The cronjob triggers every Monday -- The application checks if 14 days have passed since the last successful run -- If less than 14 days have passed, the job exits early without collecting metrics -- If 14 days or more have passed, it collects metrics and updates the timestamp +- The application checks if 13 days have passed since the last successful run +- If less than 13 days have passed, the job exits early without collecting metrics +- If 13 days or more have passed, it collects metrics and updates the timestamp The last run timestamp is stored in a persistent volume (`/data/last-run.json`) that survives between cronjob executions. +#### Environment Variables + +The following environment variables can be configured: + +- **`ATLAS_CONNECTION_STRING`** (required): MongoDB Atlas connection string for storing metrics +- **`GITHUB_TOKEN`** (required): GitHub Personal Access Token with `repo` permissions +- **`STATE_FILE_PATH`** (optional): Path to the state file for tracking last run timestamp. Default: `/data/last-run.json` +- **`MIN_DAYS_BETWEEN_RUNS`** (optional): Minimum number of days between metric collection runs. Default: `13` + +The required secrets (`ATLAS_CONNECTION_STRING` and `GITHUB_TOKEN`) are configured in `cronjobs.yml` as Kubernetes secrets. 
+ ### Deployment Process The deployment is fully automated via Drone CI/CD with the following steps: @@ -213,8 +224,8 @@ kubectl exec -n docs -- cat /data/last-run.json ``` The logs will show whether the job ran or was skipped: -- `⏭️ Skipping run - only X days since last run (need 14)` - Job skipped, not enough time passed -- `✅ Proceeding with run - X days since last run` - Job is collecting metrics +- `Skipping run - only X days since last run (need 13)` - Job skipped, not enough time passed +- `Proceeding with run - X days since last run` - Job is collecting metrics ### Configuration Changes diff --git a/github-metrics/check-last-run.js b/github-metrics/check-last-run.js index ca83cd4..104e157 100644 --- a/github-metrics/check-last-run.js +++ b/github-metrics/check-last-run.js @@ -1,11 +1,14 @@ import fs from 'fs'; +import { writeFile, mkdir } from 'fs/promises'; import path from 'path'; // Path to the state file (mounted from persistent volume) +// Can be overridden via STATE_FILE_PATH environment variable const STATE_FILE_PATH = process.env.STATE_FILE_PATH || '/data/last-run.json'; -// Minimum days between runs -const MIN_DAYS_BETWEEN_RUNS = parseInt(process.env.MIN_DAYS_BETWEEN_RUNS || '14', 10); +// Minimum days between runs (13 days to account for timing variations with weekly Monday runs) +// Can be overridden via MIN_DAYS_BETWEEN_RUNS environment variable +const MIN_DAYS_BETWEEN_RUNS = parseInt(process.env.MIN_DAYS_BETWEEN_RUNS || '13', 10); /** * Check if enough time has passed since the last run @@ -49,7 +52,7 @@ export function shouldRun() { /** * Update the state file with the current timestamp */ -export function updateLastRun() { +export async function updateLastRun() { try { const now = new Date(); const stateData = { @@ -60,11 +63,11 @@ export function updateLastRun() { // Ensure the directory exists const dir = path.dirname(STATE_FILE_PATH); if (!fs.existsSync(dir)) { - fs.mkdirSync(dir, { recursive: true }); + await mkdir(dir, { recursive: true 
}); } // Write the state file - fs.writeFileSync(STATE_FILE_PATH, JSON.stringify(stateData, null, 2), 'utf8'); + await writeFile(STATE_FILE_PATH, JSON.stringify(stateData, null, 2), 'utf8'); console.log(`✅ Updated last run timestamp: ${now.toISOString()}`); } catch (error) { @@ -73,27 +76,5 @@ export function updateLastRun() { } } -/** - * Get the last run information - * @returns {Object|null} Object with lastRun date and timestamp, or null if no previous run - */ -export function getLastRunInfo() { - try { - if (!fs.existsSync(STATE_FILE_PATH)) { - return null; - } - - const stateData = JSON.parse(fs.readFileSync(STATE_FILE_PATH, 'utf8')); - return { - lastRun: new Date(stateData.lastRun), - timestamp: stateData.timestamp - }; - - } catch (error) { - console.error('Error reading last run info:', error.message); - return null; - } -} - export { MIN_DAYS_BETWEEN_RUNS }; diff --git a/github-metrics/index.js b/github-metrics/index.js index 9220960..0f6df2b 100644 --- a/github-metrics/index.js +++ b/github-metrics/index.js @@ -39,7 +39,7 @@ async function processRepos() { await addMetricsToAtlas(metricsDocs); // Update the last run timestamp after successful completion - updateLastRun(); + await updateLastRun(); } catch (error) { console.error('Error processing repos:', error); throw error; // Re-throw to be caught by main handler @@ -52,7 +52,7 @@ async function main() { // Check if enough time has passed since last run if (!shouldRun()) { - console.log('⏭️ Exiting - not enough time has passed since last run'); + console.log('Exiting - not enough time has passed since last run'); process.exit(0); }