-
-
Notifications
You must be signed in to change notification settings - Fork 311
chore(sc-41159): Add Braintrust evaluation cronjob infrastructure #3076
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| # Workflow that builds the Braintrust automation image and pushes it to the registry. | ||
| name: Braintrust Container | ||
|
|
||
| # Triggers: pushes to master and pull requests that touch the Braintrust scripts, | ||
| # requirements file, build context or this workflow file; pushes of v* tags; | ||
| # and manual runs via workflow_dispatch. | ||
| on: | ||
| push: | ||
| branches: | ||
| - master | ||
| paths: | ||
| - 'scripts/scheduled/braintrust_*.py' | ||
| - 'requirements-braintrust.txt' | ||
| - 'build/braintrust/**' | ||
| - '.github/workflows/braintrust-container.yaml' | ||
| tags: | ||
| - 'v*' | ||
| pull_request: | ||
| paths: | ||
| - 'scripts/scheduled/braintrust_*.py' | ||
| - 'requirements-braintrust.txt' | ||
| - 'build/braintrust/**' | ||
| - '.github/workflows/braintrust-container.yaml' | ||
| workflow_dispatch: | ||
|
|
||
| # One concurrency group per ref: a newer run cancels the in-progress run for the same ref. | ||
| concurrency: | ||
| group: braintrust-${{ github.ref }} | ||
| cancel-in-progress: true | ||
|
|
||
| jobs: | ||
| # Dev build: runs for pull requests, non-tag pushes and manual dispatches. | ||
| # Authenticates to Google Cloud with the DEV_* secrets, then builds the image | ||
| # from build/braintrust/Dockerfile and pushes it to a per-branch repository. | ||
| build-dev: | ||
| if: ${{ github.event_name == 'pull_request' || (github.event_name == 'push' && !startsWith(github.ref, 'refs/tags/')) || github.event_name == 'workflow_dispatch' }} | ||
| name: "Braintrust Image Build (Dev)" | ||
| permissions: | ||
| contents: 'read' | ||
| # id-token: write lets the auth step request an OIDC token for workload identity. | ||
| id-token: 'write' | ||
| runs-on: ubuntu-latest | ||
| steps: | ||
| - uses: actions/checkout@v4 | ||
| - name: Set up QEMU | ||
| uses: docker/setup-qemu-action@v3 | ||
| - name: Set up Docker Buildx | ||
| uses: docker/setup-buildx-action@v3 | ||
| - id: auth | ||
| name: Authenticate to Google Cloud | ||
| uses: google-github-actions/auth@v2 | ||
| with: | ||
| token_format: 'access_token' | ||
| workload_identity_provider: 'projects/${{ secrets.DEV_GKE_PROJECT_ID}}/locations/global/workloadIdentityPools/github/providers/github' | ||
| service_account: '${{ secrets.DEV_GKE_SA }}' | ||
| - name: Login to GAR | ||
| uses: docker/login-action@v3 | ||
| with: | ||
| registry: us-east1-docker.pkg.dev | ||
| username: oauth2accesstoken | ||
| password: '${{ steps.auth.outputs.access_token }}' | ||
| - name: Get branch name | ||
| id: branch-raw | ||
| uses: tj-actions/branch-names@v5.1 | ||
| - name: Format branch name | ||
| id: branch-name | ||
| # The branch name is untrusted input. Hand it to the script through an | ||
| # environment variable instead of expanding the expression inside the shell | ||
| # text, so a crafted branch name cannot inject commands. | ||
| env: | ||
| RAW_BRANCH: ${{ steps.branch-raw.outputs.current_branch }} | ||
| # Lower-cases the name, keeps the second-to-last path segment when the name has | ||
| # at least two slashes, then drops every character outside [a-z0-9.-]. | ||
| run: >- | ||
| echo "current_branch=$(printf '%s\n' "$RAW_BRANCH" | ||
| | awk '{print tolower($0)}' | ||
| | sed 's|.*/\([^/]*\)/.*|\1|; t; s|.*|\0|' | ||
| | sed 's/[^a-z0-9\.\-]//g')" | ||
| >> "$GITHUB_OUTPUT" | ||
| - name: Get current date | ||
| id: date | ||
| run: echo "date=$(date +'%Y%m%d%H%M')" >> $GITHUB_OUTPUT | ||
| - name: Generate image metadata | ||
| id: meta | ||
| uses: docker/metadata-action@v3 | ||
| with: | ||
| # The sanitized branch name becomes part of the image repository name. | ||
| images: | | ||
| us-east1-docker.pkg.dev/${{ secrets.DEV_PROJECT }}/containers/sefaria-braintrust-${{ steps.branch-name.outputs.current_branch }} | ||
| tags: | | ||
| type=ref,event=branch | ||
| type=sha,enable=true,priority=100,prefix=sha-,suffix=-${{ steps.date.outputs.date }},format=short | ||
| type=sha | ||
| flavor: | | ||
| latest=true | ||
| - name: Build and push | ||
| uses: docker/build-push-action@v6 | ||
| with: | ||
| context: . | ||
| push: true | ||
| file: ./build/braintrust/Dockerfile | ||
| tags: ${{ steps.meta.outputs.tags }} | ||
| labels: ${{ steps.meta.outputs.labels }} | ||
|
|
||
| # Prod build: runs only for tag pushes. Authenticates to Google Cloud with the | ||
| # PROD_* secrets, then builds the image from build/braintrust/Dockerfile and pushes it. | ||
| build-prod: | ||
| if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') }} | ||
| name: "Braintrust Image Build (Prod)" | ||
| permissions: | ||
| contents: 'read' | ||
| # id-token: write lets the auth step request an OIDC token for workload identity. | ||
| id-token: 'write' | ||
| runs-on: ubuntu-latest | ||
| steps: | ||
| - uses: actions/checkout@v4 | ||
| - name: Set up QEMU | ||
| uses: docker/setup-qemu-action@v3 | ||
| - name: Set up Docker Buildx | ||
| uses: docker/setup-buildx-action@v3 | ||
| - id: auth | ||
| name: Authenticate to Google Cloud | ||
| uses: google-github-actions/auth@v2 | ||
| with: | ||
| token_format: 'access_token' | ||
| workload_identity_provider: 'projects/${{ secrets.PROD_GKE_PROJECT_ID}}/locations/global/workloadIdentityPools/github/providers/github' | ||
| service_account: '${{ secrets.PROD_GKE_SA }}' | ||
| # The access token produced by the auth step is used as the registry password. | ||
| - name: Login to GAR | ||
| uses: docker/login-action@v3 | ||
| with: | ||
| registry: us-east1-docker.pkg.dev | ||
| username: oauth2accesstoken | ||
| password: '${{ steps.auth.outputs.access_token }}' | ||
| # Minute-resolution timestamp, used below as the suffix of the sha- tag. | ||
| - name: Get current date | ||
| id: date | ||
| run: echo "date=$(date +'%Y%m%d%H%M')" >> $GITHUB_OUTPUT | ||
| - name: Generate image metadata | ||
| id: meta | ||
| uses: docker/metadata-action@v3 | ||
| with: | ||
| images: | | ||
| us-east1-docker.pkg.dev/${{ secrets.PROD_GKE_PROJECT }}/containers/${{ secrets.IMAGE_NAME }}-braintrust | ||
| # Tags: the git tag, a timestamped short-sha tag, a plain sha tag and the raw semver tag. | ||
| tags: | | ||
| type=ref,event=tag | ||
| type=sha,enable=true,priority=100,prefix=sha-,suffix=-${{ steps.date.outputs.date }},format=short | ||
| type=sha | ||
| type=semver,pattern={{raw}} | ||
| flavor: | | ||
| latest=true | ||
| - name: Build and push | ||
| uses: docker/build-push-action@v6 | ||
| with: | ||
| context: . | ||
| push: true | ||
| file: ./build/braintrust/Dockerfile | ||
| tags: ${{ steps.meta.outputs.tags }} | ||
| labels: ${{ steps.meta.outputs.labels }} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| # Image for the Braintrust automation scripts (log backup and dataset tagging). | ||
| FROM python:3.11-slim | ||
|
|
||
| LABEL org.opencontainers.image.source="https://github.com/Sefaria/Sefaria-Project" | ||
| LABEL org.opencontainers.image.description="Braintrust automation scripts for log backup and dataset tagging" | ||
|
|
||
| # Install the Python dependencies, then remove the requirements file from the layer. | ||
| COPY requirements-braintrust.txt /tmp/requirements.txt | ||
| RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt | ||
|
|
||
| # Non-root runtime user (UID 1000) whose primary group is GID 0. | ||
| # Because --gid is given, useradd does not create a "braintrust" group. | ||
| RUN useradd --create-home --shell /bin/bash --uid 1000 --gid 0 braintrust | ||
|
|
||
| WORKDIR /app | ||
|
|
||
| # Own /app as braintrust with group 0. The previous "braintrust:braintrust" named a | ||
| # group that does not exist, which makes chown fail with "invalid group". | ||
| RUN mkdir -p /app/scripts /app/shared && chown -R braintrust:0 /app | ||
|
|
||
| COPY scripts/scheduled/braintrust_backup_logs.py /app/scripts/braintrust_backup_logs.py | ||
| COPY scripts/scheduled/braintrust_tag_and_push.py /app/scripts/braintrust_tag_and_push.py | ||
|
|
||
| USER braintrust | ||
|
|
||
| # The script to run is supplied as the container arguments. | ||
| ENTRYPOINT ["python"] |
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,92 @@ | ||||||||||||
| {{- if and .Values.cronJobs.braintrust.enabled .Values.cronJobs.braintrust.backupLogs.enabled }} | ||||||||||||
| --- | ||||||||||||
| apiVersion: batch/v1 | ||||||||||||
| kind: CronJob | ||||||||||||
| metadata: | ||||||||||||
| name: {{ .Values.deployEnv }}-braintrust-backup-logs | ||||||||||||
| labels: | ||||||||||||
| {{- include "sefaria.labels" . | nindent 4 }} | ||||||||||||
| spec: | ||||||||||||
| schedule: "{{ .Values.cronJobs.braintrust.backupLogs.schedule }}" | ||||||||||||
| concurrencyPolicy: Forbid | ||||||||||||
| jobTemplate: | ||||||||||||
| spec: | ||||||||||||
| backoffLimit: 1 | ||||||||||||
|
Comment on lines
9
to
14
|
||||||||||||
| template: | ||||||||||||
| spec: | ||||||||||||
| serviceAccount: {{ .Values.cronJobs.braintrust.backupLogs.serviceAccount }} | ||||||||||||
| initContainers: | ||||||||||||
| # Init container: Query Braintrust logs and create CSV | ||||||||||||
| - name: braintrust-log-exporter | ||||||||||||
| image: "{{ .Values.cronJobs.braintrust.image.repository }}:{{ .Values.cronJobs.braintrust.image.tag }}" | ||||||||||||
| env: | ||||||||||||
| - name: BRAINTRUST_API_KEY | ||||||||||||
| valueFrom: | ||||||||||||
| secretKeyRef: | ||||||||||||
| name: {{ .Values.secrets.braintrust.ref }} | ||||||||||||
| key: api-key | ||||||||||||
| - name: BRAINTRUST_PROJECT_ID | ||||||||||||
| valueFrom: | ||||||||||||
| secretKeyRef: | ||||||||||||
| name: {{ .Values.secrets.braintrust.ref }} | ||||||||||||
| key: project-id | ||||||||||||
| volumeMounts: | ||||||||||||
| - mountPath: /tmp | ||||||||||||
| name: shared-volume | ||||||||||||
| command: ["python"] | ||||||||||||
| args: ["/app/scripts/braintrust_backup_logs.py"] | ||||||||||||
| resources: | ||||||||||||
| requests: | ||||||||||||
| memory: "256Mi" | ||||||||||||
| cpu: "250m" | ||||||||||||
| limits: | ||||||||||||
| memory: "500Mi" | ||||||||||||
| cpu: "1000m" | ||||||||||||
| containers: | ||||||||||||
| # Main container: Upload CSV to GCS bucket | ||||||||||||
| - name: braintrust-log-uploader | ||||||||||||
| image: google/cloud-sdk | ||||||||||||
| volumeMounts: | ||||||||||||
| - mountPath: /tmp | ||||||||||||
| name: shared-volume | ||||||||||||
| env: | ||||||||||||
| - name: BUCKET | ||||||||||||
| value: {{ .Values.cronJobs.braintrust.backupLogs.bucket }} | ||||||||||||
| - name: PREFIX | ||||||||||||
| value: {{ .Values.cronJobs.braintrust.backupLogs.prefix }} | ||||||||||||
| command: ["bash"] | ||||||||||||
| args: | ||||||||||||
| - "-c" | ||||||||||||
| - | | ||||||||||||
| set -e | ||||||||||||
|
|
||||||||||||
| # Find the most recent CSV file | ||||||||||||
| CSV_FILE=$(ls -t /tmp/logs_backup_*.csv 2>/dev/null | head -1) | ||||||||||||
|
|
||||||||||||
| if [ -z "$CSV_FILE" ]; then | ||||||||||||
| echo "No CSV file found in /tmp" | ||||||||||||
| exit 0 | ||||||||||||
| fi | ||||||||||||
|
|
||||||||||||
| FILENAME=$(basename "$CSV_FILE") | ||||||||||||
| DESTINATION="gs://${BUCKET}/${PREFIX}${FILENAME}" | ||||||||||||
|
|
||||||||||||
| echo "Uploading $CSV_FILE to $DESTINATION" | ||||||||||||
| gsutil cp "$CSV_FILE" "$DESTINATION" | ||||||||||||
| echo "Upload complete" | ||||||||||||
|
|
||||||||||||
| # Cleanup | ||||||||||||
| rm -f "$CSV_FILE" | ||||||||||||
| resources: | ||||||||||||
| requests: | ||||||||||||
| memory: "256Mi" | ||||||||||||
| cpu: "100m" | ||||||||||||
| limits: | ||||||||||||
| memory: "500Mi" | ||||||||||||
|
||||||||||||
| memory: "500Mi" | |
| memory: "500Mi" | |
| requests: | |
| memory: "500Mi" | |
| cpu: "100m" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| {{- if and .Values.cronJobs.braintrust.enabled .Values.cronJobs.braintrust.tagAndPush.enabled }} | ||
| --- | ||
| # CronJob that runs braintrust_tag_and_push.py on the configured schedule. | ||
| # Rendered only when both the braintrust and tagAndPush toggles are enabled. | ||
| apiVersion: batch/v1 | ||
| kind: CronJob | ||
| metadata: | ||
| name: {{ .Values.deployEnv }}-braintrust-tag-and-push | ||
| labels: | ||
| {{- include "sefaria.labels" . | nindent 4 }} | ||
| spec: | ||
| schedule: "{{ .Values.cronJobs.braintrust.tagAndPush.schedule }}" | ||
| # Forbid: a scheduled run is skipped while the previous run is still active. | ||
| concurrencyPolicy: Forbid | ||
| jobTemplate: | ||
| spec: | ||
| backoffLimit: 1 | ||
|
Comment on lines
9
to
14
|
||
| template: | ||
| spec: | ||
| # NOTE(review): serviceAccount is the deprecated alias of serviceAccountName; | ||
| # consider switching here and in the backup-logs CronJob together. | ||
| serviceAccount: {{ .Values.cronJobs.braintrust.tagAndPush.serviceAccount }} | ||
| securityContext: | ||
| fsGroup: 1000 | ||
| containers: | ||
| - name: braintrust-tag-and-push | ||
| image: "{{ .Values.cronJobs.braintrust.image.repository }}:{{ .Values.cronJobs.braintrust.image.tag }}" | ||
| # API keys and the project id are read from Kubernetes secrets, not from values. | ||
| env: | ||
| - name: BRAINTRUST_API_KEY | ||
| valueFrom: | ||
|
Comment on lines
+21
to
+25
|
||
| secretKeyRef: | ||
| name: {{ .Values.secrets.braintrust.ref }} | ||
| key: api-key | ||
| - name: BRAINTRUST_PROJECT_ID | ||
| valueFrom: | ||
| secretKeyRef: | ||
| name: {{ .Values.secrets.braintrust.ref }} | ||
| key: project-id | ||
| - name: ANTHROPIC_API_KEY | ||
| valueFrom: | ||
| secretKeyRef: | ||
| name: {{ .Values.secrets.anthropic.ref }} | ||
| key: api-key | ||
| # Same path as the shared-storage volume mount below. | ||
| - name: BRAINTRUST_SHARED_STORAGE | ||
| value: "/shared/braintrust" | ||
| volumeMounts: | ||
| - mountPath: /shared/braintrust | ||
| name: shared-storage | ||
| command: ["python"] | ||
| args: ["/app/scripts/braintrust_tag_and_push.py", "all"] | ||
|
Comment on lines
+41
to
+45
|
||
| resources: | ||
| limits: | ||
| memory: "3Gi" | ||
| cpu: "2000m" | ||
| requests: | ||
| memory: "1Gi" | ||
| cpu: "500m" | ||
| restartPolicy: OnFailure | ||
| # shared-storage is backed by the named PVC when usePvc is set, otherwise by an emptyDir. | ||
| volumes: | ||
| - name: shared-storage | ||
| {{- if .Values.cronJobs.braintrust.tagAndPush.usePvc }} | ||
| persistentVolumeClaim: | ||
| claimName: {{ .Values.cronJobs.braintrust.tagAndPush.pvcName }} | ||
| {{- else }} | ||
| emptyDir: {} | ||
| {{- end }} | ||
| successfulJobsHistoryLimit: 1 | ||
| failedJobsHistoryLimit: 2 | ||
| {{- end }} | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PR description says the Braintrust cronjobs are disabled by default, but production-values.yaml enables them (and both sub-jobs) for production. Either keep them disabled here until explicitly enabled, or update the PR description to reflect that production will run them after deploy.