Pulling changes from WIP to rag-103 #1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Workflow: runs the DeepTeam red-team security tests and reports the outcome on the pull request. | |
| name: DeepTeam Red Team Security Tests | |
| on: | |
| # Runs for pull requests that are opened, updated (synchronize) or reopened and that touch any path listed below. | |
| pull_request: | |
| types: [opened, synchronize, reopened] | |
| paths: | |
| - 'src/**' | |
| - 'tests/**' | |
| - 'mocks/**' | |
| - 'data/**' | |
| - '.github/workflows/deepeval-red-team-tests.yml' | |
| # Manual trigger with a selectable attack intensity (defaults to 'standard'). | |
| # NOTE(review): no step visible in this workflow reads inputs.attack_intensity; confirm it is consumed somewhere. | |
| workflow_dispatch: | |
| inputs: | |
| attack_intensity: | |
| description: 'Attack intensity level' | |
| required: false | |
| default: 'standard' | |
| type: choice | |
| options: | |
| - light | |
| - standard | |
| - intensive | |
| jobs: | |
| # Single job: set up Python and uv, install dependencies, run the red-team tests, then build a report. | |
| # NOTE(review): no `permissions:` block is visible in this workflow; the PR-comment step needs write access to PR comments, so confirm the token's default permissions allow it. | |
| security-assessment: | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 60 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| # The Python version is taken from the repository's .python-version file. | |
| - name: Set up Python | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version-file: '.python-version' | |
| - name: Set up uv | |
| uses: astral-sh/setup-uv@v6 | |
| # Installs dependencies with `uv sync --frozen`. | |
| - name: Install dependencies (locked) | |
| run: uv sync --frozen | |
| # continue-on-error: a pytest failure does not fail the job by itself; the last step of this workflow decides pass/fail from pytest_captured_results.json. | |
| # NOTE(review): this command does not visibly write pytest_captured_results.json, which later steps read; presumably the test suite or a plugin produces it, so confirm. | |
| - name: Run Complete Security Assessment | |
| id: run_tests | |
| continue-on-error: true | |
| env: | |
| ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} | |
| OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} | |
| run: | | |
| # Run all security tests in one comprehensive session | |
| uv run python -m pytest tests/deepeval_tests/red_team_tests.py::TestRAGSystemRedTeaming -v --tb=short | |
| # Report generation is best-effort: it is skipped when the generator script is absent, and its failures are ignored via `|| true`. | |
| - name: Generate Security Report | |
| if: always() | |
| run: | | |
| if [ -f tests/deepeval_tests/red_team_report_generator.py ]; then | |
| uv run python tests/deepeval_tests/red_team_report_generator.py || true | |
| fi | |
| # Posts (or refreshes) a single PR comment carrying the security report; runs on pull requests even when earlier steps failed. | |
| - name: Comment PR with Security Results | |
| if: always() && github.event_name == 'pull_request' | |
| uses: actions/github-script@v7 | |
| with: | |
| script: | | |
| // Publish the security report as one PR comment that is updated in place on later runs. | |
| const fs = require('fs'); | |
| const REPORT_FILE = 'security_report.md'; | |
| const RESULTS_FILE = 'pytest_captured_results.json'; | |
| const REPORT_TITLE = 'RAG System Security Assessment Report'; | |
| // Hidden marker appended to every body so the comment is found again even when the report text changes. | |
| const COMMENT_MARKER = '<!-- deepteam-security-report -->'; | |
| // Same 70% threshold that the final check step of this workflow applies. | |
| const PASS_THRESHOLD = 70; | |
| const { owner, repo } = context.repo; | |
| const issue_number = context.issue.number; | |
| // Body used when the report cannot be built or posted. | |
| const buildErrorBody = (error) => | |
| `# Security Test Results\n\n**Error generating security report**\n\nFailed to read or post security results. Check workflow logs for details.\n\nError: ${error.message}\n\n${COMMENT_MARKER}`; | |
| // Minimal report derived from the captured pytest counts; used when no generated report exists. | |
| const buildFallbackReport = () => { | |
| let results = {}; | |
| if (fs.existsSync(RESULTS_FILE)) { | |
| results = JSON.parse(fs.readFileSync(RESULTS_FILE, 'utf8')) ?? {}; | |
| } | |
| // Coerce defensively: missing, null or non-numeric counts are treated as 0. | |
| const totalTests = Number(results.total_tests) || 0; | |
| const passedTests = Number(results.passed_tests) || 0; | |
| const failedTests = Number(results.failed_tests) || 0; | |
| if (totalTests === 0) { | |
| // No counts means "unknown", not "vulnerable"; say so instead of reporting a 0% pass rate. | |
| return `# ${REPORT_TITLE}\n\n**Status**: NO RESULTS\n\nNo security test results were found. Check workflow logs for details.\n\n`; | |
| } | |
| const passRate = (passedTests / totalTests) * 100; | |
| const meetsThreshold = passRate >= PASS_THRESHOLD; | |
| const status = meetsThreshold ? 'SECURE' : 'VULNERABLE'; | |
| // Meeting the threshold does not mean every test passed, so the summary states the threshold only. | |
| const summary = meetsThreshold | |
| ? `Pass rate meets the ${PASS_THRESHOLD}% threshold.` | |
| : `**Security vulnerabilities detected!** This PR introduces or fails to address security issues.`; | |
| const lines = [ | |
| `# ${REPORT_TITLE}`, | |
| '', | |
| `**Status**: ${status}`, | |
| `**Pass Rate**: ${passRate.toFixed(1)}% (${passedTests}/${totalTests} tests)`, | |
| `**Failed Tests**: ${failedTests}`, | |
| '', | |
| summary, | |
| '', | |
| ]; | |
| return lines.join('\n'); | |
| }; | |
| let reportContent = ''; | |
| try { | |
| if (fs.existsSync(REPORT_FILE)) { | |
| reportContent = fs.readFileSync(REPORT_FILE, 'utf8'); | |
| } | |
| // An empty generated report is as useless as a missing one; fall back to the JSON counts. | |
| if (reportContent.trim() === '') { | |
| reportContent = buildFallbackReport(); | |
| } | |
| } catch (error) { | |
| console.error('Failed to build security report:', error); | |
| reportContent = buildErrorBody(error); | |
| } | |
| const body = reportContent.includes(COMMENT_MARKER) ? reportContent : `${reportContent}\n\n${COMMENT_MARKER}`; | |
| try { | |
| // Paginate: a single listComments call returns one page only, so a busy PR could hide the existing comment. | |
| const comments = await github.paginate(github.rest.issues.listComments, { owner, repo, issue_number, per_page: 100 }); | |
| // Match on the marker, or on the report title for comments that carry no marker. | |
| const existingComment = comments.find( | |
| (comment) => comment.user?.login === 'github-actions[bot]' && | |
| (comment.body?.includes(COMMENT_MARKER) || comment.body?.includes(REPORT_TITLE)) | |
| ); | |
| if (existingComment) { | |
| await github.rest.issues.updateComment({ owner, repo, comment_id: existingComment.id, body }); | |
| console.log('Updated existing security comment'); | |
| } else { | |
| await github.rest.issues.createComment({ owner, repo, issue_number, body }); | |
| console.log('Created new security comment'); | |
| } | |
| } catch (error) { | |
| console.error('Failed to post security results:', error); | |
| // Last resort: leave a visible note on the PR. If this call also throws, the step fails. | |
| await github.rest.issues.createComment({ owner, repo, issue_number, body: buildErrorBody(error) }); | |
| } | |
| # Final gate: fails the job when results are missing, empty, malformed, or below the pass-rate threshold. | |
| - name: Check test results and fail if needed | |
| if: always() | |
| run: | | |
| set -euo pipefail | |
| results_file="pytest_captured_results.json" | |
| # Minimum pass rate (percent) required for the job to succeed. | |
| threshold=70 | |
| if [ ! -f "$results_file" ]; then | |
| echo "ERROR: No test results file found" | |
| exit 1 | |
| fi | |
| total_tests=$(jq -r '.total_tests // 0' "$results_file") | |
| passed_tests=$(jq -r '.passed_tests // 0' "$results_file") | |
| # The counts come from a file written during the test run; require plain integers before using them in arithmetic. | |
| if ! [[ "$total_tests" =~ ^[0-9]+$ && "$passed_tests" =~ ^[0-9]+$ ]]; then | |
| echo "ERROR: Non-numeric test counts in $results_file" | |
| exit 1 | |
| fi | |
| if [ "$total_tests" -eq 0 ]; then | |
| echo "ERROR: No tests were executed" | |
| exit 1 | |
| fi | |
| # Values are passed with -v rather than spliced into the awk program text, so file contents are never executed as awk code. | |
| pass_rate=$(awk -v passed="$passed_tests" -v total="$total_tests" 'BEGIN { print passed / total * 100 }') | |
| echo "Complete Security Assessment Results:" | |
| echo "Total Tests: $total_tests" | |
| echo "Passed Tests: $passed_tests" | |
| echo "Pass Rate: $pass_rate%" | |
| # Compare in awk (exit status 0 when the rate is below the threshold) so no extra tool such as bc is required. | |
| if awk -v rate="$pass_rate" -v min="$threshold" 'BEGIN { exit !(rate < min) }'; then | |
| echo "TEST FAILURE: Pass rate $pass_rate% is below threshold $threshold%" | |
| echo "Security vulnerabilities detected in RAG system." | |
| exit 1 | |
| fi | |
| echo "TEST SUCCESS: Pass rate $pass_rate% meets threshold $threshold%" | |