rootcodelabs · nuwangeek · Oct 21, 2025 · Oct 16, 2025 · Oct 16, 2025 · Oct 17, 2025
diff --git a/.github/workflows/deepeval-tests.yml b/.github/workflows/deepeval-tests.yml
@@ -0,0 +1,123 @@
+name: DeepEval RAG System Tests
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]  # run when a PR is opened, receives new commits, or is reopened
+    paths:  # ...and only when at least one changed file matches a pattern below
+      - 'src/**'
+      - 'tests/**'
+      - '.github/workflows/deepeval-tests.yml'  # this workflow file itself
+
+jobs:
+  deepeval-tests:
+    runs-on: ubuntu-latest
+    timeout-minutes: 40  # the job is cancelled if it runs longer than 40 minutes
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: '.python-version'  # interpreter version is read from this repo file
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v6
+
+      - name: Install dependencies (locked)
+        run: uv sync --frozen  # install from the existing lockfile without updating it
+
+      - name: Run DeepEval tests  # NOTE(review): no continue-on-error here (the red-team workflow sets it), so any failing test fails the job regardless of the 70% gate - confirm intended
+        id: run_tests  # referenced as steps.run_tests.outcome by the final gate step
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short
+
+      - name: Generate evaluation report
+        if: always()  # build the report even when the test step failed
+        run: uv run python tests/deepeval_tests/report_generator.py  # run inside the uv-managed environment, like the test step
+
+      - name: Comment PR with test results
+        if: always() && github.event_name == 'pull_request'  # post results even when tests failed, but only on PR events
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+
+            try {
+              const reportContent = fs.readFileSync('test_report.md', 'utf8');  // presumably written by report_generator.py - verify
+
+              const comments = await github.paginate(github.rest.issues.listComments, {  // paginate: one page may not contain the bot comment
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number
+              });
+
+              const existingComment = comments.find(  // reuse the previous report comment so reruns do not pile up
+                comment => comment.user?.login === 'github-actions[bot]' &&
+                comment.body?.includes('RAG System Evaluation Report')
+              );
+
+              if (existingComment) {
+                await github.rest.issues.updateComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  comment_id: existingComment.id,
+                  body: reportContent
+                });
+              } else {
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: context.issue.number,
+                  body: reportContent
+                });
+              }
+
+            } catch (error) {
+              console.error('Failed to post test results:', error);
+
+              await github.rest.issues.createComment({  // fallback comment; its heading matches the marker searched for above
+                issue_number: context.issue.number,
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                body: `## RAG System Evaluation Report\n\n**Error generating test report**\n\nFailed to read or post test results. Check workflow logs for details.\n\nError: ${error.message}`
+              });
+            }
+
+      - name: Check test results and fail if needed
+        if: always()  # the gate must run even when the test step failed
+        run: |
+          # Report whether pytest itself failed, then gate on the recorded pass rate.
+          if [ "${{ steps.run_tests.outcome }}" == "failure" ]; then
+            echo "Tests ran but failed - this is expected if RAG performance is below threshold"
+          fi
+          if [ -f "pytest_captured_results.json" ]; then
+            total_tests=$(jq '.total_tests // 0' pytest_captured_results.json)  # missing key counts as 0
+            passed_tests=$(jq '.passed_tests // 0' pytest_captured_results.json)
+
+            if [ "$total_tests" -eq 0 ]; then  # also guards the division below
+              echo "ERROR: No tests were executed"
+              exit 1
+            fi
+
+            pass_rate=$(awk "BEGIN {print ($passed_tests / $total_tests) * 100}")
+
+            echo "DeepEval Test Results:"
+            echo "Total Tests: $total_tests"
+            echo "Passed Tests: $passed_tests"
+            echo "Pass Rate: $pass_rate%"
+
+            if awk "BEGIN {exit ($pass_rate >= 70)}"; then  # awk exits 0 (shell-true) only when the rate is below 70; no bc needed
+              echo "TEST FAILURE: Pass rate $pass_rate% is below threshold 70%"
+              echo "RAG system performance is below acceptable standards."
+              exit 1
+            else
+              echo "TEST SUCCESS: Pass rate $pass_rate% meets threshold 70%"
+            fi
+          else
+            echo "ERROR: No test results file found"
+            exit 1
+          fi
diff --git a/.github/workflows/deepteam-red-team-tests.yml b/.github/workflows/deepteam-red-team-tests.yml
@@ -0,0 +1,167 @@
+name: DeepTeam Red Team Security Tests
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]  # run when a PR is opened, receives new commits, or is reopened
+    paths:  # ...and only when at least one changed file matches a pattern below
+      - 'src/**'
+      - 'tests/**'
+      - 'mocks/**'
+      - 'data/**'
+      - '.github/workflows/deepteam-red-team-tests.yml'  # this workflow file itself
+  workflow_dispatch:  # also runnable manually
+    inputs:
+      attack_intensity:  # NOTE(review): not referenced by any step in this workflow - wire it into the test run or drop it
+        description: 'Attack intensity level'
+        required: false
+        default: 'standard'
+        type: choice
+        options:
+          - light
+          - standard
+          - intensive
+
+jobs:
+  security-assessment:
+    runs-on: ubuntu-latest
+    timeout-minutes: 60  # the job is cancelled if it runs longer than 60 minutes
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: '.python-version'  # interpreter version is read from this repo file
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v6
+
+      - name: Install dependencies (locked)
+        run: uv sync --frozen  # install from the existing lockfile without updating it
+
+      - name: Run Complete Security Assessment
+        id: run_tests
+        continue-on-error: true  # a pytest failure does not fail the job here; the final step applies the pass-rate gate
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: |
+          # Run all security tests in one comprehensive session
+          uv run python -m pytest tests/deepeval_tests/red_team_tests.py::TestRAGSystemRedTeaming -v --tb=short
+
+      - name: Generate Security Report
+        if: always()  # run even if an earlier step failed
+        run: |
+          if [ -f tests/deepeval_tests/red_team_report_generator.py ]; then  # the generator script is optional
+            uv run python tests/deepeval_tests/red_team_report_generator.py || true  # best-effort: a generator error never fails this step
+          fi
+
+      - name: Comment PR with Security Results
+        if: always() && github.event_name == 'pull_request'  # skipped for manual (workflow_dispatch) runs, which have no PR
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+
+            try {
+              let reportContent = '';
+              if (fs.existsSync('security_report.md')) {  // preferred: the full report, when the generator produced one
+                reportContent = fs.readFileSync('security_report.md', 'utf8');
+              } else {
+                // Fallback: create basic report from JSON
+                let results = {};
+                if (fs.existsSync('pytest_captured_results.json')) {
+                  const resultsData = fs.readFileSync('pytest_captured_results.json', 'utf8');
+                  results = JSON.parse(resultsData);
+                }
+
+                const totalTests = results.total_tests || 0;
+                const passedTests = results.passed_tests || 0;
+                const failedTests = results.failed_tests || 0;
+                const passRate = totalTests > 0 ? (passedTests / totalTests * 100) : 0;
+                const status = passRate >= 70 ? 'SECURE' : 'VULNERABLE';  // same 70% threshold as the final gate step
+
+                reportContent = `# RAG System Security Assessment Report\n\n` +
+                               `**Status**: ${status}\n` +
+                               `**Pass Rate**: ${passRate.toFixed(1)}% (${passedTests}/${totalTests} tests)\n` +
+                               `**Failed Tests**: ${failedTests}\n\n`;
+
+                if (passRate < 70) {
+                  reportContent += `**Security vulnerabilities detected!** This PR introduces or fails to address security issues.\n\n`;
+                } else {
+                  reportContent += `Security pass rate meets the 70% threshold.\n\n`;  // individual tests may still have failed
+                }
+              }
+
+              const comments = await github.paginate(github.rest.issues.listComments, {  // paginate: one page may not contain the bot comment
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number
+              });
+
+              const existingComment = comments.find(  // reuse the previous report comment so reruns do not pile up
+                comment => comment.user?.login === 'github-actions[bot]' &&
+                comment.body?.includes('RAG System Security Assessment Report')
+              );
+
+              if (existingComment) {
+                await github.rest.issues.updateComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  comment_id: existingComment.id,
+                  body: reportContent
+                });
+                console.log('Updated existing security comment');
+              } else {
+                await github.rest.issues.createComment({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  issue_number: context.issue.number,
+                  body: reportContent
+                });
+                console.log('Created new security comment');
+              }
+
+            } catch (error) {
+              console.error('Failed to post security results:', error);
+
+              await github.rest.issues.createComment({  // heading matches the marker searched for above, so a later run replaces this comment
+                issue_number: context.issue.number,
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                body: `# RAG System Security Assessment Report\n\n**Error generating security report**\n\nFailed to read or post security results. Check workflow logs for details.\n\nError: ${error.message}`
+              });
+            }
+
+      - name: Check test results and fail if needed
+        if: always()  # the gate must run even when an earlier step failed
+        run: |
+          if [ -f "pytest_captured_results.json" ]; then
+            total_tests=$(jq '.total_tests // 0' pytest_captured_results.json)  # missing key counts as 0
+            passed_tests=$(jq '.passed_tests // 0' pytest_captured_results.json)
+
+            if [ "$total_tests" -eq 0 ]; then  # also guards the division below
+              echo "ERROR: No tests were executed"
+              exit 1
+            fi
+
+            pass_rate=$(awk "BEGIN {print ($passed_tests / $total_tests) * 100}")
+
+            echo "Complete Security Assessment Results:"
+            echo "Total Tests: $total_tests"
+            echo "Passed Tests: $passed_tests"
+            echo "Pass Rate: $pass_rate%"
+
+            if awk "BEGIN {exit ($pass_rate >= 70)}"; then  # awk exits 0 (shell-true) only when the rate is below 70; no bc needed
+              echo "TEST FAILURE: Pass rate $pass_rate% is below threshold 70%"
+              echo "Security vulnerabilities detected in RAG system."
+              exit 1
+            else
+              echo "TEST SUCCESS: Pass rate $pass_rate% meets threshold 70%"
+            fi
+          else
+            echo "ERROR: No test results file found"
+            exit 1
+          fi
diff --git a/pyproject.toml b/pyproject.toml
@@ -25,6 +25,10 @@ dependencies = [
     "uvicorn>=0.35.0",
     "qdrant-client>=1.15.1",
     "rank-bm25>=0.2.2",
+    "deepeval>=3.6.0",
+    "pytest-json-report>=1.5.0",
+    "deepteam>=0.2.5",
+    "anthropic>=0.69.0",
     "nemoguardrails>=0.16.0",
     "rerankers[transformers]>=0.10.0",
     "tiktoken>=0.11.0",