Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions .github/workflows/deepeval-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# DeepEval RAG System Tests
#
# Runs on pull requests that touch src/, tests/, or this workflow file.
# The job installs the locked dependencies with uv, runs the DeepEval test
# module, posts the generated report as a PR comment, and finally gates the
# job on the recorded pass rate.
name: DeepEval RAG System Tests

on:
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - 'src/**'
      - 'tests/**'
      - '.github/workflows/deepeval-tests.yml'

jobs:
  deepeval-tests:
    runs-on: ubuntu-latest
    timeout-minutes: 40
    # Explicit least-privilege token scopes. The comment step creates and
    # updates PR comments through the issues API, which fails with
    # "Resource not accessible by integration" when the repository default
    # for GITHUB_TOKEN is read-only.
    permissions:
      contents: read
      pull-requests: write
      issues: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The interpreter version is taken from the repository's .python-version file.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: '.python-version'

      - name: Set up uv
        uses: astral-sh/setup-uv@v6

      # --frozen installs exactly what the lockfile records without re-resolving.
      - name: Install dependencies (locked)
        run: uv sync --frozen

- name: Run DeepEval tests
  id: run_tests
  # Do not fail the job on a non-zero pytest exit code: the final
  # "Check test results and fail if needed" step applies the pass-rate
  # threshold and decides the job result. The raw step result is still
  # available to later steps as steps.run_tests.outcome.
  continue-on-error: true
  env:
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  run: uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short

- name: Generate evaluation report
  # Runs even when the test step failed so a report is still produced.
  if: always()
  # Use `uv run` so the generator executes inside the environment created by
  # `uv sync`, the same way the test step does; a bare `python` would use the
  # setup-python interpreter, which does not have the project dependencies.
  run: uv run python tests/deepeval_tests/report_generator.py

- name: Comment PR with test results
  # Only meaningful on pull_request events; runs even if earlier steps failed.
  if: always() && github.event_name == 'pull_request'
  uses: actions/github-script@v7
  with:
    script: |
      const fs = require('fs');

      // Text used to recognise this workflow's own comment on the PR.
      const marker = 'RAG System Evaluation Report';
      const { owner, repo } = context.repo;
      const issue_number = context.issue.number;

      try {
        const reportContent = fs.readFileSync('test_report.md', 'utf8');

        // Walk every page of comments. A single listComments call returns
        // only the first page, so on a busy PR the existing bot comment
        // would be missed and a duplicate posted on every run.
        const comments = await github.paginate(github.rest.issues.listComments, {
          owner,
          repo,
          issue_number,
          per_page: 100
        });

        // comment.user can be null (e.g. deleted accounts), hence the `?.`.
        const existingComment = comments.find(
          comment => comment.user?.login === 'github-actions[bot]' &&
                     (comment.body || '').includes(marker)
        );

        if (existingComment) {
          // Keep a single, continuously updated report comment per PR.
          await github.rest.issues.updateComment({
            owner,
            repo,
            comment_id: existingComment.id,
            body: reportContent
          });
        } else {
          await github.rest.issues.createComment({
            owner,
            repo,
            issue_number,
            body: reportContent
          });
        }

      } catch (error) {
        console.error('Failed to post test results:', error);

        // Fallback comment; it carries the marker text so a later
        // successful run finds and overwrites it.
        await github.rest.issues.createComment({
          issue_number,
          owner,
          repo,
          body: `## RAG System Evaluation Report\n\n**Error generating test report**\n\nFailed to read or post test results. Check workflow logs for details.\n\nError: ${error.message}`
        });
      }

- name: Check test results and fail if needed
  if: always()
  run: |
    # Log a note when the pytest step itself reported a failure.
    if [ "${{ steps.run_tests.outcome }}" == "failure" ]; then
      echo "Tests ran but failed - this is expected if RAG performance is below threshold"
    fi

    results_file="pytest_captured_results.json"

    # Without a results file there is nothing to evaluate.
    if [ ! -f "$results_file" ]; then
      echo "ERROR: No test results file found"
      exit 1
    fi

    # Missing keys fall back to 0 via jq's alternative operator.
    total=$(jq '.total_tests // 0' "$results_file")
    passed=$(jq '.passed_tests // 0' "$results_file")

    if [ "$total" -eq 0 ]; then
      echo "ERROR: No tests were executed"
      exit 1
    fi

    # Pass rate as a percentage (may be fractional).
    rate=$(awk "BEGIN {print ($passed / $total) * 100}")

    echo "DeepEval Test Results:"
    echo "Total Tests: $total"
    echo "Passed Tests: $passed"
    echo "Pass Rate: $rate%"

    # bc prints 1 when the comparison holds, which (( )) treats as true.
    if (( $(echo "$rate < 70" | bc -l) )); then
      echo "TEST FAILURE: Pass rate $rate% is below threshold 70%"
      echo "RAG system performance is below acceptable standards."
      exit 1
    fi

    echo "TEST SUCCESS: Pass rate $rate% meets threshold 70%"
167 changes: 167 additions & 0 deletions .github/workflows/deepteam-red-team-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# DeepTeam Red Team Security Tests
#
# Runs on pull requests that touch src/, tests/, mocks/, data/, or this
# workflow file, and can also be started manually. The job installs the
# locked dependencies with uv, runs the red-team pytest class, posts a
# security report as a PR comment, and gates the job on the recorded pass rate.
name: DeepTeam Red Team Security Tests

on:
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - 'src/**'
      - 'tests/**'
      - 'mocks/**'
      - 'data/**'
      # Must match this file's real name so edits to the workflow trigger it.
      - '.github/workflows/deepteam-red-team-tests.yml'
  workflow_dispatch:
    inputs:
      # NOTE(review): no step in this workflow reads this input, so it
      # currently has no effect; wire it into the test step or remove it.
      attack_intensity:
        description: 'Attack intensity level'
        required: false
        default: 'standard'
        type: choice
        options:
          - light
          - standard
          - intensive

jobs:
  security-assessment:
    runs-on: ubuntu-latest
    timeout-minutes: 60
    # Explicit least-privilege token scopes. The comment step creates and
    # updates PR comments through the issues API, which fails with
    # "Resource not accessible by integration" when the repository default
    # for GITHUB_TOKEN is read-only.
    permissions:
      contents: read
      pull-requests: write
      issues: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The interpreter version is taken from the repository's .python-version file.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: '.python-version'

      - name: Set up uv
        uses: astral-sh/setup-uv@v6

      # --frozen installs exactly what the lockfile records without re-resolving.
      - name: Install dependencies (locked)
        run: uv sync --frozen

- name: Run Complete Security Assessment
  id: run_tests
  env:
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  # A non-zero pytest exit code does not fail the job at this point; the
  # final check step evaluates the recorded results instead.
  continue-on-error: true
  # Every security test runs in a single pytest session.
  run: uv run python -m pytest tests/deepeval_tests/red_team_tests.py::TestRAGSystemRedTeaming -v --tb=short

- name: Generate Security Report
  if: always()
  run: |
    # Best effort: run the generator only when the script exists, and never
    # let a generator failure fail this step.
    generator="tests/deepeval_tests/red_team_report_generator.py"
    if [ -f "$generator" ]; then
      uv run python "$generator" || true
    fi

- name: Comment PR with Security Results
  # Only meaningful on pull_request events; runs even if earlier steps failed.
  if: always() && github.event_name == 'pull_request'
  uses: actions/github-script@v7
  with:
    script: |
      const fs = require('fs');

      // Text used to recognise this workflow's own comment on the PR.
      const marker = 'RAG System Security Assessment Report';
      const { owner, repo } = context.repo;
      const issue_number = context.issue.number;

      try {
        let reportContent = '';
        if (fs.existsSync('security_report.md')) {
          reportContent = fs.readFileSync('security_report.md', 'utf8');
        } else {
          // Fallback: build a basic report from the captured pytest JSON.
          let results = {};
          if (fs.existsSync('pytest_captured_results.json')) {
            results = JSON.parse(fs.readFileSync('pytest_captured_results.json', 'utf8'));
          }

          const totalTests = results.total_tests || 0;
          const passedTests = results.passed_tests || 0;
          const failedTests = results.failed_tests || 0;
          const passRate = totalTests > 0 ? (passedTests / totalTests * 100) : 0;
          const status = passRate >= 70 ? 'SECURE' : 'VULNERABLE';

          reportContent = `# ${marker}\n\n` +
            `**Status**: ${status}\n` +
            `**Pass Rate**: ${passRate.toFixed(1)}% (${passedTests}/${totalTests} tests)\n` +
            `**Failed Tests**: ${failedTests}\n\n`;

          if (passRate < 70) {
            reportContent += `**Security vulnerabilities detected!** This PR introduces or fails to address security issues.\n\n`;
          } else if (passedTests === totalTests) {
            reportContent += `All security tests passed.\n\n`;
          } else {
            // Meeting the threshold is not the same as passing everything.
            reportContent += `Pass rate meets the 70% threshold, but not every security test passed.\n\n`;
          }
        }

        // Walk every page of comments. A single listComments call returns
        // only the first page, so on a busy PR the existing bot comment
        // would be missed and a duplicate posted on every run.
        const comments = await github.paginate(github.rest.issues.listComments, {
          owner,
          repo,
          issue_number,
          per_page: 100
        });

        // comment.user can be null (e.g. deleted accounts), hence the `?.`.
        const existingComment = comments.find(
          comment => comment.user?.login === 'github-actions[bot]' &&
                     (comment.body || '').includes(marker)
        );

        if (existingComment) {
          await github.rest.issues.updateComment({
            owner,
            repo,
            comment_id: existingComment.id,
            body: reportContent
          });
          console.log('Updated existing security comment');
        } else {
          await github.rest.issues.createComment({
            owner,
            repo,
            issue_number,
            body: reportContent
          });
          console.log('Created new security comment');
        }

      } catch (error) {
        console.error('Failed to post security results:', error);

        // The heading carries the marker text so a later successful run
        // finds and overwrites this error comment instead of orphaning it.
        await github.rest.issues.createComment({
          issue_number,
          owner,
          repo,
          body: `# ${marker}\n\n**Error generating security report**\n\nFailed to read or post security results. Check workflow logs for details.\n\nError: ${error.message}`
        });
      }

- name: Check test results and fail if needed
  if: always()
  run: |
    results_file="pytest_captured_results.json"

    # Without a results file there is nothing to evaluate.
    if [ ! -f "$results_file" ]; then
      echo "ERROR: No test results file found"
      exit 1
    fi

    # Missing keys fall back to 0 via jq's alternative operator.
    total=$(jq '.total_tests // 0' "$results_file")
    passed=$(jq '.passed_tests // 0' "$results_file")

    if [ "$total" -eq 0 ]; then
      echo "ERROR: No tests were executed"
      exit 1
    fi

    # Pass rate as a percentage (may be fractional).
    rate=$(awk "BEGIN {print ($passed / $total) * 100}")

    echo "Complete Security Assessment Results:"
    echo "Total Tests: $total"
    echo "Passed Tests: $passed"
    echo "Pass Rate: $rate%"

    # bc prints 1 when the comparison holds, which (( )) treats as true.
    if (( $(echo "$rate < 70" | bc -l) )); then
      echo "TEST FAILURE: Pass rate $rate% is below threshold 70%"
      echo "Security vulnerabilities detected in RAG system."
      exit 1
    fi

    echo "TEST SUCCESS: Pass rate $rate% meets threshold 70%"
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ dependencies = [
"uvicorn>=0.35.0",
"qdrant-client>=1.15.1",
"rank-bm25>=0.2.2",
"rerankers[transformers]>=0.10.0",
"deepeval>=3.6.0",
"pytest-json-report>=1.5.0",
"deepteam>=0.2.5",
"anthropic>=0.69.0",
"nemoguardrails>=0.16.0",
"rerankers[transformers]>=0.10.0",
"tiktoken>=0.11.0",
Expand Down
Loading
Loading