Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 109 additions & 11 deletions backend/api/middleware/cors.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,126 @@
import logging
import os

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

logger = logging.getLogger(__name__)


def setup_cors(app: FastAPI) -> None:
    """Configure secure CORS middleware for the FastAPI application.

    The allow-list of origins is assembled from the process environment:

    * ``ENVIRONMENT`` (default ``"development"``): unless it equals
      ``"production"``, the local development origins on port 3000 are allowed.
    * ``FRONTEND_URL``: added as-is when set; if it uses ``http://`` the
      ``https://`` variant is added as well.
    * ``ADDITIONAL_CORS_ORIGINS``: comma-separated list; every entry is
      stripped and must pass ``_is_valid_origin``; rejected entries are
      logged and skipped.

    Args:
        app: Application instance the ``CORSMiddleware`` is registered on.
    """
    environment = os.getenv("ENVIRONMENT", "development")
    is_production = environment == "production"

    allowed_origins = []

    if not is_production:
        # Development origins
        allowed_origins.extend(
            [
                "http://localhost:3000",  # Next.js development server
                "http://127.0.0.1:3000",  # Alternative localhost
                "https://localhost:3000",  # HTTPS development
                "https://127.0.0.1:3000",  # HTTPS alternative localhost
            ]
        )

    # Add production frontend URL
    frontend_url = os.getenv("FRONTEND_URL")
    if frontend_url:
        allowed_origins.append(frontend_url)
        # Also add the HTTPS version if HTTP is provided. Only the leading
        # scheme is rewritten, never a later "http://" inside the value.
        if frontend_url.startswith("http://"):
            allowed_origins.append(frontend_url.replace("http://", "https://", 1))

    # Add additional origins from the environment, validating each entry so a
    # malformed value cannot end up in the allow-list.
    additional_origins = os.getenv("ADDITIONAL_CORS_ORIGINS", "")
    if additional_origins:
        candidates = [
            origin.strip() for origin in additional_origins.split(",") if origin.strip()
        ]
        for origin in candidates:
            if _is_valid_origin(origin):
                allowed_origins.append(origin)
            else:
                logger.warning("Invalid CORS origin ignored: %s", origin)

    # Remove duplicates while preserving order
    allowed_origins = list(dict.fromkeys(allowed_origins))

    # Secure methods - restrict to only what we need
    allowed_methods = [
        "GET",
        "POST",
        "PUT",
        "DELETE",
        "OPTIONS",  # Required for CORS preflight
    ]

    # Secure headers - be specific about what we allow
    allowed_headers = [
        "Accept",
        "Accept-Language",
        "Content-Type",
        "Authorization",
        "X-Requested-With",
        "Cache-Control",
    ]

    # Expose only necessary headers
    expose_headers = [
        "X-Total-Count",
        "X-RateLimit-Limit",
        "X-RateLimit-Remaining",
        "X-Process-Time",
    ]

    logger.info("CORS configured for environment: %s", environment)
    logger.info("Allowed origins: %s", allowed_origins)

    app.add_middleware(
        CORSMiddleware,
        allow_origins=allowed_origins,
        allow_credentials=True,  # Required for auth cookies/headers
        allow_methods=allowed_methods,
        allow_headers=allowed_headers,
        expose_headers=expose_headers,
        max_age=600,  # Cache preflight responses for 10 minutes
    )


def _is_valid_origin(origin: str) -> bool:
"""Validate that an origin is properly formatted and secure"""
import re

# Basic URL pattern validation
url_pattern = re.compile(
r"^https?://" # http:// or https://
r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|" # domain
r"localhost|" # localhost
r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" # IP
r"(?::\d+)?" # optional port
r"(?:/?|[/?]\S+)$",
re.IGNORECASE,
)

if not url_pattern.match(origin):
return False

# Prevent potentially dangerous origins
dangerous_patterns = [
r"javascript:",
r"data:",
r"file:",
r"ftp:",
r"about:",
]

for pattern in dangerous_patterns:
if re.search(pattern, origin, re.IGNORECASE):
return False

return True
259 changes: 259 additions & 0 deletions backend/docs/security_implementation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
# SmartQuery Security Implementation - Task B28

This document outlines the comprehensive security measures implemented in SmartQuery API as part of Task B28: Security and Error Handling.

## Security Overview

SmartQuery implements a multi-layered security approach covering:
- Authentication and authorization
- Input validation and sanitization
- Rate limiting and request throttling
- Comprehensive error handling
- Security headers and CORS configuration
- Data protection and secure storage

## Authentication & Authorization

### JWT Token Security
- **Strong Secret Keys**: Production requires minimum 32-character JWT secrets
- **Token Expiration**: Access tokens expire in 60 minutes, refresh tokens in 30 days
- **Token Blacklisting**: Implements token revocation and blacklisting system
- **Unique Token IDs**: Each token has a unique JWT ID (jti) for tracking

### Google OAuth Integration
- **Token Verification**: Validates Google OAuth tokens against Google's servers
- **Email Verification**: Requires verified email addresses from Google
- **Mock Mode**: Secure development mode with mock tokens
- **Error Handling**: Comprehensive OAuth error handling

### Authentication Middleware
- **Bearer Token Validation**: Proper HTTP Bearer token handling
- **User Context Injection**: Secure user context for protected routes
- **Role-Based Access**: Support for user roles and permissions
- **Session Management**: Secure session handling and cleanup

## Input Validation & Sanitization

### Comprehensive Input Validation
- **String Length Limits**: Enforced limits on all text inputs
  - Project names: 100 characters
  - Descriptions: 500 characters
  - Queries: 2000 characters
  - Email: 254 characters
- **File Upload Validation**: Restricts file types to CSV only, max 100MB
- **UUID Validation**: Strict UUID format validation
- **Email Validation**: RFC-compliant email validation

### Malicious Content Detection
- **SQL Injection Prevention**: Filters dangerous SQL keywords and patterns
- **XSS Prevention**: HTML entity encoding for all user inputs
- **Script Injection Detection**: Blocks JavaScript and VBScript injection attempts
- **Path Traversal Prevention**: Blocks directory traversal attempts
- **Command Injection Prevention**: Filters command injection patterns

### Sanitization Process
- **HTML Encoding**: All user inputs are HTML-encoded
- **Control Character Removal**: Strips null bytes and control characters
- **Pattern Matching**: Uses regex patterns to detect malicious content
- **Recursive Sanitization**: Sanitizes nested data structures

## Rate Limiting & Throttling

### Multi-Tier Rate Limiting
- **Endpoint-Specific Limits**:
  - Authentication: 20 requests/minute
  - Projects: 50 requests/minute
  - Chat/AI: 30 requests/minute
  - Default: 100 requests/minute

### Advanced Rate Limiting Features
- **User-Based Tracking**: Tracks requests per authenticated user
- **IP-Based Fallback**: Rate limits for anonymous users
- **Temporary Blocking**: Blocks users exceeding 3x the limit
- **Sliding Windows**: Uses time-window based counting
- **Graceful Headers**: Returns rate limit headers to clients

### Protection Against Abuse
- **Burst Protection**: Prevents rapid-fire requests
- **Distributed Denial of Service (DDoS) Mitigation**: Basic protection
- **Request Pattern Analysis**: Monitors for suspicious patterns

## Error Handling & Security

### Secure Error Messages
- **Information Leakage Prevention**: Sanitizes error messages in production
- **Generic Production Errors**: Returns generic messages to prevent reconnaissance
- **Detailed Development Errors**: Full error details in development mode
- **Error ID Tracking**: Unique error IDs for support and debugging

### Comprehensive Error Logging
- **Security Event Logging**: Dedicated security event logger
- **Attack Detection**: Logs potential attack patterns
- **Authentication Failures**: Tracks failed login attempts
- **Input Validation Failures**: Logs validation errors for analysis

### Error Response Standardization
- **Consistent Format**: All errors use standardized ApiResponse format
- **Security Headers**: Security headers added to all error responses
- **Status Code Mapping**: Proper HTTP status codes for different error types
- **Sanitized Stack Traces**: Stack traces hidden in production

## Security Headers & CORS

### Comprehensive Security Headers
- **Content Security Policy (CSP)**: Prevents XSS attacks
- **X-Frame-Options**: Prevents clickjacking (set to DENY)
- **X-Content-Type-Options**: Prevents MIME sniffing (set to nosniff)
- **X-XSS-Protection**: Browser XSS protection enabled
- **Strict-Transport-Security**: Forces HTTPS in production
- **Referrer-Policy**: Controls referrer information leakage
- **Permissions-Policy**: Restricts browser features

### Secure CORS Configuration
- **Environment-Specific Origins**: Different origins for development/production
- **Origin Validation**: Validates and sanitizes CORS origins
- **Restricted Methods**: Only allows necessary HTTP methods
- **Specific Headers**: Restricts allowed request headers
- **Credential Support**: Secure credential handling for authenticated requests

## Data Protection

### Sensitive Data Handling
- **Environment Variables**: All secrets stored in environment variables
- **API Key Security**: OpenAI and other API keys properly secured
- **Database Credentials**: Secure database connection handling
- **Password Policies**: No plain text password storage
- **Data Encryption**: Sensitive data encrypted at rest and in transit

### Secure Configuration
- **Production Secrets**: Strong, unique secrets in production
- **Development Defaults**: Secure defaults for development environment
- **Configuration Validation**: Validates security configuration on startup
- **Environment Separation**: Clear separation between development and production

## Security Middleware Architecture

### SecurityMiddleware
- **Request Size Validation**: Prevents oversized requests
- **Content Validation**: Validates request content types and structures
- **Pattern Detection**: Real-time malicious pattern detection
- **Response Headers**: Adds security headers to all responses

### Rate Limiting Integration
- **Middleware Integration**: Seamlessly integrated with FastAPI
- **Memory Efficient**: Efficient in-memory tracking with cleanup
- **Redis Ready**: Prepared for Redis integration in production
- **Configurable Limits**: Environment-based configuration

### Error Handler Integration
- **Exception Tracking**: Comprehensive exception handling
- **Security Event Generation**: Automatic security event logging
- **Response Sanitization**: Sanitizes all error responses
- **Attack Detection**: Detects and logs potential attacks

## Security Testing & Validation

### Input Validation Testing
- **Boundary Testing**: Tests input length limits
- **Injection Testing**: Tests for SQL injection, XSS, and other attacks
- **Format Validation**: Tests UUID, email, and other format validators
- **Malicious Pattern Testing**: Tests detection of malicious patterns

### Authentication Testing
- **Token Validation**: Tests JWT token validation and expiration
- **OAuth Integration**: Tests Google OAuth token verification
- **Authorization Testing**: Tests protected endpoint access
- **Session Management**: Tests session handling and cleanup

### Rate Limiting Testing
- **Limit Enforcement**: Tests rate limit enforcement
- **Burst Protection**: Tests rapid request handling
- **User Isolation**: Tests per-user rate limiting
- **Recovery Testing**: Tests limit reset and recovery

## Production Security Checklist

### Environment Configuration
- [ ] JWT_SECRET set to strong, unique value (minimum 32 characters)
- [ ] OPENAI_API_KEY properly configured
- [ ] Database credentials secured
- [ ] ENVIRONMENT set to "production"
- [ ] Security headers enabled
- [ ] Rate limiting enabled

### Network Security
- [ ] HTTPS enforced with valid SSL certificates
- [ ] CORS origins restricted to production domains
- [ ] Firewall rules configured
- [ ] Database access restricted
- [ ] API endpoints not publicly indexed

### Monitoring & Alerting
- [ ] Security event logging enabled
- [ ] Error tracking configured
- [ ] Rate limiting alerts set up
- [ ] Authentication failure monitoring
- [ ] Unusual activity detection

### Data Protection
- [ ] Database encrypted at rest
- [ ] Secure backup procedures
- [ ] PII handling compliance
- [ ] Data retention policies
- [ ] Access logging enabled

## Security Incident Response

### Detection
- **Automated Monitoring**: Real-time security event detection
- **Log Analysis**: Regular log analysis for security events
- **Rate Limit Violations**: Automatic detection of abuse
- **Authentication Anomalies**: Detection of unusual login patterns

### Response Procedures
1. **Immediate Response**: Automatically block suspicious IPs
2. **Investigation**: Analyze security logs and patterns
3. **Mitigation**: Implement additional protective measures
4. **Communication**: Notify relevant stakeholders
5. **Recovery**: Restore normal operations
6. **Post-Incident**: Review and improve security measures

## Security Maintenance

### Regular Updates
- **Dependency Updates**: Regular updates of all dependencies
- **Security Patches**: Prompt application of security patches
- **Configuration Review**: Regular review of security configuration
- **Access Review**: Regular review of user access and permissions

### Security Audits
- **Code Reviews**: Regular security-focused code reviews
- **Penetration Testing**: Periodic penetration testing
- **Vulnerability Scanning**: Regular vulnerability assessments
- **Compliance Checks**: Regular compliance validation

## Security Contact

When investigating a security-related issue or suspected vulnerability, start with these checks:
- Review security logs in the application
- Check error handling and rate limiting effectiveness
- Validate input sanitization is working correctly
- Ensure all security headers are present

## Implementation Status

✅ **Completed Tasks (Task B28):**
- Authentication and authorization security audit
- Sensitive data handling and environment variable security
- Comprehensive error handling implementation
- Input validation and sanitization system
- Rate limiting and request throttling
- Security headers and CORS configuration
- Security documentation and guidelines

**Security Implementation: COMPLETE**
All security measures have been implemented according to Task B28 requirements.

---

*This document is part of the SmartQuery MVP security implementation and should be regularly updated as new security measures are implemented.*
Loading
Loading