SmartQueryy · tanzilahmed0 · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025
diff --git a/backend/api/middleware/cors.py b/backend/api/middleware/cors.py
@@ -1,28 +1,126 @@
+import logging
 import os
 
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 
+logger = logging.getLogger(__name__)
+
 
 def setup_cors(app: FastAPI) -> None:
-    """Configure CORS middleware for the FastAPI application"""
+    """Attach a restrictive CORSMiddleware to ``app``.
+
+    Origins come from FRONTEND_URL and ADDITIONAL_CORS_ORIGINS (comma-separated);
+    the localhost:3000 origins are added unless ENVIRONMENT is "production".
+    """
 
-    # Get allowed origins from environment
-    allowed_origins = [
-        "http://localhost:3000",  # Next.js development server
-        "https://localhost:3000",  # HTTPS development
-        os.getenv("FRONTEND_URL", "http://localhost:3000"),  # Production frontend URL
-    ]
+    environment = os.getenv("ENVIRONMENT", "development")
+    is_production = environment == "production"
+
+    # Collected in priority order; duplicates are dropped further down
+    allowed_origins = []
+
+    if not is_production:
+        # Development origins
+        allowed_origins += [
+            "http://localhost:3000",  # Next.js development server
+            "http://127.0.0.1:3000",  # Alternative localhost
+            "https://localhost:3000",  # HTTPS development
+            "https://127.0.0.1:3000",  # HTTPS alternative localhost
+        ]
+
+    # Browsers send Origin without a trailing slash and Starlette matches
+    # allow_origins by exact string, so normalise the configured URL first
+    frontend_url = os.getenv("FRONTEND_URL", "").strip().rstrip("/")
+    if frontend_url:
+        allowed_origins.append(frontend_url)
+        # Also add HTTPS version if HTTP is provided (rewrite the scheme only)
+        if frontend_url.startswith("http://"):
+            allowed_origins.append(frontend_url.replace("http://", "https://", 1))
 
     # Add additional origins from environment variable if specified
     additional_origins = os.getenv("ADDITIONAL_CORS_ORIGINS", "")
     if additional_origins:
-        allowed_origins.extend(additional_origins.split(","))
+        # Trim each entry, skip empty ones, and keep only well-formed origins
+        for origin in filter(None, map(str.strip, additional_origins.split(","))):
+            if _is_valid_origin(origin):
+                allowed_origins.append(origin)
+            else:
+                logger.warning("Invalid CORS origin ignored: %s", origin)
+
+    # Remove duplicates while preserving order
+    allowed_origins = list(dict.fromkeys(allowed_origins))
+
+    # Explicit method allow-list. NOTE(review): PATCH is excluded - confirm unused
+    allowed_methods = [
+        "GET",
+        "POST",
+        "PUT",
+        "DELETE",
+        "OPTIONS",  # Required for CORS preflight
+    ]
+
+    # Secure headers - be specific about what we allow
+    allowed_headers = [
+        "Accept",
+        "Accept-Language",
+        "Content-Type",
+        "Authorization",
+        "X-Requested-With",
+        "Cache-Control",
+    ]
+
+    # Expose only necessary headers
+    expose_headers = [
+        "X-Total-Count",
+        "X-RateLimit-Limit",
+        "X-RateLimit-Remaining",
+        "X-Process-Time",
+    ]
+
+    logger.info("CORS configured for environment: %s", environment)
+    logger.info("Allowed origins: %s", allowed_origins)
 
     app.add_middleware(
         CORSMiddleware,
         allow_origins=allowed_origins,
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
+        allow_credentials=True,  # Required for auth cookies/headers
+        allow_methods=allowed_methods,
+        allow_headers=allowed_headers,
+        expose_headers=expose_headers,
+        max_age=600,  # Cache preflight responses for 10 minutes
+    )
+
+
+def _is_valid_origin(origin: str) -> bool:
+    """Return True if ``origin`` is a bare ``scheme://host[:port]`` web origin.
+
+    Only http/https pass, so javascript:, data:, file:, ftp: and about: URLs
+    fail the scheme check. A path, query or trailing slash is rejected as
+    well: browsers never put one in the Origin header, so such an allow-list
+    entry could never match a request.
+    """
+    import re
+
+    # fullmatch() below anchors both ends, so nothing may follow the port.
+    origin_pattern = re.compile(
+        r"https?://"  # http:// or https://
+        r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|"  # domain
+        r"localhost|"  # localhost
+        r"(?P<ip>\d{1,3}(?:\.\d{1,3}){3}))"  # IPv4 address
+        r"(?::(?P<port>\d{1,5}))?",  # optional port
+        re.IGNORECASE | re.ASCII,  # hostnames in Origin headers are ASCII
     )
+
+    match = origin_pattern.fullmatch(origin)
+    if match is None:
+        return False
+
+    # The pattern only checks digit counts; enforce the numeric ranges here.
+    ip, port = match.group("ip"), match.group("port")
+    if ip and any(int(octet) > 255 for octet in ip.split(".")):
+        return False
+    if port is not None and not 0 < int(port) <= 65535:
+        return False
+
+    return True
diff --git a/backend/docs/security_implementation.md b/backend/docs/security_implementation.md
@@ -0,0 +1,259 @@
+# SmartQuery Security Implementation - Task B28
+
+This document outlines the comprehensive security measures implemented in SmartQuery API as part of Task B28: Security and Error Handling.
+
+## Security Overview
+
+SmartQuery implements a multi-layered security approach covering:
+- Authentication and authorization
+- Input validation and sanitization
+- Rate limiting and request throttling
+- Comprehensive error handling
+- Security headers and CORS configuration
+- Data protection and secure storage
+
+## Authentication & Authorization
+
+### JWT Token Security
+- **Strong Secret Keys**: Production requires minimum 32-character JWT secrets
+- **Token Expiration**: Access tokens expire in 60 minutes, refresh tokens in 30 days
+- **Token Blacklisting**: Implements token revocation and blacklisting system
+- **Unique Token IDs**: Each token has a unique JWT ID (jti) for tracking
+
+### Google OAuth Integration
+- **Token Verification**: Validates Google OAuth tokens against Google's servers
+- **Email Verification**: Requires verified email addresses from Google
+- **Mock Mode**: Secure development mode with mock tokens
+- **Error Handling**: Comprehensive OAuth error handling
+
+### Authentication Middleware
+- **Bearer Token Validation**: Proper HTTP Bearer token handling
+- **User Context Injection**: Secure user context for protected routes
+- **Role-Based Access**: Support for user roles and permissions
+- **Session Management**: Secure session handling and cleanup
+
+## Input Validation & Sanitization
+
+### Comprehensive Input Validation
+- **String Length Limits**: Enforced limits on all text inputs
+  - Project names: 100 characters
+  - Descriptions: 500 characters
+  - Queries: 2000 characters
+  - Email: 254 characters
+- **File Upload Validation**: Restricts file types to CSV only, max 100MB
+- **UUID Validation**: Strict UUID format validation
+- **Email Validation**: RFC-compliant email validation
+
+### Malicious Content Detection
+- **SQL Injection Prevention**: Filters dangerous SQL keywords and patterns
+- **XSS Prevention**: HTML entity encoding for all user inputs
+- **Script Injection Detection**: Blocks JavaScript and VBScript injection attempts
+- **Path Traversal Prevention**: Blocks directory traversal attempts
+- **Command Injection Prevention**: Filters command injection patterns
+
+### Sanitization Process
+- **HTML Encoding**: All user inputs are HTML-encoded
+- **Control Character Removal**: Strips null bytes and control characters
+- **Pattern Matching**: Uses regex patterns to detect malicious content
+- **Recursive Sanitization**: Sanitizes nested data structures
+
+## Rate Limiting & Throttling
+
+### Multi-Tier Rate Limiting
+- **Endpoint-Specific Limits**:
+  - Authentication: 20 requests/minute
+  - Projects: 50 requests/minute
+  - Chat/AI: 30 requests/minute
+  - Default: 100 requests/minute
+
+### Advanced Rate Limiting Features
+- **User-Based Tracking**: Tracks requests per authenticated user
+- **IP-Based Fallback**: Rate limits for anonymous users
+- **Temporary Blocking**: Blocks users exceeding 3x the limit
+- **Sliding Windows**: Uses time-window based counting
+- **Graceful Headers**: Returns rate limit headers to clients
+
+### Protection Against Abuse
+- **Burst Protection**: Prevents rapid-fire requests
+- **Distributed Denial of Service (DDoS) Mitigation**: Basic protection
+- **Request Pattern Analysis**: Monitors for suspicious patterns
+
+## Error Handling & Security
+
+### Secure Error Messages
+- **Information Leakage Prevention**: Sanitizes error messages in production
+- **Generic Production Errors**: Returns generic messages to prevent reconnaissance
+- **Detailed Development Errors**: Full error details in development mode
+- **Error ID Tracking**: Unique error IDs for support and debugging
+
+### Comprehensive Error Logging
+- **Security Event Logging**: Dedicated security event logger
+- **Attack Detection**: Logs potential attack patterns
+- **Authentication Failures**: Tracks failed login attempts
+- **Input Validation Failures**: Logs validation errors for analysis
+
+### Error Response Standardization
+- **Consistent Format**: All errors use standardized ApiResponse format
+- **Security Headers**: Security headers added to all error responses
+- **Status Code Mapping**: Proper HTTP status codes for different error types
+- **Sanitized Stack Traces**: Stack traces hidden in production
+
+## Security Headers & CORS
+
+### Comprehensive Security Headers
+- **Content Security Policy (CSP)**: Prevents XSS attacks
+- **X-Frame-Options**: Prevents clickjacking (set to DENY)
+- **X-Content-Type-Options**: Prevents MIME sniffing (set to nosniff)
+- **X-XSS-Protection**: Browser XSS protection enabled
+- **Strict-Transport-Security**: Forces HTTPS in production
+- **Referrer-Policy**: Controls referrer information leakage
+- **Permissions-Policy**: Restricts browser features
+
+### Secure CORS Configuration
+- **Environment-Specific Origins**: Different origins for development/production
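+- **Origin Validation**: Validates and sanitizes origins supplied via `ADDITIONAL_CORS_ORIGINS`; malformed entries are logged and ignored
+- **Restricted Methods**: Only allows GET, POST, PUT, DELETE, and OPTIONS (for preflight)
+- **Specific Headers**: Restricts request headers to an explicit allow-list (e.g. `Authorization`, `Content-Type`)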
+- **Credential Support**: Secure credential handling for authenticated requests
+
+## Data Protection
+
+### Sensitive Data Handling
+- **Environment Variables**: All secrets stored in environment variables
+- **API Key Security**: OpenAI and other API keys properly secured
+- **Database Credentials**: Secure database connection handling
+- **Password Policies**: No plain text password storage
+- **Data Encryption**: Sensitive data encrypted at rest and in transit
+
+### Secure Configuration
+- **Production Secrets**: Strong, unique secrets in production
+- **Development Defaults**: Secure defaults for development environment
+- **Configuration Validation**: Validates security configuration on startup
+- **Environment Separation**: Clear separation between development and production
+
+## Security Middleware Architecture
+
+### SecurityMiddleware
+- **Request Size Validation**: Prevents oversized requests
+- **Content Validation**: Validates request content types and structures
+- **Pattern Detection**: Real-time malicious pattern detection
+- **Response Headers**: Adds security headers to all responses
+
+### Rate Limiting Integration
+- **Middleware Integration**: Seamlessly integrated with FastAPI
+- **Memory Efficient**: Efficient in-memory tracking with cleanup
+- **Redis Ready**: Prepared for Redis integration in production
+- **Configurable Limits**: Environment-based configuration
+
+### Error Handler Integration
+- **Exception Tracking**: Comprehensive exception handling
+- **Security Event Generation**: Automatic security event logging
+- **Response Sanitization**: Sanitizes all error responses
+- **Attack Detection**: Detects and logs potential attacks
+
+## Security Testing & Validation
+
+### Input Validation Testing
+- **Boundary Testing**: Tests input length limits
+- **Injection Testing**: Tests for SQL injection, XSS, and other attacks
+- **Format Validation**: Tests UUID, email, and other format validators
+- **Malicious Pattern Testing**: Tests detection of malicious patterns
+
+### Authentication Testing
+- **Token Validation**: Tests JWT token validation and expiration
+- **OAuth Integration**: Tests Google OAuth token verification
+- **Authorization Testing**: Tests protected endpoint access
+- **Session Management**: Tests session handling and cleanup
+
+### Rate Limiting Testing
+- **Limit Enforcement**: Tests rate limit enforcement
+- **Burst Protection**: Tests rapid request handling
+- **User Isolation**: Tests per-user rate limiting
+- **Recovery Testing**: Tests limit reset and recovery
+
+## Production Security Checklist
+
+### Environment Configuration
+- [ ] JWT_SECRET set to strong, unique value (minimum 32 characters)
+- [ ] OPENAI_API_KEY properly configured
+- [ ] Database credentials secured
+- [ ] ENVIRONMENT set to "production"
+- [ ] Security headers enabled
+- [ ] Rate limiting enabled
+
+### Network Security
+- [ ] HTTPS enforced with valid SSL certificates
+- [ ] CORS origins restricted to production domains
+- [ ] Firewall rules configured
+- [ ] Database access restricted
+- [ ] API endpoints not publicly indexed
+
+### Monitoring & Alerting
+- [ ] Security event logging enabled
+- [ ] Error tracking configured
+- [ ] Rate limiting alerts set up
+- [ ] Authentication failure monitoring
+- [ ] Unusual activity detection
+
+### Data Protection
+- [ ] Database encrypted at rest
+- [ ] Secure backup procedures
+- [ ] PII handling compliance
+- [ ] Data retention policies
+- [ ] Access logging enabled
+
+## Security Incident Response
+
+### Detection
+- **Automated Monitoring**: Real-time security event detection
+- **Log Analysis**: Regular log analysis for security events
+- **Rate Limit Violations**: Automatic detection of abuse
+- **Authentication Anomalies**: Detection of unusual login patterns
+
+### Response Procedures
+1. **Immediate Response**: Automatically block suspicious IPs
+2. **Investigation**: Analyze security logs and patterns
+3. **Mitigation**: Implement additional protective measures
+4. **Communication**: Notify relevant stakeholders
+5. **Recovery**: Restore normal operations
+6. **Post-Incident**: Review and improve security measures
+
+## Security Maintenance
+
+### Regular Updates
+- **Dependency Updates**: Regular updates of all dependencies
+- **Security Patches**: Prompt application of security patches
+- **Configuration Review**: Regular review of security configuration
+- **Access Review**: Regular review of user access and permissions
+
+### Security Audits
+- **Code Reviews**: Regular security-focused code reviews
+- **Penetration Testing**: Periodic penetration testing
+- **Vulnerability Scanning**: Regular vulnerability assessments
+- **Compliance Checks**: Regular compliance validation
+
+## Security Contact
+
+When investigating a security-related issue or suspected vulnerability, start with these checks:
+- Review security logs in the application
+- Check error handling and rate limiting effectiveness
+- Validate input sanitization is working correctly
+- Ensure all security headers are present
+
+## Implementation Status
+
+✅ **Completed Tasks (Task B28):**
+- Authentication and authorization security audit
+- Sensitive data handling and environment variable security
+- Comprehensive error handling implementation
+- Input validation and sanitization system
+- Rate limiting and request throttling
+- Security headers and CORS configuration
+- Security documentation and guidelines
+
+**Security Implementation: COMPLETE**
+All security measures have been implemented according to Task B28 requirements.
+
+---
+
+*This document is part of the SmartQuery MVP security implementation and should be regularly updated as new security measures are implemented.*