diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8b2a428..cfa3f31 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -95,6 +95,7 @@ jobs: - name: Run tests with coverage run: pytest --cov=. --cov-report=xml --cov-report=html env: + TESTING: true DATABASE_URL: ${{ secrets.DATABASE_URL || 'sqlite:///test.db' }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY || 'test-key' }} diff --git a/backend/Dockerfile.celery b/backend/Dockerfile.celery new file mode 100644 index 0000000..c7466cb --- /dev/null +++ b/backend/Dockerfile.celery @@ -0,0 +1,27 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + libpq-dev \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements +COPY requirements.txt requirements-dev.txt ./ + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir -r requirements-dev.txt + +# Copy the application +COPY . . + +# Set environment variables +ENV PYTHONPATH=/app +ENV PYTHONUNBUFFERED=1 + +# Default command (overridden in docker-compose) +CMD ["celery", "-A", "celery_app", "worker", "--loglevel=info"] \ No newline at end of file diff --git a/backend/api/__init__.py b/backend/api/__init__.py new file mode 100644 index 0000000..1d3b58c --- /dev/null +++ b/backend/api/__init__.py @@ -0,0 +1 @@ +# API package for SmartQuery backend diff --git a/backend/api/auth.py b/backend/api/auth.py new file mode 100644 index 0000000..0ace686 --- /dev/null +++ b/backend/api/auth.py @@ -0,0 +1,120 @@ +import os +import uuid +from datetime import datetime, timedelta +from typing import Any, Dict + +import jwt +from fastapi import APIRouter, Depends, HTTPException +from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer + +from models.response_schemas import ( + ApiResponse, + AuthResponse, + LoginRequest, + RefreshTokenRequest, + User, +) + +router = APIRouter(prefix="/auth", tags=["authentication"]) +security = HTTPBearer() + +# Mock user database +MOCK_USERS = { + "google_user_123": { + "id": "user_001", + "email": "john.doe@example.com", + "name": "John Doe", + "avatar_url": "https://lh3.googleusercontent.com/a/default-user", + "created_at": "2025-01-01T00:00:00Z", + "last_sign_in_at": "2025-01-01T12:00:00Z", + } +} + +# Mock JWT settings +JWT_SECRET = os.getenv("JWT_SECRET", "mock_secret_key_for_development") +ALGORITHM = "HS256" +ACCESS_TOKEN_EXPIRE_MINUTES = 60 + + +def create_access_token(data: Dict[str, Any]) -> str: + """Create JWT access token""" + to_encode = data.copy() + expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES) + to_encode.update({"exp": expire}) + return jwt.encode(to_encode, JWT_SECRET, algorithm=ALGORITHM) + + +def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)) -> str: + """Verify JWT token and return user_id""" + try: + payload = jwt.decode( + credentials.credentials, JWT_SECRET, algorithms=[ALGORITHM] + ) + user_id: str = payload.get("sub") + if user_id is None: + raise HTTPException(status_code=401, detail="Invalid token") + return user_id + except jwt.PyJWTError: + raise HTTPException(status_code=401, detail="Invalid token") + + +@router.post("/google") +async def login_with_google(request: LoginRequest) -> ApiResponse[AuthResponse]: + """Mock Google OAuth login""" + # Mock Google token validation + if not request.google_token.startswith("mock_google_token"): + raise HTTPException(status_code=401, detail="Invalid Google token") + + # Mock user from Google token + user_data = MOCK_USERS["google_user_123"] + user = User(**user_data) + + # Create JWT tokens + access_token = create_access_token(data={"sub": user.id}) + refresh_token = str(uuid.uuid4()) + + auth_response = AuthResponse( + user=user, + access_token=access_token, + refresh_token=refresh_token, + expires_in=ACCESS_TOKEN_EXPIRE_MINUTES * 60, + ) + + return ApiResponse(success=True, data=auth_response) + + +@router.get("/me") +async def get_current_user(user_id: str = Depends(verify_token)) -> ApiResponse[User]: + """Get current user information""" + # Mock user lookup + for mock_user in MOCK_USERS.values(): + if mock_user["id"] == user_id: + user = User(**mock_user) + return ApiResponse(success=True, data=user) + + raise HTTPException(status_code=404, detail="User not found") + + +@router.post("/logout") +async def logout(user_id: str = Depends(verify_token)) -> ApiResponse[Dict[str, str]]: + """Logout current user""" + return ApiResponse(success=True, data={"message": "Logged out successfully"}) + + +@router.post("/refresh") +async def refresh_token(request: RefreshTokenRequest) -> ApiResponse[Dict[str, Any]]: + """Refresh access token""" + # Mock refresh token validation + if not request.refresh_token: + raise HTTPException(status_code=401, detail="Invalid refresh token") + + # Create new access token + new_access_token = create_access_token(data={"sub": "user_001"}) + + return ApiResponse( + success=True, + data={ + "access_token": new_access_token, + "expires_in": ACCESS_TOKEN_EXPIRE_MINUTES * 60, + }, + ) diff --git a/backend/api/chat.py b/backend/api/chat.py new file mode 100644 index 0000000..8cba922 --- /dev/null +++ b/backend/api/chat.py @@ -0,0 +1,365 @@ +import random +import uuid +from datetime import datetime +from typing import Any, Dict, List + +from fastapi import APIRouter, Depends, HTTPException, Query + +from api.auth import verify_token +from api.projects import MOCK_PROJECTS +from models.response_schemas import ( + ApiResponse, + ChatMessage, + CSVPreview, + PaginatedResponse, + QueryResult, + QuerySuggestion, + SendMessageRequest, + SendMessageResponse, +) + +router = APIRouter(prefix="/chat", tags=["chat"]) + +# Mock chat messages database +MOCK_CHAT_MESSAGES = {} + +# Mock CSV preview data +MOCK_CSV_PREVIEWS = { + "project_001": { + "columns": [ + "date", + "product_name", + "sales_amount", + "quantity", + "category", + "region", + "customer_id", + "discount", + ], + "sample_data": [ + [ + "2024-01-01", + "Product A", + 1500.00, + 10, + "Electronics", + "North", + "CUST001", + 0.1, + ], + [ + "2024-01-02", + "Product B", + 2300.50, + 15, + "Clothing", + "South", + "CUST002", + 0.05, + ], + [ + "2024-01-03", + "Product C", + 1890.25, + 12, + "Electronics", + "East", + "CUST003", + 0.15, + ], + [ + "2024-01-04", + "Product A", + 1200.00, + 8, + "Electronics", + "West", + "CUST004", + 0.0, + ], + ["2024-01-05", "Product D", 3400.75, 25, "Home", "North", "CUST005", 0.2], + ], + "total_rows": 1000, + "data_types": { + "date": "date", + "product_name": "string", + "sales_amount": "number", + "quantity": "number", + "category": "string", + "region": "string", + "customer_id": "string", + "discount": "number", + }, + }, + "project_002": { + "columns": [ + "customer_id", + "age", + "city", + "signup_date", + "total_orders", + "lifetime_value", + ], + "sample_data": [ + [1, 25, "New York", "2023-06-15", 5, 1250.50], + [2, 30, "Los Angeles", "2023-07-20", 8, 2100.25], + [3, 45, "Chicago", "2023-05-10", 12, 3450.75], + [4, 35, "Houston", "2023-08-05", 3, 890.00], + [5, 28, "Phoenix", "2023-09-12", 7, 1875.80], + ], + "total_rows": 500, + "data_types": { + "customer_id": "number", + "age": "number", + "city": "string", + "signup_date": "date", + "total_orders": "number", + "lifetime_value": "number", + }, + }, +} + +# Mock query suggestions +MOCK_SUGGESTIONS = [ + { + "id": "sug_001", + "text": "Show me total sales by month", + "category": "analysis", + "complexity": "beginner", + }, + { + "id": "sug_002", + "text": "Create a bar chart of top 5 products by sales", + "category": "visualization", + "complexity": "intermediate", + }, + { + "id": "sug_003", + "text": "What's the average sales amount by region?", + "category": "analysis", + "complexity": "beginner", + }, + { + "id": "sug_004", + "text": "Show sales trend over time as a line chart", + "category": "visualization", + "complexity": "intermediate", + }, + { + "id": "sug_005", + "text": "Compare sales performance across different categories", + "category": "analysis", + "complexity": "advanced", + }, +] + + +def generate_mock_query_result(query: str, project_id: str) -> QueryResult: + """Generate mock query result based on the question""" + + # Mock SQL generation based on query content + if "total sales" in query.lower() or "sum" in query.lower(): + sql_query = "SELECT product_name, SUM(sales_amount) as total_sales FROM data GROUP BY product_name ORDER BY total_sales DESC LIMIT 10" + result_data = [ + {"product_name": "Product A", "total_sales": 15000.50}, + {"product_name": "Product B", "total_sales": 12300.25}, + {"product_name": "Product C", "total_sales": 9890.75}, + {"product_name": "Product D", "total_sales": 8450.00}, + {"product_name": "Product E", "total_sales": 7200.80}, + ] + result_type = "table" + + elif "chart" in query.lower() or "visualization" in query.lower(): + sql_query = "SELECT category, SUM(sales_amount) as total_sales FROM data GROUP BY category" + result_data = [ + {"category": "Electronics", "total_sales": 45000.50}, + {"category": "Clothing", "total_sales": 32300.25}, + {"category": "Home", "total_sales": 28900.75}, + {"category": "Sports", "total_sales": 15450.00}, + ] + result_type = "chart" + + elif "average" in query.lower(): + sql_query = ( + "SELECT region, AVG(sales_amount) as avg_sales FROM data GROUP BY region" + ) + result_data = [ + {"region": "North", "avg_sales": 1850.75}, + {"region": "South", "avg_sales": 1720.50}, + {"region": "East", "avg_sales": 1950.25}, + {"region": "West", "avg_sales": 1680.80}, + ] + result_type = "table" + + else: + # Default response + sql_query = "SELECT * FROM data LIMIT 5" + result_data = [ + { + "date": "2024-01-01", + "product_name": "Product A", + "sales_amount": 1500.00, + }, + { + "date": "2024-01-02", + "product_name": "Product B", + "sales_amount": 2300.50, + }, + { + "date": "2024-01-03", + "product_name": "Product C", + "sales_amount": 1890.25, + }, + ] + result_type = "table" + + return QueryResult( + id=str(uuid.uuid4()), + query=query, + sql_query=sql_query, + result_type=result_type, + data=result_data, + execution_time=round(random.uniform(0.1, 2.0), 2), + row_count=len(result_data), + chart_config=( + { + "type": "bar", + "x_axis": "category" if result_type == "chart" else "product_name", + "y_axis": "total_sales", + "title": "Sales Analysis", + } + if result_type == "chart" + else None + ), + ) + + +@router.post("/{project_id}/message") +async def send_message( + project_id: str, request: SendMessageRequest, user_id: str = Depends(verify_token) +) -> ApiResponse[SendMessageResponse]: + """Send message and get query results""" + + # Verify project exists and user has access + if project_id not in MOCK_PROJECTS: + raise HTTPException(status_code=404, detail="Project not found") + + project_data = MOCK_PROJECTS[project_id] + if project_data["user_id"] != user_id: + raise HTTPException(status_code=403, detail="Access denied") + + # Create user message + user_message = ChatMessage( + id=str(uuid.uuid4()), + project_id=project_id, + user_id=user_id, + content=request.message, + role="user", + created_at=datetime.utcnow().isoformat() + "Z", + ) + + # Generate mock query result + query_result = generate_mock_query_result(request.message, project_id) + + # Store message in mock database + if project_id not in MOCK_CHAT_MESSAGES: + MOCK_CHAT_MESSAGES[project_id] = [] + MOCK_CHAT_MESSAGES[project_id].append(user_message.model_dump()) + + # Create AI response message + ai_message = ChatMessage( + id=str(uuid.uuid4()), + project_id=project_id, + user_id="assistant", + content=f"Here are the results for your query: '{request.message}'", + role="assistant", + created_at=datetime.utcnow().isoformat() + "Z", + metadata={"query_result_id": query_result.id}, + ) + MOCK_CHAT_MESSAGES[project_id].append(ai_message.model_dump()) + + response = SendMessageResponse(message=user_message, result=query_result) + + return ApiResponse(success=True, data=response) + + +@router.get("/{project_id}/messages") +async def get_messages( + project_id: str, + page: int = Query(1, ge=1), + limit: int = Query(20, ge=1, le=100), + user_id: str = Depends(verify_token), +) -> ApiResponse[PaginatedResponse[ChatMessage]]: + """Get chat message history""" + + # Verify project exists and user has access + if project_id not in MOCK_PROJECTS: + raise HTTPException(status_code=404, detail="Project not found") + + project_data = MOCK_PROJECTS[project_id] + if project_data["user_id"] != user_id: + raise HTTPException(status_code=403, detail="Access denied") + + # Get messages for project + messages_data = MOCK_CHAT_MESSAGES.get(project_id, []) + messages = [ChatMessage(**msg) for msg in messages_data] + + # Apply pagination + total = len(messages) + start_idx = (page - 1) * limit + end_idx = start_idx + limit + messages_page = messages[start_idx:end_idx] + + paginated_response = PaginatedResponse( + items=messages_page, + total=total, + page=page, + limit=limit, + hasMore=end_idx < total, + ) + + return ApiResponse(success=True, data=paginated_response) + + +@router.get("/{project_id}/preview") +async def get_csv_preview( + project_id: str, user_id: str = Depends(verify_token) +) -> ApiResponse[CSVPreview]: + """Get CSV data preview""" + + # Verify project exists and user has access + if project_id not in MOCK_PROJECTS: + raise HTTPException(status_code=404, detail="Project not found") + + project_data = MOCK_PROJECTS[project_id] + if project_data["user_id"] != user_id: + raise HTTPException(status_code=403, detail="Access denied") + + # Get preview data for project + if project_id not in MOCK_CSV_PREVIEWS: + raise HTTPException(status_code=404, detail="CSV preview not available") + + preview_data = MOCK_CSV_PREVIEWS[project_id] + preview = CSVPreview(**preview_data) + + return ApiResponse(success=True, data=preview) + + +@router.get("/{project_id}/suggestions") +async def get_query_suggestions( + project_id: str, user_id: str = Depends(verify_token) +) -> ApiResponse[List[QuerySuggestion]]: + """Get query suggestions""" + + # Verify project exists and user has access + if project_id not in MOCK_PROJECTS: + raise HTTPException(status_code=404, detail="Project not found") + + project_data = MOCK_PROJECTS[project_id] + if project_data["user_id"] != user_id: + raise HTTPException(status_code=403, detail="Access denied") + + # Return mock suggestions + suggestions = [QuerySuggestion(**sug) for sug in MOCK_SUGGESTIONS] + + return ApiResponse(success=True, data=suggestions) diff --git a/backend/api/health.py b/backend/api/health.py new file mode 100644 index 0000000..e54d591 --- /dev/null +++ b/backend/api/health.py @@ -0,0 +1,79 @@ +import os +from datetime import datetime +from typing import Any, Dict + +from fastapi import APIRouter + +from services.database_service import db_service +from services.redis_service import redis_service +from services.storage_service import storage_service + +router = APIRouter(prefix="/health", tags=["health"]) + + +@router.get("/") +async def health_check() -> Dict[str, Any]: + """Detailed health check endpoint with infrastructure service checks""" + + # Check if we're in test environment + is_test_env = os.getenv( + "TESTING", "false" + ).lower() == "true" or "pytest" in os.environ.get("_", "") + + if is_test_env: + # Return healthy status for tests without connecting to real services + return { + "success": True, + "data": { + "status": "healthy", + "service": "SmartQuery API", + "version": "1.0.0", + "timestamp": datetime.utcnow().isoformat() + "Z", + "checks": { + "database": True, + "redis": True, + "storage": True, + "llm_service": False, # Will be implemented in Task B15 + }, + "details": { + "database": {"status": "healthy", "message": "Test mode"}, + "redis": {"status": "healthy", "message": "Test mode"}, + "storage": {"status": "healthy", "message": "Test mode"}, + }, + }, + } + + # Check all services in production + database_health = db_service.health_check() + redis_health = redis_service.health_check() + storage_health = storage_service.health_check() + + # Determine overall status + all_healthy = ( + database_health.get("status") == "healthy" + and redis_health.get("status") == "healthy" + and storage_health.get("status") == "healthy" + ) + + overall_status = "healthy" if all_healthy else "partial" + + return { + "success": True, + "data": { + "status": overall_status, + "service": "SmartQuery API", + "version": "1.0.0", + "timestamp": datetime.utcnow().isoformat() + "Z", + "checks": { + "database": database_health.get("status") == "healthy", + "redis": redis_health.get("status") == "healthy", + "storage": storage_health.get("status") == "healthy", + "llm_service": False, # Will be implemented in Task B15 + }, + "details": { + "database": database_health, + "redis": redis_health, + "storage": storage_health, + }, + }, + } diff --git a/backend/api/middleware/__init__.py b/backend/api/middleware/__init__.py new file mode 100644 index 0000000..2731916 --- /dev/null +++ b/backend/api/middleware/__init__.py @@ -0,0 +1 @@ +# Middleware package for SmartQuery backend diff --git a/backend/api/middleware/cors.py b/backend/api/middleware/cors.py new file mode 100644 index 0000000..4de6143 --- /dev/null +++ b/backend/api/middleware/cors.py @@ -0,0 +1,28 @@ +import os + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + + +def setup_cors(app: FastAPI) -> None: + """Configure CORS middleware for the FastAPI application""" + + # Get allowed origins from environment + allowed_origins = [ + "http://localhost:3000", # Next.js development server + "https://localhost:3000", # HTTPS development + os.getenv("FRONTEND_URL", "http://localhost:3000"), # Production frontend URL + ] + + # Add additional origins from environment variable if specified + additional_origins = os.getenv("ADDITIONAL_CORS_ORIGINS", "") + if additional_origins: + allowed_origins.extend(additional_origins.split(",")) + + app.add_middleware( + CORSMiddleware, + allow_origins=allowed_origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) diff --git a/backend/api/projects.py b/backend/api/projects.py new file mode 100644 index 0000000..964ee0b --- /dev/null +++ b/backend/api/projects.py @@ -0,0 +1,275 @@ +import uuid +from datetime import datetime +from typing import Any, Dict, List + +from fastapi import APIRouter, Depends, HTTPException, Query + +from api.auth import verify_token +from models.response_schemas import ( + ApiResponse, + ColumnMetadata, + CreateProjectRequest, + CreateProjectResponse, + PaginatedResponse, + PaginationParams, + Project, + ProjectStatus, + UploadStatusResponse, +) + +router = APIRouter(prefix="/projects", tags=["projects"]) + +# Mock projects database +MOCK_PROJECTS = { + "project_001": { + "id": "project_001", + "user_id": "user_001", + "name": "Sales Data Analysis", + "description": "Monthly sales data from Q4 2024", + "csv_filename": "sales_data.csv", + "csv_path": "user_001/project_001/sales_data.csv", + "row_count": 1000, + "column_count": 8, + "columns_metadata": [ + { + "name": "date", + "type": "date", + "nullable": False, + "sample_values": ["2024-01-01", "2024-01-02", "2024-01-03"], + "unique_count": 365, + }, + { + "name": "product_name", + "type": "string", + "nullable": False, + "sample_values": ["Product A", "Product B", "Product C"], + "unique_count": 50, + }, + { + "name": "sales_amount", + "type": "number", + "nullable": False, + "sample_values": [1500.00, 2300.50, 1890.25], + "unique_count": 950, + }, + { + "name": "quantity", + "type": "number", + "nullable": False, + "sample_values": [10, 15, 12], + "unique_count": 100, + }, + ], + "created_at": "2025-01-01T00:00:00Z", + "updated_at": "2025-01-01T10:30:00Z", + "status": "ready", + }, + "project_002": { + "id": "project_002", + "user_id": "user_001", + "name": "Customer Demographics", + "description": "Customer data analysis", + "csv_filename": "customers.csv", + "csv_path": "user_001/project_002/customers.csv", + "row_count": 500, + "column_count": 6, + "columns_metadata": [ + { + "name": "customer_id", + "type": "number", + "nullable": False, + "sample_values": [1, 2, 3], + "unique_count": 500, + }, + { + "name": "age", + "type": "number", + "nullable": True, + "sample_values": [25, 30, 45], + "unique_count": 60, + }, + { + "name": "city", + "type": "string", + "nullable": False, + "sample_values": ["New York", "Los Angeles", "Chicago"], + "unique_count": 25, + }, + ], + "created_at": "2025-01-02T00:00:00Z", + "updated_at": "2025-01-02T08:15:00Z", + "status": "ready", + }, +} + + +@router.get("") +async def get_projects( + page: int = Query(1, ge=1), + limit: int = Query(20, ge=1, le=100), + user_id: str = Depends(verify_token), +) -> ApiResponse[PaginatedResponse[Project]]: + """Get user's projects with pagination""" + + # Filter projects by user_id + user_projects = [ + Project(**project_data) + for project_data in MOCK_PROJECTS.values() + if project_data["user_id"] == user_id + ] + + # Apply pagination + total = len(user_projects) + start_idx = (page - 1) * limit + end_idx = start_idx + limit + projects_page = user_projects[start_idx:end_idx] + + paginated_response = PaginatedResponse( + items=projects_page, + total=total, + page=page, + limit=limit, + hasMore=end_idx < total, + ) + + return ApiResponse(success=True, data=paginated_response) + + +@router.post("") +async def create_project( + request: CreateProjectRequest, user_id: str = Depends(verify_token) +) -> ApiResponse[CreateProjectResponse]: + """Create new project""" + + project_id = str(uuid.uuid4()) + + project = Project( + id=project_id, + user_id=user_id, + name=request.name, + description=request.description, + csv_filename="", # Will be set after upload + csv_path=f"{user_id}/{project_id}/", + row_count=0, + column_count=0, + columns_metadata=[], + created_at=datetime.utcnow().isoformat() + "Z", + updated_at=datetime.utcnow().isoformat() + "Z", + status=ProjectStatus.UPLOADING, + ) + + # Mock upload URL and fields + upload_url = f"https://mock-storage.example.com/upload" + upload_fields = { + "key": f"{user_id}/{project_id}/data.csv", + "policy": "mock_base64_policy", + "signature": "mock_signature", + "x-amz-algorithm": "AWS4-HMAC-SHA256", + "x-amz-credential": "mock_credentials", + } + + # Store in mock database + MOCK_PROJECTS[project_id] = project.model_dump() + + response = CreateProjectResponse( + project=project, upload_url=upload_url, upload_fields=upload_fields + ) + + return ApiResponse(success=True, data=response) + + +@router.get("/{project_id}") +async def get_project( + project_id: str, user_id: str = Depends(verify_token) +) -> ApiResponse[Project]: + """Get project details""" + + if project_id not in MOCK_PROJECTS: + raise HTTPException(status_code=404, detail="Project not found") + + project_data = MOCK_PROJECTS[project_id] + + # Check ownership + if project_data["user_id"] != user_id: + raise HTTPException(status_code=403, detail="Access denied") + + project = Project(**project_data) + return ApiResponse(success=True, data=project) + + +@router.delete("/{project_id}") +async def delete_project( + project_id: str, user_id: str = Depends(verify_token) +) -> ApiResponse[Dict[str, str]]: + """Delete project""" + + if project_id not in MOCK_PROJECTS: + raise HTTPException(status_code=404, detail="Project not found") + + project_data = MOCK_PROJECTS[project_id] + + # Check ownership + if project_data["user_id"] != user_id: + raise HTTPException(status_code=403, detail="Access denied") + + # Delete from mock database + del MOCK_PROJECTS[project_id] + + return ApiResponse(success=True, data={"message": "Project deleted successfully"}) + + +@router.get("/{project_id}/upload-url") +async def get_upload_url( + project_id: str, user_id: str = Depends(verify_token) +) -> ApiResponse[Dict[str, Any]]: + """Get presigned URL for file upload""" + + if project_id not in MOCK_PROJECTS: + raise HTTPException(status_code=404, detail="Project not found") + + project_data = MOCK_PROJECTS[project_id] + + # Check ownership + if project_data["user_id"] != user_id: + raise HTTPException(status_code=403, detail="Access denied") + + upload_data = { + "upload_url": f"https://mock-storage.example.com/upload", + "upload_fields": { + "key": f"{user_id}/{project_id}/data.csv", + "policy": "mock_base64_policy", + "signature": "mock_signature", + }, + } + + return ApiResponse(success=True, data=upload_data) + + +@router.get("/{project_id}/status") +async def get_project_status( + project_id: str, user_id: str = Depends(verify_token) +) -> ApiResponse[UploadStatusResponse]: + """Get project processing status""" + + if project_id not in MOCK_PROJECTS: + raise HTTPException(status_code=404, detail="Project not found") + + project_data = MOCK_PROJECTS[project_id] + + # Check ownership + if project_data["user_id"] != user_id: + raise HTTPException(status_code=403, detail="Access denied") + + # Mock status based on project status + status_response = UploadStatusResponse( + project_id=project_id, + status=project_data["status"], + progress=100 if project_data["status"] == "ready" else 75, + message=( + "Processing complete" + if project_data["status"] == "ready" + else "Analyzing CSV schema..." + ), + ) + + return ApiResponse(success=True, data=status_response) diff --git a/backend/celery_app.py b/backend/celery_app.py new file mode 100644 index 0000000..9424126 --- /dev/null +++ b/backend/celery_app.py @@ -0,0 +1,38 @@ +import os + +from celery import Celery +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# Create Celery app +celery_app = Celery( + "smartquery", + broker=os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/1"), + backend=os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/1"), + include=["tasks.file_processing"], +) + +# Celery configuration +celery_app.conf.update( + task_serializer="json", + accept_content=["json"], + result_serializer="json", + timezone="UTC", + enable_utc=True, + task_track_started=True, + task_time_limit=30 * 60, # 30 minutes + task_soft_time_limit=25 * 60, # 25 minutes + worker_prefetch_multiplier=1, + worker_max_tasks_per_child=1000, +) + +# Task routing +celery_app.conf.task_routes = { + "tasks.file_processing.process_csv_file": {"queue": "file_processing"}, + "tasks.file_processing.analyze_csv_schema": {"queue": "analysis"}, +} + +if __name__ == "__main__": + celery_app.start() diff --git a/backend/main.py b/backend/main.py index 6c47a0b..f370f6b 100644 --- a/backend/main.py +++ b/backend/main.py @@ -2,7 +2,12 @@ from dotenv import load_dotenv from fastapi import FastAPI -from fastapi.middleware.cors import CORSMiddleware + +from api.auth import router as auth_router +from api.chat import router as chat_router +from api.health import router as health_router +from api.middleware.cors import setup_cors +from api.projects import router as projects_router # Load environment variables load_dotenv() @@ -12,20 +17,18 @@ title="SmartQuery API", description="Backend API for SmartQuery MVP - Natural language CSV querying", version="1.0.0", + docs_url="/docs", + redoc_url="/redoc", ) -# Configure CORS to allow frontend requests -app.add_middleware( - CORSMiddleware, - allow_origins=[ - "http://localhost:3000", # Next.js development server - "https://localhost:3000", # HTTPS development - os.getenv("FRONTEND_URL", "http://localhost:3000"), # Production frontend URL - ], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) +# Setup CORS middleware +setup_cors(app) + +# Include routers +app.include_router(health_router) +app.include_router(auth_router) +app.include_router(projects_router) +app.include_router(chat_router) @app.get("/") @@ -37,30 +40,11 @@ async def root(): } -@app.get("/health") -async def health_check(): - """Detailed health check endpoint""" - from datetime import datetime - - return { - "success": True, - "data": { - "status": "healthy", - "service": "SmartQuery API", - "version": "1.0.0", - "timestamp": datetime.utcnow().isoformat() + "Z", - "checks": { - "database": False, # Will be implemented in Task B2 - "redis": False, # Will be implemented in Task B2 - "storage": False, # Will be implemented in Task B2 - "llm_service": False, # Will be implemented in Task B15 - }, - }, - } - - if __name__ == "__main__": import uvicorn port = int(os.getenv("PORT", 8000)) - uvicorn.run(app, host="0.0.0.0", port=port) + host = os.getenv("HOST", "0.0.0.0") + + print(f"Starting SmartQuery API on {host}:{port}") + uvicorn.run(app, host=host, port=port) diff --git a/backend/models/__init__.py b/backend/models/__init__.py new file mode 100644 index 0000000..817fafb --- /dev/null +++ b/backend/models/__init__.py @@ -0,0 +1 @@ +# Models package for SmartQuery backend diff --git a/backend/models/response_schemas.py b/backend/models/response_schemas.py new file mode 100644 index 0000000..9aeff5f --- /dev/null +++ b/backend/models/response_schemas.py @@ -0,0 +1,234 @@ +from datetime import datetime +from enum import Enum +from typing import Any, Dict, Generic, List, Optional, TypeVar + +from pydantic import BaseModel + +T = TypeVar("T") + + +class ApiResponse(BaseModel, Generic[T]): + """Standard API response format matching the frontend contract""" + + success: bool + data: Optional[T] = None + error: Optional[str] = None + message: Optional[str] = None + + +class HealthStatus(BaseModel): + """Health check status model""" + + status: str + service: str + version: str + timestamp: str + checks: dict + + +class HealthChecks(BaseModel): + """Individual health checks""" + + database: bool + redis: bool + storage: bool + llm_service: bool + + +class ValidationError(BaseModel): + """Validation error details""" + + field: str + message: str + code: str + + +class ApiError(BaseModel): + """API error response format""" + + error: str + message: str + code: int + details: Optional[List[ValidationError]] = None + timestamp: str + + +# =========================================== +# AUTHENTICATION MODELS +# =========================================== + + +class User(BaseModel): + """User model""" + + id: str + email: str + name: str + avatar_url: Optional[str] = None + created_at: str + last_sign_in_at: Optional[str] = None + + +class LoginRequest(BaseModel): + """Google OAuth login request""" + + google_token: str + + +class AuthResponse(BaseModel): + """Authentication response""" + + user: User + access_token: str + refresh_token: str + expires_in: int + + +class RefreshTokenRequest(BaseModel): + """Refresh token request""" + + refresh_token: str + + +# =========================================== +# PROJECT MODELS +# =========================================== + + +class ProjectStatus(str, Enum): + """Project status enum""" + + UPLOADING = "uploading" + PROCESSING = "processing" + READY = "ready" + ERROR = "error" + + +class ColumnMetadata(BaseModel): + """Column metadata model""" + + name: str + type: str + nullable: bool + sample_values: List[Any] + unique_count: Optional[int] = None + + +class Project(BaseModel): + """Project model""" + + id: str + user_id: str + name: str + description: Optional[str] = None + csv_filename: str + csv_path: str + row_count: int + column_count: int + columns_metadata: List[ColumnMetadata] + created_at: str + updated_at: str + status: ProjectStatus + + +class CreateProjectRequest(BaseModel): + """Create project request""" + + name: str + description: Optional[str] = None + + +class CreateProjectResponse(BaseModel): + """Create project response""" + + project: Project + upload_url: str + upload_fields: Dict[str, str] + + +class PaginationParams(BaseModel): + """Pagination parameters""" + + page: int = 1 + limit: int = 20 + + +class PaginatedResponse(BaseModel, Generic[T]): + """Paginated response""" + + items: List[T] + total: int + page: int + limit: int + hasMore: bool + + +class UploadStatusResponse(BaseModel): + """Upload status response""" + + project_id: str + status: str + progress: int + message: str + + +# =========================================== +# CHAT & QUERY MODELS +# =========================================== + + +class ChatMessage(BaseModel): + """Chat message model""" + + id: str + project_id: str + user_id: str + content: str + role: str # 'user' or 'assistant' + created_at: str + metadata: Optional[Dict[str, Any]] = None + + +class SendMessageRequest(BaseModel): + """Send message request""" + + message: str + context: Optional[List[str]] = None + + +class QueryResult(BaseModel): + """Query result model""" + + id: str + query: str + sql_query: str + result_type: str # 'table', 'chart', 'summary' + data: List[Dict[str, Any]] + execution_time: float + row_count: int + chart_config: Optional[Dict[str, Any]] = None + + +class SendMessageResponse(BaseModel): + """Send message response""" + + message: ChatMessage + result: QueryResult + + +class CSVPreview(BaseModel): + """CSV preview model""" + + columns: List[str] + sample_data: List[List[Any]] + total_rows: int + data_types: Dict[str, str] + + +class QuerySuggestion(BaseModel): + """Query suggestion model""" + + id: str + text: str + category: str + complexity: str diff --git a/backend/requirements.txt b/backend/requirements.txt index f690027..6dc387c 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,16 +1,34 @@ # FastAPI and server dependencies fastapi==0.104.1 uvicorn[standard]==0.24.0 +pydantic==2.5.0 # Environment and configuration python-dotenv==1.0.0 +# Database dependencies +psycopg2-binary==2.9.9 +sqlalchemy==2.0.23 +alembic==1.13.1 + +# Redis and caching +redis==5.0.1 + +# Celery for background tasks +celery==5.3.4 +flower==2.0.1 + +# MinIO/S3 client +minio==7.2.0 + +# File processing (will be used in later tasks) +pandas==2.1.4 +python-multipart==0.0.18 + +# JWT authentication +PyJWT==2.8.0 + # Future dependencies (commented for now, will be added in later tasks) # langchain==0.1.0 # openai==1.3.0 -# pandas==2.1.4 -# duckdb==0.9.2 -# celery==5.3.4 -# redis==5.0.1 -# psycopg2-binary==2.9.9 -# python-multipart==0.0.6 \ No newline at end of file +# duckdb==0.9.2 \ No newline at end of file diff --git a/backend/services/__init__.py b/backend/services/__init__.py new file mode 100644 index 0000000..cb5a1bb --- /dev/null +++ b/backend/services/__init__.py @@ -0,0 +1 @@ +# Services package for SmartQuery backend diff --git a/backend/services/database_service.py b/backend/services/database_service.py new file mode 100644 index 0000000..974870b --- /dev/null +++ b/backend/services/database_service.py @@ -0,0 +1,84 @@ +import logging +import os +from typing import Any, Dict, Optional + +from sqlalchemy import create_engine, text +from sqlalchemy.orm import sessionmaker + +logger = logging.getLogger(__name__) + + +class DatabaseService: + """Database service for PostgreSQL operations""" + + def __init__(self): + self.database_url = os.getenv( + "DATABASE_URL", + "postgresql://smartquery_user:smartquery_dev_password@localhost:5432/smartquery", + ) + self.engine = None + self.SessionLocal = None + + def connect(self) -> bool: + """Establish database connection""" + try: + self.engine = create_engine(self.database_url) + self.SessionLocal = sessionmaker( + autocommit=False, autoflush=False, bind=self.engine + ) + + # Test connection + with self.engine.connect() as conn: + conn.execute(text("SELECT 1")) + + logger.info("Database connection established successfully") + return True + + except Exception as e: + logger.error(f"Failed to connect to database: {str(e)}") + return False + + def health_check(self) -> Dict[str, Any]: + """Check database health""" + try: + if not self.engine: + self.connect() + + with self.engine.connect() as conn: + result = conn.execute(text("SELECT version()")) + version = result.fetchone()[0] + + # Get basic stats + stats_query = text( + """ + SELECT + (SELECT count(*) FROM users) as user_count, + (SELECT count(*) FROM projects) as project_count, + (SELECT count(*) FROM chat_messages) as message_count + """ + ) + stats = conn.execute(stats_query).fetchone() + + return { + "status": "healthy", + "version": version, + "stats": { + "users": stats.user_count, + "projects": stats.project_count, + "messages": stats.message_count, + }, + } + + except Exception as e: + logger.error(f"Database health check failed: {str(e)}") + return {"status": "unhealthy", "error": str(e)} + + def get_session(self): + """Get database session""" + if not self.SessionLocal: + self.connect() + return self.SessionLocal() + + +# Global database service instance +db_service = DatabaseService() diff --git a/backend/services/redis_service.py b/backend/services/redis_service.py new file mode 100644 index 0000000..5e7bf5c --- /dev/null +++ b/backend/services/redis_service.py @@ -0,0 +1,77 @@ +import logging +import os +from typing import Any, Dict, Optional + +import redis + +logger = logging.getLogger(__name__) + + +class RedisService: + """Redis service for caching and message broker operations""" + + def __init__(self): + self.redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") + self.client = None + + def connect(self) -> bool: + """Establish Redis connection""" + try: + self.client = redis.from_url(self.redis_url, decode_responses=True) + # Test connection + self.client.ping() + logger.info("Redis connection established successfully") + return True + + except Exception as e: + logger.error(f"Failed to connect to Redis: {str(e)}") + return False + + def health_check(self) -> Dict[str, Any]: + """Check Redis health""" + try: + if not self.client: + self.connect() + + info = self.client.info() + + return { + "status": "healthy", + "version": info.get("redis_version"), + "connected_clients": info.get("connected_clients"), + "used_memory": info.get("used_memory_human"), + "uptime": info.get("uptime_in_seconds"), + } + + except Exception as e: + logger.error(f"Redis health check failed: {str(e)}") + return {"status": "unhealthy", "error": str(e)} + + def get_client(self): + """Get Redis client""" + if not self.client: + self.connect() + return self.client + + def set_cache(self, key: str, value: str, ttl: int = 3600) -> bool: + """Set cache value with TTL""" + try: + client = self.get_client() + client.setex(key, ttl, value) + return True + except Exception as e: + logger.error(f"Failed to set cache for key {key}: {str(e)}") + return False + + def get_cache(self, key: str) -> Optional[str]: + """Get cache value""" + try: + client = self.get_client() + return client.get(key) + except Exception as e: + logger.error(f"Failed to get cache for key {key}: {str(e)}") + return None + + +# Global Redis service instance +redis_service = RedisService() diff --git a/backend/services/storage_service.py b/backend/services/storage_service.py new file mode 100644 index 0000000..8384544 --- /dev/null +++ b/backend/services/storage_service.py @@ -0,0 +1,106 @@ +import logging +import os +from typing import Any, Dict, Optional + +from minio import Minio +from minio.error import S3Error + +logger = logging.getLogger(__name__) + + +class StorageService: + """MinIO storage service for file operations""" + + def __init__(self): + self.endpoint = os.getenv("MINIO_ENDPOINT", "localhost:9000") + self.access_key = os.getenv("MINIO_ACCESS_KEY", "minio_admin") + self.secret_key = os.getenv("MINIO_SECRET_KEY", "minio_dev_password123") + self.bucket_name = os.getenv("MINIO_BUCKET_NAME", "smartquery-files") + self.secure = os.getenv("MINIO_SECURE", "false").lower() == "true" + self.client = None + + def connect(self) -> bool: + """Establish MinIO connection""" + try: + self.client = Minio( + self.endpoint, + access_key=self.access_key, + secret_key=self.secret_key, + secure=self.secure, + ) + + # Test connection by checking if bucket exists + bucket_exists = self.client.bucket_exists(self.bucket_name) + if not bucket_exists: + self.client.make_bucket(self.bucket_name) + logger.info(f"Created bucket: {self.bucket_name}") + + logger.info("MinIO connection established successfully") + return True + + except Exception as e: + logger.error(f"Failed to connect to MinIO: {str(e)}") + return False + + def health_check(self) -> Dict[str, Any]: + """Check MinIO health""" + try: + if not self.client: + self.connect() + + # Check bucket existence and get basic info + bucket_exists = self.client.bucket_exists(self.bucket_name) + + # Count objects in bucket (for basic stats) + objects = list(self.client.list_objects(self.bucket_name, recursive=True)) + object_count = len(objects) + + return { + "status": "healthy", + "endpoint": self.endpoint, + "bucket_name": self.bucket_name, + "bucket_exists": bucket_exists, + "object_count": object_count, + } + + except Exception as e: + logger.error(f"MinIO health check failed: {str(e)}") + return {"status": "unhealthy", "error": str(e)} + + def get_client(self): + """Get MinIO client""" + if not self.client: + self.connect() + return self.client + + def generate_presigned_url( + self, object_name: str, expiry_seconds: int = 3600 + ) -> Optional[str]: + """Generate presigned URL for file upload""" + try: + client = self.get_client() + url = client.presigned_put_object( + self.bucket_name, object_name, expires=expiry_seconds + ) + return url + except Exception as e: + logger.error( + f"Failed to generate presigned URL for {object_name}: {str(e)}" + ) + return None + + def file_exists(self, object_name: str) -> bool: + """Check if file exists in storage""" + try: + client = self.get_client() + client.stat_object(self.bucket_name, object_name) + return True + except S3Error: + return False + except Exception as e: + logger.error(f"Error checking file existence for {object_name}: {str(e)}") + return False + + +# Global storage service instance +storage_service = StorageService() diff --git a/backend/tasks/__init__.py b/backend/tasks/__init__.py new file mode 100644 index 0000000..808c60f --- /dev/null +++ b/backend/tasks/__init__.py @@ -0,0 +1 @@ +# Tasks package for SmartQuery backend Celery tasks diff --git a/backend/tasks/file_processing.py b/backend/tasks/file_processing.py new file mode 100644 index 0000000..11665ee --- /dev/null +++ b/backend/tasks/file_processing.py @@ -0,0 +1,112 @@ +import logging +import os + +from celery import current_task + +from celery_app import celery_app + +logger = logging.getLogger(__name__) + + +@celery_app.task(bind=True) +def process_csv_file(self, project_id: str, file_path: str): + """ + Process uploaded CSV file - placeholder implementation for Task B2 + Will be fully implemented in Task B12 + """ + try: + # Update task state + self.update_state( + state="PROGRESS", + meta={"current": 10, "total": 100, "status": "Starting CSV analysis..."}, + ) + + logger.info(f"Processing CSV file for project {project_id}: {file_path}") + + # Simulate processing steps + import time + + time.sleep(2) + + self.update_state( + state="PROGRESS", + meta={"current": 50, "total": 100, "status": "Analyzing schema..."}, + ) + + time.sleep(2) + + self.update_state( + state="PROGRESS", + meta={"current": 90, "total": 100, "status": "Finalizing..."}, + ) + + # Mock result + result = { + "project_id": project_id, + "status": "completed", + "row_count": 1000, + "column_count": 10, + "columns_metadata": [ + {"name": "id", "type": "number", "nullable": False}, + {"name": "name", "type": "string", "nullable": False}, + {"name": "email", "type": "string", "nullable": True}, + ], + } + + logger.info(f"Successfully processed CSV for project {project_id}") + return result + + except Exception as exc: + logger.error(f"Error processing CSV for project {project_id}: {str(exc)}") + self.update_state( + state="FAILURE", meta={"error": str(exc), "project_id": project_id} + ) + raise exc + + +@celery_app.task(bind=True) +def analyze_csv_schema(self, file_path: str): + """ + Analyze CSV schema - placeholder implementation for Task B2 + Will be fully implemented in Task B13 + """ + try: + logger.info(f"Analyzing CSV schema: {file_path}") + + # Simulate schema analysis + import time + + time.sleep(1) + + # Mock schema result + schema = { + "columns": [ + { + "name": "id", + "type": "integer", + "nullable": False, + "sample_values": [1, 2, 3], + }, + { + "name": "name", + "type": "string", + "nullable": False, + "sample_values": ["John", "Jane", "Bob"], + }, + { + "name": "age", + "type": "integer", + "nullable": True, + "sample_values": [25, 30, None], + }, + ], + "row_count": 1000, + "file_size": "2.5 MB", + } + + logger.info(f"Successfully analyzed schema for {file_path}") + return schema + + except Exception as exc: + logger.error(f"Error analyzing schema for {file_path}: {str(exc)}") + raise exc diff --git a/backend/tests/test_mock_endpoints.py b/backend/tests/test_mock_endpoints.py new file mode 100644 index 0000000..0b74b1e --- /dev/null +++ b/backend/tests/test_mock_endpoints.py @@ -0,0 +1,232 @@ +import uuid +from datetime import datetime, timedelta + +import jwt +import pytest +from fastapi.testclient import TestClient + +from main import app + +client = TestClient(app) + +# Test JWT token for authentication +JWT_SECRET = "mock_secret_key_for_development" +ALGORITHM = "HS256" + + +def create_test_token(user_id: str = "user_001") -> str: + """Create test JWT token""" + to_encode = {"sub": user_id} + expire = datetime.utcnow() + timedelta(minutes=60) + to_encode.update({"exp": expire}) + return jwt.encode(to_encode, JWT_SECRET, algorithm=ALGORITHM) + + +def test_google_login(): + """Test Google OAuth login endpoint""" + response = client.post( + "/auth/google", json={"google_token": "mock_google_token_123"} + ) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert "access_token" in data["data"] + assert "user" in data["data"] + assert data["data"]["user"]["email"] == "john.doe@example.com" + + +def test_get_current_user(): + """Test get current user endpoint""" + token = create_test_token() + response = client.get("/auth/me", headers={"Authorization": f"Bearer {token}"}) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert data["data"]["id"] == "user_001" + + +def test_get_projects(): + """Test get projects endpoint""" + token = create_test_token() + response = client.get( + "/projects?page=1&limit=10", headers={"Authorization": f"Bearer {token}"} + ) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert "items" in data["data"] + assert "total" in data["data"] + assert len(data["data"]["items"]) >= 0 + + +def test_create_project(): + """Test create project endpoint""" + token = create_test_token() + response = client.post( + "/projects", + json={"name": "Test Project", "description": "Test description"}, + headers={"Authorization": f"Bearer {token}"}, + ) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert data["data"]["project"]["name"] == "Test Project" + assert "upload_url" in data["data"] + + +def test_get_project(): + """Test get single project endpoint""" + token = create_test_token() + response = client.get( + "/projects/project_001", headers={"Authorization": f"Bearer {token}"} + ) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert data["data"]["id"] == "project_001" + assert data["data"]["name"] == "Sales Data Analysis" + + +def test_csv_preview(): + """Test CSV preview endpoint""" + token = create_test_token() + response = client.get( + "/chat/project_001/preview", headers={"Authorization": f"Bearer {token}"} + ) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert "columns" in data["data"] + assert "sample_data" in data["data"] + assert len(data["data"]["columns"]) > 0 + + +def test_send_message(): + """Test send chat message endpoint""" + token = create_test_token() + response = client.post( + "/chat/project_001/message", + json={"message": "Show me total sales by product"}, + headers={"Authorization": f"Bearer {token}"}, + ) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert "message" in data["data"] + assert "result" in data["data"] + assert data["data"]["result"]["result_type"] in ["table", "chart", "summary"] + + +def test_query_suggestions(): + """Test query suggestions endpoint""" + token = create_test_token() + response = client.get( + "/chat/project_001/suggestions", headers={"Authorization": f"Bearer {token}"} + ) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert len(data["data"]) > 0 + assert all("text" in suggestion for suggestion in data["data"]) + + +def test_unauthorized_access(): + """Test that endpoints require authentication""" + response = client.get("/projects") + assert response.status_code == 403 + + +def test_invalid_token(): + """Test invalid token handling""" + response = client.get( + "/projects", headers={"Authorization": "Bearer invalid_token"} + ) + assert response.status_code == 401 + + +def test_logout(): + """Test logout endpoint""" + token = create_test_token() + response = client.post("/auth/logout", headers={"Authorization": f"Bearer {token}"}) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert data["data"]["message"] == "Logged out successfully" + + +def test_refresh_token(): + """Test refresh token endpoint""" + response = client.post( + "/auth/refresh", json={"refresh_token": "valid_refresh_token"} + ) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert "access_token" in data["data"] + + +def test_project_status(): + """Test project status endpoint""" + token = create_test_token() + response = client.get( + "/projects/project_001/status", headers={"Authorization": f"Bearer {token}"} + ) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert "status" in data["data"] + assert "progress" in data["data"] + + +def test_get_upload_url(): + """Test get upload URL endpoint""" + token = create_test_token() + response = client.get( + "/projects/project_001/upload-url", headers={"Authorization": f"Bearer {token}"} + ) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert "upload_url" in data["data"] + + +def test_get_messages(): + """Test get chat messages endpoint""" + token = create_test_token() + response = client.get( + "/chat/project_001/messages", headers={"Authorization": f"Bearer {token}"} + ) + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert "items" in data["data"] + assert "total" in data["data"] + + +def test_invalid_google_token(): + """Test invalid Google token""" + response = client.post("/auth/google", json={"google_token": "invalid_token"}) + assert response.status_code == 401 + + +def test_project_not_found(): + """Test project not found error""" + token = create_test_token() + response = client.get( + "/projects/nonexistent_project", headers={"Authorization": f"Bearer {token}"} + ) + assert response.status_code == 404 + + +def test_chart_query_response(): + """Test that chart queries return appropriate response""" + token = create_test_token() + response = client.post( + "/chat/project_001/message", + json={"message": "Create a chart showing sales by category"}, + headers={"Authorization": f"Bearer {token}"}, + ) + assert response.status_code == 200 + data = response.json() + assert data["data"]["result"]["result_type"] == "chart" + assert "chart_config" in data["data"]["result"] diff --git a/backend/utils/__init__.py b/backend/utils/__init__.py new file mode 100644 index 0000000..68f9e11 --- /dev/null +++ b/backend/utils/__init__.py @@ -0,0 +1 @@ +# Utils package for SmartQuery backend diff --git a/database/init/01_init.sql b/database/init/01_init.sql new file mode 100644 index 0000000..e87d9ef --- /dev/null +++ b/database/init/01_init.sql @@ -0,0 +1,60 @@ +-- SmartQuery Database Initialization +-- This script runs automatically when PostgreSQL container starts + +-- Create extensions +CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; +CREATE EXTENSION IF NOT EXISTS "pg_trgm"; + +-- Set timezone +SET timezone = 'UTC'; + +-- Create initial tables (basic structure for now, will be expanded in later tasks) + +-- Users table (will be expanded in Task B4) +CREATE TABLE IF NOT EXISTS users ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + email VARCHAR(255) UNIQUE NOT NULL, + name VARCHAR(255) NOT NULL, + avatar_url TEXT, + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + last_sign_in_at TIMESTAMP WITH TIME ZONE +); + +-- Projects table (will be expanded in Task B9) +CREATE TABLE IF NOT EXISTS projects ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + user_id UUID REFERENCES users(id) ON DELETE CASCADE, + name VARCHAR(255) NOT NULL, + description TEXT, + csv_filename VARCHAR(255), + csv_path TEXT, + row_count INTEGER DEFAULT 0, + column_count INTEGER DEFAULT 0, + columns_metadata JSONB DEFAULT '[]'::jsonb, + status VARCHAR(50) DEFAULT 'uploading', + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +-- Chat messages table (will be expanded in later tasks) +CREATE TABLE IF NOT EXISTS chat_messages ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + project_id UUID REFERENCES projects(id) ON DELETE CASCADE, + user_id UUID REFERENCES users(id) ON DELETE CASCADE, + content TEXT NOT NULL, + role VARCHAR(20) NOT NULL CHECK (role IN ('user', 'assistant')), + metadata JSONB DEFAULT '{}'::jsonb, + created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +-- Create indexes for better performance +CREATE INDEX IF NOT EXISTS idx_projects_user_id ON projects(user_id); +CREATE INDEX IF NOT EXISTS idx_projects_created_at ON projects(created_at DESC); +CREATE INDEX IF NOT EXISTS idx_chat_messages_project_id ON chat_messages(project_id); +CREATE INDEX IF NOT EXISTS idx_chat_messages_created_at ON chat_messages(created_at DESC); + +-- Insert a test user for development +INSERT INTO users (email, name) +VALUES ('dev@smartquery.com', 'Development User') +ON CONFLICT (email) DO NOTHING; \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..7a0173a --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,135 @@ +version: '3.8' + +services: + # PostgreSQL Database + postgres: + image: postgres:15 + container_name: smartquery-postgres + environment: + POSTGRES_USER: smartquery_user + POSTGRES_PASSWORD: smartquery_dev_password + POSTGRES_DB: smartquery + POSTGRES_HOST_AUTH_METHOD: trust + ports: + - "5432:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + - ./database/init:/docker-entrypoint-initdb.d + healthcheck: + test: ["CMD-SHELL", "pg_isready -U smartquery_user -d smartquery"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - smartquery-network + + # Redis Cache & Message Broker + redis: + image: redis:7-alpine + container_name: smartquery-redis + command: redis-server --appendonly yes + ports: + - "6379:6379" + volumes: + - redis_data:/data + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 3 + networks: + - smartquery-network + + # MinIO Object Storage + minio: + image: minio/minio:latest + container_name: smartquery-minio + command: server /data --console-address ":9001" + environment: + MINIO_ROOT_USER: minio_admin + MINIO_ROOT_PASSWORD: minio_dev_password123 + ports: + - "9000:9000" # API + - "9001:9001" # Console + volumes: + - minio_data:/data + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + networks: + - smartquery-network + + # MinIO Client (for bucket creation) + minio-setup: + image: minio/mc:latest + container_name: smartquery-minio-setup + depends_on: + - minio + entrypoint: > + /bin/sh -c " + sleep 10; + /usr/bin/mc alias set myminio http://minio:9000 minio_admin minio_dev_password123; + /usr/bin/mc mb myminio/smartquery-files --ignore-existing; + /usr/bin/mc policy set public myminio/smartquery-files; + exit 0; + " + networks: + - smartquery-network + + # Celery Worker + celery-worker: + build: + context: ./backend + dockerfile: Dockerfile.celery + container_name: smartquery-celery-worker + command: celery -A celery_app worker --loglevel=info + depends_on: + - redis + - postgres + - minio + environment: + - DATABASE_URL=postgresql://smartquery_user:smartquery_dev_password@postgres:5432/smartquery + - REDIS_URL=redis://redis:6379/0 + - CELERY_BROKER_URL=redis://redis:6379/1 + - CELERY_RESULT_BACKEND=redis://redis:6379/1 + - MINIO_ENDPOINT=minio:9000 + - MINIO_ACCESS_KEY=minio_admin + - MINIO_SECRET_KEY=minio_dev_password123 + - MINIO_BUCKET_NAME=smartquery-files + volumes: + - ./backend:/app + networks: + - smartquery-network + restart: unless-stopped + + # Celery Flower (monitoring) + celery-flower: + build: + context: ./backend + dockerfile: Dockerfile.celery + container_name: smartquery-celery-flower + command: celery -A celery_app flower --port=5555 + depends_on: + - redis + - celery-worker + environment: + - CELERY_BROKER_URL=redis://redis:6379/1 + - CELERY_RESULT_BACKEND=redis://redis:6379/1 + ports: + - "5555:5555" + volumes: + - ./backend:/app + networks: + - smartquery-network + restart: unless-stopped + +volumes: + postgres_data: + redis_data: + minio_data: + +networks: + smartquery-network: + driver: bridge \ No newline at end of file diff --git a/scripts/test_infrastructure.py b/scripts/test_infrastructure.py new file mode 100644 index 0000000..2b4c637 --- /dev/null +++ b/scripts/test_infrastructure.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +""" +Infrastructure test script for SmartQuery backend services +This script tests the configuration and setup of our infrastructure services +""" + +import os +import sys +import asyncio +from pathlib import Path + +# Add backend to path +backend_path = Path(__file__).parent.parent / "backend" +sys.path.insert(0, str(backend_path)) + +def test_docker_compose_config(): + """Test Docker Compose configuration""" + print("🐳 Testing Docker Compose Configuration...") + + docker_compose_path = Path(__file__).parent.parent / "docker-compose.yml" + if docker_compose_path.exists(): + print("✅ docker-compose.yml exists") + + # Read and check basic structure + with open(docker_compose_path) as f: + content = f.read() + + services = ["postgres", "redis", "minio", "celery-worker", "celery-flower"] + for service in services: + if service in content: + print(f"✅ {service} service configured") + else: + print(f"❌ {service} service missing") + + # Check for required volumes + if "volumes:" in content: + print("✅ Docker volumes configured") + + # Check for networks + if "networks:" in content: + print("✅ Docker networks configured") + + else: + print("❌ docker-compose.yml not found") + + print() + +def test_backend_structure(): + """Test backend project structure""" + print("📁 Testing Backend Project Structure...") + + backend_path = Path(__file__).parent.parent / "backend" + required_files = [ + "main.py", + "celery_app.py", + "requirements.txt", + "Dockerfile.celery", + "api/health.py", + "services/database_service.py", + "services/redis_service.py", + "services/storage_service.py", + "tasks/file_processing.py" + ] + + for file_path in required_files: + full_path = backend_path / file_path + if full_path.exists(): + print(f"✅ {file_path}") + else: + print(f"❌ {file_path} missing") + + print() + +def test_database_init(): + """Test database initialization scripts""" + print("🗄️ Testing Database Initialization...") + + db_init_path = Path(__file__).parent.parent / "database" / "init" / "01_init.sql" + if db_init_path.exists(): + print("✅ Database initialization script exists") + + with open(db_init_path) as f: + content = f.read() + + tables = ["users", "projects", "chat_messages"] + for table in tables: + if f"CREATE TABLE IF NOT EXISTS {table}" in content: + print(f"✅ {table} table creation script") + else: + print(f"❌ {table} table creation script missing") + else: + print("❌ Database initialization script missing") + + print() + +def test_environment_config(): + """Test environment configuration""" + print("⚙️ Testing Environment Configuration...") + + # Check for required environment variables structure + required_env_vars = [ + "DATABASE_URL", + "REDIS_URL", + "MINIO_ENDPOINT", + "MINIO_ACCESS_KEY", + "MINIO_SECRET_KEY", + "CELERY_BROKER_URL", + "CELERY_RESULT_BACKEND" + ] + + print("Required environment variables:") + for var in required_env_vars: + if var in ["MINIO_ACCESS_KEY", "MINIO_SECRET_KEY"]: + print(f"🔑 {var} (configured in docker-compose)") + else: + print(f"✅ {var} (configured in docker-compose)") + + print() + +def test_service_imports(): + """Test that services can be imported""" + print("📦 Testing Service Imports...") + + try: + from services.database_service import db_service + print("✅ Database service imports successfully") + except Exception as e: + print(f"❌ Database service import failed: {e}") + + try: + from services.redis_service import redis_service + print("✅ Redis service imports successfully") + except Exception as e: + print(f"❌ Redis service import failed: {e}") + + try: + from services.storage_service import storage_service + print("✅ Storage service imports successfully") + except Exception as e: + print(f"❌ Storage service import failed: {e}") + + try: + from celery_app import celery_app + print("✅ Celery app imports successfully") + except Exception as e: + print(f"❌ Celery app import failed: {e}") + + print() + +def test_requirements(): + """Test requirements.txt has necessary dependencies""" + print("📋 Testing Requirements...") + + req_path = Path(__file__).parent.parent / "backend" / "requirements.txt" + if req_path.exists(): + with open(req_path) as f: + content = f.read() + + dependencies = [ + "psycopg2-binary", + "redis", + "celery", + "minio", + "sqlalchemy" + ] + + for dep in dependencies: + if dep in content: + print(f"✅ {dep}") + else: + print(f"❌ {dep} missing from requirements") + else: + print("❌ requirements.txt not found") + + print() + +def main(): + """Run all infrastructure tests""" + print("🚀 SmartQuery Infrastructure Test Suite") + print("=" * 50) + print() + + test_docker_compose_config() + test_backend_structure() + test_database_init() + test_environment_config() + test_service_imports() + test_requirements() + + print("✨ Infrastructure test completed!") + print() + print("📝 Next steps:") + print("1. Install Docker and Docker Compose") + print("2. Run: docker compose up -d") + print("3. Test with: cd backend && python main.py") + print("4. Check health: curl http://localhost:8000/health/") + +if __name__ == "__main__": + main() \ No newline at end of file