From 410c715c13c916e24be14f30e610e6e45cff7d69 Mon Sep 17 00:00:00 2001 From: tanzilahmed0 Date: Wed, 9 Jul 2025 00:50:09 -0700 Subject: [PATCH 1/2] Implemented Task B9 - Project Model and Database --- backend/models/__init__.py | 77 +++++ backend/models/project.py | 264 ++++++++++++++++++ backend/models/user.py | 11 +- .../migrations/002_create_projects_table.sql | 74 +++++ 4 files changed, 422 insertions(+), 4 deletions(-) create mode 100644 backend/models/project.py create mode 100644 database/migrations/002_create_projects_table.sql diff --git a/backend/models/__init__.py b/backend/models/__init__.py index 817fafb..4f853a1 100644 --- a/backend/models/__init__.py +++ b/backend/models/__init__.py @@ -1 +1,78 @@ # Models package for SmartQuery backend + +# Import all models to ensure they are registered with SQLAlchemy +from models.base import Base +from models.project import ( + ColumnMetadata, + ProjectBase, + ProjectCreate, + ProjectInDB, + ProjectPublic, + ProjectStatusEnum, + ProjectTable, + ProjectUpdate, +) +from models.response_schemas import ( + ApiResponse, + AuthResponse, + ChatMessage, + CreateProjectRequest, + CreateProjectResponse, + CSVPreview, + HealthChecks, + HealthStatus, + PaginatedResponse, + PaginationParams, + Project, + QueryResult, + QuerySuggestion, + UploadStatusResponse, + User, + ValidationError, +) +from models.user import ( + GoogleOAuthData, + UserBase, + UserCreate, + UserInDB, + UserTable, + UserUpdate, +) + +__all__ = [ + # Base + "Base", + # User models + "UserTable", + "UserBase", + "UserCreate", + "UserUpdate", + "UserInDB", + "GoogleOAuthData", + # Project models + "ProjectTable", + "ProjectStatusEnum", + "ColumnMetadata", + "ProjectBase", + "ProjectCreate", + "ProjectUpdate", + "ProjectInDB", + "ProjectPublic", + # Response schemas + "ApiResponse", + "HealthStatus", + "HealthChecks", + "ValidationError", + "User", + "AuthResponse", + "Project", + "CreateProjectRequest", + "CreateProjectResponse", + "PaginationParams", + 
"PaginatedResponse", + "UploadStatusResponse", + "ChatMessage", + "QueryResult", + "CSVPreview", + "QuerySuggestion", +] diff --git a/backend/models/project.py b/backend/models/project.py new file mode 100644 index 0000000..3891b95 --- /dev/null +++ b/backend/models/project.py @@ -0,0 +1,264 @@ +import uuid +from datetime import datetime +from enum import Enum +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +from pydantic import BaseModel, Field, field_validator +from sqlalchemy import ( + Boolean, + Column, + DateTime, +) +from sqlalchemy import Enum as SQLEnum +from sqlalchemy import ( + ForeignKey, + Integer, + String, + Text, + TypeDecorator, + func, +) +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.dialects.postgresql import UUID as PG_UUID +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from models.base import Base + +if TYPE_CHECKING: + from models.user import UserTable + + +class UUID(TypeDecorator): + """ + Platform-independent UUID type. + + Uses PostgreSQL's UUID type, otherwise uses + CHAR(32), storing as string. 
+ """ + + impl = PG_UUID + cache_ok = True + + def load_dialect_impl(self, dialect): + if dialect.name == "postgresql": + return dialect.type_descriptor(PG_UUID()) + else: + return dialect.type_descriptor(String(32)) + + def process_bind_param(self, value, dialect): + if value is None: + return value + elif dialect.name == "postgresql": + return str(value) + else: + if not isinstance(value, uuid.UUID): + return "%.32x" % uuid.UUID(value).int + else: + # hexstring + return "%.32x" % value.int + + def process_result_value(self, value, dialect): + if value is None: + return value + else: + if not isinstance(value, uuid.UUID): + value = uuid.UUID(value) + return value + + +class ProjectStatusEnum(str, Enum): + """Project status enumeration""" + + UPLOADING = "uploading" + PROCESSING = "processing" + READY = "ready" + ERROR = "error" + + +class ProjectTable(Base): + """SQLAlchemy Project table model for PostgreSQL""" + + __tablename__ = "projects" + + id: Mapped[uuid.UUID] = mapped_column(UUID, primary_key=True, default=uuid.uuid4) + user_id: Mapped[uuid.UUID] = mapped_column( + UUID, ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True + ) + name: Mapped[str] = mapped_column(String(255), nullable=False) + description = Column(Text, nullable=True) + csv_filename: Mapped[str] = mapped_column(String(255), nullable=False) + csv_path: Mapped[str] = mapped_column(Text, nullable=False) + row_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + column_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + columns_metadata = Column(JSONB, nullable=True) + status: Mapped[ProjectStatusEnum] = mapped_column( + SQLEnum(ProjectStatusEnum), nullable=False, default=ProjectStatusEnum.UPLOADING + ) + + # Timestamps + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now() + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), 
onupdate=func.now() + ) + + # Relationships + user: Mapped["UserTable"] = relationship(back_populates="projects") + # chat_messages: Mapped[List["ChatMessageTable"]] = relationship( + # back_populates="project", cascade="all, delete-orphan" + # ) + + def __repr__(self): + return f"<ProjectTable(id={self.id}, name='{self.name}', status='{self.status}')>" + + +# Pydantic models for API validation and serialization + + +class ColumnMetadata(BaseModel): + """Column metadata model""" + + name: str + type: str + nullable: bool = True + sample_values: List[Any] = Field(default_factory=list) + unique_count: Optional[int] = None + min_value: Optional[float] = None + max_value: Optional[float] = None + + class Config: + from_attributes = True + + +class ProjectBase(BaseModel): + """Base project model with common fields""" + + name: str + description: Optional[str] = None + csv_filename: str + csv_path: str + row_count: int = 0 + column_count: int = 0 + columns_metadata: List[ColumnMetadata] = Field(default_factory=list) + status: ProjectStatusEnum = ProjectStatusEnum.UPLOADING + + class Config: + from_attributes = True + + +class ProjectCreate(BaseModel): + """Project creation model""" + + name: str + description: Optional[str] = None + + @field_validator("name") + @classmethod + def validate_name(cls, v): + if not v or not v.strip(): + raise ValueError("Project name cannot be empty") + if len(v.strip()) > 255: + raise ValueError("Project name cannot exceed 255 characters") + return v.strip() + + @field_validator("description") + @classmethod + def validate_description(cls, v): + if v is not None and len(v.strip()) > 1000: + raise ValueError("Description cannot exceed 1000 characters") + return v.strip() if v else None + + class Config: + from_attributes = True + + +class ProjectUpdate(BaseModel): + """Project update model""" + + name: Optional[str] = None + description: Optional[str] = None + csv_filename: Optional[str] = None + csv_path: Optional[str] = None + row_count: Optional[int] = None + column_count: Optional[int] = None + 
columns_metadata: Optional[List[ColumnMetadata]] = None + status: Optional[ProjectStatusEnum] = None + + @field_validator("name") + @classmethod + def validate_name(cls, v): + if v is not None: + if not v or not v.strip(): + raise ValueError("Project name cannot be empty") + if len(v.strip()) > 255: + raise ValueError("Project name cannot exceed 255 characters") + return v.strip() + return v + + @field_validator("description") + @classmethod + def validate_description(cls, v): + if v is not None and len(v.strip()) > 1000: + raise ValueError("Description cannot exceed 1000 characters") + return v.strip() if v else None + + @field_validator("row_count", "column_count") + @classmethod + def validate_counts(cls, v): + if v is not None and v < 0: + raise ValueError("Counts cannot be negative") + return v + + class Config: + from_attributes = True + + +class ProjectInDB(ProjectBase): + """Project model as stored in database""" + + id: uuid.UUID + user_id: uuid.UUID + created_at: datetime + updated_at: datetime + + class Config: + from_attributes = True + + +class ProjectPublic(BaseModel): + """Public project model for API responses""" + + id: str + user_id: str + name: str + description: Optional[str] = None + csv_filename: str + csv_path: str + row_count: int + column_count: int + columns_metadata: List[ColumnMetadata] + status: ProjectStatusEnum + created_at: str + updated_at: str + + @classmethod + def from_db_project(cls, project: ProjectInDB) -> "ProjectPublic": + """Convert ProjectInDB to ProjectPublic""" + return cls( + id=str(project.id), + user_id=str(project.user_id), + name=project.name, + description=project.description, + csv_filename=project.csv_filename, + csv_path=project.csv_path, + row_count=project.row_count, + column_count=project.column_count, + columns_metadata=project.columns_metadata, + status=project.status, + created_at=project.created_at.isoformat(), + updated_at=project.updated_at.isoformat(), + ) + + class Config: + from_attributes = True 
diff --git a/backend/models/user.py b/backend/models/user.py index 57364c7..49362f6 100644 --- a/backend/models/user.py +++ b/backend/models/user.py @@ -1,6 +1,6 @@ import uuid from datetime import datetime -from typing import List, Optional +from typing import TYPE_CHECKING, List, Optional from pydantic import BaseModel, EmailStr, Field, field_validator from sqlalchemy import Boolean, Column, DateTime, String, Text, TypeDecorator, func @@ -9,6 +9,9 @@ from models.base import Base +if TYPE_CHECKING: + from models.project import ProjectTable + class UUID(TypeDecorator): """ @@ -70,9 +73,9 @@ class UserTable(Base): ) # Relationships - # projects: Mapped[List["ProjectTable"]] = relationship( - # back_populates="user", cascade="all, delete-orphan" - # ) + projects: Mapped[List["ProjectTable"]] = relationship( + back_populates="user", cascade="all, delete-orphan" + ) # chat_messages: Mapped[List["ChatMessageTable"]] = relationship( # back_populates="user", cascade="all, delete-orphan" # ) diff --git a/database/migrations/002_create_projects_table.sql b/database/migrations/002_create_projects_table.sql new file mode 100644 index 0000000..62cda8d --- /dev/null +++ b/database/migrations/002_create_projects_table.sql @@ -0,0 +1,74 @@ +-- Migration: 002_create_projects_table.sql +-- Description: Create projects table with foreign key relationship to users table +-- Date: January 2025 + +-- Create project status enum type +CREATE TYPE project_status AS ENUM ('uploading', 'processing', 'ready', 'error'); + +-- Create projects table +CREATE TABLE IF NOT EXISTS projects ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, + name VARCHAR(255) NOT NULL, + description TEXT, + csv_filename VARCHAR(255) NOT NULL, + csv_path TEXT NOT NULL, + row_count INTEGER NOT NULL DEFAULT 0, + column_count INTEGER NOT NULL DEFAULT 0, + columns_metadata JSONB, + status project_status NOT NULL DEFAULT 'uploading', + created_at TIMESTAMP 
WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +-- Create indexes for performance +CREATE INDEX IF NOT EXISTS idx_projects_user_id ON projects(user_id); +CREATE INDEX IF NOT EXISTS idx_projects_name ON projects(name); +CREATE INDEX IF NOT EXISTS idx_projects_status ON projects(status); +CREATE INDEX IF NOT EXISTS idx_projects_created_at ON projects(created_at); +CREATE INDEX IF NOT EXISTS idx_projects_updated_at ON projects(updated_at); +CREATE INDEX IF NOT EXISTS idx_projects_user_status ON projects(user_id, status); +CREATE INDEX IF NOT EXISTS idx_projects_user_created ON projects(user_id, created_at DESC); + +-- Apply trigger to projects table for automatic updated_at updates +CREATE TRIGGER update_projects_updated_at + BEFORE UPDATE ON projects + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + +-- Add constraints +ALTER TABLE projects ADD CONSTRAINT projects_name_check + CHECK (LENGTH(TRIM(name)) > 0 AND LENGTH(TRIM(name)) <= 255); + +ALTER TABLE projects ADD CONSTRAINT projects_csv_filename_check + CHECK (LENGTH(TRIM(csv_filename)) > 0); + +ALTER TABLE projects ADD CONSTRAINT projects_csv_path_check + CHECK (LENGTH(TRIM(csv_path)) > 0); + +ALTER TABLE projects ADD CONSTRAINT projects_row_count_check + CHECK (row_count >= 0); + +ALTER TABLE projects ADD CONSTRAINT projects_column_count_check + CHECK (column_count >= 0); + +ALTER TABLE projects ADD CONSTRAINT projects_description_check + CHECK (description IS NULL OR LENGTH(TRIM(description)) <= 1000); + +-- Add comments for documentation +COMMENT ON TABLE projects IS 'User projects containing CSV data for analysis'; +COMMENT ON COLUMN projects.id IS 'Primary key, UUID'; +COMMENT ON COLUMN projects.user_id IS 'Foreign key to users table'; +COMMENT ON COLUMN projects.name IS 'Project name, user-defined'; +COMMENT ON COLUMN projects.description IS 'Optional project description'; +COMMENT ON COLUMN projects.csv_filename 
IS 'Original CSV filename'; +COMMENT ON COLUMN projects.csv_path IS 'Storage path for CSV file'; +COMMENT ON COLUMN projects.row_count IS 'Number of rows in CSV'; +COMMENT ON COLUMN projects.column_count IS 'Number of columns in CSV'; +COMMENT ON COLUMN projects.columns_metadata IS 'JSON metadata about CSV columns'; +COMMENT ON COLUMN projects.status IS 'Current processing status of the project'; +COMMENT ON COLUMN projects.created_at IS 'Project creation timestamp'; +COMMENT ON COLUMN projects.updated_at IS 'Last update timestamp'; + +-- Add comment on enum type +COMMENT ON TYPE project_status IS 'Project processing status: uploading, processing, ready, error'; \ No newline at end of file From d063508fdbaac962b5fa711ab071aa4fee8ea233 Mon Sep 17 00:00:00 2001 From: tanzilahmed0 Date: Wed, 9 Jul 2025 01:01:55 -0700 Subject: [PATCH 2/2] Made JSON column compatible with SQLite and PostgreSQL --- backend/models/project.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/backend/models/project.py b/backend/models/project.py index 3891b95..1cf4b6a 100644 --- a/backend/models/project.py +++ b/backend/models/project.py @@ -5,6 +5,7 @@ from pydantic import BaseModel, Field, field_validator from sqlalchemy import ( + JSON, Boolean, Column, DateTime, @@ -66,6 +67,24 @@ def process_result_value(self, value, dialect): return value +class CrossDatabaseJSON(TypeDecorator): + """ + Platform-independent JSON type. + + Uses PostgreSQL's JSONB type for better performance, + otherwise uses standard JSON type. 
+ """ + + impl = JSON + cache_ok = True + + def load_dialect_impl(self, dialect): + if dialect.name == "postgresql": + return dialect.type_descriptor(JSONB()) + else: + return dialect.type_descriptor(JSON()) + + class ProjectStatusEnum(str, Enum): """Project status enumeration""" @@ -90,7 +109,7 @@ class ProjectTable(Base): csv_path: Mapped[str] = mapped_column(Text, nullable=False) row_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0) column_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0) - columns_metadata = Column(JSONB, nullable=True) + columns_metadata = Column(CrossDatabaseJSON, nullable=True) status: Mapped[ProjectStatusEnum] = mapped_column( SQLEnum(ProjectStatusEnum), nullable=False, default=ProjectStatusEnum.UPLOADING )