Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 95 additions & 10 deletions backend/api/chat.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import logging
import random
import uuid
from datetime import datetime
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from fastapi import APIRouter, Depends, HTTPException, Query

Expand All @@ -21,6 +22,7 @@

router = APIRouter(prefix="/chat", tags=["chat"])
project_service = get_project_service()
logger = logging.getLogger(__name__)

# Mock chat messages database
MOCK_CHAT_MESSAGES = {}
Expand Down Expand Up @@ -378,10 +380,97 @@ async def get_csv_preview(
if not project_obj:
raise HTTPException(status_code=404, detail="Project not found")

# Generate preview from project metadata
if not project_obj.columns_metadata:
# Check if CSV file exists
if not project_obj.csv_path:
raise HTTPException(status_code=404, detail="CSV preview not available")

# Load actual CSV data from storage
preview = _load_csv_preview_from_storage(project_obj)

if not preview:
# Fallback to metadata-based preview if file loading fails
preview = _generate_preview_from_metadata(project_obj)

if not preview:
raise HTTPException(status_code=404, detail="CSV preview not available")

return ApiResponse(success=True, data=preview)

except HTTPException:
# Re-raise HTTPExceptions (like 404) as-is
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error loading CSV preview: {str(e)}")


def _load_csv_preview_from_storage(project_obj) -> Optional[CSVPreview]:
    """Load CSV preview from actual file in storage.

    Downloads the file at ``project_obj.csv_path``, parses it with pandas,
    and builds a CSVPreview holding the column names, the first 5 rows, the
    total row count, and a coarse per-column type label
    ('number' | 'date' | 'boolean' | 'string').

    Returns:
        CSVPreview on success, or None on any failure (missing/empty file,
        parse error) so the caller can fall back to a metadata-based preview.
    """
    try:
        # Local imports: keep the dependency out of module import time and
        # let the fallback path work even if storage/pandas are unavailable.
        from services.storage_service import storage_service
        import pandas as pd
        import io

        # Download CSV file from storage; missing or empty -> no preview.
        csv_bytes = storage_service.download_file(project_obj.csv_path)
        if not csv_bytes:
            return None

        df = pd.read_csv(io.BytesIO(csv_bytes))

        columns = list(df.columns)
        total_rows = len(df)

        # Map pandas dtypes onto the coarse labels the preview consumers use.
        data_types = {}
        for col in columns:
            dtype = str(df[col].dtype)
            if 'int' in dtype or 'float' in dtype:
                data_types[col] = 'number'
            elif 'datetime' in dtype or 'date' in dtype:
                data_types[col] = 'date'
            elif 'bool' in dtype:
                data_types[col] = 'boolean'
            else:
                data_types[col] = 'string'

        # First 5 rows, converted to JSON-serializable Python values.
        # itertuples preserves per-column dtypes; DataFrame.values would first
        # coerce mixed columns into a single object ndarray.
        sample_data = []
        for row in df.head(5).itertuples(index=False, name=None):
            serializable_row = []
            for value in row:
                if pd.isna(value):
                    serializable_row.append(None)
                elif isinstance(value, (pd.Timestamp, pd.Timedelta)):
                    serializable_row.append(str(value))
                elif hasattr(value, 'item'):
                    # Bug fix: numpy scalars (int64, float64, bool_) are not
                    # JSON serializable; unwrap them to native Python values.
                    serializable_row.append(value.item())
                else:
                    serializable_row.append(value)
            sample_data.append(serializable_row)

        return CSVPreview(
            columns=columns,
            sample_data=sample_data,
            total_rows=total_rows,
            data_types=data_types
        )

    except Exception as e:
        logger.error(f"Error loading CSV preview from storage: {str(e)}")
        return None


def _generate_preview_from_metadata(project_obj) -> Optional[CSVPreview]:
"""Generate preview from project metadata as fallback"""
try:
if not project_obj.columns_metadata:
return None

# Extract column names and types
columns = [col.get('name', '') for col in project_obj.columns_metadata]
data_types = {col.get('name', ''): col.get('type', 'unknown') for col in project_obj.columns_metadata}
Expand All @@ -405,20 +494,16 @@ async def get_csv_preview(
row.append(f"Sample {i+1}")
sample_data.append(row)

preview = CSVPreview(
return CSVPreview(
columns=columns,
sample_data=sample_data,
total_rows=project_obj.row_count or 0,
data_types=data_types
)

return ApiResponse(success=True, data=preview)

except HTTPException:
# Re-raise HTTPExceptions (like 404) as-is
raise
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error loading CSV preview: {str(e)}")
logger.error(f"Error generating preview from metadata: {str(e)}")
return None


@router.get("/{project_id}/suggestions")
Expand Down
Binary file modified backend/test.db
Binary file not shown.
126 changes: 126 additions & 0 deletions backend/test_csv_preview.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/usr/bin/env python3
"""
Test script for CSV preview endpoint - Task B18
"""

import io
import pandas as pd
from unittest.mock import Mock, patch
from api.chat import _load_csv_preview_from_storage, _generate_preview_from_metadata

def test_load_csv_preview_from_storage():
    """Preview is built from a real CSV file fetched from storage.

    Bug fix: patch the storage service at
    ``services.storage_service.storage_service``. The function under test
    performs a function-local ``from services.storage_service import
    storage_service`` at call time, so patching ``api.chat.storage_service``
    has no effect and the real service would be used.
    """

    # Create sample CSV data
    sample_csv = """name,age,city,salary
Alice,25,New York,75000
Bob,30,Los Angeles,85000
Charlie,35,Chicago,90000
Diana,28,Houston,80000
Eve,32,Phoenix,77000"""

    # Mock project object
    mock_project = Mock()
    mock_project.csv_path = "test/sample.csv"

    # Patch where the name is looked up (the source module), not api.chat.
    with patch('services.storage_service.storage_service') as mock_storage:
        mock_storage.download_file.return_value = sample_csv.encode('utf-8')

        # Test the function
        result = _load_csv_preview_from_storage(mock_project)

        # Verify results
        assert result is not None
        assert result.columns == ["name", "age", "city", "salary"]
        assert len(result.sample_data) == 5  # Should have 5 rows
        assert result.total_rows == 5
        assert result.data_types["name"] == "string"
        assert result.data_types["age"] == "number"
        assert result.data_types["salary"] == "number"

        # Check sample data
        assert result.sample_data[0] == ["Alice", 25, "New York", 75000]
        assert result.sample_data[1] == ["Bob", 30, "Los Angeles", 85000]

        print("✅ CSV preview from storage test passed!")

def test_generate_preview_from_metadata():
    """Metadata-only fallback: preview must mirror columns_metadata exactly."""

    # Project stub exposing only the attributes the fallback reads.
    fake_project = Mock()
    fake_project.row_count = 100
    fake_project.columns_metadata = [
        {
            "name": "product_name",
            "type": "string",
            "sample_values": ["Product A", "Product B", "Product C"],
        },
        {
            "name": "sales_amount",
            "type": "number",
            "sample_values": [1500.0, 2300.5, 1890.25],
        },
        {
            "name": "date",
            "type": "date",
            "sample_values": ["2024-01-01", "2024-01-02", "2024-01-03"],
        },
    ]

    preview = _generate_preview_from_metadata(fake_project)

    assert preview is not None
    # Column order and names come straight from the metadata entries.
    assert preview.columns == ["product_name", "sales_amount", "date"]
    # The preview is padded out to 5 rows even with only 3 sample values.
    assert len(preview.sample_data) == 5
    # Total row count comes from the project, not from the sample size.
    assert preview.total_rows == 100
    assert preview.data_types["product_name"] == "string"
    assert preview.data_types["sales_amount"] == "number"
    assert preview.data_types["date"] == "date"

    # The first rows reuse the declared sample values verbatim.
    expected_rows = [
        ["Product A", 1500.0, "2024-01-01"],
        ["Product B", 2300.5, "2024-01-02"],
        ["Product C", 1890.25, "2024-01-03"],
    ]
    assert preview.sample_data[:3] == expected_rows

    print("✅ CSV preview from metadata test passed!")

def test_csv_data_types_detection():
    """Test data type detection for different CSV column types.

    Bug fix: patch the storage service at
    ``services.storage_service.storage_service`` — the function under test
    imports it locally from that module at call time, so patching
    ``api.chat.storage_service`` would leave the real service in place.
    """

    # Create CSV with various data types
    sample_csv = """id,name,active,price,created_date,rating
1,Product A,true,19.99,2024-01-01,4.5
2,Product B,false,29.99,2024-01-02,3.8
3,Product C,true,39.99,2024-01-03,4.2"""

    mock_project = Mock()
    mock_project.csv_path = "test/types.csv"

    # Patch where the name is looked up (the source module), not api.chat.
    with patch('services.storage_service.storage_service') as mock_storage:
        mock_storage.download_file.return_value = sample_csv.encode('utf-8')

        result = _load_csv_preview_from_storage(mock_project)

        assert result is not None
        assert result.data_types["id"] == "number"
        assert result.data_types["name"] == "string"
        assert result.data_types["active"] == "boolean"
        assert result.data_types["price"] == "number"
        assert result.data_types["rating"] == "number"

        print("✅ Data type detection test passed!")

if __name__ == "__main__":
print("Testing CSV Preview Endpoint - Task B18")
print("=" * 50)

test_load_csv_preview_from_storage()
test_generate_preview_from_metadata()
test_csv_data_types_detection()

print("\n🎉 All CSV preview tests passed!")
print("Task B18 implementation verified!")
Loading