From ceb0c31aaef57264d78473b289ba8475f2e3d748 Mon Sep 17 00:00:00 2001
From: Ravishankar Sivasubramaniam
Date: Thu, 2 Oct 2025 16:51:17 -0500
Subject: [PATCH 1/4] fix: fixed documentation and added logo and styles

---
 .github/copilot-instructions.md | 31 +++
 .streamlit/config.toml | 8 +-
 CONTRIBUTING.md | 14 +-
 README.md | 368 ++++++++-------------------
 app.py | 407 +++++++++++++++++++++++-------
 assets/banner.png | Bin 0 -> 1135102 bytes
 assets/banner_960x640.png | Bin 0 -> 619188 bytes
 assets/conversql_logo.svg | 51 ++++
 assets/favicon.ico | Bin 0 -> 23027 bytes
 assets/favicon.png | Bin 0 -> 13665 bytes
 docs/AI_ENGINES.md | 4 +
 docs/ARCHITECTURE.md | 4 +
 requirements.txt | 1 +
 src/ai_engines/bedrock_adapter.py | 35 ++-
 src/ai_engines/claude_adapter.py | 26 +-
 src/ai_engines/gemini_adapter.py | 68 +++--
 src/ai_service.py | 82 +-----
 src/branding.py | 52 ++++
 src/core.py | 21 +-
 src/d1_logger.py | 12 +-
 src/prompts/__init__.py | 5 +
 src/prompts/sql_generation.py | 91 +++++++
 src/simple_auth_components.py | 275 +++++++++++++-------
 23 files changed, 971 insertions(+), 584 deletions(-)
 create mode 100644 .github/copilot-instructions.md
 create mode 100644 assets/banner.png
 create mode 100644 assets/banner_960x640.png
 create mode 100644 assets/conversql_logo.svg
 create mode 100644 assets/favicon.ico
 create mode 100644 assets/favicon.png
 create mode 100644 src/branding.py
 create mode 100644 src/prompts/__init__.py
 create mode 100644 src/prompts/sql_generation.py

diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 0000000..6c50672
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,31 @@
+# converSQL Copilot Guide
+
+## System map
+- `app.py` is the Streamlit shell: it hydrates cached data via `initialize_app_data()`, gates the UI through `simple_auth_wrapper`, and delegates all heavy work to `src/core.py` and `src/ai_service.py`.
+- `src/core.py` owns DuckDB execution and orchestrates data prep. `scan_parquet_files()` will run `scripts/sync_data.py` if `data/processed/*.parquet` are missing, so keep a local Parquet copy handy during tests to avoid network pulls.
+- `src/ai_service.py` routes natural-language prompts into adapter implementations in `src/ai_engines/`. The prompt embeds the mortgage risk heuristics baked into `src/data_dictionary.py`; reuse `AIService._build_sql_prompt()` instead of crafting ad-hoc prompts.
+
+## Data + ontology expectations
+- Loan metadata lives in `data/processed/data.parquet`; schema text comes from `generate_enhanced_schema_context()`, which stitches DuckDB types with ontology metadata from `src/data_dictionary.py` and `docs/DATA_DICTIONARY.md`.
+- When adding derived features, update both the Parquet schema and the ontology entry so AI output and the Ontology Explorer tab stay in sync.
+- The Streamlit Ontology tab imports `LOAN_ONTOLOGY` and `PORTFOLIO_CONTEXT`; breaking their shape (dict → FieldMetadata) will crash the UI.
+
+## AI engine adapters
+- Adapters must subclass `AIEngineAdapter` in `src/ai_engines/base.py`, expose `provider_id`, `name`, `is_available()`, and `generate_sql()`, then be exported via `src/ai_engines/__init__.py` and registered inside `AIService.adapters`.
+- Use `clean_sql_response()` to strip markdown fences, and return `(sql, "")` on success; downstream callers treat any non-empty error string as failure.
+- Keep `AI_PROVIDER` fallbacks working—tests rely on `AIService` surviving with zero credentials, so default to "unavailable" rather than raising.
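A minimal sketch of that contract follows (hypothetical `EchoAdapter`; the import path for `clean_sql_response()` and the exact `generate_sql()` signature are assumptions, so verify both against `src/ai_engines/base.py` before copying):

```python
# Hypothetical sketch, not the authoritative base-class API: assumes
# clean_sql_response() lives in src/ai_engines/base.py and that generate_sql()
# receives the fully built prompt string.
from typing import Tuple

from src.ai_engines.base import AIEngineAdapter, clean_sql_response


class EchoAdapter(AIEngineAdapter):
    """Demo adapter illustrating the contract described above."""

    provider_id = "echo"  # key used when registering in AIService.adapters
    name = "Echo (demo)"

    def is_available(self) -> bool:
        # Real adapters should report False when credentials are missing
        # instead of raising, so AIService survives with zero credentials.
        return True

    def generate_sql(self, prompt: str) -> Tuple[str, str]:
        raw = "SELECT 1;"  # stand-in for a provider response, often fenced
        sql = clean_sql_response(raw)  # strips markdown fences when present
        return sql, ""  # empty error string signals success
```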
+ +## Developer workflows +- Install deps with `pip install -r requirements.txt`; prefer `make setup` for a clean environment (installs + cleanup). +- Fast test cycle: `make test-unit` skips integration markers; `make test` mirrors CI (pytest + coverage). Integration adapters are ignored by default via `pytest.ini`; remove the `--ignore` flags there if you really need live API coverage. +- Lint/format stack is Black 120 cols + isort + flake8 + mypy. `make ci` runs the whole suite and matches the GitHub Actions workflow. + +## Environment & secrets +- Copy `.env.example` to `.env`, then set one provider block (`CLAUDE_API_KEY`, `AWS_*`, or `GEMINI_API_KEY`). Without credentials the UI drops to “AI unavailable” but manual SQL still works. +- Data sync needs Cloudflare R2 keys (`R2_ACCESS_KEY_ID`, `R2_SECRET_ACCESS_KEY`, `R2_ENDPOINT_URL`). In offline dev, set `FORCE_DATA_REFRESH=false` and place Parquet files under `data/processed/`. +- Authentication defaults to Google OAuth (`ENABLE_AUTH=true`); set it to `false` for local hacking or provide `GOOGLE_CLIENT_ID/SECRET` plus HTTPS when deploying. + +## Practical tips +- Clear Streamlit caches with `streamlit cache clear` if schema or ontology changes; otherwise stale `@st.cache_data` results linger. +- When writing new ingest code, mirror the type-casting helpers in `notebooks/pipeline_csv_to_parquet*.ipynb` so DuckDB types stay compatible. +- Logging to Cloudflare D1 is optional—`src/d1_logger.py` silently no-ops without `CLOUDFLARE_*` secrets, so you can call it safely even in tests. diff --git a/.streamlit/config.toml b/.streamlit/config.toml index eb125bf..2e5248e 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -4,7 +4,7 @@ enableCORS = true port = 5000 [theme] -primaryColor = "#1f77b4" -backgroundColor = "#ffffff" -secondaryBackgroundColor = "#f0f2f6" -textColor = "#262730" \ No newline at end of file +primaryColor = "#B45F4D" +backgroundColor = "#FAF6F0" +secondaryBackgroundColor = "#FDFDFD" +textColor = "#3A3A3A" \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 88f6d03..9ff536d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,11 +22,11 @@ There are many ways to contribute to converSQL: ```bash # Fork the repository on GitHub, then clone your fork -git clone https://github.com/YOUR_USERNAME/conversql.git -cd conversql +git clone https://github.com/YOUR_USERNAME/converSQL.git +cd converSQL # Add upstream remote -git remote add upstream https://github.com/ravishan16/conversql.git +git remote add upstream https://github.com/ravishan16/converSQL.git ``` ### 2. Set Up Development Environment @@ -83,6 +83,14 @@ black src/ app.py flake8 src/ app.py --max-line-length=100 ``` +### Front-end Styling + +When updating Streamlit UI components: + +- Re-use the CSS custom properties defined in `app.py` (`--color-background`, `--color-accent-primary`, etc.) instead of hard-coded hex values. +- Mirror changes in `.streamlit/config.toml` when altering primary/secondary colors so the Streamlit theme and custom CSS stay aligned. +- Include before/after screenshots in your pull request whenever you adjust layout, typography, or palette usage. + **Example:** ```python def execute_sql_query(sql_query: str, parquet_files: List[str]) -> pd.DataFrame: diff --git a/README.md b/README.md index 9164a40..d82cf75 100644 --- a/README.md +++ b/README.md @@ -1,86 +1,60 @@ -# Conver--- +

+ converSQL logo +

-## 📖 The Story Behind converSQL - -### The Problem - -Data is everywhere, but accessing it remains a technical barrier. Analysts spend hours writing SQL queries. Business users wait for reports. Data scientists translate questions into complex joins and aggregations. Meanwhile, the insights trapped in your data remain just out of reach for those who need them most. - -Traditional BI tools offer pre-built dashboards, but they're rigid. They can't answer the questions you didn't anticipate. And when you need a custom query, you're back to writing SQL or waiting in the queue for engineering support. - -### The Open Data Opportunity - -What if we could turn this around? What if anyone could ask questions in plain English and get instant, accurate SQL queries tailored to their specific data domain? - -That's where converSQL comes in. Built on the principle that **data should be conversational**, converSQL combines: -- **Ontological modeling**: Structured knowledge about your data domains, relationships, and business rules -- **AI-powered generation**: Multiple AI engines (Bedrock, Claude, Gemini, Ollama) that understand context and generate accurate SQL -- **Open data focus**: Showcasing what's possible with publicly available datasets like Fannie Mae's Single Family Loan Performance Data - -### Our Mission - -We believe data analysis should be: -- **Accessible**: Ask questions in natural language, get answers in seconds -- **Intelligent**: Understand business context, not just column names -- **Extensible**: Easy to adapt to any domain with any data structure -- **Open**: Built on open-source principles, welcoming community contributions +# converSQL ---- +![CI](https://github.com/ravishan16/converSQL/actions/workflows/ci.yml/badge.svg) +![Format & Lint](https://github.com/ravishan16/converSQL/actions/workflows/format-code.yml/badge.svg) +![License: MIT](https://img.shields.io/badge/License-MIT-CA9C72.svg) +![Built with Streamlit](https://img.shields.io/badge/Built%20with-Streamlit-FF4B4B.svg?logo=streamlit&logoColor=white) -## 🏡 Flagship Implementation: Single Family Loan Analytics +> Transform natural language questions into production-ready SQL with ontological context and warm, human-centered design. -To demonstrate converSQL's capabilities, we've built a production-ready application analyzing **9+ million mortgage loan records** from Fannie Mae's public dataset. +## Why converSQL -### Why This Matters +### The challenge +- Business teams wait on backlogs of custom SQL while analysts juggle endless report tweaks. +- Complex domains like mortgage analytics demand institutional knowledge that traditional BI tools cannot encode. +- Open data is abundant, but combining it with AI safely and accurately remains tedious. -The Single Family Loan Performance Data represents one of the most comprehensive public datasets on U.S. mortgage markets. It contains granular loan-level data spanning originations, performance, modifications, and defaults. But with 110+ columns and complex domain knowledge required, it's challenging to analyze effectively. +### Our approach +- **Ontology-first modeling** captures relationships, risk logic, and business vocabulary once and reuses it everywhere. +- **Adapter-based AI orchestration** lets you swap Claude, Bedrock, Gemini, or local engines without touching the UI. +- **Streamlit experience design** bridges analysts and executives with curated prompts, cached schemas, and explainable results. 
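The same orchestration the UI uses is importable directly; here is a hedged sketch of calling it outside Streamlit (assumes `generate_sql_with_ai` accepts a single question string and mirrors the adapters' `(sql, error)` return shape; `app.py` is the reference caller):

```python
# Hedged sketch: driving the service layer without the UI. generate_sql_with_ai
# is the same entry point app.py imports; the single-question argument and the
# (sql, error) return shape are assumptions based on the adapter contract.
from src.ai_service import generate_sql_with_ai

question = "Show high-risk California loans with credit scores below 620"
sql, error = generate_sql_with_ai(question)

if error:  # downstream callers treat any non-empty error string as failure
    print(f"Generation failed: {error}")
else:
    print(sql)  # DuckDB-ready SQL, fences already stripped
```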
-**converSQL makes it conversational:** +## Flagship implementation: Single Family Loan Analytics +The reference app ships with 9M+ rows of Fannie Mae loan performance data. Ask the AI for “high-risk California loans under 620 credit score” and get DuckDB-ready SQL plus rich metrics at a glance. -🔍 **Natural Language Query:** -*"Show me high-risk loans in California with credit scores below 620"* +### Spotlight features +- 🧠 110+ fields grouped into 15 ontology domains with risk heuristics baked into prompts. +- ⚡ CSV ➜ Parquet pipeline with enforced types, 10× compression, and predicate pushdown via DuckDB. +- 🔐 Google OAuth guardrails with optional Cloudflare D1 logging. +- 🤖 Multi-provider AI adapters (Bedrock, Claude, Gemini) with graceful fallbacks and prompt caching. -✨ **Generated SQL:** ```sql SELECT LOAN_ID, STATE, CSCORE_B, OLTV, DTI, DLQ_STATUS, CURRENT_UPB FROM data -WHERE STATE = 'CA' +WHERE STATE = 'CA' AND CSCORE_B < 620 AND CSCORE_B IS NOT NULL ORDER BY CSCORE_B ASC, OLTV DESC -LIMIT 20 +LIMIT 20; ``` -📊 **Instant Results** — with context-aware risk metrics and portfolio insights. - -# converSQL - -> **Transform Natural Language into SQL — Intelligently** - -**converSQL** is an open-source framework that bridges the gap between human questions and database queries. Using ontological data modeling and AI-powered query generation, converSQL makes complex data analysis accessible to everyone — from analysts to executives — without requiring SQL expertise. - -## 🚀 Why Conversational SQL? - -Stop writing complex SQL by hand! With Conversational SQL, you can: -- Ask questions in plain English and get optimized SQL instantly -- Integrate with multiple AI providers (Anthropic Claude, AWS Bedrock, local models) -- Extend to any domain with ontological data modeling -- Build interactive dashboards, query builders, and analytics apps - -## 🏆 Flagship Use Case: Single Family Loan Analytics - -This repo features a production-grade implementation for mortgage loan portfolio analysis. It’s a showcase of how Conversational SQL can power real-world, domain-specific analytics. - -### Key Features - -- **🧠 Ontological Intelligence**: 110+ fields organized into 15 business domains (Credit Risk, Geographic, Temporal, Performance, etc.) 
-- **🎯 Domain-Aware Context**: AI understands mortgage terminology — "high-risk" automatically considers credit scores, LTV ratios, and DTI -- **⚡ High-Performance Pipeline**: Pipe-separated CSVs → Parquet with schema enforcement, achieving 10x compression and instant query performance -- **🔐 Enterprise Security**: Google OAuth integration with Cloudflare D1 query logging -- **🚀 Multiple AI Engines**: Out-of-the-box support for AWS Bedrock, Claude API, and extensible to Gemini, Ollama, and more - ---- +## Architecture at a glance +``` +Streamlit UI (app.py) + └─ Core orchestration (src/core.py) + ├─ DuckDB execution + ├─ Cached schema + ontology context + └─ Data sync checks (scripts/sync_data.py) + └─ AI service (src/ai_service.py) + ├─ Adapter registry (src/ai_engines/*) + ├─ Prompt construction with risk framework + └─ Clean SQL post-processing +``` ## 🏗️ Architecture @@ -120,195 +94,75 @@ Our showcase implementation demonstrates a complete data engineering workflow: 📄 **[Learn more about the data pipeline →](docs/DATA_PIPELINE.md)** ---- - -## �️ Quick Start - -### Prerequisites -- Python 3.11+ -- Google OAuth credentials -- AI Provider (Claude API or AWS Bedrock) -- Cloudflare R2 or local data storage - -### Installation -```bash -git clone -cd converSQL -pip install -r requirements.txt -``` - -### Configuration -```bash -# Copy environment template -cp .env.example .env - -# Configure your settings -# See setup guides for detailed instructions -``` - -### Launch -```bash -streamlit run app.py -``` - - -## 📖 Developer Setup Guides - -All setup and deployment guides are located in the `docs/` directory: - -- **[Google OAuth Setup](docs/GOOGLE_OAUTH_SETUP.md)** — Authentication configuration -- **[Cloud Storage Setup](docs/R2_SETUP.md)** — Cloudflare R2 data storage configuration -- **[Cloudflare D1 Setup](docs/D1_SETUP.md)** — Logging user activity with Cloudflare D1 -- **[Environment Setup](docs/ENVIRONMENT_SETUP.md)** — Environment variables and dependencies -- **[Deployment Guide](docs/DEPLOYMENT.md)** — Deploy to Streamlit Cloud or locally - - - -## � Documentation - -### Setup Guides -- **[Environment Setup](docs/ENVIRONMENT_SETUP.md)** — Configure environment variables and dependencies -- **[Data Pipeline Setup](docs/DATA_PIPELINE.md)** — Understand and customize the data pipeline -- **[Google OAuth Setup](docs/GOOGLE_OAUTH_SETUP.md)** — Enable authentication -- **[Cloud Storage Setup](docs/R2_SETUP.md)** — Configure Cloudflare R2 -- **[Deployment Guide](docs/DEPLOYMENT.md)** — Deploy to production - -### Developer Guides -- **[Contributing Guide](CONTRIBUTING.md)** — How to contribute to converSQL -- **[AI Engine Development](docs/AI_ENGINES.md)** — Add support for new AI providers -- **[Architecture Overview](docs/ARCHITECTURE.md)** — Deep dive into system design - ---- - -## 🤝 Contributing - -We welcome contributions from the community! Whether you're: -- 🐛 Reporting bugs -- 💡 Suggesting features -- 🔧 Adding new AI engine adapters -- 📖 Improving documentation -- 🎨 Enhancing the UI - -**Your contributions make converSQL better for everyone.** - -### How to Contribute - -1. **Fork the repository** -2. **Create a feature branch**: `git checkout -b feature/your-feature-name` -3. **Make your changes** with clear commit messages -4. **Test thoroughly** — ensure existing functionality still works -5. 
**Submit a pull request** with a detailed description - -📄 **[Read the full contributing guide →](CONTRIBUTING.md)** - -### Adding New AI Engines - -converSQL uses an adapter pattern for AI engines. Adding a new provider is straightforward: - -1. Implement the `AIEngineAdapter` interface -2. Add configuration options -3. Register in the AI service -4. Test and document - -📄 **[AI Engine Development Guide →](docs/AI_ENGINES.md)** - ---- - -## 🎯 Use Cases Beyond Loan Analytics - -While our flagship implementation focuses on mortgage data, converSQL is designed for **any domain** with tabular data: - -### Financial Services -- Credit card transaction analysis -- Investment portfolio performance -- Fraud detection patterns -- Regulatory reporting - -### Healthcare -- Patient outcomes analysis -- Clinical trial data exploration -- Hospital performance metrics -- Insurance claims analytics - -### E-commerce -- Customer behavior patterns -- Inventory optimization -- Sales performance tracking -- Supply chain analytics - -### Your Domain -**Bring your own data** — converSQL adapts through ontological modeling. Define your domains, specify relationships, and let AI handle the query generation. - ---- - -## 🌟 Why converSQL? - -### For Analysts -- **Stop writing SQL by hand** — describe what you want, get optimized queries -- **Explore data faster** — try different angles without syntax barriers -- **Focus on insights** — spend time analyzing, not coding - -### For Data Engineers -- **Modular architecture** — swap AI providers, storage backends, or UI components -- **Production-ready** — authentication, logging, caching, error handling built-in -- **Extensible ontology** — encode business logic once, reuse everywhere - -### For Organizations -- **Democratize data access** — empower non-technical users to explore data -- **Reduce bottlenecks** — less waiting for custom reports and queries -- **Open source** — no vendor lock-in, full transparency, community-driven development - ---- - -## 🛣️ Roadmap - -### Current Focus (v1.0) -- ✅ Multi-AI engine support (Bedrock, Claude, Gemini) -- ✅ Bedrock Guardrails integration for content filtering -- ✅ Ontological data modeling -- ✅ Single Family Loan Analytics showcase -- 🔄 Ollama adapter implementation -- 🔄 Enhanced query validation and optimization - -### Future Enhancements (v2.0+) -- Multi-table query generation with JOIN intelligence -- Query explanation and visualization -- Historical query learning and optimization -- More domain-specific implementations (healthcare, e-commerce, etc.) -- API server mode for programmatic access -- Web-based ontology editor - -**Have ideas?** [Open an issue](https://github.com/ravishan16/conversql/issues) or join the discussion! - ---- - -## 📄 License - -**MIT License** — Free to use, modify, and distribute. - -See the [LICENSE](LICENSE) file for details. 
- ---- - -## 🙏 Acknowledgments - -- **Fannie Mae** for making Single Family Loan Performance Data publicly available -- **DuckDB** team for an incredible analytical database engine -- **Anthropic** and **AWS** for powerful AI models -- **Streamlit** for making data apps beautiful and easy -- **Open source community** for inspiration and contributions - ---- - -## 📬 Stay Connected - -- **⭐ Star this repo** to follow development -- **🐦 Share your use cases** — we'd love to hear how you're using converSQL -- **💬 Join discussions** — ask questions, share ideas, help others -- **🐛 Report issues** — help us improve - ---- - -**Built with ❤️ by the converSQL community** - -*Making data conversational, one query at a time.* \ No newline at end of file +## Brand palette +| Token | Hex | Description | +| --- | --- | --- | +| `--color-background` | `#FAF6F0` | Ivory linen canvas across the app | +| `--color-background-alt` | `#FDFDFD` | Porcelain surfaces for cards and modals | +| `--color-text-primary` | `#3A3A3A` | Charcoal Plum headings | +| `--color-text-secondary` | `#7C6F64` | Warm Taupe body copy | +| `--color-accent-primary` | `#DDBEA9` | Soft Clay primary accent | +| `--color-accent-primary-darker` | `#B45F4D` | Terracotta hover and emphasis | +| `--color-border-light` | `#E4C590` | Gold Sand borders, dividers, and tags | + +## Quick start +1. **Install prerequisites** + ```bash + git clone https://github.com/ravishan16/converSQL.git + cd converSQL + pip install -r requirements.txt + ``` +2. **Configure environment** + ```bash + cp .env.example .env + # Enable one AI block (CLAUDE_API_KEY, AWS_* for Bedrock, or GEMINI_API_KEY) + # Provide Google OAuth or set ENABLE_AUTH=false for local dev + ``` +3. **Launch the app** + ```bash + streamlit run app.py + ``` + +## Key documentation +- [Architecture](docs/ARCHITECTURE.md) – layered design and component interactions. +- [Data pipeline](docs/DATA_PIPELINE.md) – ingest, transformation, and Parquet strategy. +- [AI engines](docs/AI_ENGINES.md) – adapter contracts and extension guides. +- [Environment setup](docs/ENVIRONMENT_SETUP.md) – required variables for auth, data, and providers. + +## Developer workflow +- `make setup` – clean install + cache purge. +- `make test-unit` / `make test` – pytest with coverage that mirrors CI. +- `make format` and `make lint` – Black (120 cols), isort, flake8, mypy. +- Cached helpers such as `scan_parquet_files()` trigger `scripts/sync_data.py` when Parquet is missing—keep `data/processed/` warm during tests. + +## Contributing +1. Fork and branch: `git checkout -b feature/my-update`. +2. Run formatting + tests before committing. +3. Open a PR describing the change, provider credentials (if applicable), and test strategy. + +See [CONTRIBUTING.md](CONTRIBUTING.md) for templates, AI adapter expectations, and review checklists. + +## Broader use cases +- **Financial services** – credit risk, portfolio concentrations, regulatory stress tests. +- **Healthcare** – patient outcomes, clinical trial cohorts, claims analytics. +- **E-commerce** – customer segments, inventory velocity, supply chain exceptions. +- **Any ontology-driven domain** – define your schema metadata and let converSQL converse. + +## Roadmap snapshot +- ✅ Multi-AI adapter support with prompt caching and fallbacks. +- ✅ Mortgage analytics reference implementation. +- 🔄 Ollama adapter and enhanced SQL validation. +- 🔮 Upcoming: multi-table joins, query explanations, historical learning, self-serve ontology editor. 
+ +## License +Released under the [MIT License](LICENSE). + +## Acknowledgments +- Fannie Mae for the Single Family Loan Performance dataset. +- The DuckDB, Streamlit, and Anthropic/AWS/Google teams for exceptional tooling. +- The converSQL community for ideas, issues, and adapters. + +## Stay connected +- ⭐ Star the repo to follow releases. +- 💬 Join discussions or open issues at [github.com/ravishan16/converSQL/issues](https://github.com/ravishan16/converSQL/issues). +- 📨 Share what you build—data should feel conversational. \ No newline at end of file diff --git a/app.py b/app.py index 434bb5c..99d66fc 100644 --- a/app.py +++ b/app.py @@ -14,6 +14,7 @@ # Import AI service with new adapter pattern from src.ai_service import generate_sql_with_ai, get_ai_service +from src.branding import get_favicon_path, get_logo_data_uri # Import core functionality from src.core import ( @@ -29,9 +30,11 @@ from src.simple_auth_components import simple_auth_wrapper # Configure page with professional styling +favicon_path = get_favicon_path() + st.set_page_config( page_title="converSQL - Natural Language to SQL", - page_icon="💬", + page_icon=str(favicon_path) if favicon_path else "💬", layout="wide", initial_sidebar_state="expanded", ) @@ -40,56 +43,257 @@ st.markdown( """ """, @@ -116,6 +320,7 @@ def format_file_size(size_bytes: int) -> str: def display_results(result_df: pd.DataFrame, title: str, execution_time: float = None): """Display query results with download option and performance metrics.""" if not result_df.empty: + st.markdown("
", unsafe_allow_html=True) # Compact performance header performance_info = f"✅ {title}: {len(result_df):,} rows" if execution_time: @@ -145,7 +350,9 @@ def display_results(result_df: pd.DataFrame, title: str, execution_time: float = # Use full width for the dataframe with responsive height height = min(600, max(200, len(result_df) * 35 + 50)) # Dynamic height based on rows - st.dataframe(result_df, width="stretch", height=height) + st.dataframe(result_df, use_container_width=True, height=height) + + st.markdown("
", unsafe_allow_html=True) else: st.warning("⚠️ No results found") @@ -213,9 +420,9 @@ def main(): st.markdown( """
-

📊 System Status

+ background: linear-gradient(135deg, var(--color-background) 0%, var(--color-background-alt) 100%); + border-radius: 8px; border: 1px solid var(--color-border-light);'> +

📊 System Status

""", unsafe_allow_html=True, @@ -226,8 +433,8 @@ def main(): st.markdown( """
- Data Files: - {} + Data Files: + {}
""".format( len(parquet_files) @@ -244,9 +451,9 @@ def main(): provider_name = ai_status["active_provider"].title() st.markdown( """ -
-
+
🤖 AI Assistant: {}
@@ -303,12 +510,12 @@ def main(): else: st.markdown( """ -
-
+
🤖 AI Assistant: Unavailable
-
+
Configure Claude API or Bedrock access
@@ -320,9 +527,9 @@ def main(): if DEMO_MODE: st.markdown( """ -
-
+
🧪 Demo Mode Active
@@ -367,7 +574,7 @@ def main(): st.markdown(f"- **Enable Auth**: {os.getenv('ENABLE_AUTH', 'true')}") st.markdown( - "
", + "
", unsafe_allow_html=True, ) @@ -378,12 +585,12 @@ def main(): for file_path in parquet_files: table_name = os.path.splitext(os.path.basename(file_path))[0] st.markdown( - f"
{table_name}
", + f"
{table_name}
", unsafe_allow_html=True, ) else: st.markdown( - "
No tables loaded
", + "
No tables loaded
", unsafe_allow_html=True, ) @@ -391,7 +598,7 @@ def main(): with st.expander("📈 Portfolio Overview", expanded=True): if st.session_state.parquet_files: try: - import duckdb + import duckdb # type: ignore[import-not-found] # Use in-memory connection for stats only with duckdb.connect() as conn: @@ -426,27 +633,31 @@ def main(): ) else: st.markdown( - "
No data loaded
", + "
No data loaded
", unsafe_allow_html=True, ) # Professional header with subtle styling + logo_data_uri = get_logo_data_uri() + if logo_data_uri: + st.markdown( + f""" + + """, + unsafe_allow_html=True, + ) + st.markdown( """ -
-

- 💬 converSQL -

-

- Natural Language to SQL Query Generation -

-

- Multi-Provider AI Intelligence -

-
- Dataset: - 🏠 Single Family Loan Analytics +
+ Multi-Provider AI Intelligence for Mortgage Portfolios +

Natural Language to SQL Query Generation

+

Conversational analytics with ontological guardrails for mortgage portfolios.

+
+ Dataset + 🏠 Single Family Loan Analytics
""", @@ -461,14 +672,11 @@ def main(): with tab1: st.markdown( """ -
-

- Ask Questions About Your Loan Data -

-

- Use natural language to query your loan portfolio data -

-
+
+
+

Ask Questions About Your Loan Data

+

Use natural language to query your loan portfolio data.

+
""", unsafe_allow_html=True, ) @@ -476,26 +684,23 @@ def main(): # More compact analyst question dropdown analyst_questions = get_analyst_questions() - col1, col2 = st.columns([4, 1]) - with col1: + query_col1, query_col2 = st.columns([4, 1], gap="medium") + with query_col1: selected_question = st.selectbox( "💡 **Common Questions:**", [""] + list(analyst_questions.keys()), help="Select a pre-defined question", ) - with col2: - st.write("") # Add spacing to align button - if st.button("🎯 Use", disabled=not selected_question): + with query_col2: + st.write("") + if st.button("🎯 Use", disabled=not selected_question, use_container_width=True): if selected_question in analyst_questions: st.session_state.user_question = analyst_questions[selected_question] st.rerun() # Professional question input with better styling - st.markdown( - "
", - unsafe_allow_html=True, - ) + st.markdown("", unsafe_allow_html=True) user_question = st.text_area( "Your Question", value=st.session_state.get("user_question", ""), @@ -522,7 +727,7 @@ def main(): generate_button = st.button( f"🤖 Generate SQL with {provider_name}", type="primary", - width="stretch", + use_container_width=True, disabled=not is_ai_ready, help="Enter a question above to generate SQL" if not is_ai_ready else None, ) @@ -569,7 +774,7 @@ def main(): execute_button = st.button( "✅ Execute Query", type="primary", - width="stretch", + use_container_width=True, disabled=not has_sql, help="Generate SQL first to execute" if not has_sql else None, ) @@ -590,7 +795,7 @@ def main(): with col2: edit_button = st.button( "✏️ Edit", - width="stretch", + use_container_width=True, disabled=not has_sql, help="Generate SQL first to edit" if not has_sql else None, ) @@ -609,7 +814,7 @@ def main(): col1, col2 = st.columns([3, 1]) with col1: - if st.button("🚀 Run Edited Query", type="primary", width="stretch"): + if st.button("🚀 Run Edited Query", type="primary", use_container_width=True): with st.spinner("⚡ Running edited query..."): try: start_time = time.time() @@ -623,7 +828,7 @@ def main(): st.error(f"❌ Query execution failed: {str(e)}") st.info("💡 Check your SQL syntax and try again") with col2: - if st.button("❌ Cancel", width="stretch"): + if st.button("❌ Cancel", use_container_width=True): st.session_state.show_edit_sql = False st.rerun() @@ -631,10 +836,10 @@ def main(): st.markdown( """
-

+

🗺️ Data Ontology Explorer

-

+

Explore the structured organization of all 110+ data fields across 15 business domains

@@ -657,8 +862,11 @@ def main(): # st.metric( # label="📅 Data Vintage", # value=PORTFOLIO_CONTEXT['overview']['vintage_range'] + + st.markdown("
", unsafe_allow_html=True) + st.markdown("
", unsafe_allow_html=True) # ) - # with col3: + st.markdown("---") # st.metric( # label="🎯 Loss Rate", # value=PORTFOLIO_CONTEXT['performance_summary']['lifetime_loss_rate'] @@ -683,7 +891,7 @@ def main(): # Domain header st.markdown( f""" -

{selected_domain.replace('_', ' ').title()} @@ -745,8 +953,8 @@ def main(): # Field details card st.markdown( f""" -
-
{selected_field}
+
+
{selected_field}

Domain: {field_meta.domain}

Data Type: {field_meta.data_type}

Description: {field_meta.description}

@@ -774,7 +982,7 @@ def main(): st.markdown("### ⚖️ Risk Assessment Framework") st.markdown( f""" -
+

Credit Triangle: {PORTFOLIO_CONTEXT['risk_framework']['credit_triangle']}