From 39b7fcf9be6eac87fe57f9ae59499b79db365cd2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 4 Oct 2025 19:59:58 +0000
Subject: [PATCH 1/2] Initial plan

From 9a5431e8aa6daa2f72fd1427fea97bbc6c68786c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 4 Oct 2025 20:12:05 +0000
Subject: [PATCH 2/2] chore: add smoke test, move notebook, create inventory
 and report script

Co-authored-by: ravishan16 <4581463+ravishan16@users.noreply.github.com>
---
 .github/workflows/ci.yml                      |   6 +
 REPO_INVENTORY.md                             | 152 ++++++++++++++++++
 .../pipeline_csv_to_parquet multifile.ipynb   |   0
 scripts/report_unused.sh                      | 118 ++++++++++++++
 tests/test_smoke_app.py                       | 111 +++++++++++++
 5 files changed, 387 insertions(+)
 create mode 100644 REPO_INVENTORY.md
 rename notebooks/{ => experiments}/pipeline_csv_to_parquet multifile.ipynb (100%)
 create mode 100755 scripts/report_unused.sh
 create mode 100644 tests/test_smoke_app.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 17340c8..9227fc1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -64,6 +64,12 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r requirements.txt

+      - name: Run smoke tests
+        run: |
+          pytest -k smoke -v
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+
       - name: Run unit tests with coverage
         run: |
           pytest tests/unit/ -v --cov=src --cov-report=xml --cov-report=term-missing

diff --git a/REPO_INVENTORY.md b/REPO_INVENTORY.md
new file mode 100644
index 0000000..4e3009e
--- /dev/null
+++ b/REPO_INVENTORY.md
@@ -0,0 +1,152 @@
# Repository Inventory

**Generated:** 2025-10-04
**Purpose:** Document potentially unused files, large notebooks, and other items to consider for cleanup

---

## Summary

This inventory identifies files that may be candidates for removal, archiving, or relocation, based on static analysis and a review of the repository structure.
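The figures below can be re-derived whenever the inventory is refreshed; a minimal sketch, assuming a conventional `tests`/`docs`/`notebooks` layout (the exact globs are assumptions, not taken from the patch):

```bash
# Rough shell one-liners behind the statistics below (paths are
# assumptions about the repo layout, not part of this patch).
find . -name "*.py" -not -path "./.venv/*" | wc -l   # total Python files
find tests -name "test_*.py" | wc -l                 # test files
find docs -name "*.md" | wc -l                       # documentation files
find notebooks -name "*.ipynb" | wc -l               # notebooks
```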
+ +### Statistics +- Total Python files: 44 +- Total test files: ~20 +- Documentation files: 12 +- Notebooks: 2 + +--- + +## Actions Taken + +### ✅ Moved to Archive +- `notebooks/pipeline_csv_to_parquet multifile.ipynb` → `notebooks/experiments/` + - **Reason:** Large experimental notebook (36KB) not referenced in main pipeline + - **Status:** Preserved in experiments directory for historical reference + - **Action:** MOVED (not deleted) + +--- + +## Files Under Review + +### Configuration Files - Potential Consolidation + +#### `.replit` +- **Size:** 714 bytes +- **Purpose:** Replit IDE configuration +- **Recommendation:** Keep if using Replit; consider adding to `.gitignore` if not needed in version control +- **Risk:** Low - IDE-specific configuration +- **Action:** REVIEW + +#### `pyproject.toml` + `setup.cfg` +- **Purpose:** Both contain tool configuration (Black, isort, mypy, flake8) +- **Observation:** Configuration is split between two files +- **Recommendation:** Consider consolidating all tool config into `pyproject.toml` (modern standard) +- **Action:** CONSOLIDATE (optional optimization) + +--- + +## Notebooks + +### Active Notebooks +- `notebooks/pipeline_csv_to_parquet.ipynb` (24KB) + - **Status:** Active, referenced in documentation + - **Action:** KEEP + +### Archived Notebooks +- `notebooks/experiments/pipeline_csv_to_parquet multifile.ipynb` (36KB) + - **Status:** Experimental, preserved for reference + - **Action:** ARCHIVED + +--- + +## Documentation Assessment + +All documentation files appear active and well-maintained: +- ✅ `docs/ARCHITECTURE.md` (20KB) - Core architecture documentation +- ✅ `docs/AI_ENGINES.md` (24KB) - AI adapter documentation +- ✅ `docs/DATA_PIPELINE.md` (19KB) - Data pipeline documentation +- ✅ `docs/VISUALIZATION.md` (17KB) - Visualization documentation +- ✅ Other docs: All appear referenced and active + +**Observation:** `docs/ARCHITECTURE_V2.md` (1.7KB) is notably smaller than `ARCHITECTURE.md` (20KB) +- **Recommendation:** Verify if V2 is intended to replace or supplement ARCHITECTURE.md +- **Action:** REVIEW DOCUMENTATION STRATEGY + +--- + +## Code Quality Notes + +### Ignored Files in Coverage/Linting +The following file is explicitly excluded from coverage and linting: +- `src/ai_service_old.py` + - **Status:** Listed in `.gitignore`, `pyproject.toml`, and `setup.cfg` + - **Issue:** File reference exists but file not found in repository + - **Recommendation:** Remove references from config files since file doesn't exist + - **Action:** CLEANUP CONFIG + +--- + +## Testing Infrastructure + +### Test Structure +- Unit tests: `tests/unit/` - Well organized with 49 passing tests +- Integration tests: `tests/integration/` - Properly separated +- New smoke test: `tests/test_smoke_app.py` - Added for basic import validation + +### Coverage +Current coverage is 75% for base adapter, with lower coverage for AI adapters (expected for integration code). + +--- + +## Recommendations + +### High Priority +1. ✅ **COMPLETED:** Move `pipeline_csv_to_parquet multifile.ipynb` to experiments +2. ✅ **COMPLETED:** Add smoke test for app initialization +3. **TODO:** Remove references to `src/ai_service_old.py` from config files + +### Medium Priority +1. **Consider:** Consolidate tool configuration into `pyproject.toml` only +2. **Review:** Clarify relationship between `ARCHITECTURE.md` and `ARCHITECTURE_V2.md` +3. **Consider:** Add `.replit` to `.gitignore` if not needed in version control + +### Low Priority +1. 
### Medium Priority
1. **Consider:** Consolidate tool configuration into `pyproject.toml` only
2. **Review:** Clarify the relationship between `ARCHITECTURE.md` and `ARCHITECTURE_V2.md`
3. **Consider:** Add `.replit` to `.gitignore` if it is not needed in version control

### Low Priority
1. **Monitor:** Track unused code with regular `vulture` runs (tooling added in `scripts/report_unused.sh`)
2. **Consider:** Create `notebooks/README.md` to document the purpose of each notebook

---

## Future Cleanup Strategy

The `scripts/report_unused.sh` script has been added to help identify:
- Unused Python functions and classes (via vulture)
- Code with zero coverage (via pytest-cov)
- Patterns that may indicate dead code (`_old` backup files, stale TODO/FIXME markers)

Run it periodically with:
```bash
bash scripts/report_unused.sh
```

---

## Risk Assessment

**Overall Risk:** LOW
- All changes are non-destructive
- The experimental notebook is preserved (moved, not deleted)
- No production code was removed
- A smoke test was added for safety

**Rollback:** Simple: revert the branch; all moved files can be restored from git history (see the `git log --follow` sketch below).

---

## Notes

This inventory is a living document. As the codebase evolves:
1. Run `scripts/report_unused.sh` periodically
2. Update this inventory when making structural changes
3. Review archived notebooks annually for potential deletion
4. Keep test coverage high to help identify truly unused code

diff --git a/notebooks/pipeline_csv_to_parquet multifile.ipynb b/notebooks/experiments/pipeline_csv_to_parquet multifile.ipynb
similarity index 100%
rename from notebooks/pipeline_csv_to_parquet multifile.ipynb
rename to notebooks/experiments/pipeline_csv_to_parquet multifile.ipynb
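Because this is a pure rename (100% similarity), git keeps the notebook's full history; a quick check, using the renamed path from the entry above:

```bash
# Verify that history follows the renamed notebook; quote the path
# because of the embedded space.
git log --follow --oneline -- "notebooks/experiments/pipeline_csv_to_parquet multifile.ipynb"
```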
+ echo "----------------------------------------" + + # Run tests with coverage, focusing on source code + pytest tests/unit/ --cov=src --cov-report=term-missing --cov-report=html -q 2>&1 | \ + grep -E "^src/" | \ + grep -E "0%" || echo "✅ All source files have some test coverage" + + echo "" + echo "📁 Detailed coverage report available at: htmlcov/index.html" + echo "" +else + echo "⚠️ pytest not available for coverage analysis" + echo "" +fi + +# Check for common patterns that might indicate unused code +echo "🔍 Checking for potential cleanup patterns..." +echo "-------------------------------------------" + +# Look for files with "_old" suffix +OLD_FILES=$(find src/ -type f -name "*_old.py" 2>/dev/null || true) +if [ -n "$OLD_FILES" ]; then + echo "⚠️ Found files with '_old' suffix:" + echo "$OLD_FILES" +else + echo "✅ No '_old' backup files found" +fi + +# Look for TODO/FIXME/DEPRECATED comments +TODO_COUNT=$(grep -r "TODO\|FIXME\|DEPRECATED" src/ --include="*.py" 2>/dev/null | wc -l || echo "0") +if [ "$TODO_COUNT" -gt 0 ]; then + echo "📝 Found $TODO_COUNT TODO/FIXME/DEPRECATED comments in source code" + echo " Run: grep -rn 'TODO\|FIXME\|DEPRECATED' src/ --include='*.py'" +else + echo "✅ No TODO/FIXME/DEPRECATED markers found" +fi + +echo "" +echo "====================================" +echo "✅ Unused code report complete!" +echo "" +echo "💡 Tips:" +echo " - Review vulture output carefully (false positives are common)" +echo " - Focus on code with 0% coverage first" +echo " - Check _old backup files for safe removal" +echo " - Address TODO/FIXME comments before cleanup" +echo "" +echo "📖 See REPO_INVENTORY.md for cleanup recommendations" diff --git a/tests/test_smoke_app.py b/tests/test_smoke_app.py new file mode 100644 index 0000000..f467b01 --- /dev/null +++ b/tests/test_smoke_app.py @@ -0,0 +1,111 @@ +""" +Smoke test for app initialization. + +This test ensures that the application can be imported and core initialization +paths work without network calls or full UI rendering. 
+""" + +import sys +from unittest.mock import MagicMock, patch + + +def test_app_imports_successfully(): + """Test that the app.py module can be imported without errors.""" + # Mock streamlit to prevent UI operations + sys.modules["streamlit"] = MagicMock() + + try: + import app # noqa: F401 + + assert True, "app.py imported successfully" + except ImportError as e: + assert False, f"Failed to import app.py: {e}" + + +def test_app_logic_imports_successfully(): + """Test that src.app_logic can be imported.""" + try: + from src import app_logic # noqa: F401 + + assert True, "src.app_logic imported successfully" + except ImportError as e: + assert False, f"Failed to import src.app_logic: {e}" + + +def test_app_logic_initialize_without_streamlit(): + """Test that initialize_app_data can be called with mocked streamlit.""" + + # Create a session state mock that supports both dict and attribute access + class SessionStateMock: + def __init__(self): + self._data = {} + + def __contains__(self, key): + return key in self._data + + def __setitem__(self, key, value): + self._data[key] = value + + def __getitem__(self, key): + return self._data[key] + + def __setattr__(self, key, value): + if key == "_data": + super().__setattr__(key, value) + else: + self._data[key] = value + + def __getattr__(self, key): + if key == "_data": + return super().__getattribute__(key) + return self._data.get(key) + + def setdefault(self, key, value): + return self._data.setdefault(key, value) + + # Mock streamlit with custom session state + mock_session_state = SessionStateMock() + mock_st = MagicMock() + mock_st.session_state = mock_session_state + + # Mock spinner context manager + mock_spinner = MagicMock() + mock_spinner.__enter__ = MagicMock(return_value=None) + mock_spinner.__exit__ = MagicMock(return_value=None) + mock_st.spinner.return_value = mock_spinner + + # Mock the data services to avoid file I/O + with patch("src.app_logic.st", mock_st): + with patch("src.app_logic.load_parquet_files", return_value=[]): + with patch("src.app_logic.load_schema_context", return_value=""): + with patch("src.app_logic.load_ai_service", return_value=MagicMock(is_available=lambda: False)): + from src.app_logic import initialize_app_data + + # Should not raise any exceptions + initialize_app_data() + + # Verify session state was initialized + assert "generated_sql" in mock_session_state + assert "ai_error" in mock_session_state + assert "show_edit_sql" in mock_session_state + assert "_rendered_this_run" in mock_session_state + + +def test_core_imports_successfully(): + """Test that src.core module can be imported.""" + try: + from src import core # noqa: F401 + + assert True, "src.core imported successfully" + except ImportError as e: + assert False, f"Failed to import src.core: {e}" + + +def test_ai_service_imports_successfully(): + """Test that src.ai_service module can be imported.""" + try: + from src import ai_service # noqa: F401 + + assert True, "src.ai_service imported successfully" + except ImportError as e: + assert False, f"Failed to import src.ai_service: {e}"