diff --git a/docs/testing.md b/docs/testing.md index 02bf041..6d763f7 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -290,3 +290,104 @@ npm run e2e:headed # optional, debug mode ``` **Note:** E2E relies on BASE_URL from global-setup (spawns Flask). `SCIDK_PROVIDERS` defaults to `local_fs` in CI. The scan E2E uses a real temp directory under the runner OS temp path and triggers a synchronous scan via `/api/scan`. + +## E2E Testing Complete Guide + +### Quick Start + +1. **Install dependencies** (one-time setup): + ```bash + npm install + npm run e2e:install # Installs Playwright browsers + ``` + +2. **Run all E2E tests**: + ```bash + npm run e2e # Headless (recommended for CI/local verification) + npm run e2e:headed # With visible browser (useful for debugging) + ``` + +3. **Run specific test files**: + ```bash + npm run e2e -- e2e/smoke.spec.ts + npm run e2e -- e2e/core-flows.spec.ts + npm run e2e -- e2e/negative.spec.ts + ``` + +### Available Test Suites + +- **`e2e/smoke.spec.ts`**: Basic page load and navigation smoke tests +- **`e2e/core-flows.spec.ts`**: Full user workflows (scan → browse → details) +- **`e2e/scan.spec.ts`**: Directory scanning functionality +- **`e2e/browse.spec.ts`**: File browsing and navigation +- **`e2e/negative.spec.ts`**: Error handling, empty states, edge cases + +### CI Integration + +E2E tests run automatically in GitHub Actions on every push and PR. 
See `.github/workflows/ci.yml`: + +- **Job: `e2e`**: Runs Playwright tests with `SCIDK_PROVIDERS=local_fs` +- **On failure**: Uploads Playwright report and traces as artifacts +- **Access artifacts**: Go to Actions → failed run → download `playwright-report` + +To view traces locally: +```bash +npx playwright show-trace test-results/<test-name>/trace.zip +``` + +## Troubleshooting + +### E2E Tests + +**Problem: `spawn python ENOENT` or Python not found** +- **Cause**: Playwright global-setup can't find Python executable +- **Fix**: The `e2e/global-setup.ts` uses `python3` on Linux/Mac, `python` on Windows +- **Verify**: `which python3` (Linux/Mac) or `where python` (Windows) + +**Problem: Tests fail with "element not found" or timeouts** +- **Cause**: Page load too slow, or elements missing `data-testid` attributes +- **Fix 1**: Check Flask server logs in test output for errors +- **Fix 2**: Run with headed mode to see what's happening: `npm run e2e:headed` +- **Fix 3**: Verify `data-testid` attributes exist in templates (`scidk/ui/templates/`) + +**Problem: "Port already in use" error** +- **Cause**: Previous Flask server didn't shut down cleanly +- **Fix**: Kill stale processes: `pkill -f "python.*scidk.app"` or `lsof -ti:5000 | xargs kill` + +**Problem: Tests pass locally but fail in CI** +- **Cause**: Different providers enabled, or timing differences +- **Check**: CI uses `SCIDK_PROVIDERS=local_fs` only (see `.github/workflows/ci.yml`) +- **Fix**: Run locally with same env: `SCIDK_PROVIDERS=local_fs npm run e2e` + +### pytest Tests + +**Problem: `ModuleNotFoundError` for scidk package** +- **Cause**: Package not installed in editable mode +- **Fix**: `pip install -e .[dev]` + +**Problem: Tests fail with "No such file or directory" for temp files** +- **Cause**: Tests share or hard-code temp paths, or rely on files left behind by another test +- **Fix**: Use pytest's `tmp_path` fixture, which gives each test its own fresh directory (pytest prunes old ones automatically) + +**Problem: "RuntimeError: Working outside of application context"**
+- **Cause**: Flask test missing `app` or `client` fixture +- **Fix**: Add `def test_something(client):` to use Flask test client + +**Problem: Neo4j or rclone tests fail** +- **Cause**: Missing mocks/fakes for external dependencies +- **Fix**: Use helpers from `tests/helpers/`: + - `from tests.helpers.neo4j import inject_fake_neo4j` + - `from tests.helpers.rclone import rclone_env` + +**Problem: Slow tests or database locks** +- **Cause**: SQLite WAL mode or concurrent access +- **Fix**: Use `tmp_path` for isolated test databases, avoid shared state between tests + +### General Tips + +- **Run tests verbosely**: `python -m pytest -v` or `npm run e2e -- --debug` +- **Run single test**: `python -m pytest tests/test_foo.py::test_bar -v` +- **Skip slow tests**: `python -m pytest -m "not e2e" -q` +- **Clear pytest cache**: `rm -rf .pytest_cache` +- **Check logs**: E2E server logs appear inline with test output +- **Update snapshots**: If visual regression tests exist, use `npm run e2e -- --update-snapshots`
diff --git a/e2e/core-flows.spec.ts b/e2e/core-flows.spec.ts
new file mode 100644
index 0000000..25cc391
--- /dev/null
+++ b/e2e/core-flows.spec.ts
@@ -0,0 +1,141 @@
+import { test, expect } from '@playwright/test';
+import type { APIRequestContext, APIResponse } from '@playwright/test';
+import os from 'os';
+import fs from 'fs';
+import path from 'path';
+
+/**
+ * Core E2E flows for SciDK: scan → browse → file details
+ * Tests user-visible outcomes with stable selectors (data-testid)
+ */
+
+/** Fallback origin when neither Playwright's baseURL nor BASE_URL is set. */
+const DEFAULT_BASE_URL = 'http://127.0.0.1:5000';
+
+/** Pick the server origin: Playwright's baseURL, then BASE_URL, then the default. */
+function resolveBase(baseURL?: string): string {
+  return baseURL || process.env.BASE_URL || DEFAULT_BASE_URL;
+}
+
+/**
+ * Create a small directory tree under the OS temp dir for scanning/browsing:
+ * data.txt, notes.md and subdir/nested.txt. Returns the new directory's path.
+ */
+function createTestDirectory(prefix = 'scidk-e2e-core-'): string {
+  const dir = fs.mkdtempSync(path.join(os.tmpdir(), prefix));
+  fs.writeFileSync(path.join(dir, 'data.txt'), 'sample data');
+  fs.writeFileSync(path.join(dir, 'notes.md'), '# Notes\nTest content');
+  const subdir = path.join(dir, 'subdir');
+  fs.mkdirSync(subdir);
+  fs.writeFileSync(path.join(subdir, 'nested.txt'), 'nested file');
+  return dir;
+}
+
+/** Delete a directory made by createTestDirectory; a missing path is not an error. */
+function removeTestDirectory(dir: string): void {
+  fs.rmSync(dir, { recursive: true, force: true });
+}
+
+/** POST `dir` to /api/scan and hand back the response for the caller to assert on. */
+function scanDirectory(
+  api: APIRequestContext,
+  base: string,
+  dir: string,
+  recursive: boolean,
+): Promise<APIResponse> {
+  return api.post(`${base}/api/scan`, {
+    headers: { 'Content-Type': 'application/json' },
+    data: { path: dir, recursive },
+  });
+}
+
+test('complete flow: scan → browse → file details', async ({ page, baseURL, request }) => {
+  const consoleMessages: { type: string; text: string }[] = [];
+  page.on('console', (msg) => {
+    consoleMessages.push({ type: msg.type(), text: msg.text() });
+  });
+
+  const base = resolveBase(baseURL);
+  const tempDir = createTestDirectory();
+
+  // try/finally so the temp directory is removed even when an assertion fails
+  try {
+    // Step 1: Scan the directory via API
+    const scanResp = await scanDirectory(request, base, tempDir, true);
+    expect(scanResp.ok()).toBeTruthy();
+
+    // Step 2: Navigate to Home and verify scan appears
+    await page.goto(base);
+    await page.waitForLoadState('networkidle');
+    await expect(page.getByTestId('home-recent-scans')).toBeVisible();
+
+    // Verify the scanned path appears on the page (auto-retrying assertion)
+    await expect(page.getByText(tempDir, { exact: false })).not.toHaveCount(0);
+
+    // Step 3: Navigate to Files page
+    await page.getByTestId('nav-files').click();
+    await page.waitForLoadState('networkidle');
+    await expect(page.getByTestId('files-title')).toBeVisible();
+    await expect(page.getByTestId('files-root')).toBeVisible();
+
+    // Step 4: Verify browsing works: the Files root must render some content
+    const filesContent = await page.getByTestId('files-root').textContent();
+    expect(filesContent).toBeTruthy();
+
+    // Step 5: Ensure no console errors occurred during the flow
+    await page.waitForTimeout(500); // Brief wait to catch any delayed errors
+    const errors = consoleMessages.filter((m) => m.type === 'error');
+    expect(errors).toEqual([]); // comparing the array shows the messages on failure
+  } finally {
+    removeTestDirectory(tempDir);
+  }
+});
+
+test('scan with recursive flag captures nested files', async ({ baseURL, request }) => {
+  const base = resolveBase(baseURL);
+  const tempDir = createTestDirectory('scidk-e2e-recursive-');
+
+  try {
+    const scanResp = await scanDirectory(request, base, tempDir, true);
+    expect(scanResp.ok()).toBeTruthy();
+
+    // Verify via API that the scan was indexed
+    const directoriesResp = await request.get(`${base}/api/directories`);
+    expect(directoriesResp.ok()).toBeTruthy();
+    const directories = await directoriesResp.json();
+    expect(Array.isArray(directories)).toBe(true);
+
+    // Check that our scanned directory appears
+    const hasTempDir = directories.some((d: any) => d.path && d.path.includes(tempDir));
+    expect(hasTempDir).toBe(true);
+  } finally {
+    removeTestDirectory(tempDir);
+  }
+});
+
+test('browse page shows correct file listing structure', async ({ page, baseURL, request }) => {
+  const base = resolveBase(baseURL);
+  const tempDir = createTestDirectory('scidk-e2e-browse-');
+
+  try {
+    // Scan directory first; fail fast if the scan itself was rejected
+    const scanResp = await scanDirectory(request, base, tempDir, false);
+    expect(scanResp.ok()).toBeTruthy();
+
+    // Navigate to Files/Datasets page (accessible via nav-files button)
+    await page.goto(base);
+    await page.waitForLoadState('networkidle');
+    await page.getByTestId('nav-files').click();
+    await page.waitForLoadState('networkidle');
+
+    // Verify stable selectors are present
+    await expect(page.getByTestId('files-title')).toBeVisible();
+    await expect(page.getByTestId('files-root')).toBeVisible();
+
+    // The page should have rendered without errors
+    const title = await page.title();
+    expect(title).toBeTruthy();
+  } finally {
+    removeTestDirectory(tempDir);
+  }
+});
diff --git
a/tests/test_helpers_example.py b/tests/test_helpers_example.py
new file mode 100644
index 0000000..ab45970
--- /dev/null
+++ b/tests/test_helpers_example.py
@@ -0,0 +1,71 @@
+"""
+Usage examples for the tests.helpers package.
+
+Doubles as living documentation and as a check that each helper module
+imports cleanly and can be driven from an ordinary test.
+"""
+import pytest
+from tests.helpers.rclone import rclone_env
+from tests.helpers.neo4j import inject_fake_neo4j, CypherRecorder
+from tests.helpers.builders import build_tree, write_csv
+from tests.helpers.asserts import assert_json, assert_error
+
+
+def test_rclone_helper_example(monkeypatch):
+    """Demonstrate rclone_env and the config mapping it returns."""
+    remotes = ["local", "s3", "gdrive"]
+    cfg = rclone_env(monkeypatch, listremotes=remotes, version="rclone v1.62.2")
+
+    assert cfg["version"] == "rclone v1.62.2"
+    assert "s3" in cfg["listremotes"]
+
+
+def test_neo4j_helper_example(monkeypatch):
+    """Demonstrate inject_fake_neo4j and CypherRecorder."""
+    # Patch in stand-in connection settings so no real Neo4j is contacted
+    inject_fake_neo4j(monkeypatch, uri="", user="", password="")
+
+    # CypherRecorder keeps the queries it is given rather than running them
+    final_query = "MATCH (n:Node) RETURN n"
+    rec = CypherRecorder()
+    rec.run("CREATE (n:Node {name: $name})", name="test")
+    rec.run(final_query)
+
+    assert len(rec.records) == 2
+    assert rec.last().query == final_query
+
+
+def test_builders_helper_example(tmp_path):
+    """Demonstrate build_tree and write_csv on a pytest tmp_path."""
+    # Nested dicts describe directories; leaves describe file contents
+    layout = {
+        'data': {
+            'sample.txt': 'hello world',
+            'nested': {
+                'file.txt': 'nested content',
+            },
+        },
+        'output.csv': [['id', 'name'], [1, 'Alice'], [2, 'Bob']],
+    }
+    build_tree(tmp_path, layout)
+
+    data_dir = tmp_path / 'data'
+    assert (data_dir / 'sample.txt').read_text() == 'hello world'
+    assert (data_dir / 'nested' / 'file.txt').exists()
+    assert (tmp_path / 'output.csv').exists()
+
+    # write_csv can also be used on its own for a single file
+    users_csv = tmp_path / 'users.csv'
+    write_csv(users_csv, [['id', 'email'], [1, 'test@example.com']])
+    assert users_csv.exists()
+
+
+def test_asserts_helper_example(client):
+    """Demonstrate assert_json and assert_error against the Flask test client."""
+    # Success path: the providers endpoint should yield a JSON list
+    providers = assert_json(client.get('/api/providers'), shape=list)
+    assert isinstance(providers, list)
+
+    # Failure path: an unknown scan id should yield an error payload
+    failure = assert_error(client.get('/api/scans/nonexistent-id/status'))
+    assert isinstance(failure, dict)