diff --git a/.claude/hooks/lint-on-save.sh b/.claude/hooks/lint-on-save.sh new file mode 100755 index 0000000..7a2fc18 --- /dev/null +++ b/.claude/hooks/lint-on-save.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Runs linters on files after Claude edits them + +# Read hook input from stdin +input=$(cat) +file_path=$(echo "$input" | jq -r '.tool_input.file_path // empty') + +# Exit silently if no file path +[ -z "$file_path" ] && exit 0 + +# Only lint specific file types +case "$file_path" in + *.js|*.jsx|*.ts|*.tsx|*.astro) + cd "$CLAUDE_PROJECT_DIR" + npm run lint:fix -- "$file_path" 2>/dev/null || true + ;; + *.md|*.mdx) + cd "$CLAUDE_PROJECT_DIR" + npm run spellcheck 2>/dev/null || true + ;; +esac + +exit 0 diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 0000000..fdb6f17 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,15 @@ +{ + "hooks": { + "PostToolUse": [ + { + "matcher": "Write|Edit", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/lint-on-save.sh" + } + ] + } + ] + } +} diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml new file mode 100644 index 0000000..205b0fe --- /dev/null +++ b/.github/workflows/claude-code-review.yml @@ -0,0 +1,57 @@ +name: Claude Code Review + +on: + pull_request: + types: [opened, synchronize] + # Optional: Only run on specific file changes + # paths: + # - "src/**/*.ts" + # - "src/**/*.tsx" + # - "src/**/*.js" + # - "src/**/*.jsx" + +jobs: + claude-review: + # Optional: Filter by PR author + # if: | + # github.event.pull_request.user.login == 'external-contributor' || + # github.event.pull_request.user.login == 'new-developer' || + # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' + + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + 
- name: Run Claude Code Review + id: claude-review + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + prompt: | + REPO: ${{ github.repository }} + PR NUMBER: ${{ github.event.pull_request.number }} + + Please review this pull request and provide feedback on: + - Code quality and best practices + - Potential bugs or issues + - Performance considerations + - Security concerns + - Test coverage + + Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback. + + Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR. + + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://docs.claude.com/en/docs/claude-code/cli-reference for available options + claude_args: '--allowed-tools "Bash(gh issue view:*),Bash(gh search:*),Bash(gh issue list:*),Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr list:*)"' + diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 100644 index 0000000..412cef9 --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,50 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + actions: read # Required for 
Claude to read CI results on PRs + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # This is an optional setting that allows Claude to read CI results on PRs + additional_permissions: | + actions: read + + # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. + # prompt: 'Update the pull request description to include a summary of changes.' + + # Optional: Add claude_args to customize behavior and configuration + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://docs.claude.com/en/docs/claude-code/cli-reference for available options + # claude_args: '--allowed-tools Bash(gh pr:*)' + diff --git a/.gitignore b/.gitignore index a242a98..860981e 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ pnpm-debug.log* # CSpell cache .cspellcache +/test-results diff --git a/CLAUDE.md b/CLAUDE.md index fabd343..bd4ff16 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,129 +4,82 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Repository Overview -This is an Astro-based static site deployed to GitHub Pages. The site uses Tailwind CSS for styling and is built using npm/Node.js, then deployed via GitHub Actions. +This is an Astro-based static site deployed to GitHub Pages. The site uses Tailwind CSS for styling and is built using npm/Node.js, then deployed via GitHub Actions; as such, any changes pushed to `main` will be automatically deployed—convenient, but be careful! ## Key Commands -### Local Development - -The repository includes a justfile for convenient local development: - -1. 
**Initial setup** (one-time): - ```bash - # Install trop (port reservation tool) - # Required for automatic port management when running multiple repo checkouts - cargo install trop-cli - - # Install dependencies - npm install - # OR using justfile - just install - ``` - - **About trop**: This project uses [trop](https://github.com/plx/trop) for automatic port management. When you run `just preview`, trop automatically assigns a unique port for each repository checkout directory. This allows you to run multiple checkouts simultaneously without port conflicts. Each directory gets the same port consistently (idempotent), so your bookmarks and workflows remain stable. - -2. **Development commands** (via justfile): - ```bash - # Start Astro dev server for preview (port automatically allocated by trop) - just preview - - # Start server and open in browser - just view - - # Stop the server - just shutdown - - # Open browser if server is already running - just open - - # Build for production - just build - - # Clean build artifacts - just clean - - # Use custom port (all commands accept port argument) - just view 8080 - just shutdown 8080 - ``` - -3. **Direct npm commands** (if needed): - ```bash - # Run development server - npm run dev - - # Build for production - npm run build - - # Preview production build - npm run preview - - # Lint code - npm run lint - npm run lint:fix - ``` - -4. **GitHub Pages deployment**: Push changes to the main branch and GitHub Actions will automatically build and deploy via the `.github/workflows/deploy.yml` workflow - -### Code Style and Linting - -**IMPORTANT**: This project enforces strict linting rules via ESLint. Always run the linter before committing: - -```bash -npm run lint # Check for linting errors -npm run lint:fix # Auto-fix linting errors where possible -``` +The repository includes a justfile to gather all project commands in a single place; if you're unsure "how do I X?", look there first. 
+It also manages the preview server using a tool called `trop` (https://github.com/plx/trop). + +Some key commands are: + +- just install: installs dependencies (npm ci) +- just preview: launches dev server with hot reload (port automatically allocated by trop) +- just shutdown: kills dev server if running (port automatically allocated by trop) +- just build: builds the site for production (to dist/) +- just spellcheck: checks spelling in source files +- just spellcheck-html: checks spelling in built HTML output +- just lint: runs ESLint on all files +- just lint-fix: auto-fixes ESLint issues where possible +- just validate: runs all validation checks (lint + spellcheck + build + links) + +## Key Technical Decisions + +- **Framework**: Astro with React integration +- **Styling**: Tailwind CSS with Typography plugin +- **Content**: MDX support for enhanced markdown +- **Build**: Static site generation to `dist/` folder +- **Deployment**: GitHub Actions workflow deploys to GitHub Pages +- **Site URL**: https://plx.github.io + +Additionally, we aim to have *reasonable* accessibility support throughout the site. + +## Content Structure + +The site's content is organized into three main collections: + +- Blog posts (longer-form articles): `src/content/blog/` +- Briefs (short notes): `src/content/briefs/` +- Projects: `src/content/projects/` -Key linting requirements: -- **Use double quotes for strings** - NOT single quotes (TypeScript/JavaScript) -- The project uses ESLint with strict quote rules -- CI/CD will fail if linting errors are present -- Always verify with `npm run lint` before pushing changes +Here are brief remarks about each. 
-### Content Structure +### Blog Posts + +Structured as folders containing *at least* an `index.md` file, placed in `src/content/blog/`; for example, `my-new-post` looks like: -#### Blog Posts -Create new blog posts in `src/content/blog/` as folders with an `index.md` file: ``` src/content/blog/my-new-post/ -└── index.md +src/content/blog/my-new-post/index.md ``` Posts should include front matter with relevant metadata. -#### Briefs (Short Notes) -Create brief notes in category subfolders within `src/content/briefs/`: -``` -src/content/briefs/swift-warts/my-swift-brief.md -src/content/briefs/claude-code/my-claude-brief.md -``` +### Briefs (Short Notes) -Categories are auto-discovered from folder names. To add a new category, simply create a new folder. You can optionally add a `category.yaml` file in the folder to customize the category metadata (display name, description, sort priority). +Organized into categories represented as folders within `src/content/briefs/`, and stored *directly* as markdown files (no additional nesting / generic `index.md`). +For example, the following contains two briefs—one in the `swift-warts` category and one in the `claude-code` category: -#### Projects -Create project pages in `src/content/projects/` as folders with an `index.md` file: ``` -src/content/projects/my-project/ -└── index.md +src/content/briefs/swift-warts/my-swift-brief.md +src/content/briefs/claude-code/my-claude-brief.md ``` -### Testing and QA +Categories are auto-discovered from folder names. To add a new category, simply create a new folder. +Categories may also customize their display name, description, and sort priority by establishing a `category.yaml` file in the category folder; this is useful because the category name is used in multiple places throughout the site, and benefits from having distinct, contextually-appropriate representations. -The repository has Playwright browser automation available via MCP for testing and QA purposes. 
This enables: -- Visual testing and screenshot capture -- Navigation testing -- Content verification -- Browser automation tasks +### Projects (Descriptions of Projects) -Note: the project has a dedicated QA-via-playwright agent named "web-qa-playwright". +Structured analogously to "Blog Posts", but placed in `src/content/projects/`, instead. -## Architecture +## Directory Structure -### Directory Structure - `src/`: Source code - `components/`: Astro components - `content/`: Content collections (blog, briefs, projects) + - `blog/`: where blog posts live + - `briefs/`: where briefs live + - `projects/`: where project pages live - `layouts/`: Page layouts - `pages/`: Routes and pages - `styles/`: Global styles @@ -135,52 +88,11 @@ Note: the project has a dedicated QA-via-playwright agent named "web-qa-playwrig - `dist/`: Build output (generated, not in repo) - `.github/workflows/`: GitHub Actions workflows -### Key Technical Details -- **Framework**: Astro with React integration -- **Styling**: Tailwind CSS with Typography plugin -- **Content**: MDX support for enhanced markdown -- **Build**: Static site generation to `dist/` folder -- **Deployment**: GitHub Actions workflow deploys to GitHub Pages -- **Site URL**: https://plx.github.io +## Testing and QA + +The repository has Playwright browser automation available via MCP for testing and QA purposes. This enables: -### Content Collections -Astro's content collections are used to manage: -- Blog posts with metadata -- Brief notes -- Project pages - -### Build & Deployment Flow -1. Content is written in Markdown/MDX files -2. Astro processes content through layouts and components -3. `npm run build` generates static site in `dist/` folder -4. GitHub Actions workflow triggers on push to main -5. Workflow builds site and deploys to GitHub Pages - -### CSS Lessons Learned - -When implementing mobile navigation, several CSS challenges were encountered and solved: - -1. 
**Element Hiding Best Practices** - - **Issue**: Using negative `left` positioning (e.g., `left: -100%`) can leave partial elements visible - - **Solution**: Use `transform: translateX(-100%)` combined with `visibility: hidden` for complete hiding - - **Why**: Transform moves the element visually while visibility ensures it's not interactable - -2. **Mobile Layout Gotchas** - - **Issue**: Flex layouts can cause unexpected spacing on mobile - - **Solution**: Change wrapper to `display: block` on mobile breakpoints - - **Why**: Removes flex-related spacing issues - -3. **Z-Index and Positioning** - - Mobile header needs proper z-index stacking (1000+) to stay above content - - Fixed positioning requires careful height calculations for content padding - - Use `overflow: visible` on containers to allow menus to extend beyond - -4. **Debugging Overlapping Elements** - - Browser developer tools are essential for identifying which specific element is causing overlap - - Check both the container and child elements for positioning issues - - Sometimes the issue is inherited padding/margin rather than the obvious element - -5. 
**Full-Width Mobile Menus** - - Set menu width to 100% for better mobile readability - - Ensure no parent containers constrain the width - - Test on actual mobile devices or browser mobile emulation +- Visual testing and screenshot capture +- Navigation testing +- Content verification +- Browser automation tasks diff --git a/cspell.json b/cspell.json index 32e183b..7f4992f 100644 --- a/cspell.json +++ b/cspell.json @@ -9,12 +9,14 @@ "Astro", "astrojs", "autoclosure", + "automagically", "Bandung", "Berman", "BGRA", "briefs", "Claude", "Codegen", + "conformances", "cplusplus", "cppreference", "decltype", @@ -33,15 +35,18 @@ "MDX", "meso", "Meso", + "metatype", + "metatypes", "metrids", + "microbenchmarks", "msapplication", "mstile", "mutatis", "nonnull", "NSURL", "objc", - "plx", "Playwright", + "plx", "prb", "pretraining", "ripgrep", @@ -55,10 +60,12 @@ "Typesafe", "uncategorized", "Uncategorized", + "unintuitively", "webfetch", "worktree", "worktrees", "WWDC", + "Xcodes", "xctest" ], "ignorePaths": [ @@ -103,7 +110,11 @@ "overrides": [ { "filename": "**/*.{ts,tsx,js,jsx,astro}", - "dictionaries": ["typescript", "node", "npm"], + "dictionaries": [ + "typescript", + "node", + "npm" + ], "ignoreRegExpList": [ "/import .* from ['\"].*/g", "/\\b[A-Z][a-z]+(?:[A-Z][a-z]+)+\\b/g" @@ -111,7 +122,10 @@ }, { "filename": "**/*.{md,mdx}", - "dictionaries": ["en-us", "software-terms"], + "dictionaries": [ + "en-us", + "software-terms" + ], "ignoreRegExpList": [ "/```[\\s\\S]*?```/gm", "/`[^`]+`/g" @@ -119,7 +133,11 @@ }, { "filename": "dist/**/*.html", - "dictionaries": ["en-us", "html"], + "dictionaries": [ + "en-us", + "html", + "software-terms" + ], "ignoreRegExpList": [ "//gm", "//gm", diff --git a/justfile b/justfile index 4276b5a..5860a5e 100644 --- a/justfile +++ b/justfile @@ -10,61 +10,15 @@ build: # Preview: launches dev server with hot reload # Fails early if another preview is already running preview port=port: - #!/usr/bin/env bash - set -euo pipefail - - # Check 
if Astro is already running on the specified port - if lsof -i :{{port}} | grep -q LISTEN; then - echo "Error: Port {{port}} is already in use. Another preview may be running." - echo "Run 'just shutdown {{port}}' to stop it first." - exit 1 - fi - - # Start Astro dev server in background - echo "Starting Astro dev server on port {{port}}..." - npm run dev -- --port {{port}} > /tmp/astro-{{port}}.log 2>&1 & - - # Wait a moment for server to start - sleep 3 - - # Check if server started successfully - if lsof -i :{{port}} | grep -q LISTEN; then - echo "Astro server started successfully at http://localhost:{{port}}" - else - echo "Error: Failed to start Astro server. Check /tmp/astro-{{port}}.log for details." - exit 1 - fi + bash scripts/preview-server.sh {{port}} # Shutdown: kills Astro server if running on specified port shutdown port=port: - #!/usr/bin/env bash - set -euo pipefail - - # Find Node process on the specified port - PID=$(lsof -ti :{{port}} 2>/dev/null || true) - - if [ -z "$PID" ]; then - echo "No server found running on port {{port}}" - else - echo "Stopping Astro server on port {{port}} (PID: $PID)..." - kill $PID - echo "Server stopped." - fi + bash scripts/shutdown-server.sh {{port}} # Open: opens browser if server is running open port=port: - #!/usr/bin/env bash - set -euo pipefail - - # Check if server is running on the specified port - if lsof -i :{{port}} | grep -q LISTEN; then - echo "Opening http://localhost:{{port}} in browser..." - open "http://localhost:{{port}}" - else - echo "Error: No server found running on port {{port}}" - echo "Run 'just preview {{port}}' to start the server first." 
- exit 1 - fi + bash scripts/open-browser.sh {{port}} # View: starts preview then opens browser view port=port: @@ -101,4 +55,8 @@ lint-fix: # Validate: runs all validation checks (lint + spellcheck + build + links) validate: - npm run validate:all \ No newline at end of file + npm run validate:all + +# Learn-spelling: adds new words to cspell dictionary (comma-separated) +learn-spelling words: + node scripts/learn-spelling.js {{words}} \ No newline at end of file diff --git a/scripts/learn-spelling.js b/scripts/learn-spelling.js new file mode 100755 index 0000000..a6cf1cf --- /dev/null +++ b/scripts/learn-spelling.js @@ -0,0 +1,53 @@ +#!/usr/bin/env node + +const fs = require("fs"); +const path = require("path"); + +// Check if words argument is provided +if (process.argv.length < 3) { + console.log("Usage: node learn-spelling.js "); + process.exit(1); +} + +// Parse input words +const newWords = process.argv[2] + .split(",") + .map(w => w.trim()) + .filter(w => w.length > 0); + +if (newWords.length === 0) { + console.log("No words provided"); + process.exit(1); +} + +// Read cspell.json +const configPath = path.join(process.cwd(), "cspell.json"); +const config = JSON.parse(fs.readFileSync(configPath, "utf8")); + +// Get existing words +const existingWords = new Set(config.words || []); + +// Add new words (skip duplicates) +let added = 0; +let skipped = 0; +newWords.forEach(word => { + if (existingWords.has(word)) { + console.log(`Skipping duplicate: ${word}`); + skipped++; + } else { + existingWords.add(word); + added++; + console.log(`Adding: ${word}`); + } +}); + +// Sort alphabetically (case-insensitive) +config.words = Array.from(existingWords).sort((a, b) => + a.toLowerCase().localeCompare(b.toLowerCase()) +); + +// Write back with pretty formatting +fs.writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n", "utf8"); + +console.log(`\nDone! 
Added ${added} word(s), skipped ${skipped} duplicate(s).`); +console.log(`Total words in dictionary: ${config.words.length}`); diff --git a/scripts/open-browser.sh b/scripts/open-browser.sh new file mode 100755 index 0000000..f5149bc --- /dev/null +++ b/scripts/open-browser.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Check if port argument is provided +if [ $# -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +PORT="$1" + +# Check if server is running on the specified port +if lsof -i :"$PORT" | grep -q LISTEN; then + echo "Opening http://localhost:$PORT in browser..." + open "http://localhost:$PORT" +else + echo "Error: No server found running on port $PORT" + echo "Run 'just preview $PORT' to start the server first." + exit 1 +fi diff --git a/scripts/preview-server.sh b/scripts/preview-server.sh new file mode 100755 index 0000000..a5b5e97 --- /dev/null +++ b/scripts/preview-server.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Check if port argument is provided +if [ $# -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +PORT="$1" + +# Check if Astro is already running on the specified port +if lsof -i :"$PORT" | grep -q LISTEN; then + echo "Error: Port $PORT is already in use. Another preview may be running." + echo "Run 'just shutdown $PORT' to stop it first." + exit 1 +fi + +# Start Astro dev server in background +echo "Starting Astro dev server on port $PORT..." +npm run dev -- --port "$PORT" > /tmp/astro-"$PORT".log 2>&1 & + +# Wait a moment for server to start +sleep 3 + +# Check if server started successfully +if lsof -i :"$PORT" | grep -q LISTEN; then + echo "Astro server started successfully at http://localhost:$PORT" +else + echo "Error: Failed to start Astro server. Check /tmp/astro-$PORT.log for details." 
+ exit 1 +fi diff --git a/scripts/shutdown-server.sh b/scripts/shutdown-server.sh new file mode 100755 index 0000000..c7e0fce --- /dev/null +++ b/scripts/shutdown-server.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Check if port argument is provided +if [ $# -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +PORT="$1" + +# Find Node process on the specified port +PID=$(lsof -ti :"$PORT" 2>/dev/null || true) + +if [ -z "$PID" ]; then + echo "No server found running on port $PORT" +else + echo "Stopping Astro server on port $PORT (PID: $PID)..." + kill "$PID" + echo "Server stopped." +fi diff --git a/src/content/blog/generic-testing/index.md b/src/content/blog/generic-testing/index.md index 66e58a5..d0383a7 100644 --- a/src/content/blog/generic-testing/index.md +++ b/src/content/blog/generic-testing/index.md @@ -2,19 +2,55 @@ title: "*Generic* Testing For Generic Swift Code" cardTitle: "Testing Generic Swift Code, *Generically*" description: "A practical approach to writing generic tests for generic Swift code." -date: "2025-10-18" -draft: true +date: "2025-11-01" --- ## Introduction -This article explores the concept of "generic testing"—writing test suites that are *themselves* generic, and can thus be evaluated against multiple concrete types. This is particularly important when testing generic code whose behavior subtly depends on the specific types being used, even when those dependencies aren't fully captured by the generic constraints. +Generic code is one of Swift's great strengths—write once, use with many types. But what happens when you need to *test* that generic code? Do you write separate tests for each concrete type? Copy and paste? Hope that testing with `Double` is sufficient for `Float16`? -### What Is Generic Testing? +This article presents a practical solution to this problem: generic testing, where the tests themselves are generic and can be systematically executed against multiple concrete types. 
While this might sound complex, XCTest provides a surprisingly elegant mechanism that makes it straightforward. + +We'll explore: + +- Why generic testing matters +- How to implement generic tests using XCTest's class inheritance +- Techniques for managing test values and validation logic at scale +- Why Swift Testing doesn't currently support this pattern + +If you're writing numerical libraries, generic algorithms, or any code where behavior might subtly vary between type parameters, this technique can save you from subtle bugs that only manifest with specific types. + +## What Is Generic Testing? Generic testing is about writing tests that mirror the genericity of the code being tested. Instead of manually writing separate test functions for each concrete type you want to test, you write the test logic *once* as generic code, then arrange for that logic to be executed against each concrete type of interest. -Consider a generic type like `LinearSpan`: +In other words, in an ideal world we'd be able to write something like this: + +- write a generic test function *once* +- *declare* which types we want to test against +- *automagically* have the test function get executed against each of those concrete types + + +## Why Does It Matter? + +Given how clean Swift's generic semantics are, it's reasonable to ask if generic testing is even necessary. +After all, you might think, generic code is strictly written against the API available via the type bounds, and thus *in general* it "should work" for any conforming type(s). + +The short answer is that this is directionally correct, but there's some nuance: + +- sometimes protocols capture unintuitively-weak semantics[^1] +- Swift generics can't always differentiate between value and reference types (e.g. code might be correct for `struct` but not `class` types)[^2] +- sometimes types don't *fully* satisfy their API contract (often in subtle ways)[^3] + +[^1]: The relationship between e.g. 
`Numeric`, `Comparable`, and the floating-point numbers is an example: reasoning "mathematically" you might expect `a + b > b` when `b > 0`, for example, but...that's not actually a semantic guarantee you can *fully* rely upon. This is far more salient with low-precision types like `Float16`, but it's applicable to the other floating-point types, too. +[^2]: The easy example here is something like, say, conforming a `class` to `SetAlgebra` and then using it with generic `SetAlgebra` code that assumes value semantics (e.g. `var original = foo; foo.formIntersection(with: bar)`). +[^3]: A typical example here is something like `Collection`, which has a large, "multi-type" API contract—the API contract has invariants spanning the collection, its index, its slice type, and so on. In such scenarios, it's easy to get a type to a state where it compiles and works for "easy cases", but has some subtle issues that only manifest under more-intensive usage. + +To help make this more concrete, I'll walk through a concrete scenario. + +### Concrete Example: `LinearSpan` + +For our example, we'll be using the following generic type, which is intended to represent a closed unit interval: ```swift struct LinearSpan where Representation: BinaryFloatingPoint { @@ -35,442 +71,523 @@ struct LinearSpan where Representation: BinaryFloatingPoint { } ``` -This type relies on generic floating-point arithmetic, which can be surprisingly subtle. With 16-bit floats (`Float16`), numerical surprises arise even at typical UI scales—for instance: +This is a useful type to have for doing layout calculations[^4], and in reality would have a much broader API than what's shown here—we're showing just enough to illustrate the need for *generic* testing. 
-- ✅ `Float16(2048) - Float16(1) == Float16(2047)`: this works as-expected -- ❌ `Float16(2048) + Float16(1) == Float16(2049)`: this fails b/c the next `Float16` after 2048 is `2050` +[^4]: Such a type is *very useful* when writing, say, custom layouts (e.g. SwiftUI `Layout`, classical custom `UICollectionViewLayout`s, and so on). -...which can, in turn, lead to otherwise-correct looking code producing unexpected results. +#### Our *Non-Generic* Tests -Consider the following test, which verifies a seemingly-trivial property of `LinearSpan`: "the center of a span with a non-zero length is not an endpoint": +Let's say we've implemented this type, it compiles, and it *seems to work*—so far, so good. +Being diligent, cautious coders, we don't stop there—we write tests, too. -```swift -import Testing +In fact, we write extensive, property-test-style unit tests for this type, and successfully achieve *full* test coverage. +I'm not going to write that entire suite just for this article, but you can imagine that `LinearSpanTests.swift` contains a very large number of tests along the following lines: -@Test -func `LinearSpan.center is not the endpoint`() { - let span = LinearSpan(lowerBound: 2048.0, length: 1) // inferred to be `LineraSpan` - // all of these pass for `Double`, but not for `Float16`: - #expect(span.lowerBound < span.upperBound) +```swift +@Test(arguments: LinearSpan.exampleSpans) +func `LinearSpan.center is not an endpoint`(span: LinearSpan) { + guard span.length > 0 else { return } // skip the degenerate case + // for non-empty spans, verify the expected start-center-end ordering: #expect(span.lowerBound < span.center) #expect(span.center < span.upperBound) } + +@Test(arguments: LinearSpan.exampleSpans, LinearSpan.exampleTranslations) +func `Invariants-of: LinearSpan.translated(by:)`( + span: LinearSpan, + translation: Double +) { + let translated = span.translated(by: translation) + // length should *always* be preserved + #expect(translated.length == 
span.length) + // bounds should be shifted by offset + #expect(translated.lowerBound == span.lowerBound + translation) + guard translation != 0 else { return } // skip the degenerate case + // redundant, weaker check that we moved *at all* - "you can never be too careful" + #expect(translated.lowerBound != span.lowerBound) + #expect(translated.upperBound != span.upperBound) + #expect(translated.center != span.center) +} ``` -Since this is an article about generic testing, we'll focus on testing: discovering this won't dwell on the numerical aspects focus on the testing aspects. Since this isn't an article about floating-point numerics, I'm not going to dwell on the numerical aspects much further. +At this point, we think we're in good shape: our type is complete, its tests pass, and its test coverage is *very* thorough. Perhaps we *should* be more concerned, but we're not, so we move on to what we're actually trying to do: write custom layouts. -Instead, I'm go +#### Where Things Go Wrong -Without generic testing, we'd be stuck writing repetitive boilerplate like: +We write the layouts, they also work well, and *initially* everything seems OK. +Sometime later, however, we start encountering surprising runtime bugs in our layouts: items are mispositioned and mis-sized, item movement is sometimes "jumpy", and so on. -```swift -@Test(arguments: [0.0, 0.01, 1.0]) -func nonZeroLengthImpliesDistinctBounds_Double(length: Double) { - let span = LinearSpan(lowerBound: 0, length: length) - #expect(span.lowerBound < span.upperBound || length == 0.0) -} +I'll spare you the play-by-play and jump to the punchline: for performance reasons, at some point we migrated some of our layouts from `LinearSpan<Double>` to `LinearSpan<Float16>`. +At the time, this seemed like an easy win: 1/4th the memory usage, microbenchmarks showed improved performance, and it *seemed* to work. 
-@Test(arguments: [0.0, 0.01, 1.0] as [Float]) -func nonZeroLengthImpliesDistinctBounds_Float(length: Float) { - let span = LinearSpan(lowerBound: 0, length: length) - #expect(span.lowerBound < span.upperBound || length == 0.0) -} +Unfortunately, however, `Float16` has low-enough precision that the "quirkiness" of floating-point math becomes salient at far more quotidian scales than `Double`; for a particularly *screen-sized* example: -@Test(arguments: [0.0, 0.01, 1.0] as [Float16]) -func nonZeroLengthImpliesDistinctBounds_Float16(length: Float16) { - let span = LinearSpan(lowerBound: 0, length: length) - #expect(span.lowerBound < span.upperBound || length == 0.0) -} -``` +- ✅ `Float16(2048) - Float16(1) == Float16(2047)`: this works as-expected +- ❌ `Float16(2048) + Float16(1) == Float16(2049)`: this fails b/c the next `Float16` after 2048 is `2050` -This approach doesn't scale: it's tedious to write, difficult to maintain, and the duplication makes it easy for tests to drift out of sync when requirements change. +#### Back To *Generic* Testing -### Desired Properties for Generic Testing +Since this is an article about *testing*—and generic testing, specifically!—we're not going to dwell on the numerical surprise above. -An ideal generic testing strategy should have several key properties: +Instead, we're going to focus on the testing situation, and its specific relevancy to *generic testing*: -**Write tests generically:** Following the famous "M + N instead of M × N" principle from generic programming, we want to write M test functions that can work with N types, not M × N separate test implementations. +- this was an issue we *could have* caught in unit tests +- *in fact*, we had unit tests that "would have caught it"... +- ...but only if we ran them for `Float16`, not `Double` -**Minimal invocation boilerplate:** While classic generic programming is about *writing* M algorithms for N types, testing requires actually *invoking* each of the M × N test-type pairs. 
Our case deviates from the classic formulation because we need some mechanism to trigger execution of each combination, but we want this overhead to be as lightweight as possible. +If there were a simple, obvious, and widely-known way to make that happen, this would be a short article. -**Standard framework integration:** The solution should work within vanilla XCTest or Swift Testing, not require a complex custom framework layered on top. Each test-type pair should be individually runnable and debuggable from both Xcode's GUI and the command line. +Thankfully, however, it turns out there *is* a way to write these tests in an ergonomic-and-maintainable fashion. -### Overview of Topics +## Generic Testing In `XCTest` -We'll explore three interconnected topics in this article: +As alluded to in the introduction, the `XCTest` framework contains a surprisingly-ergonomic mechanism for generic testing. +To date I've never seen this feature explicitly *suggested* in their documentation, but you'll see references to it being intended-to-work if you read through enough release notes and forum threads. -1. **The XCTest strategy:** Using generic test-case base classes that get subclassed for each concrete type—a solution that satisfies all our requirements -2. **Improvement techniques:** Ways to enhance the XCTest approach through better value provisioning and validation helper functions -3. **Swift Testing limitations:** Why there's currently no equivalently satisfactory approach for Swift Testing, despite it being more modern +In this section I'll walk through the following: -## The XCTest Strategy +- the basic concept +- some implementation details +- Xcode quirks and caveats +- why this approach is satisfactory -XCTest provides an elegant solution to generic testing through class inheritance. The approach involves creating a generic base test class containing all test methods, then creating lightweight concrete subclasses for each type you want to test. 
+### Basic Concept -### Basic Implementation +Here's the basic idea: -Let's start with a simple example testing `LinearSpan`. First, we create a generic base class: +1. create a generic test case class +2. write the *generic* test cases you want +3. define *concrete*, *trivial* subclasses for each type you want to test -```swift -class LinearSpanTests: XCTestCase where Representation: BinaryFloatingPoint { +Going back to our `LinearSpan` example, we could write the following: - // Abstract method for subclasses to provide test values - func representativeSpans() -> [LinearSpan] { - // Base implementation returns empty array - // Subclasses must override to provide actual test data - return [] +```swift +// step 1: define the generic test case class +class LinearSpanTests: XCTestCase { + // step 2: write the generic test cases you want + func testCenterIsNotEndpoint() { + for span in LinearSpan.exampleSpans where span.length > 0 { + XCTAssertLessThan(span.lowerBound, span.center) + XCTAssertLessThan(span.center, span.upperBound) + } } - - // Generic test checking point containment - func testPointContainment() { - let spans = representativeSpans() - XCTAssertFalse(spans.isEmpty, "Subclass must provide test spans") - - for span in spans { - // Points that should be inside - XCTAssertTrue(span.contains(span.lowerBound), "Lower bound should be contained") - XCTAssertTrue(span.contains(span.upperBound), "Upper bound should be contained") - - // Calculate midpoint (being careful about overflow) - let center = span.lowerBound + (span.length / 2) - if span.lowerBound < span.upperBound { - XCTAssertTrue(span.contains(center), "Center should be contained") + + // (still) step 2: write the generic test cases you want + func testTranslatedByInvariants() { + for span in LinearSpan.exampleSpans { + for translation in LinearSpan.exampleTranslations { + let translated = span.translated(by: translation) + XCTAssertEqual(translated.length, span.length) + 
XCTAssertEqual(translated.lowerBound, span.lowerBound + translation) + if translation != 0 { + XCTAssertNotEqual(translated.lowerBound, span.lowerBound) + XCTAssertNotEqual(translated.upperBound, span.upperBound) + XCTAssertNotEqual(translated.center, span.center) + } } - - // Points that should be outside - let before = span.lowerBound - abs(span.length) - let after = span.upperBound + abs(span.length) - XCTAssertFalse(span.contains(before), "Point before span shouldn't be contained") - XCTAssertFalse(span.contains(after), "Point after span shouldn't be contained") } } } + +// step 3: define concrete classes for each type you want to test +final class LinearSpanDoubleTests: LinearSpanTests {} +final class LinearSpanFloatTests: LinearSpanTests {} +final class LinearSpanFloat16Tests: LinearSpanTests {} ``` -Then we create concrete subclasses for each type we want to test: +I've put all of these steps into a single snippet for this article, but in real life would suggest splitting things up. 
-```swift -final class DoubleLinearSpanTests: LinearSpanTests { - override func representativeSpans() -> [LinearSpan] { - return [ - LinearSpan(lowerBound: 0, length: 1), - LinearSpan(lowerBound: -100, length: 200), - LinearSpan(lowerBound: 1e-10, length: 1e-8) - ] - } -} +When the concrete subclasses are truly-trivial, I'd suggest a two-file pattern: -final class FloatLinearSpanTests: LinearSpanTests { - override func representativeSpans() -> [LinearSpan] { - return [ - LinearSpan(lowerBound: 0, length: 1), - LinearSpan(lowerBound: -100, length: 200), - LinearSpan(lowerBound: 1e-6, length: 1e-4) // Adjusted for Float precision - ] - } -} +- `LinearSpanTests.swift`: the generic test case class (and thus the test logic itself) +- `LinearSpanTests+ConcreteTypes.swift`: a slim file containing *just* the concrete subclasses -final class Float16LinearSpanTests: LinearSpanTests { - override func representativeSpans() -> [LinearSpan] { - return [ - LinearSpan(lowerBound: 0, length: 1), - LinearSpan(lowerBound: -100, length: 200), - LinearSpan(lowerBound: 0.001, length: 0.1) // Much coarser due to Float16 limits - ] - } -} -``` +On the other hand, if the concrete subclasses are *not* trivial, I'd suggest using one file per class: -### Xcode Quirks and Caveats +- `LinearSpanTests.swift`: the generic test case class (and thus the test logic itself) +- `LinearSpanDoubleTests.swift`: the concrete subclass for `Double` +- `LinearSpanFloatTests.swift`: the concrete subclass for `Float` +- `LinearSpanFloat16Tests.swift`: the concrete subclass for `Float16` -While this technique works well, Xcode sometimes exhibits quirky behavior with generic test classes: +If you're wondering about when the test-case subclass is non-trivial, don't fret—we'll be discussing that in the immediately-subsequent section. 
-- The test navigator may occasionally show the generic base class as runnable (it shouldn't be) -- Test discovery might briefly fail to recognize new concrete subclasses until you build -- Error messages in failed assertions sometimes show the base class name rather than the concrete subclass +### Implementation Detail: Obtaining Values, Generically -None of these issues affect the actual execution of tests, but they can be momentarily confusing during development. +As you've probably already noticed, both our original `Swift Testing` tests and their `XCTest` test ports strongly-resemble property-based tests: -### Why This Approach Is Satisfactory +- we have some generic test logic +- we have some generic way of getting "example values": + - `LinearSpan.exampleSpans` for "spans to test against" + - `LinearSpan.exampleTranslations` for "translations to test against" +- our sketches assume these methods exist, but never got into the details -The XCTest strategy meets all our desired criteria: +Once you start writing generic tests, you'll quickly find that "getting values, generically" will be a recurring, central consideration. +There's a lot of ways you can obtain these values, but from my experience there's two strategies to consider: -**Truly generic test logic:** The test methods in the base class are written once and contain no type-specific code. All type-specific behavior is isolated to the concrete subclasses. +- emulating property-style composable generators +- defining hooks in the generic base class, and manually overriding them in each concrete subclass -**Minimal dispatch overhead:** Creating a new test target requires only: -- Declaring a subclass (one line of code) -- Overriding methods to provide test values (typically just a few lines) +I'll sketch each approach enough to convey the idea without getting bogged down in the details. 
-Each concrete subclass automatically inherits all test methods from the base class, and XCTest's runtime handles test discovery and execution. The result is that adding a new type to test requires minimal boilerplate while maintaining full integration with Xcode's test runner. +#### Emulating Property-Style Composable Generators -## Improving the XCTest Approach +In this approach, you incrementally build up your example values: -While the basic XCTest strategy works well, we can enhance it in two key ways: using protocols for systematic value provisioning and extracting validation logic into helper functions. +```swift +extension LinearSpan { + static var exampleLowerBounds: [Representation] { [0, 1, -1, 42] } + static var exampleLengths: [Representation] { [0, 1, 2, 10] } + static var exampleSpans: [LinearSpan] { + mapCartesianProduct( + exampleLowerBounds, + exampleLengths, + LinearSpan.init(lowerBound:length:) + ) + } -### Generic Protocols for Test Values + static var exampleTranslations: [Representation] { [0, 1, -1, 42] } +} +``` -Rather than having each subclass independently implement `representativeSpans()`, we can use protocols to systematize how test values are provided: +Note that for real tests you would probably want more-interesting sets of example values, but this shows you the basics. 
+For cases where you need more fine-tuning for specific types, you can introduce a protocol to accommodate that: ```swift protocol LinearSpanTestValueProviding: BinaryFloatingPoint { - static var representativeSpanParameters: [(lowerBound: Self, length: Self)] { get } - static var boundaryCases: [Self] { get } - static var typicalValues: [Self] { get } + // give each of these a default implementation with the values from above + static var exampleLowerBounds: [Self] { get } + static var exampleLengths: [Self] { get } + static var exampleTranslations: [Self] { get } } -// Provide conformances for our test types -extension Double: LinearSpanTestValueProviding { - static let representativeSpanParameters = [ - (lowerBound: 0.0, length: 1.0), - (lowerBound: -100.0, length: 200.0), - (lowerBound: 1e-10, length: 1e-8), - (lowerBound: .leastNormalMagnitude, length: .ulpOfOne) - ] - - static let boundaryCases = [0.0, .infinity, -.infinity, .nan] - static let typicalValues = [0.0, 1.0, -1.0, 42.0, 1e10, 1e-10] -} +extension LinearSpan where Representation: LinearSpanTestValueProviding { + // source the values from the protocol + static var exampleLowerBounds: [Representation] { Representation.exampleLowerBounds } + static var exampleLengths: [Representation] { Representation.exampleLengths } + static var exampleTranslations: [Representation] { Representation.exampleTranslations } -extension Float16: LinearSpanTestValueProviding { - static let representativeSpanParameters = [ - (lowerBound: Float16(0), length: Float16(1)), - (lowerBound: Float16(-100), length: Float16(200)), - (lowerBound: Float16(0.001), length: Float16(0.1)) // Coarser values - ] - - static let boundaryCases = [Float16(0), .infinity, -.infinity, .nan] - static let typicalValues = [Float16(0), Float16(1), Float16(-1), Float16(42)] + // include derived things as-before, e.g. example spans, etc. 
} ``` -Now our base test class can be even more generic: +The point of this fancier approach would be if, say, you want to use different examples of "big" and "small" values for `Float16` than you do for `Double`—introducing the protocol gives you a mechanism to fine-tune the values the type-under-test drags in, *without* going all the way to mandatory hooks in the generic base class. + +#### Mandatory Hooks + +In the alternative approach, the "get me some values" logic gets moved into the generic base class, and each concrete subclass is responsible for overriding them: ```swift -class LinearSpanTests: XCTestCase - where Representation: BinaryFloatingPoint & LinearSpanTestValueProviding { - +class LinearSpanTests: XCTestCase { + func representativeSpans() -> [LinearSpan] { + // base implementation returns an empty array + // concrete subclasses must override to provide actual test data + XCTFail("Forgot to override representativeSpans in concrete subclass!") + return [] + } + func testPointContainment() { - for (lowerBound, length) in Representation.representativeSpanParameters { - let span = LinearSpan(lowerBound: lowerBound, length: length) + let spans = representativeSpans() + XCTAssertFalse(spans.isEmpty, "Subclass must provide test spans") + for span in spans { // ... test logic using span } } - - func testBoundaryBehavior() { - for value in Representation.boundaryCases { - // Test behavior with boundary values - let span = LinearSpan(lowerBound: value, length: 1) - // ... assertions about boundary behavior - } +} + +class LinearSpanDoubleTests: LinearSpanTests { + override func representativeSpans() -> [LinearSpan] { + [ + LinearSpan(lowerBound: 0, length: 1), + LinearSpan(lowerBound: -100, length: 200), + LinearSpan(lowerBound: 1e-10, length: 1e-8) + ] } } ``` -This approach starts to resemble property-based testing, where we're testing properties that should hold across a range of inputs, but with more control over the specific values used. 
+Note that for `LinearSpan` we could also have had hooks for lower bounds, lengths, and translations, but for the sake of brevity I'm only showing the "spans" hook. -### Validation Helper Functions +#### When To Use Each? -Extracting test logic into validation helpers provides two major benefits: increased semantic clarity in tests and reusability across similar test contexts. +My advice is: -Consider this validation helper for span ordering: +- for generic code involving numerical types, containers, or anything else with some kind of predictable/regular structure, favor emulating property tests +- for generic code involving wildly-divergent types (`String`, `Int`, `Data`, `URL`, all needing testing), favor the "mandatory hooks" approach -```swift -func verify( - span: LinearSpan, - isStrictlyBefore other: LinearSpan, - sourceLocation: StaticString = #filePath, - line: UInt = #line -) { - XCTAssertLessThan( - span.upperBound, other.lowerBound, - "Span \(span) should be strictly before \(other)", - file: sourceLocation, line: line - ) - - // Additional semantic checks - XCTAssertFalse( - span.overlaps(with: other), - "Strictly ordered spans should not overlap", - file: sourceLocation, line: line - ) -} +Put a bit differently: emulate property-test style generators when it's easy to generically create representative values for all types under test; fall back on mandatory hooks when there's no such generic mechanism available. -func verifyConsistentOrdering( - _ values: [T], - sourceLocation: StaticString = #filePath, - line: UInt = #line -) { - for i in 0.. 
j { - XCTAssertGreaterThanOrEqual(vi, vj, file: sourceLocation, line: line) - } - - // Verify Comparable laws - if vi < vj { - XCTAssertFalse(vj < vi, "Comparable antisymmetry violated", - file: sourceLocation, line: line) +### Implementation Detail: Validation Helpers + +Up until now, I've been writing our test logic "inline" in the test methods themselves, e.g.: + +```swift +func testTranslatedByInvariants() { + for span in LinearSpan.exampleSpans { + for translation in LinearSpan.exampleTranslations { + let translated = span.translated(by: translation) + XCTAssertEqual(translated.length, span.length) + XCTAssertEqual(translated.lowerBound, span.lowerBound + translation) + if translation != 0 { + XCTAssertNotEqual(translated.lowerBound, span.lowerBound) + XCTAssertNotEqual(translated.upperBound, span.upperBound) + XCTAssertNotEqual(translated.center, span.center) } } } } ``` -These helpers can be reused across different test contexts. For example, `verifyConsistentOrdering` is useful for testing any custom `Comparable` conformance, while `verify(span:isStrictlyBefore:)` encapsulates domain-specific invariants about span relationships. - -Here's a more complete example showing validation helpers in action: +When writing generic tests, there's a strong argument to be made for (a) extracting this logic into validation helpers and (b) taking the time to provide additional explanatory information into them. 
For example, after extracting the logic into a validation helper, the above example might look like this: ```swift -func validateTranslatedBy( +func testTranslatedByInvariants() { + for span in LinearSpan.exampleSpans { + for translation in LinearSpan.exampleTranslations { + validateSpanTranslation( + original: span, + translation: translation + ) + } + } +} + +func validateSpanTranslation( original: LinearSpan, - offset: R, - sourceLocation: StaticString = #filePath, + translation: R, + file: StaticString = #filePath, line: UInt = #line ) { - let translated = original.translated(by: offset) - - // Length should be preserved + let translated = original.translated(by: translation) + // this is a bit reusable + validateEqualLengths( + translated, + original, + "Translation should preserve length", + file: file, + line: line + ) + // this isn't super-reusable XCTAssertEqual( - translated.length, original.length, - "Translation should preserve span length", - file: sourceLocation, line: line + translated.lowerBound, + original.lowerBound + translation, + "Expected translated lower bound to be original lower bound plus translation; got \(translated.lowerBound) instead of \(original.lowerBound + translation)", + file: file, + line: line ) - - // Bounds should be shifted by offset - let expectedLower = original.lowerBound + offset - let expectedUpper = original.upperBound + offset - - // Use appropriate comparison for floating point - if offset.isFinite && original.lowerBound.isFinite { - XCTAssertEqual( - translated.lowerBound, expectedLower, - accuracy: R.ulpOfOne * max(abs(expectedLower), 1), - "Lower bound should be translated by offset", - file: sourceLocation, line: line - ) - } - - // Verify containment relationships are preserved - let testPoint = original.lowerBound + (original.length / 2) - if original.contains(testPoint) && offset.isFinite { - XCTAssertTrue( - translated.contains(testPoint + offset), - "Translated span should contain translated points", - file: 
sourceLocation, line: line + if translation != 0 { + // this is a bit reusable + validateDistinctEndpoints( + translated, + original, + "Translation should move endpoints", + file: file, + line: line ) } } + +func validateEqualLengths( + _ lhs: LinearSpan, + _ rhs: LinearSpan, + _ explanation: @autoclosure () -> String, + file: StaticString = #filePath, + line: UInt = #line +) { + XCTAssertEqual( + lhs.length, + rhs.length, + "Expected equal lengths for span \(lhs) and \(rhs), but got \(lhs.length) and \(rhs.length); explanation: \(explanation())", + file: file, + line: line + ) +} + +func validateDistinctEndpoints( + _ lhs: LinearSpan, + _ rhs: LinearSpan, + _ explanation: @autoclosure () -> String, + file: StaticString = #filePath, + line: UInt = #line +) { + XCTAssertNotEqual( + lhs.lowerBound, + rhs.lowerBound, + "Expected distinct lower bounds for span \(lhs) and \(rhs), but got \(lhs.lowerBound) and \(rhs.lowerBound); explanation: \(explanation())", + file: file, + line: line + ) + XCTAssertNotEqual( + lhs.upperBound, + rhs.upperBound, + "Expected distinct upper bounds for span \(lhs) and \(rhs), but got \(lhs.upperBound) and \(rhs.upperBound); explanation: \(explanation())", + file: file, + line: line + ) + XCTAssertNotEqual( + lhs.center, + rhs.center, + "Expected distinct centers for span \(lhs) and \(rhs), but got \(lhs.center) and \(rhs.center); explanation: \(explanation())", + file: file, + line: line + ) +} ``` -Validation helpers make tests more semantic and help identify exactly what property is being tested. They're particularly valuable when testing numerical code where the same mathematical properties need to be verified across multiple scenarios. +These examples are somewhat contrived, but illustrate the general concept. + +Although there's an appeal to having test logic captured into reusable helpers like this, for this use case I consider that a secondary benefit.
The *primary* benefit, here, is as follows: + +- we're testing against programmatically-generated example values (and probably *a lot* of them) +- if we have any failures, we're probably going to have *lots of them* +- compared to hand-written tests, we have much less context available about the values and specifics[^7] -## Swift Testing Limitations +As such, for this type of testing I think it's worth the extra effort to write high-quality failure messages, etc., but to keep that approach feasible it'll behoove you to extract the test logic into helpers. -Despite Swift Testing being the more modern framework with better Swift integration in many ways, it currently lacks any satisfactory approach for generic testing comparable to what we've achieved with XCTest. +[^7]: Even setting breakpoints near failing assertions can be tricky in this paradigm, due to the "risk" of having a comparatively-small number of failures spread across a *lot* of successful test invocations. -### No Generic Test-Case Classes +### Implementation Detail: Controlling Failure Quantity -Swift Testing doesn't have test-case classes at all—tests are just functions, potentially organized within structs annotated with `@Suite`. This fundamental architectural difference means the XCTest inheritance strategy has no direct equivalent. +As a related point, an unfortunate reality of all the Xcodes I've known and "loved" is that the UI is not built to handle having hundreds-or-thousands of failures at the same line of source code. This is one of my pet peeves and may someday make a guest appearance in a longer article about property-testing in Swift, but that's a story for another day.
For this article, let's just say that the scale looks like: -### Test Functions Cannot Be Generic +- 1 failure at a single line: everything's ok +- 10 failures at a single line: questionable +- 100+ failures at a single line: expect hangs, freezes, and other Xcode quirks -You might hope to write something like: +To mitigate this, I'd strongly encourage putting something like the below somewhere in your codebase: ```swift -@Test -func testSpanTranslation() { - let span = LinearSpan(lowerBound: 0, length: 1) - // ... test logic +extension XCTestCase { + + func haltingAfterFirstFailure(_ body: () throws -> R) rethrows -> R { + let previousPreference = continueAfterFailure + defer { continueAfterFailure = previousPreference } + continueAfterFailure = false + return try body() + } + } ``` -But this isn't supported—test functions in Swift Testing cannot have generic parameters. The framework needs to know all test functions at compile time with concrete signatures. - -### Failed Approach: Metatypes and Parameter Packs - -One might attempt to use Swift Testing's parameterized test feature with metatypes: +With that in place, you'd then wrap your test logic in `haltingAfterFirstFailure`, e.g.: ```swift -@Test( - arguments: [ - Float16.self as any BinaryFloatingPoint.Type, - Float.self as any BinaryFloatingPoint.Type, - Double.self as any BinaryFloatingPoint.Type - ] -) -func testWithMetatype(type: any BinaryFloatingPoint.Type) { - // Attempt to use 'type' to perform generic testing... +func testTranslatedByInvariants() { + haltingAfterFirstFailure { + for span in LinearSpan.exampleSpans { + for translation in LinearSpan.exampleTranslations { + validateSpanTranslation( + original: span, + translation: translation + ) + } + } + } } ``` -Unfortunately, this approach quickly hits fundamental limitations. What you can do with protocol-metatype values is extremely limited—you can't use them to instantiate generic types or call generic functions in any useful way. 
Parameter packs don't help here either, as they solve a different problem (variadic generic parameters) and still require compile-time resolution. +Alternatively, you could simply override `continueAfterFailure` in your generic test case class to default-to `false` (instead of its original default of `true`). If all of your tests are heavyweight, I'd recommend that instead of `haltingAfterFirstFailure`, in fact—go with the method that's appropriate for your use case. + +It'd be better yet if Xcode were more robust on this front, but, again, that's a topic for another day—for now, this seems to be making a reasonable trade-off. + +### Xcode Quirks and Caveats + +Although this *mechanism* is intentionally-present and fully-supported, it's consistently confused Xcode and even some of the CLI tooling. +This doesn't impact its *functionality*, but depending on the tooling and version, don't be surprised if you see quirky behavior: + +- the test navigator may occasionally show the generic base class as runnable (it shouldn't be) +- test discovery might briefly fail to recognize new concrete subclasses—or even any subclasses—until you build +- error messages in failed assertions sometimes show the base class name rather than the concrete subclass + +None of these issues affect the actual execution of tests, but they can certainly be confusing. + +### Why Is This Satisfactory? + +Other than `XCTest` being less ergonomic than Swift Testing, the mechanism I just described is very ergonomic: + +- you write your generic test code *once* (in the base, generic class) +- the effort to *run* your test against a specific type parameter is close to the minimum conceivable +- there's *some* effort needed to provide values for testing, but it can be kept light-weight via shrewd use of generics -### Macro-Based Solutions: A Future Possibility? +This brings us close to the M + N scaling we'd want, and keeps the overall test suite highly maintainable.
-The most promising future direction appears to be custom macros that could generate the necessary boilerplate. Imagine something like: +Perhaps in the future Swift Testing will gain a similar mechanism, but, until then, if you need generic testing capabilities, you can do it via `XCTest`. + +## Swift Testing: Why It Doesn't Work (Yet) + +You might reasonably wonder: can we do this in Swift Testing? After all, it's more modern, more Swift-native, and generally more ergonomic than XCTest. + +Unfortunately, the answer is no—and it's not for lack of trying. The fundamental issue is architectural: Swift Testing discovers all tests at compile time through its `@Test` attribute, which means: + +- test functions cannot be generic (they need concrete types at compile time) +- you can define test suites as structs, but there's no equivalent mechanism like class inheritance to exploit +- parameterized tests with metatypes don't work because metatype values are too limited at runtime + +To unpack that last point, here's something that you might hope would work, but doesn't work out: ```swift -@Suite("LinearSpan") -@GenerateTestSpecializations(types: Float16.self, Float.self, Double.self) -struct LinearSpanTests { - - @GenericTestTemplate("Translation preserves length ({{typename}})") - private func _testTranslation(type: T.Type) { - let span = LinearSpan(lowerBound: 0, length: 1) - let translated = span.translated(by: 10) - #expect(translated.length == span.length) - } +@Test(arguments: [Double.self, Float.self, Float16.self]) +func testSpan(type: any BinaryFloatingPoint.Type) { + // Can't instantiate LinearSpan with a metatype + // Can't call generic functions with the metatype + // Can't really do anything useful here } ``` -This would expand to create individual test functions for each type. However, this remains speculative—at time of writing, the necessary macro capabilities are either unavailable or still behind feature flags.
Additionally, designing such a system well would require careful consideration of parameterized tests, multiple generic parameters, and how to specify type-dependent test metadata. +Even writing a generic validation function won't help us, despite looking like it might: -### Current Recommendation: Stick with XCTest +```swift +func validateSpanBehavior(type: T.Type) { + // here we *can* write generic code against `T`, and thus can have our test logic... +} -Given these limitations, if you need to write generic tests for generic Swift code at any non-trivial scale, XCTest remains the better choice. While you could write validation helpers and copy-paste concrete test functions in Swift Testing, this approach only works for very small test suites. +// but this still won't work +@Test(arguments: [Double.self, Float.self, Float16.self]) +func testSpan(type: any BinaryFloatingPoint.Type) { + validateSpanBehavior(type: type) + // ^ this doesn't work because *in this context* `type` is just a metatype, + // and doesn't give us a way to invoke the generic function we're trying to call. +} +``` -For simple cases with just a few tests and types, the copy-paste approach with validation helpers is acceptable: +A bit curiously, this does work with parameter packs: ```swift -// Validation helper -func validateStackBehavior(type: T.Type, values: [T]) { - var stack = Stack() - for value in values { - stack.push(value) - } - for value in values.reversed() { - #expect(stack.pop() == value) - } +func validateSpanBehavior(type: T.Type) { + // here we *can* write generic code against `T`, and thus can have our test logic... } -// Concrete tests (copy-pasted) -@Test func testStack_Int() { - validateStackBehavior(type: Int.self, values: [1, 2, 3]) +func validateSpans(types: repeat (each T.Type)) { + for type in types { + // this actually works! + validateSpanBehavior(type: type) + } } +``` + +But, unfortunately, this doesn't work in a generic way for arbitrary closures, e.g. 
this won't work: -@Test func testStack_String() { - validateStackBehavior(type: String.self, values: ["a", "b", "c"]) +```swift +func forEachBinaryFloatingPointType( + _ types: repeat (each T).type, + body: (T.Type) -> Void +) { + for type in types { + body(type) + } } ``` -But this doesn't scale—once you have dozens of tests across multiple types, the maintenance burden becomes untenable. +The specific limitation is that `body` itself needs to be a generic function, but Swift doesn't currently support such "generic closures" (e.g. generic functions can take closures, but the closures aren't themselves generic). -## Conclusion +Since this isn't an article about Swift's type arcana I'll cut it off here—suffice to say there's a lot of promising leads that uniformly fail to pan out. -Generic testing—writing test suites that are themselves generic—is a powerful technique for validating generic Swift code, particularly when that code's correctness depends on subtle properties of the concrete types being used. This is especially important for numerical and algorithmic code, where behaviors can vary significantly between types like `Float16`, `Float`, and `Double`. +## Conclusion -The XCTest-based approach using generic base classes provides an excellent solution that meets all our requirements: truly generic test logic, minimal invocation overhead, and full integration with standard tooling. Combined with validation helpers and systematic value provisioning through protocols, it creates a robust testing strategy that scales well. +Generic testing is a powerful technique that becomes essential when your generic code's correctness depends on subtle properties of its type parameters. This is particularly true for: -While it's somewhat ironic that the older XCTest framework handles this advanced use case better than the more modern Swift Testing, the current reality is clear: if you need generic testing capabilities, XCTest is the way to go. 
Swift Testing may eventually grow to support these use cases—perhaps through macros or other language features—but for now, the situation is what it is. +- **Numerical code** where precision limits and floating-point quirks vary dramatically between types +- **Custom collections** wherein correctness hinges on complex api contracts involving multiple types +- **Protocol-heavy code** where conformances might have subtle semantic shortcomings -The good news is that the XCTest solution works well. It's battle-tested, integrates perfectly with Xcode, and provides all the flexibility needed to thoroughly test generic code. For those working on libraries with complex generic algorithms or numerical code with multiple floating-point types, mastering this pattern is well worth the investment. +When you need generic testing, the XCTest approach illustrated in this article is available and surprisingly-ergonomic. +Perhaps someday Swift Testing will provide a similar mechanism, but until then this seems to be the best option. diff --git a/src/content/projects/trop/index.md b/src/content/projects/trop/index.md index 47eb990..d4cf917 100644 --- a/src/content/projects/trop/index.md +++ b/src/content/projects/trop/index.md @@ -28,7 +28,7 @@ my-server --port $(trop reserve) The *motivation* for this tool was to streamline the "simultaneous agents in multiple worktrees"-style workflows, e.g. wherein: - you have multiple claude code instances operating concurrently -- each instance is working on a distinct task +- each instance is working on a distinct task - each instance is working within a distinct worktree `trop` exists because using that workflow with *small-and-simple* projects can easily lead to port collisions. 
@@ -84,7 +84,7 @@ In other words, it makes it a lot easier to keep track of things: #### Token Efficiency -Since `trop` can be used as a drop-in substitute for hardcoded port numbers, it makes *launching servers* a bit more token-efficient: +Since `trop` can be used as a drop-in substitute for hardcoded port numbers, it makes *launching servers* a bit more token-efficient: - the agent knows the port it should use - it directly invokes the server at that port @@ -94,7 +94,7 @@ Nothing magic, but reduces the need to burn tokens either (a) identifying a port #### Cross-File Consistency -I think this is the strongest benefit for `trop`, and it's something I didn't anticipate when I started the project: +I think this is the strongest benefit for `trop`, and it's something I didn't anticipate when I started the project: - `trop reserve` is idempotent(ish) vis-a-vis the invocation path - coding agents are (usually) invoked from the worktree root @@ -112,7 +112,7 @@ Nothing *earth-shattering*, but still a useful capability to help with token-eff ### Overall Justification -The tl;dr, here is that the tool is solving an easily-solvable, "already-solved" problem, but in a way that's particularly-helpful when using worktrees and agentic coding assistants. +The tl;dr here is that the tool is solving an easily-solvable, "already-solved" problem, but in a way that's particularly-helpful when using worktrees and agentic coding assistants. ## Implementation Remarks @@ -152,7 +152,7 @@ Despite having to undertake a few significant interventions, my overall experien Overall, I'd say that we're closer than I thought to being able to write a detailed spec and have coding agents diligently implement it (and implement it *correctly*, at that). We may even be at that point, in fact, if your skill level is high enough and your strategy is sufficiently sophisticated. 
-As a final remark—just to put some concrete measurements on the table—I'd ballpark this project as just about 1 week of end-to-end, full-time work. +As a final remark—just to put some concrete measurements on the table—I'd ballpark this project as just about 1 week of end-to-end, full-time work. It's hard to be too precise because I was doing this in-between other things, but to the best of my recollection: - 2 full days spent writing-and-revising the specification diff --git a/src/layouts/PageLayout.astro b/src/layouts/PageLayout.astro index b9a67a7..bb8017c 100644 --- a/src/layouts/PageLayout.astro +++ b/src/layouts/PageLayout.astro @@ -4,6 +4,7 @@ import Header from "@components/Header.astro"; import Footer from "@components/Footer.astro"; import { SITE } from "@consts"; import type { OpenGraphData } from "@lib/opengraph"; +import { stripMarkdown } from "@lib/markdown"; type Props = { title: string; @@ -12,12 +13,13 @@ type Props = { }; const { title, description, ogData } = Astro.props; +const plainTitle = stripMarkdown(title); --- - +
diff --git a/src/lib/markdown.ts b/src/lib/markdown.ts index 4daa0cb..45512df 100644 --- a/src/lib/markdown.ts +++ b/src/lib/markdown.ts @@ -1,6 +1,6 @@ export function renderInlineMarkdown(text: string): string { if (!text) return ""; - + // Helper function to escape HTML entities const escapeHtml = (str: string): string => { return str @@ -10,26 +10,52 @@ export function renderInlineMarkdown(text: string): string { .replace(/"/g, """) .replace(/'/g, "'"); }; - + // Process inline markdown patterns let html = text // Code: `text` - escape content inside backticks, then wrap in .replace(/`([^`]+)`/g, (_, content) => `${escapeHtml(content)}`) - + // Bold: **text** or __text__ - escape content, then wrap in .replace(/\*\*([^*]+)\*\*/g, (_, content) => `${escapeHtml(content)}`) .replace(/__([^_]+)__/g, (_, content) => `${escapeHtml(content)}`) - + // Italic: *text* or _text_ (but not part of bold) - escape content, then wrap in .replace(/(? `${escapeHtml(content)}`) .replace(/(? `${escapeHtml(content)}`) - + // Strikethrough: ~~text~~ - escape content, then wrap in .replace(/~~([^~]+)~~/g, (_, content) => `${escapeHtml(content)}`); - + // Escape any remaining unprocessed text (text outside of markdown patterns) // This is tricky because we need to avoid escaping the HTML we just created // For now, we'll leave plain text unescaped since Astro should handle it - + return html; +} + +/** + * Strip markdown and HTML from text, returning plain text. + * Useful for meta tags, alt text, and other contexts where plain text is needed. 
+ */ +export function stripMarkdown(text: string): string { + if (!text) return ""; + + return text + // Remove code: `text` + .replace(/`([^`]+)`/g, "$1") + + // Remove bold: **text** or __text__ + .replace(/\*\*([^*]+)\*\*/g, "$1") + .replace(/__([^_]+)__/g, "$1") + + // Remove italic: *text* or _text_ + .replace(/(?]*>/g, ""); } \ No newline at end of file diff --git a/src/lib/opengraph.ts b/src/lib/opengraph.ts index 2364438..c0acaf8 100644 --- a/src/lib/opengraph.ts +++ b/src/lib/opengraph.ts @@ -1,5 +1,6 @@ import type { CollectionEntry } from "astro:content"; import { SITE } from "@consts"; +import { stripMarkdown } from "./markdown"; export interface OpenGraphData { title: string; @@ -68,17 +69,17 @@ export function getPostOGData( url: string, siteUrl: string ): OpenGraphData { - const ogTitle = post.data.ogTitle || post.data.title; + const ogTitle = stripMarkdown(post.data.ogTitle || post.data.title); const ogDescription = post.data.ogDescription || post.data.description; - + let ogImage = post.data.ogImage; if (!ogImage && !post.data.noOgImage) { ogImage = generateTailgraphURL({ - title: post.data.cardTitle || post.data.title, - subtitle: post.data.date.toLocaleDateString("en-US", { - year: "numeric", - month: "long", - day: "numeric" + title: stripMarkdown(post.data.cardTitle || post.data.title), + subtitle: post.data.date.toLocaleDateString("en-US", { + year: "numeric", + month: "long", + day: "numeric" }), author: "plx", theme: "dark", @@ -86,7 +87,7 @@ export function getPostOGData( logo: `${siteUrl}/default-og-image.jpg` }); } - + return { title: ogTitle, description: ogDescription, @@ -116,21 +117,21 @@ export function getBriefOGData( url: string, siteUrl: string ): OpenGraphData { - const ogTitle = brief.data.ogTitle || brief.data.title; + const ogTitle = stripMarkdown(brief.data.ogTitle || brief.data.title); const ogDescription = brief.data.ogDescription || brief.data.description; - + let ogImage = brief.data.ogImage; if (!ogImage && 
!brief.data.noOgImage) { ogImage = generateTailgraphURL({ - title: brief.data.cardTitle || brief.data.title, - subtitle: category?.titlePrefix || category?.displayName || "Brief", + title: stripMarkdown(brief.data.cardTitle || brief.data.title), + subtitle: stripMarkdown(category?.titlePrefix || category?.displayName || "Brief"), author: "plx", theme: "dark", backgroundImage: "gradient", logo: `${siteUrl}/default-og-image.jpg` }); } - + return { title: ogTitle, description: ogDescription, @@ -159,13 +160,13 @@ export function getProjectOGData( url: string, siteUrl: string ): OpenGraphData { - const ogTitle = project.data.ogTitle || project.data.title; + const ogTitle = stripMarkdown(project.data.ogTitle || project.data.title); const ogDescription = project.data.ogDescription || project.data.description; - + let ogImage = project.data.ogImage; if (!ogImage && !project.data.noOgImage) { ogImage = generateTailgraphURL({ - title: project.data.title, + title: stripMarkdown(project.data.title), subtitle: "Project", author: "plx", theme: "dark", @@ -173,7 +174,7 @@ export function getProjectOGData( logo: `${siteUrl}/default-og-image.jpg` }); } - + return { title: ogTitle, description: ogDescription, diff --git a/src/pages/blog/[...slug].astro b/src/pages/blog/[...slug].astro index 38d5e1c..e048cdd 100644 --- a/src/pages/blog/[...slug].astro +++ b/src/pages/blog/[...slug].astro @@ -6,6 +6,7 @@ import FormattedDate from "@components/FormattedDate.astro"; import { readingTime } from "@lib/utils"; import BackToPrev from "@components/BackToPrev.astro"; import { getPostOGData } from "@lib/opengraph"; +import { renderInlineMarkdown } from "@lib/markdown"; export async function getStaticPaths() { const posts = (await getCollection("blog")) @@ -22,6 +23,7 @@ const post = Astro.props; const { Content } = await post.render(); const ogData = getPostOGData(post, Astro.url.toString(), Astro.site?.toString() || ""); +const renderedTitle = renderInlineMarkdown(post.data.title); --- @@ 
-41,9 +43,7 @@ const ogData = getPostOGData(post, Astro.url.toString(), Astro.site?.toString() {readingTime(post.body)} -
- {post.data.title} -
+

diff --git a/src/pages/briefs/[...slug].astro b/src/pages/briefs/[...slug].astro index d17d159..9db35ac 100644 --- a/src/pages/briefs/[...slug].astro +++ b/src/pages/briefs/[...slug].astro @@ -27,6 +27,7 @@ const categorySlug = extractCategoryFromSlug(brief.slug); const category = categorySlug ? getCategory(categorySlug, `src/content/briefs/${categorySlug}`) : null; const ogData = getBriefOGData(brief, category, Astro.url.toString(), Astro.site?.toString() || ""); const renderedTitlePrefix = category?.titlePrefix ? renderInlineMarkdown(category.titlePrefix) : null; +const renderedTitle = renderInlineMarkdown(brief.data.title); --- @@ -49,9 +50,7 @@ const renderedTitlePrefix = category?.titlePrefix ? renderInlineMarkdown(categor )} -
- {brief.data.title} -
+

diff --git a/src/pages/projects/[...slug].astro b/src/pages/projects/[...slug].astro index 295c984..2e74f7c 100644 --- a/src/pages/projects/[...slug].astro +++ b/src/pages/projects/[...slug].astro @@ -7,6 +7,7 @@ import { readingTime } from "@lib/utils"; import BackToPrev from "@components/BackToPrev.astro"; import Link from "@components/Link.astro"; import { getProjectOGData } from "@lib/opengraph"; +import { renderInlineMarkdown } from "@lib/markdown"; export async function getStaticPaths() { const projects = (await getCollection("projects")) @@ -23,6 +24,7 @@ const project = Astro.props; const { Content } = await project.render(); const ogData = getProjectOGData(project, Astro.url.toString(), Astro.site?.toString() || ""); +const renderedTitle = renderInlineMarkdown(project.data.title); --- @@ -42,9 +44,7 @@ const ogData = getProjectOGData(project, Astro.url.toString(), Astro.site?.toStr {readingTime(project.body)} -
- {project.data.title} -
+

{(project.data.demoURL || project.data.repoURL) && (