From b6e4c0c5be2a0608e72982fa4e77c803f3e84b10 Mon Sep 17 00:00:00 2001 From: Mike Hartington Date: Mon, 23 Feb 2026 10:24:08 -0500 Subject: [PATCH 1/8] chore(): improve new link checker --- apps/docs/scripts/lint-external-links.ts | 98 +++++++++++++++++++----- 1 file changed, 79 insertions(+), 19 deletions(-) diff --git a/apps/docs/scripts/lint-external-links.ts b/apps/docs/scripts/lint-external-links.ts index cafd50510e..f16e0f8c3e 100644 --- a/apps/docs/scripts/lint-external-links.ts +++ b/apps/docs/scripts/lint-external-links.ts @@ -9,7 +9,8 @@ const __dirname = path.dirname(__filename); const CONTENT_DIR = path.join(__dirname, "../content"); const TIMEOUT_MS = 10_000; -const MAX_CONCURRENCY = 15; +const DEFAULT_FILE_CONCURRENCY = 15; +const DEFAULT_URL_CHECK_CONCURRENCY = 40; const ACCEPTED_STATUSES = new Set([403, 429]); const IMAGE_EXTENSIONS = new Set([ @@ -48,6 +49,21 @@ type FailedResult = { occurrences: LinkOccurrence[]; }; +function readPositiveIntEnv(name: string, fallback: number): number { + const raw = process.env[name]; + if (!raw) return fallback; + + const parsed = Number.parseInt(raw, 10); + if (!Number.isFinite(parsed) || parsed <= 0) { + console.warn( + `Invalid ${name} value "${raw}". 
Falling back to ${fallback}.`, + ); + return fallback; + } + + return parsed; +} + function findMarkdownFiles(dir: string, fileList: string[] = []): string[] { const files = fs.readdirSync(dir); @@ -110,7 +126,8 @@ function toExternalHttpUrl(raw: string): string | null { try { const parsed = new URL(trimmed); - if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return null; + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") + return null; if (parsed.hostname === "localhost") return null; return parsed.toString(); } catch { @@ -118,8 +135,10 @@ function toExternalHttpUrl(raw: string): string | null { } } -function collectFileLinks(filePath: string): Array<{ url: string; line: number }> { - const rawContent = fs.readFileSync(filePath, "utf8"); +async function collectFileLinks( + filePath: string, +): Promise> { + const rawContent = await fs.promises.readFile(filePath, "utf8"); const content = stripCodeBlocks(rawContent); const links: Array<{ url: string; line: number }> = []; @@ -183,7 +202,9 @@ async function fetchWithTimeout( } } -async function checkUrl(url: string): Promise<{ ok: boolean; reason?: string }> { +async function checkUrl( + url: string, +): Promise<{ ok: boolean; reason?: string }> { try { // Some providers (e.g. VS Marketplace, Bluesky) return 404 for HEAD // while the same URL works with GET in a browser. 
@@ -193,7 +214,10 @@ async function checkUrl(url: string): Promise<{ ok: boolean; reason?: string }> }); if (response.status >= 400 || response.status === 429) { - response = await fetchWithTimeout(url, { method: "GET", headers: BROWSER_HEADERS }); + response = await fetchWithTimeout(url, { + method: "GET", + headers: BROWSER_HEADERS, + }); } if (response.status === 404) { @@ -226,43 +250,79 @@ async function runWithConcurrency( ): Promise { let index = 0; - const workers = Array.from({ length: Math.min(limit, items.length) }, async () => { - while (index < items.length) { - const item = items[index]; - index += 1; - await worker(item); - } - }); + const workers = Array.from( + { length: Math.min(limit, items.length) }, + async () => { + while (index < items.length) { + const item = items[index]; + index += 1; + await worker(item); + } + }, + ); await Promise.all(workers); } +function mergeOccurrences( + target: Map, + source: Map, +): void { + for (const [url, occurrences] of source) { + const existing = target.get(url); + if (existing) { + existing.push(...occurrences); + continue; + } + target.set(url, occurrences); + } +} + async function main(): Promise { + const fileConcurrency = readPositiveIntEnv( + "EXTERNAL_LINKS_FILE_CONCURRENCY", + DEFAULT_FILE_CONCURRENCY, + ); + const urlCheckConcurrency = readPositiveIntEnv( + "EXTERNAL_LINKS_URL_CONCURRENCY", + DEFAULT_URL_CHECK_CONCURRENCY, + ); + if (!fs.existsSync(CONTENT_DIR)) { console.error(`Content directory not found: ${CONTENT_DIR}`); process.exit(1); } const files = findMarkdownFiles(CONTENT_DIR); + const occurrencesByUrl = new Map(); let extractedLinks = 0; - for (const filePath of files) { + // Parse markdown files concurrently before link validation starts. 
+ const perFileOccurrences: Array> = []; + await runWithConcurrency(files, fileConcurrency, async (filePath) => { const relativeFile = path.relative(process.cwd(), filePath); - const links = collectFileLinks(filePath); - extractedLinks += links.length; + const links = await collectFileLinks(filePath); + const localOccurrences = new Map(); for (const link of links) { - const occurrences = occurrencesByUrl.get(link.url) ?? []; + const occurrences = localOccurrences.get(link.url) ?? []; occurrences.push({ file: relativeFile, line: link.line }); - occurrencesByUrl.set(link.url, occurrences); + localOccurrences.set(link.url, occurrences); } + + perFileOccurrences.push(localOccurrences); + extractedLinks += links.length; + }); + + for (const fileOccurrences of perFileOccurrences) { + mergeOccurrences(occurrencesByUrl, fileOccurrences); } const uniqueUrls = [...occurrencesByUrl.keys()]; const failed: FailedResult[] = []; - await runWithConcurrency(uniqueUrls, MAX_CONCURRENCY, async (url) => { + await runWithConcurrency(uniqueUrls, urlCheckConcurrency, async (url) => { const result = await checkUrl(url); if (result.ok) return; From f21dec241c763e6beee5b663328c934330c3b356 Mon Sep 17 00:00:00 2001 From: Mike Hartington Date: Mon, 23 Feb 2026 10:41:19 -0500 Subject: [PATCH 2/8] chore(): swap lychee out --- .github/workflows/external-links.yml | 142 ++++++++++++++++++++++ .github/workflows/lychee.yml | 147 ----------------------- apps/docs/scripts/lint-external-links.ts | 22 ++++ 3 files changed, 164 insertions(+), 147 deletions(-) create mode 100644 .github/workflows/external-links.yml delete mode 100644 .github/workflows/lychee.yml diff --git a/.github/workflows/external-links.yml b/.github/workflows/external-links.yml new file mode 100644 index 0000000000..bdf1ba35d7 --- /dev/null +++ b/.github/workflows/external-links.yml @@ -0,0 +1,142 @@ +name: External Links + +on: + pull_request: + +concurrency: + group: external-links-${{ github.event.pull_request.number }} + 
cancel-in-progress: true + +jobs: + check: + name: Check External Links + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup pnpm + uses: pnpm/action-setup@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + cache: "pnpm" + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Run external link checker + id: external-links + shell: bash + run: | + mkdir -p external-links + set +e + pnpm run lint:external-links -- --json > external-links/report.json 2> external-links/output.txt + exit_code=$? + echo "exit_code=$exit_code" >> "$GITHUB_OUTPUT" + exit 0 + + - name: Format external links report + if: ${{ always() }} + shell: bash + run: | + REPORT_FILE="external-links/formatted.md" + JSON_FILE="external-links/report.json" + OUTPUT_FILE="external-links/output.txt" + EXIT_CODE="${{ steps.external-links.outputs.exit_code }}" + export REPORT_FILE JSON_FILE OUTPUT_FILE EXIT_CODE + node <<'EOF' + const fs = require("node:fs"); + + const reportPath = process.env.REPORT_FILE; + const jsonPath = process.env.JSON_FILE; + const outputPath = process.env.OUTPUT_FILE; + const exitCode = process.env.EXIT_CODE ?? "1"; + + function readText(path) { + if (!fs.existsSync(path)) return ""; + return fs.readFileSync(path, "utf8"); + } + + let parsed = null; + try { + const jsonText = readText(jsonPath); + if (jsonText.trim()) parsed = JSON.parse(jsonText); + } catch {} + + const outputText = readText(outputPath).trim(); + const lines = []; + lines.push("## External Link Check Report", ""); + + if (!parsed) { + lines.push("### ❌ Failed to parse JSON output from checker.", ""); + lines.push("
", "Raw output", "", "```text"); + lines.push((outputText || "No output captured.").split("\n").slice(-400).join("\n")); + lines.push("```", "
"); + fs.writeFileSync(reportPath, `${lines.join("\n")}\n`); + process.exit(0); + } + + const scanned = parsed.scannedFiles ?? "n/a"; + const extracted = parsed.extractedExternalLinks ?? "n/a"; + const unique = parsed.uniqueLinksChecked ?? "n/a"; + const failedCount = parsed.failedLinks ?? "n/a"; + const failures = Array.isArray(parsed.failures) ? parsed.failures : []; + + if (Number(exitCode) === 0) { + lines.push("### ✅ No failed external links found."); + } else { + lines.push("### ❌ Failed external links detected."); + } + + lines.push( + "", + `- Scanned files: \`${scanned}\``, + `- Extracted external links: \`${extracted}\``, + `- Unique links checked: \`${unique}\``, + `- Failed links: \`${failedCount}\``, + "", + ); + + if (failures.length > 0) { + lines.push("### Failures", ""); + for (const failure of failures.slice(0, 100)) { + lines.push(`- ${failure.url} (\`${failure.reason}\`)`); + const occurrences = Array.isArray(failure.occurrences) + ? failure.occurrences + : []; + for (const occurrence of occurrences.slice(0, 10)) { + lines.push(` - \`${occurrence.file}:${occurrence.line}\``); + } + } + if (failures.length > 100) { + lines.push("", `_Showing first 100 of ${failures.length} failures._`, ""); + } + } + + if (outputText) { + lines.push("
", "stderr output", "", "```text"); + lines.push(outputText.split("\n").slice(-400).join("\n")); + lines.push("```", "
"); + } + + fs.writeFileSync(reportPath, `${lines.join("\n")}\n`); + EOF + + - name: Comment external links report + if: ${{ always() && github.event.pull_request.head.repo.fork == false }} + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: external-links + path: external-links/formatted.md + + - name: Fail if broken links found + if: ${{ steps.external-links.outputs.exit_code != '0' }} + run: | + echo "External link checker failed." + exit 1 diff --git a/.github/workflows/lychee.yml b/.github/workflows/lychee.yml deleted file mode 100644 index 6f15961a14..0000000000 --- a/.github/workflows/lychee.yml +++ /dev/null @@ -1,147 +0,0 @@ -name: 🍈 Lychee - -on: [pull_request] - -concurrency: - group: lychee-${{ github.event.pull_request.number }} - cancel-in-progress: true - -jobs: - check: - name: Check Links - runs-on: ubuntu-latest - permissions: write-all - steps: - - uses: actions/checkout@v4 - - - name: 🍈 Lychee Link Checker (First Run) - id: lychee - uses: lycheeverse/lychee-action@v2 - with: - args: >- - --cache - --cache-exclude-status 429,500,502,503,504 - --max-cache-age 5m - --verbose - --no-progress - --exclude '.*' - --timeout 20 - --max-retries 8 - --retry-wait-time 5 - --include '^https://' - --exclude 'https://www.gnu.org' - --exclude 'https://docs.solidjs.com' - --accept 200,201,204,304,403,429 - './apps/docs/content' - output: lychee/out.md - fail: false - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: 🔄 Retry Lychee Link Checker (Second Run for Timeouts) - id: lychee-retry - if: ${{ always() && steps.lychee.outputs.exit_code != 0 }} - uses: lycheeverse/lychee-action@v2 - with: - args: >- - --cache - --cache-exclude-status 429,500,502,503,504 - --max-cache-age 5m - --verbose - --no-progress - --exclude '.*' - --timeout 30 - --max-retries 10 - --retry-wait-time 10 - --include '^https://' - --exclude 'https://www.gnu.org' - --exclude 'https://docs.solidjs.com' - --accept 200,201,204,304,403,429 - './apps/docs/content' - 
output: lychee/out-retry.md - fail: false - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: 📝 Clean up Lychee Report - if: ${{ always() && github.event.pull_request.head.repo.fork == false }} - run: | - # Use retry results if available, otherwise use first run results - if [ -f "lychee/out-retry.md" ]; then - REPORT_FILE="lychee/out-retry.md" - elif [ -f "lychee/out.md" ]; then - REPORT_FILE="lychee/out.md" - fi - - if [ -n "$REPORT_FILE" ]; then - # Parse stats from lychee markdown table - TOTAL=$(grep 'Total' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") - SUCCESS=$(grep 'Successful' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") - ERRORS=$(grep 'Errors' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") - REDIRECTS=$(grep 'Redirected' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") - EXCLUDED=$(grep 'Excluded' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") - TIMEOUTS=$(grep 'Timeouts' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") - UNKNOWN=$(grep 'Unknown' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") - UNSUPPORTED=$(grep 'Unsupported' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") - - # Extract errors section - ERRORS_SECTION=$(sed -n '/^## Errors/,/^## /p' "$REPORT_FILE" | sed '$d' | tail -n +2) - - # Format errors section - FORMATTED_ERRORS="" - while IFS= read -r line; do - if [[ $line =~ ^### ]]; then - file=$(echo "$line" | sed 's/### Errors in //') - FORMATTED_ERRORS+="\n**\`$file\`**\n" - elif [[ $line =~ ^\* ]]; then - FORMATTED_ERRORS+="$line\n" - fi - done <<< "$ERRORS_SECTION" - - # Create formatted output using echo statements - echo "## 🍈 Lychee Link Check Report" > lychee/formatted.md - echo "" >> lychee/formatted.md - echo "**$TOTAL links:** \`✅ $SUCCESS OK\` | \`🚫 $ERRORS errors\` | \`🔀 $REDIRECTS redirects\` | \`👻 
$EXCLUDED excluded\`" >> lychee/formatted.md - echo "" >> lychee/formatted.md - - if [ "$ERRORS" -eq 0 ]; then - echo "### ✅ All links are working!" >> lychee/formatted.md - else - echo "### ❌ Errors" >> lychee/formatted.md - echo -e "$FORMATTED_ERRORS" >> lychee/formatted.md - fi - echo "---" >> lychee/formatted.md - echo "" >> lychee/formatted.md - echo "<details>" >> lychee/formatted.md - echo "<summary>Full Statistics Table</summary>" >> lychee/formatted.md - echo "" >> lychee/formatted.md - echo "| Status | Count |" >> lychee/formatted.md - echo "|--------|-------|" >> lychee/formatted.md - echo "| ✅ Successful | $SUCCESS |" >> lychee/formatted.md - echo "| 🔀 Redirected | $REDIRECTS |" >> lychee/formatted.md - echo "| 👻 Excluded | $EXCLUDED |" >> lychee/formatted.md - echo "| 🚫 Errors | $ERRORS |" >> lychee/formatted.md - echo "| ⛔ Unsupported | $UNSUPPORTED |" >> lychee/formatted.md - echo "| ⏳ Timeouts | $TIMEOUTS |" >> lychee/formatted.md - echo "| ❓ Unknown | $UNKNOWN |" >> lychee/formatted.md - echo "" >> lychee/formatted.md - echo "</details>
" >> lychee/formatted.md - fi - - - name: 📝 Comment Broken Links - if: ${{ always() && github.event.pull_request.head.repo.fork == false }} - uses: marocchino/sticky-pull-request-comment@v2 - with: - header: lychee - path: lychee/formatted.md - - - name: 🚫 Fail if broken links found - if: ${{ steps.lychee-retry.conclusion == 'success' && steps.lychee-retry.outputs.exit_code != 0 || steps.lychee-retry.conclusion == 'failure' }} - run: | - if [ "${{ steps.lychee-retry.conclusion }}" == "success" ]; then - echo "Failing based on retry run results" - exit ${{ steps.lychee-retry.outputs.exit_code }} - else - echo "Failing based on first run results" - exit ${{ steps.lychee.outputs.exit_code }} - fi diff --git a/apps/docs/scripts/lint-external-links.ts b/apps/docs/scripts/lint-external-links.ts index f16e0f8c3e..e9bc00a775 100644 --- a/apps/docs/scripts/lint-external-links.ts +++ b/apps/docs/scripts/lint-external-links.ts @@ -49,6 +49,14 @@ type FailedResult = { occurrences: LinkOccurrence[]; }; +type LinkCheckReport = { + scannedFiles: number; + extractedExternalLinks: number; + uniqueLinksChecked: number; + failedLinks: number; + failures: FailedResult[]; +}; + function readPositiveIntEnv(name: string, fallback: number): number { const raw = process.env[name]; if (!raw) return fallback; @@ -279,6 +287,7 @@ function mergeOccurrences( } async function main(): Promise { + const jsonOutput = process.argv.includes("--json"); const fileConcurrency = readPositiveIntEnv( "EXTERNAL_LINKS_FILE_CONCURRENCY", DEFAULT_FILE_CONCURRENCY, @@ -335,6 +344,19 @@ async function main(): Promise { failed.sort((a, b) => a.url.localeCompare(b.url)); + const report: LinkCheckReport = { + scannedFiles: files.length, + extractedExternalLinks: extractedLinks, + uniqueLinksChecked: uniqueUrls.length, + failedLinks: failed.length, + failures: failed, + }; + + if (jsonOutput) { + console.log(JSON.stringify(report, null, 2)); + process.exit(failed.length === 0 ? 
0 : 1); + } + console.log(`Scanned files: ${files.length}`); console.log(`Extracted external links: ${extractedLinks}`); console.log(`Unique links checked: ${uniqueUrls.length}`); From ae60e404063fd1ff4ab0de909d33b916f08f36e5 Mon Sep 17 00:00:00 2001 From: Mike Hartington Date: Mon, 23 Feb 2026 10:53:57 -0500 Subject: [PATCH 3/8] chore(): fix workflow --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index daab08195c..07e8b13b0c 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ "check": "turbo run check", "types:check": "turbo run types:check", "lint:links": "turbo run lint:links --filter=docs", - "lint:external-links": "turbo run lint:external-links --filter=docs", + "lint:external-links": "turbo run lint:external-links --filter=docs --", "lint:code": "turbo run lint:code --filter=docs", "lint:spellcheck": "turbo run lint:spellcheck --filter=docs" }, From 4e70f2c7c125d3a01001eeeeb2ff9ee742d62bdb Mon Sep 17 00:00:00 2001 From: Mike Hartington Date: Mon, 23 Feb 2026 12:44:06 -0500 Subject: [PATCH 4/8] fix --- .github/workflows/external-links.yml | 156 +++++++++++++-------------- turbo.json | 2 +- 2 files changed, 77 insertions(+), 81 deletions(-) diff --git a/.github/workflows/external-links.yml b/.github/workflows/external-links.yml index bdf1ba35d7..12b93c82a1 100644 --- a/.github/workflows/external-links.yml +++ b/.github/workflows/external-links.yml @@ -41,92 +41,88 @@ jobs: echo "exit_code=$exit_code" >> "$GITHUB_OUTPUT" exit 0 - - name: Format external links report + - name: Parse and print external links report if: ${{ always() }} shell: bash run: | REPORT_FILE="external-links/formatted.md" JSON_FILE="external-links/report.json" OUTPUT_FILE="external-links/output.txt" - EXIT_CODE="${{ steps.external-links.outputs.exit_code }}" - export REPORT_FILE JSON_FILE OUTPUT_FILE EXIT_CODE - node <<'EOF' - const fs = require("node:fs"); - - const reportPath = process.env.REPORT_FILE; - const 
jsonPath = process.env.JSON_FILE; - const outputPath = process.env.OUTPUT_FILE; - const exitCode = process.env.EXIT_CODE ?? "1"; - - function readText(path) { - if (!fs.existsSync(path)) return ""; - return fs.readFileSync(path, "utf8"); - } - - let parsed = null; - try { - const jsonText = readText(jsonPath); - if (jsonText.trim()) parsed = JSON.parse(jsonText); - } catch {} - - const outputText = readText(outputPath).trim(); - const lines = []; - lines.push("## External Link Check Report", ""); - - if (!parsed) { - lines.push("### ❌ Failed to parse JSON output from checker.", ""); - lines.push("<details>", "<summary>Raw output</summary>", "", "```text"); - lines.push((outputText || "No output captured.").split("\n").slice(-400).join("\n")); - lines.push("```", "</details>
"); - fs.writeFileSync(reportPath, `${lines.join("\n")}\n`); - process.exit(0); - } - - const scanned = parsed.scannedFiles ?? "n/a"; - const extracted = parsed.extractedExternalLinks ?? "n/a"; - const unique = parsed.uniqueLinksChecked ?? "n/a"; - const failedCount = parsed.failedLinks ?? "n/a"; - const failures = Array.isArray(parsed.failures) ? parsed.failures : []; - - if (Number(exitCode) === 0) { - lines.push("### ✅ No failed external links found."); - } else { - lines.push("### ❌ Failed external links detected."); - } - - lines.push( - "", - `- Scanned files: \`${scanned}\``, - `- Extracted external links: \`${extracted}\``, - `- Unique links checked: \`${unique}\``, - `- Failed links: \`${failedCount}\``, - "", - ); - - if (failures.length > 0) { - lines.push("### Failures", ""); - for (const failure of failures.slice(0, 100)) { - lines.push(`- ${failure.url} (\`${failure.reason}\`)`); - const occurrences = Array.isArray(failure.occurrences) - ? failure.occurrences - : []; - for (const occurrence of occurrences.slice(0, 10)) { - lines.push(` - \`${occurrence.file}:${occurrence.line}\``); - } - } - if (failures.length > 100) { - lines.push("", `_Showing first 100 of ${failures.length} failures._`, ""); - } - } - - if (outputText) { - lines.push("
", "stderr output", "", "```text"); - lines.push(outputText.split("\n").slice(-400).join("\n")); - lines.push("```", "
"); - } - - fs.writeFileSync(reportPath, `${lines.join("\n")}\n`); - EOF + if [ ! -s "$JSON_FILE" ]; then + echo "::warning::No JSON output found at $JSON_FILE. Using empty object." + echo "{}" > "$JSON_FILE" + fi + + if ! jq . "$JSON_FILE" >/dev/null; then + echo "::error::Invalid JSON in $JSON_FILE" + if [ -s "$OUTPUT_FILE" ]; then + echo "stderr output:" + cat "$OUTPUT_FILE" + fi + exit 1 + fi + + echo "External links JSON report:" + jq . "$JSON_FILE" + echo "" + + echo "External links summary:" + jq -r ' + "- Scanned files: \(.scannedFiles // "n/a")", + "- Extracted external links: \(.extractedExternalLinks // "n/a")", + "- Unique links checked: \(.uniqueLinksChecked // "n/a")", + "- Failed links: \(.failedLinks // "n/a")" + ' "$JSON_FILE" + echo "" + + FAILED_COUNT="$(jq -r '.failedLinks // 0' "$JSON_FILE")" + if [ "$FAILED_COUNT" -gt 0 ]; then + echo "Failures:" + jq -r ' + .failures[]? as $failure + | "- \($failure.url) (\($failure.reason))", + ($failure.occurrences[]? | " - \(.file):\(.line)") + ' "$JSON_FILE" + else + echo "No failed external links found." + fi + + { + echo "## External Link Check Report" + echo "" + jq -r ' + "- Scanned files: `\(.scannedFiles // "n/a")`", + "- Extracted external links: `\(.extractedExternalLinks // "n/a")`", + "- Unique links checked: `\(.uniqueLinksChecked // "n/a")`", + "- Failed links: `\(.failedLinks // "n/a")`" + ' "$JSON_FILE" + echo "" + + if [ "$FAILED_COUNT" -gt 0 ]; then + echo "### Failures" + echo "" + jq -r ' + .failures[]? as $failure + | "- \($failure.url) (`\($failure.reason)`)", + ($failure.occurrences[]? | " - `\(.file):\(.line)`") + ' "$JSON_FILE" + else + echo "### ✅ No failed external links found." + fi + + if [ -s "$OUTPUT_FILE" ]; then + echo "" + echo "
" + echo "stderr output" + echo "" + echo '```text' + cat "$OUTPUT_FILE" + echo '```' + echo "
" + fi + } > "$REPORT_FILE" + + cat "$REPORT_FILE" >> "$GITHUB_STEP_SUMMARY" - name: Comment external links report if: ${{ always() && github.event.pull_request.head.repo.fork == false }} diff --git a/turbo.json b/turbo.json index d67f90b84c..da9e4f66d8 100644 --- a/turbo.json +++ b/turbo.json @@ -33,4 +33,4 @@ "lint:code": {}, "lint:spellcheck": {} } -} \ No newline at end of file +} From 62c946bddd6d13afbb177e5d66f3d23083d61c9b Mon Sep 17 00:00:00 2001 From: Mike Hartington Date: Mon, 23 Feb 2026 13:00:33 -0500 Subject: [PATCH 5/8] fix --- .github/workflows/external-links.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/external-links.yml b/.github/workflows/external-links.yml index 12b93c82a1..afa218fd10 100644 --- a/.github/workflows/external-links.yml +++ b/.github/workflows/external-links.yml @@ -36,7 +36,7 @@ jobs: run: | mkdir -p external-links set +e - pnpm run lint:external-links -- --json > external-links/report.json 2> external-links/output.txt + pnpm --dir apps/docs exec tsx ./scripts/lint-external-links.ts --json > external-links/report.json 2> external-links/output.txt exit_code=$? echo "exit_code=$exit_code" >> "$GITHUB_OUTPUT" exit 0 From 71b4c532a0de2e3711c4810c3ba0218a604a8801 Mon Sep 17 00:00:00 2001 From: Mike Hartington Date: Mon, 23 Feb 2026 13:04:05 -0500 Subject: [PATCH 6/8] fix --- .github/workflows/external-links.yml | 32 +++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/.github/workflows/external-links.yml b/.github/workflows/external-links.yml index afa218fd10..a4b9dd8456 100644 --- a/.github/workflows/external-links.yml +++ b/.github/workflows/external-links.yml @@ -48,6 +48,10 @@ jobs: REPORT_FILE="external-links/formatted.md" JSON_FILE="external-links/report.json" OUTPUT_FILE="external-links/output.txt" + { + echo "## External Link Check Report" + echo "" + } > "$REPORT_FILE" if [ ! 
-s "$JSON_FILE" ]; then echo "::warning::No JSON output found at $JSON_FILE. Using empty object." echo "{}" > "$JSON_FILE" @@ -55,10 +59,34 @@ jobs: if ! jq . "$JSON_FILE" >/dev/null; then echo "::error::Invalid JSON in $JSON_FILE" + { + echo "### ❌ Invalid JSON report output" + echo "" + echo "The external link checker output was not valid JSON." + echo "" + echo "<details>" + echo "<summary>Raw report.json</summary>" + echo "" + echo '```text' + cat "$JSON_FILE" + echo '```' + echo "</details>" + } >> "$REPORT_FILE" if [ -s "$OUTPUT_FILE" ]; then echo "stderr output:" cat "$OUTPUT_FILE" + { + echo "" + echo "<details>" + echo "<summary>stderr output</summary>" + echo "" + echo '```text' + cat "$OUTPUT_FILE" + echo '```' + echo "</details>
" + } >> "$REPORT_FILE" fi + cat "$REPORT_FILE" >> "$GITHUB_STEP_SUMMARY" exit 1 fi @@ -88,8 +116,6 @@ jobs: fi { - echo "## External Link Check Report" - echo "" jq -r ' "- Scanned files: `\(.scannedFiles // "n/a")`", "- Extracted external links: `\(.extractedExternalLinks // "n/a")`", @@ -125,7 +151,7 @@ jobs: cat "$REPORT_FILE" >> "$GITHUB_STEP_SUMMARY" - name: Comment external links report - if: ${{ always() && github.event.pull_request.head.repo.fork == false }} + if: ${{ always() && github.event.pull_request.head.repo.fork == false && hashFiles('external-links/formatted.md') != '' }} uses: marocchino/sticky-pull-request-comment@v2 with: header: external-links From 6e04c5763746a3f3907e6139eb37d96b7b47e903 Mon Sep 17 00:00:00 2001 From: Mike Hartington Date: Mon, 23 Feb 2026 13:51:18 -0500 Subject: [PATCH 7/8] fix --- .github/workflows/external-links.yml | 14 +++++++++----- apps/docs/scripts/lint-external-links.ts | 7 ++++++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/external-links.yml b/.github/workflows/external-links.yml index a4b9dd8456..a19b4aa1c9 100644 --- a/.github/workflows/external-links.yml +++ b/.github/workflows/external-links.yml @@ -33,6 +33,10 @@ jobs: - name: Run external link checker id: external-links shell: bash + timeout-minutes: 30 + env: + EXTERNAL_LINKS_FILE_CONCURRENCY: "8" + EXTERNAL_LINKS_URL_CONCURRENCY: "10" run: | mkdir -p external-links set +e @@ -48,10 +52,6 @@ jobs: REPORT_FILE="external-links/formatted.md" JSON_FILE="external-links/report.json" OUTPUT_FILE="external-links/output.txt" - { - echo "## External Link Check Report" - echo "" - } > "$REPORT_FILE" if [ ! -s "$JSON_FILE" ]; then echo "::warning::No JSON output found at $JSON_FILE. Using empty object." echo "{}" > "$JSON_FILE" @@ -60,6 +60,8 @@ jobs: if ! jq . 
"$JSON_FILE" >/dev/null; then echo "::error::Invalid JSON in $JSON_FILE" { + echo "## External Link Check Report" + echo "" echo "### ❌ Invalid JSON report output" echo "" echo "The external link checker output was not valid JSON." @@ -71,7 +73,7 @@ jobs: cat "$JSON_FILE" echo '```' echo "" - } >> "$REPORT_FILE" + } > "$REPORT_FILE" if [ -s "$OUTPUT_FILE" ]; then echo "stderr output:" cat "$OUTPUT_FILE" @@ -116,6 +118,8 @@ jobs: fi { + echo "## External Link Check Report" + echo "" jq -r ' "- Scanned files: `\(.scannedFiles // "n/a")`", "- Extracted external links: `\(.extractedExternalLinks // "n/a")`", diff --git a/apps/docs/scripts/lint-external-links.ts b/apps/docs/scripts/lint-external-links.ts index e9bc00a775..10b3af8735 100644 --- a/apps/docs/scripts/lint-external-links.ts +++ b/apps/docs/scripts/lint-external-links.ts @@ -8,7 +8,7 @@ const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); const CONTENT_DIR = path.join(__dirname, "../content"); -const TIMEOUT_MS = 10_000; +const DEFAULT_TIMEOUT_MS = 20_000; const DEFAULT_FILE_CONCURRENCY = 15; const DEFAULT_URL_CHECK_CONCURRENCY = 40; const ACCEPTED_STATUSES = new Set([403, 429]); @@ -72,6 +72,11 @@ function readPositiveIntEnv(name: string, fallback: number): number { return parsed; } +const TIMEOUT_MS = readPositiveIntEnv( + "EXTERNAL_LINKS_TIMEOUT_MS", + DEFAULT_TIMEOUT_MS, +); + function findMarkdownFiles(dir: string, fileList: string[] = []): string[] { const files = fs.readdirSync(dir); From fc287e216bff705ad89d85a94eff244e0d24f04f Mon Sep 17 00:00:00 2001 From: Mike Hartington Date: Mon, 23 Feb 2026 14:10:18 -0500 Subject: [PATCH 8/8] revert back to lychee --- .github/workflows/external-links.yml | 267 ++++++++++++--------------- 1 file changed, 123 insertions(+), 144 deletions(-) diff --git a/.github/workflows/external-links.yml b/.github/workflows/external-links.yml index a19b4aa1c9..6f15961a14 100644 --- a/.github/workflows/external-links.yml +++ 
b/.github/workflows/external-links.yml @@ -1,168 +1,147 @@ -name: External Links +name: 🍈 Lychee -on: - pull_request: +on: [pull_request] concurrency: - group: external-links-${{ github.event.pull_request.number }} + group: lychee-${{ github.event.pull_request.number }} cancel-in-progress: true jobs: check: - name: Check External Links + name: Check Links runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: write + permissions: write-all steps: - - name: Checkout repository - uses: actions/checkout@v4 + - uses: actions/checkout@v4 - - name: Setup pnpm - uses: pnpm/action-setup@v4 - - - name: Setup Node.js - uses: actions/setup-node@v4 + - name: 🍈 Lychee Link Checker (First Run) + id: lychee + uses: lycheeverse/lychee-action@v2 with: - node-version: "20" - cache: "pnpm" - - - name: Install dependencies - run: pnpm install --frozen-lockfile + args: >- + --cache + --cache-exclude-status 429,500,502,503,504 + --max-cache-age 5m + --verbose + --no-progress + --exclude '.*' + --timeout 20 + --max-retries 8 + --retry-wait-time 5 + --include '^https://' + --exclude 'https://www.gnu.org' + --exclude 'https://docs.solidjs.com' + --accept 200,201,204,304,403,429 + './apps/docs/content' + output: lychee/out.md + fail: false + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Run external link checker - id: external-links - shell: bash - timeout-minutes: 30 + - name: 🔄 Retry Lychee Link Checker (Second Run for Timeouts) + id: lychee-retry + if: ${{ always() && steps.lychee.outputs.exit_code != 0 }} + uses: lycheeverse/lychee-action@v2 + with: + args: >- + --cache + --cache-exclude-status 429,500,502,503,504 + --max-cache-age 5m + --verbose + --no-progress + --exclude '.*' + --timeout 30 + --max-retries 10 + --retry-wait-time 10 + --include '^https://' + --exclude 'https://www.gnu.org' + --exclude 'https://docs.solidjs.com' + --accept 200,201,204,304,403,429 + './apps/docs/content' + output: lychee/out-retry.md + fail: false env: - 
EXTERNAL_LINKS_FILE_CONCURRENCY: "8" - EXTERNAL_LINKS_URL_CONCURRENCY: "10" - run: | - mkdir -p external-links - set +e - pnpm --dir apps/docs exec tsx ./scripts/lint-external-links.ts --json > external-links/report.json 2> external-links/output.txt - exit_code=$? - echo "exit_code=$exit_code" >> "$GITHUB_OUTPUT" - exit 0 + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Parse and print external links report - if: ${{ always() }} - shell: bash + - name: 📝 Clean up Lychee Report + if: ${{ always() && github.event.pull_request.head.repo.fork == false }} run: | - REPORT_FILE="external-links/formatted.md" - JSON_FILE="external-links/report.json" - OUTPUT_FILE="external-links/output.txt" - if [ ! -s "$JSON_FILE" ]; then - echo "::warning::No JSON output found at $JSON_FILE. Using empty object." - echo "{}" > "$JSON_FILE" - fi - - if ! jq . "$JSON_FILE" >/dev/null; then - echo "::error::Invalid JSON in $JSON_FILE" - { - echo "## External Link Check Report" - echo "" - echo "### ❌ Invalid JSON report output" - echo "" - echo "The external link checker output was not valid JSON." - echo "" - echo "<details>" - echo "<summary>Raw report.json</summary>" - echo "" - echo '```text' - cat "$JSON_FILE" - echo '```' - echo "</details>" - } > "$REPORT_FILE" - if [ -s "$OUTPUT_FILE" ]; then - echo "stderr output:" - cat "$OUTPUT_FILE" - { - echo "" - echo "<details>" - echo "<summary>stderr output</summary>" - echo "" - echo '```text' - cat "$OUTPUT_FILE" - echo '```' - echo "</details>
" - } >> "$REPORT_FILE" - fi - cat "$REPORT_FILE" >> "$GITHUB_STEP_SUMMARY" - exit 1 - fi - - echo "External links JSON report:" - jq . "$JSON_FILE" - echo "" - - echo "External links summary:" - jq -r ' - "- Scanned files: \(.scannedFiles // "n/a")", - "- Extracted external links: \(.extractedExternalLinks // "n/a")", - "- Unique links checked: \(.uniqueLinksChecked // "n/a")", - "- Failed links: \(.failedLinks // "n/a")" - ' "$JSON_FILE" - echo "" - - FAILED_COUNT="$(jq -r '.failedLinks // 0' "$JSON_FILE")" - if [ "$FAILED_COUNT" -gt 0 ]; then - echo "Failures:" - jq -r ' - .failures[]? as $failure - | "- \($failure.url) (\($failure.reason))", - ($failure.occurrences[]? | " - \(.file):\(.line)") - ' "$JSON_FILE" - else - echo "No failed external links found." + # Use retry results if available, otherwise use first run results + if [ -f "lychee/out-retry.md" ]; then + REPORT_FILE="lychee/out-retry.md" + elif [ -f "lychee/out.md" ]; then + REPORT_FILE="lychee/out.md" fi - { - echo "## External Link Check Report" - echo "" - jq -r ' - "- Scanned files: `\(.scannedFiles // "n/a")`", - "- Extracted external links: `\(.extractedExternalLinks // "n/a")`", - "- Unique links checked: `\(.uniqueLinksChecked // "n/a")`", - "- Failed links: `\(.failedLinks // "n/a")`" - ' "$JSON_FILE" - echo "" - - if [ "$FAILED_COUNT" -gt 0 ]; then - echo "### Failures" - echo "" - jq -r ' - .failures[]? as $failure - | "- \($failure.url) (`\($failure.reason)`)", - ($failure.occurrences[]? 
| " - `\(.file):\(.line)`") - ' "$JSON_FILE" + if [ -n "$REPORT_FILE" ]; then + # Parse stats from lychee markdown table + TOTAL=$(grep 'Total' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") + SUCCESS=$(grep 'Successful' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") + ERRORS=$(grep 'Errors' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") + REDIRECTS=$(grep 'Redirected' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") + EXCLUDED=$(grep 'Excluded' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") + TIMEOUTS=$(grep 'Timeouts' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") + UNKNOWN=$(grep 'Unknown' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") + UNSUPPORTED=$(grep 'Unsupported' "$REPORT_FILE" | grep -oE '\| [0-9]+ +\|' | grep -oE '[0-9]+' || echo "0") + + # Extract errors section + ERRORS_SECTION=$(sed -n '/^## Errors/,/^## /p' "$REPORT_FILE" | sed '$d' | tail -n +2) + + # Format errors section + FORMATTED_ERRORS="" + while IFS= read -r line; do + if [[ $line =~ ^### ]]; then + file=$(echo "$line" | sed 's/### Errors in //') + FORMATTED_ERRORS+="\n**\`$file\`**\n" + elif [[ $line =~ ^\* ]]; then + FORMATTED_ERRORS+="$line\n" + fi + done <<< "$ERRORS_SECTION" + + # Create formatted output using echo statements + echo "## 🍈 Lychee Link Check Report" > lychee/formatted.md + echo "" >> lychee/formatted.md + echo "**$TOTAL links:** \`✅ $SUCCESS OK\` | \`🚫 $ERRORS errors\` | \`🔀 $REDIRECTS redirects\` | \`👻 $EXCLUDED excluded\`" >> lychee/formatted.md + echo "" >> lychee/formatted.md + + if [ "$ERRORS" -eq 0 ]; then + echo "### ✅ All links are working!" >> lychee/formatted.md else - echo "### ✅ No failed external links found." - fi - - if [ -s "$OUTPUT_FILE" ]; then - echo "" - echo "
" - echo "stderr output" - echo "" - echo '```text' - cat "$OUTPUT_FILE" - echo '```' - echo "
" + echo "### ❌ Errors" >> lychee/formatted.md + echo -e "$FORMATTED_ERRORS" >> lychee/formatted.md fi - } > "$REPORT_FILE" - - cat "$REPORT_FILE" >> "$GITHUB_STEP_SUMMARY" + echo "---" >> lychee/formatted.md + echo "" >> lychee/formatted.md + echo "
" >> lychee/formatted.md + echo "Full Statistics Table" >> lychee/formatted.md + echo "" >> lychee/formatted.md + echo "| Status | Count |" >> lychee/formatted.md + echo "|--------|-------|" >> lychee/formatted.md + echo "| ✅ Successful | $SUCCESS |" >> lychee/formatted.md + echo "| 🔀 Redirected | $REDIRECTS |" >> lychee/formatted.md + echo "| 👻 Excluded | $EXCLUDED |" >> lychee/formatted.md + echo "| 🚫 Errors | $ERRORS |" >> lychee/formatted.md + echo "| ⛔ Unsupported | $UNSUPPORTED |" >> lychee/formatted.md + echo "| ⏳ Timeouts | $TIMEOUTS |" >> lychee/formatted.md + echo "| ❓ Unknown | $UNKNOWN |" >> lychee/formatted.md + echo "" >> lychee/formatted.md + echo "
" >> lychee/formatted.md + fi - - name: Comment external links report - if: ${{ always() && github.event.pull_request.head.repo.fork == false && hashFiles('external-links/formatted.md') != '' }} + - name: 📝 Comment Broken Links + if: ${{ always() && github.event.pull_request.head.repo.fork == false }} uses: marocchino/sticky-pull-request-comment@v2 with: - header: external-links - path: external-links/formatted.md + header: lychee + path: lychee/formatted.md - - name: Fail if broken links found - if: ${{ steps.external-links.outputs.exit_code != '0' }} + - name: 🚫 Fail if broken links found + if: ${{ steps.lychee-retry.conclusion == 'success' && steps.lychee-retry.outputs.exit_code != 0 || steps.lychee-retry.conclusion == 'failure' }} run: | - echo "External link checker failed." - exit 1 + if [ "${{ steps.lychee-retry.conclusion }}" == "success" ]; then + echo "Failing based on retry run results" + exit ${{ steps.lychee-retry.outputs.exit_code }} + else + echo "Failing based on first run results" + exit ${{ steps.lychee.outputs.exit_code }} + fi