diff --git a/.github/workflows/lychee.yml b/.github/workflows/external-links.yml
similarity index 100%
rename from .github/workflows/lychee.yml
rename to .github/workflows/external-links.yml
diff --git a/apps/docs/scripts/lint-external-links.ts b/apps/docs/scripts/lint-external-links.ts
index cafd50510e..10b3af8735 100644
--- a/apps/docs/scripts/lint-external-links.ts
+++ b/apps/docs/scripts/lint-external-links.ts
@@ -8,8 +8,9 @@ const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
 
 const CONTENT_DIR = path.join(__dirname, "../content");
-const TIMEOUT_MS = 10_000;
-const MAX_CONCURRENCY = 15;
+const DEFAULT_TIMEOUT_MS = 20_000;
+const DEFAULT_FILE_CONCURRENCY = 15;
+const DEFAULT_URL_CHECK_CONCURRENCY = 40;
 const ACCEPTED_STATUSES = new Set([403, 429]);
 
 const IMAGE_EXTENSIONS = new Set([
@@ -48,6 +49,34 @@ type FailedResult = {
   occurrences: LinkOccurrence[];
 };
 
+type LinkCheckReport = {
+  scannedFiles: number;
+  extractedExternalLinks: number;
+  uniqueLinksChecked: number;
+  failedLinks: number;
+  failures: FailedResult[];
+};
+
+function readPositiveIntEnv(name: string, fallback: number): number {
+  const raw = process.env[name];
+  if (!raw) return fallback;
+
+  const parsed = Number.parseInt(raw, 10);
+  if (!Number.isFinite(parsed) || parsed <= 0) {
+    console.warn(
+      `Invalid ${name} value "${raw}". Falling back to ${fallback}.`,
+    );
+    return fallback;
+  }
+
+  return parsed;
+}
+
+const TIMEOUT_MS = readPositiveIntEnv(
+  "EXTERNAL_LINKS_TIMEOUT_MS",
+  DEFAULT_TIMEOUT_MS,
+);
+
 function findMarkdownFiles(dir: string, fileList: string[] = []): string[] {
   const files = fs.readdirSync(dir);
 
@@ -110,7 +139,8 @@ function toExternalHttpUrl(raw: string): string | null {
 
   try {
     const parsed = new URL(trimmed);
-    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return null;
+    if (parsed.protocol !== "http:" && parsed.protocol !== "https:")
+      return null;
     if (parsed.hostname === "localhost") return null;
     return parsed.toString();
   } catch {
@@ -118,8 +148,10 @@ function toExternalHttpUrl(raw: string): string | null {
   }
 }
 
-function collectFileLinks(filePath: string): Array<{ url: string; line: number }> {
-  const rawContent = fs.readFileSync(filePath, "utf8");
+async function collectFileLinks(
+  filePath: string,
+): Promise<Array<{ url: string; line: number }>> {
+  const rawContent = await fs.promises.readFile(filePath, "utf8");
   const content = stripCodeBlocks(rawContent);
   const links: Array<{ url: string; line: number }> = [];
 
@@ -183,7 +215,9 @@ async function fetchWithTimeout(
   }
 }
 
-async function checkUrl(url: string): Promise<{ ok: boolean; reason?: string }> {
+async function checkUrl(
+  url: string,
+): Promise<{ ok: boolean; reason?: string }> {
   try {
     // Some providers (e.g. VS Marketplace, Bluesky) return 404 for HEAD
     // while the same URL works with GET in a browser.
@@ -193,7 +227,10 @@ async function checkUrl(url: string): Promise<{ ok: boolean; reason?: string }> {
     });
 
     if (response.status >= 400 || response.status === 429) {
-      response = await fetchWithTimeout(url, { method: "GET", headers: BROWSER_HEADERS });
+      response = await fetchWithTimeout(url, {
+        method: "GET",
+        headers: BROWSER_HEADERS,
+      });
     }
 
     if (response.status === 404) {
@@ -226,43 +263,80 @@ async function runWithConcurrency(
 ): Promise<void> {
   let index = 0;
 
-  const workers = Array.from({ length: Math.min(limit, items.length) }, async () => {
-    while (index < items.length) {
-      const item = items[index];
-      index += 1;
-      await worker(item);
-    }
-  });
+  const workers = Array.from(
+    { length: Math.min(limit, items.length) },
+    async () => {
+      while (index < items.length) {
+        const item = items[index];
+        index += 1;
+        await worker(item);
+      }
+    },
+  );
 
   await Promise.all(workers);
 }
 
+function mergeOccurrences(
+  target: Map<string, LinkOccurrence[]>,
+  source: Map<string, LinkOccurrence[]>,
+): void {
+  for (const [url, occurrences] of source) {
+    const existing = target.get(url);
+    if (existing) {
+      existing.push(...occurrences);
+      continue;
+    }
+    target.set(url, occurrences);
+  }
+}
+
 async function main(): Promise<void> {
+  const jsonOutput = process.argv.includes("--json");
+  const fileConcurrency = readPositiveIntEnv(
+    "EXTERNAL_LINKS_FILE_CONCURRENCY",
+    DEFAULT_FILE_CONCURRENCY,
+  );
+  const urlCheckConcurrency = readPositiveIntEnv(
+    "EXTERNAL_LINKS_URL_CONCURRENCY",
+    DEFAULT_URL_CHECK_CONCURRENCY,
+  );
+
   if (!fs.existsSync(CONTENT_DIR)) {
     console.error(`Content directory not found: ${CONTENT_DIR}`);
     process.exit(1);
   }
 
   const files = findMarkdownFiles(CONTENT_DIR);
+
   const occurrencesByUrl = new Map<string, LinkOccurrence[]>();
   let extractedLinks = 0;
 
-  for (const filePath of files) {
+  // Parse markdown files concurrently before link validation starts.
+  const perFileOccurrences: Array<Map<string, LinkOccurrence[]>> = [];
+  await runWithConcurrency(files, fileConcurrency, async (filePath) => {
     const relativeFile = path.relative(process.cwd(), filePath);
-    const links = collectFileLinks(filePath);
-    extractedLinks += links.length;
+    const links = await collectFileLinks(filePath);
+    const localOccurrences = new Map<string, LinkOccurrence[]>();
 
     for (const link of links) {
-      const occurrences = occurrencesByUrl.get(link.url) ?? [];
+      const occurrences = localOccurrences.get(link.url) ?? [];
       occurrences.push({ file: relativeFile, line: link.line });
-      occurrencesByUrl.set(link.url, occurrences);
+      localOccurrences.set(link.url, occurrences);
     }
+
+    perFileOccurrences.push(localOccurrences);
+    extractedLinks += links.length;
+  });
+
+  for (const fileOccurrences of perFileOccurrences) {
+    mergeOccurrences(occurrencesByUrl, fileOccurrences);
   }
 
   const uniqueUrls = [...occurrencesByUrl.keys()];
   const failed: FailedResult[] = [];
 
-  await runWithConcurrency(uniqueUrls, MAX_CONCURRENCY, async (url) => {
+  await runWithConcurrency(uniqueUrls, urlCheckConcurrency, async (url) => {
     const result = await checkUrl(url);
     if (result.ok) return;
 
@@ -275,6 +349,19 @@ async function main(): Promise<void> {
 
   failed.sort((a, b) => a.url.localeCompare(b.url));
 
+  const report: LinkCheckReport = {
+    scannedFiles: files.length,
+    extractedExternalLinks: extractedLinks,
+    uniqueLinksChecked: uniqueUrls.length,
+    failedLinks: failed.length,
+    failures: failed,
+  };
+
+  if (jsonOutput) {
+    console.log(JSON.stringify(report, null, 2));
+    process.exit(failed.length === 0 ? 0 : 1);
+  }
+
   console.log(`Scanned files: ${files.length}`);
   console.log(`Extracted external links: ${extractedLinks}`);
   console.log(`Unique links checked: ${uniqueUrls.length}`);
diff --git a/package.json b/package.json
index daab08195c..07e8b13b0c 100644
--- a/package.json
+++ b/package.json
@@ -7,7 +7,7 @@
     "check": "turbo run check",
     "types:check": "turbo run types:check",
     "lint:links": "turbo run lint:links --filter=docs",
-    "lint:external-links": "turbo run lint:external-links --filter=docs",
+    "lint:external-links": "turbo run lint:external-links --filter=docs --",
     "lint:code": "turbo run lint:code --filter=docs",
     "lint:spellcheck": "turbo run lint:spellcheck --filter=docs"
   },
diff --git a/turbo.json b/turbo.json
index d67f90b84c..da9e4f66d8 100644
--- a/turbo.json
+++ b/turbo.json
@@ -33,4 +33,4 @@
     "lint:code": {},
     "lint:spellcheck": {}
   }
-}
\ No newline at end of file
+}