diff --git a/packages/docs-builder/src/gen-html.spec.ts b/packages/docs-builder/src/gen-html.spec.ts index 7ce6f66..208ce2a 100644 --- a/packages/docs-builder/src/gen-html.spec.ts +++ b/packages/docs-builder/src/gen-html.spec.ts @@ -2,7 +2,108 @@ import { describe, expect, it } from 'vitest' -import { convertMarkdownToHtml, subscriptify } from './gen-html' +import type { Config } from './config' +import { Context } from './context' +import { convertMarkdownToHtml, generateHtml, subscriptify } from './gen-html' +import { parseMarkdownPageContent } from './parse' + +const config: Config = { + mode: 'development', + baseProjDir: 'xxx', + sourceDir: 'xxx', + outDir: 'xxx', + version: '25.1.0', + langs: [{ code: 'de', version: '25.1.0' }], + formats: [], + template: 'default', + author: 'Climate Interactive', + logoPath: 'xxx', + defs: [], + pages: ['page_1.md'], + untranslated: [], + options: {} +} + +describe('generateHtml', () => { + it('should convert valid Markdown', () => { + const md = `\ +This is a valid normal link: [page](https://climateinteractive.org) + +This is a valid reference-style link: [page][ref] + +This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text + +This is a valid reference-style link: [page][ref] (with parentheses after) and more text + +[ref]: https://climateinteractive.org +` + + const html = generateHtml(new Context(config, 'en'), 'page_1.md', { raw: md }) + expect(html.baseName).toBe('page_1') + expect(html.relPath).toBe('page_1.html') + expect(html.body).toBe(`\ +

This is a valid normal link: page

+

This is a valid reference-style link: page

+

This is a valid normal link: page (with parentheses after) and more text

+

This is a valid reference-style link: page (with parentheses after) and more text

+`) + }) + + it('should throw an error if invalid link syntax is detected', () => { + const links = `\ +This is a valid normal link: [page](https://climateinteractive.org) + +This is a valid reference-style link: [page][ref] + +This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text + +This is a valid reference-style link: [page][ref] (with parentheses after) and more text + +This is an invalid normal link: [page] (https://climateinteractive.org) (with parentheses after) and more text + +This is an invalid reference-style link: [page] [ref] (with parentheses after) and more text +` + + const md = `\ +# Section 1 + + + +${links} + + + +[ref]: https://climateinteractive.org +` + + // Verify that an error is thrown if the English content contains invalid link syntax. + // Note that in the English case, the invalid ref link will be converted to an HTML link. + const enContext = new Context(config, 'en') + const enMd = parseMarkdownPageContent(enContext, 'page_1.md', md) + expect(() => generateHtml(enContext, 'page_1.md', { raw: enMd.raw })).toThrow(`\ +Detected invalid Markdown link syntax in the generated HTML: +[page] (<a href +[page] <a href +To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (page=page_1.md)`) + + // Verify that an error is thrown if the translated content contains invalid link syntax. + // Note that in the non-English case, the invalid ref link target will not be converted + // to an HTML link (unlike the English case above), so the error message will be different. 
+ const deContext = enContext.derive( + 'de', + new Map([ + ['section_1__title', 'Section 1'], + ['section_1__block_1', links] + ]) + ) + const deMd = parseMarkdownPageContent(deContext, 'page_1.md', md) + expect(() => generateHtml(deContext, 'page_1.md', { raw: deMd.raw })).toThrow(`\ +Detected invalid Markdown link syntax in the generated HTML: +[page] (<a href +[page] [ref] +To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (lang=de page=page_1.md)`) + }) +}) describe('subscriptify', () => { it('should convert chemical formulas', () => { @@ -29,7 +130,7 @@ describe('convertMarkdownToHtml', () => { '

This is -CO2-

\n' ) expect(convertMarkdownToHtml(undefined, '# This is CO2')).toBe( - '

This is CO2

\n' + '

This is CO2

\n' ) expect(convertMarkdownToHtml(undefined, '> This is _CO2_')).toBe( '
\n

This is CO2

\n
\n' diff --git a/packages/docs-builder/src/gen-html.ts b/packages/docs-builder/src/gen-html.ts index 6406f54..d80d85d 100644 --- a/packages/docs-builder/src/gen-html.ts +++ b/packages/docs-builder/src/gen-html.ts @@ -144,6 +144,9 @@ export function generateHtml(context: Context, mdRelPath: string, mdPage: Markdo // Convert the Markdown content to HTML const body = convertMarkdownToHtml(context, md) + // Check for evidence of invalid Markdown link syntax that remains in the generated HTML + checkForInvalidLinkSyntax(context, body) + // Save the names of the `<head>` fragments to include const headFragments = mdPage.frontmatter?.fragments?.head || [] @@ -592,7 +595,9 @@ export function convertMarkdownToHtml(context: Context, md: string): string { }) // Parse the Markdown into HTML - return marked.parse(md) + return marked.parse(md, { + headerIds: false + }) } /** @@ -617,3 +622,30 @@ export function subscriptify(s: string): string { return subscriptMap.get(m1) }) } + +// This will match cases where a space in the Markdown link syntax caused the link parts +// to be converted to separate elements in the HTML output, for example: +// Markdown: [text] (https://example.com) +// HTML: [text] (<a href="https://example.com">https://example.com</a>) +// Markdown: [text] [ref] +// HTML (en): [text] <a href="https://example.com">ref</a> +// HTML (xx): [text] [ref] +// Note that the generated HTML in the second example is different for the English and +// non-English cases (due to different parsing code paths), so we need to detect both. +const invalidLinkRegExp = /\[([^\]]+)\]\s+(\(?