From 7f4278a69d7052517d9e2dee22e9c88b2bef1caf Mon Sep 17 00:00:00 2001 From: Lucas Date: Sat, 15 Feb 2025 19:46:52 +0100 Subject: [PATCH 01/22] feat: implement emoji and variation generation commands with error handling --- src/adapter/base.ts | 6 ++- src/cli.ts | 121 ++++++++++++++++++++++++++++++++++++++++++++ src/utils/errors.ts | 10 ++++ 3 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 src/utils/errors.ts diff --git a/src/adapter/base.ts b/src/adapter/base.ts index fd0e24b..50dc038 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -1,11 +1,13 @@ import type { EmojiGroup } from "../types"; +import { red } from "farver/fast"; import { defineMojiAdapter } from "../adapter"; import { slugify } from "../utils"; import { fetchCache } from "../utils/cache"; +import { MojisNotImplemented } from "../utils/errors"; function notImplemented(adapterFn: string) { return async () => { - throw new Error(`the adapter function ${adapterFn} is not implemented`); + throw new MojisNotImplemented(`the adapter function ${red(adapterFn)} is not implemented`); }; } @@ -63,4 +65,6 @@ export default defineMojiAdapter({ return groups; }, sequences: notImplemented("sequences"), + emojis: notImplemented("emojis"), + variations: notImplemented("variations"), }); diff --git a/src/cli.ts b/src/cli.ts index 8991dfe..75cf8f1 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -7,6 +7,7 @@ import pkg from "../package.json" with { type: "json" }; import { resolveAdapter } from "./adapters"; import { SUPPORTED_EMOJI_VERSIONS } from "./constants"; import { getAllEmojiVersions } from "./utils"; +import { isNotImplementedError } from "./utils/errors"; import { readLockfile, writeLockfile } from "./utils/lockfile"; const cli = yargs(process.argv.slice(2)) @@ -19,6 +20,126 @@ const cli = yargs(process.argv.slice(2)) .alias("v", "version") .demandCommand(1, ""); +cli.command( + "generate:emojis ", + "Generate emoji data for the specified versions", + (args) => commonOptions(args) + .positional("versions", { + type: "string", + description: "emoji versions to generate", + }) + .strict().help(), + async (args) => { + const force = args.force ?? false; + const versions = Array.isArray(args.versions) ? args.versions : [args.versions]; + + if (SUPPORTED_EMOJI_VERSIONS.every((v) => !versions.includes(v))) { + console.error(red("error:"), "unsupported emoji versions"); + console.log("supported versions:", SUPPORTED_EMOJI_VERSIONS.join(", ")); + process.exit(1); + } + + console.log("generating emoji data for versions", versions.map((v) => yellow(v)).join(", ")); + + const promises = versions.map(async (version) => { + const coerced = semver.coerce(version); + + if (coerced == null) { + throw new Error(`invalid version ${version}`); + } + + const adapter = resolveAdapter(coerced.version); + + if (adapter == null) { + throw new Error(`no adapter found for version ${version}`); + } + + const emojis = await adapter.emojis!({ version, force }); + + await fs.ensureDir(`./data/v${version}`); + return fs.writeFile( + `./data/v${version}/emojis.json`, + JSON.stringify(emojis, null, 2), + "utf-8", + ); + }); + + const results = await Promise.allSettled(promises); + + for (const result of results) { + if (result.status === "rejected") { + if (isNotImplementedError(result.reason)) { + console.warn(yellow("warning:"), result.reason.message); + continue; + } + console.error(red("error:"), result.reason); + } + } + + console.log(green("done")); + }, +); + +cli.command( + "generate:variations ", + "Generate emoji variations for the specified versions", + (args) => commonOptions(args) + .positional("versions", { + type: "string", + description: "emoji versions to generate", + }) + .strict().help(), + async (args) => { + const force = args.force ?? false; + const versions = Array.isArray(args.versions) ? args.versions : [args.versions]; + + if (SUPPORTED_EMOJI_VERSIONS.every((v) => !versions.includes(v))) { + console.error(red("error:"), "unsupported emoji versions"); + console.log("supported versions:", SUPPORTED_EMOJI_VERSIONS.join(", ")); + process.exit(1); + } + + console.log("generating emoji variations for versions", versions.map((v) => yellow(v)).join(", ")); + + const promises = versions.map(async (version) => { + const coerced = semver.coerce(version); + + if (coerced == null) { + throw new Error(`invalid version ${version}`); + } + + const adapter = resolveAdapter(coerced.version); + + if (adapter == null) { + throw new Error(`no adapter found for version ${version}`); + } + + const variations = await adapter.variations!({ version, force }); + + await fs.ensureDir(`./data/v${version}`); + return fs.writeFile( + `./data/v${version}/variations.json`, + JSON.stringify(variations, null, 2), + "utf-8", + ); + }); + + const results = await Promise.allSettled(promises); + + for (const result of results) { + if (result.status === "rejected") { + if (isNotImplementedError(result.reason)) { + console.warn(yellow("warning:"), result.reason.message); + continue; + } + console.error(red("error:"), result.reason); + } + } + + console.log(green("done")); + }, +); + cli.command( "generate:sequences ", "Generate emoji sequences for the specified versions", diff --git a/src/utils/errors.ts b/src/utils/errors.ts new file mode 100644 index 0000000..4cc1dcb --- /dev/null +++ b/src/utils/errors.ts @@ -0,0 +1,10 @@ +export class MojisNotImplemented extends Error { + constructor(message: string) { + super(message); + this.name = "MojisNotImplemented"; + } +} + +export function isNotImplementedError(err: Error): err is MojisNotImplemented { + return err instanceof MojisNotImplemented; +} From 4d9672fbb7f84ceccae4a26ab2313e185df3260e Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 05:47:38 +0100 Subject: [PATCH 02/22] feat: enhance emoji data handling with version extraction and improved parsing logic --- src/adapter/v16.ts | 69 ++++++++++++++++++++++++++++++++++++++++++++-- src/types.ts | 9 ++++++ src/utils.ts | 47 +++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 3 deletions(-) diff --git a/src/adapter/v16.ts b/src/adapter/v16.ts index 89b2430..4a98adb 100644 --- a/src/adapter/v16.ts +++ b/src/adapter/v16.ts @@ -1,6 +1,7 @@ -import type { EmojiSequence, EmojiVariation } from "../types"; +import type { EmojiData, EmojiSequence, EmojiVariation, Property } from "../types"; import { defineMojiAdapter } from "../adapter"; import { FEMALE_SIGN, MALE_SIGN } from "../constants"; +import { extractEmojiVersion, extractUnicodeVersion } from "../utils"; import { fetchCache } from "../utils/cache"; import { expandHexRange } from "../utils/hexcode"; @@ -28,7 +29,7 @@ export default defineMojiAdapter({ const sequences: EmojiSequence[] = []; for (let line of lines) { - // skip empty line & comments + // skip empty line & comments if (line.trim() === "" || line.startsWith("#")) { continue; } @@ -68,7 +69,69 @@ export default defineMojiAdapter({ zwj: zwj || [], }; }, - async emojis({ version, force }) { + async emojis(ctx) { + const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.version}.0/ucd/emoji/emoji-data.txt`, { + cacheKey: `v${ctx.version}/emoji-data.json`, + parser(data) { + const lines = data.split("\n"); + + const emojiData: Record = {}; + + for (const line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + const lineCommentIndex = line.indexOf("#"); + const lineComment = lineCommentIndex !== -1 ? line.slice(lineCommentIndex + 1).trim() : ""; + + let [hex, property] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null) { + throw new Error(`invalid line: ${line}`); + } + + // remove line comment from property + const propertyCommentIndex = property.indexOf("#"); + if (propertyCommentIndex !== -1) { + property = property.slice(0, propertyCommentIndex).trim(); + } + + if (property === "Extended_Pictographic") { + continue; + } + + const expandedHex = expandHexRange(hex); + const emojiVersion = extractEmojiVersion(lineComment) ?? Number.parseFloat(ctx.version); + + const emoji: EmojiData = { + description: lineComment, + hexcode: "", + gender: null, + properties: [(property as Property) || "Emoji"], + unicodeVersion: extractUnicodeVersion(emojiVersion, 16.0), + version: emojiVersion, + }; + + for (const hex of expandedHex) { + if (emojiData[hex] != null) { + emojiData[hex].properties = [...new Set([...emojiData[hex].properties, ...emoji.properties])]; + } else { + emojiData[hex] = { + ...emoji, + hexcode: hex.replace(/\s+/g, "-"), + }; + } + } + } + + return emojiData; + }, + bypassCache: ctx.force, + }); + + return emojiData; }, variations: async (ctx) => { return fetchCache(`https://unicode.org/Public/${ctx.version}.0/ucd/emoji/emoji-variation-sequences.txt`, { diff --git a/src/types.ts b/src/types.ts index 411cbb4..649f058 100644 --- a/src/types.ts +++ b/src/types.ts @@ -17,6 +17,15 @@ export interface EmojiComponent { } +export interface EmojiData { + description: string; + gender: string | null; + hexcode: string; + properties: Property[]; + unicodeVersion: number | null; + version: number; +} + export interface EmojiShortcode { /** * The shortcode for the emoji. diff --git a/src/utils.ts b/src/utils.ts index 33da8ca..6551dcb 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -266,3 +266,50 @@ export function extractVersion(text: string): string | null { return null; } + +/** + * Extracts the emoji version from a comment string. + * The version should be in the format "E{major}.{minor}" (e.g. "E14.0"). + * + * @param {string} comment - The comment string to extract the version from + * @returns {number | null} The parsed version number, or null if no valid version was found + * + * @example + * ```ts + * extractEmojiVersion("E14.0") // returns 14.0 + * extractEmojiVersion("Something else") // returns null + * ``` + */ +export function extractEmojiVersion(comment: string): number | null { + const version = comment.match(/E(\d+\.\d)/); + + if (version != null && version[1] != null) { + return Number.parseFloat(version[1].trim()); + } + + return null; +} + +// https://unicode.org/reports/tr51/#EmojiVersions +export function extractUnicodeVersion(emojiVersion: number, unicodeVersion?: number): number { + // v11+ aligned emoji and unicode specs (except for minor versions) + if (emojiVersion >= 11) { + return unicodeVersion ? Math.min(emojiVersion, unicodeVersion) : emojiVersion; + } + + switch (emojiVersion) { + case 0.7: + return 7; + case 1: + case 2: + return 8; + case 3: + case 4: + return 9; + case 5: + return 10; + default: + // v6 is the first unicode spec emojis appeared in + return 6; + } +} From c0717dd691333e56253f6c599cfc26db18a30469 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 05:49:48 +0100 Subject: [PATCH 03/22] feat: add shortcode generator --- src/adapter/base.ts | 1 + src/adapter/index.ts | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/adapter/base.ts b/src/adapter/base.ts index 50dc038..6fa3888 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -67,4 +67,5 @@ export default defineMojiAdapter({ sequences: notImplemented("sequences"), emojis: notImplemented("emojis"), variations: notImplemented("variations"), + shortcodes: notImplemented("shortcodes"), }); diff --git a/src/adapter/index.ts b/src/adapter/index.ts index c4178e0..9c231d3 100644 --- a/src/adapter/index.ts +++ b/src/adapter/index.ts @@ -1,4 +1,4 @@ -import type { EmojiGroup, EmojiSequence, EmojiVariation } from "../types"; +import type { EmojiGroup, EmojiSequence, EmojiShortcode, EmojiVariation } from "../types"; import semver from "semver"; export interface MojiAdapter { @@ -41,6 +41,8 @@ export interface MojiAdapter { * A function to generate emoji variations for the specified version. */ variations?: EmojiVariationFn; + + shortcodes?: ShortcodeFn; } export interface BaseAdapterContext { @@ -52,6 +54,9 @@ export type GroupFn = (ctx: BaseAdapterContext) => Promise; export type SequenceFn = (ctx: BaseAdapterContext) => Promise<{ zwj: EmojiSequence[]; sequences: EmojiSequence[] }>; export type EmojiFn = (ctx: BaseAdapterContext) => Promise; export type EmojiVariationFn = (ctx: BaseAdapterContext) => Promise; +export type ShortcodeFn = (ctx: BaseAdapterContext & { + providers: string[]; +}) => Promise; export const ADAPTERS = new Map(); From ed009ebdc740d296fb8e08ebe080481eee4bde8f Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 05:56:55 +0100 Subject: [PATCH 04/22] feat: rename groups to metadata in MojiAdapter and update related commands --- src/adapter/base.ts | 14 ++++++++------ src/adapter/index.ts | 10 +++++----- src/cli.ts | 25 ++++++++++++++++++++----- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/adapter/base.ts b/src/adapter/base.ts index 6fa3888..0afbf98 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -15,13 +15,15 @@ export default defineMojiAdapter({ name: "base", description: "base adapter", range: "*", - groups: async ({ version, force }) => { + metadata: async ({ version, force }) => { if (version === "1.0" || version === "2.0" || version === "3.0") { console.warn(`version ${version} does not have group data`); - return []; + return { + groups: [], + }; } - const groups = await fetchCache(`https://unicode.org/Public/emoji/${version}/emoji-test.txt`, { + return fetchCache(`https://unicode.org/Public/emoji/${version}/emoji-test.txt`, { cacheKey: `v${version}/metadata.json`, parser(data) { const lines = data.split("\n"); @@ -57,12 +59,12 @@ export default defineMojiAdapter({ } } - return groups; + return { + groups, + }; }, bypassCache: force, }); - - return groups; }, sequences: notImplemented("sequences"), emojis: notImplemented("emojis"), diff --git a/src/adapter/index.ts b/src/adapter/index.ts index 9c231d3..04890bd 100644 --- a/src/adapter/index.ts +++ b/src/adapter/index.ts @@ -22,11 +22,6 @@ export interface MojiAdapter { */ extend?: string; - /** - * A function to generate the emoji groups for the specified version. - */ - groups?: GroupFn; - /** * A function to generate the emoji sequences for the specified version */ @@ -43,6 +38,8 @@ export interface MojiAdapter { variations?: EmojiVariationFn; shortcodes?: ShortcodeFn; + + metadata?: MetadataFn; } export interface BaseAdapterContext { @@ -57,6 +54,9 @@ export type EmojiVariationFn = (ctx: BaseAdapterContext) => Promise Promise; +export type MetadataFn = (ctx: BaseAdapterContext) => Promise<{ + groups: EmojiGroup[]; +}>; export const ADAPTERS = new Map(); diff --git a/src/cli.ts b/src/cli.ts index 75cf8f1..995a5c4 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -193,6 +193,10 @@ cli.command( for (const result of results) { if (result.status === "rejected") { + if (isNotImplementedError(result.reason)) { + console.warn(yellow("warning:"), result.reason.message); + continue; + } console.error(red("error:"), result.reason); } } @@ -202,8 +206,8 @@ cli.command( ); cli.command( - "generate:groups ", - "Generate emoji group data for the specified versions", + "generate:metadata ", + "Generate emoji metadata for the specified versions", (args) => commonOptions(args) .positional("versions", { type: "string", @@ -220,7 +224,7 @@ cli.command( process.exit(1); } - console.log("generating emoji group data for versions", versions.map((v) => yellow(v)).join(", ")); + console.log("generating emoji metadata for versions", versions.map((v) => yellow(v)).join(", ")); const promises = versions.map(async (version) => { const coerced = semver.coerce(version); @@ -235,20 +239,31 @@ cli.command( throw new Error(`no adapter found for version ${version}`); } - const groups = await adapter.groups!({ version, force }); + const { groups } = await adapter.metadata!({ version, force }); await fs.ensureDir(`./data/v${version}`); - return fs.writeFile( + + await fs.writeFile( `./data/v${version}/groups.json`, JSON.stringify(groups, null, 2), "utf-8", ); + + return fs.writeFile( + `./data/v${version}/metadata.json`, + JSON.stringify({ groups }, null, 2), + "utf-8", + ); }); const results = await Promise.allSettled(promises); for (const result of results) { if (result.status === "rejected") { + if (isNotImplementedError(result.reason)) { + console.warn(yellow("warning:"), result.reason.message); + continue; + } console.error(red("error:"), result.reason); } } From eea2c377b376857253e41188dd882e9f3bf808dc Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 07:01:35 +0100 Subject: [PATCH 05/22] feat: enhance emoji metadata handling with version extraction and improved structure --- src/adapter/base.ts | 68 ++++++++++++++++++++++++++++++++++++++------ src/adapter/index.ts | 4 +-- src/adapter/v16.ts | 7 +++-- src/cli.ts | 14 ++++----- src/types.ts | 15 ++++++++-- src/utils.ts | 53 ++++++++++++++++++++++------------ test/utils.test.ts | 22 +++++++++++++- 7 files changed, 141 insertions(+), 42 deletions(-) diff --git a/src/adapter/base.ts b/src/adapter/base.ts index 0afbf98..555bdae 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -1,7 +1,7 @@ -import type { EmojiGroup } from "../types"; +import type { EmojiGroup, EmojiMetadata } from "../types"; import { red } from "farver/fast"; import { defineMojiAdapter } from "../adapter"; -import { slugify } from "../utils"; +import { extractEmojiVersion, extractUnicodeVersion, slugify } from "../utils"; import { fetchCache } from "../utils/cache"; import { MojisNotImplemented } from "../utils/errors"; @@ -15,22 +15,26 @@ export default defineMojiAdapter({ name: "base", description: "base adapter", range: "*", - metadata: async ({ version, force }) => { - if (version === "1.0" || version === "2.0" || version === "3.0") { - console.warn(`version ${version} does not have group data`); + metadata: async (ctx) => { + if (ctx.version === "1.0" || ctx.version === "2.0" || ctx.version === "3.0") { + console.warn(`version ${ctx.version} does not have group data`); return { groups: [], + emojiMetadata: {}, }; } - return fetchCache(`https://unicode.org/Public/emoji/${version}/emoji-test.txt`, { - cacheKey: `v${version}/metadata.json`, + return fetchCache(`https://unicode.org/Public/emoji/${ctx.version}/emoji-test.txt`, { + cacheKey: `v${ctx.version}/metadata.json`, parser(data) { const lines = data.split("\n"); let currentGroup: EmojiGroup | undefined; const groups: EmojiGroup[] = []; + // [group-subgroup][hexcode] = metadata + const emojiMetadata: Record> = {}; + for (const line of lines) { if (line.trim() === "") { continue; @@ -48,6 +52,8 @@ export default defineMojiAdapter({ currentGroup = group; groups.push(group); + + continue; } else if (line.startsWith("# subgroup:")) { const subgroupName = line.slice(11).trim(); @@ -55,15 +61,59 @@ export default defineMojiAdapter({ throw new Error(`subgroup ${subgroupName} without group`); } - currentGroup.subgroups.push(subgroupName); + currentGroup.subgroups.push(slugify(subgroupName)); + + continue; + } else if (line.startsWith("#")) { + continue; + } + + const [baseHexcode, trailingLine] = line.split(";"); + + if (baseHexcode == null || trailingLine == null) { + throw new Error(`invalid line: ${line}`); } + + const [baseQualifier, comment] = trailingLine.split("#"); + + if (baseQualifier == null || comment == null) { + throw new Error(`invalid line: ${line}`); + } + + const hexcode = baseHexcode.trim().replace(/\s+/g, "-"); + const qualifier = baseQualifier.trim(); + + const emojiVersion = extractEmojiVersion(comment.trim()); + const [emoji, trimmedComment] = comment.trim().split(` E${emojiVersion} `); + + const groupName = currentGroup?.slug ?? "unknown"; + const subgroupName = currentGroup?.subgroups[currentGroup.subgroups.length - 1] ?? "unknown"; + + const metadataGroup = `${groupName}-${subgroupName}`; + + if (emojiMetadata[metadataGroup] == null) { + emojiMetadata[metadataGroup] = {}; + } + + emojiMetadata[metadataGroup][hexcode] = { + group: groupName, + subgroup: subgroupName, + qualifier, + emojiVersion: emojiVersion || null, + // TODO: use correct unicode version + unicodeVersion: extractUnicodeVersion(emojiVersion, "16.0"), + description: trimmedComment || "", + emoji: emoji || null, + hexcodes: hexcode.split("-"), + }; } return { groups, + emojiMetadata, }; }, - bypassCache: force, + bypassCache: ctx.force, }); }, sequences: notImplemented("sequences"), diff --git a/src/adapter/index.ts b/src/adapter/index.ts index 04890bd..3da6917 100644 --- a/src/adapter/index.ts +++ b/src/adapter/index.ts @@ -1,4 +1,4 @@ -import type { EmojiGroup, EmojiSequence, EmojiShortcode, EmojiVariation } from "../types"; +import type { EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation } from "../types"; import semver from "semver"; export interface MojiAdapter { @@ -47,7 +47,6 @@ export interface BaseAdapterContext { force: boolean; } -export type GroupFn = (ctx: BaseAdapterContext) => Promise; export type SequenceFn = (ctx: BaseAdapterContext) => Promise<{ zwj: EmojiSequence[]; sequences: EmojiSequence[] }>; export type EmojiFn = (ctx: BaseAdapterContext) => Promise; export type EmojiVariationFn = (ctx: BaseAdapterContext) => Promise; @@ -56,6 +55,7 @@ export type ShortcodeFn = (ctx: BaseAdapterContext & { }) => Promise; export type MetadataFn = (ctx: BaseAdapterContext) => Promise<{ groups: EmojiGroup[]; + emojiMetadata: Record>; }>; export const ADAPTERS = new Map(); diff --git a/src/adapter/v16.ts b/src/adapter/v16.ts index 4a98adb..b47ab00 100644 --- a/src/adapter/v16.ts +++ b/src/adapter/v16.ts @@ -103,15 +103,16 @@ export default defineMojiAdapter({ } const expandedHex = expandHexRange(hex); - const emojiVersion = extractEmojiVersion(lineComment) ?? Number.parseFloat(ctx.version); + const emojiVersion = extractEmojiVersion(lineComment); const emoji: EmojiData = { description: lineComment, hexcode: "", gender: null, properties: [(property as Property) || "Emoji"], - unicodeVersion: extractUnicodeVersion(emojiVersion, 16.0), - version: emojiVersion, + // TODO: use correct unicode version + unicodeVersion: extractUnicodeVersion(emojiVersion, "16.0"), + emojiVersion, }; for (const hex of expandedHex) { diff --git a/src/cli.ts b/src/cli.ts index 995a5c4..1d82c8a 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -239,9 +239,9 @@ cli.command( throw new Error(`no adapter found for version ${version}`); } - const { groups } = await adapter.metadata!({ version, force }); + const { groups, emojiMetadata } = await adapter.metadata!({ version, force }); - await fs.ensureDir(`./data/v${version}`); + await fs.ensureDir(`./data/v${version}/metadata`); await fs.writeFile( `./data/v${version}/groups.json`, @@ -249,14 +249,14 @@ cli.command( "utf-8", ); - return fs.writeFile( - `./data/v${version}/metadata.json`, - JSON.stringify({ groups }, null, 2), + return Object.entries(emojiMetadata).map(([group, metadata]) => fs.writeFile( + `./data/v${version}/metadata/${group}.json`, + JSON.stringify(metadata, null, 2), "utf-8", - ); + )); }); - const results = await Promise.allSettled(promises); + const results = await Promise.allSettled(promises.flat()); for (const result of results) { if (result.status === "rejected") { diff --git a/src/types.ts b/src/types.ts index 649f058..e6d8021 100644 --- a/src/types.ts +++ b/src/types.ts @@ -17,13 +17,24 @@ export interface EmojiComponent { } +export interface EmojiMetadata { + group: string; + subgroup: string; + qualifier: string; + unicodeVersion: string | null; + emojiVersion: string | null; + description: string; + emoji: string | null; + hexcodes: string[]; +} + export interface EmojiData { description: string; gender: string | null; hexcode: string; properties: Property[]; - unicodeVersion: number | null; - version: number; + unicodeVersion: string | null; + emojiVersion: string | null; } export interface EmojiShortcode { diff --git a/src/utils.ts b/src/utils.ts index 6551dcb..1515f8a 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -272,44 +272,61 @@ export function extractVersion(text: string): string | null { * The version should be in the format "E{major}.{minor}" (e.g. "E14.0"). * * @param {string} comment - The comment string to extract the version from - * @returns {number | null} The parsed version number, or null if no valid version was found + * @returns {string | null} The parsed version number, or null if no valid version was found * * @example * ```ts - * extractEmojiVersion("E14.0") // returns 14.0 + * extractEmojiVersion("E14.0") // returns "14.0" * extractEmojiVersion("Something else") // returns null * ``` */ -export function extractEmojiVersion(comment: string): number | null { +export function extractEmojiVersion(comment: string): string | null { const version = comment.match(/E(\d+\.\d)/); if (version != null && version[1] != null) { - return Number.parseFloat(version[1].trim()); + return version[1].trim(); } return null; } // https://unicode.org/reports/tr51/#EmojiVersions -export function extractUnicodeVersion(emojiVersion: number, unicodeVersion?: number): number { +export function extractUnicodeVersion(emojiVersion: string | null, unicodeVersion?: string): string | null { + const coercedEmojiVersion = semver.coerce(emojiVersion); + const coercedUnicodeVersion = semver.coerce(unicodeVersion); + + if (coercedEmojiVersion == null || coercedUnicodeVersion == null) { + return null; + } + // v11+ aligned emoji and unicode specs (except for minor versions) - if (emojiVersion >= 11) { - return unicodeVersion ? Math.min(emojiVersion, unicodeVersion) : emojiVersion; + if (semver.gte(coercedEmojiVersion, "11.0.0")) { + // if the unicode version is not provided, we will return the emoji version. + if (unicodeVersion == null) { + return emojiVersion; + } + + // return the smallest version between the emoji and unicode version. + if (semver.lt(coercedEmojiVersion, coercedUnicodeVersion)) { + return emojiVersion; + } + + return unicodeVersion; } switch (emojiVersion) { - case 0.7: - return 7; - case 1: - case 2: - return 8; - case 3: - case 4: - return 9; - case 5: - return 10; + case "0.7": + return "7.0"; + case "1.0": + case "2.0": + return "8.0"; + case "3.0": + case "4.0": + return "9.0"; + case "5.0": + return "10.0"; default: // v6 is the first unicode spec emojis appeared in - return 6; + return "6.0"; } } diff --git a/test/utils.test.ts b/test/utils.test.ts index e3ac9b8..f260a96 100644 --- a/test/utils.test.ts +++ b/test/utils.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { slugify } from "../src/utils"; +import { extractEmojiVersion, slugify } from "../src/utils"; describe("slugify", () => { it("should convert string to slug format", () => { @@ -30,3 +30,23 @@ describe("slugify", () => { expect(slugify("HELLO WORLD")).toBe("hello-world"); }); }); + +describe("extractEmojiVersion", () => { + it("should extract valid emoji version numbers", () => { + expect(extractEmojiVersion("E14.0")).toBe("14.0"); + expect(extractEmojiVersion("E15.1")).toBe("15.1"); + expect(extractEmojiVersion("E5.0")).toBe("5.0"); + }); + + it("should return null for invalid formats", () => { + expect(extractEmojiVersion("14.0")).toBeNull(); + expect(extractEmojiVersion("Hello E14")).toBeNull(); + expect(extractEmojiVersion("E14")).toBeNull(); + expect(extractEmojiVersion("")).toBeNull(); + }); + + it("should handle whitespace", () => { + expect(extractEmojiVersion(" E14.0 ")).toBe("14.0"); + expect(extractEmojiVersion("E 14.0")).toBeNull(); + }); +}); From 7eb81307d3241b180d90308f177aaddc9f1f3e90 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 07:11:35 +0100 Subject: [PATCH 06/22] feat: add unicode version to adapter context --- src/adapter/base.ts | 11 +++++------ src/adapter/index.ts | 3 ++- src/adapter/v16.ts | 19 +++++++++---------- src/cli.ts | 10 +++++----- src/utils.ts | 25 +++++++++++++++++++++++++ 5 files changed, 46 insertions(+), 22 deletions(-) diff --git a/src/adapter/base.ts b/src/adapter/base.ts index 555bdae..fc4d3f3 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -16,16 +16,16 @@ export default defineMojiAdapter({ description: "base adapter", range: "*", metadata: async (ctx) => { - if (ctx.version === "1.0" || ctx.version === "2.0" || ctx.version === "3.0") { - console.warn(`version ${ctx.version} does not have group data`); + if (ctx.emojiVersion === "1.0" || ctx.emojiVersion === "2.0" || ctx.emojiVersion === "3.0") { + console.warn(`version ${ctx.emojiVersion} does not have group data`); return { groups: [], emojiMetadata: {}, }; } - return fetchCache(`https://unicode.org/Public/emoji/${ctx.version}/emoji-test.txt`, { - cacheKey: `v${ctx.version}/metadata.json`, + return fetchCache(`https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-test.txt`, { + cacheKey: `v${ctx.emojiVersion}/metadata.json`, parser(data) { const lines = data.split("\n"); let currentGroup: EmojiGroup | undefined; @@ -100,8 +100,7 @@ export default defineMojiAdapter({ subgroup: subgroupName, qualifier, emojiVersion: emojiVersion || null, - // TODO: use correct unicode version - unicodeVersion: extractUnicodeVersion(emojiVersion, "16.0"), + unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion), description: trimmedComment || "", emoji: emoji || null, hexcodes: hexcode.split("-"), diff --git a/src/adapter/index.ts b/src/adapter/index.ts index 3da6917..f36ecc8 100644 --- a/src/adapter/index.ts +++ b/src/adapter/index.ts @@ -43,7 +43,8 @@ export interface MojiAdapter { } export interface BaseAdapterContext { - version: string; + emojiVersion: string; + unicodeVersion: string; force: boolean; } diff --git a/src/adapter/v16.ts b/src/adapter/v16.ts index b47ab00..5166cb4 100644 --- a/src/adapter/v16.ts +++ b/src/adapter/v16.ts @@ -13,12 +13,12 @@ export default defineMojiAdapter({ sequences: async (ctx) => { const [sequences, zwj] = await Promise.all([ { - cacheKey: `v${ctx.version}/sequences.json`, - url: `https://unicode.org/Public/emoji/${ctx.version}/emoji-sequences.txt`, + cacheKey: `v${ctx.emojiVersion}/sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-sequences.txt`, }, { - cacheKey: `v${ctx.version}/zwj-sequences.json`, - url: `https://unicode.org/Public/emoji/${ctx.version}/emoji-zwj-sequences.txt`, + cacheKey: `v${ctx.emojiVersion}/zwj-sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-zwj-sequences.txt`, }, ].map(async ({ cacheKey, url }) => { return await fetchCache(url, { @@ -70,8 +70,8 @@ export default defineMojiAdapter({ }; }, async emojis(ctx) { - const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.version}.0/ucd/emoji/emoji-data.txt`, { - cacheKey: `v${ctx.version}/emoji-data.json`, + const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, { + cacheKey: `v${ctx.emojiVersion}/emoji-data.json`, parser(data) { const lines = data.split("\n"); @@ -110,8 +110,7 @@ export default defineMojiAdapter({ hexcode: "", gender: null, properties: [(property as Property) || "Emoji"], - // TODO: use correct unicode version - unicodeVersion: extractUnicodeVersion(emojiVersion, "16.0"), + unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion), emojiVersion, }; @@ -135,8 +134,8 @@ export default defineMojiAdapter({ return emojiData; }, variations: async (ctx) => { - return fetchCache(`https://unicode.org/Public/${ctx.version}.0/ucd/emoji/emoji-variation-sequences.txt`, { - cacheKey: `v${ctx.version}/variations.json`, + return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, { + cacheKey: `v${ctx.emojiVersion}/variations.json`, parser(data) { const lines = data.split("\n"); diff --git a/src/cli.ts b/src/cli.ts index 1d82c8a..d449993 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -6,7 +6,7 @@ import yargs, { type Argv } from "yargs"; import pkg from "../package.json" with { type: "json" }; import { resolveAdapter } from "./adapters"; import { SUPPORTED_EMOJI_VERSIONS } from "./constants"; -import { getAllEmojiVersions } from "./utils"; +import { extractUnicodeVersion, getAllEmojiVersions, getUnicodeVersionByEmojiVersion } from "./utils"; import { isNotImplementedError } from "./utils/errors"; import { readLockfile, writeLockfile } from "./utils/lockfile"; @@ -54,7 +54,7 @@ cli.command( throw new Error(`no adapter found for version ${version}`); } - const emojis = await adapter.emojis!({ version, force }); + const emojis = await adapter.emojis!({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); await fs.ensureDir(`./data/v${version}`); return fs.writeFile( @@ -114,7 +114,7 @@ cli.command( throw new Error(`no adapter found for version ${version}`); } - const variations = await adapter.variations!({ version, force }); + const variations = await adapter.variations!({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); await fs.ensureDir(`./data/v${version}`); return fs.writeFile( @@ -174,7 +174,7 @@ cli.command( throw new Error(`no adapter found for version ${version}`); } - const { sequences, zwj } = await adapter.sequences!({ version, force }); + const { sequences, zwj } = await adapter.sequences!({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); await fs.ensureDir(`./data/v${version}`); await fs.writeFile( @@ -239,7 +239,7 @@ cli.command( throw new Error(`no adapter found for version ${version}`); } - const { groups, emojiMetadata } = await adapter.metadata!({ version, force }); + const { groups, emojiMetadata } = await adapter.metadata!({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); await fs.ensureDir(`./data/v${version}/metadata`); diff --git a/src/utils.ts b/src/utils.ts index 1515f8a..8140e75 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -330,3 +330,28 @@ export function extractUnicodeVersion(emojiVersion: string | null, unicodeVersio return "6.0"; } } + +export function getUnicodeVersionByEmojiVersion(emojiVersion: string): string { + const coercedEmojiVersion = semver.coerce(emojiVersion); + + if (coercedEmojiVersion == null) { + throw new Error(`invalid emoji version: ${emojiVersion}`); + } + + if (semver.gte(coercedEmojiVersion, "11.0.0")) { + return emojiVersion; + } + + switch (emojiVersion) { + case "1.0": + case "2.0": + return "8.0"; + case "3.0": + case "4.0": + return "9.0"; + case "5.0": + return "10.0"; + default: + throw new Error(`invalid emoji version: ${emojiVersion}`); + } +} From b8dadc52e18c2266b22fd0edd91e1520dade5058 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 07:12:42 +0100 Subject: [PATCH 07/22] chore: lint --- src/cli.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli.ts b/src/cli.ts index d449993..fe76dca 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -6,7 +6,7 @@ import yargs, { type Argv } from "yargs"; import pkg from "../package.json" with { type: "json" }; import { resolveAdapter } from "./adapters"; import { SUPPORTED_EMOJI_VERSIONS } from "./constants"; -import { extractUnicodeVersion, getAllEmojiVersions, getUnicodeVersionByEmojiVersion } from "./utils"; +import { getAllEmojiVersions, getUnicodeVersionByEmojiVersion } from "./utils"; import { isNotImplementedError } from "./utils/errors"; import { readLockfile, writeLockfile } from "./utils/lockfile"; From 071afb2a25cef0e692046596c5bb803da719b827 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 07:20:15 +0100 Subject: [PATCH 08/22] refactor: remove errors and merge into base adapter --- src/adapter/base.ts | 3 +-- src/adapter/index.ts | 7 +++++++ src/cli.ts | 10 +++++----- src/utils/errors.ts | 10 ---------- 4 files changed, 13 insertions(+), 17 deletions(-) delete mode 100644 src/utils/errors.ts diff --git a/src/adapter/base.ts b/src/adapter/base.ts index fc4d3f3..e58d8ae 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -1,9 +1,8 @@ import type { EmojiGroup, EmojiMetadata } from "../types"; import { red } from "farver/fast"; -import { defineMojiAdapter } from "../adapter"; +import { defineMojiAdapter, MojisNotImplemented } from "../adapter"; import { extractEmojiVersion, extractUnicodeVersion, slugify } from "../utils"; import { fetchCache } from "../utils/cache"; -import { MojisNotImplemented } from "../utils/errors"; function notImplemented(adapterFn: string) { return async () => { diff --git a/src/adapter/index.ts b/src/adapter/index.ts index f36ecc8..961424f 100644 --- a/src/adapter/index.ts +++ b/src/adapter/index.ts @@ -88,3 +88,10 @@ export function defineMojiAdapter(adapter: MojiAdapter): MojiAdapter { return adapter; } + +export class MojisNotImplemented extends Error { + constructor(message: string) { + super(message); + this.name = "MojisNotImplemented"; + } +} diff --git a/src/cli.ts b/src/cli.ts index fe76dca..7e6fcd5 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -4,10 +4,10 @@ import fs from "fs-extra"; import semver from "semver"; import yargs, { type Argv } from "yargs"; import pkg from "../package.json" with { type: "json" }; +import { MojisNotImplemented } from "./adapter"; import { resolveAdapter } from "./adapters"; import { SUPPORTED_EMOJI_VERSIONS } from "./constants"; import { getAllEmojiVersions, getUnicodeVersionByEmojiVersion } from "./utils"; -import { isNotImplementedError } from "./utils/errors"; import { readLockfile, writeLockfile } from "./utils/lockfile"; const cli = yargs(process.argv.slice(2)) @@ -68,7 +68,7 @@ cli.command( for (const result of results) { if (result.status === "rejected") { - if (isNotImplementedError(result.reason)) { + if (result.reason instanceof MojisNotImplemented) { console.warn(yellow("warning:"), result.reason.message); continue; } @@ -128,7 +128,7 @@ cli.command( for (const result of results) { if (result.status === "rejected") { - if (isNotImplementedError(result.reason)) { + if (result.reason instanceof MojisNotImplemented) { console.warn(yellow("warning:"), result.reason.message); continue; } @@ -193,7 +193,7 @@ cli.command( for (const result of results) { if (result.status === "rejected") { - if (isNotImplementedError(result.reason)) { + if (result.reason instanceof MojisNotImplemented) { console.warn(yellow("warning:"), result.reason.message); continue; } @@ -260,7 +260,7 @@ cli.command( for (const result of results) { if (result.status === "rejected") { - if (isNotImplementedError(result.reason)) { + if (result.reason instanceof MojisNotImplemented) { console.warn(yellow("warning:"), result.reason.message); continue; } diff --git a/src/utils/errors.ts b/src/utils/errors.ts deleted file mode 100644 index 4cc1dcb..0000000 --- a/src/utils/errors.ts +++ /dev/null @@ -1,10 +0,0 @@ -export class MojisNotImplemented extends Error { - constructor(message: string) { - super(message); - this.name = "MojisNotImplemented"; - } -} - -export function isNotImplementedError(err: Error): err is MojisNotImplemented { - return err instanceof MojisNotImplemented; -} From c49edcff6a304ae284638641babb6647c89dd374 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 07:49:04 +0100 Subject: [PATCH 09/22] feat: add unicodeNames function to fetch and parse Unicode names for emojis --- src/adapter/base.ts | 26 ++++++++++++++++++++++++++ src/adapter/index.ts | 9 +++++++-- src/adapter/v16.ts | 7 ++++++- src/cli.ts | 11 +++++++++-- src/types.ts | 1 + src/utils/cache.ts | 5 ++++- 6 files changed, 53 insertions(+), 6 deletions(-) diff --git a/src/adapter/base.ts b/src/adapter/base.ts index e58d8ae..516411b 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -118,4 +118,30 @@ export default defineMojiAdapter({ emojis: notImplemented("emojis"), variations: notImplemented("variations"), shortcodes: notImplemented("shortcodes"), + unicodeNames: async (ctx) => { + return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/UnicodeData.txt`, { + cacheKey: `v${ctx.emojiVersion}/unicode-names.json`, + parser(data) { + const lines = data.split("\n"); + const unicodeNames: Record = {}; + + for (const line of lines) { + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + const [hex, name] = line.split(";").map((col) => col.trim()); + + if (hex == null || name == null) { + throw new Error(`invalid line: ${line}`); + } + + unicodeNames[hex] = name; + } + + return unicodeNames; + }, + bypassCache: ctx.force, + }); + }, }); diff --git a/src/adapter/index.ts b/src/adapter/index.ts index 961424f..a18835a 100644 --- a/src/adapter/index.ts +++ b/src/adapter/index.ts @@ -1,4 +1,4 @@ -import type { EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation } from "../types"; +import type { EmojiData, EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation } from "../types"; import semver from "semver"; export interface MojiAdapter { @@ -40,6 +40,8 @@ export interface MojiAdapter { shortcodes?: ShortcodeFn; metadata?: MetadataFn; + + unicodeNames?: UnicodeNamesFn; } export interface BaseAdapterContext { @@ -48,8 +50,11 @@ export interface BaseAdapterContext { force: boolean; } +export type UnicodeNamesFn = (ctx: BaseAdapterContext) => Promise>; export type SequenceFn = (ctx: BaseAdapterContext) => Promise<{ zwj: EmojiSequence[]; sequences: EmojiSequence[] }>; -export type EmojiFn = (ctx: BaseAdapterContext) => Promise; +export type EmojiFn = (ctx: BaseAdapterContext) => Promise<{ + emojiData: Record; +}>; export type EmojiVariationFn = (ctx: BaseAdapterContext) => Promise; export type ShortcodeFn = (ctx: BaseAdapterContext & { providers: string[]; diff --git a/src/adapter/v16.ts b/src/adapter/v16.ts index 5166cb4..b1946e0 100644 --- a/src/adapter/v16.ts +++ b/src/adapter/v16.ts @@ -70,6 +70,8 @@ export default defineMojiAdapter({ }; }, async emojis(ctx) { + const unicodeNames = await this.unicodeNames!(ctx); + const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, { cacheKey: `v${ctx.emojiVersion}/emoji-data.json`, parser(data) { @@ -112,6 +114,7 @@ export default defineMojiAdapter({ properties: [(property as Property) || "Emoji"], unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion), emojiVersion, + name: unicodeNames[hex] || "", }; for (const hex of expandedHex) { @@ -131,7 +134,9 @@ export default defineMojiAdapter({ bypassCache: ctx.force, }); - return emojiData; + return { + emojiData, + }; }, variations: async (ctx) => { return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, { diff --git a/src/cli.ts b/src/cli.ts index 7e6fcd5..c01a5ce 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -54,12 +54,19 @@ cli.command( throw new Error(`no adapter found for version ${version}`); } - const emojis = await adapter.emojis!({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); + const { emojiData } = await adapter.emojis!({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); await fs.ensureDir(`./data/v${version}`); + + await fs.writeFile( + `./data/v${version}/emoji-data.json`, + JSON.stringify(emojiData, null, 2), + "utf-8", + ); + return fs.writeFile( `./data/v${version}/emojis.json`, - JSON.stringify(emojis, null, 2), + JSON.stringify({}, null, 2), "utf-8", ); }); diff --git a/src/types.ts b/src/types.ts index e6d8021..07dc2ac 100644 --- a/src/types.ts +++ b/src/types.ts @@ -35,6 +35,7 @@ export interface EmojiData { properties: Property[]; unicodeVersion: string | null; emojiVersion: string | null; + name: string; } export interface EmojiShortcode { diff --git a/src/utils/cache.ts b/src/utils/cache.ts index c457afd..d485608 100644 --- a/src/utils/cache.ts +++ b/src/utils/cache.ts @@ -1,5 +1,6 @@ import path from "node:path"; import process from "node:process"; +import { green } from "farver/fast"; import fs from "fs-extra"; const CACHE_FOLDER = path.resolve(process.cwd(), ".cache"); @@ -56,9 +57,11 @@ export async function fetchCache( ): Promise { const { cacheKey, parser, bypassCache, options: fetchOptions } = options; - const cache = LOCAL_CACHE[cacheKey] || await readCache(cacheKey); + const cache = LOCAL_CACHE[cacheKey] || await readCache(cacheKey); if (!bypassCache && cache != null) { + // eslint-disable-next-line no-console + console.log(`cache hit: ${green(cacheKey)}`); LOCAL_CACHE[cacheKey] = cache; return cache as TData; From b314647b1717962c4051f1019b4e82dd47a91d89 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 08:23:33 +0100 Subject: [PATCH 10/22] refactor: migrate to a single generate command --- src/adapter/base.ts | 4 +- src/adapter/index.ts | 4 +- src/cli.ts | 285 +++++++++++++++---------------------------- src/schemas.ts | 18 +++ src/types.ts | 5 + 5 files changed, 128 insertions(+), 188 deletions(-) create mode 100644 src/schemas.ts diff --git a/src/adapter/base.ts b/src/adapter/base.ts index 516411b..e661bd1 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -1,5 +1,5 @@ import type { EmojiGroup, EmojiMetadata } from "../types"; -import { red } from "farver/fast"; +import { red, yellow } from "farver/fast"; import { defineMojiAdapter, MojisNotImplemented } from "../adapter"; import { extractEmojiVersion, extractUnicodeVersion, slugify } from "../utils"; import { fetchCache } from "../utils/cache"; @@ -16,7 +16,7 @@ export default defineMojiAdapter({ range: "*", metadata: async (ctx) => { if (ctx.emojiVersion === "1.0" || ctx.emojiVersion === "2.0" || ctx.emojiVersion === "3.0") { - console.warn(`version ${ctx.emojiVersion} does not have group data`); + console.warn(`skipping metadata for emoji version ${yellow(ctx.emojiVersion)}, as it's not supported.`); return { groups: [], emojiMetadata: {}, diff --git a/src/adapter/index.ts b/src/adapter/index.ts index a18835a..1a882ba 100644 --- a/src/adapter/index.ts +++ b/src/adapter/index.ts @@ -1,4 +1,4 @@ -import type { EmojiData, EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation } from "../types"; +import type { EmojiData, EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation, ShortcodeProvider } from "../types"; import semver from "semver"; export interface MojiAdapter { @@ -58,7 +58,7 @@ export type EmojiFn = (ctx: BaseAdapterContext) => Promise<{ export type EmojiVariationFn = (ctx: BaseAdapterContext) => Promise; export type ShortcodeFn = (ctx: BaseAdapterContext & { providers: string[]; -}) => Promise; +}) => Promise>; export type MetadataFn = (ctx: BaseAdapterContext) => Promise<{ groups: EmojiGroup[]; emojiMetadata: Record>; diff --git a/src/cli.ts b/src/cli.ts index c01a5ce..d285a40 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -2,11 +2,13 @@ import process from "node:process"; import { green, red, yellow } from "farver/fast"; import fs from "fs-extra"; import semver from "semver"; +import { parseAsync } from "valibot"; import yargs, { type Argv } from "yargs"; import pkg from "../package.json" with { type: "json" }; import { MojisNotImplemented } from "./adapter"; import { resolveAdapter } from "./adapters"; import { SUPPORTED_EMOJI_VERSIONS } from "./constants"; +import { SHORTCODE_PROVIDERS_SCHEMA } from "./schemas"; import { getAllEmojiVersions, getUnicodeVersionByEmojiVersion } from "./utils"; import { readLockfile, writeLockfile } from "./utils/lockfile"; @@ -21,25 +23,43 @@ const cli = yargs(process.argv.slice(2)) .demandCommand(1, ""); cli.command( - "generate:emojis ", - "Generate emoji data for the specified versions", + "generate ", + "generate emoji data for the specified versions", (args) => commonOptions(args) .positional("versions", { type: "string", description: "emoji versions to generate", }) + .option("generators", { + type: "array", + description: "generators to use", + default: ["metadata", "sequences", "variations", "emojis", "shortcodes"], + }) + .option("shortcode-providers", { + type: "array", + description: "shortcode providers to use", + default: ["github", "joypixels", "iamcal"], + }) .strict().help(), async (args) => { const force = args.force ?? false; const versions = Array.isArray(args.versions) ? args.versions : [args.versions]; + const generators = Array.isArray(args.generators) ? args.generators : [args.generators]; + + function isGeneratorEnabled(generator: string) { + return generators.includes(generator); + } - if (SUPPORTED_EMOJI_VERSIONS.every((v) => !versions.includes(v))) { - console.error(red("error:"), "unsupported emoji versions"); - console.log("supported versions:", SUPPORTED_EMOJI_VERSIONS.join(", ")); + const unsupported = versions.filter((v) => !SUPPORTED_EMOJI_VERSIONS.includes(v)); + + // require that all versions are supported, otherwise exit + if (unsupported.length > 0) { + console.error(red("error:"), `version(s) ${unsupported.map((v) => yellow(v)).join(", ")} is not supported`); process.exit(1); } console.log("generating emoji data for versions", versions.map((v) => yellow(v)).join(", ")); + console.log(`using the following generators ${args.generators.map((g) => yellow(g)).join(", ")}`); const promises = versions.map(async (version) => { const coerced = semver.coerce(version); @@ -54,216 +74,113 @@ cli.command( throw new Error(`no adapter found for version ${version}`); } - const { emojiData } = await adapter.emojis!({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); - - await fs.ensureDir(`./data/v${version}`); - - await fs.writeFile( - `./data/v${version}/emoji-data.json`, - JSON.stringify(emojiData, null, 2), - "utf-8", - ); - - return fs.writeFile( - `./data/v${version}/emojis.json`, - JSON.stringify({}, null, 2), - "utf-8", - ); - }); - - const results = await Promise.allSettled(promises); - - for (const result of results) { - if (result.status === "rejected") { - if (result.reason instanceof MojisNotImplemented) { - console.warn(yellow("warning:"), result.reason.message); - continue; + if (isGeneratorEnabled("metadata")) { + if (adapter.metadata == null) { + throw new MojisNotImplemented("metadata"); } - console.error(red("error:"), result.reason); - } - } - - console.log(green("done")); - }, -); - -cli.command( - "generate:variations ", - "Generate emoji variations for the specified versions", - (args) => commonOptions(args) - .positional("versions", { - type: "string", - description: "emoji versions to generate", - }) - .strict().help(), - async (args) => { - const force = args.force ?? false; - const versions = Array.isArray(args.versions) ? args.versions : [args.versions]; - - if (SUPPORTED_EMOJI_VERSIONS.every((v) => !versions.includes(v))) { - console.error(red("error:"), "unsupported emoji versions"); - console.log("supported versions:", SUPPORTED_EMOJI_VERSIONS.join(", ")); - process.exit(1); - } - console.log("generating emoji variations for versions", versions.map((v) => yellow(v)).join(", ")); - - const promises = versions.map(async (version) => { - const coerced = semver.coerce(version); + const { groups, emojiMetadata } = await adapter.metadata({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); - if (coerced == null) { - throw new Error(`invalid version ${version}`); - } + await fs.ensureDir(`./data/v${version}/metadata`); - const adapter = resolveAdapter(coerced.version); + await fs.writeFile( + `./data/v${version}/groups.json`, + JSON.stringify(groups, null, 2), + "utf-8", + ); - if (adapter == null) { - throw new Error(`no adapter found for version ${version}`); + await Promise.all(Object.entries(emojiMetadata).map(([group, metadata]) => fs.writeFile( + `./data/v${version}/metadata/${group}.json`, + JSON.stringify(metadata, null, 2), + "utf-8", + ))); } - const variations = await adapter.variations!({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); - - await fs.ensureDir(`./data/v${version}`); - return fs.writeFile( - `./data/v${version}/variations.json`, - JSON.stringify(variations, null, 2), - "utf-8", - ); - }); - - const results = await Promise.allSettled(promises); - - for (const result of results) { - if (result.status === "rejected") { - if (result.reason instanceof MojisNotImplemented) { - console.warn(yellow("warning:"), result.reason.message); - continue; + if (isGeneratorEnabled("sequences")) { + if (adapter.sequences == null) { + throw new MojisNotImplemented("sequences"); } - console.error(red("error:"), result.reason); - } - } - - console.log(green("done")); - }, -); -cli.command( - "generate:sequences ", - "Generate emoji sequences for the specified versions", - (args) => commonOptions(args) - .positional("versions", { - type: "string", - description: "emoji versions to generate", - }) - .strict().help(), - async (args) => { - const force = args.force ?? false; - const versions = Array.isArray(args.versions) ? args.versions : [args.versions]; - - if (SUPPORTED_EMOJI_VERSIONS.every((v) => !versions.includes(v))) { - console.error(red("error:"), "unsupported emoji versions"); - console.log("supported versions:", SUPPORTED_EMOJI_VERSIONS.join(", ")); - process.exit(1); - } - - console.log("generating emoji group data for versions", versions.map((v) => yellow(v)).join(", ")); - - const promises = versions.map(async (version) => { - const coerced = semver.coerce(version); + const { sequences, zwj } = await adapter.sequences({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); - if (coerced == null) { - throw new Error(`invalid version ${version}`); - } + await fs.ensureDir(`./data/v${version}`); - const adapter = resolveAdapter(coerced.version); + await fs.writeFile( + `./data/v${version}/zwj-sequences.json`, + JSON.stringify(zwj, null, 2), + "utf-8", + ); - if (adapter == null) { - throw new Error(`no adapter found for version ${version}`); + await fs.writeFile( + `./data/v${version}/sequences.json`, + JSON.stringify(sequences, null, 2), + "utf-8", + ); } - const { sequences, zwj } = await adapter.sequences!({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); - - await fs.ensureDir(`./data/v${version}`); - await fs.writeFile( - `./data/v${version}/zwj-sequences.json`, - JSON.stringify(zwj, null, 2), - "utf-8", - ); - return fs.writeFile( - `./data/v${version}/sequences.json`, - JSON.stringify(sequences, null, 2), - "utf-8", - ); - }); + if (isGeneratorEnabled("variations")) { + if (adapter.variations == null) { + throw new MojisNotImplemented("variations"); + } - const results = await Promise.allSettled(promises); + const variations = await adapter.variations({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); - for (const result of results) { - if (result.status === "rejected") { - if (result.reason instanceof MojisNotImplemented) { - console.warn(yellow("warning:"), result.reason.message); - continue; - } - console.error(red("error:"), result.reason); + await fs.ensureDir(`./data/v${version}`); + await fs.writeFile( + `./data/v${version}/variations.json`, + JSON.stringify(variations, null, 2), + "utf-8", + ); } - } - - console.log(green("done")); - }, -); -cli.command( - "generate:metadata ", - "Generate emoji metadata for the specified versions", - (args) => commonOptions(args) - .positional("versions", { - type: "string", - description: "emoji versions to generate", - }) - .strict().help(), - async (args) => { - const force = args.force ?? false; - const versions = Array.isArray(args.versions) ? args.versions : [args.versions]; + if (isGeneratorEnabled("emojis")) { + if (adapter.emojis == null) { + throw new MojisNotImplemented("emojis"); + } - if (SUPPORTED_EMOJI_VERSIONS.every((v) => !versions.includes(v))) { - console.error(red("error:"), "unsupported emoji versions"); - console.log("supported versions:", SUPPORTED_EMOJI_VERSIONS.join(", ")); - process.exit(1); - } + const { emojiData } = await adapter.emojis({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); - console.log("generating emoji metadata for versions", versions.map((v) => yellow(v)).join(", ")); + await fs.ensureDir(`./data/v${version}`); - const promises = versions.map(async (version) => { - const coerced = semver.coerce(version); + await fs.writeFile( + `./data/v${version}/emoji-data.json`, + JSON.stringify(emojiData, null, 2), + "utf-8", + ); - if (coerced == null) { - throw new Error(`invalid version ${version}`); + await fs.writeFile( + `./data/v${version}/emojis.json`, + JSON.stringify({}, null, 2), + "utf-8", + ); } - const adapter = resolveAdapter(coerced.version); + if (isGeneratorEnabled("shortcodes")) { + const providers = await parseAsync(SHORTCODE_PROVIDERS_SCHEMA, args["shortcode-providers"]); - if (adapter == null) { - throw new Error(`no adapter found for version ${version}`); - } + if (providers.length === 0) { + throw new Error("no shortcode providers specified"); + } - const { groups, emojiMetadata } = await adapter.metadata!({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); + if (adapter.shortcodes == null) { + throw new MojisNotImplemented("shortcodes"); + } - await fs.ensureDir(`./data/v${version}/metadata`); + const shortcodes = await adapter.shortcodes({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)!, providers }); - await fs.writeFile( - `./data/v${version}/groups.json`, - JSON.stringify(groups, null, 2), - "utf-8", - ); + await fs.ensureDir(`./data/v${version}/shortcodes`); - return Object.entries(emojiMetadata).map(([group, metadata]) => fs.writeFile( - `./data/v${version}/metadata/${group}.json`, - JSON.stringify(metadata, null, 2), - "utf-8", - )); + for (const provider of providers) { + await fs.writeFile( + `./data/v${version}/shortcodes/${provider}.json`, + JSON.stringify(shortcodes[provider], null, 2), + "utf-8", + ); + } + } }); - const results = await Promise.allSettled(promises.flat()); + const results = await Promise.allSettled(promises); for (const result of results) { if (result.status === "rejected") { diff --git a/src/schemas.ts b/src/schemas.ts new file mode 100644 index 0000000..4656134 --- /dev/null +++ b/src/schemas.ts @@ -0,0 +1,18 @@ +import * as v from "valibot"; + +export const SHORTCODE_PROVIDER_SCHEMA = v.union([ + v.literal("github"), + v.literal("joypixels"), + v.literal("iamcal"), +]); + +export const SHORTCODE_PROVIDERS_SCHEMA = v.array(SHORTCODE_PROVIDER_SCHEMA); + +export const GENERATOR_SCHEMA = v.union([ + v.literal("metadata"), + v.literal("sequences"), + v.literal("emojis"), + v.literal("variations"), + v.literal("shortcodes"), + v.literal("unicode-names"), +]); diff --git a/src/types.ts b/src/types.ts index 07dc2ac..da77dbb 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,3 +1,6 @@ +import type { InferInput } from "valibot"; +import type { SHORTCODE_PROVIDER_SCHEMA } from "./schemas"; + export interface EmojiGroup { name: string; slug: string; @@ -17,6 +20,8 @@ export interface EmojiComponent { } +export type ShortcodeProvider = InferInput; + export interface EmojiMetadata { group: string; subgroup: string; From aa607a1c2bc793b5cec32c5624f958749091271b Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 08:35:30 +0100 Subject: [PATCH 11/22] feat: add consola for improved logging throughout the application --- package.json | 1 + pnpm-lock.yaml | 3 +++ src/adapter/base.ts | 3 ++- src/cli.ts | 23 ++++++++++++----------- src/utils.ts | 5 +++-- src/utils/cache.ts | 4 ++-- 6 files changed, 23 insertions(+), 16 deletions(-) diff --git a/package.json b/package.json index 06df39d..643ff40 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,7 @@ }, "dependencies": { "cac": "^6.7.14", + "consola": "^3.4.0", "farver": "^0.4.0", "fs-extra": "^11.3.0", "semver": "^7.7.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 96f05c7..586cbe8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -11,6 +11,9 @@ importers: cac: specifier: ^6.7.14 version: 6.7.14 + consola: + specifier: ^3.4.0 + version: 3.4.0 farver: specifier: ^0.4.0 version: 0.4.0 diff --git a/src/adapter/base.ts b/src/adapter/base.ts index e661bd1..abb0756 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -1,4 +1,5 @@ import type { EmojiGroup, EmojiMetadata } from "../types"; +import consola from "consola"; import { red, yellow } from "farver/fast"; import { defineMojiAdapter, MojisNotImplemented } from "../adapter"; import { extractEmojiVersion, extractUnicodeVersion, slugify } from "../utils"; @@ -16,7 +17,7 @@ export default defineMojiAdapter({ range: "*", metadata: async (ctx) => { if (ctx.emojiVersion === "1.0" || ctx.emojiVersion === "2.0" || ctx.emojiVersion === "3.0") { - console.warn(`skipping metadata for emoji version ${yellow(ctx.emojiVersion)}, as it's not supported.`); + consola.warn(`skipping metadata for emoji version ${yellow(ctx.emojiVersion)}, as it's not supported.`); return { groups: [], emojiMetadata: {}, diff --git a/src/cli.ts b/src/cli.ts index d285a40..4c45ac6 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,4 +1,5 @@ import process from "node:process"; +import consola from "consola"; import { green, red, yellow } from "farver/fast"; import fs from "fs-extra"; import semver from "semver"; @@ -54,12 +55,12 @@ cli.command( // require that all versions are supported, otherwise exit if (unsupported.length > 0) { - console.error(red("error:"), `version(s) ${unsupported.map((v) => yellow(v)).join(", ")} is not supported`); + consola.error(`version(s) ${unsupported.map((v) => yellow(v)).join(", ")} is not supported`); process.exit(1); } - console.log("generating emoji data for versions", versions.map((v) => yellow(v)).join(", ")); - console.log(`using the following generators ${args.generators.map((g) => yellow(g)).join(", ")}`); + consola.info("generating emoji data for versions", versions.map((v) => yellow(v)).join(", ")); + consola.info(`using the following generators ${args.generators.map((g) => yellow(g)).join(", ")}`); const promises = versions.map(async (version) => { const coerced = semver.coerce(version); @@ -185,14 +186,14 @@ cli.command( for (const result of results) { if (result.status === "rejected") { if (result.reason instanceof MojisNotImplemented) { - console.warn(yellow("warning:"), result.reason.message); + consola.warn(result.reason.message); continue; } - console.error(red("error:"), result.reason); + consola.error(result.reason); } } - console.log(green("done")); + consola.info(green("done")); }, ); @@ -211,7 +212,7 @@ cli.command( const latest = versions[0]; - console.log("latest emoji version:", yellow(latest?.emoji_version)); + consola.log("latest emoji version:", yellow(latest?.emoji_version)); if (args.writeLockfile) { const lockfile = await readLockfile(); @@ -219,7 +220,7 @@ cli.command( lockfile.latestVersion = latest?.emoji_version; await writeLockfile(lockfile); - console.log(`updated ${yellow("emojis.lock")}`); + consola.log(`updated ${yellow("emojis.lock")}`); } }, ); @@ -236,8 +237,8 @@ cli.command( async (args) => { const versions = await getAllEmojiVersions(); - console.log("all available versions:"); - console.log(versions.map((v) => `${yellow(v.emoji_version)}${v.draft ? ` ${red("(draft)")}` : ""}`).join(", ")); + consola.log("all available versions:"); + consola.log(versions.map((v) => `${yellow(v.emoji_version)}${v.draft ? ` ${red("(draft)")}` : ""}`).join(", ")); if (args.writeLockfile) { const lockfile = await readLockfile(); @@ -245,7 +246,7 @@ cli.command( lockfile.versions = Array.from(versions); await writeLockfile(lockfile); - console.log(`updated ${yellow("emojis.lock")}`); + consola.log(`updated ${yellow("emojis.lock")}`); } }, ); diff --git a/src/utils.ts b/src/utils.ts index 8140e75..ac420bc 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,4 +1,5 @@ import type { EmojiVersion } from "./utils/lockfile"; +import consola from "consola"; import semver from "semver"; import { NO_EMOJI_VERSIONS } from "./constants"; @@ -60,7 +61,7 @@ export async function getAllEmojiVersions(): Promise { } if (rootResult.status === "rejected" || emojiResult.status === "rejected") { - console.error({ + consola.error({ root: rootResult.status === "rejected" ? rootResult.reason : "ok", emoji: emojiResult.status === "rejected" ? emojiResult.reason : "ok", }); @@ -206,7 +207,7 @@ export async function getCurrentDraftVersion(): Promise { } if (rootResult.status === "rejected" || emojiResult.status === "rejected") { - console.error({ + consola.error({ root: rootResult.status === "rejected" ? rootResult.reason : "ok", emoji: emojiResult.status === "rejected" ? emojiResult.reason : "ok", }); diff --git a/src/utils/cache.ts b/src/utils/cache.ts index d485608..ccbdb4b 100644 --- a/src/utils/cache.ts +++ b/src/utils/cache.ts @@ -1,5 +1,6 @@ import path from "node:path"; import process from "node:process"; +import consola from "consola"; import { green } from "farver/fast"; import fs from "fs-extra"; @@ -60,8 +61,7 @@ export async function fetchCache( const cache = LOCAL_CACHE[cacheKey] || await readCache(cacheKey); if (!bypassCache && cache != null) { - // eslint-disable-next-line no-console - console.log(`cache hit: ${green(cacheKey)}`); + consola.debug(`cache hit: ${green(cacheKey)}`); LOCAL_CACHE[cacheKey] = cache; return cache as TData; From b25d8e7edd4268a66040322a21f6f6fa3d54b9e6 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 10:07:23 +0100 Subject: [PATCH 12/22] feat: implement shortcodes functionality and update emoji data structure --- src/adapter/base.ts | 30 ++++++++++++++++++++-- src/adapter/index.ts | 10 +++----- src/adapter/v16.ts | 17 ++++++++++--- src/cli.ts | 19 +++++++------- src/schemas.ts | 2 -- src/shortcode/github.ts | 51 ++++++++++++++++++++++++++++++++++++++ src/types.ts | 11 +++----- src/utils/hexcode.ts | 14 +++++++++++ test/utils/hexcode.test.ts | 28 ++++++++++++++++++++- 9 files changed, 149 insertions(+), 33 deletions(-) create mode 100644 src/shortcode/github.ts diff --git a/src/adapter/base.ts b/src/adapter/base.ts index abb0756..b86661b 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -1,4 +1,4 @@ -import type { EmojiGroup, EmojiMetadata } from "../types"; +import type { EmojiGroup, EmojiMetadata, EmojiShortcode, ShortcodeProvider } from "../types"; import consola from "consola"; import { red, yellow } from "farver/fast"; import { defineMojiAdapter, MojisNotImplemented } from "../adapter"; @@ -118,7 +118,6 @@ export default defineMojiAdapter({ sequences: notImplemented("sequences"), emojis: notImplemented("emojis"), variations: notImplemented("variations"), - shortcodes: notImplemented("shortcodes"), unicodeNames: async (ctx) => { return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/UnicodeData.txt`, { cacheKey: `v${ctx.emojiVersion}/unicode-names.json`, @@ -145,4 +144,31 @@ export default defineMojiAdapter({ bypassCache: ctx.force, }); }, + async shortcodes(ctx) { + const providers = ctx.providers; + + if (providers.length === 0) { + throw new Error("no shortcode providers specified"); + } + + const shortcodes: Partial> = {}; + + if (this.emojis == null) { + throw new MojisNotImplemented("emojis"); + } + + const emojis = await this.emojis(ctx); + + if (providers.includes("github")) { + const githubShortcodesFn = await import("../shortcode/github").then((m) => m.generateGitHubShortcodes); + + shortcodes.github = await githubShortcodesFn({ + emojis, + force: ctx.force, + version: ctx.emojiVersion, + }); + } + + return shortcodes; + }, }); diff --git a/src/adapter/index.ts b/src/adapter/index.ts index 1a882ba..c729e99 100644 --- a/src/adapter/index.ts +++ b/src/adapter/index.ts @@ -1,4 +1,4 @@ -import type { EmojiData, EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation, ShortcodeProvider } from "../types"; +import type { Emoji, EmojiData, EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation, ShortcodeProvider } from "../types"; import semver from "semver"; export interface MojiAdapter { @@ -52,13 +52,11 @@ export interface BaseAdapterContext { export type UnicodeNamesFn = (ctx: BaseAdapterContext) => Promise>; export type SequenceFn = (ctx: BaseAdapterContext) => Promise<{ zwj: EmojiSequence[]; sequences: EmojiSequence[] }>; -export type EmojiFn = (ctx: BaseAdapterContext) => Promise<{ - emojiData: Record; -}>; +export type EmojiFn = (ctx: BaseAdapterContext) => Promise>; export type EmojiVariationFn = (ctx: BaseAdapterContext) => Promise; export type ShortcodeFn = (ctx: BaseAdapterContext & { - providers: string[]; -}) => Promise>; + providers: ShortcodeProvider[]; +}) => Promise>>; export type MetadataFn = (ctx: BaseAdapterContext) => Promise<{ groups: EmojiGroup[]; emojiMetadata: Record>; diff --git a/src/adapter/v16.ts b/src/adapter/v16.ts index b1946e0..87ef446 100644 --- a/src/adapter/v16.ts +++ b/src/adapter/v16.ts @@ -1,4 +1,4 @@ -import type { EmojiData, EmojiSequence, EmojiVariation, Property } from "../types"; +import type { Emoji, EmojiData, EmojiSequence, EmojiShortcode, EmojiVariation, Property, ShortcodeProvider } from "../types"; import { defineMojiAdapter } from "../adapter"; import { FEMALE_SIGN, MALE_SIGN } from "../constants"; import { extractEmojiVersion, extractUnicodeVersion } from "../utils"; @@ -71,6 +71,11 @@ export default defineMojiAdapter({ }, async emojis(ctx) { const unicodeNames = await this.unicodeNames!(ctx); + const { sequences, zwj } = await this.sequences!(ctx); + const metadata = await this.metadata!(ctx); + const variations = await this.variations!(ctx); + + const emojis: Record = {}; const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, { cacheKey: `v${ctx.emojiVersion}/emoji-data.json`, @@ -134,9 +139,13 @@ export default defineMojiAdapter({ bypassCache: ctx.force, }); - return { - emojiData, - }; + // join names, metadata, variations, sequences, zwj + + for (const [hex, data] of Object.entries(emojiData)) { + + } + + return {}; }, variations: async (ctx) => { return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, { diff --git a/src/cli.ts b/src/cli.ts index 4c45ac6..e927f05 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -3,7 +3,7 @@ import consola from "consola"; import { green, red, yellow } from "farver/fast"; import fs from "fs-extra"; import semver from "semver"; -import { parseAsync } from "valibot"; +import { type InferInput, parseAsync } from "valibot"; import yargs, { type Argv } from "yargs"; import pkg from "../package.json" with { type: "json" }; import { MojisNotImplemented } from "./adapter"; @@ -39,7 +39,7 @@ cli.command( .option("shortcode-providers", { type: "array", description: "shortcode providers to use", - default: ["github", "joypixels", "iamcal"], + default: ["github"] satisfies InferInput, }) .strict().help(), async (args) => { @@ -139,19 +139,13 @@ cli.command( throw new MojisNotImplemented("emojis"); } - const { emojiData } = await adapter.emojis({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); + const emojis = await adapter.emojis({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); await fs.ensureDir(`./data/v${version}`); - await fs.writeFile( - `./data/v${version}/emoji-data.json`, - JSON.stringify(emojiData, null, 2), - "utf-8", - ); - await fs.writeFile( `./data/v${version}/emojis.json`, - JSON.stringify({}, null, 2), + JSON.stringify(emojis, null, 2), "utf-8", ); } @@ -172,6 +166,11 @@ cli.command( await fs.ensureDir(`./data/v${version}/shortcodes`); for (const provider of providers) { + if (shortcodes[provider] == null) { + consola.warn(`no shortcodes found for provider ${provider}`); + continue; + } + await fs.writeFile( `./data/v${version}/shortcodes/${provider}.json`, JSON.stringify(shortcodes[provider], null, 2), diff --git a/src/schemas.ts b/src/schemas.ts index 4656134..5e42e5b 100644 --- a/src/schemas.ts +++ b/src/schemas.ts @@ -2,8 +2,6 @@ import * as v from "valibot"; export const SHORTCODE_PROVIDER_SCHEMA = v.union([ v.literal("github"), - v.literal("joypixels"), - v.literal("iamcal"), ]); export const SHORTCODE_PROVIDERS_SCHEMA = v.array(SHORTCODE_PROVIDER_SCHEMA); diff --git a/src/shortcode/github.ts b/src/shortcode/github.ts new file mode 100644 index 0000000..6546b23 --- /dev/null +++ b/src/shortcode/github.ts @@ -0,0 +1,51 @@ +import type { EmojiShortcode } from "../types"; +import { fetchCache } from "../utils/cache"; + +export interface ShortcodeOptions { + version: string; + force: boolean; + emojis: any; +} + +export async function generateGitHubShortcodes(options: ShortcodeOptions): Promise { + const { emojis, force, version } = options; + + const githubEmojis = await fetchCache>("https://api.github.com/emojis", { + cacheKey: `v${version}/github-emojis.json`, + bypassCache: force, + parser(data) { + return JSON.parse(data); + }, + options: { + headers: { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "mojis.dev", + }, + }, + }); + + const shortcodes: EmojiShortcode[] = []; + + for (const [shortcode, url] of Object.entries(githubEmojis)) { + const match = url.match(/emoji\/unicode\/([\da-z-]+)\.png/i); + + // github has some standard emojis that don't have a unicode representation + if (!match || !match[1]) { + continue; + } + + const hexcode = match[1].toUpperCase(); + + if (emojis[hexcode] == null) { + continue; + } + + shortcodes.push({ + code: shortcode, + vendor: "github", + source: "github", + }); + } + + return shortcodes; +} diff --git a/src/types.ts b/src/types.ts index da77dbb..c9bf356 100644 --- a/src/types.ts +++ b/src/types.ts @@ -10,14 +10,9 @@ export interface EmojiGroup { export interface Emoji { name: string; slug: string; - components: EmojiComponent[]; - hexcode: string; - type: "ZWJ" | "SINGLE"; -} - -// eslint-disable-next-line ts/no-empty-object-type -export interface EmojiComponent { - + code: string; + hexcodes: string[]; + shortcodes: EmojiShortcode[]; } export type ShortcodeProvider = InferInput; diff --git a/src/utils/hexcode.ts b/src/utils/hexcode.ts index a1d22db..f7f1cfa 100644 --- a/src/utils/hexcode.ts +++ b/src/utils/hexcode.ts @@ -52,3 +52,17 @@ export function expandHexRange(hex: string): string[] { return [hex]; } + +/** + * Removes specific unicode variation selectors from a hex string. + * Specifically removes: + * - 200D (Zero Width Joiner) + * - FE0E (Variation Selector-15, text style) + * - FE0F (Variation Selector-16, emoji style) + * + * @param {string} hex - The hex string to strip variation selectors from + * @returns {string} The hex string with variation selectors removed + */ +export function stripHex(hex: string): string { + return hex.replace(/(-| )?(200D|FE0E|FE0F)/g, ""); +} diff --git a/test/utils/hexcode.test.ts b/test/utils/hexcode.test.ts index 4283a51..c9d2c68 100644 --- a/test/utils/hexcode.test.ts +++ b/test/utils/hexcode.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { expandHexRange, fromHexToCodepoint } from "../../src/utils/hexcode"; +import { expandHexRange, fromHexToCodepoint, stripHex } from "../../src/utils/hexcode"; describe("fromHexToCodepoint", () => { it("should convert hex string with hyphens to codepoints", () => { @@ -36,3 +36,29 @@ describe("expandHexRange", () => { expect(expandHexRange("1F600..1F602")).toEqual(["1F600", "1F601", "1F602"]); }); }); + +describe("stripHex", () => { + it("should remove zero width joiner (200D)", () => { + expect(stripHex("1F468-200D-1F469")).toBe("1F468-1F469"); + }); + + it("should remove text style selector (FE0E)", () => { + expect(stripHex("2764-FE0E")).toBe("2764"); + }); + + it("should remove emoji style selector (FE0F)", () => { + expect(stripHex("2764-FE0F")).toBe("2764"); + }); + + it("should remove multiple variation selectors", () => { + expect(stripHex("1F468-200D-2764-FE0F-200D-1F468")).toBe("1F468-2764-1F468"); + }); + + it("should handle string without variation selectors", () => { + expect(stripHex("1F600")).toBe("1F600"); + }); + + it("should handle space-separated values", () => { + expect(stripHex("1F468 200D 1F469")).toBe("1F468 1F469"); + }); +}); From 4c21308652a62b319e1f1e87f6cdb398ef826631 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 10:21:19 +0100 Subject: [PATCH 13/22] feat: restructure emoji handling to include emojiData and flatten emoji groups --- src/adapter/base.ts | 16 +++++++++++++--- src/adapter/index.ts | 6 +++++- src/adapter/v16.ts | 19 ++++++++----------- src/cli.ts | 22 +++++++++++++++++++--- 4 files changed, 45 insertions(+), 18 deletions(-) diff --git a/src/adapter/base.ts b/src/adapter/base.ts index b86661b..ecc7585 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -1,4 +1,4 @@ -import type { EmojiGroup, EmojiMetadata, EmojiShortcode, ShortcodeProvider } from "../types"; +import type { Emoji, EmojiGroup, EmojiMetadata, EmojiShortcode, ShortcodeProvider } from "../types"; import consola from "consola"; import { red, yellow } from "farver/fast"; import { defineMojiAdapter, MojisNotImplemented } from "../adapter"; @@ -157,13 +157,23 @@ export default defineMojiAdapter({ throw new MojisNotImplemented("emojis"); } - const emojis = await this.emojis(ctx); + const { emojis } = await this.emojis(ctx); + + const flattenedEmojis = Object.values(emojis).reduce((acc, subgroup) => { + for (const hexcodes of Object.values(subgroup)) { + for (const [hexcode, emoji] of Object.entries(hexcodes)) { + acc[hexcode] = emoji; + } + } + + return acc; + }, {} as Record); if (providers.includes("github")) { const githubShortcodesFn = await import("../shortcode/github").then((m) => m.generateGitHubShortcodes); shortcodes.github = await githubShortcodesFn({ - emojis, + emojis: flattenedEmojis, force: ctx.force, version: ctx.emojiVersion, }); diff --git a/src/adapter/index.ts b/src/adapter/index.ts index c729e99..ec34801 100644 --- a/src/adapter/index.ts +++ b/src/adapter/index.ts @@ -52,7 +52,11 @@ export interface BaseAdapterContext { export type UnicodeNamesFn = (ctx: BaseAdapterContext) => Promise>; export type SequenceFn = (ctx: BaseAdapterContext) => Promise<{ zwj: EmojiSequence[]; sequences: EmojiSequence[] }>; -export type EmojiFn = (ctx: BaseAdapterContext) => Promise>; +export type EmojiFn = (ctx: BaseAdapterContext) => Promise<{ + emojiData: Record; + // group: subgroup: hexcode: emoji + emojis: Record>>; +}>; export type EmojiVariationFn = (ctx: BaseAdapterContext) => Promise; export type ShortcodeFn = (ctx: BaseAdapterContext & { providers: ShortcodeProvider[]; diff --git a/src/adapter/v16.ts b/src/adapter/v16.ts index 87ef446..0e15a78 100644 --- a/src/adapter/v16.ts +++ b/src/adapter/v16.ts @@ -71,11 +71,11 @@ export default defineMojiAdapter({ }, async emojis(ctx) { const unicodeNames = await this.unicodeNames!(ctx); - const { sequences, zwj } = await this.sequences!(ctx); - const metadata = await this.metadata!(ctx); - const variations = await this.variations!(ctx); + // const { sequences, zwj } = await this.sequences!(ctx); + // const metadata = await this.metadata!(ctx); + // const variations = await this.variations!(ctx); - const emojis: Record = {}; + const emojis: Record>> = {}; const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, { cacheKey: `v${ctx.emojiVersion}/emoji-data.json`, @@ -139,13 +139,10 @@ export default defineMojiAdapter({ bypassCache: ctx.force, }); - // join names, metadata, variations, sequences, zwj - - for (const [hex, data] of Object.entries(emojiData)) { - - } - - return {}; + return { + emojiData, + emojis, + }; }, variations: async (ctx) => { return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, { diff --git a/src/cli.ts b/src/cli.ts index e927f05..b6419d4 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -139,15 +139,31 @@ cli.command( throw new MojisNotImplemented("emojis"); } - const emojis = await adapter.emojis({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); + const { emojiData, emojis } = await adapter.emojis({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); await fs.ensureDir(`./data/v${version}`); await fs.writeFile( - `./data/v${version}/emojis.json`, - JSON.stringify(emojis, null, 2), + `./data/v${version}/emoji-data.json`, + JSON.stringify(emojiData, null, 2), "utf-8", ); + + for (const [group, subgroup] of Object.entries(emojis)) { + await fs.ensureDir(`./data/v${version}/emojis/${group}`); + + for (const hexcodes of Object.values(subgroup)) { + await fs.ensureDir(`./data/v${version}/emojis/${group}/${subgroup}`); + + for (const [hexcode, emoji] of Object.entries(hexcodes)) { + await fs.writeFile( + `./data/v${version}/emojis/${group}/${subgroup}/${hexcode}.json`, + JSON.stringify(emoji, null, 2), + "utf-8", + ); + } + } + } } if (isGeneratorEnabled("shortcodes")) { From 49bf2cf25b7d5944ee84521374e364fca4431727 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 10:25:32 +0100 Subject: [PATCH 14/22] chore: dump --- src/adapter/base.ts | 2 +- src/adapter/v13.ts | 187 +++++++++++++++++++++++++++++++++++++++++++- src/adapter/v14.ts | 187 +++++++++++++++++++++++++++++++++++++++++++- src/adapter/v15.ts | 185 +++++++++++++++++++++++++++++++++++++++++++ src/adapter/v16.ts | 2 +- 5 files changed, 559 insertions(+), 4 deletions(-) diff --git a/src/adapter/base.ts b/src/adapter/base.ts index ecc7585..10e3350 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -119,7 +119,7 @@ export default defineMojiAdapter({ emojis: notImplemented("emojis"), variations: notImplemented("variations"), unicodeNames: async (ctx) => { - return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/UnicodeData.txt`, { + return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion === "13.1" ? "13.0" : ctx.emojiVersion}.0/ucd/UnicodeData.txt`, { cacheKey: `v${ctx.emojiVersion}/unicode-names.json`, parser(data) { const lines = data.split("\n"); diff --git a/src/adapter/v13.ts b/src/adapter/v13.ts index 1715acc..dd0b8a0 100644 --- a/src/adapter/v13.ts +++ b/src/adapter/v13.ts @@ -1,8 +1,193 @@ -import { defineMojiAdapter } from "."; +import type { Emoji, EmojiData, EmojiSequence, EmojiVariation, Property } from "../types"; +import { defineMojiAdapter } from "../adapter"; +import { FEMALE_SIGN, MALE_SIGN } from "../constants"; +import { extractEmojiVersion, extractUnicodeVersion } from "../utils"; +import { fetchCache } from "../utils/cache"; +import { expandHexRange } from "../utils/hexcode"; export default defineMojiAdapter({ name: "v13", description: "adapter for version 13 & 13.1", range: ">=13.0.0 <14.0.0", extend: "base", + sequences: async (ctx) => { + const [sequences, zwj] = await Promise.all([ + { + cacheKey: `v${ctx.emojiVersion}/sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-sequences.txt`, + }, + { + cacheKey: `v${ctx.emojiVersion}/zwj-sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-zwj-sequences.txt`, + }, + ].map(async ({ cacheKey, url }) => { + return await fetchCache(url, { + cacheKey, + parser(data) { + const lines = data.split("\n"); + + const sequences: EmojiSequence[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, property, description] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null || description == null) { + throw new Error(`invalid line: ${line}`); + } + + const expandedHex = expandHexRange(hex); + + for (const hex of expandedHex) { + sequences.push({ + hex: hex.replace(/\s+/g, "-"), + property, + description, + gender: hex.includes(FEMALE_SIGN) ? "female" : hex.includes(MALE_SIGN) ? "male" : null, + }); + } + } + + return sequences; + }, + bypassCache: ctx.force, + }); + })); + + return { + sequences: sequences || [], + zwj: zwj || [], + }; + }, + async emojis(ctx) { + const unicodeNames = await this.unicodeNames!(ctx); + // const { sequences, zwj } = await this.sequences!(ctx); + // const metadata = await this.metadata!(ctx); + // const variations = await this.variations!(ctx); + + const emojis: Record>> = {}; + + const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion === "13.1" ? "13.0" : ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, { + cacheKey: `v${ctx.emojiVersion}/emoji-data.json`, + parser(data) { + const lines = data.split("\n"); + + const emojiData: Record = {}; + + for (const line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + const lineCommentIndex = line.indexOf("#"); + const lineComment = lineCommentIndex !== -1 ? line.slice(lineCommentIndex + 1).trim() : ""; + + let [hex, property] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null) { + throw new Error(`invalid line: ${line}`); + } + + // remove line comment from property + const propertyCommentIndex = property.indexOf("#"); + if (propertyCommentIndex !== -1) { + property = property.slice(0, propertyCommentIndex).trim(); + } + + if (property === "Extended_Pictographic") { + continue; + } + + const expandedHex = expandHexRange(hex); + const emojiVersion = extractEmojiVersion(lineComment); + + const emoji: EmojiData = { + description: lineComment, + hexcode: "", + gender: null, + properties: [(property as Property) || "Emoji"], + unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion), + emojiVersion, + name: unicodeNames[hex] || "", + }; + + for (const hex of expandedHex) { + if (emojiData[hex] != null) { + emojiData[hex].properties = [...new Set([...emojiData[hex].properties, ...emoji.properties])]; + } else { + emojiData[hex] = { + ...emoji, + hexcode: hex.replace(/\s+/g, "-"), + }; + } + } + } + + return emojiData; + }, + bypassCache: ctx.force, + }); + + return { + emojiData, + emojis, + }; + }, + variations: async (ctx) => { + return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion === "13.1" ? "13.0" : ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, { + cacheKey: `v${ctx.emojiVersion}/variations.json`, + parser(data) { + const lines = data.split("\n"); + + const variations: EmojiVariation[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, style] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || style == null) { + throw new Error(`invalid line: ${line}`); + } + + const hexcode = hex.replace(/\s+/g, "-"); + + const type = style.replace("style", "").trim(); + + if (type !== "text" && type !== "emoji") { + throw new Error(`invalid style: ${style}`); + } + + variations.push({ + emoji: type === "emoji" ? hexcode : null, + text: type === "text" ? hexcode : null, + property: ["Emoji"], + }); + } + + return variations; + }, + bypassCache: ctx.force, + }); + }, }); diff --git a/src/adapter/v14.ts b/src/adapter/v14.ts index b91a58b..6ff55fd 100644 --- a/src/adapter/v14.ts +++ b/src/adapter/v14.ts @@ -1,8 +1,193 @@ -import { defineMojiAdapter } from "."; +import type { Emoji, EmojiData, EmojiSequence, EmojiVariation, Property } from "../types"; +import { defineMojiAdapter } from "../adapter"; +import { FEMALE_SIGN, MALE_SIGN } from "../constants"; +import { extractEmojiVersion, extractUnicodeVersion } from "../utils"; +import { fetchCache } from "../utils/cache"; +import { expandHexRange } from "../utils/hexcode"; export default defineMojiAdapter({ name: "v14", description: "adapter for version 14", range: ">=14.0.0 <15.0.0", extend: "base", + sequences: async (ctx) => { + const [sequences, zwj] = await Promise.all([ + { + cacheKey: `v${ctx.emojiVersion}/sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-sequences.txt`, + }, + { + cacheKey: `v${ctx.emojiVersion}/zwj-sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-zwj-sequences.txt`, + }, + ].map(async ({ cacheKey, url }) => { + return await fetchCache(url, { + cacheKey, + parser(data) { + const lines = data.split("\n"); + + const sequences: EmojiSequence[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, property, description] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null || description == null) { + throw new Error(`invalid line: ${line}`); + } + + const expandedHex = expandHexRange(hex); + + for (const hex of expandedHex) { + sequences.push({ + hex: hex.replace(/\s+/g, "-"), + property, + description, + gender: hex.includes(FEMALE_SIGN) ? "female" : hex.includes(MALE_SIGN) ? "male" : null, + }); + } + } + + return sequences; + }, + bypassCache: ctx.force, + }); + })); + + return { + sequences: sequences || [], + zwj: zwj || [], + }; + }, + async emojis(ctx) { + const unicodeNames = await this.unicodeNames!(ctx); + // const { sequences, zwj } = await this.sequences!(ctx); + // const metadata = await this.metadata!(ctx); + // const variations = await this.variations!(ctx); + + const emojis: Record>> = {}; + + const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, { + cacheKey: `v${ctx.emojiVersion}/emoji-data.json`, + parser(data) { + const lines = data.split("\n"); + + const emojiData: Record = {}; + + for (const line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + const lineCommentIndex = line.indexOf("#"); + const lineComment = lineCommentIndex !== -1 ? line.slice(lineCommentIndex + 1).trim() : ""; + + let [hex, property] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null) { + throw new Error(`invalid line: ${line}`); + } + + // remove line comment from property + const propertyCommentIndex = property.indexOf("#"); + if (propertyCommentIndex !== -1) { + property = property.slice(0, propertyCommentIndex).trim(); + } + + if (property === "Extended_Pictographic") { + continue; + } + + const expandedHex = expandHexRange(hex); + const emojiVersion = extractEmojiVersion(lineComment); + + const emoji: EmojiData = { + description: lineComment, + hexcode: "", + gender: null, + properties: [(property as Property) || "Emoji"], + unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion), + emojiVersion, + name: unicodeNames[hex] || "", + }; + + for (const hex of expandedHex) { + if (emojiData[hex] != null) { + emojiData[hex].properties = [...new Set([...emojiData[hex].properties, ...emoji.properties])]; + } else { + emojiData[hex] = { + ...emoji, + hexcode: hex.replace(/\s+/g, "-"), + }; + } + } + } + + return emojiData; + }, + bypassCache: ctx.force, + }); + + return { + emojiData, + emojis, + }; + }, + variations: async (ctx) => { + return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, { + cacheKey: `v${ctx.emojiVersion}/variations.json`, + parser(data) { + const lines = data.split("\n"); + + const variations: EmojiVariation[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, style] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || style == null) { + throw new Error(`invalid line: ${line}`); + } + + const hexcode = hex.replace(/\s+/g, "-"); + + const type = style.replace("style", "").trim(); + + if (type !== "text" && type !== "emoji") { + throw new Error(`invalid style: ${style}`); + } + + variations.push({ + emoji: type === "emoji" ? hexcode : null, + text: type === "text" ? hexcode : null, + property: ["Emoji"], + }); + } + + return variations; + }, + bypassCache: ctx.force, + }); + }, }); diff --git a/src/adapter/v15.ts b/src/adapter/v15.ts index 9055ef0..3a8f879 100644 --- a/src/adapter/v15.ts +++ b/src/adapter/v15.ts @@ -1,8 +1,193 @@ +import type { Emoji, EmojiData, EmojiSequence, EmojiVariation, Property } from "../types"; import { defineMojiAdapter } from "../adapter"; +import { FEMALE_SIGN, MALE_SIGN } from "../constants"; +import { extractEmojiVersion, extractUnicodeVersion } from "../utils"; +import { fetchCache } from "../utils/cache"; +import { expandHexRange } from "../utils/hexcode"; export default defineMojiAdapter({ name: "v15", description: "adapter for version 15 & v15.1", range: ">=15.0.0 <16.0.0", extend: "base", + sequences: async (ctx) => { + const [sequences, zwj] = await Promise.all([ + { + cacheKey: `v${ctx.emojiVersion}/sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-sequences.txt`, + }, + { + cacheKey: `v${ctx.emojiVersion}/zwj-sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-zwj-sequences.txt`, + }, + ].map(async ({ cacheKey, url }) => { + return await fetchCache(url, { + cacheKey, + parser(data) { + const lines = data.split("\n"); + + const sequences: EmojiSequence[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, property, description] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null || description == null) { + throw new Error(`invalid line: ${line}`); + } + + const expandedHex = expandHexRange(hex); + + for (const hex of expandedHex) { + sequences.push({ + hex: hex.replace(/\s+/g, "-"), + property, + description, + gender: hex.includes(FEMALE_SIGN) ? "female" : hex.includes(MALE_SIGN) ? "male" : null, + }); + } + } + + return sequences; + }, + bypassCache: ctx.force, + }); + })); + + return { + sequences: sequences || [], + zwj: zwj || [], + }; + }, + async emojis(ctx) { + const unicodeNames = await this.unicodeNames!(ctx); + // const { sequences, zwj } = await this.sequences!(ctx); + // const metadata = await this.metadata!(ctx); + // const variations = await this.variations!(ctx); + + const emojis: Record>> = {}; + + const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, { + cacheKey: `v${ctx.emojiVersion}/emoji-data.json`, + parser(data) { + const lines = data.split("\n"); + + const emojiData: Record = {}; + + for (const line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + const lineCommentIndex = line.indexOf("#"); + const lineComment = lineCommentIndex !== -1 ? line.slice(lineCommentIndex + 1).trim() : ""; + + let [hex, property] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null) { + throw new Error(`invalid line: ${line}`); + } + + // remove line comment from property + const propertyCommentIndex = property.indexOf("#"); + if (propertyCommentIndex !== -1) { + property = property.slice(0, propertyCommentIndex).trim(); + } + + if (property === "Extended_Pictographic") { + continue; + } + + const expandedHex = expandHexRange(hex); + const emojiVersion = extractEmojiVersion(lineComment); + + const emoji: EmojiData = { + description: lineComment, + hexcode: "", + gender: null, + properties: [(property as Property) || "Emoji"], + unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion), + emojiVersion, + name: unicodeNames[hex] || "", + }; + + for (const hex of expandedHex) { + if (emojiData[hex] != null) { + emojiData[hex].properties = [...new Set([...emojiData[hex].properties, ...emoji.properties])]; + } else { + emojiData[hex] = { + ...emoji, + hexcode: hex.replace(/\s+/g, "-"), + }; + } + } + } + + return emojiData; + }, + bypassCache: ctx.force, + }); + + return { + emojiData, + emojis, + }; + }, + variations: async (ctx) => { + return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, { + cacheKey: `v${ctx.emojiVersion}/variations.json`, + parser(data) { + const lines = data.split("\n"); + + const variations: EmojiVariation[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, style] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || style == null) { + throw new Error(`invalid line: ${line}`); + } + + const hexcode = hex.replace(/\s+/g, "-"); + + const type = style.replace("style", "").trim(); + + if (type !== "text" && type !== "emoji") { + throw new Error(`invalid style: ${style}`); + } + + variations.push({ + emoji: type === "emoji" ? hexcode : null, + text: type === "text" ? hexcode : null, + property: ["Emoji"], + }); + } + + return variations; + }, + bypassCache: ctx.force, + }); + }, }); diff --git a/src/adapter/v16.ts b/src/adapter/v16.ts index 0e15a78..5c67bb4 100644 --- a/src/adapter/v16.ts +++ b/src/adapter/v16.ts @@ -1,4 +1,4 @@ -import type { Emoji, EmojiData, EmojiSequence, EmojiShortcode, EmojiVariation, Property, ShortcodeProvider } from "../types"; +import type { Emoji, EmojiData, EmojiSequence, EmojiVariation, Property } from "../types"; import { defineMojiAdapter } from "../adapter"; import { FEMALE_SIGN, MALE_SIGN } from "../constants"; import { extractEmojiVersion, extractUnicodeVersion } from "../utils"; From 3ce15b567101970f399ebadaff2fafc94a8cb749 Mon Sep 17 00:00:00 2001 From: Lucas Date: Sun, 16 Feb 2025 12:07:35 +0100 Subject: [PATCH 15/22] feat: update emoji version handling to correctly map unicode versions and simplify sorting logic --- src/utils.ts | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/utils.ts b/src/utils.ts index ac420bc..cb05fb7 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -118,23 +118,40 @@ export async function getAllEmojiVersions(): Promise { // if it does, we will update the emoji version. const existing = versions.find((v) => v.unicode_version === version); + let unicode_version = null; + + // the emoji version 13.1 is using the unicode + // 13.0, since it was never released. + if (match[1] === "13.1") { + unicode_version = "13.0.0"; + } + + if (match[1] === "5.0") { + unicode_version = "10.0.0"; + } + + if (match[1] === "4.0" || match[1] === "3.0") { + unicode_version = "9.0.0"; + } + + if (match[1] === "2.0" || match[1] === "1.0") { + unicode_version = "8.0.0"; + } + if (existing) { + existing.unicode_version = unicode_version || existing.unicode_version; existing.emoji_version = match[1]; continue; } versions.push({ emoji_version: match[1], - unicode_version: null, + unicode_version, draft: version === draft, }); } - return versions.sort((a, b) => { - const versionA = a.unicode_version ?? `${a.emoji_version}.0`; - const versionB = b.unicode_version ?? `${b.emoji_version}.0`; - return semver.compare(versionB, versionA); - }); + return versions.sort((a, b) => semver.compare(`${b.emoji_version}.0`, `${a.emoji_version}.0`)); } /** @@ -166,7 +183,7 @@ export async function isEmojiVersionValid(version: string): Promise { // from v1 to v5, there was only major releases. So no v1.1, v1.2, etc. // only, v1.0, v2.0, v3.0, v4.0, v5.0. // if version has any minor or patch, it is invalid. - if (semver.minor(version) !== 0 || semver.patch(version) !== 0) { + if (semver.major(version) <= 5 && (semver.minor(version) !== 0 || semver.patch(version) !== 0)) { return false; } From fdf063f0dd5877b1b85f95b93f9a9e05e5ac3980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20N=C3=B8rg=C3=A5rd?= Date: Thu, 20 Feb 2025 18:56:54 +0100 Subject: [PATCH 16/22] chore: move lockfile out of utils --- src/cli.ts | 2 +- src/{utils => }/lockfile.ts | 0 src/utils.ts | 2 +- test/utils/lockfile.test.ts | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename src/{utils => }/lockfile.ts (100%) diff --git a/src/cli.ts b/src/cli.ts index b6419d4..df6bfc1 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -11,7 +11,7 @@ import { resolveAdapter } from "./adapters"; import { SUPPORTED_EMOJI_VERSIONS } from "./constants"; import { SHORTCODE_PROVIDERS_SCHEMA } from "./schemas"; import { getAllEmojiVersions, getUnicodeVersionByEmojiVersion } from "./utils"; -import { readLockfile, writeLockfile } from "./utils/lockfile"; +import { readLockfile, writeLockfile } from "./lockfile"; const cli = yargs(process.argv.slice(2)) .scriptName("mojis") diff --git a/src/utils/lockfile.ts b/src/lockfile.ts similarity index 100% rename from src/utils/lockfile.ts rename to src/lockfile.ts diff --git a/src/utils.ts b/src/utils.ts index cb05fb7..fe01ee3 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,4 +1,4 @@ -import type { EmojiVersion } from "./utils/lockfile"; +import type { EmojiVersion } from "./lockfile"; import consola from "consola"; import semver from "semver"; import { NO_EMOJI_VERSIONS } from "./constants"; diff --git a/test/utils/lockfile.test.ts b/test/utils/lockfile.test.ts index 0c84b12..79d3f63 100644 --- a/test/utils/lockfile.test.ts +++ b/test/utils/lockfile.test.ts @@ -1,7 +1,7 @@ import fs from "fs-extra"; import { describe, expect, it } from "vitest"; import { testdir } from "vitest-testdirs"; -import { type EmojiLockfile, hasLockfile, readLockfile, writeLockfile } from "../../src/utils/lockfile"; +import { type EmojiLockfile, hasLockfile, readLockfile, writeLockfile } from "../../src/lockfile"; describe("hasLockfile", () => { it("should return true when lockfile exists", async () => { From 5e9706e2b65828c1466ee3ed294e479f19cd7d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20N=C3=B8rg=C3=A5rd?= Date: Thu, 20 Feb 2025 18:57:13 +0100 Subject: [PATCH 17/22] chore: update test path --- test/{utils => }/lockfile.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename test/{utils => }/lockfile.test.ts (98%) diff --git a/test/utils/lockfile.test.ts b/test/lockfile.test.ts similarity index 98% rename from test/utils/lockfile.test.ts rename to test/lockfile.test.ts index 79d3f63..242ad62 100644 --- a/test/utils/lockfile.test.ts +++ b/test/lockfile.test.ts @@ -1,7 +1,7 @@ import fs from "fs-extra"; import { describe, expect, it } from "vitest"; import { testdir } from "vitest-testdirs"; -import { type EmojiLockfile, hasLockfile, readLockfile, writeLockfile } from "../../src/lockfile"; +import { type EmojiLockfile, hasLockfile, readLockfile, writeLockfile } from "../src/lockfile"; describe("hasLockfile", () => { it("should return true when lockfile exists", async () => { From 7c5b445fba6c8c0b76bb629ff5e870c8b590dad0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20N=C3=B8rg=C3=A5rd?= Date: Thu, 20 Feb 2025 20:04:39 +0100 Subject: [PATCH 18/22] feat: add README files for Unicode Emoji and Character Database draft versions --- src/utils.ts | 193 +----------------- src/versions.test.ts | 124 +++++++++++ src/versions.ts | 171 ++++++++++++++++ .../extract-version/emoji/README-invalid.txt | 21 ++ .../extract-version/emoji/README-valid.txt | 21 ++ .../extract-version/root/README-invalid.txt | 24 +++ .../extract-version/root/README-valid.txt | 24 +++ test/utils.test.ts | 22 +- 8 files changed, 393 insertions(+), 207 deletions(-) create mode 100644 src/versions.test.ts create mode 100644 src/versions.ts create mode 100644 test/fixtures/extract-version/emoji/README-invalid.txt create mode 100644 test/fixtures/extract-version/emoji/README-valid.txt create mode 100644 test/fixtures/extract-version/root/README-invalid.txt create mode 100644 test/fixtures/extract-version/root/README-valid.txt diff --git a/src/utils.ts b/src/utils.ts index fe01ee3..01f2167 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -2,6 +2,7 @@ import type { EmojiVersion } from "./lockfile"; import consola from "consola"; import semver from "semver"; import { NO_EMOJI_VERSIONS } from "./constants"; +import { getCurrentDraftVersion } from "./versions"; /** * Converts a string to a URL-friendly slug. @@ -76,6 +77,10 @@ export async function getAllEmojiVersions(): Promise { const draft = await getCurrentDraftVersion(); + if (draft == null) { + throw new Error("failed to fetch draft version"); + } + const versions: EmojiVersion[] = []; for (const match of rootHtml.matchAll(versionRegex)) { @@ -94,7 +99,7 @@ export async function getAllEmojiVersions(): Promise { versions.push({ emoji_version: null, unicode_version: version, - draft: version === draft, + draft: version === draft.unicode_version || version === draft.emoji_version, }); } @@ -147,7 +152,7 @@ export async function getAllEmojiVersions(): Promise { versions.push({ emoji_version: match[1], unicode_version, - draft: version === draft, + draft: version === draft.unicode_version || version === draft.emoji_version, }); } @@ -189,187 +194,3 @@ export async function isEmojiVersionValid(version: string): Promise { return true; } - -/** - * Retrieves the current Unicode draft version by fetching and comparing root and emoji ReadMe files. - * - * This function fetches two ReadMe files from unicode.org: - * - The main draft ReadMe - * - The emoji draft ReadMe - * - * It then extracts and validates the version numbers from both files to ensure they match. - * The emoji version uses major.minor format while the root version uses major.minor.patch. - * - * @returns A Promise that resolves to the current draft version string, or null if not found - * @throws {Error} If either fetch fails - * @throws {Error} If version extraction fails - * @throws {Error} If versions between root and emoji drafts don't match - */ -export async function getCurrentDraftVersion(): Promise { - const [rootResult, emojiResult] = await Promise.allSettled([ - "https://unicode.org/Public/draft/ReadMe.txt", - "https://unicode.org/Public/draft/emoji/ReadMe.txt", - ].map(async (url) => { - const res = await fetch(url); - - if (!res.ok) { - throw new Error(`failed to fetch ${url}: ${res.statusText}`); - } - - return res.text(); - })); - - if (rootResult == null || emojiResult == null) { - throw new Error("failed to fetch draft readme or draft emoji readme"); - } - - if (rootResult.status === "rejected" || emojiResult.status === "rejected") { - consola.error({ - root: rootResult.status === "rejected" ? rootResult.reason : "ok", - emoji: emojiResult.status === "rejected" ? emojiResult.reason : "ok", - }); - - throw new Error("failed to fetch draft readme or draft emoji readme"); - } - - const draftText = rootResult.value; - const emojiText = emojiResult.value; - - const rootVersion = extractVersion(draftText); - const emojiVersion = extractVersion(emojiText); - - if (rootVersion == null || emojiVersion == null) { - throw new Error("failed to extract draft version"); - } - - // the emoji version is only using major.minor format. - // so, we will need to add the last 0 to the version. - - // if they don't match the major and minor version, we will throw an error. - if (semver.major(rootVersion) !== semver.major(`${emojiVersion}.0`) || semver.minor(rootVersion) !== semver.minor(`${emojiVersion}.0`)) { - throw new Error("draft versions do not match"); - } - - return rootVersion; -} - -/** - * Extracts the Unicode version number from a given text string. - * - * @param {string} text - The text to extract the version number from - * @returns {string | null} The extracted version number as a string, or null if no version number is found - * - * @example - * ```ts - * extractVersion("Version 15.0.0 of the Unicode Standard") // Returns "15.0.0" - * extractVersion("Unicode15.1") // Returns "15.1" - * extractVersion("No version here") // Returns null - * ``` - */ -export function extractVersion(text: string): string | null { - const patterns = [ - /Version (\d+\.\d+(?:\.\d+)?) of the Unicode Standard/, // Most explicit - /Unicode(\d+\.\d+(?:\.\d+)?)/, // From URLs - /Version (\d+\.\d+)(?!\.\d)/, // Bare major.minor format - /Unicode Emoji, Version (\d+\.\d+(?:\.\d+)?)/, // Emoji-specific version - ]; - - for (const pattern of patterns) { - const match = text.match(pattern); - - if (match == null || match[1] == null) continue; - - return match[1]; - } - - return null; -} - -/** - * Extracts the emoji version from a comment string. - * The version should be in the format "E{major}.{minor}" (e.g. "E14.0"). - * - * @param {string} comment - The comment string to extract the version from - * @returns {string | null} The parsed version number, or null if no valid version was found - * - * @example - * ```ts - * extractEmojiVersion("E14.0") // returns "14.0" - * extractEmojiVersion("Something else") // returns null - * ``` - */ -export function extractEmojiVersion(comment: string): string | null { - const version = comment.match(/E(\d+\.\d)/); - - if (version != null && version[1] != null) { - return version[1].trim(); - } - - return null; -} - -// https://unicode.org/reports/tr51/#EmojiVersions -export function extractUnicodeVersion(emojiVersion: string | null, unicodeVersion?: string): string | null { - const coercedEmojiVersion = semver.coerce(emojiVersion); - const coercedUnicodeVersion = semver.coerce(unicodeVersion); - - if (coercedEmojiVersion == null || coercedUnicodeVersion == null) { - return null; - } - - // v11+ aligned emoji and unicode specs (except for minor versions) - if (semver.gte(coercedEmojiVersion, "11.0.0")) { - // if the unicode version is not provided, we will return the emoji version. - if (unicodeVersion == null) { - return emojiVersion; - } - - // return the smallest version between the emoji and unicode version. - if (semver.lt(coercedEmojiVersion, coercedUnicodeVersion)) { - return emojiVersion; - } - - return unicodeVersion; - } - - switch (emojiVersion) { - case "0.7": - return "7.0"; - case "1.0": - case "2.0": - return "8.0"; - case "3.0": - case "4.0": - return "9.0"; - case "5.0": - return "10.0"; - default: - // v6 is the first unicode spec emojis appeared in - return "6.0"; - } -} - -export function getUnicodeVersionByEmojiVersion(emojiVersion: string): string { - const coercedEmojiVersion = semver.coerce(emojiVersion); - - if (coercedEmojiVersion == null) { - throw new Error(`invalid emoji version: ${emojiVersion}`); - } - - if (semver.gte(coercedEmojiVersion, "11.0.0")) { - return emojiVersion; - } - - switch (emojiVersion) { - case "1.0": - case "2.0": - return "8.0"; - case "3.0": - case "4.0": - return "9.0"; - case "5.0": - return "10.0"; - default: - throw new Error(`invalid emoji version: ${emojiVersion}`); - } -} diff --git a/src/versions.test.ts b/src/versions.test.ts new file mode 100644 index 0000000..d0f3184 --- /dev/null +++ b/src/versions.test.ts @@ -0,0 +1,124 @@ +import fs from "fs-extra"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { extractEmojiVersion, extractVersion, getCurrentDraftVersion } from "./versions"; + +describe("getCurrentDraftVersion", () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it("returns draft versions when fetches succeed and versions match", async () => { + globalThis.fetch = vi.fn() + .mockImplementationOnce(() => Promise.resolve({ + ok: true, + text: () => Promise.resolve("Version 15.1.0 of the Unicode Standard"), + })) + .mockImplementationOnce(() => Promise.resolve({ + ok: true, + text: () => Promise.resolve("Unicode Emoji, Version 15.1"), + })); + + const result = await getCurrentDraftVersion(); + expect(result).toEqual({ + emoji_version: "15.1", + unicode_version: "15.1.0", + }); + }); + + it("throws when fetch fails", async () => { + globalThis.fetch = vi.fn().mockImplementation(() => Promise.resolve({ + ok: false, + statusText: "Not Found", + })); + + await expect(getCurrentDraftVersion()).rejects.toThrow("failed to fetch"); + }); + + it("throws when versions do not match", async () => { + globalThis.fetch = vi.fn() + .mockImplementationOnce(() => Promise.resolve({ + ok: true, + text: () => Promise.resolve("Version 15.1.0 of the Unicode Standard"), + })) + .mockImplementationOnce(() => Promise.resolve({ + ok: true, + text: () => Promise.resolve("Unicode Emoji, Version 15.0"), + })); + + await expect(getCurrentDraftVersion()).rejects.toThrow("draft versions do not match"); + }); + + it("throws when version extraction fails", async () => { + globalThis.fetch = vi.fn() + .mockImplementationOnce(() => Promise.resolve({ + ok: true, + text: () => Promise.resolve("Invalid version format"), + })) + .mockImplementationOnce(() => Promise.resolve({ + ok: true, + text: () => Promise.resolve("Invalid version format"), + })); + + await expect(getCurrentDraftVersion()).rejects.toThrow("failed to extract draft version"); + }); +}); + +describe("extract version", () => { + it.each([ + { input: "Version 15.1.0 of the Unicode Standard", expected: "15.1.0" }, + { input: "Version 15.1 of the Unicode Standard", expected: "15.1" }, + { input: "Version 15.0 of the Unicode Standard", expected: "15.0" }, + { input: "Version 5.0 of the Unicode Standard", expected: "5.0" }, + ])("should extract valid version numbers (input: $input, expected: $expected)", ({ input, expected }) => { + expect(extractVersion(input)).toBe(expected); + }); + + it.each([ + { input: "Invalid version format", expected: null }, + { input: "Version 15.1.0", expected: null }, + { input: "Version 15", expected: null }, + { input: "", expected: null }, + ])("should return null for invalid formats (input: $input, expected: $expected)", ({ input, expected }) => { + expect(extractVersion(input)).toBe(expected); + }); + + describe.each([ + { name: "emoji draft readme", path: "emoji/README-valid.txt", version: "17.0" }, + { name: "invalid emoji draft readme", path: "emoji/README-invalid.txt", version: null }, + + { name: "draft readme", path: "root/README-valid.txt", version: "17.0.0" }, + { name: "invalid draft readme", path: "root/README-invalid.txt", version: null }, + + ])("extract version from $name", ({ path, version }) => { + it("should extract version from file path", () => { + const content = fs.readFileSync(`./test/fixtures/extract-version/${path}`, "utf-8"); + expect(extractVersion(content)).toBe(version); + }); + }); + + describe("extract emoji version", () => { + it.each([ + { input: "E14.0", expected: "14.0" }, + { input: "E15.1", expected: "15.1" }, + { input: "E5.0", expected: "5.0" }, + ])("should extract valid emoji version numbers (input: $input, expected: $expected)", ({ input, expected }) => { + expect(extractEmojiVersion(input)).toBe(expected); + }); + + it.each([ + { input: "14.0", expected: null }, + { input: "Hello E14", expected: null }, + { input: "E14", expected: null }, + { input: "", expected: null }, + ])("should return null for invalid formats (input: $input, expected: $expected)", ({ input, expected }) => { + expect(extractEmojiVersion(input)).toBe(expected); + }); + + it.each([ + { input: " E14.0 ", expected: "14.0" }, + { input: "E 14.0", expected: null }, + ])("should handle whitespace (input: $input, expected: $expected)", ({ input, expected }) => { + expect(extractEmojiVersion(input)).toBe(expected); + }); + }); +}); diff --git a/src/versions.ts b/src/versions.ts new file mode 100644 index 0000000..5ed3afb --- /dev/null +++ b/src/versions.ts @@ -0,0 +1,171 @@ +import type { EmojiVersion } from "./lockfile"; +import consola from "consola"; +import semver from "semver"; +import { NO_EMOJI_VERSIONS } from "./constants"; + +export interface DraftVersion { + emoji_version: string; + unicode_version: string; +} + +/** + * Retrieves the current Unicode draft version by fetching and comparing root and emoji ReadMe files. + * + * This function fetches two ReadMe files from unicode.org: + * - The main draft ReadMe + * - The emoji draft ReadMe + * + * It then extracts and validates the version numbers from both files to ensure they match. + * The emoji version uses major.minor format while the root version uses major.minor.patch. + * + * @returns A Promise that resolves to the current draft version string, or null if not found + * @throws {Error} If either fetch fails + * @throws {Error} If version extraction fails + * @throws {Error} If versions between root and emoji drafts don't match + */ +export async function getCurrentDraftVersion(): Promise { + const [rootResult, emojiResult] = await Promise.allSettled([ + "https://unicode.org/Public/draft/ReadMe.txt", + "https://unicode.org/Public/draft/emoji/ReadMe.txt", + ].map(async (url) => { + const res = await fetch(url); + + if (!res.ok) { + throw new Error(`failed to fetch ${url}: ${res.statusText}`); + } + + return res.text(); + })); + + if (rootResult == null || emojiResult == null) { + throw new Error("failed to fetch draft readme or draft emoji readme"); + } + + if (rootResult.status === "rejected" || emojiResult.status === "rejected") { + consola.error({ + root: rootResult.status === "rejected" ? rootResult.reason : "ok", + emoji: emojiResult.status === "rejected" ? emojiResult.reason : "ok", + }); + + throw new Error("failed to fetch draft readme or draft emoji readme"); + } + + const draftText = rootResult.value; + const emojiText = emojiResult.value; + + const rootVersion = extractVersion(draftText); + const emojiVersion = extractVersion(emojiText); + + if (rootVersion == null || emojiVersion == null) { + throw new Error("failed to extract draft version"); + } + + // the emoji version is only using major.minor format. + // so, we will need to add the last 0 to the version. + + // if they don't match the major and minor version, we will throw an error. + if (semver.major(rootVersion) !== semver.major(`${emojiVersion}.0`) || semver.minor(rootVersion) !== semver.minor(`${emojiVersion}.0`)) { + throw new Error("draft versions do not match"); + } + + return { + emoji_version: emojiVersion, + unicode_version: rootVersion, + }; +} + +/** + * Extracts the emoji version from a comment string. + * The version should be in the format "E{major}.{minor}" (e.g. "E14.0"). + * + * @param {string} comment - The comment string to extract the version from + * @returns {string | null} The parsed version number, or null if no valid version was found + * + * @example + * ```ts + * extractEmojiVersion("E14.0") // returns "14.0" + * extractEmojiVersion("Something else") // returns null + * ``` + */ +export function extractEmojiVersion(comment: string): string | null { + const version = comment.match(/E(\d+\.\d)/); + + if (version != null && version[1] != null) { + return version[1].trim(); + } + + return null; +} + +/** + * Extracts the Unicode version number from a given text string. + * + * @param {string} text - The text to extract the version number from + * @returns {string | null} The extracted version number as a string, or null if no version number is found + * + * @example + * ```ts + * extractVersion("Version 15.0.0 of the Unicode Standard") // Returns "15.0.0" + * extractVersion("Unicode15.1") // Returns "15.1" + * extractVersion("No version here") // Returns null + * ``` + */ +export function extractVersion(text: string): string | null { + const patterns = [ + /Version (\d+\.\d+(?:\.\d+)?) of the Unicode Standard/, // Most explicit + /Unicode(\d+\.\d+(?:\.\d+)?)/, // From URLs + /Version (\d+\.\d+)(?!\.\d)/, // Bare major.minor format + /Unicode Emoji, Version (\d+\.\d+(?:\.\d+)?)/, // Emoji-specific version + ]; + + for (const pattern of patterns) { + const match = text.match(pattern); + + if (match == null || match[1] == null) continue; + + return match[1]; + } + + return null; +} + +// https://unicode.org/reports/tr51/#EmojiVersions +export function extractUnicodeVersion(emojiVersion: string | null, unicodeVersion?: string): string | null { + const coercedEmojiVersion = semver.coerce(emojiVersion); + const coercedUnicodeVersion = semver.coerce(unicodeVersion); + + if (coercedEmojiVersion == null || coercedUnicodeVersion == null) { + return null; + } + + // v11+ aligned emoji and unicode specs (except for minor versions) + if (semver.gte(coercedEmojiVersion, "11.0.0")) { + // if the unicode version is not provided, we will return the emoji version. + if (unicodeVersion == null) { + return emojiVersion; + } + + // return the smallest version between the emoji and unicode version. + if (semver.lt(coercedEmojiVersion, coercedUnicodeVersion)) { + return emojiVersion; + } + + return unicodeVersion; + } + + switch (emojiVersion) { + case "0.7": + return "7.0"; + case "1.0": + case "2.0": + return "8.0"; + case "3.0": + case "4.0": + return "9.0"; + case "5.0": + return "10.0"; + default: + // v6 is the first unicode spec emojis appeared in + return "6.0"; + } +} diff --git a/test/fixtures/extract-version/emoji/README-invalid.txt b/test/fixtures/extract-version/emoji/README-invalid.txt new file mode 100644 index 0000000..8fa19c2 --- /dev/null +++ b/test/fixtures/extract-version/emoji/README-invalid.txt @@ -0,0 +1,21 @@ +# Unicode Emoji +# © 2025 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html + +This directory contains draft data files for Unicode Emoji, Version x.x + +Public/draft/emoji/ + + emoji-sequences.txt + emoji-zwj-sequences.txt + emoji-test.txt + +The following related files are found in the UCD for Version x.x + +Public/draft/ucd/emoji/ + + emoji-data.txt + emoji-variation-sequences.txt + +For documentation, see UTS #51 Unicode Emoji, Version x.x diff --git a/test/fixtures/extract-version/emoji/README-valid.txt b/test/fixtures/extract-version/emoji/README-valid.txt new file mode 100644 index 0000000..f591646 --- /dev/null +++ b/test/fixtures/extract-version/emoji/README-valid.txt @@ -0,0 +1,21 @@ +# Unicode Emoji +# © 2025 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html + +This directory contains draft data files for Unicode Emoji, Version 17.0 + +Public/draft/emoji/ + + emoji-sequences.txt + emoji-zwj-sequences.txt + emoji-test.txt + +The following related files are found in the UCD for Version 17.0 + +Public/draft/ucd/emoji/ + + emoji-data.txt + emoji-variation-sequences.txt + +For documentation, see UTS #51 Unicode Emoji, Version 17.0 diff --git a/test/fixtures/extract-version/root/README-invalid.txt b/test/fixtures/extract-version/root/README-invalid.txt new file mode 100644 index 0000000..dd1abaa --- /dev/null +++ b/test/fixtures/extract-version/root/README-invalid.txt @@ -0,0 +1,24 @@ +# Unicode Character Database +# Date: 2025-01-29 +# © 2025 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# For documentation, see the following: +# ucd/NamesList.html +# UAX #38, "Unicode Han Database (Unihan)" +# UAX #42, "Unicode Character Database in XML" +# UAX #44, "Unicode Character Database" +# UTS #51, "Unicode Emoji" +# UAX #57, "Unicode Egyptian Hieroglyph Database" +# +# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicodex.x.x/ + +This directory contains the draft data files +for Version x.x.x of the Unicode Standard. + +The "charts" subdirectory contains an archival set of +pdf code charts corresponding exactly to Version x.x.x. + +The other subdirectories contain the data files for the +Unicode Character Database and for the synchronized Unicode Technical Standards. diff --git a/test/fixtures/extract-version/root/README-valid.txt b/test/fixtures/extract-version/root/README-valid.txt new file mode 100644 index 0000000..f2f8dfa --- /dev/null +++ b/test/fixtures/extract-version/root/README-valid.txt @@ -0,0 +1,24 @@ +# Unicode Character Database +# Date: 2025-01-29 +# © 2025 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# For documentation, see the following: +# ucd/NamesList.html +# UAX #38, "Unicode Han Database (Unihan)" +# UAX #42, "Unicode Character Database in XML" +# UAX #44, "Unicode Character Database" +# UTS #51, "Unicode Emoji" +# UAX #57, "Unicode Egyptian Hieroglyph Database" +# +# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicode17.0.0/ + +This directory contains the draft data files +for Version 17.0.0 of the Unicode Standard. + +The "charts" subdirectory contains an archival set of +pdf code charts corresponding exactly to Version 17.0.0. + +The other subdirectories contain the data files for the +Unicode Character Database and for the synchronized Unicode Technical Standards. diff --git a/test/utils.test.ts b/test/utils.test.ts index f260a96..e3ac9b8 100644 --- a/test/utils.test.ts +++ b/test/utils.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { extractEmojiVersion, slugify } from "../src/utils"; +import { slugify } from "../src/utils"; describe("slugify", () => { it("should convert string to slug format", () => { @@ -30,23 +30,3 @@ describe("slugify", () => { expect(slugify("HELLO WORLD")).toBe("hello-world"); }); }); - -describe("extractEmojiVersion", () => { - it("should extract valid emoji version numbers", () => { - expect(extractEmojiVersion("E14.0")).toBe("14.0"); - expect(extractEmojiVersion("E15.1")).toBe("15.1"); - expect(extractEmojiVersion("E5.0")).toBe("5.0"); - }); - - it("should return null for invalid formats", () => { - expect(extractEmojiVersion("14.0")).toBeNull(); - expect(extractEmojiVersion("Hello E14")).toBeNull(); - expect(extractEmojiVersion("E14")).toBeNull(); - expect(extractEmojiVersion("")).toBeNull(); - }); - - it("should handle whitespace", () => { - expect(extractEmojiVersion(" E14.0 ")).toBe("14.0"); - expect(extractEmojiVersion("E 14.0")).toBeNull(); - }); -}); From cae0e524d6ecb1fbfafd7443e78507cdf4606a05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20N=C3=B8rg=C3=A5rd?= Date: Thu, 20 Feb 2025 20:06:30 +0100 Subject: [PATCH 19/22] refactor: rename extractVersion to extractVersionFromReadme for clarity --- src/versions.test.ts | 8 ++++---- src/versions.ts | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/versions.test.ts b/src/versions.test.ts index d0f3184..79d9421 100644 --- a/src/versions.test.ts +++ b/src/versions.test.ts @@ -1,6 +1,6 @@ import fs from "fs-extra"; import { beforeEach, describe, expect, it, vi } from "vitest"; -import { extractEmojiVersion, extractVersion, getCurrentDraftVersion } from "./versions"; +import { extractEmojiVersion, extractVersionFromReadme, getCurrentDraftVersion } from "./versions"; describe("getCurrentDraftVersion", () => { beforeEach(() => { @@ -70,7 +70,7 @@ describe("extract version", () => { { input: "Version 15.0 of the Unicode Standard", expected: "15.0" }, { input: "Version 5.0 of the Unicode Standard", expected: "5.0" }, ])("should extract valid version numbers (input: $input, expected: $expected)", ({ input, expected }) => { - expect(extractVersion(input)).toBe(expected); + expect(extractVersionFromReadme(input)).toBe(expected); }); it.each([ @@ -79,7 +79,7 @@ describe("extract version", () => { { input: "Version 15", expected: null }, { input: "", expected: null }, ])("should return null for invalid formats (input: $input, expected: $expected)", ({ input, expected }) => { - expect(extractVersion(input)).toBe(expected); + expect(extractVersionFromReadme(input)).toBe(expected); }); describe.each([ @@ -92,7 +92,7 @@ describe("extract version", () => { ])("extract version from $name", ({ path, version }) => { it("should extract version from file path", () => { const content = fs.readFileSync(`./test/fixtures/extract-version/${path}`, "utf-8"); - expect(extractVersion(content)).toBe(version); + expect(extractVersionFromReadme(content)).toBe(version); }); }); diff --git a/src/versions.ts b/src/versions.ts index 5ed3afb..b5e7b21 100644 --- a/src/versions.ts +++ b/src/versions.ts @@ -53,8 +53,8 @@ export async function getCurrentDraftVersion(): Promise { const draftText = rootResult.value; const emojiText = emojiResult.value; - const rootVersion = extractVersion(draftText); - const emojiVersion = extractVersion(emojiText); + const rootVersion = extractVersionFromReadme(draftText); + const emojiVersion = extractVersionFromReadme(emojiText); if (rootVersion == null || emojiVersion == null) { throw new Error("failed to extract draft version"); @@ -105,12 +105,12 @@ export function extractEmojiVersion(comment: string): string | null { * * @example * ```ts - * extractVersion("Version 15.0.0 of the Unicode Standard") // Returns "15.0.0" - * extractVersion("Unicode15.1") // Returns "15.1" - * extractVersion("No version here") // Returns null + * extractVersionFromReadme("Version 15.0.0 of the Unicode Standard") // Returns "15.0.0" + * extractVersionFromReadme("Unicode15.1") // Returns "15.1" + * extractVersionFromReadme("No version here") // Returns null * ``` */ -export function extractVersion(text: string): string | null { +export function extractVersionFromReadme(text: string): string | null { const patterns = [ /Version (\d+\.\d+(?:\.\d+)?) of the Unicode Standard/, // Most explicit /Unicode(\d+\.\d+(?:\.\d+)?)/, // From URLs From d6453b568d8588310a7a7a8ab44bde4012e3a153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20N=C3=B8rg=C3=A5rd?= Date: Thu, 20 Feb 2025 20:06:48 +0100 Subject: [PATCH 20/22] chore: lint --- src/cli.ts | 2 +- src/versions.ts | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index df6bfc1..a1bc1b6 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -9,9 +9,9 @@ import pkg from "../package.json" with { type: "json" }; import { MojisNotImplemented } from "./adapter"; import { resolveAdapter } from "./adapters"; import { SUPPORTED_EMOJI_VERSIONS } from "./constants"; +import { readLockfile, writeLockfile } from "./lockfile"; import { SHORTCODE_PROVIDERS_SCHEMA } from "./schemas"; import { getAllEmojiVersions, getUnicodeVersionByEmojiVersion } from "./utils"; -import { readLockfile, writeLockfile } from "./lockfile"; const cli = yargs(process.argv.slice(2)) .scriptName("mojis") diff --git a/src/versions.ts b/src/versions.ts index b5e7b21..8a58752 100644 --- a/src/versions.ts +++ b/src/versions.ts @@ -1,7 +1,5 @@ -import type { EmojiVersion } from "./lockfile"; import consola from "consola"; import semver from "semver"; -import { NO_EMOJI_VERSIONS } from "./constants"; export interface DraftVersion { emoji_version: string; From 8730e217d19d56f297ae6b030666bc6ac58fc485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20N=C3=B8rg=C3=A5rd?= Date: Thu, 20 Feb 2025 20:35:48 +0100 Subject: [PATCH 21/22] feat: add vitest-fetch-mock for enhanced testing and setup configuration --- package.json | 1 + pnpm-lock.yaml | 13 ++++ src/utils.ts | 131 -------------------------------- src/versions.ts | 133 +++++++++++++++++++++++++++++++++ test/__setup.ts | 6 ++ {src => test}/versions.test.ts | 62 +++++++-------- vitest.config.ts | 9 +++ 7 files changed, 187 insertions(+), 168 deletions(-) create mode 100644 test/__setup.ts rename {src => test}/versions.test.ts (71%) create mode 100644 vitest.config.ts diff --git a/package.json b/package.json index 643ff40..a657baf 100644 --- a/package.json +++ b/package.json @@ -54,6 +54,7 @@ "tsx": "^4.19.2", "typescript": "^5.7.3", "vitest": "^3.0.5", + "vitest-fetch-mock": "^0.4.3", "vitest-testdirs": "^2.1.1" }, "pnpm": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 586cbe8..66b8afb 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -66,6 +66,9 @@ importers: vitest: specifier: ^3.0.5 version: 3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0) + vitest-fetch-mock: + specifier: ^0.4.3 + version: 0.4.3(vitest@3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0)) vitest-testdirs: specifier: ^2.1.1 version: 2.1.1(vitest@3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0)) @@ -2341,6 +2344,12 @@ packages: yaml: optional: true + vitest-fetch-mock@0.4.3: + resolution: {integrity: sha512-PhuEh+9HCsXFMRPUJilDL7yVDFufoxqk7ze+CNks64UGlfFXaJTn1bLABiNlEc0u25RERXQGj0Tm+M9i6UY9HQ==} + engines: {node: '>=18.0.0'} + peerDependencies: + vitest: '>=2.0.0' + vitest-testdirs@2.1.1: resolution: {integrity: sha512-1bVjra7vT07fFasVYmpWYBYvvGBogrvjBOBk3UVhCD4ESwhU+CcLBEnpo21jBDgM1KWvWQsg3HZgerMnnBDcsQ==} engines: {node: '>=20'} @@ -4809,6 +4818,10 @@ snapshots: tsx: 4.19.2 yaml: 2.7.0 + vitest-fetch-mock@0.4.3(vitest@3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0)): + dependencies: + vitest: 3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0) + vitest-testdirs@2.1.1(vitest@3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0)): dependencies: testdirs: 0.1.4 diff --git a/src/utils.ts b/src/utils.ts index 01f2167..4521bea 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -28,137 +28,6 @@ export function slugify(val: string): string { .replace(/^-+|-+$/g, ""); } -/** - * Retrieves all available emoji versions from Unicode.org. - * This function fetches both the root Unicode directory and the emoji-specific directory - * to compile a comprehensive list of valid emoji versions. - * - * The function performs the following steps: - * 1. Fetches content from Unicode.org's public directories - * 2. Extracts version numbers using regex - * 3. Validates each version - * 4. Normalizes version numbers to valid semver format - * - * @throws {Error} When either the root or emoji page fetch fails - * @returns {Promise} A promise that resolves to an array of emoji versions, - * sorted according to semver rules - */ -export async function getAllEmojiVersions(): Promise { - const [rootResult, emojiResult] = await Promise.allSettled([ - "https://unicode.org/Public/", - "https://unicode.org/Public/emoji/", - ].map(async (url) => { - const res = await fetch(url); - - if (!res.ok) { - throw new Error(`failed to fetch ${url}: ${res.statusText}`); - } - - return res.text(); - })); - - if (rootResult == null || emojiResult == null) { - throw new Error("failed to fetch root or emoji page"); - } - - if (rootResult.status === "rejected" || emojiResult.status === "rejected") { - consola.error({ - root: rootResult.status === "rejected" ? rootResult.reason : "ok", - emoji: emojiResult.status === "rejected" ? emojiResult.reason : "ok", - }); - - throw new Error("failed to fetch root or emoji page"); - } - - const rootHtml = rootResult.value; - const emojiHtml = emojiResult.value; - - const versionRegex = /href="(\d+\.\d+(?:\.\d+)?)\/?"/g; - - const draft = await getCurrentDraftVersion(); - - if (draft == null) { - throw new Error("failed to fetch draft version"); - } - - const versions: EmojiVersion[] = []; - - for (const match of rootHtml.matchAll(versionRegex)) { - if (match == null || match[1] == null) continue; - - const version = match[1]; - - if (!await isEmojiVersionValid(version)) { - continue; - } - - if (versions.some((v) => v.unicode_version === version)) { - continue; - } - - versions.push({ - emoji_version: null, - unicode_version: version, - draft: version === draft.unicode_version || version === draft.emoji_version, - }); - } - - for (const match of emojiHtml.matchAll(versionRegex)) { - if (match == null || match[1] == null) continue; - - let version = match[1]; - - // for the emoji page, the versions is not valid semver. - // so we will add the last 0 to the version. - // handle both 5.0 and 12.0 -> 5.0.0 and 12.0.0 - if (version.length === 3 || version.length === 4) { - version += ".0"; - } - - if (!await isEmojiVersionValid(version)) { - continue; - } - - // check if the unicode_version already exists. - // if it does, we will update the emoji version. - const existing = versions.find((v) => v.unicode_version === version); - - let unicode_version = null; - - // the emoji version 13.1 is using the unicode - // 13.0, since it was never released. - if (match[1] === "13.1") { - unicode_version = "13.0.0"; - } - - if (match[1] === "5.0") { - unicode_version = "10.0.0"; - } - - if (match[1] === "4.0" || match[1] === "3.0") { - unicode_version = "9.0.0"; - } - - if (match[1] === "2.0" || match[1] === "1.0") { - unicode_version = "8.0.0"; - } - - if (existing) { - existing.unicode_version = unicode_version || existing.unicode_version; - existing.emoji_version = match[1]; - continue; - } - - versions.push({ - emoji_version: match[1], - unicode_version, - draft: version === draft.unicode_version || version === draft.emoji_version, - }); - } - - return versions.sort((a, b) => semver.compare(`${b.emoji_version}.0`, `${a.emoji_version}.0`)); -} - /** * Checks if the given emoji version is valid according to Unicode Consortium standards. * diff --git a/src/versions.ts b/src/versions.ts index 8a58752..0051afb 100644 --- a/src/versions.ts +++ b/src/versions.ts @@ -1,5 +1,7 @@ +import type { EmojiVersion } from "./lockfile"; import consola from "consola"; import semver from "semver"; +import { isEmojiVersionValid } from "./utils"; export interface DraftVersion { emoji_version: string; @@ -167,3 +169,134 @@ export function extractUnicodeVersion(emojiVersion: string | null, unicodeVersio return "6.0"; } } + +/** + * Retrieves all available emoji versions from Unicode.org. + * This function fetches both the root Unicode directory and the emoji-specific directory + * to compile a comprehensive list of valid emoji versions. + * + * The function performs the following steps: + * 1. Fetches content from Unicode.org's public directories + * 2. Extracts version numbers using regex + * 3. Validates each version + * 4. Normalizes version numbers to valid semver format + * + * @throws {Error} When either the root or emoji page fetch fails + * @returns {Promise} A promise that resolves to an array of emoji versions, + * sorted according to semver rules + */ +export async function getAllEmojiVersions(): Promise { + const [rootResult, emojiResult] = await Promise.allSettled([ + "https://unicode.org/Public/", + "https://unicode.org/Public/emoji/", + ].map(async (url) => { + const res = await fetch(url); + + if (!res.ok) { + throw new Error(`failed to fetch ${url}: ${res.statusText}`); + } + + return res.text(); + })); + + if (rootResult == null || emojiResult == null) { + throw new Error("failed to fetch root or emoji page"); + } + + if (rootResult.status === "rejected" || emojiResult.status === "rejected") { + consola.error({ + root: rootResult.status === "rejected" ? rootResult.reason : "ok", + emoji: emojiResult.status === "rejected" ? emojiResult.reason : "ok", + }); + + throw new Error("failed to fetch root or emoji page"); + } + + const rootHtml = rootResult.value; + const emojiHtml = emojiResult.value; + + const versionRegex = /href="(\d+\.\d+(?:\.\d+)?)\/?"/g; + + const draft = await getCurrentDraftVersion(); + + if (draft == null) { + throw new Error("failed to fetch draft version"); + } + + const versions: EmojiVersion[] = []; + + for (const match of rootHtml.matchAll(versionRegex)) { + if (match == null || match[1] == null) continue; + + const version = match[1]; + + if (!await isEmojiVersionValid(version)) { + continue; + } + + if (versions.some((v) => v.unicode_version === version)) { + continue; + } + + versions.push({ + emoji_version: null, + unicode_version: version, + draft: version === draft.unicode_version || version === draft.emoji_version, + }); + } + + for (const match of emojiHtml.matchAll(versionRegex)) { + if (match == null || match[1] == null) continue; + + let version = match[1]; + + // for the emoji page, the versions is not valid semver. + // so we will add the last 0 to the version. + // handle both 5.0 and 12.0 -> 5.0.0 and 12.0.0 + if (version.length === 3 || version.length === 4) { + version += ".0"; + } + + if (!await isEmojiVersionValid(version)) { + continue; + } + + // check if the unicode_version already exists. + // if it does, we will update the emoji version. + const existing = versions.find((v) => v.unicode_version === version); + + let unicode_version = null; + + // the emoji version 13.1 is using the unicode + // 13.0, since it was never released. + if (match[1] === "13.1") { + unicode_version = "13.0.0"; + } + + if (match[1] === "5.0") { + unicode_version = "10.0.0"; + } + + if (match[1] === "4.0" || match[1] === "3.0") { + unicode_version = "9.0.0"; + } + + if (match[1] === "2.0" || match[1] === "1.0") { + unicode_version = "8.0.0"; + } + + if (existing) { + existing.unicode_version = unicode_version || existing.unicode_version; + existing.emoji_version = match[1]; + continue; + } + + versions.push({ + emoji_version: match[1], + unicode_version, + draft: version === draft.unicode_version || version === draft.emoji_version, + }); + } + + return versions.sort((a, b) => semver.compare(`${b.emoji_version}.0`, `${a.emoji_version}.0`)); +} diff --git a/test/__setup.ts b/test/__setup.ts new file mode 100644 index 0000000..96e204d --- /dev/null +++ b/test/__setup.ts @@ -0,0 +1,6 @@ +import { vi } from "vitest"; +import createFetchMock from "vitest-fetch-mock"; + +const fetchMocker = createFetchMock(vi); + +fetchMocker.enableMocks(); diff --git a/src/versions.test.ts b/test/versions.test.ts similarity index 71% rename from src/versions.test.ts rename to test/versions.test.ts index 79d9421..9abbb20 100644 --- a/src/versions.test.ts +++ b/test/versions.test.ts @@ -1,22 +1,12 @@ import fs from "fs-extra"; import { beforeEach, describe, expect, it, vi } from "vitest"; -import { extractEmojiVersion, extractVersionFromReadme, getCurrentDraftVersion } from "./versions"; - -describe("getCurrentDraftVersion", () => { - beforeEach(() => { - vi.restoreAllMocks(); - }); +import { extractEmojiVersion, extractVersionFromReadme, getAllEmojiVersions, getCurrentDraftVersion } from "../src/versions"; +describe("get draft version", () => { it("returns draft versions when fetches succeed and versions match", async () => { - globalThis.fetch = vi.fn() - .mockImplementationOnce(() => Promise.resolve({ - ok: true, - text: () => Promise.resolve("Version 15.1.0 of the Unicode Standard"), - })) - .mockImplementationOnce(() => Promise.resolve({ - ok: true, - text: () => Promise.resolve("Unicode Emoji, Version 15.1"), - })); + fetchMock + .mockResponseOnceIf("https://unicode.org/Public/draft/ReadMe.txt", "Version 15.1.0 of the Unicode Standard") + .mockResponseOnceIf("https://unicode.org/Public/draft/emoji/ReadMe.txt", "Unicode Emoji, Version 15.1"); const result = await getCurrentDraftVersion(); expect(result).toEqual({ @@ -26,38 +16,22 @@ describe("getCurrentDraftVersion", () => { }); it("throws when fetch fails", async () => { - globalThis.fetch = vi.fn().mockImplementation(() => Promise.resolve({ - ok: false, - statusText: "Not Found", - })); + fetchMock.mockResponse("Not Found", { status: 404 }); await expect(getCurrentDraftVersion()).rejects.toThrow("failed to fetch"); }); it("throws when versions do not match", async () => { - globalThis.fetch = vi.fn() - .mockImplementationOnce(() => Promise.resolve({ - ok: true, - text: () => Promise.resolve("Version 15.1.0 of the Unicode Standard"), - })) - .mockImplementationOnce(() => Promise.resolve({ - ok: true, - text: () => Promise.resolve("Unicode Emoji, Version 15.0"), - })); + fetchMock + .mockResponseOnceIf("https://unicode.org/Public/draft/ReadMe.txt", "Version 15.1.0 of the Unicode Standard") + .mockResponseOnceIf("https://unicode.org/Public/draft/emoji/ReadMe.txt", "Unicode Emoji, Version 15.0"); await expect(getCurrentDraftVersion()).rejects.toThrow("draft versions do not match"); }); it("throws when version extraction fails", async () => { - globalThis.fetch = vi.fn() - .mockImplementationOnce(() => Promise.resolve({ - ok: true, - text: () => Promise.resolve("Invalid version format"), - })) - .mockImplementationOnce(() => Promise.resolve({ - ok: true, - text: () => Promise.resolve("Invalid version format"), - })); + fetchMock + .mockResponse("Invalid version format", { status: 200 }); await expect(getCurrentDraftVersion()).rejects.toThrow("failed to extract draft version"); }); @@ -122,3 +96,17 @@ describe("extract version", () => { }); }); }); + +// describe("get all emoji versions", () => { +// it("should return all emoji versions", async () => { +// fetchMock +// .mockResponseOnceIf("https://unicode.org/Public/", "Version 15.1.0 of the Unicode Standard") +// .mockResponseOnceIf("https://unicode.org/Public/emoji/", "Unicode Emoji, Version 15.1"); + +// const result = await getCurrentDraftVersion(); +// expect(result).toEqual({ +// emoji_version: "15.1", +// unicode_version: "15.1.0", +// }); +// }); +// }); diff --git a/vitest.config.ts b/vitest.config.ts new file mode 100644 index 0000000..f2e3338 --- /dev/null +++ b/vitest.config.ts @@ -0,0 +1,9 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + setupFiles: [ + "./test/__setup.ts", + ], + }, +}); From 9fcf29e56bd74b16210e94e96c93167eb4a3858a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20N=C3=B8rg=C3=A5rd?= Date: Thu, 20 Feb 2025 20:35:58 +0100 Subject: [PATCH 22/22] chore: lint --- src/utils.ts | 3 --- test/versions.test.ts | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/utils.ts b/src/utils.ts index 4521bea..7b02ad1 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,8 +1,5 @@ -import type { EmojiVersion } from "./lockfile"; -import consola from "consola"; import semver from "semver"; import { NO_EMOJI_VERSIONS } from "./constants"; -import { getCurrentDraftVersion } from "./versions"; /** * Converts a string to a URL-friendly slug. diff --git a/test/versions.test.ts b/test/versions.test.ts index 9abbb20..49f1589 100644 --- a/test/versions.test.ts +++ b/test/versions.test.ts @@ -1,6 +1,6 @@ import fs from "fs-extra"; -import { beforeEach, describe, expect, it, vi } from "vitest"; -import { extractEmojiVersion, extractVersionFromReadme, getAllEmojiVersions, getCurrentDraftVersion } from "../src/versions"; +import { describe, expect, it } from "vitest"; +import { extractEmojiVersion, extractVersionFromReadme, getCurrentDraftVersion } from "../src/versions"; describe("get draft version", () => { it("returns draft versions when fetches succeed and versions match", async () => {