diff --git a/package.json b/package.json index 06df39d..a657baf 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,7 @@ }, "dependencies": { "cac": "^6.7.14", + "consola": "^3.4.0", "farver": "^0.4.0", "fs-extra": "^11.3.0", "semver": "^7.7.1", @@ -53,6 +54,7 @@ "tsx": "^4.19.2", "typescript": "^5.7.3", "vitest": "^3.0.5", + "vitest-fetch-mock": "^0.4.3", "vitest-testdirs": "^2.1.1" }, "pnpm": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 96f05c7..66b8afb 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -11,6 +11,9 @@ importers: cac: specifier: ^6.7.14 version: 6.7.14 + consola: + specifier: ^3.4.0 + version: 3.4.0 farver: specifier: ^0.4.0 version: 0.4.0 @@ -63,6 +66,9 @@ importers: vitest: specifier: ^3.0.5 version: 3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0) + vitest-fetch-mock: + specifier: ^0.4.3 + version: 0.4.3(vitest@3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0)) vitest-testdirs: specifier: ^2.1.1 version: 2.1.1(vitest@3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0)) @@ -2338,6 +2344,12 @@ packages: yaml: optional: true + vitest-fetch-mock@0.4.3: + resolution: {integrity: sha512-PhuEh+9HCsXFMRPUJilDL7yVDFufoxqk7ze+CNks64UGlfFXaJTn1bLABiNlEc0u25RERXQGj0Tm+M9i6UY9HQ==} + engines: {node: '>=18.0.0'} + peerDependencies: + vitest: '>=2.0.0' + vitest-testdirs@2.1.1: resolution: {integrity: sha512-1bVjra7vT07fFasVYmpWYBYvvGBogrvjBOBk3UVhCD4ESwhU+CcLBEnpo21jBDgM1KWvWQsg3HZgerMnnBDcsQ==} engines: {node: '>=20'} @@ -4806,6 +4818,10 @@ snapshots: tsx: 4.19.2 yaml: 2.7.0 + vitest-fetch-mock@0.4.3(vitest@3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0)): + dependencies: + vitest: 3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0) + vitest-testdirs@2.1.1(vitest@3.0.5(@types/debug@4.1.12)(@types/node@22.13.1)(msw@2.7.0(@types/node@22.13.1)(typescript@5.7.3))(tsx@4.19.2)(yaml@2.7.0)): dependencies: testdirs: 0.1.4 diff --git a/src/adapter/base.ts b/src/adapter/base.ts index fd0e24b..10e3350 100644 --- a/src/adapter/base.ts +++ b/src/adapter/base.ts @@ -1,11 +1,13 @@ -import type { EmojiGroup } from "../types"; -import { defineMojiAdapter } from "../adapter"; -import { slugify } from "../utils"; +import type { Emoji, EmojiGroup, EmojiMetadata, EmojiShortcode, ShortcodeProvider } from "../types"; +import consola from "consola"; +import { red, yellow } from "farver/fast"; +import { defineMojiAdapter, MojisNotImplemented } from "../adapter"; +import { extractEmojiVersion, extractUnicodeVersion, slugify } from "../utils"; import { fetchCache } from "../utils/cache"; function notImplemented(adapterFn: string) { return async () => { - throw new Error(`the adapter function ${adapterFn} is not implemented`); + throw new MojisNotImplemented(`the adapter function ${red(adapterFn)} is not implemented`); }; } @@ -13,20 +15,26 @@ export default defineMojiAdapter({ name: "base", description: "base adapter", range: "*", - groups: async ({ version, force }) => { - if (version === "1.0" || version === "2.0" || version === "3.0") { - console.warn(`version ${version} does not have group data`); - return []; + metadata: async (ctx) => { + if (ctx.emojiVersion === "1.0" || ctx.emojiVersion === "2.0" || ctx.emojiVersion === "3.0") { + consola.warn(`skipping metadata for emoji version ${yellow(ctx.emojiVersion)}, as it's not supported.`); + return { + groups: [], + emojiMetadata: {}, + }; } - const groups = await fetchCache(`https://unicode.org/Public/emoji/${version}/emoji-test.txt`, { - cacheKey: `v${version}/metadata.json`, + return fetchCache(`https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-test.txt`, { + cacheKey: `v${ctx.emojiVersion}/metadata.json`, parser(data) { const lines = data.split("\n"); let currentGroup: EmojiGroup | undefined; const groups: EmojiGroup[] = []; + // [group-subgroup][hexcode] = metadata + const emojiMetadata: Record> = {}; + for (const line of lines) { if (line.trim() === "") { continue; @@ -44,6 +52,8 @@ export default defineMojiAdapter({ currentGroup = group; groups.push(group); + + continue; } else if (line.startsWith("# subgroup:")) { const subgroupName = line.slice(11).trim(); @@ -51,16 +61,124 @@ export default defineMojiAdapter({ throw new Error(`subgroup ${subgroupName} without group`); } - currentGroup.subgroups.push(subgroupName); + currentGroup.subgroups.push(slugify(subgroupName)); + + continue; + } else if (line.startsWith("#")) { + continue; } + + const [baseHexcode, trailingLine] = line.split(";"); + + if (baseHexcode == null || trailingLine == null) { + throw new Error(`invalid line: ${line}`); + } + + const [baseQualifier, comment] = trailingLine.split("#"); + + if (baseQualifier == null || comment == null) { + throw new Error(`invalid line: ${line}`); + } + + const hexcode = baseHexcode.trim().replace(/\s+/g, "-"); + const qualifier = baseQualifier.trim(); + + const emojiVersion = extractEmojiVersion(comment.trim()); + const [emoji, trimmedComment] = comment.trim().split(` E${emojiVersion} `); + + const groupName = currentGroup?.slug ?? "unknown"; + const subgroupName = currentGroup?.subgroups[currentGroup.subgroups.length - 1] ?? "unknown"; + + const metadataGroup = `${groupName}-${subgroupName}`; + + if (emojiMetadata[metadataGroup] == null) { + emojiMetadata[metadataGroup] = {}; + } + + emojiMetadata[metadataGroup][hexcode] = { + group: groupName, + subgroup: subgroupName, + qualifier, + emojiVersion: emojiVersion || null, + unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion), + description: trimmedComment || "", + emoji: emoji || null, + hexcodes: hexcode.split("-"), + }; } - return groups; + return { + groups, + emojiMetadata, + }; }, - bypassCache: force, + bypassCache: ctx.force, }); - - return groups; }, sequences: notImplemented("sequences"), + emojis: notImplemented("emojis"), + variations: notImplemented("variations"), + unicodeNames: async (ctx) => { + return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion === "13.1" ? "13.0" : ctx.emojiVersion}.0/ucd/UnicodeData.txt`, { + cacheKey: `v${ctx.emojiVersion}/unicode-names.json`, + parser(data) { + const lines = data.split("\n"); + const unicodeNames: Record = {}; + + for (const line of lines) { + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + const [hex, name] = line.split(";").map((col) => col.trim()); + + if (hex == null || name == null) { + throw new Error(`invalid line: ${line}`); + } + + unicodeNames[hex] = name; + } + + return unicodeNames; + }, + bypassCache: ctx.force, + }); + }, + async shortcodes(ctx) { + const providers = ctx.providers; + + if (providers.length === 0) { + throw new Error("no shortcode providers specified"); + } + + const shortcodes: Partial> = {}; + + if (this.emojis == null) { + throw new MojisNotImplemented("emojis"); + } + + const { emojis } = await this.emojis(ctx); + + const flattenedEmojis = Object.values(emojis).reduce((acc, subgroup) => { + for (const hexcodes of Object.values(subgroup)) { + for (const [hexcode, emoji] of Object.entries(hexcodes)) { + acc[hexcode] = emoji; + } + } + + return acc; + }, {} as Record); + + if (providers.includes("github")) { + const githubShortcodesFn = await import("../shortcode/github").then((m) => m.generateGitHubShortcodes); + + shortcodes.github = await githubShortcodesFn({ + emojis: flattenedEmojis, + force: ctx.force, + version: ctx.emojiVersion, + }); + } + + return shortcodes; + }, }); diff --git a/src/adapter/index.ts b/src/adapter/index.ts index c4178e0..ec34801 100644 --- a/src/adapter/index.ts +++ b/src/adapter/index.ts @@ -1,4 +1,4 @@ -import type { EmojiGroup, EmojiSequence, EmojiVariation } from "../types"; +import type { Emoji, EmojiData, EmojiGroup, EmojiMetadata, EmojiSequence, EmojiShortcode, EmojiVariation, ShortcodeProvider } from "../types"; import semver from "semver"; export interface MojiAdapter { @@ -22,11 +22,6 @@ export interface MojiAdapter { */ extend?: string; - /** - * A function to generate the emoji groups for the specified version. - */ - groups?: GroupFn; - /** * A function to generate the emoji sequences for the specified version */ @@ -41,17 +36,35 @@ export interface MojiAdapter { * A function to generate emoji variations for the specified version. */ variations?: EmojiVariationFn; + + shortcodes?: ShortcodeFn; + + metadata?: MetadataFn; + + unicodeNames?: UnicodeNamesFn; } export interface BaseAdapterContext { - version: string; + emojiVersion: string; + unicodeVersion: string; force: boolean; } -export type GroupFn = (ctx: BaseAdapterContext) => Promise; +export type UnicodeNamesFn = (ctx: BaseAdapterContext) => Promise>; export type SequenceFn = (ctx: BaseAdapterContext) => Promise<{ zwj: EmojiSequence[]; sequences: EmojiSequence[] }>; -export type EmojiFn = (ctx: BaseAdapterContext) => Promise; +export type EmojiFn = (ctx: BaseAdapterContext) => Promise<{ + emojiData: Record; + // group: subgroup: hexcode: emoji + emojis: Record>>; +}>; export type EmojiVariationFn = (ctx: BaseAdapterContext) => Promise; +export type ShortcodeFn = (ctx: BaseAdapterContext & { + providers: ShortcodeProvider[]; +}) => Promise>>; +export type MetadataFn = (ctx: BaseAdapterContext) => Promise<{ + groups: EmojiGroup[]; + emojiMetadata: Record>; +}>; export const ADAPTERS = new Map(); @@ -82,3 +95,10 @@ export function defineMojiAdapter(adapter: MojiAdapter): MojiAdapter { return adapter; } + +export class MojisNotImplemented extends Error { + constructor(message: string) { + super(message); + this.name = "MojisNotImplemented"; + } +} diff --git a/src/adapter/v13.ts b/src/adapter/v13.ts index 1715acc..dd0b8a0 100644 --- a/src/adapter/v13.ts +++ b/src/adapter/v13.ts @@ -1,8 +1,193 @@ -import { defineMojiAdapter } from "."; +import type { Emoji, EmojiData, EmojiSequence, EmojiVariation, Property } from "../types"; +import { defineMojiAdapter } from "../adapter"; +import { FEMALE_SIGN, MALE_SIGN } from "../constants"; +import { extractEmojiVersion, extractUnicodeVersion } from "../utils"; +import { fetchCache } from "../utils/cache"; +import { expandHexRange } from "../utils/hexcode"; export default defineMojiAdapter({ name: "v13", description: "adapter for version 13 & 13.1", range: ">=13.0.0 <14.0.0", extend: "base", + sequences: async (ctx) => { + const [sequences, zwj] = await Promise.all([ + { + cacheKey: `v${ctx.emojiVersion}/sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-sequences.txt`, + }, + { + cacheKey: `v${ctx.emojiVersion}/zwj-sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-zwj-sequences.txt`, + }, + ].map(async ({ cacheKey, url }) => { + return await fetchCache(url, { + cacheKey, + parser(data) { + const lines = data.split("\n"); + + const sequences: EmojiSequence[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, property, description] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null || description == null) { + throw new Error(`invalid line: ${line}`); + } + + const expandedHex = expandHexRange(hex); + + for (const hex of expandedHex) { + sequences.push({ + hex: hex.replace(/\s+/g, "-"), + property, + description, + gender: hex.includes(FEMALE_SIGN) ? "female" : hex.includes(MALE_SIGN) ? "male" : null, + }); + } + } + + return sequences; + }, + bypassCache: ctx.force, + }); + })); + + return { + sequences: sequences || [], + zwj: zwj || [], + }; + }, + async emojis(ctx) { + const unicodeNames = await this.unicodeNames!(ctx); + // const { sequences, zwj } = await this.sequences!(ctx); + // const metadata = await this.metadata!(ctx); + // const variations = await this.variations!(ctx); + + const emojis: Record>> = {}; + + const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion === "13.1" ? "13.0" : ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, { + cacheKey: `v${ctx.emojiVersion}/emoji-data.json`, + parser(data) { + const lines = data.split("\n"); + + const emojiData: Record = {}; + + for (const line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + const lineCommentIndex = line.indexOf("#"); + const lineComment = lineCommentIndex !== -1 ? line.slice(lineCommentIndex + 1).trim() : ""; + + let [hex, property] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null) { + throw new Error(`invalid line: ${line}`); + } + + // remove line comment from property + const propertyCommentIndex = property.indexOf("#"); + if (propertyCommentIndex !== -1) { + property = property.slice(0, propertyCommentIndex).trim(); + } + + if (property === "Extended_Pictographic") { + continue; + } + + const expandedHex = expandHexRange(hex); + const emojiVersion = extractEmojiVersion(lineComment); + + const emoji: EmojiData = { + description: lineComment, + hexcode: "", + gender: null, + properties: [(property as Property) || "Emoji"], + unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion), + emojiVersion, + name: unicodeNames[hex] || "", + }; + + for (const hex of expandedHex) { + if (emojiData[hex] != null) { + emojiData[hex].properties = [...new Set([...emojiData[hex].properties, ...emoji.properties])]; + } else { + emojiData[hex] = { + ...emoji, + hexcode: hex.replace(/\s+/g, "-"), + }; + } + } + } + + return emojiData; + }, + bypassCache: ctx.force, + }); + + return { + emojiData, + emojis, + }; + }, + variations: async (ctx) => { + return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion === "13.1" ? "13.0" : ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, { + cacheKey: `v${ctx.emojiVersion}/variations.json`, + parser(data) { + const lines = data.split("\n"); + + const variations: EmojiVariation[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, style] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || style == null) { + throw new Error(`invalid line: ${line}`); + } + + const hexcode = hex.replace(/\s+/g, "-"); + + const type = style.replace("style", "").trim(); + + if (type !== "text" && type !== "emoji") { + throw new Error(`invalid style: ${style}`); + } + + variations.push({ + emoji: type === "emoji" ? hexcode : null, + text: type === "text" ? hexcode : null, + property: ["Emoji"], + }); + } + + return variations; + }, + bypassCache: ctx.force, + }); + }, }); diff --git a/src/adapter/v14.ts b/src/adapter/v14.ts index b91a58b..6ff55fd 100644 --- a/src/adapter/v14.ts +++ b/src/adapter/v14.ts @@ -1,8 +1,193 @@ -import { defineMojiAdapter } from "."; +import type { Emoji, EmojiData, EmojiSequence, EmojiVariation, Property } from "../types"; +import { defineMojiAdapter } from "../adapter"; +import { FEMALE_SIGN, MALE_SIGN } from "../constants"; +import { extractEmojiVersion, extractUnicodeVersion } from "../utils"; +import { fetchCache } from "../utils/cache"; +import { expandHexRange } from "../utils/hexcode"; export default defineMojiAdapter({ name: "v14", description: "adapter for version 14", range: ">=14.0.0 <15.0.0", extend: "base", + sequences: async (ctx) => { + const [sequences, zwj] = await Promise.all([ + { + cacheKey: `v${ctx.emojiVersion}/sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-sequences.txt`, + }, + { + cacheKey: `v${ctx.emojiVersion}/zwj-sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-zwj-sequences.txt`, + }, + ].map(async ({ cacheKey, url }) => { + return await fetchCache(url, { + cacheKey, + parser(data) { + const lines = data.split("\n"); + + const sequences: EmojiSequence[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, property, description] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null || description == null) { + throw new Error(`invalid line: ${line}`); + } + + const expandedHex = expandHexRange(hex); + + for (const hex of expandedHex) { + sequences.push({ + hex: hex.replace(/\s+/g, "-"), + property, + description, + gender: hex.includes(FEMALE_SIGN) ? "female" : hex.includes(MALE_SIGN) ? "male" : null, + }); + } + } + + return sequences; + }, + bypassCache: ctx.force, + }); + })); + + return { + sequences: sequences || [], + zwj: zwj || [], + }; + }, + async emojis(ctx) { + const unicodeNames = await this.unicodeNames!(ctx); + // const { sequences, zwj } = await this.sequences!(ctx); + // const metadata = await this.metadata!(ctx); + // const variations = await this.variations!(ctx); + + const emojis: Record>> = {}; + + const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, { + cacheKey: `v${ctx.emojiVersion}/emoji-data.json`, + parser(data) { + const lines = data.split("\n"); + + const emojiData: Record = {}; + + for (const line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + const lineCommentIndex = line.indexOf("#"); + const lineComment = lineCommentIndex !== -1 ? line.slice(lineCommentIndex + 1).trim() : ""; + + let [hex, property] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null) { + throw new Error(`invalid line: ${line}`); + } + + // remove line comment from property + const propertyCommentIndex = property.indexOf("#"); + if (propertyCommentIndex !== -1) { + property = property.slice(0, propertyCommentIndex).trim(); + } + + if (property === "Extended_Pictographic") { + continue; + } + + const expandedHex = expandHexRange(hex); + const emojiVersion = extractEmojiVersion(lineComment); + + const emoji: EmojiData = { + description: lineComment, + hexcode: "", + gender: null, + properties: [(property as Property) || "Emoji"], + unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion), + emojiVersion, + name: unicodeNames[hex] || "", + }; + + for (const hex of expandedHex) { + if (emojiData[hex] != null) { + emojiData[hex].properties = [...new Set([...emojiData[hex].properties, ...emoji.properties])]; + } else { + emojiData[hex] = { + ...emoji, + hexcode: hex.replace(/\s+/g, "-"), + }; + } + } + } + + return emojiData; + }, + bypassCache: ctx.force, + }); + + return { + emojiData, + emojis, + }; + }, + variations: async (ctx) => { + return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, { + cacheKey: `v${ctx.emojiVersion}/variations.json`, + parser(data) { + const lines = data.split("\n"); + + const variations: EmojiVariation[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, style] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || style == null) { + throw new Error(`invalid line: ${line}`); + } + + const hexcode = hex.replace(/\s+/g, "-"); + + const type = style.replace("style", "").trim(); + + if (type !== "text" && type !== "emoji") { + throw new Error(`invalid style: ${style}`); + } + + variations.push({ + emoji: type === "emoji" ? hexcode : null, + text: type === "text" ? hexcode : null, + property: ["Emoji"], + }); + } + + return variations; + }, + bypassCache: ctx.force, + }); + }, }); diff --git a/src/adapter/v15.ts b/src/adapter/v15.ts index 9055ef0..3a8f879 100644 --- a/src/adapter/v15.ts +++ b/src/adapter/v15.ts @@ -1,8 +1,193 @@ +import type { Emoji, EmojiData, EmojiSequence, EmojiVariation, Property } from "../types"; import { defineMojiAdapter } from "../adapter"; +import { FEMALE_SIGN, MALE_SIGN } from "../constants"; +import { extractEmojiVersion, extractUnicodeVersion } from "../utils"; +import { fetchCache } from "../utils/cache"; +import { expandHexRange } from "../utils/hexcode"; export default defineMojiAdapter({ name: "v15", description: "adapter for version 15 & v15.1", range: ">=15.0.0 <16.0.0", extend: "base", + sequences: async (ctx) => { + const [sequences, zwj] = await Promise.all([ + { + cacheKey: `v${ctx.emojiVersion}/sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-sequences.txt`, + }, + { + cacheKey: `v${ctx.emojiVersion}/zwj-sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-zwj-sequences.txt`, + }, + ].map(async ({ cacheKey, url }) => { + return await fetchCache(url, { + cacheKey, + parser(data) { + const lines = data.split("\n"); + + const sequences: EmojiSequence[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, property, description] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null || description == null) { + throw new Error(`invalid line: ${line}`); + } + + const expandedHex = expandHexRange(hex); + + for (const hex of expandedHex) { + sequences.push({ + hex: hex.replace(/\s+/g, "-"), + property, + description, + gender: hex.includes(FEMALE_SIGN) ? "female" : hex.includes(MALE_SIGN) ? "male" : null, + }); + } + } + + return sequences; + }, + bypassCache: ctx.force, + }); + })); + + return { + sequences: sequences || [], + zwj: zwj || [], + }; + }, + async emojis(ctx) { + const unicodeNames = await this.unicodeNames!(ctx); + // const { sequences, zwj } = await this.sequences!(ctx); + // const metadata = await this.metadata!(ctx); + // const variations = await this.variations!(ctx); + + const emojis: Record>> = {}; + + const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, { + cacheKey: `v${ctx.emojiVersion}/emoji-data.json`, + parser(data) { + const lines = data.split("\n"); + + const emojiData: Record = {}; + + for (const line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + const lineCommentIndex = line.indexOf("#"); + const lineComment = lineCommentIndex !== -1 ? line.slice(lineCommentIndex + 1).trim() : ""; + + let [hex, property] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null) { + throw new Error(`invalid line: ${line}`); + } + + // remove line comment from property + const propertyCommentIndex = property.indexOf("#"); + if (propertyCommentIndex !== -1) { + property = property.slice(0, propertyCommentIndex).trim(); + } + + if (property === "Extended_Pictographic") { + continue; + } + + const expandedHex = expandHexRange(hex); + const emojiVersion = extractEmojiVersion(lineComment); + + const emoji: EmojiData = { + description: lineComment, + hexcode: "", + gender: null, + properties: [(property as Property) || "Emoji"], + unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion), + emojiVersion, + name: unicodeNames[hex] || "", + }; + + for (const hex of expandedHex) { + if (emojiData[hex] != null) { + emojiData[hex].properties = [...new Set([...emojiData[hex].properties, ...emoji.properties])]; + } else { + emojiData[hex] = { + ...emoji, + hexcode: hex.replace(/\s+/g, "-"), + }; + } + } + } + + return emojiData; + }, + bypassCache: ctx.force, + }); + + return { + emojiData, + emojis, + }; + }, + variations: async (ctx) => { + return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, { + cacheKey: `v${ctx.emojiVersion}/variations.json`, + parser(data) { + const lines = data.split("\n"); + + const variations: EmojiVariation[] = []; + + for (let line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + // remove line comment + const commentIndex = line.indexOf("#"); + if (commentIndex !== -1) { + line = line.slice(0, commentIndex).trim(); + } + + const [hex, style] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || style == null) { + throw new Error(`invalid line: ${line}`); + } + + const hexcode = hex.replace(/\s+/g, "-"); + + const type = style.replace("style", "").trim(); + + if (type !== "text" && type !== "emoji") { + throw new Error(`invalid style: ${style}`); + } + + variations.push({ + emoji: type === "emoji" ? hexcode : null, + text: type === "text" ? hexcode : null, + property: ["Emoji"], + }); + } + + return variations; + }, + bypassCache: ctx.force, + }); + }, }); diff --git a/src/adapter/v16.ts b/src/adapter/v16.ts index 89b2430..5c67bb4 100644 --- a/src/adapter/v16.ts +++ b/src/adapter/v16.ts @@ -1,6 +1,7 @@ -import type { EmojiSequence, EmojiVariation } from "../types"; +import type { Emoji, EmojiData, EmojiSequence, EmojiVariation, Property } from "../types"; import { defineMojiAdapter } from "../adapter"; import { FEMALE_SIGN, MALE_SIGN } from "../constants"; +import { extractEmojiVersion, extractUnicodeVersion } from "../utils"; import { fetchCache } from "../utils/cache"; import { expandHexRange } from "../utils/hexcode"; @@ -12,12 +13,12 @@ export default defineMojiAdapter({ sequences: async (ctx) => { const [sequences, zwj] = await Promise.all([ { - cacheKey: `v${ctx.version}/sequences.json`, - url: `https://unicode.org/Public/emoji/${ctx.version}/emoji-sequences.txt`, + cacheKey: `v${ctx.emojiVersion}/sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-sequences.txt`, }, { - cacheKey: `v${ctx.version}/zwj-sequences.json`, - url: `https://unicode.org/Public/emoji/${ctx.version}/emoji-zwj-sequences.txt`, + cacheKey: `v${ctx.emojiVersion}/zwj-sequences.json`, + url: `https://unicode.org/Public/emoji/${ctx.emojiVersion}/emoji-zwj-sequences.txt`, }, ].map(async ({ cacheKey, url }) => { return await fetchCache(url, { @@ -28,7 +29,7 @@ export default defineMojiAdapter({ const sequences: EmojiSequence[] = []; for (let line of lines) { - // skip empty line & comments + // skip empty line & comments if (line.trim() === "" || line.startsWith("#")) { continue; } @@ -68,11 +69,84 @@ export default defineMojiAdapter({ zwj: zwj || [], }; }, - async emojis({ version, force }) { + async emojis(ctx) { + const unicodeNames = await this.unicodeNames!(ctx); + // const { sequences, zwj } = await this.sequences!(ctx); + // const metadata = await this.metadata!(ctx); + // const variations = await this.variations!(ctx); + + const emojis: Record>> = {}; + + const emojiData = await fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-data.txt`, { + cacheKey: `v${ctx.emojiVersion}/emoji-data.json`, + parser(data) { + const lines = data.split("\n"); + + const emojiData: Record = {}; + + for (const line of lines) { + // skip empty line & comments + if (line.trim() === "" || line.startsWith("#")) { + continue; + } + + const lineCommentIndex = line.indexOf("#"); + const lineComment = lineCommentIndex !== -1 ? line.slice(lineCommentIndex + 1).trim() : ""; + + let [hex, property] = line.split(";").map((col) => col.trim()).slice(0, 4); + + if (hex == null || property == null) { + throw new Error(`invalid line: ${line}`); + } + + // remove line comment from property + const propertyCommentIndex = property.indexOf("#"); + if (propertyCommentIndex !== -1) { + property = property.slice(0, propertyCommentIndex).trim(); + } + + if (property === "Extended_Pictographic") { + continue; + } + + const expandedHex = expandHexRange(hex); + const emojiVersion = extractEmojiVersion(lineComment); + + const emoji: EmojiData = { + description: lineComment, + hexcode: "", + gender: null, + properties: [(property as Property) || "Emoji"], + unicodeVersion: extractUnicodeVersion(emojiVersion, ctx.unicodeVersion), + emojiVersion, + name: unicodeNames[hex] || "", + }; + + for (const hex of expandedHex) { + if (emojiData[hex] != null) { + emojiData[hex].properties = [...new Set([...emojiData[hex].properties, ...emoji.properties])]; + } else { + emojiData[hex] = { + ...emoji, + hexcode: hex.replace(/\s+/g, "-"), + }; + } + } + } + + return emojiData; + }, + bypassCache: ctx.force, + }); + + return { + emojiData, + emojis, + }; }, variations: async (ctx) => { - return fetchCache(`https://unicode.org/Public/${ctx.version}.0/ucd/emoji/emoji-variation-sequences.txt`, { - cacheKey: `v${ctx.version}/variations.json`, + return fetchCache(`https://unicode.org/Public/${ctx.emojiVersion}.0/ucd/emoji/emoji-variation-sequences.txt`, { + cacheKey: `v${ctx.emojiVersion}/variations.json`, parser(data) { const lines = data.split("\n"); diff --git a/src/cli.ts b/src/cli.ts index 8991dfe..a1bc1b6 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,13 +1,17 @@ import process from "node:process"; +import consola from "consola"; import { green, red, yellow } from "farver/fast"; import fs from "fs-extra"; import semver from "semver"; +import { type InferInput, parseAsync } from "valibot"; import yargs, { type Argv } from "yargs"; import pkg from "../package.json" with { type: "json" }; +import { MojisNotImplemented } from "./adapter"; import { resolveAdapter } from "./adapters"; import { SUPPORTED_EMOJI_VERSIONS } from "./constants"; -import { getAllEmojiVersions } from "./utils"; -import { readLockfile, writeLockfile } from "./utils/lockfile"; +import { readLockfile, writeLockfile } from "./lockfile"; +import { SHORTCODE_PROVIDERS_SCHEMA } from "./schemas"; +import { getAllEmojiVersions, getUnicodeVersionByEmojiVersion } from "./utils"; const cli = yargs(process.argv.slice(2)) .scriptName("mojis") @@ -20,25 +24,43 @@ const cli = yargs(process.argv.slice(2)) .demandCommand(1, ""); cli.command( - "generate:sequences ", - "Generate emoji sequences for the specified versions", + "generate ", + "generate emoji data for the specified versions", (args) => commonOptions(args) .positional("versions", { type: "string", description: "emoji versions to generate", }) + .option("generators", { + type: "array", + description: "generators to use", + default: ["metadata", "sequences", "variations", "emojis", "shortcodes"], + }) + .option("shortcode-providers", { + type: "array", + description: "shortcode providers to use", + default: ["github"] satisfies InferInput, + }) .strict().help(), async (args) => { const force = args.force ?? false; const versions = Array.isArray(args.versions) ? args.versions : [args.versions]; + const generators = Array.isArray(args.generators) ? args.generators : [args.generators]; + + function isGeneratorEnabled(generator: string) { + return generators.includes(generator); + } + + const unsupported = versions.filter((v) => !SUPPORTED_EMOJI_VERSIONS.includes(v)); - if (SUPPORTED_EMOJI_VERSIONS.every((v) => !versions.includes(v))) { - console.error(red("error:"), "unsupported emoji versions"); - console.log("supported versions:", SUPPORTED_EMOJI_VERSIONS.join(", ")); + // require that all versions are supported, otherwise exit + if (unsupported.length > 0) { + consola.error(`version(s) ${unsupported.map((v) => yellow(v)).join(", ")} is not supported`); process.exit(1); } - console.log("generating emoji group data for versions", versions.map((v) => yellow(v)).join(", ")); + consola.info("generating emoji data for versions", versions.map((v) => yellow(v)).join(", ")); + consola.info(`using the following generators ${args.generators.map((g) => yellow(g)).join(", ")}`); const promises = versions.map(async (version) => { const coerced = semver.coerce(version); @@ -53,86 +75,140 @@ cli.command( throw new Error(`no adapter found for version ${version}`); } - const { sequences, zwj } = await adapter.sequences!({ version, force }); - - await fs.ensureDir(`./data/v${version}`); - await fs.writeFile( - `./data/v${version}/zwj-sequences.json`, - JSON.stringify(zwj, null, 2), - "utf-8", - ); - return fs.writeFile( - `./data/v${version}/sequences.json`, - JSON.stringify(sequences, null, 2), - "utf-8", - ); - }); + if (isGeneratorEnabled("metadata")) { + if (adapter.metadata == null) { + throw new MojisNotImplemented("metadata"); + } - const results = await Promise.allSettled(promises); + const { groups, emojiMetadata } = await adapter.metadata({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); - for (const result of results) { - if (result.status === "rejected") { - console.error(red("error:"), result.reason); + await fs.ensureDir(`./data/v${version}/metadata`); + + await fs.writeFile( + `./data/v${version}/groups.json`, + JSON.stringify(groups, null, 2), + "utf-8", + ); + + await Promise.all(Object.entries(emojiMetadata).map(([group, metadata]) => fs.writeFile( + `./data/v${version}/metadata/${group}.json`, + JSON.stringify(metadata, null, 2), + "utf-8", + ))); } - } - console.log(green("done")); - }, -); + if (isGeneratorEnabled("sequences")) { + if (adapter.sequences == null) { + throw new MojisNotImplemented("sequences"); + } -cli.command( - "generate:groups ", - "Generate emoji group data for the specified versions", - (args) => commonOptions(args) - .positional("versions", { - type: "string", - description: "emoji versions to generate", - }) - .strict().help(), - async (args) => { - const force = args.force ?? false; - const versions = Array.isArray(args.versions) ? args.versions : [args.versions]; + const { sequences, zwj } = await adapter.sequences({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); - if (SUPPORTED_EMOJI_VERSIONS.every((v) => !versions.includes(v))) { - console.error(red("error:"), "unsupported emoji versions"); - console.log("supported versions:", SUPPORTED_EMOJI_VERSIONS.join(", ")); - process.exit(1); - } + await fs.ensureDir(`./data/v${version}`); - console.log("generating emoji group data for versions", versions.map((v) => yellow(v)).join(", ")); + await fs.writeFile( + `./data/v${version}/zwj-sequences.json`, + JSON.stringify(zwj, null, 2), + "utf-8", + ); - const promises = versions.map(async (version) => { - const coerced = semver.coerce(version); + await fs.writeFile( + `./data/v${version}/sequences.json`, + JSON.stringify(sequences, null, 2), + "utf-8", + ); + } - if (coerced == null) { - throw new Error(`invalid version ${version}`); + if (isGeneratorEnabled("variations")) { + if (adapter.variations == null) { + throw new MojisNotImplemented("variations"); + } + + const variations = await adapter.variations({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); + + await fs.ensureDir(`./data/v${version}`); + await fs.writeFile( + `./data/v${version}/variations.json`, + JSON.stringify(variations, null, 2), + "utf-8", + ); } - const adapter = resolveAdapter(coerced.version); + if (isGeneratorEnabled("emojis")) { + if (adapter.emojis == null) { + throw new MojisNotImplemented("emojis"); + } - if (adapter == null) { - throw new Error(`no adapter found for version ${version}`); + const { emojiData, emojis } = await adapter.emojis({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)! }); + + await fs.ensureDir(`./data/v${version}`); + + await fs.writeFile( + `./data/v${version}/emoji-data.json`, + JSON.stringify(emojiData, null, 2), + "utf-8", + ); + + for (const [group, subgroup] of Object.entries(emojis)) { + await fs.ensureDir(`./data/v${version}/emojis/${group}`); + + for (const hexcodes of Object.values(subgroup)) { + await fs.ensureDir(`./data/v${version}/emojis/${group}/${subgroup}`); + + for (const [hexcode, emoji] of Object.entries(hexcodes)) { + await fs.writeFile( + `./data/v${version}/emojis/${group}/${subgroup}/${hexcode}.json`, + JSON.stringify(emoji, null, 2), + "utf-8", + ); + } + } + } } - const groups = await adapter.groups!({ version, force }); + if (isGeneratorEnabled("shortcodes")) { + const providers = await parseAsync(SHORTCODE_PROVIDERS_SCHEMA, args["shortcode-providers"]); + + if (providers.length === 0) { + throw new Error("no shortcode providers specified"); + } - await fs.ensureDir(`./data/v${version}`); - return fs.writeFile( - `./data/v${version}/groups.json`, - JSON.stringify(groups, null, 2), - "utf-8", - ); + if (adapter.shortcodes == null) { + throw new MojisNotImplemented("shortcodes"); + } + + const shortcodes = await adapter.shortcodes({ emojiVersion: version, force, unicodeVersion: getUnicodeVersionByEmojiVersion(version)!, providers }); + + await fs.ensureDir(`./data/v${version}/shortcodes`); + + for (const provider of providers) { + if (shortcodes[provider] == null) { + consola.warn(`no shortcodes found for provider ${provider}`); + continue; + } + + await fs.writeFile( + `./data/v${version}/shortcodes/${provider}.json`, + JSON.stringify(shortcodes[provider], null, 2), + "utf-8", + ); + } + } }); const results = await Promise.allSettled(promises); for (const result of results) { if (result.status === "rejected") { - console.error(red("error:"), result.reason); + if (result.reason instanceof MojisNotImplemented) { + consola.warn(result.reason.message); + continue; + } + consola.error(result.reason); } } - console.log(green("done")); + consola.info(green("done")); }, ); @@ -151,7 +227,7 @@ cli.command( const latest = versions[0]; - console.log("latest emoji version:", yellow(latest?.emoji_version)); + consola.log("latest emoji version:", yellow(latest?.emoji_version)); if (args.writeLockfile) { const lockfile = await readLockfile(); @@ -159,7 +235,7 @@ cli.command( lockfile.latestVersion = latest?.emoji_version; await writeLockfile(lockfile); - console.log(`updated ${yellow("emojis.lock")}`); + consola.log(`updated ${yellow("emojis.lock")}`); } }, ); @@ -176,8 +252,8 @@ cli.command( async (args) => { const versions = await getAllEmojiVersions(); - console.log("all available versions:"); - console.log(versions.map((v) => `${yellow(v.emoji_version)}${v.draft ? ` ${red("(draft)")}` : ""}`).join(", ")); + consola.log("all available versions:"); + consola.log(versions.map((v) => `${yellow(v.emoji_version)}${v.draft ? ` ${red("(draft)")}` : ""}`).join(", ")); if (args.writeLockfile) { const lockfile = await readLockfile(); @@ -185,7 +261,7 @@ cli.command( lockfile.versions = Array.from(versions); await writeLockfile(lockfile); - console.log(`updated ${yellow("emojis.lock")}`); + consola.log(`updated ${yellow("emojis.lock")}`); } }, ); diff --git a/src/utils/lockfile.ts b/src/lockfile.ts similarity index 100% rename from src/utils/lockfile.ts rename to src/lockfile.ts diff --git a/src/schemas.ts b/src/schemas.ts new file mode 100644 index 0000000..5e42e5b --- /dev/null +++ b/src/schemas.ts @@ -0,0 +1,16 @@ +import * as v from "valibot"; + +export const SHORTCODE_PROVIDER_SCHEMA = v.union([ + v.literal("github"), +]); + +export const SHORTCODE_PROVIDERS_SCHEMA = v.array(SHORTCODE_PROVIDER_SCHEMA); + +export const GENERATOR_SCHEMA = v.union([ + v.literal("metadata"), + v.literal("sequences"), + v.literal("emojis"), + v.literal("variations"), + v.literal("shortcodes"), + v.literal("unicode-names"), +]); diff --git a/src/shortcode/github.ts b/src/shortcode/github.ts new file mode 100644 index 0000000..6546b23 --- /dev/null +++ b/src/shortcode/github.ts @@ -0,0 +1,51 @@ +import type { EmojiShortcode } from "../types"; +import { fetchCache } from "../utils/cache"; + +export interface ShortcodeOptions { + version: string; + force: boolean; + emojis: any; +} + +export async function generateGitHubShortcodes(options: ShortcodeOptions): Promise { + const { emojis, force, version } = options; + + const githubEmojis = await fetchCache>("https://api.github.com/emojis", { + cacheKey: `v${version}/github-emojis.json`, + bypassCache: force, + parser(data) { + return JSON.parse(data); + }, + options: { + headers: { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "mojis.dev", + }, + }, + }); + + const shortcodes: EmojiShortcode[] = []; + + for (const [shortcode, url] of Object.entries(githubEmojis)) { + const match = url.match(/emoji\/unicode\/([\da-z-]+)\.png/i); + + // github has some standard emojis that don't have a unicode representation + if (!match || !match[1]) { + continue; + } + + const hexcode = match[1].toUpperCase(); + + if (emojis[hexcode] == null) { + continue; + } + + shortcodes.push({ + code: shortcode, + vendor: "github", + source: "github", + }); + } + + return shortcodes; +} diff --git a/src/types.ts b/src/types.ts index 411cbb4..c9bf356 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,3 +1,6 @@ +import type { InferInput } from "valibot"; +import type { SHORTCODE_PROVIDER_SCHEMA } from "./schemas"; + export interface EmojiGroup { name: string; slug: string; @@ -7,14 +10,32 @@ export interface EmojiGroup { export interface Emoji { name: string; slug: string; - components: EmojiComponent[]; - hexcode: string; - type: "ZWJ" | "SINGLE"; + code: string; + hexcodes: string[]; + shortcodes: EmojiShortcode[]; } -// eslint-disable-next-line ts/no-empty-object-type -export interface EmojiComponent { +export type ShortcodeProvider = InferInput; +export interface EmojiMetadata { + group: string; + subgroup: string; + qualifier: string; + unicodeVersion: string | null; + emojiVersion: string | null; + description: string; + emoji: string | null; + hexcodes: string[]; +} + +export interface EmojiData { + description: string; + gender: string | null; + hexcode: string; + properties: Property[]; + unicodeVersion: string | null; + emojiVersion: string | null; + name: string; } export interface EmojiShortcode { diff --git a/src/utils.ts b/src/utils.ts index 33da8ca..7b02ad1 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,4 +1,3 @@ -import type { EmojiVersion } from "./utils/lockfile"; import semver from "semver"; import { NO_EMOJI_VERSIONS } from "./constants"; @@ -26,116 +25,6 @@ export function slugify(val: string): string { .replace(/^-+|-+$/g, ""); } -/** - * Retrieves all available emoji versions from Unicode.org. - * This function fetches both the root Unicode directory and the emoji-specific directory - * to compile a comprehensive list of valid emoji versions. - * - * The function performs the following steps: - * 1. Fetches content from Unicode.org's public directories - * 2. Extracts version numbers using regex - * 3. Validates each version - * 4. Normalizes version numbers to valid semver format - * - * @throws {Error} When either the root or emoji page fetch fails - * @returns {Promise} A promise that resolves to an array of emoji versions, - * sorted according to semver rules - */ -export async function getAllEmojiVersions(): Promise { - const [rootResult, emojiResult] = await Promise.allSettled([ - "https://unicode.org/Public/", - "https://unicode.org/Public/emoji/", - ].map(async (url) => { - const res = await fetch(url); - - if (!res.ok) { - throw new Error(`failed to fetch ${url}: ${res.statusText}`); - } - - return res.text(); - })); - - if (rootResult == null || emojiResult == null) { - throw new Error("failed to fetch root or emoji page"); - } - - if (rootResult.status === "rejected" || emojiResult.status === "rejected") { - console.error({ - root: rootResult.status === "rejected" ? rootResult.reason : "ok", - emoji: emojiResult.status === "rejected" ? emojiResult.reason : "ok", - }); - - throw new Error("failed to fetch root or emoji page"); - } - - const rootHtml = rootResult.value; - const emojiHtml = emojiResult.value; - - const versionRegex = /href="(\d+\.\d+(?:\.\d+)?)\/?"/g; - - const draft = await getCurrentDraftVersion(); - - const versions: EmojiVersion[] = []; - - for (const match of rootHtml.matchAll(versionRegex)) { - if (match == null || match[1] == null) continue; - - const version = match[1]; - - if (!await isEmojiVersionValid(version)) { - continue; - } - - if (versions.some((v) => v.unicode_version === version)) { - continue; - } - - versions.push({ - emoji_version: null, - unicode_version: version, - draft: version === draft, - }); - } - - for (const match of emojiHtml.matchAll(versionRegex)) { - if (match == null || match[1] == null) continue; - - let version = match[1]; - - // for the emoji page, the versions is not valid semver. - // so we will add the last 0 to the version. - // handle both 5.0 and 12.0 -> 5.0.0 and 12.0.0 - if (version.length === 3 || version.length === 4) { - version += ".0"; - } - - if (!await isEmojiVersionValid(version)) { - continue; - } - - // check if the unicode_version already exists. - // if it does, we will update the emoji version. - const existing = versions.find((v) => v.unicode_version === version); - - if (existing) { - existing.emoji_version = match[1]; - continue; - } - - versions.push({ - emoji_version: match[1], - unicode_version: null, - draft: version === draft, - }); - } - - return versions.sort((a, b) => { - const versionA = a.unicode_version ?? `${a.emoji_version}.0`; - const versionB = b.unicode_version ?? `${b.emoji_version}.0`; - return semver.compare(versionB, versionA); - }); -} - /** * Checks if the given emoji version is valid according to Unicode Consortium standards. * @@ -165,104 +54,9 @@ export async function isEmojiVersionValid(version: string): Promise { // from v1 to v5, there was only major releases. So no v1.1, v1.2, etc. // only, v1.0, v2.0, v3.0, v4.0, v5.0. // if version has any minor or patch, it is invalid. - if (semver.minor(version) !== 0 || semver.patch(version) !== 0) { + if (semver.major(version) <= 5 && (semver.minor(version) !== 0 || semver.patch(version) !== 0)) { return false; } return true; } - -/** - * Retrieves the current Unicode draft version by fetching and comparing root and emoji ReadMe files. - * - * This function fetches two ReadMe files from unicode.org: - * - The main draft ReadMe - * - The emoji draft ReadMe - * - * It then extracts and validates the version numbers from both files to ensure they match. - * The emoji version uses major.minor format while the root version uses major.minor.patch. - * - * @returns A Promise that resolves to the current draft version string, or null if not found - * @throws {Error} If either fetch fails - * @throws {Error} If version extraction fails - * @throws {Error} If versions between root and emoji drafts don't match - */ -export async function getCurrentDraftVersion(): Promise { - const [rootResult, emojiResult] = await Promise.allSettled([ - "https://unicode.org/Public/draft/ReadMe.txt", - "https://unicode.org/Public/draft/emoji/ReadMe.txt", - ].map(async (url) => { - const res = await fetch(url); - - if (!res.ok) { - throw new Error(`failed to fetch ${url}: ${res.statusText}`); - } - - return res.text(); - })); - - if (rootResult == null || emojiResult == null) { - throw new Error("failed to fetch draft readme or draft emoji readme"); - } - - if (rootResult.status === "rejected" || emojiResult.status === "rejected") { - console.error({ - root: rootResult.status === "rejected" ? rootResult.reason : "ok", - emoji: emojiResult.status === "rejected" ? emojiResult.reason : "ok", - }); - - throw new Error("failed to fetch draft readme or draft emoji readme"); - } - - const draftText = rootResult.value; - const emojiText = emojiResult.value; - - const rootVersion = extractVersion(draftText); - const emojiVersion = extractVersion(emojiText); - - if (rootVersion == null || emojiVersion == null) { - throw new Error("failed to extract draft version"); - } - - // the emoji version is only using major.minor format. - // so, we will need to add the last 0 to the version. - - // if they don't match the major and minor version, we will throw an error. - if (semver.major(rootVersion) !== semver.major(`${emojiVersion}.0`) || semver.minor(rootVersion) !== semver.minor(`${emojiVersion}.0`)) { - throw new Error("draft versions do not match"); - } - - return rootVersion; -} - -/** - * Extracts the Unicode version number from a given text string. - * - * @param {string} text - The text to extract the version number from - * @returns {string | null} The extracted version number as a string, or null if no version number is found - * - * @example - * ```ts - * extractVersion("Version 15.0.0 of the Unicode Standard") // Returns "15.0.0" - * extractVersion("Unicode15.1") // Returns "15.1" - * extractVersion("No version here") // Returns null - * ``` - */ -export function extractVersion(text: string): string | null { - const patterns = [ - /Version (\d+\.\d+(?:\.\d+)?) of the Unicode Standard/, // Most explicit - /Unicode(\d+\.\d+(?:\.\d+)?)/, // From URLs - /Version (\d+\.\d+)(?!\.\d)/, // Bare major.minor format - /Unicode Emoji, Version (\d+\.\d+(?:\.\d+)?)/, // Emoji-specific version - ]; - - for (const pattern of patterns) { - const match = text.match(pattern); - - if (match == null || match[1] == null) continue; - - return match[1]; - } - - return null; -} diff --git a/src/utils/cache.ts b/src/utils/cache.ts index c457afd..ccbdb4b 100644 --- a/src/utils/cache.ts +++ b/src/utils/cache.ts @@ -1,5 +1,7 @@ import path from "node:path"; import process from "node:process"; +import consola from "consola"; +import { green } from "farver/fast"; import fs from "fs-extra"; const CACHE_FOLDER = path.resolve(process.cwd(), ".cache"); @@ -56,9 +58,10 @@ export async function fetchCache( ): Promise { const { cacheKey, parser, bypassCache, options: fetchOptions } = options; - const cache = LOCAL_CACHE[cacheKey] || await readCache(cacheKey); + const cache = LOCAL_CACHE[cacheKey] || await readCache(cacheKey); if (!bypassCache && cache != null) { + consola.debug(`cache hit: ${green(cacheKey)}`); LOCAL_CACHE[cacheKey] = cache; return cache as TData; diff --git a/src/utils/hexcode.ts b/src/utils/hexcode.ts index a1d22db..f7f1cfa 100644 --- a/src/utils/hexcode.ts +++ b/src/utils/hexcode.ts @@ -52,3 +52,17 @@ export function expandHexRange(hex: string): string[] { return [hex]; } + +/** + * Removes specific unicode variation selectors from a hex string. + * Specifically removes: + * - 200D (Zero Width Joiner) + * - FE0E (Variation Selector-15, text style) + * - FE0F (Variation Selector-16, emoji style) + * + * @param {string} hex - The hex string to strip variation selectors from + * @returns {string} The hex string with variation selectors removed + */ +export function stripHex(hex: string): string { + return hex.replace(/(-| )?(200D|FE0E|FE0F)/g, ""); +} diff --git a/src/versions.ts b/src/versions.ts new file mode 100644 index 0000000..0051afb --- /dev/null +++ b/src/versions.ts @@ -0,0 +1,302 @@ +import type { EmojiVersion } from "./lockfile"; +import consola from "consola"; +import semver from "semver"; +import { isEmojiVersionValid } from "./utils"; + +export interface DraftVersion { + emoji_version: string; + unicode_version: string; +} + +/** + * Retrieves the current Unicode draft version by fetching and comparing root and emoji ReadMe files. + * + * This function fetches two ReadMe files from unicode.org: + * - The main draft ReadMe + * - The emoji draft ReadMe + * + * It then extracts and validates the version numbers from both files to ensure they match. + * The emoji version uses major.minor format while the root version uses major.minor.patch. + * + * @returns A Promise that resolves to the current draft version string, or null if not found + * @throws {Error} If either fetch fails + * @throws {Error} If version extraction fails + * @throws {Error} If versions between root and emoji drafts don't match + */ +export async function getCurrentDraftVersion(): Promise { + const [rootResult, emojiResult] = await Promise.allSettled([ + "https://unicode.org/Public/draft/ReadMe.txt", + "https://unicode.org/Public/draft/emoji/ReadMe.txt", + ].map(async (url) => { + const res = await fetch(url); + + if (!res.ok) { + throw new Error(`failed to fetch ${url}: ${res.statusText}`); + } + + return res.text(); + })); + + if (rootResult == null || emojiResult == null) { + throw new Error("failed to fetch draft readme or draft emoji readme"); + } + + if (rootResult.status === "rejected" || emojiResult.status === "rejected") { + consola.error({ + root: rootResult.status === "rejected" ? rootResult.reason : "ok", + emoji: emojiResult.status === "rejected" ? emojiResult.reason : "ok", + }); + + throw new Error("failed to fetch draft readme or draft emoji readme"); + } + + const draftText = rootResult.value; + const emojiText = emojiResult.value; + + const rootVersion = extractVersionFromReadme(draftText); + const emojiVersion = extractVersionFromReadme(emojiText); + + if (rootVersion == null || emojiVersion == null) { + throw new Error("failed to extract draft version"); + } + + // the emoji version is only using major.minor format. + // so, we will need to add the last 0 to the version. + + // if they don't match the major and minor version, we will throw an error. + if (semver.major(rootVersion) !== semver.major(`${emojiVersion}.0`) || semver.minor(rootVersion) !== semver.minor(`${emojiVersion}.0`)) { + throw new Error("draft versions do not match"); + } + + return { + emoji_version: emojiVersion, + unicode_version: rootVersion, + }; +} + +/** + * Extracts the emoji version from a comment string. + * The version should be in the format "E{major}.{minor}" (e.g. "E14.0"). + * + * @param {string} comment - The comment string to extract the version from + * @returns {string | null} The parsed version number, or null if no valid version was found + * + * @example + * ```ts + * extractEmojiVersion("E14.0") // returns "14.0" + * extractEmojiVersion("Something else") // returns null + * ``` + */ +export function extractEmojiVersion(comment: string): string | null { + const version = comment.match(/E(\d+\.\d)/); + + if (version != null && version[1] != null) { + return version[1].trim(); + } + + return null; +} + +/** + * Extracts the Unicode version number from a given text string. + * + * @param {string} text - The text to extract the version number from + * @returns {string | null} The extracted version number as a string, or null if no version number is found + * + * @example + * ```ts + * extractVersionFromReadme("Version 15.0.0 of the Unicode Standard") // Returns "15.0.0" + * extractVersionFromReadme("Unicode15.1") // Returns "15.1" + * extractVersionFromReadme("No version here") // Returns null + * ``` + */ +export function extractVersionFromReadme(text: string): string | null { + const patterns = [ + /Version (\d+\.\d+(?:\.\d+)?) of the Unicode Standard/, // Most explicit + /Unicode(\d+\.\d+(?:\.\d+)?)/, // From URLs + /Version (\d+\.\d+)(?!\.\d)/, // Bare major.minor format + /Unicode Emoji, Version (\d+\.\d+(?:\.\d+)?)/, // Emoji-specific version + ]; + + for (const pattern of patterns) { + const match = text.match(pattern); + + if (match == null || match[1] == null) continue; + + return match[1]; + } + + return null; +} + +// https://unicode.org/reports/tr51/#EmojiVersions +export function extractUnicodeVersion(emojiVersion: string | null, unicodeVersion?: string): string | null { + const coercedEmojiVersion = semver.coerce(emojiVersion); + const coercedUnicodeVersion = semver.coerce(unicodeVersion); + + if (coercedEmojiVersion == null || coercedUnicodeVersion == null) { + return null; + } + + // v11+ aligned emoji and unicode specs (except for minor versions) + if (semver.gte(coercedEmojiVersion, "11.0.0")) { + // if the unicode version is not provided, we will return the emoji version. + if (unicodeVersion == null) { + return emojiVersion; + } + + // return the smallest version between the emoji and unicode version. + if (semver.lt(coercedEmojiVersion, coercedUnicodeVersion)) { + return emojiVersion; + } + + return unicodeVersion; + } + + switch (emojiVersion) { + case "0.7": + return "7.0"; + case "1.0": + case "2.0": + return "8.0"; + case "3.0": + case "4.0": + return "9.0"; + case "5.0": + return "10.0"; + default: + // v6 is the first unicode spec emojis appeared in + return "6.0"; + } +} + +/** + * Retrieves all available emoji versions from Unicode.org. + * This function fetches both the root Unicode directory and the emoji-specific directory + * to compile a comprehensive list of valid emoji versions. + * + * The function performs the following steps: + * 1. Fetches content from Unicode.org's public directories + * 2. Extracts version numbers using regex + * 3. Validates each version + * 4. Normalizes version numbers to valid semver format + * + * @throws {Error} When either the root or emoji page fetch fails + * @returns {Promise} A promise that resolves to an array of emoji versions, + * sorted according to semver rules + */ +export async function getAllEmojiVersions(): Promise { + const [rootResult, emojiResult] = await Promise.allSettled([ + "https://unicode.org/Public/", + "https://unicode.org/Public/emoji/", + ].map(async (url) => { + const res = await fetch(url); + + if (!res.ok) { + throw new Error(`failed to fetch ${url}: ${res.statusText}`); + } + + return res.text(); + })); + + if (rootResult == null || emojiResult == null) { + throw new Error("failed to fetch root or emoji page"); + } + + if (rootResult.status === "rejected" || emojiResult.status === "rejected") { + consola.error({ + root: rootResult.status === "rejected" ? rootResult.reason : "ok", + emoji: emojiResult.status === "rejected" ? emojiResult.reason : "ok", + }); + + throw new Error("failed to fetch root or emoji page"); + } + + const rootHtml = rootResult.value; + const emojiHtml = emojiResult.value; + + const versionRegex = /href="(\d+\.\d+(?:\.\d+)?)\/?"/g; + + const draft = await getCurrentDraftVersion(); + + if (draft == null) { + throw new Error("failed to fetch draft version"); + } + + const versions: EmojiVersion[] = []; + + for (const match of rootHtml.matchAll(versionRegex)) { + if (match == null || match[1] == null) continue; + + const version = match[1]; + + if (!await isEmojiVersionValid(version)) { + continue; + } + + if (versions.some((v) => v.unicode_version === version)) { + continue; + } + + versions.push({ + emoji_version: null, + unicode_version: version, + draft: version === draft.unicode_version || version === draft.emoji_version, + }); + } + + for (const match of emojiHtml.matchAll(versionRegex)) { + if (match == null || match[1] == null) continue; + + let version = match[1]; + + // for the emoji page, the versions is not valid semver. + // so we will add the last 0 to the version. + // handle both 5.0 and 12.0 -> 5.0.0 and 12.0.0 + if (version.length === 3 || version.length === 4) { + version += ".0"; + } + + if (!await isEmojiVersionValid(version)) { + continue; + } + + // check if the unicode_version already exists. + // if it does, we will update the emoji version. + const existing = versions.find((v) => v.unicode_version === version); + + let unicode_version = null; + + // the emoji version 13.1 is using the unicode + // 13.0, since it was never released. + if (match[1] === "13.1") { + unicode_version = "13.0.0"; + } + + if (match[1] === "5.0") { + unicode_version = "10.0.0"; + } + + if (match[1] === "4.0" || match[1] === "3.0") { + unicode_version = "9.0.0"; + } + + if (match[1] === "2.0" || match[1] === "1.0") { + unicode_version = "8.0.0"; + } + + if (existing) { + existing.unicode_version = unicode_version || existing.unicode_version; + existing.emoji_version = match[1]; + continue; + } + + versions.push({ + emoji_version: match[1], + unicode_version, + draft: version === draft.unicode_version || version === draft.emoji_version, + }); + } + + return versions.sort((a, b) => semver.compare(`${b.emoji_version}.0`, `${a.emoji_version}.0`)); +} diff --git a/test/__setup.ts b/test/__setup.ts new file mode 100644 index 0000000..96e204d --- /dev/null +++ b/test/__setup.ts @@ -0,0 +1,6 @@ +import { vi } from "vitest"; +import createFetchMock from "vitest-fetch-mock"; + +const fetchMocker = createFetchMock(vi); + +fetchMocker.enableMocks(); diff --git a/test/fixtures/extract-version/emoji/README-invalid.txt b/test/fixtures/extract-version/emoji/README-invalid.txt new file mode 100644 index 0000000..8fa19c2 --- /dev/null +++ b/test/fixtures/extract-version/emoji/README-invalid.txt @@ -0,0 +1,21 @@ +# Unicode Emoji +# © 2025 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html + +This directory contains draft data files for Unicode Emoji, Version x.x + +Public/draft/emoji/ + + emoji-sequences.txt + emoji-zwj-sequences.txt + emoji-test.txt + +The following related files are found in the UCD for Version x.x + +Public/draft/ucd/emoji/ + + emoji-data.txt + emoji-variation-sequences.txt + +For documentation, see UTS #51 Unicode Emoji, Version x.x diff --git a/test/fixtures/extract-version/emoji/README-valid.txt b/test/fixtures/extract-version/emoji/README-valid.txt new file mode 100644 index 0000000..f591646 --- /dev/null +++ b/test/fixtures/extract-version/emoji/README-valid.txt @@ -0,0 +1,21 @@ +# Unicode Emoji +# © 2025 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html + +This directory contains draft data files for Unicode Emoji, Version 17.0 + +Public/draft/emoji/ + + emoji-sequences.txt + emoji-zwj-sequences.txt + emoji-test.txt + +The following related files are found in the UCD for Version 17.0 + +Public/draft/ucd/emoji/ + + emoji-data.txt + emoji-variation-sequences.txt + +For documentation, see UTS #51 Unicode Emoji, Version 17.0 diff --git a/test/fixtures/extract-version/root/README-invalid.txt b/test/fixtures/extract-version/root/README-invalid.txt new file mode 100644 index 0000000..dd1abaa --- /dev/null +++ b/test/fixtures/extract-version/root/README-invalid.txt @@ -0,0 +1,24 @@ +# Unicode Character Database +# Date: 2025-01-29 +# © 2025 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# For documentation, see the following: +# ucd/NamesList.html +# UAX #38, "Unicode Han Database (Unihan)" +# UAX #42, "Unicode Character Database in XML" +# UAX #44, "Unicode Character Database" +# UTS #51, "Unicode Emoji" +# UAX #57, "Unicode Egyptian Hieroglyph Database" +# +# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicodex.x.x/ + +This directory contains the draft data files +for Version x.x.x of the Unicode Standard. + +The "charts" subdirectory contains an archival set of +pdf code charts corresponding exactly to Version x.x.x. + +The other subdirectories contain the data files for the +Unicode Character Database and for the synchronized Unicode Technical Standards. diff --git a/test/fixtures/extract-version/root/README-valid.txt b/test/fixtures/extract-version/root/README-valid.txt new file mode 100644 index 0000000..f2f8dfa --- /dev/null +++ b/test/fixtures/extract-version/root/README-valid.txt @@ -0,0 +1,24 @@ +# Unicode Character Database +# Date: 2025-01-29 +# © 2025 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# For documentation, see the following: +# ucd/NamesList.html +# UAX #38, "Unicode Han Database (Unihan)" +# UAX #42, "Unicode Character Database in XML" +# UAX #44, "Unicode Character Database" +# UTS #51, "Unicode Emoji" +# UAX #57, "Unicode Egyptian Hieroglyph Database" +# +# The UAXes and UTS #51 can be accessed at https://www.unicode.org/versions/Unicode17.0.0/ + +This directory contains the draft data files +for Version 17.0.0 of the Unicode Standard. + +The "charts" subdirectory contains an archival set of +pdf code charts corresponding exactly to Version 17.0.0. + +The other subdirectories contain the data files for the +Unicode Character Database and for the synchronized Unicode Technical Standards. diff --git a/test/utils/lockfile.test.ts b/test/lockfile.test.ts similarity index 98% rename from test/utils/lockfile.test.ts rename to test/lockfile.test.ts index 0c84b12..242ad62 100644 --- a/test/utils/lockfile.test.ts +++ b/test/lockfile.test.ts @@ -1,7 +1,7 @@ import fs from "fs-extra"; import { describe, expect, it } from "vitest"; import { testdir } from "vitest-testdirs"; -import { type EmojiLockfile, hasLockfile, readLockfile, writeLockfile } from "../../src/utils/lockfile"; +import { type EmojiLockfile, hasLockfile, readLockfile, writeLockfile } from "../src/lockfile"; describe("hasLockfile", () => { it("should return true when lockfile exists", async () => { diff --git a/test/utils/hexcode.test.ts b/test/utils/hexcode.test.ts index 4283a51..c9d2c68 100644 --- a/test/utils/hexcode.test.ts +++ b/test/utils/hexcode.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { expandHexRange, fromHexToCodepoint } from "../../src/utils/hexcode"; +import { expandHexRange, fromHexToCodepoint, stripHex } from "../../src/utils/hexcode"; describe("fromHexToCodepoint", () => { it("should convert hex string with hyphens to codepoints", () => { @@ -36,3 +36,29 @@ describe("expandHexRange", () => { expect(expandHexRange("1F600..1F602")).toEqual(["1F600", "1F601", "1F602"]); }); }); + +describe("stripHex", () => { + it("should remove zero width joiner (200D)", () => { + expect(stripHex("1F468-200D-1F469")).toBe("1F468-1F469"); + }); + + it("should remove text style selector (FE0E)", () => { + expect(stripHex("2764-FE0E")).toBe("2764"); + }); + + it("should remove emoji style selector (FE0F)", () => { + expect(stripHex("2764-FE0F")).toBe("2764"); + }); + + it("should remove multiple variation selectors", () => { + expect(stripHex("1F468-200D-2764-FE0F-200D-1F468")).toBe("1F468-2764-1F468"); + }); + + it("should handle string without variation selectors", () => { + expect(stripHex("1F600")).toBe("1F600"); + }); + + it("should handle space-separated values", () => { + expect(stripHex("1F468 200D 1F469")).toBe("1F468 1F469"); + }); +}); diff --git a/test/versions.test.ts b/test/versions.test.ts new file mode 100644 index 0000000..49f1589 --- /dev/null +++ b/test/versions.test.ts @@ -0,0 +1,112 @@ +import fs from "fs-extra"; +import { describe, expect, it } from "vitest"; +import { extractEmojiVersion, extractVersionFromReadme, getCurrentDraftVersion } from "../src/versions"; + +describe("get draft version", () => { + it("returns draft versions when fetches succeed and versions match", async () => { + fetchMock + .mockResponseOnceIf("https://unicode.org/Public/draft/ReadMe.txt", "Version 15.1.0 of the Unicode Standard") + .mockResponseOnceIf("https://unicode.org/Public/draft/emoji/ReadMe.txt", "Unicode Emoji, Version 15.1"); + + const result = await getCurrentDraftVersion(); + expect(result).toEqual({ + emoji_version: "15.1", + unicode_version: "15.1.0", + }); + }); + + it("throws when fetch fails", async () => { + fetchMock.mockResponse("Not Found", { status: 404 }); + + await expect(getCurrentDraftVersion()).rejects.toThrow("failed to fetch"); + }); + + it("throws when versions do not match", async () => { + fetchMock + .mockResponseOnceIf("https://unicode.org/Public/draft/ReadMe.txt", "Version 15.1.0 of the Unicode Standard") + .mockResponseOnceIf("https://unicode.org/Public/draft/emoji/ReadMe.txt", "Unicode Emoji, Version 15.0"); + + await expect(getCurrentDraftVersion()).rejects.toThrow("draft versions do not match"); + }); + + it("throws when version extraction fails", async () => { + fetchMock + .mockResponse("Invalid version format", { status: 200 }); + + await expect(getCurrentDraftVersion()).rejects.toThrow("failed to extract draft version"); + }); +}); + +describe("extract version", () => { + it.each([ + { input: "Version 15.1.0 of the Unicode Standard", expected: "15.1.0" }, + { input: "Version 15.1 of the Unicode Standard", expected: "15.1" }, + { input: "Version 15.0 of the Unicode Standard", expected: "15.0" }, + { input: "Version 5.0 of the Unicode Standard", expected: "5.0" }, + ])("should extract valid version numbers (input: $input, expected: $expected)", ({ input, expected }) => { + expect(extractVersionFromReadme(input)).toBe(expected); + }); + + it.each([ + { input: "Invalid version format", expected: null }, + { input: "Version 15.1.0", expected: null }, + { input: "Version 15", expected: null }, + { input: "", expected: null }, + ])("should return null for invalid formats (input: $input, expected: $expected)", ({ input, expected }) => { + expect(extractVersionFromReadme(input)).toBe(expected); + }); + + describe.each([ + { name: "emoji draft readme", path: "emoji/README-valid.txt", version: "17.0" }, + { name: "invalid emoji draft readme", path: "emoji/README-invalid.txt", version: null }, + + { name: "draft readme", path: "root/README-valid.txt", version: "17.0.0" }, + { name: "invalid draft readme", path: "root/README-invalid.txt", version: null }, + + ])("extract version from $name", ({ path, version }) => { + it("should extract version from file path", () => { + const content = fs.readFileSync(`./test/fixtures/extract-version/${path}`, "utf-8"); + expect(extractVersionFromReadme(content)).toBe(version); + }); + }); + + describe("extract emoji version", () => { + it.each([ + { input: "E14.0", expected: "14.0" }, + { input: "E15.1", expected: "15.1" }, + { input: "E5.0", expected: "5.0" }, + ])("should extract valid emoji version numbers (input: $input, expected: $expected)", ({ input, expected }) => { + expect(extractEmojiVersion(input)).toBe(expected); + }); + + it.each([ + { input: "14.0", expected: null }, + { input: "Hello E14", expected: null }, + { input: "E14", expected: null }, + { input: "", expected: null }, + ])("should return null for invalid formats (input: $input, expected: $expected)", ({ input, expected }) => { + expect(extractEmojiVersion(input)).toBe(expected); + }); + + it.each([ + { input: " E14.0 ", expected: "14.0" }, + { input: "E 14.0", expected: null }, + ])("should handle whitespace (input: $input, expected: $expected)", ({ input, expected }) => { + expect(extractEmojiVersion(input)).toBe(expected); + }); + }); +}); + +// describe("get all emoji versions", () => { +// it("should return all emoji versions", async () => { +// fetchMock +// .mockResponseOnceIf("https://unicode.org/Public/", "Version 15.1.0 of the Unicode Standard") +// .mockResponseOnceIf("https://unicode.org/Public/emoji/", "Unicode Emoji, Version 15.1"); + +// const result = await getCurrentDraftVersion(); +// expect(result).toEqual({ +// emoji_version: "15.1", +// unicode_version: "15.1.0", +// }); +// }); +// }); diff --git a/vitest.config.ts b/vitest.config.ts new file mode 100644 index 0000000..f2e3338 --- /dev/null +++ b/vitest.config.ts @@ -0,0 +1,9 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + setupFiles: [ + "./test/__setup.ts", + ], + }, +});