diff --git a/.kiro/steering/structure.md b/.kiro/steering/structure.md index 165eac1fe..8f3db3d7c 100644 --- a/.kiro/steering/structure.md +++ b/.kiro/steering/structure.md @@ -128,10 +128,10 @@ accidental mixing of similar types at compile time. | `ConfigurationId` | `createNewConfigurationId()` | `@/core/ConfigurationProvider/types` | | `RenderedVertexId` | `toRenderedVertexId()` | `@/core/StateProvider/renderedEntities` | | `RenderedEdgeId` | `toRenderedEdgeId()` | `@/core/StateProvider/renderedEntities` | -| `IriNamespace` | `as IriNamespace` | `@/utils/rdf` | -| `IriLocalValue` | `as IriLocalValue` | `@/utils/rdf` | -| `RdfPrefix` | `as RdfPrefix` | `@/utils/rdf` | -| `NormalizedIriNamespace` | `as NormalizedIriNamespace` | `@/utils/rdf` | +| `IriNamespace` | `splitIri()` | `@/utils/rdf` | +| `IriLocalValue` | `splitIri()` | `@/utils/rdf` | +| `RdfPrefix` | `generatePrefix()` | `@/utils/rdf` | +| `NormalizedIriNamespace` | `normalizeNamespace()` | `@/utils/rdf` | Always use the appropriate branded type instead of `string` when working with these identifiers. diff --git a/packages/graph-explorer/src/core/ConfigurationProvider/types.ts b/packages/graph-explorer/src/core/ConfigurationProvider/types.ts index 97c705b43..2c940d88e 100644 --- a/packages/graph-explorer/src/core/ConfigurationProvider/types.ts +++ b/packages/graph-explorer/src/core/ConfigurationProvider/types.ts @@ -96,11 +96,6 @@ export type PrefixTypeConfig = { * Mark as true after inferring from the schema. */ __inferred?: boolean; - /** - * Internal purpose only. 
- * Matches URIs - */ - __matches?: Set; }; /** diff --git a/packages/graph-explorer/src/core/StateProvider/schema.test.ts b/packages/graph-explorer/src/core/StateProvider/schema.test.ts index 08be4081b..1a8d7dea6 100644 --- a/packages/graph-explorer/src/core/StateProvider/schema.test.ts +++ b/packages/graph-explorer/src/core/StateProvider/schema.test.ts @@ -40,6 +40,7 @@ import { mapEdgeToTypeConfig, mapVertexToTypeConfigs, maybeActiveSchemaAtom, + type SchemaStorageModel, shouldUpdateSchemaFromEntities, updateSchemaFromEntities, updateSchemaPrefixes, @@ -244,6 +245,24 @@ describe("schema", () => { newNodes.flatMap(mapVertexToTypeConfigs).flatMap(n => n.attributes), ); }); + + it("should generate prefixes from vertex IDs", () => { + const schema = createRandomSchema(); + schema.vertices = []; + schema.edges = []; + schema.prefixes = []; + + const vertex = createVertex({ + ...createRandomVertex(), + id: "http://data.nobelprize.org/resource/country/France", + types: ["http://data.nobelprize.org/class/Country"], + }); + + const result = updateSchemaFromEntities({ vertices: [vertex] }, schema); + + const prefixes = result.prefixes?.map(p => p.prefix); + expect(prefixes).toContain("country"); + }); }); describe("updateSchemaPrefixes", () => { @@ -275,16 +294,40 @@ describe("schema", () => { expect(result.prefixes).toBeDefined(); expect(result.prefixes).toEqual([ { - prefix: "ver" as RdfPrefix, + prefix: "vertex" as RdfPrefix, uri: "http://abcdefg.com/vertex#" as IriNamespace, __inferred: true, - __matches: new Set(schema.vertices.map(v => v.type)), }, { - prefix: "edg" as RdfPrefix, + prefix: "edge" as RdfPrefix, uri: "http://abcdefg.com/edge#" as IriNamespace, __inferred: true, - __matches: new Set(schema.edges.map(e => e.type)), + }, + ] satisfies PrefixTypeConfig[]); + }); + + it("should append new prefixes to existing ones", () => { + const schema = createRandomSchema(); + const existingPrefix: PrefixTypeConfig = { + prefix: "custom" as RdfPrefix, + uri: 
"http://custom.example.com/" as IriNamespace, + }; + schema.prefixes = [existingPrefix]; + schema.vertices.forEach(v => { + v.type = createVertexType( + "http://abcdefg.com/vertex#" + encodeURIComponent(v.type), + ); + }); + schema.edges = []; + + const result = updateSchemaPrefixes(schema); + + expect(result.prefixes).toStrictEqual([ + existingPrefix, + { + prefix: "vertex" as RdfPrefix, + uri: "http://abcdefg.com/vertex#" as IriNamespace, + __inferred: true, }, ] satisfies PrefixTypeConfig[]); }); @@ -686,3 +729,109 @@ describe("useActiveSchema", () => { expect(result.current).toStrictEqual(schema); }); }); + +/** + * BACKWARD COMPATIBILITY — PERSISTED DATA + * + * SchemaStorageModel (including its PrefixTypeConfig[] in `prefixes`) is + * persisted to IndexedDB via localforage. Older versions stored a `__matches` + * property (Set) on inferred prefixes. That property has been removed + * from PrefixTypeConfig, but previously persisted data may still contain it. + * These tests verify that schema operations continue to work correctly when + * the schema contains prefixes in the old shape. + * + * DO NOT delete or weaken these tests without confirming that all persisted + * data has been migrated or that the old shape is no longer in the wild. + */ +describe("backward compatibility: legacy __matches on prefixes", () => { + it("updateSchemaPrefixes should preserve legacy prefixes and append new ones", () => { + // Simulates a schema loaded from IndexedDB that was persisted before + // __matches was removed from PrefixTypeConfig. 
+ const legacyPrefix = { + prefix: "soccer" as RdfPrefix, + uri: "http://www.example.com/soccer/ontology/" as IriNamespace, + __inferred: true, + __matches: new Set(["http://www.example.com/soccer/ontology/League"]), + } as PrefixTypeConfig; + + const schema = createRandomSchema(); + schema.prefixes = [legacyPrefix]; + schema.vertices.forEach(v => { + v.type = createVertexType( + "http://newdomain.com/vertex#" + encodeURIComponent(v.type), + ); + }); + schema.edges = []; + + const result = updateSchemaPrefixes(schema); + + // Legacy prefix should be preserved as-is at the start of the array + expect(result.prefixes?.[0]).toBe(legacyPrefix); + // New prefix should be appended + expect(result.prefixes).toHaveLength(2); + expect(result.prefixes?.[1]).toStrictEqual({ + prefix: "vertex" as RdfPrefix, + uri: "http://newdomain.com/vertex#" as IriNamespace, + __inferred: true, + }); + }); + + it("updateSchemaPrefixes should not regenerate prefixes already covered by legacy entries", () => { + // The legacy prefix covers the same namespace as the vertex types, + // so no new prefixes should be generated. 
+ const legacyPrefix = { + prefix: "soccer" as RdfPrefix, + uri: "http://www.example.com/soccer/ontology/" as IriNamespace, + __inferred: true, + __matches: new Set(["http://www.example.com/soccer/ontology/League"]), + } as PrefixTypeConfig; + + const schema = createRandomSchema(); + schema.prefixes = [legacyPrefix]; + schema.vertices = [ + { + type: createVertexType("http://www.example.com/soccer/ontology/Player"), + attributes: [], + }, + ]; + schema.edges = []; + + const result = updateSchemaPrefixes(schema); + + // No change — the legacy prefix already covers this namespace + expect(result).toBe(schema); + }); + + it("updateSchemaFromEntities should work with schema containing legacy prefixes", () => { + const legacyPrefix = { + prefix: "old" as RdfPrefix, + uri: "http://old.example.com/" as IriNamespace, + __inferred: true, + __matches: new Set(["http://old.example.com/Thing"]), + } as PrefixTypeConfig; + + const schema: SchemaStorageModel = { + vertices: [], + edges: [], + prefixes: [legacyPrefix], + }; + + const vertex = createVertex({ + id: "http://new.example.com/vertex#1", + types: ["http://new.example.com/vertex#Person"], + attributes: {}, + }); + + const result = updateSchemaFromEntities({ vertices: [vertex] }, schema); + + // Legacy prefix should be preserved + expect(result.prefixes?.[0]).toBe(legacyPrefix); + // New prefix should be appended for the new namespace + expect(result.prefixes).toHaveLength(2); + expect(result.prefixes?.[1]).toStrictEqual({ + prefix: "vertex" as RdfPrefix, + uri: "http://new.example.com/vertex#" as IriNamespace, + __inferred: true, + }); + }); +}); diff --git a/packages/graph-explorer/src/core/StateProvider/schema.ts b/packages/graph-explorer/src/core/StateProvider/schema.ts index acb6bb99c..9fe57b92c 100644 --- a/packages/graph-explorer/src/core/StateProvider/schema.ts +++ b/packages/graph-explorer/src/core/StateProvider/schema.ts @@ -29,7 +29,7 @@ import { type VertexType, } from "@/core"; import { logger } from 
"@/utils"; -import { generatePrefixes } from "@/utils/rdf"; +import { generatePrefixes, PrefixLookup } from "@/utils/rdf"; /** * Persisted schema state for a database connection. @@ -113,15 +113,14 @@ export function useMaybeActiveSchema(): SchemaStorageModel | undefined { return useDeferredValue(useAtomValue(maybeActiveSchemaAtom)); } -/** Gets the stored prefixes from the active schema. */ -export function usePrefixes(): PrefixTypeConfig[] { - const schema = useActiveSchema(); - return schema.prefixes ?? []; +/** Gets the stored prefixes from the active schema as a lookup object. */ +export function usePrefixes() { + return useAtomValue(prefixesAtom); } export const prefixesAtom = atom(get => { const schema = get(activeSchemaAtom); - return schema.prefixes ?? []; + return PrefixLookup.fromArray(schema.prefixes ?? []); }); function createVertexSchema(vtConfig: VertexTypeConfig) { @@ -304,7 +303,7 @@ export function updateSchemaFromEntities( } satisfies SchemaStorageModel; // Update the generated prefixes in the schema - newSchema = updateSchemaPrefixes(newSchema); + newSchema = updateSchemaPrefixes(newSchema, entities); logger.debug("Updated schema:", { newSchema, prevSchema: schema }); return newSchema; @@ -417,31 +416,34 @@ function detectDataType(value: ScalarValue) { /** Generate RDF prefixes for all the resource URIs in the schema. */ export function updateSchemaPrefixes( schema: SchemaStorageModel, + entities?: Partial, ): SchemaStorageModel { - const existingPrefixes = schema.prefixes ?? []; + const existingPrefixes = PrefixLookup.fromArray(schema.prefixes ?? 
[]); - // Get all the resource URIs from the vertex and edge type configs - const resourceUris = getResourceUris(schema); + const resourceUris = getResourceUris(schema, entities); if (resourceUris.size === 0) { return schema; } - const genPrefixes = generatePrefixes(resourceUris, existingPrefixes); - if (!genPrefixes?.length) { + const newPrefixes = generatePrefixes(resourceUris, existingPrefixes); + if (newPrefixes.length === 0) { return schema; } - logger.debug("Updating schema with prefixes:", genPrefixes); + logger.debug("Updating schema with prefixes:", newPrefixes); return { ...schema, - prefixes: genPrefixes, + prefixes: [...(schema.prefixes ?? []), ...newPrefixes], }; } -/** A performant way to construct the set of resource URIs from the schema. */ -function getResourceUris(schema: SchemaStorageModel) { +/** Collects resource URIs from schema type configs and entity IDs. */ +function getResourceUris( + schema: SchemaStorageModel, + entities?: Partial, +) { const result = new Set(); schema.vertices.forEach(v => { @@ -454,6 +456,13 @@ function getResourceUris(schema: SchemaStorageModel) { result.add(e.type); }); + for (const v of entities?.vertices ?? []) { + result.add(String(v.id)); + } + for (const e of entities?.edges ?? 
[]) { + result.add(String(e.id)); + } + return result; } diff --git a/packages/graph-explorer/src/modules/AvailableConnections/useImportConnectionFile.test.tsx b/packages/graph-explorer/src/modules/AvailableConnections/useImportConnectionFile.test.tsx index d37166299..bd8fdf9a2 100644 --- a/packages/graph-explorer/src/modules/AvailableConnections/useImportConnectionFile.test.tsx +++ b/packages/graph-explorer/src/modules/AvailableConnections/useImportConnectionFile.test.tsx @@ -167,7 +167,6 @@ describe("useImportConnectionFile", () => { { prefix: "rdf", uri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - __matches: ["http://example.com/1", "http://example.com/2"], }, ], }, @@ -186,13 +185,12 @@ describe("useImportConnectionFile", () => { (_, index) => index === 1, ); - expect(importedSchema?.prefixes).toHaveLength(1); - expect(importedSchema?.prefixes?.[0].prefix).toBe("rdf"); - expect(importedSchema?.prefixes?.[0].uri).toBe( - "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - ); - expect(importedSchema?.prefixes?.[0].__matches).toBeInstanceOf(Set); - expect(importedSchema?.prefixes?.[0].__matches?.size).toBe(2); + expect(importedSchema?.prefixes).toStrictEqual([ + { + prefix: "rdf", + uri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + }, + ]); }); test("should handle schema with lastUpdate date", async () => { @@ -347,3 +345,82 @@ describe("useImportConnectionFile", () => { expect(importedSchema?.edges[1].type).toBe("knows"); }); }); + +/** + * BACKWARD COMPATIBILITY — PERSISTED DATA + * + * Exported configuration files from older versions may contain a `__matches` + * array on prefix entries. That property has been removed from PrefixTypeConfig, + * but previously exported files may still contain it. These tests verify that + * importing such files still works correctly — the extra property is harmlessly + * carried through without breaking the import flow. 
+ * + * DO NOT delete or weaken these tests without confirming that all exported + * files in the wild have been re-exported or that the old shape is no longer + * a concern. + */ +describe("backward compatibility: legacy __matches in exported files", () => { + test("should import file with legacy __matches array on prefixes", async () => { + const state = new DbState(); + const { result } = renderHookWithState( + () => useImportConnectionFile(), + state, + ); + + // This mirrors the shape of a file exported by an older version that + // serialized __matches as an array. + const legacyConfig = { + id: createNewConfigurationId(), + displayLabel: createRandomName("Config"), + connection: { + url: createRandomUrlString(), + queryEngine: "sparql" as const, + }, + schema: { + totalVertices: 0, + vertices: [], + totalEdges: 0, + edges: [], + prefixes: [ + { + prefix: "rdf", + uri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + __inferred: true, + __matches: [ + "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#Property", + ], + }, + { + prefix: "custom", + uri: "http://custom.example.com/", + }, + ], + }, + }; + + const file = new File([JSON.stringify(legacyConfig)], "connection.json", { + type: "application/json", + }); + + await act(async () => { + await result.current(file); + }); + + const schemas = getAppStore().get(schemaAtom); + const importedSchema = Array.from(schemas.values()).find( + (_, index) => index === 1, + ); + + // Both prefixes should be imported successfully + expect(importedSchema?.prefixes).toHaveLength(2); + expect(importedSchema?.prefixes?.[0].prefix).toBe("rdf"); + expect(importedSchema?.prefixes?.[0].uri).toBe( + "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + ); + expect(importedSchema?.prefixes?.[1].prefix).toBe("custom"); + expect(importedSchema?.prefixes?.[1].uri).toBe( + "http://custom.example.com/", + ); + }); +}); diff --git 
a/packages/graph-explorer/src/modules/AvailableConnections/useImportConnectionFile.ts b/packages/graph-explorer/src/modules/AvailableConnections/useImportConnectionFile.ts index 6f8a1706c..34853c732 100644 --- a/packages/graph-explorer/src/modules/AvailableConnections/useImportConnectionFile.ts +++ b/packages/graph-explorer/src/modules/AvailableConnections/useImportConnectionFile.ts @@ -42,10 +42,7 @@ export function useImportConnectionFile() { updatedSchema.set(newId, { vertices: fileContent.schema?.vertices || [], edges: fileContent.schema?.edges || [], - prefixes: fileContent.schema?.prefixes?.map(prefix => ({ - ...prefix, - __matches: new Set(prefix.__matches || []), - })), + prefixes: fileContent.schema?.prefixes, lastUpdate: fileContent.schema?.lastUpdate ? new Date(fileContent.schema?.lastUpdate) : undefined, diff --git a/packages/graph-explorer/src/modules/Namespaces/GeneratedPrefixes.tsx b/packages/graph-explorer/src/modules/Namespaces/GeneratedPrefixes.tsx index a72ff615d..79f8a3987 100644 --- a/packages/graph-explorer/src/modules/Namespaces/GeneratedPrefixes.tsx +++ b/packages/graph-explorer/src/modules/Namespaces/GeneratedPrefixes.tsx @@ -57,13 +57,7 @@ function Layout(props: ComponentPropsWithoutRef<"div">) { function useGeneratedPrefixes() { const prefixes = usePrefixes(); - return prefixes - .filter( - prefixConfig => - prefixConfig.__inferred === true && - prefixConfig.__matches && - prefixConfig.__matches.size > 0, - ) + return prefixes.inferredPrefixes .map(mapToPrefixData) .toSorted((a, b) => a.title.localeCompare(b.title)); } diff --git a/packages/graph-explorer/src/modules/Namespaces/UserPrefixes.tsx b/packages/graph-explorer/src/modules/Namespaces/UserPrefixes.tsx index 148c568b9..670a5d67b 100644 --- a/packages/graph-explorer/src/modules/Namespaces/UserPrefixes.tsx +++ b/packages/graph-explorer/src/modules/Namespaces/UserPrefixes.tsx @@ -60,7 +60,7 @@ const UserPrefixes = () => { function useCustomPrefixes() { const prefixes = usePrefixes(); 
- return prefixes.filter(prefixConfig => prefixConfig.__inferred !== true); + return prefixes.userPrefixes; } function SearchablePrefixes({ diff --git a/packages/graph-explorer/src/utils/rdf/PrefixLookup.test.ts b/packages/graph-explorer/src/utils/rdf/PrefixLookup.test.ts new file mode 100644 index 000000000..869800f39 --- /dev/null +++ b/packages/graph-explorer/src/utils/rdf/PrefixLookup.test.ts @@ -0,0 +1,162 @@ +import type { PrefixTypeConfig } from "@/core"; +import type { IriNamespace, RdfPrefix } from "@/utils/rdf"; + +import { PrefixLookup } from "./PrefixLookup"; + +function userPrefix(prefix: string, uri: string): PrefixTypeConfig { + return { prefix: prefix as RdfPrefix, uri: uri as IriNamespace }; +} + +function inferredPrefix(prefix: string, uri: string): PrefixTypeConfig { + return { + prefix: prefix as RdfPrefix, + uri: uri as IriNamespace, + __inferred: true, + }; +} + +describe("PrefixLookup", () => { + it("should be created from an empty array", () => { + const lookup = PrefixLookup.fromArray([]); + expect(lookup.userPrefixes).toEqual([]); + expect(lookup.inferredPrefixes).toEqual([]); + }); + + it("should separate user and inferred prefixes", () => { + const user = userPrefix("foo", "http://foo.com/"); + const inferred = inferredPrefix("bar", "http://bar.com/"); + const lookup = PrefixLookup.fromArray([user, inferred]); + expect(lookup.userPrefixes).toEqual([user]); + expect(lookup.inferredPrefixes).toEqual([inferred]); + }); + + it("should include all inferred prefixes", () => { + const first = inferredPrefix("bar", "http://bar.com/"); + const second = inferredPrefix("empty", "http://empty.com/"); + const lookup = PrefixLookup.fromArray([first, second]); + expect(lookup.inferredPrefixes).toEqual([first, second]); + }); + + describe("findPrefix", () => { + it("should find a user prefix by namespace", () => { + const lookup = PrefixLookup.fromArray([ + userPrefix("foo", "http://foo.com/"), + ]); + expect(lookup.findPrefix("http://foo.com/" as 
IriNamespace)).toBe("foo"); + }); + + it("should find an inferred prefix by namespace", () => { + const lookup = PrefixLookup.fromArray([ + inferredPrefix("bar", "http://bar.com/"), + ]); + expect(lookup.findPrefix("http://bar.com/" as IriNamespace)).toBe("bar"); + }); + + it("should find a common prefix by namespace", () => { + const lookup = PrefixLookup.fromArray([]); + expect( + lookup.findPrefix("http://www.w3.org/2002/07/owl#" as IriNamespace), + ).toBe("owl"); + }); + + it("should match namespace case-insensitively", () => { + const lookup = PrefixLookup.fromArray([ + userPrefix("foo", "http://FOO.com/"), + ]); + expect(lookup.findPrefix("http://foo.com/" as IriNamespace)).toBe("foo"); + }); + + it("should return undefined when no prefix matches", () => { + const lookup = PrefixLookup.fromArray([]); + expect( + lookup.findPrefix("http://unknown.com/" as IriNamespace), + ).toBeUndefined(); + }); + + it("should prefer user prefix over common prefix", () => { + const lookup = PrefixLookup.fromArray([ + userPrefix("myowl", "http://www.w3.org/2002/07/owl#"), + ]); + expect( + lookup.findPrefix("http://www.w3.org/2002/07/owl#" as IriNamespace), + ).toBe("myowl"); + }); + + it("should prefer common prefix over inferred prefix", () => { + const lookup = PrefixLookup.fromArray([ + inferredPrefix("gen", "http://www.w3.org/2002/07/owl#"), + ]); + expect( + lookup.findPrefix("http://www.w3.org/2002/07/owl#" as IriNamespace), + ).toBe("owl"); + }); + }); +}); + +/** + * BACKWARD COMPATIBILITY — PERSISTED DATA + * + * PrefixTypeConfig is persisted to IndexedDB via localforage. Older versions + * stored a `__matches` property (Set) on inferred prefixes. That + * property has been removed from the type, but previously persisted data may + * still contain it. These tests verify that PrefixLookup continues to work + * correctly when given data in the old shape. 
+ * + * DO NOT delete or weaken these tests without confirming that all persisted + * data has been migrated or that the old shape is no longer in the wild. + */ +describe("backward compatibility: legacy __matches property", () => { + it("should handle inferred prefix with legacy __matches Set", () => { + // Simulates data loaded from IndexedDB that was persisted before + // __matches was removed from PrefixTypeConfig. + const legacyPrefix = { + prefix: "soccer" as RdfPrefix, + uri: "http://www.example.com/soccer/ontology/" as IriNamespace, + __inferred: true, + __matches: new Set(["http://www.example.com/soccer/ontology/League"]), + } as PrefixTypeConfig; + + const lookup = PrefixLookup.fromArray([legacyPrefix]); + + expect(lookup.inferredPrefixes).toHaveLength(1); + expect( + lookup.findPrefix( + "http://www.example.com/soccer/ontology/" as IriNamespace, + ), + ).toBe("soccer"); + }); + + it("should handle mix of legacy and current prefix shapes", () => { + // Legacy inferred prefix with __matches (old shape) + const legacyInferred = { + prefix: "old" as RdfPrefix, + uri: "http://old.example.com/" as IriNamespace, + __inferred: true, + __matches: new Set(["http://old.example.com/Thing"]), + } as PrefixTypeConfig; + + // Current inferred prefix without __matches (new shape) + const currentInferred = inferredPrefix("new", "http://new.example.com/"); + + // User prefix (never had __matches) + const user = userPrefix("custom", "http://custom.example.com/"); + + const lookup = PrefixLookup.fromArray([ + legacyInferred, + currentInferred, + user, + ]); + + expect(lookup.inferredPrefixes).toHaveLength(2); + expect(lookup.userPrefixes).toHaveLength(1); + expect(lookup.findPrefix("http://old.example.com/" as IriNamespace)).toBe( + "old", + ); + expect(lookup.findPrefix("http://new.example.com/" as IriNamespace)).toBe( + "new", + ); + expect( + lookup.findPrefix("http://custom.example.com/" as IriNamespace), + ).toBe("custom"); + }); +}); diff --git 
a/packages/graph-explorer/src/utils/rdf/PrefixLookup.ts b/packages/graph-explorer/src/utils/rdf/PrefixLookup.ts new file mode 100644 index 000000000..ec1814e56 --- /dev/null +++ b/packages/graph-explorer/src/utils/rdf/PrefixLookup.ts @@ -0,0 +1,64 @@ +import type { PrefixTypeConfig } from "@/core/ConfigurationProvider/types"; + +import type { IriNamespace, NormalizedIriNamespace, RdfPrefix } from "./types"; + +import { + commonPrefixesByNamespace, + normalizeNamespace, +} from "./commonPrefixes"; + +/** + * Provides fast Map-based lookups over a PrefixTypeConfig array. + * + * Separates user-created and inferred prefixes and normalizes namespaces + * for case-insensitive matching. + */ +export class PrefixLookup { + private readonly userMap: Map<NormalizedIriNamespace, PrefixTypeConfig>; + private readonly inferredMap: Map<NormalizedIriNamespace, PrefixTypeConfig>; + + /** User-created prefixes (not inferred), one per normalized namespace. */ + readonly userPrefixes: PrefixTypeConfig[]; + /** Inferred (auto-generated) prefixes, one per normalized namespace. */ + readonly inferredPrefixes: PrefixTypeConfig[]; + + private constructor(prefixes: PrefixTypeConfig[]) { + this.userMap = new Map(); + this.inferredMap = new Map(); + + for (const p of prefixes) { + const key = normalizeNamespace(p.uri); + if (p.__inferred) { + this.inferredMap.set(key, p); + } else { + this.userMap.set(key, p); + } + } + + this.userPrefixes = [...this.userMap.values()]; + this.inferredPrefixes = [...this.inferredMap.values()]; + } + + static fromArray(prefixes: PrefixTypeConfig[]): PrefixLookup { + return new PrefixLookup(prefixes); + } + + /** + * Finds the best matching prefix name for the given namespace. + * + * Priority order: user-created > common > inferred. + */ + findPrefix(namespace: IriNamespace): RdfPrefix | undefined { + const normalizedNamespace = normalizeNamespace(namespace); + + // Prefixes priority: + // 1. manually added (user) + // 2. common prefixes + // 3. automatically generated (inferred) + return ( + this.userMap.get(normalizedNamespace)?.prefix ?? 
+ commonPrefixesByNamespace.get(normalizedNamespace) ?? + this.inferredMap.get(normalizedNamespace)?.prefix + ); + } +} diff --git a/packages/graph-explorer/src/utils/rdf/commonPrefixes.test.ts b/packages/graph-explorer/src/utils/rdf/commonPrefixes.test.ts new file mode 100644 index 000000000..ba945b9f1 --- /dev/null +++ b/packages/graph-explorer/src/utils/rdf/commonPrefixes.test.ts @@ -0,0 +1,59 @@ +import type { IriNamespace } from "./types"; + +import { + commonPrefixesByNamespace, + normalizeNamespace, +} from "./commonPrefixes"; + +describe("normalizeNamespace", () => { + it("lowercases the namespace", () => { + expect(normalizeNamespace("HTTP://Example.ORG/" as IriNamespace)).toBe( + "http://example.org/", + ); + }); + + it("trims whitespace", () => { + expect(normalizeNamespace(" http://example.org/ " as IriNamespace)).toBe( + "http://example.org/", + ); + }); + + it("handles already normalized namespaces", () => { + expect(normalizeNamespace("http://example.org/" as IriNamespace)).toBe( + "http://example.org/", + ); + }); +}); + +describe("commonPrefixesByNamespace", () => { + it("contains well-known RDF prefixes", () => { + expect( + commonPrefixesByNamespace.get( + normalizeNamespace("http://www.w3.org/2002/07/owl#" as IriNamespace), + ), + ).toBe("owl"); + expect( + commonPrefixesByNamespace.get( + normalizeNamespace( + "http://www.w3.org/1999/02/22-rdf-syntax-ns#" as IriNamespace, + ), + ), + ).toBe("rdf"); + }); + + it("matches case-insensitively via normalizeNamespace", () => { + expect( + commonPrefixesByNamespace.get( + normalizeNamespace("HTTP://WWW.W3.ORG/2002/07/OWL#" as IriNamespace), + ), + ).toBe("owl"); + }); + + it("returns undefined for unknown namespaces", () => { + expect( + commonPrefixesByNamespace.get( + normalizeNamespace("http://unknown.example.org/" as IriNamespace), + ), + ).toBeUndefined(); + }); +}); diff --git a/packages/graph-explorer/src/utils/rdf/commonPrefixes.ts b/packages/graph-explorer/src/utils/rdf/commonPrefixes.ts new
file mode 100644 index 000000000..7cb771f98 --- /dev/null +++ b/packages/graph-explorer/src/utils/rdf/commonPrefixes.ts @@ -0,0 +1,21 @@ +import type { IriNamespace, NormalizedIriNamespace, RdfPrefix } from "./types"; + +import rawPrefixes from "./common-prefixes.json"; + +/** Lowercases and trims a namespace for case-insensitive map lookups. */ +export function normalizeNamespace( + namespace: IriNamespace, +): NormalizedIriNamespace { + return namespace.toLowerCase().trim() as NormalizedIriNamespace; +} + +/** Maps normalized namespace URI → prefix name for all common prefixes. */ +export const commonPrefixesByNamespace = new Map< + NormalizedIriNamespace, + RdfPrefix +>( + Object.entries(rawPrefixes).map(([prefix, uri]) => [ + normalizeNamespace(uri as IriNamespace), + prefix as RdfPrefix, + ]), +); diff --git a/packages/graph-explorer/src/utils/rdf/generatePrefix.test.ts b/packages/graph-explorer/src/utils/rdf/generatePrefix.test.ts new file mode 100644 index 000000000..d9477fbcc --- /dev/null +++ b/packages/graph-explorer/src/utils/rdf/generatePrefix.test.ts @@ -0,0 +1,228 @@ +import { generatePrefix } from "./generatePrefix"; + +describe("generatePrefix", () => { + it("returns null for non-IRI strings", () => { + expect(generatePrefix("not a url")).toBeNull(); + expect(generatePrefix("")).toBeNull(); + expect(generatePrefix("just-text")).toBeNull(); + }); + + it("returns null for IRIs without a namespace or value", () => { + expect(generatePrefix("http://example.org/")).toBeNull(); + expect(generatePrefix("http://example.org")).toBeNull(); + expect(generatePrefix("http://example.org/#")).toBeNull(); + }); + + it("returns null for non-URL schemes", () => { + expect(generatePrefix("urn:Person")).toBeNull(); + expect(generatePrefix("urn:isbn:1234567890")).toBeNull(); + expect(generatePrefix("mailto:example@abc.com")).toBeNull(); + }); + + it("returns null for file URIs", () => { + expect(generatePrefix("file://foo/bar.txt")).toBeNull(); + }); + + it("skips 
'resource' path segment for hash IRIs", () => { + expect( + generatePrefix("http://www.example.com/soccer/resource#EPL"), + ).toEqual({ + namespace: "http://www.example.com/soccer/resource#", + value: "EPL", + prefix: "soccer", + }); + }); + + it("skips 'ontology' path segment for slash IRIs", () => { + expect( + generatePrefix("http://www.example.com/soccer/ontology/League"), + ).toEqual({ + namespace: "http://www.example.com/soccer/ontology/", + value: "League", + prefix: "soccer", + }); + }); + + it("generates a prefix for well-known RDF namespaces", () => { + expect( + generatePrefix("http://www.w3.org/2002/07/owl#ObjectProperty"), + ).toEqual({ + namespace: "http://www.w3.org/2002/07/owl#", + value: "ObjectProperty", + prefix: "owl", + }); + }); + + it("generates a prefix for rdf-schema hash IRI", () => { + expect( + generatePrefix("http://www.w3.org/2000/01/rdf-schema#subClassOf"), + ).toEqual({ + namespace: "http://www.w3.org/2000/01/rdf-schema#", + value: "subClassOf", + prefix: "rdf", + }); + }); + + it("generates a prefix from the host when namespace has only one path segment", () => { + expect(generatePrefix("http://www.schema.org/City")).toEqual({ + namespace: "http://www.schema.org/", + value: "City", + prefix: "schema", + }); + }); + + it("strips www from host-derived prefixes", () => { + const result = generatePrefix("http://www.schema.org/City"); + expect(result!.prefix).toBe("schema"); + }); + + it("skips 'ontology' for dbpedia slash IRI", () => { + expect(generatePrefix("https://dbpedia.org/ontology/endowment")).toEqual({ + namespace: "https://dbpedia.org/ontology/", + value: "endowment", + prefix: "dbpedia", + }); + }); + + it("skips 'class' and uses next meaningful segment", () => { + expect( + generatePrefix("https://dbpedia.org/class/yago/Record106647206"), + ).toEqual({ + namespace: "https://dbpedia.org/class/yago/", + value: "Record106647206", + prefix: "yago", + }); + }); + + it("skips 'resource' for dbpedia slash IRI", () => { + expect( + 
generatePrefix("https://dbpedia.org/resource/Qualifying_Rounds"), + ).toEqual({ + namespace: "https://dbpedia.org/resource/", + value: "Qualifying_Rounds", + prefix: "dbpedia", + }); + }); + + it("uses the hash path segment for hash IRIs with deep paths", () => { + expect(generatePrefix("https://example.org/ns/animals#Cat")).toEqual({ + namespace: "https://example.org/ns/animals#", + value: "Cat", + prefix: "nsa", + }); + }); + + it("does not truncate long path segments", () => { + expect( + generatePrefix("http://example.org/verylongsegmentnamehere/Thing"), + ).toEqual({ + namespace: "http://example.org/verylongsegmentnamehere/", + value: "Thing", + prefix: "verylongsegmentnamehere", + }); + }); + + it("removes special characters from the prefix", () => { + expect(generatePrefix("http://example.org/my-special_ns.v2/Item")).toEqual({ + namespace: "http://example.org/my-special_ns.v2/", + value: "Item", + prefix: "my", + }); + }); + + it("allows short prefixes", () => { + expect(generatePrefix("http://example.org/a/Thing")).toEqual({ + namespace: "http://example.org/a/", + value: "Thing", + prefix: "a", + }); + }); + + it("returns deterministic results for the same input", () => { + const a = generatePrefix("http://example.org/foo/Bar"); + const b = generatePrefix("http://example.org/foo/Bar"); + expect(a).toEqual(b); + }); + + it("handles hash IRIs with only host and no path segments", () => { + expect(generatePrefix("http://example.org/#value")).toEqual({ + namespace: "http://example.org/#", + value: "value", + prefix: "example", + }); + }); + + it("preserves original casing of the prefix", () => { + expect(generatePrefix("https://dbpedia.org/Ontology/endowment")).toEqual({ + namespace: "https://dbpedia.org/Ontology/", + value: "endowment", + prefix: "dbpedia", + }); + }); + + it("preserves casing for mixed-case path segments", () => { + expect(generatePrefix("http://example.org/MyNamespace/Thing")).toEqual({ + namespace: "http://example.org/MyNamespace/", + 
value: "Thing", + prefix: "My", + }); + }); + + it("preserves casing for hash IRIs", () => { + expect(generatePrefix("http://example.org/MyOntology#Person")).toEqual({ + namespace: "http://example.org/MyOntology#", + value: "Person", + prefix: "My", + }); + }); + + it("preserves uppercase host when used as prefix", () => { + expect(generatePrefix("http://ExAmPlE.org/Thing")).toEqual({ + namespace: "http://ExAmPlE.org/", + value: "Thing", + prefix: "ExAmPlE", + }); + }); + + it("uses first word of primary and abbreviates secondary segments", () => { + expect( + generatePrefix( + "http://kelvinlawrence.net/air-routes/datatypeProperty/name", + ), + ).toEqual({ + namespace: "http://kelvinlawrence.net/air-routes/datatypeProperty/", + value: "name", + prefix: "airdp", + }); + }); + + it("abbreviates camelCase secondary segments to initials", () => { + expect( + generatePrefix( + "http://kelvinlawrence.net/air-routes/objectProperty/route", + ), + ).toEqual({ + namespace: "http://kelvinlawrence.net/air-routes/objectProperty/", + value: "route", + prefix: "airop", + }); + }); + + it("uses only primary when secondary is ignored", () => { + expect( + generatePrefix("http://kelvinlawrence.net/air-routes/class/Airport"), + ).toEqual({ + namespace: "http://kelvinlawrence.net/air-routes/class/", + value: "Airport", + prefix: "air", + }); + }); + + it("skips numeric-only path segments", () => { + expect(generatePrefix("http://example.org/2024/01/schema#Thing")).toEqual({ + namespace: "http://example.org/2024/01/schema#", + value: "Thing", + prefix: "schema", + }); + }); +}); diff --git a/packages/graph-explorer/src/utils/rdf/generatePrefix.ts b/packages/graph-explorer/src/utils/rdf/generatePrefix.ts new file mode 100644 index 000000000..2783d036d --- /dev/null +++ b/packages/graph-explorer/src/utils/rdf/generatePrefix.ts @@ -0,0 +1,133 @@ +import type { GeneratedPrefix, IriNamespace, RdfPrefix } from "./types"; + +import { splitIri } from "./splitIri"; + +/** + * Generates a short 
prefix for a valid IRI. Returns `null` if the IRI is + * invalid or has no namespace/value. + * + * The first non-ignored path segment is the primary and contributes its first + * word (split on camelCase, hyphens, underscores, dots). Subsequent segments + * are abbreviated to their word initials. Ignored segments ("ontology", + * "resource", "class") and numeric-only segments are skipped. + * + * @example + * // Single path segment uses first word of the segment + * generatePrefix("http://example.org/MyNamespace/Thing") + * // => { namespace: "http://example.org/MyNamespace/", value: "Thing", prefix: "My" } + * + * @example + * // Multiple path segments: primary first word + secondary initials + * generatePrefix("http://kelvinlawrence.net/air-routes/datatypeProperty/name") + * // => { namespace: "http://kelvinlawrence.net/air-routes/datatypeProperty/", value: "name", prefix: "airdp" } + * + * @example + * // Hash IRIs with deep paths abbreviate secondary segments + * generatePrefix("https://example.org/ns/animals#Cat") + * // => { namespace: "https://example.org/ns/animals#", value: "Cat", prefix: "nsa" } + * + * @example + * // Ignored segments ("ontology", "resource", "class") are skipped + * generatePrefix("http://www.example.com/soccer/ontology/League") + * // => { namespace: "http://www.example.com/soccer/ontology/", value: "League", prefix: "soccer" } + * + * @example + * // Numeric-only path segments are skipped + * generatePrefix("http://example.org/2024/01/schema#Thing") + * // => { namespace: "http://example.org/2024/01/schema#", value: "Thing", prefix: "schema" } + * + * @example + * // Falls back to host when no meaningful path segments exist + * generatePrefix("http://example.org/#value") + * // => { namespace: "http://example.org/#", value: "value", prefix: "example" } + * + * @example + * // Returns null for invalid or incomplete IRIs + * generatePrefix("not a url") // => null + * generatePrefix("http://example.org/") // => null + */ +export 
function generatePrefix(iri: string): GeneratedPrefix | null {
  // `splitIri` decides what counts as a usable IRI; when it yields nothing
  // there is no namespace to derive a prefix from.
  const parts = splitIri(iri);
  if (!parts) {
    return null;
  }

  // The result carries the split namespace/value through unchanged and adds
  // the prefix derived from the namespace.
  return { ...parts, prefix: derivePrefixFromNamespace(parts.namespace) };
}

/** Path segments that never contribute to a prefix (matched lowercased). */
const IGNORED_SEGMENTS = new Set(["ontology", "resource", "class"]);
/** Segments made only of digits (e.g. the date parts of W3C namespaces). */
const NUMERIC_ONLY = /^\d+$/;
/** A lowercase letter or digit directly followed by an uppercase letter. */
const CAMEL_CASE_BOUNDARY = /([a-z0-9])([A-Z])/g;
/** Runs of hyphens, underscores, or dots that separate words in a segment. */
const WORD_SEPARATORS = /[-_.]+/;
/** Used only when neither the path nor the host yields a usable character. */
const FALLBACK_PREFIX = "ns";

/**
 * Derives a prefix name from a namespace.
 *
 * The first meaningful path segment contributes its first word (original
 * casing preserved); every later meaningful segment contributes the lowercased
 * initials of its words. A segment is meaningful when it is not in
 * `IGNORED_SEGMENTS`, is not numeric-only, and contains at least one
 * `[a-zA-Z0-9]` character. When no segment qualifies, the sanitized host is
 * used, and `FALLBACK_PREFIX` when the host is unusable too, so the result is
 * never an empty string.
 */
function derivePrefixFromNamespace(namespace: IriNamespace): RdfPrefix {
  const afterScheme = stripScheme(namespace);
  const meaningful = extractPathSegments(afterScheme).filter(
    s =>
      !IGNORED_SEGMENTS.has(s.toLowerCase()) &&
      !NUMERIC_ONLY.test(s) &&
      // Segments without any prefix-safe character (e.g. non-ASCII only) would
      // contribute nothing, so they must not be picked as the primary.
      splitSegmentIntoWords(s).length > 0,
  );

  const [primarySegment, ...secondarySegments] = meaningful;
  if (primarySegment === undefined) {
    const fromHost = sanitizePrefix(extractHost(afterScheme));
    return (fromHost.length > 0 ? fromHost : FALLBACK_PREFIX) as RdfPrefix;
  }

  const secondaryAbbreviations = secondarySegments
    .map(s => abbreviate(s))
    .join("")
    .toLowerCase();

  return (firstWordOf(primarySegment) + secondaryAbbreviations) as RdfPrefix;
}

/** Returns the portion after the first `//`, or the input if there is none. */
function stripScheme(namespace: string): string {
  const doubleSlash = namespace.indexOf("//");
  return doubleSlash === -1 ? namespace : namespace.substring(doubleSlash + 2);
}

/**
 * Splits a segment on camelCase boundaries, hyphens, underscores, and dots.
 * Each word is reduced to its `[a-zA-Z0-9]` characters and words that end up
 * empty are dropped, so callers never see characters that are not allowed in
 * a generated prefix.
 */
function splitSegmentIntoWords(segment: string): string[] {
  return segment
    .replace(CAMEL_CASE_BOUNDARY, "$1-$2")
    .split(WORD_SEPARATORS)
    .map(w => sanitizePrefix(w))
    .filter(w => w.length > 0);
}

/** Returns the first usable word of a segment, or `""` when it has none. */
function firstWordOf(segment: string): string {
  return splitSegmentIntoWords(segment)[0] ?? "";
}

/** Abbreviates a segment to the first character of each of its words. */
function abbreviate(segment: string): string {
  return splitSegmentIntoWords(segment)
    .map(w => w.charAt(0))
    .join("");
}

/**
 * Returns the non-empty `/`-separated pieces that follow the host. A single
 * trailing `#` or `/` (the namespace terminator) is removed first.
 */
function extractPathSegments(afterScheme: string): string[] {
  const pathStart = afterScheme.indexOf("/");
  if (pathStart === -1) {
    return [];
  }
  const path = afterScheme.substring(pathStart + 1).replace(/[#/]$/, "");
  return path.split("/").filter(s => s.length > 0);
}

/**
 * Returns the text before the first `/` with a leading `www.` and the last
 * dot-separated label (normally the TLD) removed.
 */
function extractHost(afterScheme: string): string {
  const hostEnd = afterScheme.indexOf("/");
  const host = hostEnd !== -1 ? afterScheme.substring(0, hostEnd) : afterScheme;
  return host.replace(/^www\./i, "").replace(/\.[^.]+$/, "");
}

/** Removes every character that is not an ASCII letter or digit. */
function sanitizePrefix(raw: string): string {
  return raw.replace(/[^a-zA-Z0-9]/g, "");
}
IriNamespace, - __matches: new Set([ - "https://www.w3.org/2000/01/rdf-schema#subClassOf", - ]), + prefix: "soccer" as RdfPrefix, + uri: "http://www.example.com/soccer/ontology/" as IriNamespace, + __inferred: true, }, - ]; - - const uris = new Set([ - "https://www.w3.org/2002/07/owl#ObjectProperty", - "https://www.w3.org/2000/01/rdf-schema#subClassOf", ]); - const result = generatePrefixes(uris, existing); + const result = generatePrefixes( + new Set(["http://www.example.com/soccer/ontology/League"]), + existing, + ); - expect(result).toBeNull(); + expect(result).toStrictEqual([]); }); - it("Should generate prefixes for URLs which contain a #", () => { - const urisWithConfig = { - "https://www.w3.org/2002/07/owl#ObjectProperty": { - prefix: "owl", - uri: "https://www.w3.org/2002/07/owl#", - }, - "https://www.w3.org/2000/01/rdf-schema#subClassOf": { - prefix: "rdf", - uri: "https://www.w3.org/2000/01/rdf-schema#", - }, - "https://www.w3.org/2007/05/powder-s#describedby": { - prefix: "pow", - uri: "https://www.w3.org/2007/05/powder-s#", - }, - "http://www.example.com/location/resource#London": { - prefix: "loc-r", - uri: "http://www.example.com/location/resource#", - }, - "http://www.example.com/soccer/resource#EPL": { - prefix: "soc-r", - uri: "http://www.example.com/soccer/resource#", - }, - "https://www.w3.org/ns/prov#wasDerivedFrom": { - prefix: "pro", - uri: "https://www.w3.org/ns/prov#", - }, - }; + it("should return empty when all IRIs match common prefixes", () => { + const result = generatePrefixes( + new Set(["http://www.w3.org/2002/07/owl#ObjectProperty"]), + PrefixLookup.fromArray([]), + ); + expect(result).toStrictEqual([]); + }); - Object.entries(urisWithConfig).forEach(([uri, config]) => { - const result = generateHashPrefix(new URL(uri)); - expect(result.prefix).toEqual(config.prefix); - expect(result.uri).toEqual(config.uri); - }); + it("should return empty when all IRIs match user prefixes", () => { + const result = generatePrefixes( + new 
Set(["https://dbpedia.org/resource/Qualifying_Rounds"]), + PrefixLookup.fromArray([ + { + prefix: "dbr" as RdfPrefix, + uri: "https://dbpedia.org/resource/" as IriNamespace, + }, + ]), + ); + expect(result).toStrictEqual([]); }); - it("Should generate prefixes for URLs which NOT contain a #", () => { - const urisWithConfig = { - "https://dbpedia.org/ontology/endowment": { - prefix: "dbp-o", - uri: "https://dbpedia.org/ontology/", - }, - "https://open.vocab.org/terms/describes": { - prefix: "ter", - uri: "https://open.vocab.org/terms/", - }, - "http://www.example.com/soccer/ontology/League": { - prefix: "soc", + it("should generate prefixes for unmatched namespaces only", () => { + const result = generatePrefixes( + new Set([ + "http://www.w3.org/2002/07/owl#ObjectProperty", + "http://www.example.com/soccer/ontology/League", + "http://www.example.com/soccer/resource#EPL", + ]), + PrefixLookup.fromArray([]), + ); + + expect(result).toStrictEqual([ + { + __inferred: true, + prefix: "soccer", uri: "http://www.example.com/soccer/ontology/", }, - "http://www.schema.org/City": { - prefix: "sch", - uri: "http://www.schema.org/", - }, - "https://dbpedia.org/resource/Qualifying_Rounds": { - prefix: "dbp-r", - uri: "https://dbpedia.org/resource/", - }, - "https://dbpedia.org/class/yago/Record106647206": { - prefix: "yag", - uri: "https://dbpedia.org/class/yago/", + { + __inferred: true, + prefix: "soccer2", + uri: "http://www.example.com/soccer/resource#", }, - }; - - Object.entries(urisWithConfig).forEach(([uri, config]) => { - const result = generatePrefix(new URL(uri)); - expect(result.prefix).toEqual(config.prefix); - expect(result.uri).toEqual(config.uri); - }); + ]); }); - it("Should generate only non-matching prefixes and update counts", () => { - const updatedPrefixes = generatePrefixes( + it("should append incrementing numerals for multiple collisions", () => { + const result = generatePrefixes( new Set([ - "https://www.w3.org/2002/07/owl#ObjectProperty", - 
"https://dbpedia.org/resource/Qualifying_Rounds", - "http://www.example.com/soccer/ontology/League", - "http://www.example.com/soccer/resource#EPL", - "http://www.example.com/location/resource#London", - "http://www.example.com/location/resource#Manchester", + "http://www.example.com/soccer/ontology/A", + "http://www.example.com/soccer/resource#B", + "http://www.example.com/soccer/class#C", ]), - [ - { - prefix: "owl" as RdfPrefix, - uri: "https://www.w3.org/2002/07/owl#" as IriNamespace, - }, - { - prefix: "dbr" as RdfPrefix, - uri: "https://dbpedia.org/resource/" as IriNamespace, - }, + PrefixLookup.fromArray([]), + ); + + const prefixes = result.map(r => r.prefix); + expect(prefixes).toStrictEqual(["soccer", "soccer2", "soccer3"]); + }); + + it("should avoid collisions with existing prefixes", () => { + const result = generatePrefixes( + new Set(["http://example.com/sport/resource#EPL"]), + PrefixLookup.fromArray([ { + prefix: "sport" as RdfPrefix, + uri: "http://other.com/sport/ontology/" as IriNamespace, __inferred: true, - prefix: "loc-r" as RdfPrefix, - uri: "http://www.example.com/location/resource#" as IriNamespace, - __matches: new Set([ - "http://www.example.com/location/resource#London", - "http://www.example.com/location/resource#Manchester", - ]), }, - ], + ]), ); - expect(updatedPrefixes).toHaveLength(5); - expect(updatedPrefixes?.[0]).toEqual({ - prefix: "owl", - uri: "https://www.w3.org/2002/07/owl#", - __matches: new Set(["https://www.w3.org/2002/07/owl#ObjectProperty"]), - }); - expect(updatedPrefixes?.[1]).toEqual({ - prefix: "dbr", - uri: "https://dbpedia.org/resource/", - __matches: new Set(["https://dbpedia.org/resource/Qualifying_Rounds"]), - }); - expect(updatedPrefixes?.[2]).toEqual({ - __inferred: true, - uri: "http://www.example.com/location/resource#", - prefix: "loc-r", - __matches: new Set([ - "http://www.example.com/location/resource#London", - "http://www.example.com/location/resource#Manchester", - ]), - }); - 
expect(updatedPrefixes?.[3]).toEqual({ - __inferred: true, - uri: "http://www.example.com/soccer/ontology/", - prefix: "soc", - __matches: new Set(["http://www.example.com/soccer/ontology/League"]), - }); - expect(updatedPrefixes?.[4]).toEqual({ - __inferred: true, - uri: "http://www.example.com/soccer/resource#", - prefix: "soc-r", - __matches: new Set(["http://www.example.com/soccer/resource#EPL"]), - }); + expect(result).toStrictEqual([ + { + __inferred: true, + prefix: "sport2", + uri: "http://example.com/sport/resource#", + }, + ]); }); - it("Should update existing prefixes when casing doesn't match", () => { - const updatedPrefixes = generatePrefixes( + it("should deduplicate namespaces within the batch", () => { + const result = generatePrefixes( new Set([ - "http://SecretSpyOrg/entity/quantity", - "http://SecretSpyOrg/entity/other", - "http://SecretSpyOrg/data/hasText", + "http://www.example.com/location/resource#London", + "http://www.example.com/location/resource#Manchester", ]), - [ - { - __inferred: true, - prefix: "ent" as RdfPrefix, - uri: "http://secretspyorg/entity/" as IriNamespace, - __matches: new Set(["http://SecretSpyOrg/entity/quantity"]), - }, - ], + PrefixLookup.fromArray([]), ); - expect(updatedPrefixes).toHaveLength(2); - expect(updatedPrefixes?.[0]).toEqual({ - __inferred: true, - uri: "http://secretspyorg/entity/", - prefix: "ent", - __matches: new Set([ - "http://SecretSpyOrg/entity/quantity", - "http://SecretSpyOrg/entity/other", - ]), - }); - expect(updatedPrefixes?.[1]).toEqual({ - __inferred: true, - uri: "http://secretspyorg/data/", - prefix: "dat", - __matches: new Set(["http://SecretSpyOrg/data/hasText"]), - }); + expect(result).toStrictEqual([ + { + __inferred: true, + prefix: "location", + uri: "http://www.example.com/location/resource#", + }, + ]); }); - it("should ignore file URIs since they don't have an origin", () => { - const updatedPrefixes = generatePrefixes( + it("should ignore file URIs", () => { + const result = 
generatePrefixes( new Set(["file://foo/bar.txt"]), - [], + PrefixLookup.fromArray([]), ); - - expect(updatedPrefixes).toBeNull(); + expect(result).toStrictEqual([]); }); - it("should ignore any non-path URIs", () => { - const updatedPrefixes = generatePrefixes( + it("should ignore non-path URIs", () => { + const result = generatePrefixes( new Set([ "urn:Person", - "urn:knows", - "urn:name", "urn:isbn:1234567890", "mailto:example@abc.com", "custom-scheme:foo", ]), - [], + PrefixLookup.fromArray([]), ); - - expect(updatedPrefixes).toBeNull(); + expect(result).toStrictEqual([]); }); - it("should handle any pathed URI", () => { - const updatedPrefixes = generatePrefixes( + it("should handle any pathed URI scheme", () => { + const result = generatePrefixes( new Set(["ftp://foo/bar.txt"]), - [], + PrefixLookup.fromArray([]), ); - - expect(updatedPrefixes).toEqual([ + expect(result).toStrictEqual([ { __inferred: true, - uri: "ftp://foo/", prefix: "foo", - __matches: new Set(["ftp://foo/bar.txt"]), + uri: "ftp://foo/", + }, + ]); + }); + + it("should match existing prefixes case-insensitively", () => { + const result = generatePrefixes( + new Set(["http://SecretSpyOrg/entity/quantity"]), + PrefixLookup.fromArray([ + { + __inferred: true, + prefix: "ent" as RdfPrefix, + uri: "http://secretspyorg/entity/" as IriNamespace, + }, + ]), + ); + + expect(result).toStrictEqual([]); + }); +}); + +/** + * BACKWARD COMPATIBILITY — PERSISTED DATA + * + * PrefixTypeConfig is persisted to IndexedDB via localforage. Older versions + * stored a `__matches` property (Set) on inferred prefixes. That + * property has been removed from the type, but previously persisted data may + * still contain it. These tests verify that generatePrefixes continues to work + * correctly when the PrefixLookup is built from data in the old shape. + * + * DO NOT delete or weaken these tests without confirming that all persisted + * data has been migrated or that the old shape is no longer in the wild. 
+ */ +describe("backward compatibility: legacy __matches property", () => { + it("should recognize existing prefixes that have legacy __matches", () => { + // Simulates a PrefixLookup built from data loaded from IndexedDB that + // was persisted before __matches was removed from PrefixTypeConfig. + const legacyPrefixes = PrefixLookup.fromArray([ + { + prefix: "soccer" as RdfPrefix, + uri: "http://www.example.com/soccer/ontology/" as IriNamespace, + __inferred: true, + __matches: new Set(["http://www.example.com/soccer/ontology/League"]), + } as PrefixTypeConfig, + ]); + + const result = generatePrefixes( + new Set(["http://www.example.com/soccer/ontology/Team"]), + legacyPrefixes, + ); + + // Should not generate a new prefix because the namespace already exists + expect(result).toStrictEqual([]); + }); + + it("should generate new prefixes alongside legacy ones", () => { + const legacyPrefixes = PrefixLookup.fromArray([ + { + prefix: "soccer" as RdfPrefix, + uri: "http://www.example.com/soccer/ontology/" as IriNamespace, + __inferred: true, + __matches: new Set(["http://www.example.com/soccer/ontology/League"]), + } as PrefixTypeConfig, + ]); + + const result = generatePrefixes( + new Set([ + "http://www.example.com/soccer/ontology/Team", + "http://www.example.com/location/resource#London", + ]), + legacyPrefixes, + ); + + // Only the new namespace should produce a prefix + expect(result).toStrictEqual([ + { + __inferred: true, + prefix: "location", + uri: "http://www.example.com/location/resource#", }, ]); }); diff --git a/packages/graph-explorer/src/utils/rdf/generatePrefixes.ts b/packages/graph-explorer/src/utils/rdf/generatePrefixes.ts index 3e2d4e768..0f157121a 100644 --- a/packages/graph-explorer/src/utils/rdf/generatePrefixes.ts +++ b/packages/graph-explorer/src/utils/rdf/generatePrefixes.ts @@ -1,191 +1,75 @@ import type { PrefixTypeConfig } from "@/core"; -import type { IriNamespace, RdfPrefix } from "./types"; - -import commonPrefixes from 
"./common-prefixes.json"; - -// Create a map of the common prefixes -const commonPrefixesMap = toPrefixTypeConfigMap( - Object.entries(commonPrefixes).map(([prefix, uri]) => ({ - prefix: prefix as RdfPrefix, - uri: uri as IriNamespace, - })), -); - -/** Helper function to create a map of prefix configs from an array of configs. */ -function toPrefixTypeConfigMap( - configs: PrefixTypeConfig[], -): Map { - return new Map(configs.map(config => [normalizeUri(config.uri), config])); -} - -/** Converts URI to lowercase and trims leading and trailing whitespace. */ -function normalizeUri(uri: string) { - return uri.toLowerCase().trim(); -} - -/** Checks if the given string is a valid URL with a path. */ -function isUrl(str: string) { - try { - const url = new URL(str); +import type { PrefixLookup } from "./PrefixLookup"; +import type { RdfPrefix } from "./types"; +import type { NormalizedIriNamespace } from "./types"; + +import { normalizeNamespace } from "./commonPrefixes"; +import { generatePrefix } from "./generatePrefix"; +import { splitIri } from "./splitIri"; + +/** + * Finds namespaces in the given IRIs that have no matching prefix in the + * lookup, generates a prefix for each, and returns the new prefix configs. + * + * Returns an empty array when every IRI already has a matching prefix. 
/**
 * Builds an inferred prefix config for each namespace among `iris` that the
 * lookup cannot already resolve.
 *
 * IRIs that `splitIri` rejects are skipped. Namespaces are de-duplicated
 * within the batch by their normalized form, and each generated name is made
 * unique against the names of the lookup's user and inferred prefixes as well
 * as names handed out earlier in the same call.
 *
 * @param iris IRIs to inspect.
 * @param existingPrefixes Lookup of the prefixes that already exist.
 * @returns The newly generated configs in first-seen order; an empty array
 * when nothing new is needed.
 */
export default function generatePrefixes(
  iris: Set<string>,
  existingPrefixes: PrefixLookup,
): PrefixTypeConfig[] {
  // NOTE(review): only user and inferred prefix names are reserved here. If
  // the lookup also resolves built-in/common prefixes, a generated name could
  // coincide with one of those — confirm against PrefixLookup.
  const takenNames = new Set<RdfPrefix>();
  for (const known of [
    ...existingPrefixes.userPrefixes,
    ...existingPrefixes.inferredPrefixes,
  ]) {
    takenNames.add(known.prefix);
  }

  const created = new Map<NormalizedIriNamespace, PrefixTypeConfig>();

  for (const iri of iris) {
    const parts = splitIri(iri);
    if (!parts) {
      continue;
    }

    const key = normalizeNamespace(parts.namespace);

    // Skip namespaces the lookup resolves, then ones produced earlier in
    // this batch.
    const alreadyCovered =
      Boolean(existingPrefixes.findPrefix(parts.namespace)) || created.has(key);
    if (alreadyCovered) {
      continue;
    }

    const generated = generatePrefix(iri);
    if (!generated) {
      continue;
    }

    const name = makeUnique(generated.prefix, takenNames);
    takenNames.add(name);
    created.set(key, {
      __inferred: true,
      prefix: name,
      uri: generated.namespace,
    });
  }

  return Array.from(created.values());
}
/**
 * Returns `prefix` unchanged when it is not in `used`; otherwise returns
 * `prefix` followed by the smallest integer, starting at 2, that makes the
 * name absent from `used`. The set itself is not modified.
 */
function makeUnique(prefix: RdfPrefix, used: Set<RdfPrefix>): RdfPrefix {
  let candidate = prefix;
  for (let suffix = 2; used.has(candidate); suffix += 1) {
    candidate = `${prefix}${suffix}` as RdfPrefix;
  }
  return candidate;
}
b/packages/graph-explorer/src/utils/rdf/replacePrefixes.test.ts @@ -1,53 +1,142 @@ +import type { PrefixTypeConfig } from "@/core"; import type { IriNamespace, RdfPrefix } from "@/utils/rdf"; +import { PrefixLookup } from "./PrefixLookup"; import replacePrefixes from "./replacePrefixes"; +const emptyLookup = PrefixLookup.fromArray([]); + +function toLookup(prefixes: PrefixTypeConfig[]): PrefixLookup { + return PrefixLookup.fromArray(prefixes); +} + test("should do nothing when no URI is provided", () => { - const result = replacePrefixes(undefined); + const result = replacePrefixes(undefined, emptyLookup); expect(result).toBe(""); }); +test("should return the URI when it is not a valid IRI", () => { + expect(replacePrefixes("not-a-url", emptyLookup)).toBe("not-a-url"); + expect(replacePrefixes("just text", emptyLookup)).toBe("just text"); +}); + +test("should return the URI when no prefix matches", () => { + const result = replacePrefixes( + "http://unknown.example.org/ns/Thing", + emptyLookup, + ); + expect(result).toBe("http://unknown.example.org/ns/Thing"); +}); + test("should replace using common prefixes", () => { - const result = replacePrefixes("http://example.com/foo/bar"); - expect(result).toBe("meat:foo/bar"); + const result = replacePrefixes( + "http://www.w3.org/2002/07/owl#ObjectProperty", + emptyLookup, + ); + expect(result).toBe("owl:ObjectProperty"); }); test("should replace using custom prefixes", () => { - const result = replacePrefixes("http://example.com/foo/bar", [ - { - prefix: "foo" as RdfPrefix, - uri: "http://example.com/foo/" as IriNamespace, - }, - ]); + const result = replacePrefixes( + "http://example.com/foo/bar", + toLookup([ + { + prefix: "foo" as RdfPrefix, + uri: "http://example.com/foo/" as IriNamespace, + }, + ]), + ); expect(result).toBe("foo:bar"); }); test("should use generated prefixes", () => { - const result = replacePrefixes("http://foo.com/foo/bar", [ - { - prefix: "foo" as RdfPrefix, - uri: "http://foo.com/foo/" as 
IriNamespace, - __inferred: true, - __matches: new Set(["http://foo.com/foo/bar"]), - }, - ]); + const result = replacePrefixes( + "http://foo.com/foo/bar", + toLookup([ + { + prefix: "foo" as RdfPrefix, + uri: "http://foo.com/foo/" as IriNamespace, + __inferred: true, + }, + ]), + ); expect(result).toBe("foo:bar"); }); +test("should prefer custom prefixes over common prefixes", () => { + const result = replacePrefixes( + "http://www.w3.org/2002/07/owl#ObjectProperty", + toLookup([ + { + prefix: "myowl" as RdfPrefix, + uri: "http://www.w3.org/2002/07/owl#" as IriNamespace, + }, + ]), + ); + expect(result).toBe("myowl:ObjectProperty"); +}); + test("should prefer common prefixes over generated prefixes", () => { - const result = replacePrefixes("http://example.com/foo/bar", [ - { - prefix: "foo" as RdfPrefix, - uri: "http://example.com/foo/" as IriNamespace, - __inferred: true, - __matches: new Set(["http://example.com/foo/bar"]), - }, - ]); - expect(result).toBe("meat:foo/bar"); -}); - -test("should ignore case", () => { - expect(replacePrefixes("HTTP://example.com/foo/bar")).toBe("meat:foo/bar"); - expect(replacePrefixes("http://Example.COM/foo/bar")).toBe("meat:foo/bar"); - expect(replacePrefixes("http://example.com/Foo/Bar")).toBe("meat:Foo/Bar"); + const result = replacePrefixes( + "http://www.w3.org/2002/07/owl#ObjectProperty", + toLookup([ + { + prefix: "gen" as RdfPrefix, + uri: "http://www.w3.org/2002/07/owl#" as IriNamespace, + __inferred: true, + }, + ]), + ); + expect(result).toBe("owl:ObjectProperty"); +}); + +test("should match namespace case-insensitively", () => { + expect( + replacePrefixes( + "HTTP://www.w3.org/2002/07/owl#ObjectProperty", + emptyLookup, + ), + ).toBe("owl:ObjectProperty"); + expect( + replacePrefixes( + "http://WWW.W3.ORG/2002/07/owl#ObjectProperty", + emptyLookup, + ), + ).toBe("owl:ObjectProperty"); +}); + +test("should preserve the original casing of the local value", () => { + expect( + replacePrefixes( + 
"http://www.w3.org/2002/07/owl#objectproperty", + emptyLookup, + ), + ).toBe("owl:objectproperty"); +}); + +/** + * BACKWARD COMPATIBILITY — PERSISTED DATA + * + * PrefixTypeConfig is persisted to IndexedDB via localforage. Older versions + * stored a `__matches` property (Set) on inferred prefixes. That + * property has been removed from the type, but previously persisted data may + * still contain it. These tests verify that replacePrefixes continues to work + * correctly when given data in the old shape. + * + * DO NOT delete or weaken these tests without confirming that all persisted + * data has been migrated or that the old shape is no longer in the wild. + */ +test("should replace using legacy inferred prefix with __matches", () => { + const result = replacePrefixes( + "http://legacy.com/ns/Thing", + toLookup([ + { + prefix: "legacy" as RdfPrefix, + uri: "http://legacy.com/ns/" as IriNamespace, + __inferred: true, + __matches: new Set(["http://legacy.com/ns/Thing"]), + } as PrefixTypeConfig, + ]), + ); + expect(result).toBe("legacy:Thing"); }); diff --git a/packages/graph-explorer/src/utils/rdf/replacePrefixes.ts b/packages/graph-explorer/src/utils/rdf/replacePrefixes.ts index c0ac6b1ba..4b61b6253 100644 --- a/packages/graph-explorer/src/utils/rdf/replacePrefixes.ts +++ b/packages/graph-explorer/src/utils/rdf/replacePrefixes.ts @@ -1,52 +1,29 @@ -import type { PrefixTypeConfig } from "@/core"; - -import type { IriNamespace, RdfPrefix } from "./types"; - -import commonPrefixes from "./common-prefixes.json"; - -const commonPrefixesConfig: PrefixTypeConfig[] = Object.entries( - commonPrefixes, -).map(([prefix, uri]) => ({ - prefix: prefix as RdfPrefix, - uri: uri as IriNamespace, -})); - -const replacePrefixes = ( - uri?: string, - prefixes: PrefixTypeConfig[] = [], -): string => { +import type { PrefixLookup } from "./PrefixLookup"; + +import { splitIri } from "./splitIri"; + +/** + * Replaces the namespace portion of an IRI with a short prefix if one is found 
/**
 * Shortens an IRI by swapping its namespace for the prefix registered in the
 * supplied lookup.
 *
 * @param uri The IRI to shorten. `undefined` or an empty string yields `""`.
 * @param prefixes Lookup queried (via `findPrefix`) with the IRI's namespace.
 * @returns `prefix:localValue` when the IRI can be split and the lookup knows
 * its namespace; otherwise the input string unchanged.
 */
export default function replacePrefixes(
  uri: string | undefined,
  prefixes: PrefixLookup,
): string {
  if (!uri) {
    return "";
  }

  const parts = splitIri(uri);
  if (parts) {
    const prefix = prefixes.findPrefix(parts.namespace);
    if (prefix) {
      return `${prefix}:${parts.value}`;
    }
  }

  // Either the string could not be split or no prefix is registered for its
  // namespace: hand the input back untouched.
  return uri;
}
schemes like urn or mailto", () => { + expect(splitIri("urn:Person")).toBeNull(); + expect(splitIri("urn:isbn:1234567890")).toBeNull(); + expect(splitIri("mailto:example@abc.com")).toBeNull(); + expect(splitIri("custom-scheme:foo")).toBeNull(); + }); + + it("splits a hash IRI into namespace and value", () => { + expect(splitIri("http://example.org/ontology#Person")).toEqual({ + namespace: "http://example.org/ontology#", + value: "Person", + }); + expect(splitIri("https://www.w3.org/2002/07/owl#ObjectProperty")).toEqual({ + namespace: "https://www.w3.org/2002/07/owl#", + value: "ObjectProperty", + }); + }); + + it("splits a slash IRI into namespace and value", () => { + expect(splitIri("http://example.org/resource/London")).toEqual({ + namespace: "http://example.org/resource/", + value: "London", + }); + expect(splitIri("https://dbpedia.org/ontology/endowment")).toEqual({ + namespace: "https://dbpedia.org/ontology/", + value: "endowment", + }); + }); + + it("handles IRIs with multiple path segments", () => { + expect(splitIri("https://dbpedia.org/class/yago/Record106647206")).toEqual({ + namespace: "https://dbpedia.org/class/yago/", + value: "Record106647206", + }); + }); + + it("returns null when hash fragment is empty", () => { + expect(splitIri("http://example.org/ontology#")).toBeNull(); + }); + + it("returns null for file URIs", () => { + expect(splitIri("file://foo/bar.txt")).toBeNull(); + }); + + it("handles hash IRIs with multiple hash characters by splitting on the first", () => { + expect(splitIri("http://example.org/ontology#foo#bar")).toEqual({ + namespace: "http://example.org/ontology#", + value: "foo#bar", + }); + }); + + it("splits a hash IRI with no path segments", () => { + expect(splitIri("http://example.org#value")).toEqual({ + namespace: "http://example.org#", + value: "value", + }); + }); + + it("preserves original casing in namespace and value", () => { + expect(splitIri("HTTP://Example.ORG/Ontology#Person")).toEqual({ + namespace: 
"HTTP://Example.ORG/Ontology#", + value: "Person", + }); + }); + + it("handles FTP scheme", () => { + expect(splitIri("ftp://example.org/resource/Thing")).toEqual({ + namespace: "ftp://example.org/resource/", + value: "Thing", + }); + }); + + describe("local value validation", () => { + it("accepts underscores, hyphens, and periods", () => { + expect(splitIri("http://example.org/ns/my_item")).toEqual({ + namespace: "http://example.org/ns/", + value: "my_item", + }); + expect(splitIri("http://example.org/ns/my-item")).toEqual({ + namespace: "http://example.org/ns/", + value: "my-item", + }); + expect(splitIri("http://example.org/ns/v2.0")).toEqual({ + namespace: "http://example.org/ns/", + value: "v2.0", + }); + }); + + it("accepts percent-encoded sequences", () => { + expect(splitIri("http://example.org/ns/caf%C3%A9")).toEqual({ + namespace: "http://example.org/ns/", + value: "caf%C3%A9", + }); + }); + + it("accepts Unicode letters", () => { + expect(splitIri("http://example.org/ns/café")).toEqual({ + namespace: "http://example.org/ns/", + value: "café", + }); + }); + + it("accepts middle dot", () => { + expect(splitIri("http://example.org/ns/item·1")).toEqual({ + namespace: "http://example.org/ns/", + value: "item·1", + }); + }); + + it("rejects local values with spaces", () => { + expect(splitIri("http://example.org/ns/my item")).toBeNull(); + }); + + it("rejects local values with angle brackets", () => { + expect(splitIri("http://example.org/ns/ab")).toBeNull(); + }); + + it("rejects local values with curly braces", () => { + expect(splitIri("http://example.org/ns/a{b}")).toBeNull(); + }); + + it("rejects local values with pipes", () => { + expect(splitIri("http://example.org/ns/a|b")).toBeNull(); + }); + + it("rejects local values with carets", () => { + expect(splitIri("http://example.org/ns/a^b")).toBeNull(); + }); + + it("rejects local values with backticks", () => { + expect(splitIri("http://example.org/ns/a`b")).toBeNull(); + }); + + it("rejects local 
/**
 * Matches any whitespace or C0/DEL control character.
 *
 * The WHATWG URL parser trims leading/trailing C0-control-or-space and removes
 * embedded tabs and newlines before parsing, so input containing them can
 * parse successfully even though the raw string is not a usable IRI.
 */
const WHITESPACE_OR_CONTROL = /[\s\u0000-\u001F\u007F]/;

/**
 * Characters that may not appear in a local value: whitespace, angle brackets,
 * curly braces, pipes, carets, backticks, and backslashes.
 */
const INVALID_LOCAL_VALUE = /[\s<>{}|^`\\]/;

/**
 * Splits an IRI string into its namespace and local value.
 *
 * For hash IRIs like `http://example.org/ontology#Person`, the namespace is
 * `http://example.org/ontology#` and the value is `Person`. When several `#`
 * characters are present the split happens on the first one.
 *
 * For slash IRIs like `http://example.org/resource/London`, the namespace is
 * `http://example.org/resource/` and the value is `London`.
 *
 * Both parts are sliced from the input string as given, so the original casing
 * is preserved.
 *
 * The local value is checked against a permissive subset of the Turtle
 * `PN_LOCAL` production: values containing whitespace, angle brackets, curly
 * braces, pipes, carets, backticks, or backslashes are rejected.
 *
 * @param iri The candidate IRI.
 * @returns The namespace/value pair, or `null` when the string contains
 * whitespace or control characters, cannot be parsed as a URL, has no real
 * origin (e.g. `urn:`, `mailto:`), is a `file:` URI, has no local value, or
 * the local value contains invalid characters.
 */
export function splitIri(iri: string): IriParts | null {
  // The slices below are taken from the raw string, not from the parsed URL.
  // Without this guard, input such as " http://example.org/ns/x" would parse
  // (the URL parser strips the junk) and leak the whitespace into the
  // returned namespace.
  if (WHITESPACE_OR_CONTROL.test(iri)) {
    return null;
  }

  let url: URL;
  try {
    url = new URL(iri);
  } catch {
    return null;
  }

  // Reject URIs without a real origin (e.g. urn:, mailto:, custom-scheme:)
  if (!url.origin || url.origin === "null") {
    return null;
  }

  // Reject file URIs
  if (url.protocol === "file:") {
    return null;
  }

  // Hash IRI: split on the first `#`
  const hashIndex = iri.indexOf("#");
  if (hashIndex !== -1) {
    return toIriParts(iri, hashIndex);
  }

  // Slash IRI: need a path beyond just "/"
  if (url.pathname === "/" || url.pathname === "") {
    return null;
  }

  // NOTE(review): the last `/` of the raw string is used, so a `/` inside a
  // query string would become the split point — confirm whether IRIs with
  // query components need dedicated handling.
  return toIriParts(iri, iri.lastIndexOf("/"));
}

/**
 * Cuts `iri` immediately after the separator at `separatorIndex`; the
 * separator itself stays with the namespace. Returns `null` when the local
 * value is empty or fails validation.
 */
function toIriParts(iri: string, separatorIndex: number): IriParts | null {
  const value = iri.substring(separatorIndex + 1);
  if (!value || !isValidLocalValue(value)) {
    return null;
  }

  return {
    namespace: iri.substring(0, separatorIndex + 1) as IriNamespace,
    value: value as IriLocalValue,
  };
}

/**
 * Validates a local value against a permissive subset of the Turtle PN_LOCAL
 * production. Returns `false` when the value contains whitespace, angle
 * brackets, curly braces, pipes, carets, backticks, or backslashes.
 */
function isValidLocalValue(value: string): boolean {
  return !INVALID_LOCAL_VALUE.test(value);
}
a/packages/graph-explorer/src/utils/saveConfigurationToFile.ts b/packages/graph-explorer/src/utils/saveConfigurationToFile.ts index df65b0feb..a0b633fa8 100644 --- a/packages/graph-explorer/src/utils/saveConfigurationToFile.ts +++ b/packages/graph-explorer/src/utils/saveConfigurationToFile.ts @@ -15,10 +15,7 @@ const saveConfigurationToFile = (config: ConfigurationContextProps) => { schema: { vertices: config.schema?.vertices || [], edges: config.schema?.edges || [], - prefixes: config?.schema?.prefixes?.map(prefix => ({ - ...prefix, - __matches: Array.from(prefix.__matches || []), - })), + prefixes: config?.schema?.prefixes, lastUpdate: config.schema?.lastUpdate?.toISOString(), edgeConnections: config.schema?.edgeConnections, },