From 0509cb79c0326a55f0cd9c427de6fb7aac9e1db0 Mon Sep 17 00:00:00 2001 From: Florian Nachtigall Date: Thu, 16 Oct 2025 15:18:12 +0200 Subject: [PATCH 1/6] Add data theme definitions table to /guides - Added `theme-definitions.json` with structured descriptions of all data themes (based on the [Definitions Proposal](https://lf-overturemaps.atlassian.net/wiki/spaces/PROJ/pages/353927169/Definitions+Proposal)) - Added `theme-definitions.mdx` page under /guides to display the themes table - Introduced `ThemesTable` React component for rendering and styling the table Note: Theme definitions are still incomplete and may contain errors. --- docs/guides/theme-definitions.json | 614 +++++++++++++++++++++++++++++ docs/guides/theme-definitions.mdx | 16 + sidebars.js | 1 + src/components/themesTable.js | 536 +++++++++++++++++++++++++ 4 files changed, 1167 insertions(+) create mode 100644 docs/guides/theme-definitions.json create mode 100644 docs/guides/theme-definitions.mdx create mode 100644 src/components/themesTable.js diff --git a/docs/guides/theme-definitions.json b/docs/guides/theme-definitions.json new file mode 100644 index 00000000..dddd0eb5 --- /dev/null +++ b/docs/guides/theme-definitions.json @@ -0,0 +1,614 @@ +{ + "themes": { + "Places": { + "brief_description": "Concrete, physically identifiable, stationary destinations.", + "licenses": [ + { + "name": "CDLA Permissive 2.0.", + "url": "https://cdla.dev/permissive-2-0/" + }, + { + "name": "Apache 2.0.", + "url": "https://www.apache.org/licenses/LICENSE-2.0" + } + ], + "sources": [ + { + "name": "Meta", + "freshness": "", + "type": "commercial", + "url": "" + }, + { + "name": "Foursquare", + "freshness": "", + "type": "commercial", + "url": "" + }, + { + "name": "Microsoft", + "freshness": "", + "type": "commercial", + "url": "" + }, + { + "name": "PinMeTo", + "freshness": "", + "type": "commercial", + "url": "" + } + ], + "gers": { + "gersified": { + "flag": true, + "note": "" + }, + "bridge_files": { + "flag": true, + "note": "" + }, + "registry": { + "flag": true, + "note": "" + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": true, + "note": "" + }, + "excluded_by_design": [ + "Concrete and physically identifiable: to exclude divisions and addresses", + "Stationary: to exclude noise from UGC datasets for things like food carts or vehicles/boats/aircraft", + "Destination: to exclude bus stops and train platforms and other intermediate waypoints", + "Private: we only include places that do not include PII" + ], + "freshness": { + "release_frequency": "monthly", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Global", + "coverage": [ + "Global", + "US ~73%" + ], + "quality_summary": "Duplicates", + "quality": [ + "Duplicates", + "High junk rate", + "Low attribute completeness" + ], + "violations": [] + }, + "filtering": { + "summary": "None, except existence `confidence` > 0.2", + "location": [ + "" + ], + "topological": [], + "geometrical": [], + "properties": [ + "`confidence` > 0.2" + ], + "others": [] + }, + "matching": { + "summary": "ML-based matching with clustering", + "logic": [ + "Identifying potential matching pairs based on quadkey", + "ML-based matching based on attribute similarity" + ], + "properties": [ + "`name`", + "`address`", + "`phone number`", + "`house number`", + "`website`", + "`spatial distance`" + ] + }, + "merging": { + "summary": "Promotion of single source; no merging of attributes", + "logic": [ + "Clustering of matched places", + "Promotion of place from source with the highest match count", + "No merging of attributes between matches" + ], + "constraints": [] + } + }, + "Buildings": { + "brief_description": "Permanent human-made structures with a roof.", + "licenses": [ + { + "name": "ODbL", + "url": "https://opendatacommons.org/licenses/odbl/" + } + ], + "sources": [ + { + "name": "OSM", + "freshness": "monthly", + "type": "community", + "url": "https://osm.org/" + }, + { + "name": "Esri", + "freshness": "biannually", + "type": "community", + "url": "https://communitymaps.arcgis.com/home" + }, + { + "name": "Vancouver", + "freshness": "biannually", + "type": "authoritative", + "url": "https://opendata.vancouver.ca/" + }, + { + "name": "IGN Spain", + "freshness": "one-time ingestion", + "type": "authoritative", + "url": "https://www.ign.es/" + }, + { + "name": "Microsoft", + "freshness": "irregularly", + "type": "ML-derived", + "url": "https://github.com/microsoft/GlobalMLBuildingFootprints" + }, + { + "name": "Google", + "freshness": "one-time ingestion", + "type": "ML-derived", + "url": "https://sites.research.google/open-buildings/" + }, + { + "name": "East Asian countries", + "freshness": "one-time ingestion", + "type": "ML-derived", + "url": "https://zenodo.org/records/8174931" + } + ], + "gers": { + "gersified": { + "flag": true, + "note": "only building, not building_part" + }, + "bridge_files": { + "flag": true, + "note": "One-to-one matches only" + }, + "registry": { + "flag": true, + "note": "" + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": false, + "note": "" + }, + "excluded_by_design": [ + "Features that are well defined in other themes. Examples:", + "Physical “regions”", + "Places of business" + ], + "freshness": { + "release_frequency": "monthly", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Global", + "coverage": "Global", + "quality_summary": "Lower footprint precision in ML-derived sources", + "quality": [ + "Lower footprint precision in Global South due to high share of ML-derived buildings" + ], + "violations": [ + "Pre-match violations: `building_tiny`, `building_large`, `building_huge`, `building_invalid_geometry`, `building_duplicate_record_id`.", + "Post-merge violations: `building_transportation_intersection`, `building_water_intersection`, `building_invalid_area`, `building_too_many_small_angles`" + ] + }, + "filtering": { + "summary": "Overlap allowed within source, but not between sources", + "location": [ + "Buildings in water" + ], + "topological": [ + "Overlap allowed within source, but not between sources" + ], + "geometrical": [ + "Geometry identical to source", + "(Multi)Polygons with too many sharp angles are excluded", + "Footprint area > 10m for ML-derived sources" + ], + "properties": [ + "`height` < 900m" + ], + "others": [] + }, + "matching": { + "summary": "Geometric similarity (Intersection-over-Union > 0.5)", + "logic": "Intersection-over-Union > 0.5", + "properties": [ + "`geometry`" + ] + }, + "merging": { + "summary": "Hierarchical non-overlapping spatial merge", + "logic": [ + "Hierarchical merging of non-overlapping footprints", + "Merging of building height attributes between matches" + ], + "constraints": [ + "No spatial overlap for footprint merging", + "Intersection-over-Union > 0.5 for attribute merging" + ] + } + }, + "Addresses": { + "brief_description": "Unique geographic points representing a physical address location.", + "licenses": [ + { + "name": "Various licenses", + "url": "https://docs.overturemaps.org/attribution/#addresses" + } + ], + "sources": [ + { + "name": "OpenAddresses", + "freshness": "", + "type": "community", + "url": "https://openaddresses.io/" + }, + { + "name": "AddressForAll", + "freshness": "", + "type": "community", + "url": "https://www.addressforall.org/en/" + }, + { + "name": "City of New York", + "freshness": "", + "type": "authoritative", + "url": "https://data.cityofnewyork.us/City-Government/NYC-Address-Points/g6pj-hd8k" + }, + { + "name": "U.S. Department of Transportation", + "freshness": "", + "type": "authoritative", + "url": "https://www.transportation.gov/gis/national-address-database" + } + ], + "gers": { + "gersified": { + "flag": false, + "note": "" + }, + "bridge_files": { + "flag": false, + "note": "" + }, + "registry": { + "flag": true, + "note": "" + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": false, + "note": "" + }, + "excluded_by_design": [ + "Open, navigable spaces like fields, parks, or oceans" + ], + "freshness": { + "release_frequency": "", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "37 countries", + "coverage": [ + "Coverage in 37 countries", + "Several countries with partial coverage: US, Germany, Taiwan", + "Datasets have varying levels of completeness in their attributes. A dataset may be missing postcodes or only have partial coverage for `address_levels` for example." + ], + "quality_summary": "Duplicates", + "quality": [ + "Duplicates" + ], + "violations": [] + }, + "filtering": { + "summary": "", + "location": [ + "" + ], + "topological": [], + "geometrical": [], + "properties": [], + "others": [] + }, + "matching": { + "summary": "", + "logic": "n/a (subtypes within a country have a single source)", + "properties": [] + }, + "merging": { + "summary": "", + "logic": "", + "constraints": [] + } + }, + "Transportation": { + "brief_description": "Traversable path segments (roads, railways, trails) or connectors (road intersections).", + "licenses": [ + { + "name": "ODbL", + "url": "https://opendatacommons.org/licenses/odbl/" + } + ], + "sources": [ + { + "name": "OSM", + "freshness": "", + "type": "community", + "url": "https://osm.org/" + }, + { + "name": "TomTom", + "freshness": "", + "type": "commercial", + "url": "https://www.tomtom.com/" + } + ], + "gers": { + "gersified": { + "flag": true, + "note": "" + }, + "bridge_files": { + "flag": true, + "note": "" + }, + "registry": { + "flag": true, + "note": "" + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": false, + "note": "" + }, + "excluded_by_design": [ + "Aerial paths, such as flight paths or geostationary satellite orbits", + "Paths traversed by continuous entities: oil pipelines, electric lines" + ], + "freshness": { + "release_frequency": "monthly", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Global", + "coverage": [ + "The network is matched against TomTom’s (internally), any features in TomTom MNR that do not match are used as a signal indicating a missing road.", + "TomTom also performs GPS trace matching on the network." + ], + "quality_summary": "", + "quality": [ + "Presence of navigational islands" + ], + "violations": [] + }, + "filtering": { + "summary": "Deduplication of nodes", + "location": [], + "topological": [], + "geometrical": [], + "properties": [], + "others": [ + "Deduplication of nodes" + ] + }, + "matching": { + "summary": "", + "logic": "n/a (single source)", + "properties": [] + }, + "merging": { + "summary": "", + "logic": "n/a (single source)", + "constraints": [] + } + }, + "Divisions": { + "brief_description": "Recognized areas for governance, culture, or organization.", + "licenses": [ + { + "name": "ODbL", + "url": "https://opendatacommons.org/licenses/odbl" + } + ], + "sources": [ + { + "name": "OSM", + "freshness": "", + "type": "community", + "url": "https://osm.org/" + }, + { + "name": "geoBoundaries", + "freshness": "", + "type": "community", + "url": "https://www.geoboundaries.org/" + } + ], + "gers": { + "gersified": { + "flag": true, + "note": "" + }, + "bridge_files": { + "flag": true, + "note": "" + }, + "registry": { + "flag": true, + "note": "" + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": false, + "note": "" + }, + "excluded_by_design": [ + "Non unique features: An address could relate to multiple Places for example but there should not be more than one address point with the same values." + ], + "freshness": { + "release_frequency": "", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Global", + "coverage": [ + "Coverage generally aligns with admin_level tags in OSM and geoboundaries datasets.", + "Global coverage of country, dependency, region, and county.", + "Macroregion and macrocounty should be present, but are miscategorized as other subtypes.", + "Subtypes below county (locality, borough. neighborhood, microhood) should be present in every country, but coverage is often spotty." + ], + "quality_summary": "Minor macroregion issues; sub-county coverage spotty", + "quality": [], + "violations": [] + }, + "filtering": { + "summary": "Deduplication; overlap not allowed for countries", + "location": [ + "" + ], + "topological": [ + "Overlap allowed at lower subtypes (ex locality), not allowed in others (ex country)" + ], + "geometrical": [], + "properties": [], + "others": [ + "Deduplication" + ] + }, + "matching": { + "summary": "", + "logic": "", + "properties": [] + }, + "merging": { + "summary": "", + "logic": "", + "constraints": [] + } + }, + "Base": { + "brief_description": "Foundational layers such as land, water, infrastructure, and bathymetry.", + "licenses": [ + { + "name": "ODbL", + "url": "https://opendatacommons.org/licenses/odbl/" + } + ], + "sources": [ + { + "name": "Daylight Coastlines (OSM)", + "freshness": "", + "type": "community", + "url": "https://daylightmap.org/coastlines.html" + }, + { + "name": "ETOPO1", + "freshness": "", + "type": "community", + "url": "https://www.ncei.noaa.gov/products/etopo-global-relief-model" + }, + { + "name": "GLOBathy", + "freshness": "", + "type": "ML-derived", + "url": "https://www.nature.com/articles/s41597-022-01132-9" + }, + { + "name": "ESA WorldCover", + "freshness": "", + "type": "ML-derived", + "url": "https://esa-worldcover.org/en" + } + ], + "gers": { + "gersified": { + "flag": false, + "note": "" + }, + "bridge_files": { + "flag": false, + "note": "" + }, + "registry": { + "flag": false, + "note": "" + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": false, + "note": "" + }, + "excluded_by_design": [], + "freshness": { + "release_frequency": "monthly", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Global", + "coverage": "Features in base are not considered to be their own entities, so non-bathymetry coverage is just basic features from OSM with all the pass through tags.", + "quality_summary": "Derived from OSM tags", + "quality": [], + "violations": [] + }, + "filtering": { + "summary": "Tag-based filtering (non-bathymetry)", + "location": [], + "topological": [], + "geometrical": [], + "properties": [], + "others": [] + }, + "matching": { + "summary": "", + "logic": "n/a (types are single source)", + "properties": [] + }, + "merging": { + "summary": "", + "logic": "n/a (types are single source)", + "constraints": [] + } + } + } +} \ No newline at end of file diff --git a/docs/guides/theme-definitions.mdx b/docs/guides/theme-definitions.mdx new file mode 100644 index 00000000..53c5f2c2 --- /dev/null +++ b/docs/guides/theme-definitions.mdx @@ -0,0 +1,16 @@ +--- +title: Theme Definitions +description: An overview of theme definitions and characteristics +--- + +import data from '@site/docs/guides/theme-definitions.json'; +import ThemesTable from '@site/src/components/themesTable'; + +# Data Theme Definitions + +An overview of each data theme and its key properties. + + + diff --git a/sidebars.js b/sidebars.js index d6dae5ca..abf3c739 100644 --- a/sidebars.js +++ b/sidebars.js @@ -57,6 +57,7 @@ const sidebars = { }, collapsed: true, items: [ + 'guides/theme-definitions', 'guides/addresses', 'guides/base', 'guides/buildings', diff --git a/src/components/themesTable.js b/src/components/themesTable.js new file mode 100644 index 00000000..333d2e0b --- /dev/null +++ b/src/components/themesTable.js @@ -0,0 +1,536 @@ +import React, { useRef, memo, useMemo, useState, useEffect } from "react"; +import YAMLFileResolver from "@site/src/components/shared-libs/yamlFileResolver"; + +const SCHEMA_GROUPS = { + Places: ["places/place.yaml"], + Addresses: ["addresses/address.yaml"], + Buildings: ["buildings/building.yaml", "buildings/building_part.yaml"], + Transportation: ["transportation/segment.yaml", "transportation/connector.yaml"], + Divisions: [ + "divisions/division.yaml", + "divisions/division_area.yaml", + "divisions/division_boundary.yaml", + ], + Base: [ + "base/bathymetry.yaml", + "base/land.yaml", + "base/land_use.yaml", + "base/land_cover.yaml", + "base/infrastructure.yaml", + ], +}; + +const useIsOverflowing = (text, lines) => { + const ref = useRef(null); + const [isOverflowing, setIsOverflowing] = useState(false); + + useEffect(() => { + const el = ref.current; + if (!el) return; + + const lineHeight = parseFloat(getComputedStyle(el).lineHeight || 20); + const maxHeight = lineHeight * lines; + const checkOverflow = () => setIsOverflowing(el.scrollHeight > maxHeight + 1); + + checkOverflow(); + window.addEventListener("resize", checkOverflow); + return () => window.removeEventListener("resize", checkOverflow); + }, [text, lines]); + + return [ref, isOverflowing]; +}; + +const getCroppedCellStyle = (cropped, lines) => ({ + position: "relative", + overflow: cropped ? "hidden" : "visible", + maxHeight: cropped ? `${1.6 * lines}em` : "none", + display: "-webkit-box", + WebkitBoxOrient: "vertical", + WebkitLineClamp: cropped ? lines : "unset", + textOverflow: cropped ? "ellipsis" : "clip", + whiteSpace: "pre-line", + cursor: "pointer", + transition: "max-height 0.3s ease, mask-image 0.2s ease", + ...(cropped && { + maskImage: "linear-gradient(to bottom, black 75%, transparent 100%)", + WebkitMaskImage: "linear-gradient(to bottom, black 75%, transparent 100%)", + }), +}); + +const CroppedText = memo(({ text, expanded, lines = 10 }) => { + const [ref, isOverflowing] = useIsOverflowing(text, lines); + const cropped = !expanded && isOverflowing; + + return ( +
+ {text || "—"} +
+ ); +}); + + +const renderCodeSpans = (text) => { + const parts = String(text).split(/(`[^`]+`)/g); + return parts.map((part, i) => { + if (part.startsWith("`") && part.endsWith("`")) { + return ( + + {part.slice(1, -1)} + + ); + } + return {part}; + }); +}; + +const getGeometryTypes = (schemas) => { + const extract = (geom) => { + if (!geom) return []; + const items = geom.oneOf || geom.allOf || [geom]; + return items + .map((g) => g.$ref?.match(/([A-Za-z]+)\.json$/)?.[1] || "Unknown") + .filter(Boolean); + }; + + return [...new Set(schemas.flatMap(s => extract(s?.properties?.geometry)))]; +}; + +export default function ThemesTable({ data }) { + const themes = data.themes; + const [expandedTheme, setExpandedTheme] = useState(null); + const [schemas, setSchemas] = useState({}); + const parsedSchemas = useMemo(() => schemas, [schemas]); + + useEffect(() => { + const resolver = YAMLFileResolver(); + const allPaths = Object.values(SCHEMA_GROUPS).flat(); + + Promise.all(allPaths.map((p) => resolver.resolve(p))).then((loaded) => { + const result = {}; + let index = 0; + for (const [group, paths] of Object.entries(SCHEMA_GROUPS)) { + result[group] = paths.map(() => loaded[index++]); + } + setSchemas(result); + }); + }, []); + + + const th = { + borderBottom: "2px solid #ccc", + textAlign: "left", + padding: "6px 8px", + backgroundColor: "#f7f7f7", + fontWeight: 600, + fontSize: "0.85rem", + }; + + const td = { + borderBottom: "1px solid #eee", + padding: "6px 8px", + verticalAlign: "top", + fontSize: "0.88rem", + }; + + const toggleExpand = (themeName) => + setExpandedTheme(expandedTheme === themeName ? null : themeName); + + const checkIcon = (val, hasNote = false) => { + if (val) { + return hasNote ? "☑️" : "✅" + } + return "❌" + }; + + const FlagCell = ({ item }) => { + const note = item?.note || "" + const hasNote = Boolean(note) + + return ( + + {checkIcon(item?.flag, hasNote)} + + ) + }; + + const renderList = (arr) => { + if (!Array.isArray(arr) || arr.length === 0) return ; + + return ( + + ); + }; + + const renderNameWithUrl = (name, url, strong = false) => { + const label = name || url || "—"; + + if (url) { + return ( + (e.target.style.textDecoration = "underline")} + onMouseOut={(e) => (e.target.style.textDecoration = "none")} + > + {label} + + ); + } + + return strong ? {label} : {label}; + }; + + const renderSources = (sources) => { + if (!Array.isArray(sources) || sources.length === 0) return ; + + return ( + + ); + }; + + return ( +
+ + + + + + + + + + + + + + + + + + + + + + + + {Object.entries(themes).map(([name, theme]) => { + const schemas = parsedSchemas[name] || []; + const geometries = getGeometryTypes(schemas); + const shortDef = theme.brief_description || "—"; + const coverage = theme.quality_assurance?.coverage_summary || "—"; + const quality = theme.quality_assurance?.quality_summary || "—"; + const filteringSummary = theme.filtering?.summary || "—"; + const matchingSummary = theme.matching?.summary || "—"; + const mergingSummary = theme.merging?.summary || "—"; + const gers = theme.gers || {}; + const freshnessText = theme.freshness?.release_frequency || ""; + + return ( + + toggleExpand(name)} + > + + + + + + + + + + + + + + + + + + + {expandedTheme === name && ( + + + + )} + + ); + })} + +
+ 💡 Click a row to view detailed theme definition. +
ThemeDescriptionCoverageQualityRelease FrequencyLicensesSourcesGERSRegistryChangelogBridge FilesConfidence ScoreGeometry TypesFilteringMatchingMerging
+ {name} + {shortDef}{coverage}{quality}{freshnessText}{renderList(theme.licenses || [])} + + {geometries.join(", ") || "—"} + + + + + +
+ +
+
+ ); +} + +function ExpandedThemeDetails({ theme, schemas }) { + const blockStyle = { marginBottom: "1.2rem" }; + + const boxGrid = { + display: "grid", + gap: "1rem", + gridTemplateColumns: "repeat(auto-fit, minmax(280px, 1fr))", + marginTop: "1.2rem", + marginBottom: "2rem", + }; + + const box = { + border: "1px solid #d9f0f2", + backgroundColor: "#f7fcfc", + padding: "0.9rem 1.1rem", + fontSize: "0.9rem", + }; + + const boxTitle = { + fontWeight: "600", + fontSize: "0.95rem", + marginBottom: "0.4rem", + textTransform: "capitalize", + }; + + const renderList = (arr) => ( + + ); + + const renderKeyValue = (obj) => { + if (!obj || typeof obj !== "object") return ; + + const entries = Object.entries(obj).filter( + ([key]) => !key.toLowerCase().includes("summary") + ); + if (entries.length === 0) return ; + + const capitalize = (s) => + s.charAt(0).toUpperCase() + s.slice(1).replace(/_/g, " "); + + return ( + + ); + }; + + + const renderSchemaSummaries = (schemas) => { + if (!schemas || schemas.length === 0) return null; + return ( +
+
Data Types
+
+ {schemas.map((schema, idx) => { + const props = schema.properties?.properties?.properties || {}; + const geometries = getGeometryTypes([schema]); + + return ( +
+
+ {schema.title} +
+
+ {schema.description} +
+
+ Geometry: {geometries.join(", ") || "—"} +
+ {Object.keys(props).length > 0 && ( +
+ + Show properties + +
    + {Object.entries(props).map(([k, v]) => ( +
  • + {k} — {v.description || "—"} + + {Array.isArray(v.enum) && v.enum.length > 0 && ( +
    + + Show {v.enum.length} values + +
    + {v.enum.join(", ")} +
    +
    + )} +
  • + ))} +
+
+ )} +
+ ); + })} +
+
+ ); + }; + + return ( +
+ {renderSchemaSummaries(schemas)} + + {theme.excluded_by_design && ( +
+
+ Excluded by Design +
+ {renderList(theme.excluded_by_design)} +
+ )} + + {theme.quality_assurance && ( +
+
+ Quality Assurance +
+ {renderKeyValue(theme.quality_assurance)} +
+ )} + +
+ {theme.filtering && ( +
+
Filtering
+ {renderKeyValue(theme.filtering)} +
+ )} + {theme.matching && ( +
+
Matching
+ {renderKeyValue(theme.matching)} +
+ )} + {theme.merging && ( +
+
Merging
+ {renderKeyValue(theme.merging)} +
+ )} +
+
+ ); +} From eb62cb1f408ce0a7a5f638aa0c2509ca58a26e85 Mon Sep 17 00:00:00 2001 From: Florian Nachtigall Date: Mon, 24 Nov 2025 18:53:42 +0100 Subject: [PATCH 2/6] Update theme definitions - Fix license name formatting - Update address definition (GERSified, coverage, quality) - Correct any fields transferred incorrectly from the wiki - Render merging summary as list --- docs/guides/theme-definitions.json | 65 +++++++++++++++++------------- src/components/themesTable.js | 4 +- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/docs/guides/theme-definitions.json b/docs/guides/theme-definitions.json index dddd0eb5..556b1f00 100644 --- a/docs/guides/theme-definitions.json +++ b/docs/guides/theme-definitions.json @@ -4,7 +4,7 @@ "brief_description": "Concrete, physically identifiable, stationary destinations.", "licenses": [ { - "name": "CDLA Permissive 2.0.", + "name": "CDLA Permissive 2.0", "url": "https://cdla.dev/permissive-2-0/" }, { @@ -112,7 +112,10 @@ ] }, "merging": { - "summary": "Promotion of single source; no merging of attributes", + "summary": [ + "Promotion of single source for matched places", + "No merging of attributes" + ], "logic": [ "Clustering of matched places", "Promotion of place from source with the highest match count", @@ -242,9 +245,12 @@ ] }, "merging": { - "summary": "Hierarchical non-overlapping spatial merge", - "logic": [ + "summary": [ "Hierarchical merging of non-overlapping footprints", + "Merging of height attributes between matches" + ], + "logic": [ + "Hierarchical merging of non-overlapping building footprints", "Merging of building height attributes between matches" ], "constraints": [ @@ -289,7 +295,7 @@ ], "gers": { "gersified": { - "flag": false, + "flag": true, "note": "" }, "bridge_files": { @@ -310,43 +316,47 @@ "note": "" }, "excluded_by_design": [ - "Open, navigable spaces like fields, parks, or oceans" ], "freshness": { - "release_frequency": "", + "release_frequency": "monthly", "last_updated": "" }, "quality_assurance": { - "coverage_summary": "37 countries", + "coverage_summary": "39 countries", "coverage": [ - "Coverage in 37 countries", + "Coverage in 39 countries", "Several countries with partial coverage: US, Germany, Taiwan", "Datasets have varying levels of completeness in their attributes. A dataset may be missing postcodes or only have partial coverage for `address_levels` for example." ], - "quality_summary": "Duplicates", + "quality_summary": "Limited GERS ID stability and variable point location accuracy", "quality": [ - "Duplicates" + "Address point locations vary from dataset to dataset but most often represent either building centroids, building entrances, points on road, or parcel centroids.", + "GERS IDs are not very stable because of identical matching. Any change to an attribute or location will result in a new GERS ID." ], "violations": [] }, "filtering": { "summary": "", - "location": [ - "" - ], + "location": [], "topological": [], "geometrical": [], "properties": [], "others": [] }, "matching": { - "summary": "", - "logic": "n/a (subtypes within a country have a single source)", - "properties": [] + "summary": "Exact matching", + "logic": "Matching based on exact point geometry and address properties", + "properties": ["All properties incl. geometry"] }, "merging": { - "summary": "", - "logic": "", + "summary": [ + "Promotion of single source for matched addresses", + "No merging of attributes" + ], + "logic": [ + "All addresses, except for perfect duplicates/matches, are released", + "No merging of attributes between sources" + ], "constraints": [] } }, @@ -395,6 +405,7 @@ "note": "" }, "excluded_by_design": [ + "Open, navigable spaces like fields, parks, or oceans", "Aerial paths, such as flight paths or geostationary satellite orbits", "Paths traversed by continuous entities: oil pipelines, electric lines" ], @@ -430,7 +441,7 @@ "properties": [] }, "merging": { - "summary": "", + "summary": [], "logic": "n/a (single source)", "constraints": [] } @@ -479,11 +490,9 @@ "flag": false, "note": "" }, - "excluded_by_design": [ - "Non unique features: An address could relate to multiple Places for example but there should not be more than one address point with the same values." - ], + "excluded_by_design": [], "freshness": { - "release_frequency": "", + "release_frequency": "monthly", "last_updated": "" }, "quality_assurance": { @@ -518,7 +527,7 @@ "properties": [] }, "merging": { - "summary": "", + "summary": [], "logic": "", "constraints": [] } @@ -601,12 +610,12 @@ }, "matching": { "summary": "", - "logic": "n/a (types are single source)", + "logic": "n/a (single source)", "properties": [] }, "merging": { - "summary": "", - "logic": "n/a (types are single source)", + "summary": [], + "logic": "n/a (single source)", "constraints": [] } } diff --git a/src/components/themesTable.js b/src/components/themesTable.js index 333d2e0b..70c87fe8 100644 --- a/src/components/themesTable.js +++ b/src/components/themesTable.js @@ -294,7 +294,7 @@ export default function ThemesTable({ data }) { - + @@ -351,7 +351,7 @@ function ExpandedThemeDetails({ theme, schemas }) { const renderList = (arr) => (
    {arr.map((item, i) => ( -
  • {item}
  • +
  • {renderCodeSpans(item)}
  • ))}
); From 8540c19cb8a73efb6ded56b0ee08f8d2eda4b6b0 Mon Sep 17 00:00:00 2001 From: Dana Bauer Date: Thu, 4 Dec 2025 12:01:17 -0500 Subject: [PATCH 3/6] update theme definitions table structure, styles, and data --- docs/guides/index.mdx | 16 +- docs/guides/theme-definitions.mdx | 16 - sidebars.js | 1 - src/components/themesTable.js | 536 ----------- static/theme-definitions-table.html | 894 ++++++++++++++++++ .../theme_definitions.json | 585 ++++++------ 6 files changed, 1230 insertions(+), 818 deletions(-) delete mode 100644 docs/guides/theme-definitions.mdx delete mode 100644 src/components/themesTable.js create mode 100644 static/theme-definitions-table.html rename docs/guides/theme-definitions.json => static/theme_definitions.json (84%) diff --git a/docs/guides/index.mdx b/docs/guides/index.mdx index 74d6081f..bddcb457 100644 --- a/docs/guides/index.mdx +++ b/docs/guides/index.mdx @@ -1,7 +1,21 @@ --- +id: index +slug: /guides/ title: Data Guides +sidebar_label: Overview +description: Overture Maps data theme documentation --- import DocCardList from '@theme/DocCardList'; - +## Theme Definitions + + + +## Theme Guides + + \ No newline at end of file diff --git a/docs/guides/theme-definitions.mdx b/docs/guides/theme-definitions.mdx deleted file mode 100644 index 53c5f2c2..00000000 --- a/docs/guides/theme-definitions.mdx +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: Theme Definitions -description: An overview of theme definitions and characteristics ---- - -import data from '@site/docs/guides/theme-definitions.json'; -import ThemesTable from '@site/src/components/themesTable'; - -# Data Theme Definitions - -An overview of each data theme and its key properties. - - - diff --git a/sidebars.js b/sidebars.js index abf3c739..d6dae5ca 100644 --- a/sidebars.js +++ b/sidebars.js @@ -57,7 +57,6 @@ const sidebars = { }, collapsed: true, items: [ - 'guides/theme-definitions', 'guides/addresses', 'guides/base', 'guides/buildings', diff --git a/src/components/themesTable.js b/src/components/themesTable.js deleted file mode 100644 index 70c87fe8..00000000 --- a/src/components/themesTable.js +++ /dev/null @@ -1,536 +0,0 @@ -import React, { useRef, memo, useMemo, useState, useEffect } from "react"; -import YAMLFileResolver from "@site/src/components/shared-libs/yamlFileResolver"; - -const SCHEMA_GROUPS = { - Places: ["places/place.yaml"], - Addresses: ["addresses/address.yaml"], - Buildings: ["buildings/building.yaml", "buildings/building_part.yaml"], - Transportation: ["transportation/segment.yaml", "transportation/connector.yaml"], - Divisions: [ - "divisions/division.yaml", - "divisions/division_area.yaml", - "divisions/division_boundary.yaml", - ], - Base: [ - "base/bathymetry.yaml", - "base/land.yaml", - "base/land_use.yaml", - "base/land_cover.yaml", - "base/infrastructure.yaml", - ], -}; - -const useIsOverflowing = (text, lines) => { - const ref = useRef(null); - const [isOverflowing, setIsOverflowing] = useState(false); - - useEffect(() => { - const el = ref.current; - if (!el) return; - - const lineHeight = parseFloat(getComputedStyle(el).lineHeight || 20); - const maxHeight = lineHeight * lines; - const checkOverflow = () => setIsOverflowing(el.scrollHeight > maxHeight + 1); - - checkOverflow(); - window.addEventListener("resize", checkOverflow); - return () => window.removeEventListener("resize", checkOverflow); - }, [text, lines]); - - return [ref, isOverflowing]; -}; - -const getCroppedCellStyle = (cropped, lines) => ({ - position: "relative", - overflow: cropped ? "hidden" : "visible", - maxHeight: cropped ? `${1.6 * lines}em` : "none", - display: "-webkit-box", - WebkitBoxOrient: "vertical", - WebkitLineClamp: cropped ? lines : "unset", - textOverflow: cropped ? "ellipsis" : "clip", - whiteSpace: "pre-line", - cursor: "pointer", - transition: "max-height 0.3s ease, mask-image 0.2s ease", - ...(cropped && { - maskImage: "linear-gradient(to bottom, black 75%, transparent 100%)", - WebkitMaskImage: "linear-gradient(to bottom, black 75%, transparent 100%)", - }), -}); - -const CroppedText = memo(({ text, expanded, lines = 10 }) => { - const [ref, isOverflowing] = useIsOverflowing(text, lines); - const cropped = !expanded && isOverflowing; - - return ( -
- {text || "—"} -
- ); -}); - - -const renderCodeSpans = (text) => { - const parts = String(text).split(/(`[^`]+`)/g); - return parts.map((part, i) => { - if (part.startsWith("`") && part.endsWith("`")) { - return ( - - {part.slice(1, -1)} - - ); - } - return {part}; - }); -}; - -const getGeometryTypes = (schemas) => { - const extract = (geom) => { - if (!geom) return []; - const items = geom.oneOf || geom.allOf || [geom]; - return items - .map((g) => g.$ref?.match(/([A-Za-z]+)\.json$/)?.[1] || "Unknown") - .filter(Boolean); - }; - - return [...new Set(schemas.flatMap(s => extract(s?.properties?.geometry)))]; -}; - -export default function ThemesTable({ data }) { - const themes = data.themes; - const [expandedTheme, setExpandedTheme] = useState(null); - const [schemas, setSchemas] = useState({}); - const parsedSchemas = useMemo(() => schemas, [schemas]); - - useEffect(() => { - const resolver = YAMLFileResolver(); - const allPaths = Object.values(SCHEMA_GROUPS).flat(); - - Promise.all(allPaths.map((p) => resolver.resolve(p))).then((loaded) => { - const result = {}; - let index = 0; - for (const [group, paths] of Object.entries(SCHEMA_GROUPS)) { - result[group] = paths.map(() => loaded[index++]); - } - setSchemas(result); - }); - }, []); - - - const th = { - borderBottom: "2px solid #ccc", - textAlign: "left", - padding: "6px 8px", - backgroundColor: "#f7f7f7", - fontWeight: 600, - fontSize: "0.85rem", - }; - - const td = { - borderBottom: "1px solid #eee", - padding: "6px 8px", - verticalAlign: "top", - fontSize: "0.88rem", - }; - - const toggleExpand = (themeName) => - setExpandedTheme(expandedTheme === themeName ? null : themeName); - - const checkIcon = (val, hasNote = false) => { - if (val) { - return hasNote ? "☑️" : "✅" - } - return "❌" - }; - - const FlagCell = ({ item }) => { - const note = item?.note || "" - const hasNote = Boolean(note) - - return ( - - {checkIcon(item?.flag, hasNote)} - - ) - }; - - const renderList = (arr) => { - if (!Array.isArray(arr) || arr.length === 0) return ; - - return ( -
    - {arr.map((item, i) => { - if (typeof item === "object" && item !== null) { - return
  • {renderNameWithUrl(item.name, item.url)}
  • ; - } - return
  • {item}
  • ; - })} -
- ); - }; - - const renderNameWithUrl = (name, url, strong = false) => { - const label = name || url || "—"; - - if (url) { - return ( - (e.target.style.textDecoration = "underline")} - onMouseOut={(e) => (e.target.style.textDecoration = "none")} - > - {label} - - ); - } - - return strong ? {label} : {label}; - }; - - const renderSources = (sources) => { - if (!Array.isArray(sources) || sources.length === 0) return ; - - return ( -
    - {sources.map((src, i) => ( -
  • - {renderNameWithUrl(src.name, src.url, true)} -
    - {src.type ? `${src.type}` : "—"} - {src.freshness ? ` • ${src.freshness}` : ""} -
    -
  • - ))} -
- ); - }; - - return ( -
- - - - - - - - - - - - - - - - - - - - - - - - {Object.entries(themes).map(([name, theme]) => { - const schemas = parsedSchemas[name] || []; - const geometries = getGeometryTypes(schemas); - const shortDef = theme.brief_description || "—"; - const coverage = theme.quality_assurance?.coverage_summary || "—"; - const quality = theme.quality_assurance?.quality_summary || "—"; - const filteringSummary = theme.filtering?.summary || "—"; - const matchingSummary = theme.matching?.summary || "—"; - const mergingSummary = theme.merging?.summary || "—"; - const gers = theme.gers || {}; - const freshnessText = theme.freshness?.release_frequency || ""; - - return ( - - toggleExpand(name)} - > - - - - - - - - - - - - - - - - - - - {expandedTheme === name && ( - - - - )} - - ); - })} - -
- 💡 Click a row to view detailed theme definition. -
ThemeDescriptionCoverageQualityRelease FrequencyLicensesSourcesGERSRegistryChangelogBridge FilesConfidence ScoreGeometry TypesFilteringMatchingMerging
- {name} - {shortDef}{coverage}{quality}{freshnessText}{renderList(theme.licenses || [])} - - {geometries.join(", ") || "—"} - - - - - -
- -
-
- ); -} - -function ExpandedThemeDetails({ theme, schemas }) { - const blockStyle = { marginBottom: "1.2rem" }; - - const boxGrid = { - display: "grid", - gap: "1rem", - gridTemplateColumns: "repeat(auto-fit, minmax(280px, 1fr))", - marginTop: "1.2rem", - marginBottom: "2rem", - }; - - const box = { - border: "1px solid #d9f0f2", - backgroundColor: "#f7fcfc", - padding: "0.9rem 1.1rem", - fontSize: "0.9rem", - }; - - const boxTitle = { - fontWeight: "600", - fontSize: "0.95rem", - marginBottom: "0.4rem", - textTransform: "capitalize", - }; - - const renderList = (arr) => ( -
    - {arr.map((item, i) => ( -
  • {renderCodeSpans(item)}
  • - ))} -
- ); - - const renderKeyValue = (obj) => { - if (!obj || typeof obj !== "object") return ; - - const entries = Object.entries(obj).filter( - ([key]) => !key.toLowerCase().includes("summary") - ); - if (entries.length === 0) return ; - - const capitalize = (s) => - s.charAt(0).toUpperCase() + s.slice(1).replace(/_/g, " "); - - return ( -
    - {entries.map(([key, value]) => { - if (value == null) return null; - - let displayValue; - - if (Array.isArray(value)) { - const items = value.filter(Boolean); - displayValue = - items.length > 0 ? ( -
      - {items.map((v, i) => ( -
    • - {typeof v === "object" - ? renderKeyValue(v) - : renderCodeSpans(v)} -
    • - ))} -
    - ) : ( - - ); - } - else if (typeof value === "object") { - displayValue = renderKeyValue(value); - } - else { - displayValue = - value !== "" ? renderCodeSpans(String(value)) : ; - } - - return ( -
  • - {capitalize(key)}: {displayValue} -
  • - ); - })} -
- ); - }; - - - const renderSchemaSummaries = (schemas) => { - if (!schemas || schemas.length === 0) return null; - return ( -
-
Data Types
-
- {schemas.map((schema, idx) => { - const props = schema.properties?.properties?.properties || {}; - const geometries = getGeometryTypes([schema]); - - return ( -
-
- {schema.title} -
-
- {schema.description} -
-
- Geometry: {geometries.join(", ") || "—"} -
- {Object.keys(props).length > 0 && ( -
- - Show properties - -
    - {Object.entries(props).map(([k, v]) => ( -
  • - {k} — {v.description || "—"} - - {Array.isArray(v.enum) && v.enum.length > 0 && ( -
    - - Show {v.enum.length} values - -
    - {v.enum.join(", ")} -
    -
    - )} -
  • - ))} -
-
- )} -
- ); - })} -
-
- ); - }; - - return ( -
- {renderSchemaSummaries(schemas)} - - {theme.excluded_by_design && ( -
-
- Excluded by Design -
- {renderList(theme.excluded_by_design)} -
- )} - - {theme.quality_assurance && ( -
-
- Quality Assurance -
- {renderKeyValue(theme.quality_assurance)} -
- )} - -
- {theme.filtering && ( -
-
Filtering
- {renderKeyValue(theme.filtering)} -
- )} - {theme.matching && ( -
-
Matching
- {renderKeyValue(theme.matching)} -
- )} - {theme.merging && ( -
-
Merging
- {renderKeyValue(theme.merging)} -
- )} -
-
- ); -} diff --git a/static/theme-definitions-table.html b/static/theme-definitions-table.html new file mode 100644 index 00000000..a03c1713 --- /dev/null +++ b/static/theme-definitions-table.html @@ -0,0 +1,894 @@ + + + + + + Overture Maps - Theme Definitions Table + + + +
+
+

Theme Definitions Table

+ ← Back to Data Guides +
+
+ +
+

+ Comprehensive specifications for Overture Maps data themes. +

+ +
+ Click any row to view detailed specifications including sources, licenses, and processing methods +
+ +
Error loading theme definitions: Failed to fetch
+
+ + + + \ No newline at end of file diff --git a/docs/guides/theme-definitions.json b/static/theme_definitions.json similarity index 84% rename from docs/guides/theme-definitions.json rename to static/theme_definitions.json index 556b1f00..fb4494c3 100644 --- a/docs/guides/theme-definitions.json +++ b/static/theme_definitions.json @@ -1,55 +1,51 @@ { "themes": { - "Places": { - "brief_description": "Concrete, physically identifiable, stationary destinations.", + "Addresses": { + "brief_description": "Unique geographic points representing a physical address location.", "licenses": [ { - "name": "CDLA Permissive 2.0", - "url": "https://cdla.dev/permissive-2-0/" - }, - { - "name": "Apache 2.0.", - "url": "https://www.apache.org/licenses/LICENSE-2.0" + "name": "Various licenses", + "url": "https://docs.overturemaps.org/attribution/#addresses" } ], "sources": [ { - "name": "Meta", + "name": "OpenAddresses", "freshness": "", - "type": "commercial", - "url": "" + "type": "community", + "url": "https://openaddresses.io/" }, { - "name": "Foursquare", + "name": "AddressForAll", "freshness": "", - "type": "commercial", - "url": "" + "type": "community", + "url": "https://www.addressforall.org/en/" }, { - "name": "Microsoft", + "name": "City of New York", "freshness": "", - "type": "commercial", - "url": "" + "type": "authoritative", + "url": "https://data.cityofnewyork.us/City-Government/NYC-Address-Points/g6pj-hd8k" }, { - "name": "PinMeTo", + "name": "U.S. Department of Transportation", "freshness": "", - "type": "commercial", - "url": "" + "type": "authoritative", + "url": "https://www.transportation.gov/gis/national-address-database" } ], "gers": { "gersified": { - "flag": true, + "flag": false, "note": "" }, "bridge_files": { - "flag": true, + "flag": false, "note": "" }, - "registry": { - "flag": true, - "note": "" + "GERS_registry": { + "flag": false, + "note": "The features in this theme are included in the GERS registry." }, "data_changelog": { "flag": true, @@ -57,72 +53,152 @@ } }, "signal_confidence_score": { - "flag": true, + "flag": false, "note": "" }, - "excluded_by_design": [ - "Concrete and physically identifiable: to exclude divisions and addresses", - "Stationary: to exclude noise from UGC datasets for things like food carts or vehicles/boats/aircraft", - "Destination: to exclude bus stops and train platforms and other intermediate waypoints", - "Private: we only include places that do not include PII" - ], + "excluded_by_design": [], "freshness": { "release_frequency": "monthly", "last_updated": "" }, "quality_assurance": { - "coverage_summary": "Global", + "coverage_summary": "Countries", "coverage": [ - "Global", - "US ~73%" + "Coverage in 39 countries", + "Several countries with partial coverage: US, Germany, Taiwan", + "Datasets have varying levels of completeness in their attributes. A dataset may be missing postcodes or only have partial coverage for `address_levels` for example." ], - "quality_summary": "Duplicates", + "quality_summary": "Limited GERS ID stability and variable point location accuracy", "quality": [ - "Duplicates", - "High junk rate", - "Low attribute completeness" + "Address point locations vary from dataset to dataset but most often represent either building centroids, building entrances, points on road, or parcel centroids.", + "GERS IDs are not very stable because of identical matching. Any change to an attribute or location will result in a new GERS ID." ], "violations": [] }, "filtering": { - "summary": "None, except existence `confidence` > 0.2", - "location": [ - "" - ], + "summary": "", + "location": [], "topological": [], "geometrical": [], - "properties": [ - "`confidence` > 0.2" - ], + "properties": [], "others": [] }, "matching": { - "summary": "ML-based matching with clustering", - "logic": [ - "Identifying potential matching pairs based on quadkey", - "ML-based matching based on attribute similarity" - ], + "summary": "Exact matching", + "logic": "Matching based on exact point geometry and address properties", "properties": [ - "`name`", - "`address`", - "`phone number`", - "`house number`", - "`website`", - "`spatial distance`" + "All properties incl. geometry" ] }, "merging": { "summary": [ - "Promotion of single source for matched places", + "Promotion of single source for matched addresses", "No merging of attributes" ], "logic": [ - "Clustering of matched places", - "Promotion of place from source with the highest match count", - "No merging of attributes between matches" + "All addresses, except for perfect duplicates/matches, are released", + "No merging of attributes between sources" ], "constraints": [] - } + }, + "types": [ + "address" + ] + }, + "Base": { + "brief_description": "Foundational layers such as land, water, infrastructure, and bathymetry.", + "licenses": [ + { + "name": "ODbL", + "url": "https://opendatacommons.org/licenses/odbl/" + } + ], + "sources": [ + { + "name": "Daylight Coastlines (OSM)", + "freshness": "", + "type": "community", + "url": "https://daylightmap.org/coastlines.html" + }, + { + "name": "ETOPO1", + "freshness": "", + "type": "community", + "url": "https://www.ncei.noaa.gov/products/etopo-global-relief-model" + }, + { + "name": "GLOBathy", + "freshness": "", + "type": "ML-derived", + "url": "https://www.nature.com/articles/s41597-022-01132-9" + }, + { + "name": "ESA WorldCover", + "freshness": "", + "type": "ML-derived", + "url": "https://esa-worldcover.org/en" + } + ], + "gers": { + "gersified": { + "flag": false, + "note": "" + }, + "bridge_files": { + "flag": false, + "note": "" + }, + "GERS_registry": { + "flag": false, + "note": "The features in this theme are included in the GERS registry." + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": false, + "note": "" + }, + "excluded_by_design": [], + "freshness": { + "release_frequency": "monthly", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Global", + "coverage": "Features in base are not considered to be their own entities, so non-bathymetry coverage is just basic features from OSM with all the pass through tags.", + "quality_summary": "Derived from OSM tags", + "quality": [], + "violations": [] + }, + "filtering": { + "summary": "Tag-based filtering (non-bathymetry)", + "location": [], + "topological": [], + "geometrical": [], + "properties": [], + "others": [] + }, + "matching": { + "summary": "", + "logic": "n/a (single source)", + "properties": [] + }, + "merging": { + "summary": [], + "logic": "n/a (single source)", + "constraints": [] + }, + "types": [ + "land", + "land_cover", + "land_use", + "water", + "bathymetry", + "infrastructure" + ] }, "Buildings": { "brief_description": "Permanent human-made structures with a roof.", @@ -185,7 +261,7 @@ "flag": true, "note": "One-to-one matches only" }, - "registry": { + "GERS_registry": { "flag": true, "note": "" }, @@ -200,7 +276,7 @@ }, "excluded_by_design": [ "Features that are well defined in other themes. Examples:", - "Physical “regions”", + "Physical \u201cregions\u201d", "Places of business" ], "freshness": { @@ -257,40 +333,32 @@ "No spatial overlap for footprint merging", "Intersection-over-Union > 0.5 for attribute merging" ] - } + }, + "types": [ + "building", + "building_part" + ] }, - "Addresses": { - "brief_description": "Unique geographic points representing a physical address location.", + "Divisions": { + "brief_description": "Recognized areas for governance, culture, or organization.", "licenses": [ { - "name": "Various licenses", - "url": "https://docs.overturemaps.org/attribution/#addresses" + "name": "ODbL", + "url": "https://opendatacommons.org/licenses/odbl" } ], "sources": [ { - "name": "OpenAddresses", + "name": "OSM", "freshness": "", "type": "community", - "url": "https://openaddresses.io/" + "url": "https://osm.org/" }, { - "name": "AddressForAll", + "name": "geoBoundaries", "freshness": "", "type": "community", - "url": "https://www.addressforall.org/en/" - }, - { - "name": "City of New York", - "freshness": "", - "type": "authoritative", - "url": "https://data.cityofnewyork.us/City-Government/NYC-Address-Points/g6pj-hd8k" - }, - { - "name": "U.S. Department of Transportation", - "freshness": "", - "type": "authoritative", - "url": "https://www.transportation.gov/gis/national-address-database" + "url": "https://www.geoboundaries.org/" } ], "gers": { @@ -299,12 +367,12 @@ "note": "" }, "bridge_files": { - "flag": false, + "flag": true, "note": "" }, - "registry": { + "GERS_registry": { "flag": true, - "note": "" + "note": "The features in this theme are included in the GERS registry." }, "data_changelog": { "flag": true, @@ -315,71 +383,89 @@ "flag": false, "note": "" }, - "excluded_by_design": [ - ], + "excluded_by_design": [], "freshness": { "release_frequency": "monthly", "last_updated": "" }, "quality_assurance": { - "coverage_summary": "39 countries", + "coverage_summary": "Global", "coverage": [ - "Coverage in 39 countries", - "Several countries with partial coverage: US, Germany, Taiwan", - "Datasets have varying levels of completeness in their attributes. A dataset may be missing postcodes or only have partial coverage for `address_levels` for example." - ], - "quality_summary": "Limited GERS ID stability and variable point location accuracy", - "quality": [ - "Address point locations vary from dataset to dataset but most often represent either building centroids, building entrances, points on road, or parcel centroids.", - "GERS IDs are not very stable because of identical matching. Any change to an attribute or location will result in a new GERS ID." + "Coverage generally aligns with admin_level tags in OSM and geoboundaries datasets.", + "Global coverage of country, dependency, region, and county.", + "Macroregion and macrocounty should be present, but are miscategorized as other subtypes.", + "Subtypes below county (locality, borough. neighborhood, microhood) should be present in every country, but coverage is often spotty." ], + "quality_summary": "Minor macroregion issues; sub-county coverage spotty", + "quality": [], "violations": [] }, "filtering": { - "summary": "", - "location": [], - "topological": [], + "summary": "Deduplication; overlap not allowed for countries", + "location": [ + "" + ], + "topological": [ + "Overlap allowed at lower subtypes (ex locality), not allowed in others (ex country)" + ], "geometrical": [], "properties": [], - "others": [] + "others": [ + "Deduplication" + ] }, "matching": { - "summary": "Exact matching", - "logic": "Matching based on exact point geometry and address properties", - "properties": ["All properties incl. geometry"] + "summary": "", + "logic": "", + "properties": [] }, "merging": { - "summary": [ - "Promotion of single source for matched addresses", - "No merging of attributes" - ], - "logic": [ - "All addresses, except for perfect duplicates/matches, are released", - "No merging of attributes between sources" - ], + "summary": [], + "logic": "", "constraints": [] - } + }, + "types": [ + "division", + "division_area", + "division_boundary" + ] }, - "Transportation": { - "brief_description": "Traversable path segments (roads, railways, trails) or connectors (road intersections).", + "Places": { + "brief_description": "Concrete, physically identifiable, stationary destinations.", "licenses": [ { - "name": "ODbL", - "url": "https://opendatacommons.org/licenses/odbl/" + "name": "CDLA Permissive 2.0", + "url": "https://cdla.dev/permissive-2-0/" + }, + { + "name": "Apache 2.0.", + "url": "https://www.apache.org/licenses/LICENSE-2.0" } ], "sources": [ { - "name": "OSM", + "name": "Meta", "freshness": "", - "type": "community", - "url": "https://osm.org/" + "type": "commercial", + "url": "" }, { - "name": "TomTom", + "name": "Foursquare", "freshness": "", "type": "commercial", - "url": "https://www.tomtom.com/" + "url": "" + }, + { + "name": "Microsoft", + "freshness": "", + "type": "commercial", + "url": "" + }, + { + "name": "PinMeTo", + "freshness": "", + "type": "commercial", + "url": "" } ], "gers": { @@ -391,7 +477,7 @@ "flag": true, "note": "" }, - "registry": { + "GERS_registry": { "flag": true, "note": "" }, @@ -401,13 +487,14 @@ } }, "signal_confidence_score": { - "flag": false, + "flag": true, "note": "" }, "excluded_by_design": [ - "Open, navigable spaces like fields, parks, or oceans", - "Aerial paths, such as flight paths or geostationary satellite orbits", - "Paths traversed by continuous entities: oil pipelines, electric lines" + "Concrete and physically identifiable: to exclude divisions and addresses", + "Stationary: to exclude noise from UGC datasets for things like food carts or vehicles/boats/aircraft", + "Destination: to exclude bus stops and train platforms and other intermediate waypoints", + "Private: we only include places that do not include PII" ], "freshness": { "release_frequency": "monthly", @@ -416,42 +503,66 @@ "quality_assurance": { "coverage_summary": "Global", "coverage": [ - "The network is matched against TomTom’s (internally), any features in TomTom MNR that do not match are used as a signal indicating a missing road.", - "TomTom also performs GPS trace matching on the network." + "Global", + "US ~73%" ], - "quality_summary": "", + "quality_summary": "Duplicates", "quality": [ - "Presence of navigational islands" + "Duplicates", + "High junk rate", + "Low attribute completeness" ], "violations": [] }, "filtering": { - "summary": "Deduplication of nodes", - "location": [], + "summary": "None, except existence `confidence` > 0.2", + "location": [ + "" + ], "topological": [], "geometrical": [], - "properties": [], - "others": [ - "Deduplication of nodes" - ] + "properties": [ + "`confidence` > 0.2" + ], + "others": [] }, "matching": { - "summary": "", - "logic": "n/a (single source)", - "properties": [] + "summary": "ML-based matching with clustering", + "logic": [ + "Identifying potential matching pairs based on quadkey", + "ML-based matching based on attribute similarity" + ], + "properties": [ + "`name`", + "`address`", + "`phone number`", + "`house number`", + "`website`", + "`spatial distance`" + ] }, "merging": { - "summary": [], - "logic": "n/a (single source)", + "summary": [ + "Promotion of single source for matched places", + "No merging of attributes" + ], + "logic": [ + "Clustering of matched places", + "Promotion of place from source with the highest match count", + "No merging of attributes between matches" + ], "constraints": [] - } + }, + "types": [ + "place" + ] }, - "Divisions": { - "brief_description": "Recognized areas for governance, culture, or organization.", + "Transportation": { + "brief_description": "Traversable segments (roads, railways, ferries) and connectors (intersections), representing how people and objects travel.", "licenses": [ { "name": "ODbL", - "url": "https://opendatacommons.org/licenses/odbl" + "url": "https://opendatacommons.org/licenses/odbl/" } ], "sources": [ @@ -462,10 +573,10 @@ "url": "https://osm.org/" }, { - "name": "geoBoundaries", + "name": "TomTom", "freshness": "", - "type": "community", - "url": "https://www.geoboundaries.org/" + "type": "commercial", + "url": "https://www.tomtom.com/" } ], "gers": { @@ -477,9 +588,9 @@ "flag": true, "note": "" }, - "registry": { + "GERS_registry": { "flag": true, - "note": "" + "note": "The features in this theme are included in the GERS registry." }, "data_changelog": { "flag": true, @@ -490,134 +601,80 @@ "flag": false, "note": "" }, - "excluded_by_design": [], + "excluded_by_design": [ + "Open, navigable spaces like fields, parks, or oceans", + "Aerial paths, such as flight paths or geostationary satellite orbits", + "Paths traversed by continuous entities: oil pipelines, electric lines" + ], "freshness": { "release_frequency": "monthly", "last_updated": "" }, "quality_assurance": { "coverage_summary": "Global", - "coverage": [ - "Coverage generally aligns with admin_level tags in OSM and geoboundaries datasets.", - "Global coverage of country, dependency, region, and county.", - "Macroregion and macrocounty should be present, but are miscategorized as other subtypes.", - "Subtypes below county (locality, borough. neighborhood, microhood) should be present in every country, but coverage is often spotty." + "coverage": [], + "quality_summary": "", + "quality": [ + "Presence of navigational islands", + "Road name gap detection", + "Road sharp turn", + "Normalization of data, including dropping unrealistic outliers", + "Overlap/duplicate highways", + "Invalid intersections" ], - "quality_summary": "Minor macroregion issues; sub-county coverage spotty", - "quality": [], "violations": [] }, "filtering": { - "summary": "Deduplication; overlap not allowed for countries", - "location": [ - "" - ], - "topological": [ - "Overlap allowed at lower subtypes (ex locality), not allowed in others (ex country)" + "summary": "Deduplication of nodes; OSM highway values filtering", + "logic": [ + "Remove duplicate nodes based on location and properties", + "Filter OSM segments to specific highway values currently in scope" ], - "geometrical": [], - "properties": [], - "others": [ - "Deduplication" + "highway_values": [ + "railway", + "construction", + "cycleway", + "footway", + "living_street", + "motorway", + "motorway_link", + "path", + "pedestrian", + "primary", + "primary_link", + "residential", + "road", + "secondary", + "secondary_link", + "service", + "steps", + "tertiary", + "tertiary_link", + "track", + "trunk", + "trunk_link", + "unclassified" ] }, "matching": { - "summary": "", - "logic": "", - "properties": [] + "summary": "Deduplication of nodes; OSM to Orbis matching for TomTom integration", + "logic": [ + "TomTom roads matched to OSM using Orbis as part of OSM ingestion", + "Public OSM needs to be updated directly" + ], + "note": "If public OSM needs updating (e.g., road removed/rerouted), road must be reassigned or removed from TomTom road. Public OSM will be updated directly." }, "merging": { - "summary": [], - "logic": "", - "constraints": [] - } - }, - "Base": { - "brief_description": "Foundational layers such as land, water, infrastructure, and bathymetry.", - "licenses": [ - { - "name": "ODbL", - "url": "https://opendatacommons.org/licenses/odbl/" - } - ], - "sources": [ - { - "name": "Daylight Coastlines (OSM)", - "freshness": "", - "type": "community", - "url": "https://daylightmap.org/coastlines.html" - }, - { - "name": "ETOPO1", - "freshness": "", - "type": "community", - "url": "https://www.ncei.noaa.gov/products/etopo-global-relief-model" - }, - { - "name": "GLOBathy", - "freshness": "", - "type": "ML-derived", - "url": "https://www.nature.com/articles/s41597-022-01132-9" - }, - { - "name": "ESA WorldCover", - "freshness": "", - "type": "ML-derived", - "url": "https://esa-worldcover.org/en" - } - ], - "gers": { - "gersified": { - "flag": false, - "note": "" - }, - "bridge_files": { - "flag": false, - "note": "" - }, - "registry": { - "flag": false, - "note": "" - }, - "data_changelog": { - "flag": true, - "note": "" - } - }, - "signal_confidence_score": { - "flag": false, - "note": "" - }, - "excluded_by_design": [], - "freshness": { - "release_frequency": "monthly", - "last_updated": "" - }, - "quality_assurance": { - "coverage_summary": "Global", - "coverage": "Features in base are not considered to be their own entities, so non-bathymetry coverage is just basic features from OSM with all the pass through tags.", - "quality_summary": "Derived from OSM tags", - "quality": [], - "violations": [] - }, - "filtering": { - "summary": "Tag-based filtering (non-bathymetry)", - "location": [], - "topological": [], - "geometrical": [], - "properties": [], - "others": [] - }, - "matching": { - "summary": "", - "logic": "n/a (single source)", - "properties": [] + "summary": "Merging of OSM -> Orbis; ongoing merge of legacy network", + "logic": [ + "OSM segments merged into Orbis as part of ingestion", + "Ongoing integration of TomTom legacy network" + ] }, - "merging": { - "summary": [], - "logic": "n/a (single source)", - "constraints": [] - } + "types": [ + "connector", + "segment" + ] } } } \ No newline at end of file From 5d20d3822ce2c336617824ef4585b916402526c8 Mon Sep 17 00:00:00 2001 From: Dana Bauer Date: Thu, 4 Dec 2025 14:09:35 -0500 Subject: [PATCH 4/6] clean up structure and links --- docs/guides/index.mdx | 3 ++- static/theme-definitions-table.html | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/guides/index.mdx b/docs/guides/index.mdx index bddcb457..56042fc0 100644 --- a/docs/guides/index.mdx +++ b/docs/guides/index.mdx @@ -7,11 +7,12 @@ description: Overture Maps data theme documentation --- import DocCardList from '@theme/DocCardList'; +import useBaseUrl from '@docusaurus/useBaseUrl'; ## Theme Definitions diff --git a/static/theme-definitions-table.html b/static/theme-definitions-table.html index a03c1713..d312040d 100644 --- a/static/theme-definitions-table.html +++ b/static/theme-definitions-table.html @@ -1,5 +1,5 @@ - + @@ -480,13 +480,13 @@

Theme Definitions Table

- ← Back to Data Guides + ← Back to Data Guides

- Comprehensive specifications for Overture Maps data themes. + Comprehensive specifications for all Overture Maps data themes.

@@ -501,6 +501,7 @@

Theme Definitions Table

let expandedRows = new Set(); // Load the theme definitions data from separate JSON file + // Use relative path to work regardless of base URL fetch('/theme_definitions.json') .then(response => { if (!response.ok) { @@ -541,7 +542,7 @@

Theme Definitions Table

Theme Description - Types + Type Coverage Sources Release Artifacts @@ -634,7 +635,7 @@

Theme Definitions Table

} if (gers.bridge_files?.flag) { - // Show as "bridge files" + // Show as "Bridge Files" const title = gers.bridge_files.note || ''; items.push(`bridge files`); } @@ -668,7 +669,7 @@

Theme Definitions Table

if (theme.types && theme.types.length > 0) { html += `
-

Feature Types

+

Type

${theme.types.map(type => `${type}` From 6b8235e25971300c2266e7322d9305e123af3ce1 Mon Sep 17 00:00:00 2001 From: Dana Bauer Date: Thu, 4 Dec 2025 14:36:01 -0500 Subject: [PATCH 5/6] Fix fetch path for theme definitions JSON file --- static/theme-definitions-table.html | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/static/theme-definitions-table.html b/static/theme-definitions-table.html index d312040d..98ce86b3 100644 --- a/static/theme-definitions-table.html +++ b/static/theme-definitions-table.html @@ -1,5 +1,4 @@ - @@ -502,7 +501,7 @@

Theme Definitions Table

// Load the theme definitions data from separate JSON file // Use relative path to work regardless of base URL - fetch('/theme_definitions.json') + fetch('theme_definitions.json') .then(response => { if (!response.ok) { throw new Error('Failed to load data: ' + response.status); @@ -892,4 +891,4 @@

Merging

} - \ No newline at end of file + From 5342fc58b35a65dfa032e664513cf81b5399b280 Mon Sep 17 00:00:00 2001 From: Dana Bauer Date: Thu, 11 Dec 2025 10:20:40 -0500 Subject: [PATCH 6/6] update styles --- docs/guides/index.mdx | 13 +++++-------- static/theme-definitions-table.html | 18 ++++++++---------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/docs/guides/index.mdx b/docs/guides/index.mdx index 56042fc0..1a66a5e7 100644 --- a/docs/guides/index.mdx +++ b/docs/guides/index.mdx @@ -1,7 +1,7 @@ --- id: index slug: /guides/ -title: Data Guides +title: Guides sidebar_label: Overview description: Overture Maps data theme documentation --- @@ -9,14 +9,11 @@ description: Overture Maps data theme documentation import DocCardList from '@theme/DocCardList'; import useBaseUrl from '@docusaurus/useBaseUrl'; -## Theme Definitions - -## Theme Guides + +## Data Theme Guides + +**View Theme Definitions Table →** \ No newline at end of file diff --git a/static/theme-definitions-table.html b/static/theme-definitions-table.html index 98ce86b3..bcb30ae7 100644 --- a/static/theme-definitions-table.html +++ b/static/theme-definitions-table.html @@ -87,7 +87,7 @@ th { background: #f8f9fa; border: 1px solid #dee2e6; - padding: 0.75rem; + padding: 0.875rem; text-align: left; font-weight: 600; font-size: 0.8125rem; @@ -100,9 +100,10 @@ td { border: 1px solid #dee2e6; - padding: 0.625rem; + padding: 0.875rem; vertical-align: top; font-size: 0.875rem; + line-height: 1.5; } tr.theme-row { @@ -166,7 +167,7 @@ display: inline-flex; align-items: center; gap: 0.125rem; - padding: 0.125rem 0.375rem; + padding: 0.25rem 0.5rem; background: #f0f0f0; border-radius: 3px; font-size: 0.75rem; @@ -194,7 +195,7 @@ align-items: center; justify-content: center; min-width: 1.5rem; - padding: 0.125rem 0.375rem; + padding: 0.25rem 0.5rem; background: #0066cc; color: white; border-radius: 10px; @@ -479,17 +480,14 @@

Theme Definitions Table

- ← Back to Data Guides + ← Back to Guides
-

- Comprehensive specifications for all Overture Maps data themes. -

- Click any row to view detailed specifications including sources, licenses, and processing methods + Click any row to view detailed information about each data theme.
Error loading theme definitions: Failed to fetch
@@ -540,7 +538,7 @@

Theme Definitions Table

Theme - Description + Definition Type Coverage Sources