diff --git a/docs/guides/index.mdx b/docs/guides/index.mdx
index 74d6081f..1a66a5e7 100644
--- a/docs/guides/index.mdx
+++ b/docs/guides/index.mdx
@@ -1,7 +1,19 @@
---
-title: Data Guides
+id: index
+slug: /guides/
+title: Guides
+sidebar_label: Overview
+description: Overture Maps data theme documentation
---
import DocCardList from '@theme/DocCardList';
+import useBaseUrl from '@docusaurus/useBaseUrl';
-
+
+
+
+## Data Theme Guides
+
+**View Theme Definitions Table →**
+
+
\ No newline at end of file
diff --git a/static/theme-definitions-table.html b/static/theme-definitions-table.html
new file mode 100644
index 00000000..bcb30ae7
--- /dev/null
+++ b/static/theme-definitions-table.html
@@ -0,0 +1,892 @@
+
+
+
+
+ Overture Maps - Theme Definitions Table
+
+
+
+
+
+
+
+
+ Click any row to view detailed information about each data theme.
+
+
+
Error loading theme definitions: Failed to fetch
+
+
+
+
+
diff --git a/static/theme_definitions.json b/static/theme_definitions.json
new file mode 100644
index 00000000..fb4494c3
--- /dev/null
+++ b/static/theme_definitions.json
@@ -0,0 +1,680 @@
+{
+ "themes": {
+ "Addresses": {
+ "brief_description": "Unique geographic points representing a physical address location.",
+ "licenses": [
+ {
+ "name": "Various licenses",
+ "url": "https://docs.overturemaps.org/attribution/#addresses"
+ }
+ ],
+ "sources": [
+ {
+ "name": "OpenAddresses",
+ "freshness": "",
+ "type": "community",
+ "url": "https://openaddresses.io/"
+ },
+ {
+ "name": "AddressForAll",
+ "freshness": "",
+ "type": "community",
+ "url": "https://www.addressforall.org/en/"
+ },
+ {
+ "name": "City of New York",
+ "freshness": "",
+ "type": "authoritative",
+ "url": "https://data.cityofnewyork.us/City-Government/NYC-Address-Points/g6pj-hd8k"
+ },
+ {
+ "name": "U.S. Department of Transportation",
+ "freshness": "",
+ "type": "authoritative",
+ "url": "https://www.transportation.gov/gis/national-address-database"
+ }
+ ],
+ "gers": {
+ "gersified": {
+ "flag": false,
+ "note": ""
+ },
+ "bridge_files": {
+ "flag": false,
+ "note": ""
+ },
+ "GERS_registry": {
+ "flag": false,
+ "note": "The features in this theme are included in the GERS registry."
+ },
+ "data_changelog": {
+ "flag": true,
+ "note": ""
+ }
+ },
+ "signal_confidence_score": {
+ "flag": false,
+ "note": ""
+ },
+ "excluded_by_design": [],
+ "freshness": {
+ "release_frequency": "monthly",
+ "last_updated": ""
+ },
+ "quality_assurance": {
+ "coverage_summary": "Countries",
+ "coverage": [
+ "Coverage in 39 countries",
+ "Several countries with partial coverage: US, Germany, Taiwan",
+ "Datasets have varying levels of completeness in their attributes. A dataset may be missing postcodes or only have partial coverage for `address_levels` for example."
+ ],
+ "quality_summary": "Limited GERS ID stability and variable point location accuracy",
+ "quality": [
+ "Address point locations vary from dataset to dataset but most often represent either building centroids, building entrances, points on road, or parcel centroids.",
+ "GERS IDs are not very stable because of identical matching. Any change to an attribute or location will result in a new GERS ID."
+ ],
+ "violations": []
+ },
+ "filtering": {
+ "summary": "",
+ "location": [],
+ "topological": [],
+ "geometrical": [],
+ "properties": [],
+ "others": []
+ },
+ "matching": {
+ "summary": "Exact matching",
+ "logic": "Matching based on exact point geometry and address properties",
+ "properties": [
+ "All properties incl. geometry"
+ ]
+ },
+ "merging": {
+ "summary": [
+ "Promotion of single source for matched addresses",
+ "No merging of attributes"
+ ],
+ "logic": [
+ "All addresses, except for perfect duplicates/matches, are released",
+ "No merging of attributes between sources"
+ ],
+ "constraints": []
+ },
+ "types": [
+ "address"
+ ]
+ },
+ "Base": {
+ "brief_description": "Foundational layers such as land, water, infrastructure, and bathymetry.",
+ "licenses": [
+ {
+ "name": "ODbL",
+ "url": "https://opendatacommons.org/licenses/odbl/"
+ }
+ ],
+ "sources": [
+ {
+ "name": "Daylight Coastlines (OSM)",
+ "freshness": "",
+ "type": "community",
+ "url": "https://daylightmap.org/coastlines.html"
+ },
+ {
+ "name": "ETOPO1",
+ "freshness": "",
+ "type": "community",
+ "url": "https://www.ncei.noaa.gov/products/etopo-global-relief-model"
+ },
+ {
+ "name": "GLOBathy",
+ "freshness": "",
+ "type": "ML-derived",
+ "url": "https://www.nature.com/articles/s41597-022-01132-9"
+ },
+ {
+ "name": "ESA WorldCover",
+ "freshness": "",
+ "type": "ML-derived",
+ "url": "https://esa-worldcover.org/en"
+ }
+ ],
+ "gers": {
+ "gersified": {
+ "flag": false,
+ "note": ""
+ },
+ "bridge_files": {
+ "flag": false,
+ "note": ""
+ },
+ "GERS_registry": {
+ "flag": false,
+ "note": "The features in this theme are included in the GERS registry."
+ },
+ "data_changelog": {
+ "flag": true,
+ "note": ""
+ }
+ },
+ "signal_confidence_score": {
+ "flag": false,
+ "note": ""
+ },
+ "excluded_by_design": [],
+ "freshness": {
+ "release_frequency": "monthly",
+ "last_updated": ""
+ },
+ "quality_assurance": {
+ "coverage_summary": "Global",
+ "coverage": "Features in base are not considered to be their own entities, so non-bathymetry coverage is just basic features from OSM with all the pass-through tags.",
+ "quality_summary": "Derived from OSM tags",
+ "quality": [],
+ "violations": []
+ },
+ "filtering": {
+ "summary": "Tag-based filtering (non-bathymetry)",
+ "location": [],
+ "topological": [],
+ "geometrical": [],
+ "properties": [],
+ "others": []
+ },
+ "matching": {
+ "summary": "",
+ "logic": "n/a (single source)",
+ "properties": []
+ },
+ "merging": {
+ "summary": [],
+ "logic": "n/a (single source)",
+ "constraints": []
+ },
+ "types": [
+ "land",
+ "land_cover",
+ "land_use",
+ "water",
+ "bathymetry",
+ "infrastructure"
+ ]
+ },
+ "Buildings": {
+ "brief_description": "Permanent human-made structures with a roof.",
+ "licenses": [
+ {
+ "name": "ODbL",
+ "url": "https://opendatacommons.org/licenses/odbl/"
+ }
+ ],
+ "sources": [
+ {
+ "name": "OSM",
+ "freshness": "monthly",
+ "type": "community",
+ "url": "https://osm.org/"
+ },
+ {
+ "name": "Esri",
+ "freshness": "biannually",
+ "type": "community",
+ "url": "https://communitymaps.arcgis.com/home"
+ },
+ {
+ "name": "Vancouver",
+ "freshness": "biannually",
+ "type": "authoritative",
+ "url": "https://opendata.vancouver.ca/"
+ },
+ {
+ "name": "IGN Spain",
+ "freshness": "one-time ingestion",
+ "type": "authoritative",
+ "url": "https://www.ign.es/"
+ },
+ {
+ "name": "Microsoft",
+ "freshness": "irregularly",
+ "type": "ML-derived",
+ "url": "https://github.com/microsoft/GlobalMLBuildingFootprints"
+ },
+ {
+ "name": "Google",
+ "freshness": "one-time ingestion",
+ "type": "ML-derived",
+ "url": "https://sites.research.google/open-buildings/"
+ },
+ {
+ "name": "East Asian countries",
+ "freshness": "one-time ingestion",
+ "type": "ML-derived",
+ "url": "https://zenodo.org/records/8174931"
+ }
+ ],
+ "gers": {
+ "gersified": {
+ "flag": true,
+ "note": "only building, not building_part"
+ },
+ "bridge_files": {
+ "flag": true,
+ "note": "One-to-one matches only"
+ },
+ "GERS_registry": {
+ "flag": true,
+ "note": ""
+ },
+ "data_changelog": {
+ "flag": true,
+ "note": ""
+ }
+ },
+ "signal_confidence_score": {
+ "flag": false,
+ "note": ""
+ },
+ "excluded_by_design": [
+ "Features that are well defined in other themes. Examples:",
+ "Physical \u201cregions\u201d",
+ "Places of business"
+ ],
+ "freshness": {
+ "release_frequency": "monthly",
+ "last_updated": ""
+ },
+ "quality_assurance": {
+ "coverage_summary": "Global",
+ "coverage": "Global",
+ "quality_summary": "Lower footprint precision in ML-derived sources",
+ "quality": [
+ "Lower footprint precision in Global South due to high share of ML-derived buildings"
+ ],
+ "violations": [
+ "Pre-match violations: `building_tiny`, `building_large`, `building_huge`, `building_invalid_geometry`, `building_duplicate_record_id`.",
+ "Post-merge violations: `building_transportation_intersection`, `building_water_intersection`, `building_invalid_area`, `building_too_many_small_angles`"
+ ]
+ },
+ "filtering": {
+ "summary": "Overlap allowed within source, but not between sources",
+ "location": [
+ "Buildings in water"
+ ],
+ "topological": [
+ "Overlap allowed within source, but not between sources"
+ ],
+ "geometrical": [
+ "Geometry identical to source",
+ "(Multi)Polygons with too many sharp angles are excluded",
+ "Footprint area > 10 m\u00b2 for ML-derived sources"
+ ],
+ "properties": [
+ "`height` < 900m"
+ ],
+ "others": []
+ },
+ "matching": {
+ "summary": "Geometric similarity (Intersection-over-Union > 0.5)",
+ "logic": "Intersection-over-Union > 0.5",
+ "properties": [
+ "`geometry`"
+ ]
+ },
+ "merging": {
+ "summary": [
+ "Hierarchical merging of non-overlapping footprints",
+ "Merging of height attributes between matches"
+ ],
+ "logic": [
+ "Hierarchical merging of non-overlapping building footprints",
+ "Merging of building height attributes between matches"
+ ],
+ "constraints": [
+ "No spatial overlap for footprint merging",
+ "Intersection-over-Union > 0.5 for attribute merging"
+ ]
+ },
+ "types": [
+ "building",
+ "building_part"
+ ]
+ },
+ "Divisions": {
+ "brief_description": "Recognized areas for governance, culture, or organization.",
+ "licenses": [
+ {
+ "name": "ODbL",
+ "url": "https://opendatacommons.org/licenses/odbl"
+ }
+ ],
+ "sources": [
+ {
+ "name": "OSM",
+ "freshness": "",
+ "type": "community",
+ "url": "https://osm.org/"
+ },
+ {
+ "name": "geoBoundaries",
+ "freshness": "",
+ "type": "community",
+ "url": "https://www.geoboundaries.org/"
+ }
+ ],
+ "gers": {
+ "gersified": {
+ "flag": true,
+ "note": ""
+ },
+ "bridge_files": {
+ "flag": true,
+ "note": ""
+ },
+ "GERS_registry": {
+ "flag": true,
+ "note": "The features in this theme are included in the GERS registry."
+ },
+ "data_changelog": {
+ "flag": true,
+ "note": ""
+ }
+ },
+ "signal_confidence_score": {
+ "flag": false,
+ "note": ""
+ },
+ "excluded_by_design": [],
+ "freshness": {
+ "release_frequency": "monthly",
+ "last_updated": ""
+ },
+ "quality_assurance": {
+ "coverage_summary": "Global",
+ "coverage": [
+ "Coverage generally aligns with admin_level tags in OSM and geoBoundaries datasets.",
+ "Global coverage of country, dependency, region, and county.",
+ "Macroregion and macrocounty should be present, but are miscategorized as other subtypes.",
+ "Subtypes below county (locality, borough, neighborhood, microhood) should be present in every country, but coverage is often spotty."
+ ],
+ "quality_summary": "Minor macroregion issues; sub-county coverage spotty",
+ "quality": [],
+ "violations": []
+ },
+ "filtering": {
+ "summary": "Deduplication; overlap not allowed for countries",
+ "location": [
+ ""
+ ],
+ "topological": [
+ "Overlap allowed at lower subtypes (e.g., locality), not allowed in others (e.g., country)"
+ ],
+ "geometrical": [],
+ "properties": [],
+ "others": [
+ "Deduplication"
+ ]
+ },
+ "matching": {
+ "summary": "",
+ "logic": "",
+ "properties": []
+ },
+ "merging": {
+ "summary": [],
+ "logic": "",
+ "constraints": []
+ },
+ "types": [
+ "division",
+ "division_area",
+ "division_boundary"
+ ]
+ },
+ "Places": {
+ "brief_description": "Concrete, physically identifiable, stationary destinations.",
+ "licenses": [
+ {
+ "name": "CDLA Permissive 2.0",
+ "url": "https://cdla.dev/permissive-2-0/"
+ },
+ {
+ "name": "Apache 2.0",
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
+ }
+ ],
+ "sources": [
+ {
+ "name": "Meta",
+ "freshness": "",
+ "type": "commercial",
+ "url": ""
+ },
+ {
+ "name": "Foursquare",
+ "freshness": "",
+ "type": "commercial",
+ "url": ""
+ },
+ {
+ "name": "Microsoft",
+ "freshness": "",
+ "type": "commercial",
+ "url": ""
+ },
+ {
+ "name": "PinMeTo",
+ "freshness": "",
+ "type": "commercial",
+ "url": ""
+ }
+ ],
+ "gers": {
+ "gersified": {
+ "flag": true,
+ "note": ""
+ },
+ "bridge_files": {
+ "flag": true,
+ "note": ""
+ },
+ "GERS_registry": {
+ "flag": true,
+ "note": ""
+ },
+ "data_changelog": {
+ "flag": true,
+ "note": ""
+ }
+ },
+ "signal_confidence_score": {
+ "flag": true,
+ "note": ""
+ },
+ "excluded_by_design": [
+ "Concrete and physically identifiable: to exclude divisions and addresses",
+ "Stationary: to exclude noise from UGC datasets for things like food carts or vehicles/boats/aircraft",
+ "Destination: to exclude bus stops and train platforms and other intermediate waypoints",
+ "Private: we only include places that do not include PII"
+ ],
+ "freshness": {
+ "release_frequency": "monthly",
+ "last_updated": ""
+ },
+ "quality_assurance": {
+ "coverage_summary": "Global",
+ "coverage": [
+ "Global",
+ "US ~73%"
+ ],
+ "quality_summary": "Duplicates",
+ "quality": [
+ "Duplicates",
+ "High junk rate",
+ "Low attribute completeness"
+ ],
+ "violations": []
+ },
+ "filtering": {
+ "summary": "None, except existence `confidence` > 0.2",
+ "location": [
+ ""
+ ],
+ "topological": [],
+ "geometrical": [],
+ "properties": [
+ "`confidence` > 0.2"
+ ],
+ "others": []
+ },
+ "matching": {
+ "summary": "ML-based matching with clustering",
+ "logic": [
+ "Identifying potential matching pairs based on quadkey",
+ "ML-based matching based on attribute similarity"
+ ],
+ "properties": [
+ "`name`",
+ "`address`",
+ "`phone number`",
+ "`house number`",
+ "`website`",
+ "`spatial distance`"
+ ]
+ },
+ "merging": {
+ "summary": [
+ "Promotion of single source for matched places",
+ "No merging of attributes"
+ ],
+ "logic": [
+ "Clustering of matched places",
+ "Promotion of place from source with the highest match count",
+ "No merging of attributes between matches"
+ ],
+ "constraints": []
+ },
+ "types": [
+ "place"
+ ]
+ },
+ "Transportation": {
+ "brief_description": "Traversable segments (roads, railways, ferries) and connectors (intersections), representing how people and objects travel.",
+ "licenses": [
+ {
+ "name": "ODbL",
+ "url": "https://opendatacommons.org/licenses/odbl/"
+ }
+ ],
+ "sources": [
+ {
+ "name": "OSM",
+ "freshness": "",
+ "type": "community",
+ "url": "https://osm.org/"
+ },
+ {
+ "name": "TomTom",
+ "freshness": "",
+ "type": "commercial",
+ "url": "https://www.tomtom.com/"
+ }
+ ],
+ "gers": {
+ "gersified": {
+ "flag": true,
+ "note": ""
+ },
+ "bridge_files": {
+ "flag": true,
+ "note": ""
+ },
+ "GERS_registry": {
+ "flag": true,
+ "note": "The features in this theme are included in the GERS registry."
+ },
+ "data_changelog": {
+ "flag": true,
+ "note": ""
+ }
+ },
+ "signal_confidence_score": {
+ "flag": false,
+ "note": ""
+ },
+ "excluded_by_design": [
+ "Open, navigable spaces like fields, parks, or oceans",
+ "Aerial paths, such as flight paths or geostationary satellite orbits",
+ "Paths traversed by continuous entities: oil pipelines, electric lines"
+ ],
+ "freshness": {
+ "release_frequency": "monthly",
+ "last_updated": ""
+ },
+ "quality_assurance": {
+ "coverage_summary": "Global",
+ "coverage": [],
+ "quality_summary": "",
+ "quality": [
+ "Presence of navigational islands",
+ "Road name gap detection",
+ "Road sharp turn",
+ "Normalization of data, including dropping unrealistic outliers",
+ "Overlap/duplicate highways",
+ "Invalid intersections"
+ ],
+ "violations": []
+ },
+ "filtering": {
+ "summary": "Deduplication of nodes; OSM highway values filtering",
+ "logic": [
+ "Remove duplicate nodes based on location and properties",
+ "Filter OSM segments to specific highway values currently in scope"
+ ],
+ "highway_values": [
+ "railway",
+ "construction",
+ "cycleway",
+ "footway",
+ "living_street",
+ "motorway",
+ "motorway_link",
+ "path",
+ "pedestrian",
+ "primary",
+ "primary_link",
+ "residential",
+ "road",
+ "secondary",
+ "secondary_link",
+ "service",
+ "steps",
+ "tertiary",
+ "tertiary_link",
+ "track",
+ "trunk",
+ "trunk_link",
+ "unclassified"
+ ]
+ },
+ "matching": {
+ "summary": "Deduplication of nodes; OSM to Orbis matching for TomTom integration",
+ "logic": [
+ "TomTom roads matched to OSM using Orbis as part of OSM ingestion",
+ "Public OSM needs to be updated directly"
+ ],
+ "note": "If public OSM needs updating (e.g., road removed/rerouted), road must be reassigned or removed from TomTom road. Public OSM will be updated directly."
+ },
+ "merging": {
+ "summary": "Merging of OSM -> Orbis; ongoing merge of legacy network",
+ "logic": [
+ "OSM segments merged into Orbis as part of ingestion",
+ "Ongoing integration of TomTom legacy network"
+ ]
+ },
+ "types": [
+ "connector",
+ "segment"
+ ]
+ }
+ }
+}
\ No newline at end of file