diff --git a/docs/guides/index.mdx b/docs/guides/index.mdx index 74d6081f..1a66a5e7 100644 --- a/docs/guides/index.mdx +++ b/docs/guides/index.mdx @@ -1,7 +1,19 @@ --- -title: Data Guides +id: index +slug: /guides/ +title: Guides +sidebar_label: Overview +description: Overture Maps data theme documentation --- import DocCardList from '@theme/DocCardList'; +import useBaseUrl from '@docusaurus/useBaseUrl'; - + + + +## Data Theme Guides + +**View Theme Definitions Table →** + + \ No newline at end of file diff --git a/static/theme-definitions-table.html b/static/theme-definitions-table.html new file mode 100644 index 00000000..bcb30ae7 --- /dev/null +++ b/static/theme-definitions-table.html @@ -0,0 +1,892 @@ + + + + + Overture Maps - Theme Definitions Table + + + +
+
+

Theme Definitions Table

+ ← Back to Guides +
+
+ +
+ +
+ Click any row to view detailed information about each data theme. +
+ +
Error loading theme definitions: Failed to fetch
+
+ + + + diff --git a/static/theme_definitions.json b/static/theme_definitions.json new file mode 100644 index 00000000..fb4494c3 --- /dev/null +++ b/static/theme_definitions.json @@ -0,0 +1,680 @@ +{ + "themes": { + "Addresses": { + "brief_description": "Unique geographic points representing a physical address location.", + "licenses": [ + { + "name": "Various licenses", + "url": "https://docs.overturemaps.org/attribution/#addresses" + } + ], + "sources": [ + { + "name": "OpenAddresses", + "freshness": "", + "type": "community", + "url": "https://openaddresses.io/" + }, + { + "name": "AddressForAll", + "freshness": "", + "type": "community", + "url": "https://www.addressforall.org/en/" + }, + { + "name": "City of New York", + "freshness": "", + "type": "authoritative", + "url": "https://data.cityofnewyork.us/City-Government/NYC-Address-Points/g6pj-hd8k" + }, + { + "name": "U.S. Department of Transportation", + "freshness": "", + "type": "authoritative", + "url": "https://www.transportation.gov/gis/national-address-database" + } + ], + "gers": { + "gersified": { + "flag": false, + "note": "" + }, + "bridge_files": { + "flag": false, + "note": "" + }, + "GERS_registry": { + "flag": false, + "note": "The features in this theme are included in the GERS registry." + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": false, + "note": "" + }, + "excluded_by_design": [], + "freshness": { + "release_frequency": "monthly", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Countries", + "coverage": [ + "Coverage in 39 countries", + "Several countries with partial coverage: US, Germany, Taiwan", + "Datasets have varying levels of completeness in their attributes. A dataset may be missing postcodes or only have partial coverage for `address_levels` for example." 
+ ], + "quality_summary": "Limited GERS ID stability and variable point location accuracy", + "quality": [ + "Address point locations vary from dataset to dataset but most often represent either building centroids, building entrances, points on road, or parcel centroids.", + "GERS IDs are not very stable because of identical matching. Any change to an attribute or location will result in a new GERS ID." + ], + "violations": [] + }, + "filtering": { + "summary": "", + "location": [], + "topological": [], + "geometrical": [], + "properties": [], + "others": [] + }, + "matching": { + "summary": "Exact matching", + "logic": "Matching based on exact point geometry and address properties", + "properties": [ + "All properties incl. geometry" + ] + }, + "merging": { + "summary": [ + "Promotion of single source for matched addresses", + "No merging of attributes" + ], + "logic": [ + "All addresses, except for perfect duplicates/matches, are released", + "No merging of attributes between sources" + ], + "constraints": [] + }, + "types": [ + "address" + ] + }, + "Base": { + "brief_description": "Foundational layers such as land, water, infrastructure, and bathymetry.", + "licenses": [ + { + "name": "ODbL", + "url": "https://opendatacommons.org/licenses/odbl/" + } + ], + "sources": [ + { + "name": "Daylight Coastlines (OSM)", + "freshness": "", + "type": "community", + "url": "https://daylightmap.org/coastlines.html" + }, + { + "name": "ETOPO1", + "freshness": "", + "type": "community", + "url": "https://www.ncei.noaa.gov/products/etopo-global-relief-model" + }, + { + "name": "GLOBathy", + "freshness": "", + "type": "ML-derived", + "url": "https://www.nature.com/articles/s41597-022-01132-9" + }, + { + "name": "ESA WorldCover", + "freshness": "", + "type": "ML-derived", + "url": "https://esa-worldcover.org/en" + } + ], + "gers": { + "gersified": { + "flag": false, + "note": "" + }, + "bridge_files": { + "flag": false, + "note": "" + }, + "GERS_registry": { + "flag": false, + 
"note": "The features in this theme are included in the GERS registry." + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": false, + "note": "" + }, + "excluded_by_design": [], + "freshness": { + "release_frequency": "monthly", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Global", + "coverage": "Features in base are not considered to be their own entities, so non-bathymetry coverage is just basic features from OSM with all the pass through tags.", + "quality_summary": "Derived from OSM tags", + "quality": [], + "violations": [] + }, + "filtering": { + "summary": "Tag-based filtering (non-bathymetry)", + "location": [], + "topological": [], + "geometrical": [], + "properties": [], + "others": [] + }, + "matching": { + "summary": "", + "logic": "n/a (single source)", + "properties": [] + }, + "merging": { + "summary": [], + "logic": "n/a (single source)", + "constraints": [] + }, + "types": [ + "land", + "land_cover", + "land_use", + "water", + "bathymetry", + "infrastructure" + ] + }, + "Buildings": { + "brief_description": "Permanent human-made structures with a roof.", + "licenses": [ + { + "name": "ODbL", + "url": "https://opendatacommons.org/licenses/odbl/" + } + ], + "sources": [ + { + "name": "OSM", + "freshness": "monthly", + "type": "community", + "url": "https://osm.org/" + }, + { + "name": "Esri", + "freshness": "biannually", + "type": "community", + "url": "https://communitymaps.arcgis.com/home" + }, + { + "name": "Vancouver", + "freshness": "biannually", + "type": "authoritative", + "url": "https://opendata.vancouver.ca/" + }, + { + "name": "IGN Spain", + "freshness": "one-time ingestion", + "type": "authoritative", + "url": "https://www.ign.es/" + }, + { + "name": "Microsoft", + "freshness": "irregularly", + "type": "ML-derived", + "url": "https://github.com/microsoft/GlobalMLBuildingFootprints" + }, + { + "name": "Google", + "freshness": "one-time ingestion", + "type": 
"ML-derived", + "url": "https://sites.research.google/open-buildings/" + }, + { + "name": "East Asian countries", + "freshness": "one-time ingestion", + "type": "ML-derived", + "url": "https://zenodo.org/records/8174931" + } + ], + "gers": { + "gersified": { + "flag": true, + "note": "only building, not building_part" + }, + "bridge_files": { + "flag": true, + "note": "One-to-one matches only" + }, + "GERS_registry": { + "flag": true, + "note": "" + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": false, + "note": "" + }, + "excluded_by_design": [ + "Features that are well defined in other themes. Examples:", + "Physical \u201cregions\u201d", + "Places of business" + ], + "freshness": { + "release_frequency": "monthly", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Global", + "coverage": "Global", + "quality_summary": "Lower footprint precision in ML-derived sources", + "quality": [ + "Lower footprint precision in Global South due to high share of ML-derived buildings" + ], + "violations": [ + "Pre-match violations: `building_tiny`, `building_large`, `building_huge`, `building_invalid_geometry`, `building_duplicate_record_id`.", + "Post-merge violations: `building_transportation_intersection`, `building_water_intersection`, `building_invalid_area`, `building_too_many_small_angles`" + ] + }, + "filtering": { + "summary": "Overlap allowed within source, but not between sources", + "location": [ + "Buildings in water" + ], + "topological": [ + "Overlap allowed within source, but not between sources" + ], + "geometrical": [ + "Geometry identical to source", + "(Multi)Polygons with too many sharp angles are excluded", + "Footprint area > 10m for ML-derived sources" + ], + "properties": [ + "`height` < 900m" + ], + "others": [] + }, + "matching": { + "summary": "Geometric similarity (Intersection-over-Union > 0.5)", + "logic": "Intersection-over-Union > 0.5", + "properties": [ + 
"`geometry`" + ] + }, + "merging": { + "summary": [ + "Hierarchical merging of non-overlapping footprints", + "Merging of height attributes between matches" + ], + "logic": [ + "Hierarchical merging of non-overlapping building footprints", + "Merging of building height attributes between matches" + ], + "constraints": [ + "No spatial overlap for footprint merging", + "Intersection-over-Union > 0.5 for attribute merging" + ] + }, + "types": [ + "building", + "building_part" + ] + }, + "Divisions": { + "brief_description": "Recognized areas for governance, culture, or organization.", + "licenses": [ + { + "name": "ODbL", + "url": "https://opendatacommons.org/licenses/odbl" + } + ], + "sources": [ + { + "name": "OSM", + "freshness": "", + "type": "community", + "url": "https://osm.org/" + }, + { + "name": "geoBoundaries", + "freshness": "", + "type": "community", + "url": "https://www.geoboundaries.org/" + } + ], + "gers": { + "gersified": { + "flag": true, + "note": "" + }, + "bridge_files": { + "flag": true, + "note": "" + }, + "GERS_registry": { + "flag": true, + "note": "The features in this theme are included in the GERS registry." + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": false, + "note": "" + }, + "excluded_by_design": [], + "freshness": { + "release_frequency": "monthly", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Global", + "coverage": [ + "Coverage generally aligns with admin_level tags in OSM and geoboundaries datasets.", + "Global coverage of country, dependency, region, and county.", + "Macroregion and macrocounty should be present, but are miscategorized as other subtypes.", + "Subtypes below county (locality, borough. neighborhood, microhood) should be present in every country, but coverage is often spotty." 
+ ], + "quality_summary": "Minor macroregion issues; sub-county coverage spotty", + "quality": [], + "violations": [] + }, + "filtering": { + "summary": "Deduplication; overlap not allowed for countries", + "location": [ + "" + ], + "topological": [ + "Overlap allowed at lower subtypes (e.g., locality), not allowed in others (e.g., country)" + ], + "geometrical": [], + "properties": [], + "others": [ + "Deduplication" + ] + }, + "matching": { + "summary": "", + "logic": "", + "properties": [] + }, + "merging": { + "summary": [], + "logic": "", + "constraints": [] + }, + "types": [ + "division", + "division_area", + "division_boundary" + ] + }, + "Places": { + "brief_description": "Concrete, physically identifiable, stationary destinations.", + "licenses": [ + { + "name": "CDLA Permissive 2.0", + "url": "https://cdla.dev/permissive-2-0/" + }, + { + "name": "Apache 2.0", + "url": "https://www.apache.org/licenses/LICENSE-2.0" + } + ], + "sources": [ + { + "name": "Meta", + "freshness": "", + "type": "commercial", + "url": "" + }, + { + "name": "Foursquare", + "freshness": "", + "type": "commercial", + "url": "" + }, + { + "name": "Microsoft", + "freshness": "", + "type": "commercial", + "url": "" + }, + { + "name": "PinMeTo", + "freshness": "", + "type": "commercial", + "url": "" + } + ], + "gers": { + "gersified": { + "flag": true, + "note": "" + }, + "bridge_files": { + "flag": true, + "note": "" + }, + "GERS_registry": { + "flag": true, + "note": "" + }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": true, + "note": "" + }, + "excluded_by_design": [ + "Concrete and physically identifiable: to exclude divisions and addresses", + "Stationary: to exclude noise from UGC datasets for things like food carts or vehicles/boats/aircraft", + "Destination: to exclude bus stops and train platforms and other intermediate waypoints", + "Private: we only include places that do not include PII" + ], + "freshness": { +
"release_frequency": "monthly", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Global", + "coverage": [ + "Global", + "US ~73%" + ], + "quality_summary": "Duplicates", + "quality": [ + "Duplicates", + "High junk rate", + "Low attribute completeness" + ], + "violations": [] + }, + "filtering": { + "summary": "None, except existence `confidence` > 0.2", + "location": [ + "" + ], + "topological": [], + "geometrical": [], + "properties": [ + "`confidence` > 0.2" + ], + "others": [] + }, + "matching": { + "summary": "ML-based matching with clustering", + "logic": [ + "Identifying potential matching pairs based on quadkey", + "ML-based matching based on attribute similarity" + ], + "properties": [ + "`name`", + "`address`", + "`phone number`", + "`house number`", + "`website`", + "`spatial distance`" + ] + }, + "merging": { + "summary": [ + "Promotion of single source for matched places", + "No merging of attributes" + ], + "logic": [ + "Clustering of matched places", + "Promotion of place from source with the highest match count", + "No merging of attributes between matches" + ], + "constraints": [] + }, + "types": [ + "place" + ] + }, + "Transportation": { + "brief_description": "Traversable segments (roads, railways, ferries) and connectors (intersections), representing how people and objects travel.", + "licenses": [ + { + "name": "ODbL", + "url": "https://opendatacommons.org/licenses/odbl/" + } + ], + "sources": [ + { + "name": "OSM", + "freshness": "", + "type": "community", + "url": "https://osm.org/" + }, + { + "name": "TomTom", + "freshness": "", + "type": "commercial", + "url": "https://www.tomtom.com/" + } + ], + "gers": { + "gersified": { + "flag": true, + "note": "" + }, + "bridge_files": { + "flag": true, + "note": "" + }, + "GERS_registry": { + "flag": true, + "note": "The features in this theme are included in the GERS registry." 
+ }, + "data_changelog": { + "flag": true, + "note": "" + } + }, + "signal_confidence_score": { + "flag": false, + "note": "" + }, + "excluded_by_design": [ + "Open, navigable spaces like fields, parks, or oceans", + "Aerial paths, such as flight paths or geostationary satellite orbits", + "Paths traversed by continuous entities: oil pipelines, electric lines" + ], + "freshness": { + "release_frequency": "monthly", + "last_updated": "" + }, + "quality_assurance": { + "coverage_summary": "Global", + "coverage": [], + "quality_summary": "", + "quality": [ + "Presence of navigational islands", + "Road name gap detection", + "Road sharp turn", + "Normalization of data, including dropping unrealistic outliers", + "Overlap/duplicate highways", + "Invalid intersections" + ], + "violations": [] + }, + "filtering": { + "summary": "Deduplication of nodes; OSM highway values filtering", + "logic": [ + "Remove duplicate nodes based on location and properties", + "Filter OSM segments to specific highway values currently in scope" + ], + "highway_values": [ + "railway", + "construction", + "cycleway", + "footway", + "living_street", + "motorway", + "motorway_link", + "path", + "pedestrian", + "primary", + "primary_link", + "residential", + "road", + "secondary", + "secondary_link", + "service", + "steps", + "tertiary", + "tertiary_link", + "track", + "trunk", + "trunk_link", + "unclassified" + ] + }, + "matching": { + "summary": "Deduplication of nodes; OSM to Orbis matching for TomTom integration", + "logic": [ + "TomTom roads matched to OSM using Orbis as part of OSM ingestion", + "Public OSM needs to be updated directly" + ], + "note": "If public OSM needs updating (e.g., road removed/rerouted), road must be reassigned or removed from TomTom road. Public OSM will be updated directly." 
+ }, + "merging": { + "summary": "Merging of OSM -> Orbis; ongoing merge of legacy network", + "logic": [ + "OSM segments merged into Orbis as part of ingestion", + "Ongoing integration of TomTom legacy network" + ] + }, + "types": [ + "connector", + "segment" + ] + } + } +} \ No newline at end of file