From 7dea9e56166e359edb96456b72329a26185e88f8 Mon Sep 17 00:00:00 2001
From: Stephen Epps
Date: Mon, 12 Jan 2026 08:20:52 -0800
Subject: [PATCH 1/3] new query to extract buildings in madrid

---
Review note: the query below was revised after the original commit
(INSTALL added, bbox filter now tests both edges, keyword case unified),
so the blob id on the "index" line predates this revision.

 src/queries/duckdb/buildings_madrid.sql | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 src/queries/duckdb/buildings_madrid.sql

diff --git a/src/queries/duckdb/buildings_madrid.sql b/src/queries/duckdb/buildings_madrid.sql
new file mode 100644
index 000000000..e1b534e0d
--- /dev/null
+++ b/src/queries/duckdb/buildings_madrid.sql
@@ -0,0 +1,23 @@
+-- Export named buildings around Madrid from the Overture buildings theme to GeoJSON.
+INSTALL spatial; -- noqa
+LOAD spatial; -- noqa
+
+SET s3_region='us-west-2';
+
+COPY (
+    SELECT
+        id,
+        names.primary AS primary_name,
+        height,
+        geometry
+    FROM
+        read_parquet('s3://overturemaps-us-west-2/release/__OVERTURE_RELEASE/theme=buildings/type=building/*', filename=true, hive_partitioning=1)
+    WHERE
+        names.primary IS NOT NULL
+        -- Buildings are polygons, so test both edges of each bounding box:
+        -- keep every building whose bbox overlaps the Madrid area of interest.
+        AND bbox.xmin <= -3.455
+        AND bbox.xmax >= -4.009
+        AND bbox.ymin <= 40.596
+        AND bbox.ymax >= 40.211
+) TO 'madrid_buildings.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON');

From 7a703af8eec02c9674055c0dde383a105b997f29 Mon Sep 17 00:00:00 2001
From: Stephen Epps
Date: Mon, 12 Jan 2026 08:31:53 -0800
Subject: [PATCH 2/3] update duckdb example, links

---
 docs/getting-data/index.mdx | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/docs/getting-data/index.mdx b/docs/getting-data/index.mdx
index 184e11d8f..0930794f4 100644
--- a/docs/getting-data/index.mdx
+++ b/docs/getting-data/index.mdx
@@ -3,6 +3,7 @@ title: Accessing the Data
 ---
 
 import QueryBuilder from '@site/src/components/queryBuilder';
+import MadridBuildings from '!!raw-loader!@site/src/queries/duckdb/buildings_madrid.sql';
 
 ## Official Overture sources
 
@@ -72,26 +73,11 @@ You can read more about querying with the data directly in [Azure using Synapse]
 
 ### DuckDB
 
-[DuckDB](duckdb) is one of our favorite tools for querying the data and downloading only what you need. 
We have a full page of example queries here; we'll give you one quick example below. - -Let's say you don't want the entire buildings dataset for the planet. With DuckDB, you can scan the Parquet files in the S3 bucket and extract only the data for a particular area of interest: - -```sql -install spatial; -load spatial; -COPY( - SELECT - id, -- GERS ID - names.primary as name, - confidence, - CAST(socials AS JSON) as socials, -- serialize property to JSON - geometry -- DuckDB understands the geometry type - FROM read_parquet('s3://overturemaps-us-west-2/release/2025-04-23.0/theme=places/type=place/*', filename=true, hive_partitioning=1) - WHERE categories.primary = 'pizza_restaurant' - AND bbox.xmin BETWEEN -75 AND -73 AND bbox.ymin BETWEEN 40 AND 41 -- with point geometries you only need the bbox min values - - ) TO 'nyc_pizza.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON'); - ``` +[DuckDB](https://duckdb.org/) is one of our favorite tools for querying the data and downloading only what you need. We have a full page of example queries [here](/getting-data/duckdb/); we'll give you one quick example below. + +Let's say you don't want the entire buildings dataset for the planet. With DuckDB, you can scan Parquet files directly in S3 and extract only the data that matches specific parameters: + + Alternatively, you can put your query in a SQL file and run `duckdb -f query.sql` at the command line. 
From cb95c86e469478db2498d8cac886ad581e3383ac Mon Sep 17 00:00:00 2001 From: Stephen Epps Date: Mon, 12 Jan 2026 08:32:22 -0800 Subject: [PATCH 3/3] typo --- docs/getting-data/index.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting-data/index.mdx b/docs/getting-data/index.mdx index 0930794f4..b34918f24 100644 --- a/docs/getting-data/index.mdx +++ b/docs/getting-data/index.mdx @@ -73,7 +73,7 @@ You can read more about querying with the data directly in [Azure using Synapse] ### DuckDB -[DuckDB](https://duckdb.org/) is one of our favorite tools for querying the data and downloading only what you need. We have a full page of example queries [here](/getting-data/duckdb/); we'll give you one quick example below. +[DuckDB](https://duckdb.org/) is one of our favorite tools for querying the data and downloading only what you need. We have a full page of example queries [here](/getting-data/duckdb/); we'll give you one quick example below. Let's say you don't want the entire buildings dataset for the planet. With DuckDB, you can scan Parquet files directly in S3 and extract only the data that matches specific parameters: