diff --git a/docs/getting-data/index.mdx b/docs/getting-data/index.mdx index 184e11d8f..b34918f24 100644 --- a/docs/getting-data/index.mdx +++ b/docs/getting-data/index.mdx @@ -3,6 +3,7 @@ title: Accessing the Data --- import QueryBuilder from '@site/src/components/queryBuilder'; +import MadridBuildings from '!!raw-loader!@site/src/queries/duckdb/buildings_madrid.sql'; ## Official Overture sources @@ -72,26 +73,11 @@ You can read more about querying with the data directly in [Azure using Synapse] ### DuckDB -[DuckDB](duckdb) is one of our favorite tools for querying the data and downloading only what you need. We have a full page of example queries here; we'll give you one quick example below. - -Let's say you don't want the entire buildings dataset for the planet. With DuckDB, you can scan the Parquet files in the S3 bucket and extract only the data for a particular area of interest: - -```sql -install spatial; -load spatial; -COPY( - SELECT - id, -- GERS ID - names.primary as name, - confidence, - CAST(socials AS JSON) as socials, -- serialize property to JSON - geometry -- DuckDB understands the geometry type - FROM read_parquet('s3://overturemaps-us-west-2/release/2025-04-23.0/theme=places/type=place/*', filename=true, hive_partitioning=1) - WHERE categories.primary = 'pizza_restaurant' - AND bbox.xmin BETWEEN -75 AND -73 AND bbox.ymin BETWEEN 40 AND 41 -- with point geometries you only need the bbox min values - - ) TO 'nyc_pizza.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON'); - ``` +[DuckDB](https://duckdb.org/) is one of our favorite tools for querying the data and downloading only what you need. We have a full page of example queries [here](/getting-data/duckdb/); we'll give you one quick example below. + +Let's say you don't want the entire buildings dataset for the planet. With DuckDB, you can scan Parquet files directly in S3 and extract only the data that matches specific parameters: + + Alternatively, you can put your query in a SQL file and run `duckdb -f query.sql` at the command line. diff --git a/src/queries/duckdb/buildings_madrid.sql b/src/queries/duckdb/buildings_madrid.sql new file mode 100644 index 000000000..e1b534e0d --- /dev/null +++ b/src/queries/duckdb/buildings_madrid.sql @@ -0,0 +1,17 @@ +LOAD spatial; -- noqa + +SET s3_region='us-west-2'; + +COPY( + SELECT + id, + names.primary as primary_name, + height, + geometry + FROM + read_parquet('s3://overturemaps-us-west-2/release/__OVERTURE_RELEASE/theme=buildings/type=building/*', filename=true, hive_partitioning=1) + WHERE + names.primary IS NOT NULL + AND bbox.xmin BETWEEN -4.009 AND -3.455 + AND bbox.ymin BETWEEN 40.211 AND 40.596 +) TO 'madrid_buildings.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON');