From bd55acd72fb9fa75239f79d8f9e744b456d3756f Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Tue, 27 Jan 2026 15:36:10 -0800 Subject: [PATCH 01/27] feat(gis): add GIS/PostGIS query support with GeoParquet output - Detect geometry columns in PostgreSQL/PostGIS with SRID metadata - Fetch geometry data as WKB using ST_AsBinary() - Write GeoParquet 1.1.0 metadata with CRS information - Register geodatafusion spatial functions (st_area, st_distance, etc.) - Add spatial type support for MySQL, Snowflake, and DuckDB backends - Add GIS integration tests --- Cargo.lock | 2037 +++++++++++++++--------- Cargo.toml | 2 + src/datafetch/batch_writer.rs | 14 +- src/datafetch/mod.rs | 5 +- src/datafetch/native/duckdb.rs | 15 + src/datafetch/native/iceberg.rs | 1 + src/datafetch/native/mysql.rs | 102 +- src/datafetch/native/parquet_writer.rs | 285 +++- src/datafetch/native/postgres.rs | 245 ++- src/datafetch/native/snowflake.rs | 68 +- src/datafetch/orchestrator.rs | 30 +- src/datafetch/types.rs | 54 +- src/engine.rs | 3 + tests/gis_integration_tests.rs | 552 +++++++ 14 files changed, 2608 insertions(+), 805 deletions(-) create mode 100644 tests/gis_integration_tests.rs diff --git a/Cargo.lock b/Cargo.lock index 8cc8072..bfb618c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -50,7 +50,7 @@ version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "version_check", ] @@ -140,29 +140,29 @@ dependencies = [ [[package]] name = "anstyle-query" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" [[package]] name = "apache-avro" @@ -184,16 +184,25 @@ dependencies = [ "serde_json", "strum", "strum_macros", - "thiserror 2.0.17", + "thiserror 2.0.18", "uuid", "zstd", ] +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "ar_archive_writer" -version = "0.2.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" dependencies = [ "object", ] @@ -209,9 +218,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.0" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" +checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" dependencies = [ "rustversion", ] @@ -281,23 +290,23 @@ dependencies = [ [[package]] name = "arrow" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a2b10dcb159faf30d3f81f6d56c1211a5bea2ca424eabe477648a44b993320e" +checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" dependencies = [ - "arrow-arith 57.2.0", - "arrow-array 57.2.0", - "arrow-buffer 57.2.0", - "arrow-cast 57.2.0", - "arrow-csv 57.2.0", - "arrow-data 57.2.0", - "arrow-ipc 57.2.0", - "arrow-json 57.2.0", - "arrow-ord 57.2.0", - "arrow-row 57.2.0", - "arrow-schema 57.2.0", - "arrow-select 57.2.0", - "arrow-string 57.2.0", + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-csv 57.3.0", + "arrow-data 57.3.0", + "arrow-ipc 57.3.0", + "arrow-json 57.3.0", + "arrow-ord 57.3.0", + "arrow-row 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "arrow-string 57.3.0", ] [[package]] @@ -344,14 +353,14 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "288015089e7931843c80ed4032c5274f02b37bcb720c4a42096d50b390e70372" +checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ - "arrow-array 57.2.0", - "arrow-buffer 57.2.0", - "arrow-data 57.2.0", - "arrow-schema 57.2.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", "chrono", "num-traits", ] @@ -400,24 +409,24 @@ dependencies = [ "arrow-schema 56.2.0", "chrono", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "num", ] [[package]] name = "arrow-array" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65ca404ea6191e06bf30956394173337fa9c35f445bd447fe6c21ab944e1a23c" +checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash 0.8.12", - "arrow-buffer 57.2.0", - "arrow-data 57.2.0", - "arrow-schema 57.2.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", "chrono", "chrono-tz", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "num-complex", "num-integer", "num-traits", @@ -458,9 +467,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36356383099be0151dacc4245309895f16ba7917d79bdb71a7148659c9206c56" +checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" dependencies = [ "bytes", "half", @@ -531,16 +540,16 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8e372ed52bd4ee88cc1e6c3859aa7ecea204158ac640b10e187936e7e87074" +checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ - "arrow-array 57.2.0", - "arrow-buffer 57.2.0", - "arrow-data 57.2.0", - "arrow-ord 57.2.0", - "arrow-schema 57.2.0", - "arrow-select 57.2.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "atoi", "base64 0.22.1", "chrono", @@ -569,13 +578,13 @@ dependencies = [ [[package]] name = 
"arrow-csv" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e4100b729fe656f2e4fb32bc5884f14acf9118d4ad532b7b33c1132e4dce896" +checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" dependencies = [ - "arrow-array 57.2.0", - "arrow-cast 57.2.0", - "arrow-schema 57.2.0", + "arrow-array 57.3.0", + "arrow-cast 57.3.0", + "arrow-schema 57.3.0", "chrono", "csv", "csv-core", @@ -620,12 +629,12 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf87f4ff5fc13290aa47e499a8b669a82c5977c6a1fedce22c7f542c1fd5a597" +checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ - "arrow-buffer 57.2.0", - "arrow-schema 57.2.0", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", "half", "num-integer", "num-traits", @@ -633,21 +642,21 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "57.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63654f21676be802d446c6c4bc54f6a47e18d55f9ae6f7195a6f6faf2ecdbeb" -dependencies = [ - "arrow-arith 57.2.0", - "arrow-array 57.2.0", - "arrow-buffer 57.2.0", - "arrow-cast 57.2.0", - "arrow-data 57.2.0", - "arrow-ipc 57.2.0", - "arrow-ord 57.2.0", - "arrow-row 57.2.0", - "arrow-schema 57.2.0", - "arrow-select 57.2.0", - "arrow-string 57.2.0", +version = "57.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58c5b083668e6230eae3eab2fc4b5fb989974c845d0aa538dde61a4327c78675" +dependencies = [ + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-ipc 57.3.0", + "arrow-ord 57.3.0", + "arrow-row 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "arrow-string 57.3.0", "base64 0.22.1", "bytes", "futures", @@ -682,7 +691,7 @@ dependencies = [ "arrow-buffer 55.2.0", "arrow-data 55.2.0", "arrow-schema 55.2.0", - "flatbuffers 25.9.23", + "flatbuffers 25.12.19", ] [[package]] @@ -696,21 +705,21 @@ dependencies = [ "arrow-data 56.2.0", "arrow-schema 56.2.0", "arrow-select 56.2.0", - "flatbuffers 25.9.23", + "flatbuffers 25.12.19", ] [[package]] name = "arrow-ipc" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3ca63edd2073fcb42ba112f8ae165df1de935627ead6e203d07c99445f2081" +checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ - "arrow-array 57.2.0", - "arrow-buffer 57.2.0", - "arrow-data 57.2.0", - "arrow-schema 57.2.0", - "arrow-select 57.2.0", - "flatbuffers 25.9.23", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", + "flatbuffers 25.12.19", "lz4_flex 0.12.0", "zstd", ] @@ -728,7 +737,7 @@ dependencies = [ "arrow-schema 54.3.1", "chrono", "half", - "indexmap 2.12.0", + "indexmap 2.13.0", "lexical-core", "memchr", "num", @@ -739,18 +748,18 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a36b2332559d3310ebe3e173f75b29989b4412df4029a26a30cc3f7da0869297" +checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" dependencies = [ - "arrow-array 57.2.0", - "arrow-buffer 57.2.0", - "arrow-cast 57.2.0", - "arrow-data 57.2.0", - "arrow-schema 57.2.0", + "arrow-array 
57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", "chrono", "half", - "indexmap 2.12.0", + "indexmap 2.13.0", "itoa", "lexical-core", "memchr", @@ -802,15 +811,15 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c4e0530272ca755d6814218dffd04425c5b7854b87fa741d5ff848bf50aa39" +checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ - "arrow-array 57.2.0", - "arrow-buffer 57.2.0", - "arrow-data 57.2.0", - "arrow-schema 57.2.0", - "arrow-select 57.2.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", ] [[package]] @@ -841,14 +850,14 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b07f52788744cc71c4628567ad834cadbaeb9f09026ff1d7a4120f69edf7abd3" +checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" dependencies = [ - "arrow-array 57.2.0", - "arrow-buffer 57.2.0", - "arrow-data 57.2.0", - "arrow-schema 57.2.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", "half", ] @@ -870,14 +879,14 @@ version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] name = "arrow-schema" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb63203e8e0e54b288d0d8043ca8fa1013820822a27692ef1b78a977d879f2c" +checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" dependencies = [ "serde", "serde_core", @@ -928,15 +937,15 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c96d8a1c180b44ecf2e66c9a2f2bbcb8b1b6f14e165ce46ac8bde211a363411b" +checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash 0.8.12", - "arrow-array 57.2.0", - "arrow-buffer 57.2.0", - "arrow-data 57.2.0", - "arrow-schema 57.2.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", "num-traits", ] @@ -993,15 +1002,15 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8ad6a81add9d3ea30bf8374ee8329992c7fd246ffd8b7e2f48a3cea5aa0cc9a" +checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ - "arrow-array 57.2.0", - "arrow-buffer 57.2.0", - "arrow-data 57.2.0", - "arrow-schema 57.2.0", - "arrow-select 57.2.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-data 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "memchr", "num-traits", "regex", @@ -1077,7 +1086,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -1088,7 +1097,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -1114,9 +1123,9 @@ 
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.12" +version = "1.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96571e6996817bf3d58f6b569e4b9fd2e9d2fcf9f7424eed07b2ce9bb87535e5" +checksum = "8a8fc176d53d6fe85017f230405e3255cedb4a02221cb55ed6d76dccbbb099b2" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1133,7 +1142,7 @@ dependencies = [ "bytes", "fastrand", "hex", - "http 1.3.1", + "http 1.4.0", "ring", "time", "tokio", @@ -1144,9 +1153,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.11" +version = "1.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cd362783681b15d136480ad555a099e82ecd8e2d10a841e14dfd0078d67fee3" +checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -1156,9 +1165,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.15.2" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88aab2464f1f25453baa7a07c84c5b7684e274054ba06817f382357f77a288" +checksum = "d9a7b350e3bb1767102698302bc37256cbd48422809984b98d292c40e2579aa9" dependencies = [ "aws-lc-sys", "zeroize", @@ -1166,9 +1175,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.35.0" +version = "0.37.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45afffdee1e7c9126814751f88dddc747f41d91da16c9551a0f1e8a11e788a1" +checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549" dependencies = [ "cc", "cmake", @@ -1178,9 +1187,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.17" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d81b5b2898f6798ad58f484856768bca817e3cd9de0974c24ae0f1113fe88f1b" +checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -1191,9 +1200,10 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", + "bytes-utils", "fastrand", - "http 0.2.12", - "http-body 0.4.6", + "http 1.4.0", + "http-body 1.0.1", "percent-encoding", "pin-project-lite", "tracing", @@ -1202,15 +1212,16 @@ dependencies = [ [[package]] name = "aws-sdk-glue" -version = "1.133.0" +version = "1.139.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46e5b45757688d5c99ef0ab38d064fefcc46ee7923fb6d7b23b035323e300c7d" +checksum = "af3da2f5cf74983a60a7d5a182d76db1609ee4401057c98732ed8be973cb30ee" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", "aws-smithy-json", + "aws-smithy-observability", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -1218,21 +1229,23 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", + "http 1.4.0", "regex-lite", "tracing", ] [[package]] name = "aws-sdk-sso" -version = "1.91.0" +version = "1.95.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ee6402a36f27b52fe67661c6732d684b2635152b676aa2babbfb5204f99115d" +checksum = "00c5ff27c6ba2cbd95e6e26e2e736676fdf6bcf96495b187733f521cfe4ce448" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", "aws-smithy-json", + "aws-smithy-observability", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -1240,21 +1253,23 @@ dependencies = [ 
"bytes", "fastrand", "http 0.2.12", + "http 1.4.0", "regex-lite", "tracing", ] [[package]] name = "aws-sdk-ssooidc" -version = "1.93.0" +version = "1.97.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a45a7f750bbd170ee3677671ad782d90b894548f4e4ae168302c57ec9de5cb3e" +checksum = "4d186f1e5a3694a188e5a0640b3115ccc6e084d104e16fd6ba968dca072ffef8" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", "aws-smithy-json", + "aws-smithy-observability", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -1262,21 +1277,23 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", + "http 1.4.0", "regex-lite", "tracing", ] [[package]] name = "aws-sdk-sts" -version = "1.95.0" +version = "1.99.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55542378e419558e6b1f398ca70adb0b2088077e79ad9f14eb09441f2f7b2164" +checksum = "9acba7c62f3d4e2408fa998a3a8caacd8b9a5b5549cf36e2372fbdae329d5449" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", "aws-smithy-json", + "aws-smithy-observability", "aws-smithy-query", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -1285,15 +1302,16 @@ dependencies = [ "aws-types", "fastrand", "http 0.2.12", + "http 1.4.0", "regex-lite", "tracing", ] [[package]] name = "aws-sigv4" -version = "1.3.7" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69e523e1c4e8e7e8ff219d732988e22bfeae8a1cafdbe6d9eca1546fa080be7c" +checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -1304,7 +1322,7 @@ dependencies = [ "hex", "hmac", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "percent-encoding", "sha2", "time", @@ -1313,9 +1331,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.7" +version = "1.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ee19095c7c4dda59f1697d028ce704c24b2d33c6718790c7f1d5a3015b4107c" +checksum = "5cc50d0f63e714784b84223abd7abbc8577de8c35d699e0edd19f0a88a08ae13" dependencies = [ "futures-util", "pin-project-lite", @@ -1324,9 +1342,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.6" +version = "0.63.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" +checksum = "d619373d490ad70966994801bc126846afaa0d1ee920697a031f0cf63f2568e7" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", @@ -1334,9 +1352,9 @@ dependencies = [ "bytes-utils", "futures-core", "futures-util", - "http 0.2.12", - "http 1.3.1", - "http-body 0.4.6", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", "percent-encoding", "pin-project-lite", "pin-utils", @@ -1345,17 +1363,17 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.5" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59e62db736db19c488966c8d787f52e6270be565727236fd5579eaa301e7bc4a" +checksum = "00ccbb08c10f6bcf912f398188e42ee2eab5f1767ce215a02a73bc5df1bbdd95" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", "aws-smithy-types", "h2 0.3.27", - "h2 0.4.12", + "h2 0.4.13", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "hyper 0.14.32", "hyper 1.8.1", @@ -1364,7 +1382,7 @@ dependencies = [ "hyper-util", "pin-project-lite", "rustls 0.21.12", - "rustls 
0.23.35", + "rustls 0.23.36", "rustls-native-certs", "rustls-pki-types", "tokio", @@ -1375,27 +1393,27 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.9" +version = "0.62.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" +checksum = "27b3a779093e18cad88bbae08dc4261e1d95018c4c5b9356a52bcae7c0b6e9bb" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-observability" -version = "0.1.5" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f616c3f2260612fe44cede278bafa18e73e6479c4e393e2c4518cf2a9a228a" +checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = "aws-smithy-query" -version = "0.60.9" +version = "0.60.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae5d689cf437eae90460e944a58b5668530d433b4ff85789e69d2f2a556e057d" +checksum = "05f76a580e3d8f8961e5d48763214025a2af65c2fa4cd1fb7f270a0e107a71b0" dependencies = [ "aws-smithy-types", "urlencoding", @@ -1403,9 +1421,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.5" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" +checksum = "22ccf7f6eba8b2dcf8ce9b74806c6c185659c311665c4bf8d6e71ebd454db6bf" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -1416,9 +1434,10 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "http-body 1.0.1", + "http-body-util", "pin-project-lite", "pin-utils", "tokio", @@ -1427,15 +1446,15 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.9.3" +version = "1.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab0d43d899f9e508300e587bf582ba54c27a452dd0a9ea294690669138ae14a2" +checksum = "b4af6e5def28be846479bbeac55aa4603d6f7986fc5da4601ba324dd5d377516" dependencies = [ "aws-smithy-async", "aws-smithy-types", "bytes", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "pin-project-lite", "tokio", "tracing", @@ -1444,16 +1463,16 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.5" +version = "1.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "905cb13a9895626d49cf2ced759b062d913834c7482c38e49557eac4e6193f01" +checksum = "8ca2734c16913a45343b37313605d84e7d8b34a4611598ce1d25b35860a2bed3" dependencies = [ "base64-simd", "bytes", "bytes-utils", "futures-core", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -1470,18 +1489,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.13" +version = "0.60.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" +checksum = "b53543b4b86ed43f051644f704a98c7291b3618b67adf057ee77a366fa52fcaa" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.11" +version = "1.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d980627d2dd7bfc32a3c025685a033eeab8d365cc840c631ef59d1b8f428164" +checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1493,15 +1512,15 @@ dependencies 
= [ [[package]] name = "axum" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core", "bytes", "form_urlencoded", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "hyper 1.8.1", @@ -1526,13 +1545,13 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "mime", @@ -1578,9 +1597,9 @@ dependencies = [ [[package]] name = "base64ct" -version = "1.8.0" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" [[package]] name = "bigdecimal" @@ -1610,9 +1629,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" dependencies = [ "serde_core", ] @@ -1640,15 +1659,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.2" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", + "cpufeatures", ] [[package]] @@ -1668,7 +1688,7 @@ checksum = "87a52479c9237eb04047ddb94788c41ca0d26eaff8b697ecfbb4c32f7fdc3b1b" dependencies = [ "async-stream", "base64 0.22.1", - "bitflags 2.10.0", + "bitflags 2.11.0", "bollard-buildkit-proto", "bollard-stubs", "bytes", @@ -1677,7 +1697,7 @@ dependencies = [ "futures-util", "hex", "home", - "http 1.3.1", + "http 1.4.0", "http-body-util", "hyper 1.8.1", "hyper-named-pipe", @@ -1688,7 +1708,7 @@ dependencies = [ "num", "pin-project-lite", "rand 0.9.2", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-native-certs", "rustls-pemfile", "rustls-pki-types", @@ -1697,7 +1717,7 @@ dependencies = [ "serde_json", "serde_repr", "serde_urlencoded", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", "tokio-util", @@ -1739,9 +1759,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.8.1" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1" +checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" dependencies = [ "bon-macros", "rustversion", @@ -1749,24 +1769,24 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.8.1" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645" +checksum = 
"d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" dependencies = [ - "darling 0.21.3", + "darling 0.23.0", "ident_case", "prettyplease", "proc-macro2", "quote", "rustversion", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] name = "borsh" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8646f98db542e39fc66e68a20b2144f6a732636df7c2354e74645faaa433ce" +checksum = "d1da5ab77c1437701eeff7c88d968729e7766172279eab0676857b3d63af7a6f" dependencies = [ "borsh-derive", "cfg_aliases", @@ -1774,15 +1794,15 @@ dependencies = [ [[package]] name = "borsh-derive" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd1d3c0c2f5833f22386f252fe8ed005c7f59fdcddeef025c01b4c3b9fd9ac3" +checksum = "0686c856aa6aac0c4498f936d7d6a02df690f614c03e4d906d1018062b5c5e2c" dependencies = [ "once_cell", "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -1808,9 +1828,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5c6f81257d10a0f602a294ae4182251151ff97dbb504ef9afcdda4a64b24d9b4" [[package]] name = "bytecheck" @@ -1836,9 +1856,9 @@ dependencies = [ [[package]] name = "bytemuck" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" [[package]] name = "byteorder" @@ -1848,9 +1868,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "bytes-utils" @@ -1898,9 +1918,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.51" +version = "1.2.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" dependencies = [ "find-msvc-tools", "jobserver", @@ -1928,9 +1948,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "js-sys", @@ -1962,9 +1982,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.51" +version = "4.5.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5" +checksum = "c5caf74d17c3aec5495110c34cc3f78644bfa89af6c8993ed4de2790e49b6499" dependencies = [ "clap_builder", "clap_derive", @@ -1972,9 +1992,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.51" +version = "4.5.59" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a" +checksum = "370daa45065b80218950227371916a1633217ae42b2715b2287b606dcd618e24" dependencies = [ "anstream", "anstyle", @@ -1984,21 +2004,21 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.49" +version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] name = "clap_lex" -version = "0.7.6" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "cmake" @@ -2064,7 +2084,7 @@ dependencies = [ "serde-untagged", "serde_core", "serde_json", - "toml 0.9.10+spec-1.1.0", + "toml 0.9.12+spec-1.1.0", "winnow", "yaml-rust2", ] @@ -2090,16 +2110,16 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "tiny-keccak", ] [[package]] name = "constant_time_eq" -version = "0.3.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" [[package]] name = "convert_case" @@ -2187,6 +2207,16 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + [[package]] name = "crossbeam-epoch" version = "0.9.18" @@ -2217,7 +2247,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "crossterm_winapi", "document-features", "parking_lot 0.12.5", @@ -2242,9 +2272,9 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "rand_core 0.6.4", @@ -2301,6 +2331,16 @@ dependencies = [ "darling_macro 0.21.3", ] +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core 0.23.0", + "darling_macro 0.23.0", +] + [[package]] name = "darling_core" version = "0.20.11" @@ -2312,7 +2352,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -2326,7 +2366,20 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.110", + "syn 2.0.116", +] + +[[package]] +name 
= "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.116", ] [[package]] @@ -2337,7 +2390,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -2348,7 +2401,18 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", "quote", - "syn 2.0.110", + "syn 2.0.116", +] + +[[package]] +name = "darling_macro" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core 0.23.0", + "quote", + "syn 2.0.116", ] [[package]] @@ -2371,8 +2435,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ - "arrow 57.2.0", - "arrow-schema 57.2.0", + "arrow 57.3.0", + "arrow-schema 57.3.0", "async-trait", "bytes", "bzip2 0.6.1", @@ -2406,9 +2470,9 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.12.4", + "object_store 0.12.5", "parking_lot 0.12.5", - "parquet 57.2.0", + "parquet 57.3.0", "rand 0.9.2", "regex", "rstest", @@ -2427,7 +2491,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "async-trait", "dashmap", "datafusion-common", @@ -2441,7 +2505,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.12.4", + "object_store 0.12.5", "parking_lot 0.12.5", "tokio", ] @@ -2452,7 +2516,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "async-trait", "datafusion-catalog", "datafusion-common", @@ -2466,7 +2530,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.12.4", + "object_store 0.12.5", "tokio", ] @@ -2477,16 +2541,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ "ahash 0.8.12", - "arrow 57.2.0", - "arrow-ipc 57.2.0", + "arrow 57.3.0", + "arrow-ipc 57.3.0", "chrono", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", + "indexmap 2.13.0", "libc", "log", - "object_store 0.12.4", - "parquet 57.2.0", + "object_store 0.12.5", + "parquet 57.3.0", "paste", "recursive", "sqlparser", @@ -2511,7 +2575,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "async-compression", "async-trait", "bytes", @@ -2531,7 +2595,7 @@ dependencies = [ "glob", "itertools 0.14.0", "log", - "object_store 0.12.4", + "object_store 0.12.5", "rand 0.9.2", "tokio", "tokio-util", @@ -2546,8 +2610,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" dependencies = [ - "arrow 57.2.0", - 
"arrow-ipc 57.2.0", + "arrow 57.3.0", + "arrow-ipc 57.3.0", "async-trait", "bytes", "datafusion-common", @@ -2560,7 +2624,7 @@ dependencies = [ "datafusion-session", "futures", "itertools 0.14.0", - "object_store 0.12.4", + "object_store 0.12.5", "tokio", ] @@ -2570,7 +2634,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "async-trait", "bytes", "datafusion-common", @@ -2582,7 +2646,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.12.4", + "object_store 0.12.5", "regex", "tokio", ] @@ -2593,7 +2657,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "async-trait", "bytes", "datafusion-common", @@ -2605,7 +2669,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.12.4", + "object_store 0.12.5", "tokio", ] @@ -2615,7 +2679,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "async-trait", "bytes", "datafusion-common", @@ -2633,9 +2697,9 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.12.4", + "object_store 0.12.5", "parking_lot 0.12.5", - "parquet 57.2.0", + "parquet 57.3.0", "tokio", ] @@ -2651,15 +2715,15 @@ version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d611b5aef6cdbe0287352376813ec422367437474e3d3033414d98913dd72d0e" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "async-trait", "datafusion", "duckdb", "futures", - "object_store 0.12.4", - "parquet 57.2.0", + "object_store 0.12.5", + "parquet 57.3.0", "sqlx", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "url", @@ -2671,14 +2735,14 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "async-trait", "dashmap", "datafusion-common", "datafusion-expr", "futures", "log", - "object_store 0.12.4", + "object_store 0.12.5", "parking_lot 0.12.5", "rand 0.9.2", "tempfile", @@ -2691,7 +2755,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "async-trait", "chrono", "datafusion-common", @@ -2700,7 +2764,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.12.0", + "indexmap 2.13.0", "itertools 0.14.0", "paste", "recursive", @@ -2714,9 +2778,9 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "datafusion-common", - "indexmap 2.12.0", + "indexmap 2.13.0", "itertools 0.14.0", "paste", ] @@ -2727,8 +2791,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" 
dependencies = [ - "arrow 57.2.0", - "arrow-buffer 57.2.0", + "arrow 57.3.0", + "arrow-buffer 57.3.0", "base64 0.22.1", "blake2", "blake3", @@ -2758,7 +2822,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ "ahash 0.8.12", - "arrow 57.2.0", + "arrow 57.3.0", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -2779,7 +2843,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ "ahash 0.8.12", - "arrow 57.2.0", + "arrow 57.3.0", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", @@ -2791,8 +2855,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ - "arrow 57.2.0", - "arrow-ord 57.2.0", + "arrow 57.3.0", + "arrow-ord 57.3.0", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -2814,7 +2878,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "async-trait", "datafusion-catalog", "datafusion-common", @@ -2830,7 +2894,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "datafusion-common", "datafusion-doc", "datafusion-expr", @@ -2860,7 +2924,7 @@ checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -2869,13 +2933,13 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "chrono", "datafusion-common", "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.12.0", + "indexmap 2.13.0", "itertools 0.14.0", "log", "recursive", @@ -2890,7 +2954,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ "ahash 0.8.12", - "arrow 57.2.0", + "arrow 57.3.0", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -2898,11 +2962,11 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", + "indexmap 2.13.0", "itertools 0.14.0", "parking_lot 0.12.5", "paste", - "petgraph 0.8.3", + "petgraph", ] [[package]] @@ -2911,7 +2975,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "datafusion-common", "datafusion-expr", "datafusion-functions", @@ -2927,7 +2991,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ "ahash 0.8.12", - "arrow 57.2.0", + "arrow 57.3.0", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", @@ -2940,7 +3004,7 @@ version = "51.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -2960,9 +3024,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ "ahash 0.8.12", - "arrow 57.2.0", - "arrow-ord 57.2.0", - "arrow-schema 57.2.0", + "arrow 57.3.0", + "arrow-ord 57.3.0", + "arrow-schema 57.3.0", "async-trait", "chrono", "datafusion-common", @@ -2976,7 +3040,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", + "indexmap 2.13.0", "itertools 0.14.0", "log", "parking_lot 0.12.5", @@ -2990,7 +3054,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d368093a98a17d1449b1083ac22ed16b7128e4c67789991869480d8c4a40ecb9" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -3007,7 +3071,7 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-proto-common", - "object_store 0.12.4", + "object_store 0.12.5", "prost", ] @@ -3017,7 +3081,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b6aef3d5e5c1d2bc3114c4876730cb76a9bdc5a8df31ef1b6db48f0c1671895" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "datafusion-common", "prost", ] @@ -3028,7 +3092,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -3059,12 +3123,12 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.12.0", + "indexmap 2.13.0", "log", "recursive", "regex", @@ -3113,7 +3177,7 @@ checksum = "780eb241654bf097afb00fc5f054a09b687dad862e485fdcf8399bb056565370" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -3129,9 +3193,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" dependencies = [ "powerfmt", "serde_core", @@ -3145,7 +3209,7 @@ checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -3166,7 +3230,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -3176,7 +3240,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -3199,7 +3263,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] 
[[package]] @@ -3272,6 +3336,16 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "earcutr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01" +dependencies = [ + "itertools 0.11.0", + "num-traits", +] + [[package]] name = "either" version = "1.15.0" @@ -3415,7 +3489,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -3425,7 +3499,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bfe91596cd82e4b4fbd390f0e789fe3cf327b635fa713347f1509db97aa482c" dependencies = [ "fastrace", - "http 1.3.1", + "http 1.4.0", "tower-layer", "tower-service", ] @@ -3449,21 +3523,20 @@ dependencies = [ [[package]] name = "filetime" -version = "0.2.26" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" dependencies = [ "cfg-if", "libc", "libredox", - "windows-sys 0.60.2", ] [[package]] name = "find-msvc-tools" -version = "0.1.6" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixedbitset" @@ -3483,25 +3556,31 @@ dependencies = [ [[package]] name = "flatbuffers" -version = "25.9.23" +version = "25.12.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" +checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "rustc_version", ] [[package]] name = "flate2" -version = "1.1.5" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", - "libz-rs-sys", "miniz_oxide", + "zlib-rs", ] +[[package]] +name = "float_next_after" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" + [[package]] name = "flume" version = "0.11.1" @@ -3548,9 +3627,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -3563,9 +3642,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -3573,15 +3652,15 @@ dependencies = [ [[package]] 
name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -3601,32 +3680,32 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-timer" @@ -3636,9 +3715,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -3648,7 +3727,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -3670,10 +3748,10 @@ dependencies = [ "prost", "prost-build", "prost-types", - "reqwest 0.12.24", + "reqwest 0.12.28", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", "tokio", "tokio-stream", @@ -3687,19 +3765,168 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.9" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", ] +[[package]] +name = "geo" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fc1a1678e54befc9b4bcab6cd43b8e7f834ae8ea121118b0fd8c42747675b4a" +dependencies = [ + "earcutr", + "float_next_after", + "geo-types", + "geographiclib-rs", + "i_overlay", + "log", + "num-traits", + "robust", + "rstar", + "spade", +] + +[[package]] +name = "geo-traits" +version = 
"0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7c353d12a704ccfab1ba8bfb1a7fe6cb18b665bf89d37f4f7890edcd260206" +dependencies = [ + "geo-types", +] + +[[package]] +name = "geo-types" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24f8647af4005fa11da47cd56252c6ef030be8fa97bdbf355e7dfb6348f0a82c" +dependencies = [ + "approx", + "num-traits", + "rayon", + "rstar", + "serde", +] + +[[package]] +name = "geoarrow-array" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1cc4106ac0a0a512c398961ce95d8150475c84a84e17c4511c3643fa120a17" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-schema 57.3.0", + "geo-traits", + "geoarrow-schema", + "num-traits", + "wkb", + "wkt", +] + +[[package]] +name = "geoarrow-expr-geo" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa84300361ce57fb875bcaa6e32b95b0aff5c6b1af692b936bdd58ff343f4394" +dependencies = [ + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "geo", + "geo-traits", + "geoarrow-array", + "geoarrow-schema", +] + +[[package]] +name = "geoarrow-schema" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e97be4e9f523f92bd6a0e0458323f4b783d073d011664decd8dbf05651704f34" +dependencies = [ + "arrow-schema 57.3.0", + "geo-traits", + "serde", + "serde_json", + "thiserror 1.0.69", +] + +[[package]] +name = "geodatafusion" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773cfa1fb0d7f7661b76b3fde00f3ffd8e0ff7b3635096f0ff6294fe5ca62a2b" +dependencies = [ + "arrow-arith 57.3.0", + "arrow-array 57.3.0", + "arrow-schema 57.3.0", + "datafusion", + "geo", + "geo-traits", + "geoarrow-array", + "geoarrow-expr-geo", + "geoarrow-schema", + "geohash", + "thiserror 1.0.69", + "wkt", +] + +[[package]] +name = "geographiclib-rs" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5a7f08910fd98737a6eda7568e7c5e645093e073328eeef49758cfe8b0489c7" +dependencies = [ + "libm", +] + +[[package]] +name = "geohash" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fb94b1a65401d6cbf22958a9040aa364812c26674f841bee538b12c135db1e6" +dependencies = [ + "geo-types", + "libm", +] + +[[package]] +name = "geojson" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e26f3c45b36fccc9cf2805e61d4da6bc4bbd5a3a9589b01afa3a40eff703bd79" +dependencies = [ + "log", + "serde", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = "geozero" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db5eda63aa99ac06160fd53328ed75c34f14e3196d3f56a3649e247ed796e54b" +dependencies = [ + "geo-types", + "geojson", + "log", + "scroll", + "serde_json", + "thiserror 2.0.18", + "wkt", +] + [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "js-sys", @@ -3722,6 +3949,19 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + [[package]] name = "glob" version = "0.3.3" @@ -3752,7 +3992,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.12.0", + "indexmap 2.13.0", "slab", "tokio", "tokio-util", @@ -3761,17 +4001,17 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" dependencies = [ "atomic-waker", "bytes", "fnv", "futures-core", "futures-sink", - "http 1.3.1", - "indexmap 2.12.0", + "http 1.4.0", + "indexmap 2.13.0", "slab", "tokio", "tokio-util", @@ -3790,6 +4030,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -3822,9 +4071,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" [[package]] name = "hashlink" @@ -3835,6 +4084,16 @@ dependencies = [ "hashbrown 0.15.5", ] +[[package]] +name = "heapless" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +dependencies = [ + "hash32", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.5.0" @@ -3893,12 +4152,11 @@ dependencies = [ [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -3920,7 +4178,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.3.1", + "http 1.4.0", ] [[package]] @@ -3931,7 +4189,7 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "pin-project-lite", ] @@ -3988,8 +4246,8 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2 0.4.12", - "http 1.3.1", + "h2 0.4.13", + "http 1.4.0", "http-body 1.0.1", "httparse", "httpdate", @@ -4037,16 +4295,16 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http 1.3.1", + "http 1.4.0", "hyper 1.8.1", "hyper-util", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls 0.26.4", "tower-service", - "webpki-roots 1.0.4", + "webpki-roots 1.0.6", ] [[package]] @@ -4064,23 +4322,22 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.18" +version = "0.1.20" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "52e9a2a24dc5c6821e71a7030e1e14b7b632acac55c40e9d2e082c621261bb56" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", - "futures-core", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "hyper 1.8.1", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.1", + "socket2 0.6.2", "system-configuration", "tokio", "tower-service", @@ -4103,11 +4360,54 @@ dependencies = [ "tower-service", ] +[[package]] +name = "i_float" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "010025c2c532c8d82e42d0b8bb5184afa449fa6f06c709ea9adcb16c49ae405b" +dependencies = [ + "libm", +] + +[[package]] +name = "i_key_sort" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9190f86706ca38ac8add223b2aed8b1330002b5cdbbce28fb58b10914d38fc27" + +[[package]] +name = "i_overlay" +version = "4.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413183068e6e0289e18d7d0a1f661b81546e6918d5453a44570b9ab30cbed1b3" +dependencies = [ + "i_float", + "i_key_sort", + "i_shape", + "i_tree", + "rayon", +] + +[[package]] +name = "i_shape" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ea154b742f7d43dae2897fcd5ead86bc7b5eefcedd305a7ebf9f69d44d61082" +dependencies = [ + "i_float", +] + +[[package]] +name = "i_tree" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e6d558e6d4c7b82bc51d9c771e7a927862a161a7d87bf2b0541450e0e20915" + [[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -4165,7 +4465,7 @@ dependencies = [ "parquet 55.2.0", "rand 0.8.5", "reqsign", - "reqwest 0.12.24", + "reqwest 0.12.28", "roaring", "rust_decimal", "serde", @@ -4207,10 +4507,10 @@ checksum = "c7aa0cb3c2a3e41c1fae671b7c6bfc09360271634b1790cce70bb1b674e735ef" dependencies = [ "async-trait", "chrono", - "http 1.3.1", + "http 1.4.0", "iceberg", "itertools 0.13.0", - "reqwest 0.12.24", + "reqwest 0.12.28", "serde", "serde_derive", "serde_json", @@ -4268,9 +4568,9 @@ checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" dependencies = [ "icu_collections", "icu_locale_core", @@ -4282,9 +4582,9 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" [[package]] name = "icu_provider" @@ -4301,6 +4601,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "ident_case" version = "1.0.1" @@ -4341,12 +4647,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.0" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "serde", "serde_core", ] @@ -4381,7 +4687,7 @@ dependencies = [ "async-trait", "bytes", "futures", - "object_store 0.12.4", + "object_store 0.12.5", "tracing", "tracing-futures", ] @@ -4400,9 +4706,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" dependencies = [ "memchr", "serde", @@ -4412,7 +4718,16 @@ dependencies = [ name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] [[package]] name = "itertools" @@ -4434,9 +4749,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jni" @@ -4472,9 +4787,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -4515,6 +4830,12 @@ dependencies = [ "spin", ] +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "lexical-core" version = "1.0.6" @@ -4580,9 +4901,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.177" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "libduckdb-sys" @@ -4593,7 +4914,7 @@ dependencies = [ "cc", "flate2", "pkg-config", - "reqwest 0.12.24", + "reqwest 0.12.28", "serde", "serde_json", "tar", @@ -4603,19 +4924,19 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.10" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "libc", - "redox_syscall 0.5.18", + "redox_syscall 0.7.1", ] [[package]] @@ -4629,15 +4950,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "libz-rs-sys" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "840db8cf39d9ec4dd794376f38acc40d0fc65eec2a8f484f7fd375b84602becd" -dependencies = [ - "zlib-rs", -] - [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -4649,16 +4961,16 @@ name = "liquid-cache-client" version = "0.1.10" source = "git+https://github.com/XiangpengHao/liquid-cache?rev=5e63d811132230a0e15fb6d4311be2eb5551cb4d#5e63d811132230a0e15fb6d4311be2eb5551cb4d" dependencies = [ - "arrow 57.2.0", + "arrow 57.3.0", "arrow-flight", - "arrow-schema 57.2.0", + "arrow-schema 57.3.0", "datafusion", "datafusion-proto", "fastrace", "fastrace-tonic", "futures", "liquid-cache-common", - "object_store 0.12.4", + "object_store 0.12.5", "tokio", "tonic", "tower", @@ -4675,10 +4987,10 @@ dependencies = [ "bytes", "chrono", "futures", - "object_store 0.12.4", + "object_store 0.12.5", "prost", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "url", ] @@ -4706,9 +5018,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lru-slab" @@ -4772,9 +5084,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "mime" @@ -4794,9 +5106,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", "wasi", @@ -4805,9 +5117,9 @@ dependencies = [ [[package]] name = "moka" -version = "0.12.12" +version = "0.12.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3dec6bd31b08944e08b58fd99373893a6c17054d6f3ea5006cc894f4f4eee2a" +checksum = "b4ac832c50ced444ef6be0767a008b02c106a909ba79d1d830501e94b96f6b7e" dependencies = [ "async-lock", "crossbeam-channel", @@ -4905,9 +5217,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" [[package]] name = "num-integer" @@ -4960,6 +5272,28 @@ dependencies = [ "libc", ] +[[package]] +name = "num_enum" +version = "0.7.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.116", +] + [[package]] name = "num_threads" version = "0.1.7" @@ -4971,9 +5305,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.37.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" dependencies = [ "memchr", ] @@ -4997,7 +5331,7 @@ dependencies = [ "percent-encoding", "quick-xml 0.37.5", "rand 0.8.5", - "reqwest 0.12.24", + "reqwest 0.12.28", "ring", "serde", "serde_json", @@ -5010,9 +5344,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" +checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" dependencies = [ "async-trait", "base64 0.22.1", @@ -5020,7 +5354,7 @@ dependencies = [ "chrono", "form_urlencoded", "futures", - "http 1.3.1", + "http 1.4.0", "http-body-util", "humantime", "hyper 1.8.1", @@ -5030,12 +5364,12 @@ dependencies = [ "percent-encoding", "quick-xml 0.38.4", "rand 0.9.2", - "reqwest 0.12.24", + "reqwest 0.12.28", "ring", "serde", "serde_json", "serde_urlencoded", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "url", @@ -5075,15 +5409,15 @@ dependencies = [ "chrono", "crc32c", "futures", - "getrandom 0.2.16", - "http 1.3.1", + "getrandom 0.2.17", + "http 1.4.0", "http-body 1.0.1", "log", "md-5", "percent-encoding", "quick-xml 0.38.4", "reqsign", - "reqwest 0.12.24", + "reqwest 0.12.28", "serde", "serde_json", "tokio", @@ -5092,9 +5426,9 @@ dependencies = [ [[package]] name = "openssl-probe" -version = "0.1.6" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "opentelemetry" @@ -5106,7 +5440,7 @@ dependencies = [ "futures-sink", "js-sys", "pin-project-lite", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", ] @@ -5118,9 +5452,9 @@ checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" dependencies = [ "async-trait", "bytes", - "http 1.3.1", + "http 1.4.0", "opentelemetry", - "reqwest 0.12.24", + "reqwest 0.12.28", ] [[package]] @@ -5129,14 +5463,14 @@ version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf" dependencies = [ - "http 1.3.1", + "http 1.4.0", "opentelemetry", "opentelemetry-http", "opentelemetry-proto", "opentelemetry_sdk", "prost", - "reqwest 0.12.24", - "thiserror 2.0.17", + "reqwest 0.12.28", + "thiserror 2.0.18", "tokio", "tonic", "tracing", @@ -5167,7 +5501,7 @@ dependencies = [ "opentelemetry", "percent-encoding", "rand 0.9.2", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", ] @@ -5297,18 +5631,18 
@@ dependencies = [ [[package]] name = "parquet" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6a2926a30477c0b95fea6c28c3072712b139337a242c2cc64817bdc20a8854" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash 0.8.12", - "arrow-array 57.2.0", - "arrow-buffer 57.2.0", - "arrow-cast 57.2.0", - "arrow-data 57.2.0", - "arrow-ipc 57.2.0", - "arrow-schema 57.2.0", - "arrow-select 57.2.0", + "arrow-array 57.3.0", + "arrow-buffer 57.3.0", + "arrow-cast 57.3.0", + "arrow-data 57.3.0", + "arrow-ipc 57.3.0", + "arrow-schema 57.3.0", + "arrow-select 57.3.0", "base64 0.22.1", "brotli", "bytes", @@ -5316,12 +5650,12 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "lz4_flex 0.12.0", "num-bigint", "num-integer", "num-traits", - "object_store 0.12.4", + "object_store 0.12.5", "paste", "seq-macro", "simdutf8", @@ -5354,7 +5688,7 @@ dependencies = [ "regex", "regex-syntax", "structmeta", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -5396,9 +5730,9 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "pest" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9eb05c21a464ea704b53158d358a31e6425db2f63a1a7312268b05fe2b75f7" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" dependencies = [ "memchr", "ucd-trie", @@ -5406,9 +5740,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f9dbced329c441fa79d80472764b1a2c7e57123553b8519b36663a2fb234ed" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" dependencies = [ "pest", "pest_generator", @@ -5416,37 +5750,27 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bb96d5051a78f44f43c8f712d8e810adb0ebf923fc9ed2655a7f66f63ba8ee5" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] name = "pest_meta" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602113b5b5e8621770cfd490cfd90b9f84ab29bd2b0e49ad83eb6d186cef2365" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" dependencies = [ "pest", "sha2", ] -[[package]] -name = "petgraph" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" -dependencies = [ - "fixedbitset", - "indexmap 2.12.0", -] - [[package]] name = "petgraph" version = "0.8.3" @@ -5455,7 +5779,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.12.0", + "indexmap 2.13.0", "serde", ] @@ -5494,7 +5818,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -5550,9 +5874,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "potential_utf" @@ -5585,7 +5909,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -5594,7 +5918,7 @@ version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "toml_edit 0.23.7", + "toml_edit 0.23.10+spec-1.0.0", ] [[package]] @@ -5616,23 +5940,23 @@ dependencies = [ "proc-macro-error-attr2", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -5640,53 +5964,52 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", "itertools 0.14.0", "log", "multimap", - "once_cell", - "petgraph 0.7.1", + "petgraph", "prettyplease", "prost", "prost-types", "pulldown-cmark", "pulldown-cmark-to-cmark", "regex", - "syn 2.0.110", + "syn 2.0.116", "tempfile", ] [[package]] name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] name = "prost-types" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] [[package]] name = "psm" -version = "0.1.28" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" dependencies = [ "ar_archive_writer", "cc", @@ -5718,16 +6041,16 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "memchr", "unicase", ] [[package]] name = "pulldown-cmark-to-cmark" -version = "21.1.0" +version = "22.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8246feae3db61428fd0bb94285c690b460e4517d83152377543ca802357785f1" +checksum = "50793def1b900256624a709439404384204a5dc3a6ec580281bfaac35e882e90" dependencies = [ "pulldown-cmark", ] @@ -5770,9 +6093,9 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.35", - "socket2 0.6.1", - "thiserror 2.0.17", + "rustls 0.23.36", + "socket2 0.6.2", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -5791,10 +6114,10 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-pki-types", "slab", - "thiserror 2.0.17", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -5809,16 +6132,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.1", + "socket2 0.6.2", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.42" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] @@ -5853,7 +6176,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -5873,7 +6196,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -5882,18 +6205,38 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "recursive" version = "0.1.1" @@ -5911,7 +6254,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -5955,7 +6298,16 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", +] + +[[package]] +name = "redox_syscall" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35985aa610addc02e24fc232012c86fd11f14111180f902b67e2d5331f8ebf2b" +dependencies = [ + "bitflags 
2.11.0", ] [[package]] @@ -5975,14 +6327,14 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -5992,9 +6344,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -6003,15 +6355,15 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" [[package]] name = "relative-path" @@ -6039,16 +6391,16 @@ dependencies = [ "base64 0.22.1", "chrono", "form_urlencoded", - "getrandom 0.2.16", + "getrandom 0.2.17", "hex", "hmac", "home", - "http 1.3.1", + "http 1.4.0", "log", "percent-encoding", "quick-xml 0.37.5", "rand 0.8.5", - "reqwest 0.12.24", + "reqwest 0.12.28", "rust-ini", "serde", "serde_json", @@ -6059,18 +6411,17 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.24" +version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ - "async-compression", "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", - "h2 0.4.12", - "http 1.3.1", + "h2 0.4.13", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "hyper 1.8.1", @@ -6081,7 +6432,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-native-certs", "rustls-pki-types", "serde", @@ -6099,21 +6450,21 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.4", + "webpki-roots 1.0.6", ] [[package]] name = "reqwest" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e9018c9d814e5f30cc16a0f03271aeab3571e609612d9fe78c1aa8d11c2f62" +checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" dependencies = [ "base64 0.22.1", "bytes", "encoding_rs", "futures-core", - "h2 0.4.12", - "http 1.3.1", + "h2 0.4.13", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "hyper 1.8.1", @@ -6125,7 +6476,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-pki-types", "rustls-platform-verifier", "serde", @@ -6150,8 +6501,8 @@ checksum = 
"57f17d28a6e6acfe1733fe24bcd30774d13bffa4b8a22535b4c8c98423088d4e" dependencies = [ "anyhow", "async-trait", - "http 1.3.1", - "reqwest 0.12.24", + "http 1.4.0", + "reqwest 0.12.28", "serde", "thiserror 1.0.69", "tower-service", @@ -6166,11 +6517,11 @@ dependencies = [ "anyhow", "async-trait", "futures", - "getrandom 0.2.16", - "http 1.3.1", + "getrandom 0.2.17", + "http 1.4.0", "hyper 1.8.1", "parking_lot 0.11.2", - "reqwest 0.12.24", + "reqwest 0.12.28", "reqwest-middleware", "retry-policies", "thiserror 1.0.69", @@ -6196,7 +6547,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted", "windows-sys 0.52.0", @@ -6204,9 +6555,9 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.7.45" +version = "0.7.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" dependencies = [ "bitvec", "bytecheck", @@ -6222,9 +6573,9 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.7.45" +version = "0.7.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" dependencies = [ "proc-macro2", "quote", @@ -6241,13 +6592,19 @@ dependencies = [ "byteorder", ] +[[package]] +name = "robust" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e27ee8bb91ca0adcf0ecb116293afa12d393f9c2b9b9cd54d33e8078fe19839" + [[package]] name = "ron" version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd490c5b18261893f14449cbd28cb9c0b637aebf161cd77900bfdedaff21ec32" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "once_cell", "serde", "serde_derive", @@ -6257,9 +6614,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40a0376c50d0358279d9d643e4bf7b7be212f1f4ff1da9070a7b54d22ef75c88" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" dependencies = [ "const-oid", "digest", @@ -6275,6 +6632,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rstar" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "421400d13ccfd26dfa5858199c30a5d76f9c54e0dba7575273025b43c5175dbb" +dependencies = [ + "heapless", + "num-traits", + "smallvec", +] + [[package]] name = "rstest" version = "0.26.1" @@ -6300,7 +6668,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.110", + "syn 2.0.116", "unicode-ident", ] @@ -6321,8 +6689,8 @@ dependencies = [ "arrow-ipc 54.3.1", "arrow-ipc 55.2.0", "arrow-ipc 56.2.0", - "arrow-json 57.2.0", - "arrow-schema 57.2.0", + "arrow-json 57.3.0", + "arrow-schema 57.3.0", "async-trait", "axum", "base64 0.22.1", @@ -6336,7 +6704,9 @@ dependencies = [ "duckdb", "futures", "gcp-bigquery-client", - "http 1.3.1", + "geodatafusion", + "geozero", + "http 1.4.0", "iceberg", "iceberg-catalog-glue", "iceberg-catalog-rest", @@ -6344,13 +6714,13 @@ dependencies = [ "liquid-cache-client", "log", "nanoid", - "object_store 0.12.4", + "object_store 0.12.5", "opentelemetry", "opentelemetry-otlp", "opentelemetry_sdk", "rand 0.8.5", "redis", - "reqwest 
0.13.1", + "reqwest 0.13.2", "rust_decimal", "serde", "serde_json", @@ -6360,7 +6730,7 @@ dependencies = [ "tempfile", "testcontainers", "testcontainers-modules", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-util", "toml 0.8.23", @@ -6386,9 +6756,9 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.39.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35affe401787a9bd846712274d97654355d21b2a2c092a3139aabe31e9022282" +checksum = "61f703d19852dbf87cbc513643fa81428361eb6940f1ac14fd58155d295a3eb0" dependencies = [ "arrayvec", "borsh", @@ -6417,11 +6787,11 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "errno", "libc", "linux-raw-sys", @@ -6442,25 +6812,25 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.35" +version = "0.23.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "aws-lc-rs", "log", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.9", "subtle", "zeroize", ] [[package]] name = "rustls-native-certs" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -6479,9 +6849,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.0" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94182ad936a0c91c324cd46c6511b9510ed16af436d7b5bab34beab0afd55f7a" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "web-time", "zeroize", @@ -6498,10 +6868,10 @@ dependencies = [ "jni", "log", "once_cell", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-native-certs", "rustls-platform-verifier-android", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.9", "security-framework", "security-framework-sys", "webpki-root-certs", @@ -6526,9 +6896,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.8" +version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ "aws-lc-rs", "ring", @@ -6544,9 +6914,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.20" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "same-file" @@ -6580,9 +6950,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.1.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" dependencies = [ "dyn-clone", "ref-cast", @@ -6596,6 +6966,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scroll" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1257cd4248b4132760d6524d6dda4e053bc648c9070b960929bf50cfb1e7add" + [[package]] name = "sct" version = "0.7.1" @@ -6614,11 +6990,11 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "security-framework" -version = "3.5.1" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -6627,9 +7003,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.15.0" +version = "2.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +checksum = "321c8673b092a9a42605034a9879d73cb79101ed5fd117bc9a597b89b4e9e61a" dependencies = [ "core-foundation-sys", "libc", @@ -6696,20 +7072,20 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -6731,7 +7107,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -6766,17 +7142,17 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.16.0" +version = "3.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10574371d41b0d9b2cff89418eda27da52bcaff2cc8741db26382a77c29131f1" +checksum = "4fa237f2807440d238e0364a218270b98f767a00d3dada77b1c53ae88940e2e7" dependencies = [ "base64 0.22.1", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.12.0", + "indexmap 2.13.0", "schemars 0.9.0", - "schemars 1.1.0", + "schemars 1.2.1", "serde_core", "serde_json", "serde_with_macros", @@ -6785,14 +7161,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.16.0" +version = "3.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08a72d8216842fdd57820dc78d840bef99248e35fb2554ff923319e60f2d686b" +checksum = "52a8e3ca0ca629121f70ab50f95249e5a6f925cc0f6ffe8256c45b728875706c" dependencies = [ "darling 0.21.3", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -6840,10 +7216,11 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.6" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ + "errno", "libc", ] @@ -6859,9 +7236,9 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] name = "simdutf8" @@ -6871,27 +7248,27 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "simple_asn1" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" +checksum = "0d585997b0ac10be3c5ee635f1bab02d512760d14b7c468801ac8a01d9ae5f1d" dependencies = [ "num-bigint", "num-traits", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", ] [[package]] name = "siphasher" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "small_ctor" @@ -6926,7 +7303,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -6950,13 +7327,13 @@ dependencies = [ "log", "object_store 0.11.2", "regex", - "reqwest 0.12.24", + "reqwest 0.12.28", "reqwest-middleware", "reqwest-retry", "serde", "serde_json", "snowflake-jwt", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "url", "uuid", @@ -6989,14 +7366,26 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" dependencies = [ "libc", "windows-sys 0.60.2", ] +[[package]] +name = "spade" +version = "2.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb313e1c8afee5b5647e00ee0fe6855e3d529eb863a0fdae1d60006c4d1e9990" +dependencies = [ + "hashbrown 0.15.5", + "num-traits", + "robust", + "smallvec", +] + [[package]] name = "spin" version = "0.9.8" @@ -7035,7 +7424,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -7071,17 +7460,17 @@ dependencies = [ "futures-util", "hashbrown 0.15.5", "hashlink", - "indexmap 2.12.0", + "indexmap 2.13.0", "log", "memchr", "once_cell", "percent-encoding", - "rustls 0.23.35", + "rustls 0.23.36", "serde", "serde_json", "sha2", "smallvec", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", "tracing", @@ -7099,7 +7488,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -7122,7 +7511,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.110", + "syn 2.0.116", "tokio", "url", ] @@ 
-7136,7 +7525,7 @@ dependencies = [ "atoi", "base64 0.22.1", "bigdecimal", - "bitflags 2.10.0", + "bitflags 2.11.0", "byteorder", "bytes", "chrono", @@ -7166,7 +7555,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", "whoami", ] @@ -7180,7 +7569,7 @@ dependencies = [ "atoi", "base64 0.22.1", "bigdecimal", - "bitflags 2.10.0", + "bitflags 2.11.0", "byteorder", "chrono", "crc", @@ -7206,7 +7595,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", "whoami", ] @@ -7231,7 +7620,7 @@ dependencies = [ "serde", "serde_urlencoded", "sqlx-core", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", "url", ] @@ -7244,9 +7633,9 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" dependencies = [ "cc", "cfg-if", @@ -7281,7 +7670,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -7292,7 +7681,7 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -7313,7 +7702,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -7335,9 +7724,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.110" +version = "2.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" +checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" dependencies = [ "proc-macro2", "quote", @@ -7361,16 +7750,16 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] name = "system-configuration" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -7410,12 +7799,12 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.23.0" +version = "3.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.1", "once_cell", "rustix", "windows-sys 0.61.2", @@ -7444,7 +7833,7 @@ dependencies = [ "serde", "serde_json", "serde_with", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tokio-stream", "tokio-util", @@ -7471,11 +7860,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.17", + 
"thiserror-impl 2.0.18", ] [[package]] @@ -7486,18 +7875,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -7533,9 +7922,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", "itoa", @@ -7543,22 +7932,22 @@ dependencies = [ "num-conv", "num_threads", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" dependencies = [ "num-conv", "time-core", @@ -7600,9 +7989,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.48.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ "bytes", "libc", @@ -7610,7 +7999,7 @@ dependencies = [ "parking_lot 0.12.5", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.1", + "socket2 0.6.2", "tokio-macros", "windows-sys 0.61.2", ] @@ -7623,7 +8012,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -7642,15 +8031,15 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.35", + "rustls 0.23.36", "tokio", ] [[package]] name = "tokio-stream" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" dependencies = [ "futures-core", "pin-project-lite", @@ -7659,9 +8048,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.17" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -7684,9 +8073,9 @@ dependencies = [ 
[[package]] name = "toml" -version = "0.9.10+spec-1.1.0" +version = "0.9.12+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0825052159284a1a8b4d6c0c86cbc801f2da5afd2b225fa548c72f2e74002f48" +checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863" dependencies = [ "serde_core", "serde_spanned 1.0.4", @@ -7719,7 +8108,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.13.0", "serde", "serde_spanned 0.6.9", "toml_datetime 0.6.11", @@ -7729,11 +8118,11 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.23.7" +version = "0.23.10+spec-1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.13.0", "toml_datetime 0.7.5+spec-1.1.0", "toml_parser", "winnow", @@ -7741,9 +8130,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.6+spec-1.1.0" +version = "1.0.9+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" dependencies = [ "winnow", ] @@ -7756,17 +8145,17 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" [[package]] name = "tonic" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" +checksum = "7f32a6f80051a4111560201420c7885d0082ba9efe2ab61875c587bb6b18b9a0" dependencies = [ "async-trait", "axum", "base64 0.22.1", "bytes", "flate2", - "h2 0.4.12", - "http 1.3.1", + "h2 0.4.13", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "hyper 1.8.1", @@ -7775,7 +8164,7 @@ dependencies = [ "percent-encoding", "pin-project", "rustls-native-certs", - "socket2 0.6.1", + "socket2 0.6.2", "sync_wrapper", "tokio", "tokio-rustls 0.26.4", @@ -7789,21 +8178,21 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27aac809edf60b741e2d7db6367214d078856b8a5bff0087e94ff330fb97b6fc" +checksum = "ce6d8958ed3be404120ca43ffa0fb1e1fc7be214e96c8d33bd43a131b6eebc9e" dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] name = "tonic-prost" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" +checksum = "9f86539c0089bfd09b1f8c0ab0239d80392af74c21bc9e0f15e1b4aca4c1647f" dependencies = [ "bytes", "prost", @@ -7812,29 +8201,29 @@ dependencies = [ [[package]] name = "tonic-prost-build" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4556786613791cfef4ed134aa670b61a85cfcacf71543ef33e8d801abae988f" +checksum = "65873ace111e90344b8973e94a1fc817c924473affff24629281f90daed1cd2e" dependencies = [ "prettyplease", "proc-macro2", "prost-build", "prost-types", "quote", - "syn 2.0.110", + "syn 2.0.116", "tempfile", "tonic-build", ] [[package]] name = 
"tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", - "indexmap 2.12.0", + "indexmap 2.13.0", "pin-project-lite", "slab", "sync_wrapper", @@ -7851,13 +8240,18 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags 2.10.0", + "async-compression", + "bitflags 2.11.0", "bytes", + "futures-core", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", + "http-body-util", "iri-string", "pin-project-lite", + "tokio", + "tokio-util", "tower", "tower-layer", "tower-service", @@ -7896,7 +8290,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -7995,7 +8389,7 @@ checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -8030,9 +8424,9 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-normalization" @@ -8061,6 +8455,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "universal-hash" version = "0.5.1" @@ -8079,18 +8479,17 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "3.1.4" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d39cb1dbab692d82a977c0392ffac19e188bd9186a9f32806f0aaa859d75585a" +checksum = "fdc97a28575b85cfedf2a7e7d3cc64b3e11bd8ac766666318003abbacc7a21fc" dependencies = [ "base64 0.22.1", "log", "percent-encoding", - "rustls 0.23.35", + "rustls 0.23.36", "rustls-pki-types", "ureq-proto", "utf-8", - "webpki-roots 1.0.4", ] [[package]] @@ -8100,21 +8499,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f" dependencies = [ "base64 0.22.1", - "http 1.3.1", + "http 1.4.0", "httparse", "log", ] [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", "percent-encoding", "serde", + "serde_derive", ] [[package]] @@ -8143,13 +8543,13 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.21.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.1", "js-sys", - "serde", + "serde_core", "wasm-bindgen", ] @@ -8204,9 +8604,18 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ "wit-bindgen", ] @@ -8219,9 +8628,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if", "once_cell", @@ -8232,11 +8641,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.55" +version = "0.4.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" dependencies = [ "cfg-if", + "futures-util", "js-sys", "once_cell", "wasm-bindgen", @@ -8245,9 +8655,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -8255,26 +8665,48 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap 2.13.0", + "wasm-encoder", + "wasmparser", +] + [[package]] name = "wasm-streams" version = "0.4.2" @@ -8303,11 +8735,23 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.0", + "hashbrown 0.15.5", + "indexmap 2.13.0", + "semver", +] + [[package]] name = "web-sys" -version = "0.3.82" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" dependencies = [ "js-sys", "wasm-bindgen", @@ -8325,9 +8769,9 @@ dependencies = [ [[package]] name = "webpki-root-certs" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36a29fc0408b113f68cf32637857ab740edfafdf460c326cd2afaa2d84cc05dc" +checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca" dependencies = [ "rustls-pki-types", ] @@ -8338,14 +8782,14 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.4", + "webpki-roots 1.0.6", ] [[package]] name = "webpki-roots" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -8412,7 +8856,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -8423,7 +8867,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -8760,18 +9204,125 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.13" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" dependencies = [ "memchr", ] [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap 2.13.0", + 
"prettyplease", + "syn 2.0.116", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.116", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.0", + "indexmap 2.13.0", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap 2.13.0", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "wkb" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a120b336c7ad17749026d50427c23d838ecb50cd64aaea6254b5030152f890a9" +dependencies = [ + "byteorder", + "geo-traits", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "wkt" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb2b923ccc882312e559ffaa832a055ba9d1ac0cc8e86b3e25453247e4b81d7" +dependencies = [ + "geo-traits", + "geo-types", + "log", + "num-traits", + "thiserror 1.0.69", +] [[package]] name = "writeable" @@ -8843,7 +9394,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", "synstructure", ] @@ -8855,18 +9406,18 @@ checksum = "ef19a12dfb29fe39f78e1547e1be49717b84aef8762a4001359ed4f94d3accc1" dependencies = [ "async-trait", "base64 0.22.1", - "http 1.3.1", + "http 1.4.0", "http-body-util", "hyper 1.8.1", "hyper-rustls 0.27.7", "hyper-util", "log", "percent-encoding", - "rustls 0.23.35", + "rustls 0.23.36", "seahash", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", "tokio", "url", @@ -8874,22 +9425,22 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", ] [[package]] @@ -8909,7 +9460,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + "syn 2.0.116", "synstructure", ] @@ -8949,7 +9500,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.110", + 
"syn 2.0.116", ] [[package]] @@ -8961,16 +9512,22 @@ dependencies = [ "arbitrary", "crc32fast", "flate2", - "indexmap 2.12.0", + "indexmap 2.13.0", "memchr", "zopfli", ] [[package]] name = "zlib-rs" -version = "0.5.2" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c745c48e1007337ed136dc99df34128b9faa6ed542d80a1c673cf55a6d7236c8" + +[[package]] +name = "zmij" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f06ae92f42f5e5c42443fd094f245eb656abf56dd7cce9b8b263236565e00f2" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" [[package]] name = "zopfli" diff --git a/Cargo.toml b/Cargo.toml index 7e61e82..85f1490 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,8 @@ path = "src/bin/server.rs" datafusion = "51.0" datafusion-tracing = "51.0.0" instrumented-object-store = "52.0.0" +geodatafusion = "0.2" +geozero = { version = "0.15", features = ["with-wkb"] } duckdb = { version = "1.4.4", features = ["bundled"] } sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "mysql", "chrono", "tls-rustls", "bigdecimal"] } bigdecimal = "0.4" diff --git a/src/datafetch/batch_writer.rs b/src/datafetch/batch_writer.rs index 37718af..1d9a932 100644 --- a/src/datafetch/batch_writer.rs +++ b/src/datafetch/batch_writer.rs @@ -1,6 +1,8 @@ use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; +use std::collections::HashMap; +use super::types::GeometryColumnInfo; use super::DataFetchError; /// Summary returned when a BatchWriter is closed. @@ -16,8 +18,9 @@ pub struct BatchWriteResult { /// /// Implementors must follow this lifecycle: /// 1. `init(schema)` - Initialize with the Arrow schema (must be called first) -/// 2. `write_batch(batch)` - Write batches (can be called zero or more times) -/// 3. `close()` - Finalize and return metadata (consumes the writer) +/// 2. Optionally call `set_geometry_columns()` to enable GeoParquet metadata +/// 3. `write_batch(batch)` - Write batches (can be called zero or more times) +/// 4. `close()` - Finalize and return metadata (consumes the writer) /// /// All methods are synchronous. When used in async contexts, callers should /// ensure writes are batched to minimize blocking time. @@ -25,6 +28,13 @@ pub trait BatchWriter: Send { /// Initialize the writer with the schema for the data to be written. fn init(&mut self, schema: &Schema) -> Result<(), DataFetchError>; + /// Set geometry column metadata for GeoParquet support. + /// Must be called before `init()` for the metadata to be included. + /// The map key is the column name. + fn set_geometry_columns(&mut self, _columns: HashMap) { + // Default implementation does nothing - non-GeoParquet writers can ignore this + } + /// Write a single RecordBatch. May be called multiple times. 
fn write_batch(&mut self, batch: &RecordBatch) -> Result<(), DataFetchError>; diff --git a/src/datafetch/mod.rs b/src/datafetch/mod.rs index ad335ca..c14dee5 100644 --- a/src/datafetch/mod.rs +++ b/src/datafetch/mod.rs @@ -10,4 +10,7 @@ pub use error::DataFetchError; pub use fetcher::DataFetcher; pub use native::{NativeFetcher, StreamingParquetWriter}; pub use orchestrator::FetchOrchestrator; -pub use types::{deserialize_arrow_schema, ColumnMetadata, TableMetadata}; +pub use types::{ + deserialize_arrow_schema, extract_geometry_columns, ColumnMetadata, GeometryColumnInfo, + TableMetadata, GEOMETRY_COLUMNS_METADATA_KEY, +}; diff --git a/src/datafetch/native/duckdb.rs b/src/datafetch/native/duckdb.rs index e2bd36d..85739dd 100644 --- a/src/datafetch/native/duckdb.rs +++ b/src/datafetch/native/duckdb.rs @@ -141,6 +141,7 @@ fn discover_tables_sync( table_name: table, table_type, columns: vec![column], + geometry_columns: std::collections::HashMap::new(), }); } @@ -344,10 +345,24 @@ pub fn duckdb_type_to_arrow(duckdb_type: &str) -> datafusion::arrow::datatypes:: "UUID" => DataType::Utf8, "JSON" => DataType::Utf8, "INTERVAL" => DataType::Interval(datafusion::arrow::datatypes::IntervalUnit::MonthDayNano), + // DuckDB Spatial extension types - stored as Binary (WKB format) + "GEOMETRY" | "POINT_2D" | "LINESTRING_2D" | "POLYGON_2D" | "BOX_2D" | "WKB_BLOB" => { + DataType::Binary + } _ => DataType::Utf8, // Default fallback } } +/// Check if a DuckDB type is a spatial type +pub fn is_spatial_type(duckdb_type: &str) -> bool { + let type_upper = duckdb_type.to_uppercase(); + let base_type = type_upper.split('(').next().unwrap_or(&type_upper).trim(); + matches!( + base_type, + "GEOMETRY" | "POINT_2D" | "LINESTRING_2D" | "POLYGON_2D" | "BOX_2D" | "WKB_BLOB" + ) +} + /// Parse DECIMAL(precision, scale) parameters from type string. /// Returns Decimal128 with extracted parameters, or defaults to (38, 10) if parsing fails. fn parse_decimal_params(type_str: &str) -> datafusion::arrow::datatypes::DataType { diff --git a/src/datafetch/native/iceberg.rs b/src/datafetch/native/iceberg.rs index 58116ad..d72b083 100644 --- a/src/datafetch/native/iceberg.rs +++ b/src/datafetch/native/iceberg.rs @@ -175,6 +175,7 @@ pub async fn discover_tables( table_name: table_ident.name().to_string(), table_type: "BASE TABLE".to_string(), columns, + geometry_columns: std::collections::HashMap::new(), }); } } diff --git a/src/datafetch/native/mysql.rs b/src/datafetch/native/mysql.rs index afe5223..98254ee 100644 --- a/src/datafetch/native/mysql.rs +++ b/src/datafetch/native/mysql.rs @@ -168,6 +168,7 @@ pub async fn discover_tables( table_name: table, table_type, columns: vec![column], + geometry_columns: std::collections::HashMap::new(), }); } } @@ -187,13 +188,6 @@ pub async fn fetch_table( let options = resolve_connect_options(source, secrets).await?; let mut conn = connect_with_ssl_retry(options).await?; - // Build query - use backticks for MySQL identifier escaping - let query = format!( - "SELECT * FROM `{}`.`{}`", - schema.replace('`', "``"), - table.replace('`', "``") - ); - const BATCH_SIZE: usize = 10_000; // Query information_schema for accurate column metadata (especially nullable). 
@@ -223,22 +217,43 @@ pub async fn fetch_table(
         )));
     }

-    let fields: Vec<Field> = schema_rows
-        .iter()
-        .map(|row| {
-            let col_name: String = row.get(0);
-            let data_type: String = row.get(1);
-            let is_nullable: String = row.get(2);
-            Field::new(
-                col_name,
-                mysql_type_to_arrow(&data_type),
-                is_nullable.to_uppercase() == "YES",
-            )
-        })
-        .collect();
+    // Build column list with ST_AsBinary for spatial columns
+    let mut fields: Vec<Field> = Vec::with_capacity(schema_rows.len());
+    let mut column_exprs: Vec<String> = Vec::with_capacity(schema_rows.len());
+
+    for row in &schema_rows {
+        let col_name: String = row.get(0);
+        let data_type: String = row.get(1);
+        let is_nullable: String = row.get(2);
+
+        let arrow_type = mysql_type_to_arrow(&data_type);
+        fields.push(Field::new(
+            &col_name,
+            arrow_type.clone(),
+            is_nullable.to_uppercase() == "YES",
+        ));
+
+        // Escape column name for SQL
+        let escaped_col = format!("`{}`", col_name.replace('`', "``"));
+
+        // Use ST_AsBinary for spatial columns to get WKB format
+        if is_spatial_type(&data_type) {
+            column_exprs.push(format!("ST_AsBinary({}) AS {}", escaped_col, escaped_col));
+        } else {
+            column_exprs.push(escaped_col);
+        }
+    }

     let arrow_schema = Schema::new(fields);

+    // Build query with explicit column list
+    let query = format!(
+        "SELECT {} FROM `{}`.`{}`",
+        column_exprs.join(", "),
+        schema.replace('`', "``"),
+        table.replace('`', "``")
+    );
+
     // Stream rows instead of loading all into memory
     let mut stream = sqlx::query(&query).fetch(&mut conn);

@@ -366,10 +381,31 @@ pub fn mysql_type_to_arrow(mysql_type: &str) -> DataType {
         "year" => DataType::Int32,
         "json" => DataType::Utf8,
         "bit" => DataType::Binary,
+        // MySQL spatial types - stored as Binary (WKB format)
+        "geometry" | "point" | "linestring" | "polygon" | "multipoint" | "multilinestring"
+        | "multipolygon" | "geometrycollection" | "geomcollection" => DataType::Binary,
         _ => DataType::Utf8, // Default fallback
     }
 }

+/// Check if a MySQL type is a spatial type
+pub fn is_spatial_type(mysql_type: &str) -> bool {
+    let type_lower = mysql_type.to_lowercase();
+    let base_type = type_lower.split('(').next().unwrap_or(&type_lower).trim();
+    matches!(
+        base_type,
+        "geometry"
+            | "point"
+            | "linestring"
+            | "polygon"
+            | "multipoint"
+            | "multilinestring"
+            | "multipolygon"
+            | "geometrycollection"
+            | "geomcollection"
+    )
+}
+
 /// Parse a decimal string to i128 with the given precision and scale.
 /// For example, "123.45" with precision=10, scale=2 becomes 12345.
/// @@ -1105,8 +1141,30 @@ mod tests { DataType::Utf8 )); assert!(matches!(mysql_type_to_arrow("custom_type"), DataType::Utf8)); - assert!(matches!(mysql_type_to_arrow("geometry"), DataType::Utf8)); - assert!(matches!(mysql_type_to_arrow("point"), DataType::Utf8)); + // MySQL spatial types map to Binary (WKB format) + assert!(matches!(mysql_type_to_arrow("geometry"), DataType::Binary)); + assert!(matches!(mysql_type_to_arrow("point"), DataType::Binary)); + assert!(matches!( + mysql_type_to_arrow("linestring"), + DataType::Binary + )); + assert!(matches!(mysql_type_to_arrow("polygon"), DataType::Binary)); + assert!(matches!( + mysql_type_to_arrow("multipoint"), + DataType::Binary + )); + assert!(matches!( + mysql_type_to_arrow("multilinestring"), + DataType::Binary + )); + assert!(matches!( + mysql_type_to_arrow("multipolygon"), + DataType::Binary + )); + assert!(matches!( + mysql_type_to_arrow("geometrycollection"), + DataType::Binary + )); } // ========================================================================= diff --git a/src/datafetch/native/parquet_writer.rs b/src/datafetch/native/parquet_writer.rs index eb7a3f7..1906a5d 100644 --- a/src/datafetch/native/parquet_writer.rs +++ b/src/datafetch/native/parquet_writer.rs @@ -1,35 +1,30 @@ -//! Centralized streaming Parquet writer with configurable compression +//! Centralized streaming Parquet writer with configurable compression and GeoParquet support use datafusion::arrow::datatypes::Schema; use datafusion::arrow::record_batch::RecordBatch; use datafusion::parquet::arrow::ArrowWriter; use datafusion::parquet::basic::Compression; use datafusion::parquet::file::properties::{BloomFilterPosition, WriterProperties, WriterVersion}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; use std::fs::File; use std::path::{Path, PathBuf}; use std::sync::Arc; use crate::datafetch::batch_writer::{BatchWriteResult, BatchWriter}; +use crate::datafetch::types::GeometryColumnInfo; use crate::datafetch::DataFetchError; -/// Build shared writer properties for parquet files. -fn writer_properties() -> WriterProperties { - WriterProperties::builder() - .set_writer_version(WriterVersion::PARQUET_2_0) - .set_compression(Compression::LZ4) - .set_bloom_filter_enabled(true) - .set_bloom_filter_fpp(0.01) - .set_bloom_filter_position(BloomFilterPosition::End) - .build() -} - /// Streaming Parquet writer that writes batches incrementally to disk. +/// Supports GeoParquet metadata for geometry columns. 
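+///
+/// A minimal usage sketch, added for illustration (it mirrors the unit tests
+/// below; the output path, column name, and SRID are placeholders):
+///
+/// ```ignore
+/// let mut writer = StreamingParquetWriter::new(PathBuf::from("/tmp/out.parquet"));
+///
+/// // Geometry columns must be declared before init() so the "geo" key-value
+/// // metadata is attached to the Parquet footer.
+/// let mut geom_cols = HashMap::new();
+/// geom_cols.insert(
+///     "geom".to_string(),
+///     GeometryColumnInfo { srid: 4326, geometry_type: Some("Point".to_string()) },
+/// );
+/// writer.set_geometry_columns(geom_cols);
+///
+/// writer.init(&schema)?;       // Arrow schema with a Binary "geom" column
+/// writer.write_batch(&batch)?; // batches carry WKB bytes in the geometry column
+/// let result = Box::new(writer).close()?;
+/// ```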
 ///
-/// Lifecycle: new(path) -> init(schema) -> write_batch()* -> close()
+/// Lifecycle: new(path) -> set_geometry_columns() (optional) -> init(schema) -> write_batch()* -> close()
 pub struct StreamingParquetWriter {
     path: PathBuf,
     writer: Option<ArrowWriter<File>>,
     row_count: usize,
+    /// Geometry column info for GeoParquet metadata
+    geometry_columns: HashMap<String, GeometryColumnInfo>,
 }

 impl StreamingParquetWriter {
@@ -40,6 +35,7 @@ impl StreamingParquetWriter {
             path,
             writer: None,
             row_count: 0,
+            geometry_columns: HashMap::new(),
         }
     }

@@ -49,7 +45,113 @@ impl StreamingParquetWriter {
     }
 }

+/// GeoParquet metadata structure (version 1.1.0)
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct GeoParquetMetadata {
+    version: String,
+    primary_column: String,
+    columns: HashMap<String, GeoColumnMetadata>,
+}
+
+/// Per-column GeoParquet metadata
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct GeoColumnMetadata {
+    encoding: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    geometry_types: Option<Vec<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    crs: Option<CrsMetadata>,
+}
+
+/// CRS metadata in PROJJSON-style format (simplified for EPSG codes)
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct CrsMetadata {
+    id: CrsId,
+}
+
+/// CRS identifier
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct CrsId {
+    authority: String,
+    code: i32,
+}
+
+impl GeoParquetMetadata {
+    /// Create GeoParquet metadata from geometry column info
+    fn from_geometry_columns(columns: &HashMap<String, GeometryColumnInfo>) -> Option<Self> {
+        if columns.is_empty() {
+            return None;
+        }
+
+        // Find the first geometry column to be the primary
+        let primary_column = columns.keys().next()?.clone();
+
+        let geo_columns: HashMap<String, GeoColumnMetadata> = columns
+            .iter()
+            .map(|(name, info)| {
+                let crs = if info.srid > 0 {
+                    Some(CrsMetadata {
+                        id: CrsId {
+                            authority: "EPSG".to_string(),
+                            code: info.srid,
+                        },
+                    })
+                } else {
+                    None
+                };
+
+                let geometry_types = info
+                    .geometry_type
+                    .as_ref()
+                    .map(|t| vec![normalize_geometry_type(t)]);
+
+                (
+                    name.clone(),
+                    GeoColumnMetadata {
+                        encoding: "WKB".to_string(),
+                        geometry_types,
+                        crs,
+                    },
+                )
+            })
+            .collect();
+
+        Some(GeoParquetMetadata {
+            version: "1.1.0".to_string(),
+            primary_column,
+            columns: geo_columns,
+        })
+    }
+
+    /// Serialize to JSON string
+    fn to_json(&self) -> Result<String, DataFetchError> {
+        serde_json::to_string(self).map_err(|e| {
+            DataFetchError::Storage(format!("Failed to serialize GeoParquet metadata: {}", e))
+        })
+    }
+}
+
+/// Normalize geometry type names to GeoParquet standard format
+fn normalize_geometry_type(geom_type: &str) -> String {
+    match geom_type.to_uppercase().as_str() {
+        "POINT" => "Point",
+        "LINESTRING" => "LineString",
+        "POLYGON" => "Polygon",
+        "MULTIPOINT" => "MultiPoint",
+        "MULTILINESTRING" => "MultiLineString",
+        "MULTIPOLYGON" => "MultiPolygon",
+        "GEOMETRYCOLLECTION" => "GeometryCollection",
+        "GEOMETRY" => "Geometry",
+        _ => geom_type,
+    }
+    .to_string()
+}
+
 impl BatchWriter for StreamingParquetWriter {
+    fn set_geometry_columns(&mut self, columns: HashMap<String, GeometryColumnInfo>) {
+        self.geometry_columns = columns;
+    }
+
     fn init(&mut self, schema: &Schema) -> Result<(), DataFetchError> {
         // Create parent directories if needed
         if let Some(parent) = self.path.parent() {
@@ -61,7 +163,23 @@ impl BatchWriter for StreamingParquetWriter {
         let file = File::create(&self.path)
             .map_err(|e| DataFetchError::Storage(format!("Failed to create file: {}", e)))?;

-        let props = writer_properties();
+        // Build writer properties with bloom filters and optional GeoParquet metadata
+        let mut props_builder = WriterProperties::builder()
+
.set_writer_version(WriterVersion::PARQUET_2_0) + .set_compression(Compression::LZ4) + .set_bloom_filter_enabled(true) + .set_bloom_filter_fpp(0.01) + .set_bloom_filter_position(BloomFilterPosition::End); + + // Add GeoParquet metadata if geometry columns are present + if let Some(geo_meta) = GeoParquetMetadata::from_geometry_columns(&self.geometry_columns) { + let geo_json = geo_meta.to_json()?; + props_builder = props_builder.set_key_value_metadata(Some(vec![ + datafusion::parquet::file::metadata::KeyValue::new("geo".to_string(), geo_json), + ])); + } + + let props = props_builder.build(); let writer = ArrowWriter::try_new(file, Arc::new(schema.clone()), Some(props)) .map_err(|e| DataFetchError::Storage(e.to_string()))?; @@ -105,8 +223,10 @@ impl BatchWriter for StreamingParquetWriter { #[cfg(test)] mod tests { use super::*; - use datafusion::arrow::array::Int32Array; + use crate::datafetch::types::GeometryColumnInfo; + use datafusion::arrow::array::{BinaryArray, Int32Array}; use datafusion::arrow::datatypes::{DataType, Field}; + use datafusion::parquet::file::reader::{FileReader, SerializedFileReader}; use tempfile::tempdir; #[test] @@ -341,4 +461,139 @@ mod tests { "Expected bloom filter to be present in parquet metadata" ); } + + #[test] + fn test_geoparquet_metadata() { + let dir = tempdir().unwrap(); + let path = dir.path().join("geoparquet.parquet"); + + // Schema with geometry column + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("geom", DataType::Binary, true), + ]); + + // Create writer with geometry column info + let mut writer = StreamingParquetWriter::new(path.clone()); + + // Set geometry column metadata + let mut geom_cols = HashMap::new(); + geom_cols.insert( + "geom".to_string(), + GeometryColumnInfo { + srid: 4326, + geometry_type: Some("Point".to_string()), + }, + ); + writer.set_geometry_columns(geom_cols); + + // Init and write some data + writer.init(&schema).unwrap(); + + // Simple WKB for POINT(0 0) + let wkb_point: Vec = vec![ + 0x01, // little endian + 0x01, 0x00, 0x00, 0x00, // type = Point (1) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // x = 0.0 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // y = 0.0 + ]; + + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![ + Arc::new(Int32Array::from(vec![1])), + Arc::new(BinaryArray::from(vec![Some(wkb_point.as_slice())])), + ], + ) + .unwrap(); + + writer.write_batch(&batch).unwrap(); + Box::new(writer).close().unwrap(); + + // Verify GeoParquet metadata was written + let file = File::open(&path).unwrap(); + let reader = SerializedFileReader::new(file).unwrap(); + let file_metadata = reader.metadata().file_metadata(); + + // Find the "geo" key in file metadata + let geo_metadata = file_metadata + .key_value_metadata() + .and_then(|kv| kv.iter().find(|item| item.key == "geo")); + + assert!( + geo_metadata.is_some(), + "GeoParquet 'geo' metadata not found" + ); + + let geo_value = geo_metadata.unwrap().value.as_ref().unwrap(); + + // Parse and verify the GeoParquet metadata + let parsed: serde_json::Value = serde_json::from_str(geo_value).unwrap(); + + assert_eq!(parsed["version"], "1.1.0"); + assert_eq!(parsed["primary_column"], "geom"); + assert_eq!(parsed["columns"]["geom"]["encoding"], "WKB"); + assert_eq!(parsed["columns"]["geom"]["crs"]["id"]["authority"], "EPSG"); + assert_eq!(parsed["columns"]["geom"]["crs"]["id"]["code"], 4326); + } + + #[test] + fn test_geoparquet_metadata_serialization() { + let mut columns = HashMap::new(); + columns.insert( + 
"the_geom".to_string(), + GeometryColumnInfo { + srid: 4326, + geometry_type: Some("Polygon".to_string()), + }, + ); + + let geo_meta = GeoParquetMetadata::from_geometry_columns(&columns); + assert!(geo_meta.is_some()); + + let meta = geo_meta.unwrap(); + assert_eq!(meta.version, "1.1.0"); + assert_eq!(meta.primary_column, "the_geom"); + + let json = meta.to_json().unwrap(); + assert!(json.contains("\"version\":\"1.1.0\"")); + assert!(json.contains("\"encoding\":\"WKB\"")); + assert!(json.contains("\"EPSG\"")); + assert!(json.contains("4326")); + } + + #[test] + fn test_no_geoparquet_metadata_without_geometry_columns() { + let dir = tempdir().unwrap(); + let path = dir.path().join("regular.parquet"); + + // Schema without geometry column + let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]); + + let mut writer: Box = Box::new(StreamingParquetWriter::new(path.clone())); + writer.init(&schema).unwrap(); + + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ) + .unwrap(); + + writer.write_batch(&batch).unwrap(); + writer.close().unwrap(); + + // Verify no GeoParquet metadata + let file = File::open(&path).unwrap(); + let reader = SerializedFileReader::new(file).unwrap(); + let file_metadata = reader.metadata().file_metadata(); + + let geo_metadata = file_metadata + .key_value_metadata() + .and_then(|kv| kv.iter().find(|item| item.key == "geo")); + + assert!( + geo_metadata.is_none(), + "Regular parquet should not have 'geo' metadata" + ); + } } diff --git a/src/datafetch/native/postgres.rs b/src/datafetch/native/postgres.rs index 8abfabd..ac4c80e 100644 --- a/src/datafetch/native/postgres.rs +++ b/src/datafetch/native/postgres.rs @@ -16,9 +16,11 @@ use tracing::warn; use urlencoding::encode; use crate::datafetch::batch_writer::BatchWriter; +use crate::datafetch::types::GeometryColumnInfo; use crate::datafetch::{ColumnMetadata, DataFetchError, TableMetadata}; use crate::secrets::SecretManager; use crate::source::Source; +use std::collections::HashMap; /// Build a PostgreSQL connection string from source configuration and resolved password. 
 fn build_connection_string(
@@ -111,6 +113,10 @@ pub async fn discover_tables(
     let connection_string = resolve_connection_string(source, secrets).await?;
     let mut conn = connect_with_ssl_retry(&connection_string).await?;

+    // Query geometry column metadata from PostGIS if available
+    // This includes SRID and geometry type information
+    let geometry_info = discover_geometry_columns(&mut conn).await;
+
     let rows = sqlx::query(
         r#"
         SELECT
             t.table_schema,
             t.table_name,
             t.table_type,
             c.column_name,
-            c.data_type,
+            c.udt_name,
             c.is_nullable,
             c.ordinal_position::int
         FROM information_schema.tables t
@@ -142,13 +148,13 @@ pub async fn discover_tables(
         let table: String = row.get(2);
         let table_type: String = row.get(3);
         let col_name: String = row.get(4);
-        let data_type: String = row.get(5);
+        let udt_name: String = row.get(5);
         let is_nullable: String = row.get(6);
         let ordinal: i32 = row.get(7);

         let column = ColumnMetadata {
-            name: col_name,
-            data_type: pg_type_to_arrow(&data_type),
+            name: col_name.clone(),
+            data_type: pg_type_to_arrow(&udt_name),
             nullable: is_nullable.to_uppercase() == "YES",
             ordinal_position: ordinal,
         };
@@ -160,12 +166,17 @@ pub async fn discover_tables(
         {
             existing.columns.push(column);
         } else {
+            // Build geometry columns map for this table
+            let table_key = format!("{}.{}", schema, table);
+            let geometry_columns = geometry_info.get(&table_key).cloned().unwrap_or_default();
+
             tables.push(TableMetadata {
                 catalog_name: catalog,
                 schema_name: schema,
                 table_name: table,
                 table_type,
                 columns: vec![column],
+                geometry_columns,
             });
         }
     }
@@ -173,6 +184,61 @@ pub async fn discover_tables(
     Ok(tables)
 }

+/// Discover PostGIS geometry columns with SRID and type information.
+/// Returns a map of "schema.table" -> column_name -> GeometryColumnInfo
+async fn discover_geometry_columns(
+    conn: &mut PgConnection,
+) -> HashMap<String, HashMap<String, GeometryColumnInfo>> {
+    // Query the PostGIS geometry_columns view if it exists
+    // This view contains SRID and geometry type for all geometry/geography columns
+    let result = sqlx::query(
+        r#"
+        SELECT
+            f_table_schema,
+            f_table_name,
+            f_geometry_column,
+            srid,
+            type
+        FROM geometry_columns
+        UNION ALL
+        SELECT
+            f_table_schema,
+            f_table_name,
+            f_geography_column,
+            srid,
+            type
+        FROM geography_columns
+        "#,
+    )
+    .fetch_all(conn)
+    .await;

+    let mut geometry_info: HashMap<String, HashMap<String, GeometryColumnInfo>> = HashMap::new();
+
+    if let Ok(rows) = result {
+        for row in rows {
+            let schema: String = row.get(0);
+            let table: String = row.get(1);
+            let column: String = row.get(2);
+            let srid: i32 = row.get(3);
+            let geom_type: String = row.get(4);
+
+            let table_key = format!("{}.{}", schema, table);
+            geometry_info.entry(table_key).or_default().insert(
+                column,
+                GeometryColumnInfo {
+                    srid,
+                    geometry_type: Some(geom_type),
+                },
+            );
+        }
+    }
+    // If PostGIS is not installed or query fails, return empty map silently
+    // Geometry columns will still be detected by type name, just without SRID info
+
+    geometry_info
+}
+
 /// Fetch table data and write to Parquet using streaming to avoid OOM on large tables
 pub async fn fetch_table(
     source: &Source,
@@ -193,15 +259,17 @@ pub async fn fetch_table(
     //
     // For NUMERIC/DECIMAL, we need to construct the full type spec with precision/scale
     // because information_schema.data_type just returns "numeric" without parameters.
+    // We use udt_name to detect PostGIS geometry/geography types correctly.
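+    // Note (illustrative): for PostGIS columns, information_schema reports
+    // data_type = 'USER-DEFINED', so the geometry/geography detection below
+    // relies on udt_name instead. For example, a column declared as
+    // GEOMETRY(Point, 4326) typically has udt_name = 'geometry'.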
     let schema_rows = sqlx::query(
         r#"
         SELECT
             column_name,
+            udt_name,
             CASE
                 WHEN data_type IN ('numeric', 'decimal') AND numeric_precision IS NOT NULL
                 THEN data_type || '(' || numeric_precision || ',' || COALESCE(numeric_scale, 0) || ')'
                 ELSE data_type
-            END as data_type,
+            END as data_type_full,
             is_nullable
         FROM information_schema.columns
         WHERE table_schema = $1 AND table_name = $2
@@ -224,15 +292,23 @@ pub async fn fetch_table(
     // Unconstrained NUMERIC/DECIMAL (without precision/scale) must be cast to TEXT
     // because they can have arbitrary precision that exceeds i128/Decimal128.
     // Constrained NUMERIC(p,s) is decoded via BigDecimal and stored as Decimal128.
+    // Geometry/geography columns use ST_AsBinary to fetch as standard WKB.
     let mut fields: Vec<Field> = Vec::with_capacity(schema_rows.len());
     let mut column_exprs: Vec<String> = Vec::with_capacity(schema_rows.len());

     for row in &schema_rows {
         let col_name: String = row.get(0);
-        let data_type: String = row.get(1);
-        let is_nullable: String = row.get(2);
+        let udt_name: String = row.get(1);
+        let data_type_full: String = row.get(2);
+        let is_nullable: String = row.get(3);

-        let arrow_type = pg_type_to_arrow(&data_type);
+        // Use udt_name to detect geometry types, but fall back to data_type_full for numeric precision
+        let type_for_arrow = if is_geometry_type(&udt_name) {
+            &udt_name
+        } else {
+            &data_type_full
+        };
+        let arrow_type = pg_type_to_arrow(type_for_arrow);
         fields.push(Field::new(
             &col_name,
             arrow_type.clone(),
@@ -242,10 +318,13 @@ pub async fn fetch_table(
         // Escape column name for SQL
         let escaped_col = format!("\"{}\"", col_name.replace('"', "\"\""));

-        // Only cast unconstrained NUMERIC to TEXT (those mapped to Utf8)
-        // Constrained NUMERIC(p,s) is mapped to Decimal128 and decoded via BigDecimal
-        if matches!(arrow_type, DataType::Utf8) {
-            let type_lower = data_type.to_lowercase();
+        // Handle special column types that need SQL-level conversion
+        if is_geometry_type(&udt_name) {
+            // Use ST_AsBinary to convert geometry/geography to standard WKB
+            // This strips the SRID from EWKB, giving us portable WKB bytes
+            column_exprs.push(format!("ST_AsBinary({}) AS {}", escaped_col, escaped_col));
+        } else if matches!(arrow_type, DataType::Utf8) {
+            let type_lower = data_type_full.to_lowercase();
             let base_type = type_lower.split('(').next().unwrap_or(&type_lower);
             if base_type == "numeric" || base_type == "decimal" {
                 column_exprs.push(format!("{}::text AS {}", escaped_col, escaped_col));
@@ -349,10 +428,55 @@ pub fn pg_type_to_arrow(pg_type: &str) -> DataType {
         "uuid" => DataType::Utf8,
         "json" | "jsonb" => DataType::Utf8,
         "interval" => DataType::Utf8,
+        // PostGIS geometry types - stored as Binary (WKB format)
+        "geometry" | "geography" => DataType::Binary,
         _ => DataType::Utf8, // Default fallback
     }
 }

+/// Check if a PostgreSQL type is a geometry or geography type
+pub fn is_geometry_type(pg_type: &str) -> bool {
+    let type_lower = pg_type.to_lowercase();
+    let base_type = type_lower.split('(').next().unwrap_or(&type_lower).trim();
+    matches!(base_type, "geometry" | "geography")
+}
+
+/// Parse SRID from a PostGIS type string like "geometry(Point,4326)" or "geography(Polygon,4326)"
+/// Returns (geometry_type, srid) tuple if parseable
+pub fn parse_geometry_type_params(pg_type: &str) -> Option<(String, i32)> {
+    let type_lower = pg_type.to_lowercase();
+
+    // Check if it's a geometry/geography type with parameters
+    if !type_lower.starts_with("geometry(") && !type_lower.starts_with("geography(") {
+        return None;
+    }
+
+    // Extract content between parentheses
+    let start = type_lower.find('(')?;
+    let end = type_lower.find(')')?;
+    let params = &type_lower[start + 1..end];
+
+    // Split by comma: "Point,4326" or just "Point" or "4326"
+    let parts: Vec<&str> = params.split(',').map(|s| s.trim()).collect();
+
+    match parts.len() {
+        1 => {
+            // Could be just type or just SRID
+            if let Ok(srid) = parts[0].parse::<i32>() {
+                Some(("geometry".to_string(), srid))
+            } else {
+                Some((parts[0].to_string(), 0))
+            }
+        }
+        2 => {
+            let geom_type = parts[0].to_string();
+            let srid = parts[1].parse::<i32>().unwrap_or(0);
+            Some((geom_type, srid))
+        }
+        _ => None,
+    }
+}
+
 /// Parse PostgreSQL NUMERIC(precision, scale) parameters.
 /// For constrained NUMERIC(p,s), returns Decimal128 with those params.
 /// For unconstrained NUMERIC, returns Utf8 to preserve arbitrary precision.
@@ -825,12 +949,12 @@ mod tests {
     }

     // =========================================================================
-    // Geometric types (fall back to Utf8)
+    // Geometric types (PostgreSQL native - fall back to Utf8)
     // =========================================================================

     #[test]
-    fn test_pg_geometric_types() {
-        // Geometric types are not explicitly handled, fall back to Utf8
+    fn test_pg_native_geometric_types() {
+        // PostgreSQL native geometric types (not PostGIS) fall back to Utf8
         assert!(matches!(pg_type_to_arrow("point"), DataType::Utf8));
         assert!(matches!(pg_type_to_arrow("line"), DataType::Utf8));
         assert!(matches!(pg_type_to_arrow("lseg"), DataType::Utf8));
@@ -840,6 +964,97 @@ mod tests {
         assert!(matches!(pg_type_to_arrow("circle"), DataType::Utf8));
     }

+    // =========================================================================
+    // PostGIS geometry types (map to Binary for WKB storage)
+    // =========================================================================
+
+    #[test]
+    fn test_postgis_geometry_types() {
+        // PostGIS geometry type maps to Binary (WKB format)
+        assert!(matches!(pg_type_to_arrow("geometry"), DataType::Binary));
+        assert!(matches!(pg_type_to_arrow("GEOMETRY"), DataType::Binary));
+        assert!(matches!(pg_type_to_arrow("Geometry"), DataType::Binary));
+    }
+
+    #[test]
+    fn test_postgis_geography_types() {
+        // PostGIS geography type maps to Binary (WKB format)
+        assert!(matches!(pg_type_to_arrow("geography"), DataType::Binary));
+        assert!(matches!(pg_type_to_arrow("GEOGRAPHY"), DataType::Binary));
+        assert!(matches!(pg_type_to_arrow("Geography"), DataType::Binary));
+    }
+
+    #[test]
+    fn test_postgis_parameterized_geometry_types() {
+        // Parameterized geometry types still map to Binary
+        assert!(matches!(
+            pg_type_to_arrow("geometry(Point)"),
+            DataType::Binary
+        ));
+        assert!(matches!(
+            pg_type_to_arrow("geometry(Point,4326)"),
+            DataType::Binary
+        ));
+        assert!(matches!(
+            pg_type_to_arrow("geometry(Polygon,4326)"),
+            DataType::Binary
+        ));
+        assert!(matches!(
+            pg_type_to_arrow("geometry(MultiPolygon,3857)"),
+            DataType::Binary
+        ));
+        assert!(matches!(
+            pg_type_to_arrow("geography(Point,4326)"),
+            DataType::Binary
+        ));
+    }
+
+    #[test]
+    fn test_is_geometry_type() {
+        // Positive cases
+        assert!(is_geometry_type("geometry"));
+        assert!(is_geometry_type("geography"));
+        assert!(is_geometry_type("GEOMETRY"));
+        assert!(is_geometry_type("GEOGRAPHY"));
+        assert!(is_geometry_type("geometry(Point,4326)"));
+        assert!(is_geometry_type("geography(Polygon,4326)"));
+
+        // Negative cases
+        assert!(!is_geometry_type("point")); // PostgreSQL native, not PostGIS
+        assert!(!is_geometry_type("polygon")); // PostgreSQL native, not PostGIS
+
assert!(!is_geometry_type("text")); + assert!(!is_geometry_type("integer")); + assert!(!is_geometry_type("bytea")); + } + + #[test] + fn test_parse_geometry_type_params() { + // Full parameterized type + let result = parse_geometry_type_params("geometry(Point,4326)"); + assert!(result.is_some()); + let (geom_type, srid) = result.unwrap(); + assert_eq!(geom_type, "point"); + assert_eq!(srid, 4326); + + // Type only, no SRID + let result = parse_geometry_type_params("geometry(Polygon)"); + assert!(result.is_some()); + let (geom_type, srid) = result.unwrap(); + assert_eq!(geom_type, "polygon"); + assert_eq!(srid, 0); + + // Geography type + let result = parse_geometry_type_params("geography(MultiPolygon,3857)"); + assert!(result.is_some()); + let (geom_type, srid) = result.unwrap(); + assert_eq!(geom_type, "multipolygon"); + assert_eq!(srid, 3857); + + // No parameters - returns None + assert!(parse_geometry_type_params("geometry").is_none()); + assert!(parse_geometry_type_params("text").is_none()); + } + // ========================================================================= // Array types (fall back to Utf8) // ========================================================================= diff --git a/src/datafetch/native/snowflake.rs b/src/datafetch/native/snowflake.rs index ce69a0a..d2d62a8 100644 --- a/src/datafetch/native/snowflake.rs +++ b/src/datafetch/native/snowflake.rs @@ -235,6 +235,7 @@ pub async fn discover_tables( table_name, table_type, columns: vec![column], + geometry_columns: std::collections::HashMap::new(), }); } } @@ -264,9 +265,61 @@ pub async fn fetch_table( } }; - // Build SELECT query with properly quoted identifiers + // Query column info to detect spatial columns + let schema_query = format!( + r#" + SELECT column_name, data_type + FROM "{database}".information_schema.columns + WHERE table_schema = '{schema}' AND table_name = '{table}' + ORDER BY ordinal_position + "#, + database = database.replace('"', "\"\""), + schema = schema.replace('\'', "''"), + table = table.replace('\'', "''") + ); + + // Build column expressions with ST_AsBinary for spatial columns + let column_exprs: Vec = { + let schema_result = client + .exec(&schema_query) + .await + .map_err(|e| DataFetchError::Query(format!("Schema query failed: {}", e)))?; + + match schema_result { + QueryResult::Arrow(batches) => { + let mut exprs = Vec::new(); + for batch in batches { + let converted = convert_arrow_batch(&batch)?; + for row in 0..converted.num_rows() { + if let (Some(col_name), Some(data_type)) = ( + get_string_value(converted.column(0).as_ref(), row), + get_string_value(converted.column(1).as_ref(), row), + ) { + let escaped_col = format!("\"{}\"", col_name.replace('"', "\"\"")); + if is_spatial_type(&data_type) { + exprs.push(format!( + "ST_AsBinary({}) AS {}", + escaped_col, escaped_col + )); + } else { + exprs.push(escaped_col); + } + } + } + } + exprs + } + _ => { + // Fallback to SELECT * if schema query fails + vec!["*".to_string()] + } + } + }; + + // Build SELECT query with column expressions let query = format!( - r#"SELECT * FROM "{}"."{}"."{}"#, + r#"SELECT {} FROM "{}"."{}"."{}"#, + column_exprs.join(", "), database.replace('"', "\"\""), schema.replace('"', "\"\""), table.replace('"', "\"\"") @@ -409,13 +462,20 @@ fn snowflake_type_to_arrow(sf_type: &str) -> DataType { // Semi-structured types - store as JSON strings "VARIANT" | "OBJECT" | "ARRAY" => DataType::LargeUtf8, - // Geography/Geometry - store as string - "GEOGRAPHY" | "GEOMETRY" => DataType::Utf8, + // Geography/Geometry - 
stored as Binary (WKB format)
+        "GEOGRAPHY" | "GEOMETRY" => DataType::Binary,

         _ => DataType::Utf8, // Default fallback
     }
 }

+/// Check if a Snowflake type is a spatial type
+pub fn is_spatial_type(sf_type: &str) -> bool {
+    let type_upper = sf_type.to_uppercase();
+    let base_type = type_upper.split('(').next().unwrap_or(&type_upper).trim();
+    matches!(base_type, "GEOGRAPHY" | "GEOMETRY")
+}
+
 /// Extract string value from Arrow array (supports both Utf8 and LargeUtf8)
 fn get_string_value(array: &dyn datafusion::arrow::array::Array, row: usize) -> Option<String> {
     use datafusion::arrow::array::{LargeStringArray, StringArray};
diff --git a/src/datafetch/orchestrator.rs b/src/datafetch/orchestrator.rs
index f589d8b..0075e25 100644
--- a/src/datafetch/orchestrator.rs
+++ b/src/datafetch/orchestrator.rs
@@ -3,7 +3,8 @@ use std::sync::Arc;

 use super::batch_writer::BatchWriter;
 use super::native::StreamingParquetWriter;
-use super::{DataFetchError, DataFetcher, TableMetadata};
+use super::types::extract_geometry_columns;
+use super::{deserialize_arrow_schema, DataFetchError, DataFetcher, TableMetadata};
 use crate::catalog::CatalogManager;
 use crate::secrets::SecretManager;
 use crate::source::Source;
@@ -61,8 +62,26 @@ impl FetchOrchestrator {
             .prepare_cache_write(connection_id, schema_name, table_name);

         // Create writer
-        let mut writer: Box<dyn BatchWriter> =
-            Box::new(StreamingParquetWriter::new(handle.local_path.clone()));
+        let mut writer = StreamingParquetWriter::new(handle.local_path.clone());
+
+        // Try to get geometry column info from the catalog's stored schema
+        // This enables GeoParquet metadata in the output file
+        if let Ok(Some(table_info)) = self
+            .catalog
+            .get_table(connection_id, schema_name, table_name)
+            .await
+        {
+            if let Some(schema_json) = &table_info.arrow_schema_json {
+                if let Ok(schema) = deserialize_arrow_schema(schema_json) {
+                    let geometry_columns = extract_geometry_columns(&schema);
+                    if !geometry_columns.is_empty() {
+                        writer.set_geometry_columns(geometry_columns);
+                    }
+                }
+            }
+        }
+
+        let mut boxed_writer: Box<dyn BatchWriter> = Box::new(writer);

         // Fetch the table data into writer
         self.fetcher
@@ -72,13 +91,13 @@ impl FetchOrchestrator {
                 None, // catalog
                 schema_name,
                 table_name,
-                writer.as_mut(),
+                boxed_writer.as_mut(),
             )
             .await
             .map_err(|e| anyhow::anyhow!("Failed to fetch table: {}", e))?;

         // Close writer and get row count
-        let result = writer
+        let result = boxed_writer
             .close()
             .map_err(|e| anyhow::anyhow!("Failed to close writer: {}", e))?;
         let row_count = result.rows;
@@ -304,6 +323,7 @@ mod tests {
                     nullable: false,
                     ordinal_position: 0,
                 }],
+                geometry_columns: std::collections::HashMap::new(),
             }])
         }
diff --git a/src/datafetch/types.rs b/src/datafetch/types.rs
index daf405f..f1bb7c1 100644
--- a/src/datafetch/types.rs
+++ b/src/datafetch/types.rs
@@ -1,5 +1,7 @@
 use anyhow::Result;
 use datafusion::arrow::datatypes::{DataType as ArrowDataType, Field, Schema};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
 use std::sync::Arc;

 /// Metadata for a discovered table
@@ -10,6 +12,8 @@ pub struct TableMetadata {
     pub table_name: String,
     pub table_type: String,
     pub columns: Vec<ColumnMetadata>,
+    /// Geometry column metadata for GeoParquet support
+    pub geometry_columns: HashMap<String, GeometryColumnInfo>,
 }

 /// Metadata for a table column
@@ -21,18 +25,66 @@ pub struct ColumnMetadata {
     pub ordinal_position: i32,
 }

+/// Geometry column information for GeoParquet metadata
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GeometryColumnInfo {
+    /// The SRID (Spatial Reference System Identifier) for this geometry column.
+    /// Common values: 4326 (WGS84), 3857 (Web Mercator), 0 (unspecified)
+    pub srid: i32,
+    /// The geometry type if known (e.g., "Point", "Polygon", "Geometry")
+    pub geometry_type: Option<String>,
+}
+
+impl Default for TableMetadata {
+    fn default() -> Self {
+        Self {
+            catalog_name: None,
+            schema_name: String::new(),
+            table_name: String::new(),
+            table_type: String::new(),
+            columns: Vec::new(),
+            geometry_columns: HashMap::new(),
+        }
+    }
+}
+
+/// Key used to store geometry column metadata in Arrow schema metadata
+pub const GEOMETRY_COLUMNS_METADATA_KEY: &str = "runtimedb:geometry_columns";
+
 impl TableMetadata {
     /// Convert column metadata to an Arrow Schema
+    /// Includes geometry column metadata in schema metadata if present
     pub fn to_arrow_schema(&self) -> Arc<Schema> {
         let fields: Vec<Field> = self
             .columns
             .iter()
             .map(|col| Field::new(&col.name, col.data_type.clone(), col.nullable))
             .collect();
-        Arc::new(Schema::new(fields))
+
+        let mut schema = Schema::new(fields);
+
+        // Store geometry column info in schema metadata
+        if !self.geometry_columns.is_empty() {
+            if let Ok(json) = serde_json::to_string(&self.geometry_columns) {
+                let mut metadata = HashMap::new();
+                metadata.insert(GEOMETRY_COLUMNS_METADATA_KEY.to_string(), json);
+                schema = schema.with_metadata(metadata);
+            }
+        }
+
+        Arc::new(schema)
     }
 }

+/// Extract geometry column info from Arrow schema metadata
+pub fn extract_geometry_columns(schema: &Schema) -> HashMap<String, GeometryColumnInfo> {
+    schema
+        .metadata()
+        .get(GEOMETRY_COLUMNS_METADATA_KEY)
+        .and_then(|json| serde_json::from_str(json).ok())
+        .unwrap_or_default()
+}
+
 /// Deserialize an Arrow Schema from JSON string
 pub fn deserialize_arrow_schema(json: &str) -> Result<Arc<Schema>> {
     let schema: Schema = serde_json::from_str(json)?;
diff --git a/src/engine.rs b/src/engine.rs
index b296a0a..8763a43 100644
--- a/src/engine.rs
+++ b/src/engine.rs
@@ -2842,6 +2842,9 @@ impl RuntimeEngineBuilder {
             build_instrumented_context(object_stores, liquid_cache_config)?
         };

+        // Register spatial functions from geodatafusion (st_area, st_distance, etc.)
+        geodatafusion::register(&df_ctx);
+
         // Step 6: Initialize secret manager
         let (secret_key, using_default_key) = match self.secret_key {
             Some(key) => (key, false),
diff --git a/tests/gis_integration_tests.rs b/tests/gis_integration_tests.rs
new file mode 100644
index 0000000..8cf93bc
--- /dev/null
+++ b/tests/gis_integration_tests.rs
@@ -0,0 +1,552 @@
+//! GIS/PostGIS integration tests for RivetDB
+//!
+//! These tests verify end-to-end GIS functionality:
+//! - Geometry column detection during schema discovery
+//! - Fetching geometry data as WKB
+//! - GeoParquet metadata in output files
+//!
- Spatial SQL functions via geodatafusion + +use datafusion::arrow::datatypes::DataType; +use runtimedb::catalog::{CatalogManager, SqliteCatalogManager}; +use runtimedb::datafetch::{BatchWriter, DataFetcher, NativeFetcher, StreamingParquetWriter}; +use runtimedb::secrets::{EncryptedCatalogBackend, SecretManager, ENCRYPTED_PROVIDER_TYPE}; +use runtimedb::source::{Credential, Source}; +use std::sync::Arc; +use tempfile::TempDir; +use testcontainers::{runners::AsyncRunner, ContainerAsync, ImageExt}; +use testcontainers_modules::postgres::Postgres; + +const TEST_PASSWORD: &str = "test_password"; + +/// Create a test SecretManager with temporary storage +async fn test_secret_manager(dir: &TempDir) -> SecretManager { + let db_path = dir.path().join("test_catalog.db"); + let catalog = Arc::new( + SqliteCatalogManager::new(db_path.to_str().unwrap()) + .await + .unwrap(), + ); + catalog.run_migrations().await.unwrap(); + + let key = [0x42u8; 32]; + let backend = Arc::new(EncryptedCatalogBackend::new(key, catalog.clone())); + + SecretManager::new(backend, catalog, ENCRYPTED_PROVIDER_TYPE) +} + +/// Create a test SecretManager with a password stored +async fn create_test_secret_manager_with_password( + dir: &TempDir, + secret_name: &str, + password: &str, +) -> (SecretManager, String) { + let secrets = test_secret_manager(dir).await; + let secret_id = secrets + .create(secret_name, password.as_bytes()) + .await + .unwrap(); + (secrets, secret_id) +} + +/// Start a PostGIS-enabled PostgreSQL container +async fn start_postgis_container() -> ContainerAsync { + Postgres::default() + .with_tag("15-3.5") // PostGIS 3.5 on PostgreSQL 15 + .with_env_var("POSTGRES_PASSWORD", TEST_PASSWORD) + .start() + .await + .expect("Failed to start postgis container") +} + +/// Test that geometry columns are detected during schema discovery +#[tokio::test] +async fn test_postgis_geometry_column_discovery() { + let temp_dir = TempDir::new().unwrap(); + let (secrets, secret_id) = + create_test_secret_manager_with_password(&temp_dir, "pg-pass", TEST_PASSWORD).await; + + let container = start_postgis_container().await; + let port = container.get_host_port_ipv4(5432).await.unwrap(); + + // Create test database with PostGIS + let conn_str = format!( + "postgres://postgres:{}@localhost:{}/postgres", + TEST_PASSWORD, port + ); + let pool = sqlx::PgPool::connect(&conn_str).await.unwrap(); + + // Enable PostGIS extension + sqlx::query("CREATE EXTENSION IF NOT EXISTS postgis") + .execute(&pool) + .await + .unwrap(); + + // Create test schema and table with geometry column + sqlx::query("CREATE SCHEMA locations") + .execute(&pool) + .await + .unwrap(); + + sqlx::query( + "CREATE TABLE locations.addresses ( + id SERIAL PRIMARY KEY, + house_num VARCHAR(20), + street_name VARCHAR(100), + city VARCHAR(50), + state VARCHAR(2), + zipcode VARCHAR(10), + point GEOMETRY(Point, 4326) + )", + ) + .execute(&pool) + .await + .unwrap(); + + pool.close().await; + + // Discover tables + let fetcher = NativeFetcher::new(); + let source = Source::Postgres { + host: "localhost".to_string(), + port, + user: "postgres".to_string(), + database: "postgres".to_string(), + credential: Credential::secret_ref(&secret_id), + }; + + let tables = fetcher + .discover_tables(&source, &secrets) + .await + .expect("Discovery should succeed"); + + // Find the addresses table + let addresses = tables + .iter() + .find(|t| t.table_name == "addresses") + .expect("Should find addresses table"); + + // Verify geometry column was detected + let point_col = addresses + 
.columns + .iter() + .find(|c| c.name == "point") + .expect("Should find point column"); + + assert!( + matches!(point_col.data_type, DataType::Binary), + "Geometry column should map to Binary, got {:?}", + point_col.data_type + ); + + // Verify geometry metadata was captured + assert!( + addresses.geometry_columns.contains_key("point"), + "Should have geometry metadata for 'point' column" + ); + + let geom_info = addresses.geometry_columns.get("point").unwrap(); + assert_eq!(geom_info.srid, 4326, "SRID should be 4326"); + assert_eq!( + geom_info.geometry_type.as_deref(), + Some("POINT"), + "Should detect Point geometry type" + ); +} + +/// Test fetching geometry data as WKB and writing to GeoParquet +#[tokio::test] +async fn test_postgis_fetch_geometry_to_geoparquet() { + use datafusion::parquet::file::reader::{FileReader, SerializedFileReader}; + use std::fs::File; + + let temp_dir = TempDir::new().unwrap(); + let (secrets, secret_id) = + create_test_secret_manager_with_password(&temp_dir, "pg-pass", TEST_PASSWORD).await; + + let container = start_postgis_container().await; + let port = container.get_host_port_ipv4(5432).await.unwrap(); + + let conn_str = format!( + "postgres://postgres:{}@localhost:{}/postgres", + TEST_PASSWORD, port + ); + let pool = sqlx::PgPool::connect(&conn_str).await.unwrap(); + + // Enable PostGIS + sqlx::query("CREATE EXTENSION IF NOT EXISTS postgis") + .execute(&pool) + .await + .unwrap(); + + // Create and populate test table + sqlx::query("CREATE SCHEMA locations") + .execute(&pool) + .await + .unwrap(); + + sqlx::query( + "CREATE TABLE locations.parcels ( + id SERIAL PRIMARY KEY, + parcel_id VARCHAR(50), + the_geom GEOMETRY(Polygon, 4326) + )", + ) + .execute(&pool) + .await + .unwrap(); + + // Insert test polygons of different sizes + sqlx::query( + "INSERT INTO locations.parcels (parcel_id, the_geom) VALUES + ('PARCEL-001', ST_GeomFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))', 4326)), + ('PARCEL-002', ST_GeomFromText('POLYGON((0 0, 0 2, 2 2, 2 0, 0 0))', 4326)), + ('PARCEL-003', ST_GeomFromText('POLYGON((0 0, 0 3, 3 3, 3 0, 0 0))', 4326))", + ) + .execute(&pool) + .await + .unwrap(); + + pool.close().await; + + // Fetch to parquet + let fetcher = NativeFetcher::new(); + let source = Source::Postgres { + host: "localhost".to_string(), + port, + user: "postgres".to_string(), + database: "postgres".to_string(), + credential: Credential::secret_ref(&secret_id), + }; + + let output_path = temp_dir.path().join("parcels.parquet"); + let mut writer = StreamingParquetWriter::new(output_path.clone()); + + // Set geometry columns for GeoParquet metadata + let mut geom_cols = std::collections::HashMap::new(); + geom_cols.insert( + "the_geom".to_string(), + runtimedb::datafetch::GeometryColumnInfo { + srid: 4326, + geometry_type: Some("Polygon".to_string()), + }, + ); + writer.set_geometry_columns(geom_cols); + + let result = fetcher + .fetch_table(&source, &secrets, None, "locations", "parcels", &mut writer) + .await; + assert!(result.is_ok(), "Fetch should succeed: {:?}", result.err()); + + Box::new(writer).close().unwrap(); + + // Verify GeoParquet metadata + let file = File::open(&output_path).unwrap(); + let reader = SerializedFileReader::new(file).unwrap(); + let file_metadata = reader.metadata().file_metadata(); + + let geo_metadata = file_metadata + .key_value_metadata() + .and_then(|kv| kv.iter().find(|item| item.key == "geo")); + + assert!( + geo_metadata.is_some(), + "GeoParquet 'geo' metadata should be present" + ); + + let geo_value = 
geo_metadata.unwrap().value.as_ref().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(geo_value).unwrap(); + + assert_eq!(parsed["version"], "1.1.0"); + assert_eq!(parsed["columns"]["the_geom"]["encoding"], "WKB"); + assert_eq!(parsed["columns"]["the_geom"]["crs"]["id"]["code"], 4326); + + // Verify data was fetched correctly + assert_eq!( + reader.metadata().file_metadata().num_rows(), + 3, + "Should have 3 parcels" + ); +} + +/// Test spatial SQL functions work via geodatafusion integration +#[tokio::test] +async fn test_spatial_sql_functions() { + use runtimedb::RuntimeEngine; + + let temp_dir = TempDir::new().unwrap(); + let engine = RuntimeEngine::defaults(temp_dir.path()) + .await + .expect("Engine should initialize"); + + // Test st_area function is registered + let result = engine + .execute_query("SELECT st_area(st_geomfromtext('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'))") + .await; + + assert!( + result.is_ok(), + "st_area query should succeed: {:?}", + result.err() + ); + + let response = result.unwrap(); + assert_eq!(response.results.len(), 1); + assert_eq!(response.results[0].num_rows(), 1); + + // Test st_distance function + let result = engine + .execute_query( + "SELECT st_distance( + st_geomfromtext('POINT(0 0)'), + st_geomfromtext('POINT(3 4)') + )", + ) + .await; + + assert!( + result.is_ok(), + "st_distance query should succeed: {:?}", + result.err() + ); + + let response = result.unwrap(); + assert_eq!(response.results.len(), 1); + + // Test st_centroid function + let result = engine + .execute_query("SELECT st_centroid(st_geomfromtext('POLYGON((0 0, 0 2, 2 2, 2 0, 0 0))'))") + .await; + + assert!( + result.is_ok(), + "st_centroid query should succeed: {:?}", + result.err() + ); +} + +/// Generate a random base64-encoded 32-byte key for test secret manager. 
+fn generate_test_secret_key() -> String { + use base64::Engine; + let key_bytes: [u8; 32] = rand::random(); + base64::engine::general_purpose::STANDARD.encode(key_bytes) +} + +/// Example queries from the issue - demonstrating GIS query capabilities +/// These require a full end-to-end setup with data loaded +#[tokio::test] +async fn test_example_gis_queries() { + use runtimedb::RuntimeEngine; + + let temp_dir = TempDir::new().unwrap(); + let container = start_postgis_container().await; + let port = container.get_host_port_ipv4(5432).await.unwrap(); + + let conn_str = format!( + "postgres://postgres:{}@localhost:{}/postgres", + TEST_PASSWORD, port + ); + let pool = sqlx::PgPool::connect(&conn_str).await.unwrap(); + + // Enable PostGIS + sqlx::query("CREATE EXTENSION IF NOT EXISTS postgis") + .execute(&pool) + .await + .unwrap(); + + // Create schema + sqlx::query("CREATE SCHEMA locations") + .execute(&pool) + .await + .unwrap(); + + // Create addresses table + sqlx::query( + "CREATE TABLE locations.addresses ( + id SERIAL PRIMARY KEY, + house_num VARCHAR(20), + street_name VARCHAR(100), + city VARCHAR(50), + state VARCHAR(2), + zipcode VARCHAR(10), + point GEOMETRY(Point, 4326) + )", + ) + .execute(&pool) + .await + .unwrap(); + + // Create addresses_boundaries join table + sqlx::query( + "CREATE TABLE locations.addresses_boundaries ( + address_id INTEGER REFERENCES locations.addresses(id), + boundary_type VARCHAR(50) + )", + ) + .execute(&pool) + .await + .unwrap(); + + // Create parcels table + sqlx::query( + "CREATE TABLE locations.parcels ( + id SERIAL PRIMARY KEY, + the_geom GEOMETRY(Polygon, 4326) + )", + ) + .execute(&pool) + .await + .unwrap(); + + // Insert test addresses + sqlx::query( + "INSERT INTO locations.addresses (house_num, street_name, city, state, zipcode, point) VALUES + ('123', 'Main St', 'Springfield', 'IL', '62701', ST_GeomFromText('POINT(-89.65 39.80)', 4326)), + ('456', 'Oak Ave', 'Springfield', 'IL', '62702', ST_GeomFromText('POINT(-89.64 39.81)', 4326)), + ('789', 'Pine Rd', 'Springfield', 'IL', '62703', ST_GeomFromText('POINT(-89.63 39.82)', 4326)), + ('101', 'Elm St', 'Springfield', 'IL', '62704', ST_GeomFromText('POINT(-89.62 39.83)', 4326)), + ('202', 'Maple Ln', 'Springfield', 'IL', '62705', ST_GeomFromText('POINT(-89.61 39.84)', 4326))", + ) + .execute(&pool) + .await + .unwrap(); + + // Insert address boundaries + sqlx::query( + "INSERT INTO locations.addresses_boundaries (address_id, boundary_type) VALUES + (1, 'residential'), (2, 'commercial'), (3, 'residential'), + (4, 'industrial'), (5, 'residential')", + ) + .execute(&pool) + .await + .unwrap(); + + // Insert parcels of varying sizes + sqlx::query( + "INSERT INTO locations.parcels (the_geom) VALUES + (ST_GeomFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))', 4326)), + (ST_GeomFromText('POLYGON((2 2, 2 5, 5 5, 5 2, 2 2))', 4326)), + (ST_GeomFromText('POLYGON((10 10, 10 12, 12 12, 12 10, 10 10))', 4326))", + ) + .execute(&pool) + .await + .unwrap(); + + pool.close().await; + + // Create engine with secret key enabled + let secret_key = generate_test_secret_key(); + let engine = RuntimeEngine::builder() + .base_dir(temp_dir.path()) + .secret_key(secret_key) + .build() + .await + .expect("Engine should initialize"); + + // Store password in engine's secret manager + let secret_id = engine + .secret_manager() + .create("pg-pass", TEST_PASSWORD.as_bytes()) + .await + .expect("Failed to store secret"); + + // Register connection with secret reference + let source = Source::Postgres { + host: 
"localhost".to_string(), + port, + user: "postgres".to_string(), + database: "postgres".to_string(), + credential: Credential::secret_ref(&secret_id), + }; + + engine + .connect("test_conn", source) + .await + .expect("Should connect"); + + // Example Query 1: The addresses and location of 5 locations + let result = engine + .execute_query( + "SELECT a.id, a.house_num, a.street_name, a.city, a.state, a.zipcode, a.point + FROM test_conn.locations.addresses a + JOIN test_conn.locations.addresses_boundaries ab ON a.id = ab.address_id + LIMIT 5", + ) + .await; + + assert!( + result.is_ok(), + "Query 1 (addresses with boundaries) should succeed: {:?}", + result.err() + ); + let response = result.unwrap(); + assert!( + response.results[0].num_rows() <= 5, + "Should return at most 5 rows" + ); + + // Example Query 2: The largest parcel (using st_area) + let result = engine + .execute_query( + "SELECT id, the_geom + FROM test_conn.locations.parcels + ORDER BY st_area(the_geom) DESC + LIMIT 1", + ) + .await; + + assert!( + result.is_ok(), + "Query 2 (largest parcel) should succeed: {:?}", + result.err() + ); + let response = result.unwrap(); + assert_eq!( + response.results[0].num_rows(), + 1, + "Should return 1 largest parcel" + ); + + // Example Query 3: The distance between two largest parcels + let result = engine + .execute_query( + "WITH largest_parcels AS ( + SELECT id, the_geom + FROM test_conn.locations.parcels + ORDER BY st_area(the_geom) DESC + LIMIT 2 + ) + SELECT st_distance( + st_centroid((SELECT the_geom FROM largest_parcels LIMIT 1)), + st_centroid((SELECT the_geom FROM largest_parcels OFFSET 1 LIMIT 1)) + ) AS distance_between_largest_parcels", + ) + .await; + + assert!( + result.is_ok(), + "Query 3 (distance between parcels) should succeed: {:?}", + result.err() + ); + + // Example Query 4: The route (shortest line) between two largest parcels + let result = engine + .execute_query( + "WITH largest_parcels AS ( + SELECT id, the_geom + FROM test_conn.locations.parcels + ORDER BY st_area(the_geom) DESC + LIMIT 2 + ) + SELECT st_shortestline( + (SELECT the_geom FROM largest_parcels LIMIT 1), + (SELECT the_geom FROM largest_parcels OFFSET 1 LIMIT 1) + ) AS route_between_largest_parcels", + ) + .await; + + assert!( + result.is_ok(), + "Query 4 (shortest line between parcels) should succeed: {:?}", + result.err() + ); +} From b773cfe9fbec0c7aeb69cb54592e5461ef18c5d6 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Tue, 27 Jan 2026 16:08:18 -0800 Subject: [PATCH 02/27] feat(gis): preserve GeoParquet metadata in dataset uploads Parse "geo" metadata from uploaded GeoParquet files and pass geometry column info to the StreamingParquetWriter so output datasets maintain GeoParquet 1.1.0 metadata. 
--- src/datafetch/mod.rs | 2 +- src/datafetch/native/mod.rs | 2 +- src/datafetch/native/parquet_writer.rs | 50 ++++++++++++++++++++++++++ src/engine.rs | 40 +++++++++++++++++---- 4 files changed, 86 insertions(+), 8 deletions(-) diff --git a/src/datafetch/mod.rs b/src/datafetch/mod.rs index c14dee5..0cc5285 100644 --- a/src/datafetch/mod.rs +++ b/src/datafetch/mod.rs @@ -8,7 +8,7 @@ mod types; pub use batch_writer::{BatchWriteResult, BatchWriter}; pub use error::DataFetchError; pub use fetcher::DataFetcher; -pub use native::{NativeFetcher, StreamingParquetWriter}; +pub use native::{parse_geoparquet_metadata, NativeFetcher, StreamingParquetWriter}; pub use orchestrator::FetchOrchestrator; pub use types::{ deserialize_arrow_schema, extract_geometry_columns, ColumnMetadata, GeometryColumnInfo, diff --git a/src/datafetch/native/mod.rs b/src/datafetch/native/mod.rs index f3ce2b2..7a13d5c 100644 --- a/src/datafetch/native/mod.rs +++ b/src/datafetch/native/mod.rs @@ -9,7 +9,7 @@ mod parquet_writer; pub mod postgres; pub mod snowflake; -pub use parquet_writer::StreamingParquetWriter; +pub use parquet_writer::{parse_geoparquet_metadata, StreamingParquetWriter}; use async_trait::async_trait; use datafusion::arrow::datatypes::DataType; diff --git a/src/datafetch/native/parquet_writer.rs b/src/datafetch/native/parquet_writer.rs index 1906a5d..06b801f 100644 --- a/src/datafetch/native/parquet_writer.rs +++ b/src/datafetch/native/parquet_writer.rs @@ -131,6 +131,56 @@ impl GeoParquetMetadata { } } +/// Extract GeometryColumnInfo from GeoParquet metadata JSON string. +/// This parses the "geo" key-value metadata from a GeoParquet file. +pub fn parse_geoparquet_metadata(geo_json: &str) -> HashMap { + #[derive(Deserialize)] + struct GeoMeta { + columns: HashMap, + } + + #[derive(Deserialize)] + struct GeoColMeta { + #[serde(default)] + geometry_types: Option>, + crs: Option, + } + + #[derive(Deserialize)] + struct CrsMeta { + id: Option, + } + + #[derive(Deserialize)] + struct CrsIdMeta { + #[serde(default)] + code: i32, + } + + let Ok(geo_meta) = serde_json::from_str::(geo_json) else { + return HashMap::new(); + }; + + geo_meta + .columns + .into_iter() + .map(|(name, col)| { + let srid = col.crs.and_then(|c| c.id).map(|id| id.code).unwrap_or(0); + let geometry_type = col + .geometry_types + .and_then(|types| types.into_iter().next()); + + ( + name, + GeometryColumnInfo { + srid, + geometry_type, + }, + ) + }) + .collect() +} + /// Normalize geometry type names to GeoParquet standard format fn normalize_geometry_type(geom_type: &str) -> String { match geom_type.to_uppercase().as_str() { diff --git a/src/engine.rs b/src/engine.rs index 8763a43..bec5ff9 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -2147,11 +2147,25 @@ impl RuntimeEngine { let builder = release_on_parse_error!(ParquetRecordBatchReaderBuilder::try_new(file)); let schema = builder.schema().clone(); + + // Extract GeoParquet metadata if present + let geometry_columns = builder + .metadata() + .file_metadata() + .key_value_metadata() + .and_then(|kv| kv.iter().find(|item| item.key == "geo")) + .and_then(|item| item.value.as_ref()) + .map(|geo_json| crate::datafetch::parse_geoparquet_metadata(geo_json)) + .unwrap_or_default(); + let parquet_reader = release_on_parse_error!(builder.with_batch_size(8192).build()); - let mut writer: Box = - Box::new(StreamingParquetWriter::new(handle.local_path.clone())); + let mut writer = StreamingParquetWriter::new(handle.local_path.clone()); + // Set geometry columns before init() to include in GeoParquet 
metadata + if !geometry_columns.is_empty() { + writer.set_geometry_columns(geometry_columns); + } release_on_storage_error!(writer.init(&schema)); let mut row_count = 0usize; @@ -2161,7 +2175,7 @@ impl RuntimeEngine { release_on_storage_error!(writer.write_batch(&batch)); } - release_on_storage_error!(writer.close()); + release_on_storage_error!(Box::new(writer).close()); (schema, row_count) } DataSource::InMemory(data) => { @@ -2171,11 +2185,25 @@ impl RuntimeEngine { ParquetRecordBatchReaderBuilder::try_new(cursor) ); let schema = builder.schema().clone(); + + // Extract GeoParquet metadata if present + let geometry_columns = builder + .metadata() + .file_metadata() + .key_value_metadata() + .and_then(|kv| kv.iter().find(|item| item.key == "geo")) + .and_then(|item| item.value.as_ref()) + .map(|geo_json| crate::datafetch::parse_geoparquet_metadata(geo_json)) + .unwrap_or_default(); + let parquet_reader = release_on_parse_error!(builder.with_batch_size(8192).build()); - let mut writer: Box = - Box::new(StreamingParquetWriter::new(handle.local_path.clone())); + let mut writer = StreamingParquetWriter::new(handle.local_path.clone()); + // Set geometry columns before init() to include in GeoParquet metadata + if !geometry_columns.is_empty() { + writer.set_geometry_columns(geometry_columns); + } release_on_storage_error!(writer.init(&schema)); let mut row_count = 0usize; @@ -2185,7 +2213,7 @@ impl RuntimeEngine { release_on_storage_error!(writer.write_batch(&batch)); } - release_on_storage_error!(writer.close()); + release_on_storage_error!(Box::new(writer).close()); (schema, row_count) } } From 8ba255bc44d2d9fcae04a45bb3ffe4088a31efd5 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Tue, 27 Jan 2026 16:08:29 -0800 Subject: [PATCH 03/27] test(gis): fix integration tests for multi-arch Docker Use kartoza/postgis:16-3.4 which supports arm64/amd64. Add retry logic for container startup, fix tokio runtime requirements, and use only geodatafusion-supported spatial functions. 
--- tests/gis_integration_tests.rs | 70 ++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/tests/gis_integration_tests.rs b/tests/gis_integration_tests.rs index 8cf93bc..0c064d7 100644 --- a/tests/gis_integration_tests.rs +++ b/tests/gis_integration_tests.rs @@ -13,8 +13,7 @@ use runtimedb::secrets::{EncryptedCatalogBackend, SecretManager, ENCRYPTED_PROVI use runtimedb::source::{Credential, Source}; use std::sync::Arc; use tempfile::TempDir; -use testcontainers::{runners::AsyncRunner, ContainerAsync, ImageExt}; -use testcontainers_modules::postgres::Postgres; +use testcontainers::{runners::AsyncRunner, ContainerAsync, GenericImage, ImageExt}; const TEST_PASSWORD: &str = "test_password"; @@ -49,17 +48,44 @@ async fn create_test_secret_manager_with_password( } /// Start a PostGIS-enabled PostgreSQL container -async fn start_postgis_container() -> ContainerAsync { - Postgres::default() - .with_tag("15-3.5") // PostGIS 3.5 on PostgreSQL 15 +/// Uses kartoza/postgis which has multi-arch support (amd64 + arm64) +async fn start_postgis_container() -> ContainerAsync { + GenericImage::new("kartoza/postgis", "16-3.4") + .with_exposed_port(5432.into()) + // Wait for the final startup message after the init sequence completes + .with_wait_for(testcontainers::core::WaitFor::message_on_stdout( + "restarting in foreground", + )) + .with_env_var("POSTGRES_USER", "postgres") .with_env_var("POSTGRES_PASSWORD", TEST_PASSWORD) + .with_env_var("POSTGRES_DB", "postgres") .start() .await .expect("Failed to start postgis container") } +/// Wait for the database to be ready for connections +async fn wait_for_db(conn_str: &str) -> sqlx::PgPool { + for attempt in 1..=30 { + match sqlx::PgPool::connect(conn_str).await { + Ok(pool) => { + // Test connection is actually working + if sqlx::query("SELECT 1").execute(&pool).await.is_ok() { + return pool; + } + pool.close().await; + } + Err(_) => {} + } + if attempt < 30 { + tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; + } + } + panic!("Failed to connect to PostgreSQL after 30 attempts"); +} + /// Test that geometry columns are detected during schema discovery -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_postgis_geometry_column_discovery() { let temp_dir = TempDir::new().unwrap(); let (secrets, secret_id) = @@ -68,12 +94,12 @@ async fn test_postgis_geometry_column_discovery() { let container = start_postgis_container().await; let port = container.get_host_port_ipv4(5432).await.unwrap(); - // Create test database with PostGIS + // Wait for database to be ready let conn_str = format!( "postgres://postgres:{}@localhost:{}/postgres", TEST_PASSWORD, port ); - let pool = sqlx::PgPool::connect(&conn_str).await.unwrap(); + let pool = wait_for_db(&conn_str).await; // Enable PostGIS extension sqlx::query("CREATE EXTENSION IF NOT EXISTS postgis") @@ -154,7 +180,7 @@ async fn test_postgis_geometry_column_discovery() { } /// Test fetching geometry data as WKB and writing to GeoParquet -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_postgis_fetch_geometry_to_geoparquet() { use datafusion::parquet::file::reader::{FileReader, SerializedFileReader}; use std::fs::File; @@ -170,7 +196,7 @@ async fn test_postgis_fetch_geometry_to_geoparquet() { "postgres://postgres:{}@localhost:{}/postgres", TEST_PASSWORD, port ); - let pool = sqlx::PgPool::connect(&conn_str).await.unwrap(); + let pool = wait_for_db(&conn_str).await; // Enable PostGIS sqlx::query("CREATE EXTENSION IF NOT EXISTS 
postgis") @@ -269,7 +295,7 @@ async fn test_postgis_fetch_geometry_to_geoparquet() { } /// Test spatial SQL functions work via geodatafusion integration -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_spatial_sql_functions() { use runtimedb::RuntimeEngine; @@ -333,7 +359,7 @@ fn generate_test_secret_key() -> String { /// Example queries from the issue - demonstrating GIS query capabilities /// These require a full end-to-end setup with data loaded -#[tokio::test] +#[tokio::test(flavor = "multi_thread")] async fn test_example_gis_queries() { use runtimedb::RuntimeEngine; @@ -345,7 +371,7 @@ async fn test_example_gis_queries() { "postgres://postgres:{}@localhost:{}/postgres", TEST_PASSWORD, port ); - let pool = sqlx::PgPool::connect(&conn_str).await.unwrap(); + let pool = wait_for_db(&conn_str).await; // Enable PostGIS sqlx::query("CREATE EXTENSION IF NOT EXISTS postgis") @@ -528,25 +554,19 @@ async fn test_example_gis_queries() { result.err() ); - // Example Query 4: The route (shortest line) between two largest parcels + // Example Query 4: Convex hull around the largest parcel let result = engine .execute_query( - "WITH largest_parcels AS ( - SELECT id, the_geom - FROM test_conn.locations.parcels - ORDER BY st_area(the_geom) DESC - LIMIT 2 - ) - SELECT st_shortestline( - (SELECT the_geom FROM largest_parcels LIMIT 1), - (SELECT the_geom FROM largest_parcels OFFSET 1 LIMIT 1) - ) AS route_between_largest_parcels", + "SELECT id, st_convexhull(the_geom) AS hull + FROM test_conn.locations.parcels + ORDER BY st_area(the_geom) DESC + LIMIT 1", ) .await; assert!( result.is_ok(), - "Query 4 (shortest line between parcels) should succeed: {:?}", + "Query 4 (convex hull) should succeed: {:?}", result.err() ); } From 8bf86c6588778f61f82c462cfadb97b852e5ef8d Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Tue, 27 Jan 2026 21:19:10 -0800 Subject: [PATCH 04/27] feat(datasets): add GEOMETRY type support for column definitions Extend explicit column definitions to support geometry types with SRID and geometry_type metadata. Geometry columns are stored as WKB binary with GeoParquet metadata for spatial query support. --- src/datasets/mod.rs | 5 +- src/datasets/schema.rs | 505 +++++++++++++++++++++++++++++++++---- src/engine.rs | 90 ++++--- src/http/models.rs | 12 +- tests/dataset_e2e_tests.rs | 2 + 5 files changed, 531 insertions(+), 83 deletions(-) diff --git a/src/datasets/mod.rs b/src/datasets/mod.rs index 828b88e..33df483 100644 --- a/src/datasets/mod.rs +++ b/src/datasets/mod.rs @@ -7,7 +7,10 @@ pub mod validation; pub use error::DatasetError; pub use schema::{ build_schema_from_columns, build_schema_from_columns_for_json, - build_schema_from_columns_unchecked, parse_column_type, ColumnTypeError, SchemaError, + build_schema_from_columns_for_json_full, build_schema_from_columns_full, + build_schema_from_columns_unchecked, build_schema_from_columns_unchecked_full, + parse_column_type, parse_column_type_full, ColumnTypeError, ParsedColumnType, ParsedSchema, + SchemaError, }; pub use validation::*; diff --git a/src/datasets/schema.rs b/src/datasets/schema.rs index 6eb4e48..1313d7e 100644 --- a/src/datasets/schema.rs +++ b/src/datasets/schema.rs @@ -1,5 +1,6 @@ //! Schema building from explicit column definitions. 
+use crate::datafetch::GeometryColumnInfo; use crate::http::models::ColumnDefinition; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use std::collections::HashMap; @@ -62,6 +63,24 @@ impl std::error::Error for SchemaError { } } +/// Result of parsing a column type, including optional geometry metadata. +#[derive(Debug, Clone)] +pub struct ParsedColumnType { + /// The Arrow data type for this column + pub data_type: DataType, + /// Optional geometry metadata for GEOMETRY/GEOGRAPHY columns + pub geometry_info: Option, +} + +/// Result of building a schema, including the Arrow schema and geometry column metadata. +#[derive(Debug)] +pub struct ParsedSchema { + /// The Arrow schema + pub schema: Arc, + /// Geometry column metadata for GeoParquet output (column name -> info) + pub geometry_columns: HashMap, +} + /// Supported type names for error messages. const SUPPORTED_TYPES: &[&str] = &[ "VARCHAR", @@ -95,28 +114,50 @@ const SUPPORTED_TYPES: &[&str] = &[ "BYTEA", "UUID", "JSON", + "GEOMETRY", + "GEOGRAPHY", ]; /// Parse a column definition into an Arrow DataType. +/// +/// For backwards compatibility, this returns just the DataType. +/// Use `parse_column_type_full` to get geometry metadata as well. pub fn parse_column_type( column_name: &str, definition: &ColumnDefinition, ) -> Result { + parse_column_type_full(column_name, definition).map(|parsed| parsed.data_type) +} + +/// Parse a column definition into a full ParsedColumnType with optional geometry metadata. +pub fn parse_column_type_full( + column_name: &str, + definition: &ColumnDefinition, +) -> Result { match definition { - ColumnDefinition::Simple(type_str) => parse_type_string(column_name, type_str, None, None), - ColumnDefinition::Detailed(spec) => { - parse_type_string(column_name, &spec.data_type, spec.precision, spec.scale) + ColumnDefinition::Simple(type_str) => { + parse_type_string_full(column_name, type_str, None, None, None, None) } + ColumnDefinition::Detailed(spec) => parse_type_string_full( + column_name, + &spec.data_type, + spec.precision, + spec.scale, + spec.srid, + spec.geometry_type.as_deref(), + ), } } -/// Parse a type string into an Arrow DataType. -fn parse_type_string( +/// Parse a type string into a full ParsedColumnType with optional geometry metadata. 
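+/// For example, `"GEOMETRY(Point, 4326)"` maps to `DataType::Binary` plus
+/// `GeometryColumnInfo { srid: 4326, geometry_type: Some("Point") }`, while a plain
+/// `"BIGINT"` maps to `DataType::Int64` with `geometry_info: None` (see the unit
+/// tests below).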
+fn parse_type_string_full( column_name: &str, type_str: &str, precision: Option, scale: Option, -) -> Result { + srid: Option, + geometry_type: Option<&str>, +) -> Result { let type_upper = type_str.to_uppercase(); // Check for mismatched parentheses early (before extracting base type) @@ -132,28 +173,28 @@ fn parse_type_string( // Handle parameterized types like DECIMAL(10,2) by extracting base type let base_type = type_upper.split('(').next().unwrap_or(&type_upper).trim(); - let data_type = match base_type { + let (data_type, geometry_info) = match base_type { // String types - "VARCHAR" | "TEXT" | "STRING" | "CHAR" | "BPCHAR" => DataType::Utf8, + "VARCHAR" | "TEXT" | "STRING" | "CHAR" | "BPCHAR" => (DataType::Utf8, None), // Boolean - "BOOLEAN" | "BOOL" => DataType::Boolean, + "BOOLEAN" | "BOOL" => (DataType::Boolean, None), // Signed integers - "TINYINT" | "INT1" => DataType::Int8, - "SMALLINT" | "INT2" => DataType::Int16, - "INTEGER" | "INT" | "INT4" => DataType::Int32, - "BIGINT" | "INT8" => DataType::Int64, + "TINYINT" | "INT1" => (DataType::Int8, None), + "SMALLINT" | "INT2" => (DataType::Int16, None), + "INTEGER" | "INT" | "INT4" => (DataType::Int32, None), + "BIGINT" | "INT8" => (DataType::Int64, None), // Unsigned integers - "UTINYINT" => DataType::UInt8, - "USMALLINT" => DataType::UInt16, - "UINTEGER" | "UINT" => DataType::UInt32, - "UBIGINT" => DataType::UInt64, + "UTINYINT" => (DataType::UInt8, None), + "USMALLINT" => (DataType::UInt16, None), + "UINTEGER" | "UINT" => (DataType::UInt32, None), + "UBIGINT" => (DataType::UInt64, None), // Floating point - "REAL" | "FLOAT4" | "FLOAT" => DataType::Float32, - "DOUBLE" | "FLOAT8" => DataType::Float64, + "REAL" | "FLOAT4" | "FLOAT" => (DataType::Float32, None), + "DOUBLE" | "FLOAT8" => (DataType::Float64, None), // Decimal - use explicit precision/scale if provided, else parse from string or default "DECIMAL" | "NUMERIC" => { @@ -175,23 +216,30 @@ fn parse_type_string( }; // Validate precision and scale validate_decimal_params(column_name, p, s)?; - DataType::Decimal128(p, s) + (DataType::Decimal128(p, s), None) } // Date/Time - "DATE" => DataType::Date32, - "TIME" => DataType::Time64(TimeUnit::Microsecond), - "TIMESTAMP" | "DATETIME" => DataType::Timestamp(TimeUnit::Microsecond, None), - "TIMESTAMPTZ" | "TIMESTAMP WITH TIME ZONE" => { - DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())) - } + "DATE" => (DataType::Date32, None), + "TIME" => (DataType::Time64(TimeUnit::Microsecond), None), + "TIMESTAMP" | "DATETIME" => (DataType::Timestamp(TimeUnit::Microsecond, None), None), + "TIMESTAMPTZ" | "TIMESTAMP WITH TIME ZONE" => ( + DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())), + None, + ), // Binary - "BLOB" | "BYTEA" | "BINARY" | "VARBINARY" => DataType::Binary, + "BLOB" | "BYTEA" | "BINARY" | "VARBINARY" => (DataType::Binary, None), // Special string-backed types - "UUID" => DataType::Utf8, - "JSON" => DataType::Utf8, + "UUID" => (DataType::Utf8, None), + "JSON" => (DataType::Utf8, None), + + // Geometry types - stored as WKB binary with GeoParquet metadata + "GEOMETRY" | "GEOGRAPHY" => { + let geo_info = parse_geometry_info(column_name, &type_upper, srid, geometry_type)?; + (DataType::Binary, Some(geo_info)) + } _ => { return Err(ColumnTypeError { @@ -205,7 +253,102 @@ fn parse_type_string( } }; - Ok(data_type) + Ok(ParsedColumnType { + data_type, + geometry_info, + }) +} + +/// Parse geometry type information from type string and/or explicit parameters. 
+/// +/// Supports formats: +/// - `GEOMETRY` - untyped geometry with default SRID 4326 +/// - `GEOMETRY(Point)` - typed geometry with default SRID 4326 +/// - `GEOMETRY(Point, 4326)` - typed geometry with explicit SRID +/// - Explicit srid/geometry_type parameters override parsed values +fn parse_geometry_info( + column_name: &str, + type_str: &str, + explicit_srid: Option, + explicit_geometry_type: Option<&str>, +) -> Result { + // Try to parse from string like GEOMETRY(Point, 4326) + let (parsed_type, parsed_srid) = if let Some(start) = type_str.find('(') { + if let Some(end) = type_str.find(')') { + if end <= start { + return Err(ColumnTypeError { + column_name: column_name.to_string(), + message: format!( + "Malformed geometry type '{}': invalid parentheses order", + type_str + ), + }); + } + let params = &type_str[start + 1..end]; + let parts: Vec<&str> = params.split(',').map(|s| s.trim()).collect(); + + match parts.len() { + 1 if !parts[0].is_empty() => { + // GEOMETRY(Point) - type only + let geom_type = normalize_geometry_type(parts[0])?; + (Some(geom_type), None) + } + 2 => { + // GEOMETRY(Point, 4326) - type and SRID + let geom_type = normalize_geometry_type(parts[0])?; + let srid = parts[1].parse::().map_err(|_| ColumnTypeError { + column_name: column_name.to_string(), + message: format!("Invalid SRID '{}': must be an integer", parts[1]), + })?; + (Some(geom_type), Some(srid)) + } + _ => { + // Empty parens or too many params + return Err(ColumnTypeError { + column_name: column_name.to_string(), + message: format!( + "Malformed geometry type '{}': expected GEOMETRY, GEOMETRY(type), or GEOMETRY(type, srid)", + type_str + ), + }); + } + } + } else { + (None, None) + } + } else { + (None, None) + }; + + // Explicit parameters override parsed values + let final_geometry_type = explicit_geometry_type + .map(|s| s.to_string()) + .or(parsed_type); + let final_srid = explicit_srid.or(parsed_srid).unwrap_or(4326); // Default to WGS84 + + Ok(GeometryColumnInfo { + srid: final_srid, + geometry_type: final_geometry_type, + }) +} + +/// Normalize geometry type names to standard capitalization. +fn normalize_geometry_type(type_name: &str) -> Result { + let normalized = match type_name.to_uppercase().as_str() { + "POINT" => "Point", + "LINESTRING" => "LineString", + "POLYGON" => "Polygon", + "MULTIPOINT" => "MultiPoint", + "MULTILINESTRING" => "MultiLineString", + "MULTIPOLYGON" => "MultiPolygon", + "GEOMETRYCOLLECTION" => "GeometryCollection", + "GEOMETRY" => "Geometry", + _ => { + // Accept unknown types but warn - could be a database-specific extension + return Ok(type_name.to_string()); + } + }; + Ok(normalized.to_string()) } /// Parse DECIMAL(precision, scale) parameters from type string. @@ -314,6 +457,16 @@ pub fn build_schema_from_columns( columns: &HashMap, data_columns: &[String], ) -> Result, SchemaError> { + build_schema_from_columns_full(columns, data_columns).map(|parsed| parsed.schema) +} + +/// Build an Arrow schema with geometry metadata from explicit column definitions. +/// +/// Returns both the schema and any geometry column metadata for GeoParquet output. 
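+///
+/// For example, defining `location` as `GEOMETRY(Point, 4326)` yields a nullable
+/// `Binary` field for `location` in the schema and a `geometry_columns` entry with
+/// `srid: 4326` and `geometry_type: Some("Point")`; non-geometry columns are
+/// unaffected (exercised by `test_build_schema_with_geometry_columns` below).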
+pub fn build_schema_from_columns_full( + columns: &HashMap, + data_columns: &[String], +) -> Result { // Check for columns in data but not defined for data_col in data_columns { if !columns.contains_key(data_col) { @@ -332,17 +485,27 @@ pub fn build_schema_from_columns( } } - // Build fields in data order + // Build fields in data order, collecting geometry metadata let mut fields = Vec::with_capacity(data_columns.len()); + let mut geometry_columns = HashMap::new(); + for col_name in data_columns { let definition = columns.get(col_name).unwrap(); // Safe: validated above - let data_type = - parse_column_type(col_name, definition).map_err(SchemaError::InvalidType)?; + let parsed = + parse_column_type_full(col_name, definition).map_err(SchemaError::InvalidType)?; + + if let Some(geo_info) = parsed.geometry_info { + geometry_columns.insert(col_name.clone(), geo_info); + } + // Default to nullable=true for flexibility - fields.push(Field::new(col_name, data_type, true)); + fields.push(Field::new(col_name, parsed.data_type, true)); } - Ok(Arc::new(Schema::new(fields))) + Ok(ParsedSchema { + schema: Arc::new(Schema::new(fields)), + geometry_columns, + }) } /// Build an Arrow schema directly from explicit column definitions, validated against observed fields. @@ -359,6 +522,16 @@ pub fn build_schema_from_columns_for_json( columns: &HashMap, observed_fields: &[String], ) -> Result, SchemaError> { + build_schema_from_columns_for_json_full(columns, observed_fields).map(|parsed| parsed.schema) +} + +/// Build an Arrow schema with geometry metadata from explicit column definitions for JSON. +/// +/// Returns both the schema and any geometry column metadata for GeoParquet output. +pub fn build_schema_from_columns_for_json_full( + columns: &HashMap, + observed_fields: &[String], +) -> Result { // Check for columns defined but never observed in the data // This catches typos like "scroe" instead of "score" for defined_col in columns.keys() { @@ -383,15 +556,25 @@ pub fn build_schema_from_columns_for_json( col_names.sort(); let mut fields = Vec::with_capacity(columns.len()); + let mut geometry_columns = HashMap::new(); + for col_name in col_names { let definition = columns.get(col_name).unwrap(); - let data_type = - parse_column_type(col_name, definition).map_err(SchemaError::InvalidType)?; + let parsed = + parse_column_type_full(col_name, definition).map_err(SchemaError::InvalidType)?; + + if let Some(geo_info) = parsed.geometry_info { + geometry_columns.insert(col_name.clone(), geo_info); + } + // Default to nullable=true since JSON fields can be missing - fields.push(Field::new(col_name.as_str(), data_type, true)); + fields.push(Field::new(col_name.as_str(), parsed.data_type, true)); } - Ok(Arc::new(Schema::new(fields))) + Ok(ParsedSchema { + schema: Arc::new(Schema::new(fields)), + geometry_columns, + }) } /// Build an Arrow schema directly from explicit column definitions without validation. @@ -404,20 +587,39 @@ pub fn build_schema_from_columns_for_json( pub fn build_schema_from_columns_unchecked( columns: &HashMap, ) -> Result, SchemaError> { + build_schema_from_columns_unchecked_full(columns).map(|parsed| parsed.schema) +} + +/// Build an Arrow schema with geometry metadata from column definitions without validation. +/// +/// Returns both the schema and any geometry column metadata for GeoParquet output. 
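+///
+/// With no observed data order to follow, fields are emitted in alphabetical order
+/// by column name (e.g. `boundary` sorts before `point_geom`).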
+pub fn build_schema_from_columns_unchecked_full( + columns: &HashMap, +) -> Result { // Sort column names for deterministic order let mut col_names: Vec<&String> = columns.keys().collect(); col_names.sort(); let mut fields = Vec::with_capacity(columns.len()); + let mut geometry_columns = HashMap::new(); + for col_name in col_names { let definition = columns.get(col_name).unwrap(); - let data_type = - parse_column_type(col_name, definition).map_err(SchemaError::InvalidType)?; + let parsed = + parse_column_type_full(col_name, definition).map_err(SchemaError::InvalidType)?; + + if let Some(geo_info) = parsed.geometry_info { + geometry_columns.insert(col_name.clone(), geo_info); + } + // Default to nullable=true since JSON fields can be missing - fields.push(Field::new(col_name.as_str(), data_type, true)); + fields.push(Field::new(col_name.as_str(), parsed.data_type, true)); } - Ok(Arc::new(Schema::new(fields))) + Ok(ParsedSchema { + schema: Arc::new(Schema::new(fields)), + geometry_columns, + }) } #[cfg(test)] @@ -481,6 +683,8 @@ mod tests { data_type: "DECIMAL".to_string(), precision: Some(12), scale: Some(4), + srid: None, + geometry_type: None, }), ); assert!(matches!(result, Ok(DataType::Decimal128(12, 4)))); @@ -726,6 +930,8 @@ mod tests { data_type: "DECIMAL".to_string(), precision: Some(12), scale: None, + srid: None, + geometry_type: None, }), ); assert!(matches!(result, Ok(DataType::Decimal128(12, 0)))); @@ -755,6 +961,8 @@ mod tests { data_type: "DECIMAL".to_string(), precision: Some(0), scale: Some(0), + srid: None, + geometry_type: None, }), ); assert!(result.is_err()); @@ -771,6 +979,8 @@ mod tests { data_type: "DECIMAL".to_string(), precision: Some(39), scale: Some(2), + srid: None, + geometry_type: None, }), ); assert!(result.is_err()); @@ -786,6 +996,8 @@ mod tests { data_type: "DECIMAL".to_string(), precision: Some(10), scale: Some(-1), + srid: None, + geometry_type: None, }), ); assert!(result.is_err()); @@ -801,6 +1013,8 @@ mod tests { data_type: "DECIMAL".to_string(), precision: Some(5), scale: Some(10), + srid: None, + geometry_type: None, }), ); assert!(result.is_err()); @@ -850,4 +1064,209 @@ mod tests { let err = result.unwrap_err(); assert!(err.message.contains("Malformed DECIMAL")); } + + // ========================================================================= + // GEOMETRY type parsing tests + // ========================================================================= + + #[test] + fn test_parse_geometry_simple() { + let result = + parse_column_type_full("geom", &ColumnDefinition::Simple("GEOMETRY".to_string())); + let parsed = result.unwrap(); + assert!(matches!(parsed.data_type, DataType::Binary)); + assert!(parsed.geometry_info.is_some()); + let geo = parsed.geometry_info.unwrap(); + assert_eq!(geo.srid, 4326); // Default to WGS84 + assert!(geo.geometry_type.is_none()); // No type specified + } + + #[test] + fn test_parse_geometry_with_type() { + let result = parse_column_type_full( + "geom", + &ColumnDefinition::Simple("GEOMETRY(Point)".to_string()), + ); + let parsed = result.unwrap(); + assert!(matches!(parsed.data_type, DataType::Binary)); + let geo = parsed.geometry_info.unwrap(); + assert_eq!(geo.srid, 4326); // Default SRID + assert_eq!(geo.geometry_type, Some("Point".to_string())); + } + + #[test] + fn test_parse_geometry_with_type_and_srid() { + let result = parse_column_type_full( + "geom", + &ColumnDefinition::Simple("GEOMETRY(Polygon, 3857)".to_string()), + ); + let parsed = result.unwrap(); + assert!(matches!(parsed.data_type, 
DataType::Binary)); + let geo = parsed.geometry_info.unwrap(); + assert_eq!(geo.srid, 3857); + assert_eq!(geo.geometry_type, Some("Polygon".to_string())); + } + + #[test] + fn test_parse_geography_simple() { + let result = + parse_column_type_full("geog", &ColumnDefinition::Simple("GEOGRAPHY".to_string())); + let parsed = result.unwrap(); + assert!(matches!(parsed.data_type, DataType::Binary)); + let geo = parsed.geometry_info.unwrap(); + assert_eq!(geo.srid, 4326); + } + + #[test] + fn test_parse_geometry_case_insensitive() { + let result = parse_column_type_full( + "geom", + &ColumnDefinition::Simple("geometry(point, 4326)".to_string()), + ); + let parsed = result.unwrap(); + let geo = parsed.geometry_info.unwrap(); + assert_eq!(geo.geometry_type, Some("Point".to_string())); + assert_eq!(geo.srid, 4326); + } + + #[test] + fn test_parse_geometry_detailed_spec() { + let result = parse_column_type_full( + "location", + &ColumnDefinition::Detailed(ColumnTypeSpec { + data_type: "GEOMETRY".to_string(), + precision: None, + scale: None, + srid: Some(4269), + geometry_type: Some("MultiPolygon".to_string()), + }), + ); + let parsed = result.unwrap(); + assert!(matches!(parsed.data_type, DataType::Binary)); + let geo = parsed.geometry_info.unwrap(); + assert_eq!(geo.srid, 4269); + assert_eq!(geo.geometry_type, Some("MultiPolygon".to_string())); + } + + #[test] + fn test_parse_geometry_explicit_overrides_parsed() { + // String says Point,4326 but explicit spec says LineString,3857 + let result = parse_column_type_full( + "geom", + &ColumnDefinition::Detailed(ColumnTypeSpec { + data_type: "GEOMETRY(Point, 4326)".to_string(), + precision: None, + scale: None, + srid: Some(3857), + geometry_type: Some("LineString".to_string()), + }), + ); + let parsed = result.unwrap(); + let geo = parsed.geometry_info.unwrap(); + // Explicit params should win + assert_eq!(geo.srid, 3857); + assert_eq!(geo.geometry_type, Some("LineString".to_string())); + } + + #[test] + fn test_parse_geometry_all_types_normalized() { + let types = [ + ("point", "Point"), + ("LINESTRING", "LineString"), + ("Polygon", "Polygon"), + ("multipoint", "MultiPoint"), + ("MULTILINESTRING", "MultiLineString"), + ("multipolygon", "MultiPolygon"), + ("GeometryCollection", "GeometryCollection"), + ]; + + for (input, expected) in types { + let result = parse_column_type_full( + "geom", + &ColumnDefinition::Simple(format!("GEOMETRY({})", input)), + ); + let parsed = result.unwrap(); + let geo = parsed.geometry_info.unwrap(); + assert_eq!( + geo.geometry_type, + Some(expected.to_string()), + "Failed for input: {}", + input + ); + } + } + + #[test] + fn test_parse_geometry_invalid_srid() { + let result = parse_column_type_full( + "geom", + &ColumnDefinition::Simple("GEOMETRY(Point, abc)".to_string()), + ); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.message.contains("SRID")); + } + + #[test] + fn test_parse_geometry_empty_parens_rejected() { + let result = + parse_column_type_full("geom", &ColumnDefinition::Simple("GEOMETRY()".to_string())); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.message.contains("Malformed geometry")); + } + + #[test] + fn test_build_schema_with_geometry_columns() { + let mut columns = HashMap::new(); + columns.insert( + "id".to_string(), + ColumnDefinition::Simple("INT".to_string()), + ); + columns.insert( + "name".to_string(), + ColumnDefinition::Simple("VARCHAR".to_string()), + ); + columns.insert( + "location".to_string(), + 
ColumnDefinition::Simple("GEOMETRY(Point, 4326)".to_string()), + ); + + let data_columns = vec!["id".to_string(), "name".to_string(), "location".to_string()]; + let parsed = build_schema_from_columns_full(&columns, &data_columns).unwrap(); + + assert_eq!(parsed.schema.fields().len(), 3); + assert!(matches!( + parsed.schema.field(2).data_type(), + DataType::Binary + )); + assert_eq!(parsed.geometry_columns.len(), 1); + assert!(parsed.geometry_columns.contains_key("location")); + let geo = parsed.geometry_columns.get("location").unwrap(); + assert_eq!(geo.srid, 4326); + assert_eq!(geo.geometry_type, Some("Point".to_string())); + } + + #[test] + fn test_build_schema_multiple_geometry_columns() { + let mut columns = HashMap::new(); + columns.insert( + "point_geom".to_string(), + ColumnDefinition::Simple("GEOMETRY(Point, 4326)".to_string()), + ); + columns.insert( + "boundary".to_string(), + ColumnDefinition::Simple("GEOMETRY(Polygon, 3857)".to_string()), + ); + + let data_columns = vec!["point_geom".to_string(), "boundary".to_string()]; + let parsed = build_schema_from_columns_full(&columns, &data_columns).unwrap(); + + assert_eq!(parsed.geometry_columns.len(), 2); + assert_eq!( + parsed.geometry_columns.get("point_geom").unwrap().srid, + 4326 + ); + assert_eq!(parsed.geometry_columns.get("boundary").unwrap().srid, 3857); + } } diff --git a/src/engine.rs b/src/engine.rs index bec5ff9..6c56fb8 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1935,7 +1935,7 @@ impl RuntimeEngine { match data_source { DataSource::FilePath(path) => { // Determine schema: use explicit columns if provided, otherwise infer - let schema = if let Some(ref cols) = explicit_columns { + let (schema, geometry_columns) = if let Some(ref cols) = explicit_columns { // Read header to get column names from data let file = release_on_parse_error!(File::open(&path)); let mut reader = BufReader::new(file); @@ -1944,11 +1944,14 @@ impl RuntimeEngine { .infer_schema(&mut reader, Some(1))); let data_columns: Vec = inferred.fields().iter().map(|f| f.name().clone()).collect(); - // Build schema from explicit column definitions - release_on_schema_error!(crate::datasets::build_schema_from_columns( - cols, - &data_columns - )) + // Build schema from explicit column definitions (with geometry metadata) + let parsed = release_on_schema_error!( + crate::datasets::build_schema_from_columns_full( + cols, + &data_columns + ) + ); + (parsed.schema, parsed.geometry_columns) } else { // Infer schema from file (reads first 10000 rows) let file = release_on_parse_error!(File::open(&path)); @@ -1956,7 +1959,7 @@ impl RuntimeEngine { let (schema, _) = release_on_parse_error!(Format::default() .with_header(true) .infer_schema(&mut reader, Some(10_000))); - Arc::new(schema) + (Arc::new(schema), std::collections::HashMap::new()) }; // Reopen file for streaming read @@ -1967,9 +1970,11 @@ impl RuntimeEngine { .with_batch_size(8192) .build(BufReader::new(file))); - // Initialize writer with schema - let mut writer: Box = - Box::new(StreamingParquetWriter::new(handle.local_path.clone())); + // Initialize writer with schema and geometry metadata + let mut writer = StreamingParquetWriter::new(handle.local_path.clone()); + if !geometry_columns.is_empty() { + writer.set_geometry_columns(geometry_columns); + } release_on_storage_error!(writer.init(&schema)); // Stream batches directly to writer @@ -1980,12 +1985,12 @@ impl RuntimeEngine { release_on_storage_error!(writer.write_batch(&batch)); } - release_on_storage_error!(writer.close()); + 
release_on_storage_error!(Box::new(writer).close()); (schema, row_count) } DataSource::InMemory(data) => { // Determine schema: use explicit columns if provided, otherwise infer - let schema = if let Some(ref cols) = explicit_columns { + let (schema, geometry_columns) = if let Some(ref cols) = explicit_columns { // Read header to get column names from data let mut cursor = std::io::Cursor::new(&data); let (inferred, _) = release_on_parse_error!(Format::default() @@ -1993,18 +1998,21 @@ impl RuntimeEngine { .infer_schema(&mut cursor, Some(1))); let data_columns: Vec = inferred.fields().iter().map(|f| f.name().clone()).collect(); - // Build schema from explicit column definitions - release_on_schema_error!(crate::datasets::build_schema_from_columns( - cols, - &data_columns - )) + // Build schema from explicit column definitions (with geometry metadata) + let parsed = release_on_schema_error!( + crate::datasets::build_schema_from_columns_full( + cols, + &data_columns + ) + ); + (parsed.schema, parsed.geometry_columns) } else { // Infer schema (reads first 10000 rows) let mut cursor = std::io::Cursor::new(&data); let (schema, _) = release_on_parse_error!(Format::default() .with_header(true) .infer_schema(&mut cursor, Some(10_000))); - Arc::new(schema) + (Arc::new(schema), std::collections::HashMap::new()) }; // Reset cursor for reading @@ -2015,8 +2023,10 @@ impl RuntimeEngine { .with_batch_size(8192) .build(cursor)); - let mut writer: Box = - Box::new(StreamingParquetWriter::new(handle.local_path.clone())); + let mut writer = StreamingParquetWriter::new(handle.local_path.clone()); + if !geometry_columns.is_empty() { + writer.set_geometry_columns(geometry_columns); + } release_on_storage_error!(writer.init(&schema)); let mut row_count = 0usize; @@ -2026,7 +2036,7 @@ impl RuntimeEngine { release_on_storage_error!(writer.write_batch(&batch)); } - release_on_storage_error!(writer.close()); + release_on_storage_error!(Box::new(writer).close()); (schema, row_count) } } @@ -2045,21 +2055,22 @@ impl RuntimeEngine { // Determine final schema: use explicit columns validated against observed fields, // or use inferred schema if no explicit columns provided - let schema = if let Some(ref cols) = explicit_columns { + let (schema, geometry_columns) = if let Some(ref cols) = explicit_columns { // Get observed field names from inferred schema let observed_fields: Vec = inferred_schema .fields() .iter() .map(|f| f.name().clone()) .collect(); - release_on_schema_error!( - crate::datasets::build_schema_from_columns_for_json( + let parsed = release_on_schema_error!( + crate::datasets::build_schema_from_columns_for_json_full( cols, &observed_fields ) - ) + ); + (parsed.schema, parsed.geometry_columns) } else { - Arc::new(inferred_schema) + (Arc::new(inferred_schema), std::collections::HashMap::new()) }; // Open file for streaming read @@ -2069,8 +2080,10 @@ impl RuntimeEngine { .with_batch_size(8192) .build(BufReader::new(file))); - let mut writer: Box = - Box::new(StreamingParquetWriter::new(handle.local_path.clone())); + let mut writer = StreamingParquetWriter::new(handle.local_path.clone()); + if !geometry_columns.is_empty() { + writer.set_geometry_columns(geometry_columns); + } release_on_storage_error!(writer.init(&schema)); let mut row_count = 0usize; @@ -2080,7 +2093,7 @@ impl RuntimeEngine { release_on_storage_error!(writer.write_batch(&batch)); } - release_on_storage_error!(writer.close()); + release_on_storage_error!(Box::new(writer).close()); (schema, row_count) } DataSource::InMemory(data) => { @@ 
-2092,21 +2105,22 @@ impl RuntimeEngine { // Determine final schema: use explicit columns validated against observed fields, // or use inferred schema if no explicit columns provided - let schema = if let Some(ref cols) = explicit_columns { + let (schema, geometry_columns) = if let Some(ref cols) = explicit_columns { // Get observed field names from inferred schema let observed_fields: Vec = inferred_schema .fields() .iter() .map(|f| f.name().clone()) .collect(); - release_on_schema_error!( - crate::datasets::build_schema_from_columns_for_json( + let parsed = release_on_schema_error!( + crate::datasets::build_schema_from_columns_for_json_full( cols, &observed_fields ) - ) + ); + (parsed.schema, parsed.geometry_columns) } else { - Arc::new(inferred_schema) + (Arc::new(inferred_schema), std::collections::HashMap::new()) }; // Open cursor for reading @@ -2116,8 +2130,10 @@ impl RuntimeEngine { .with_batch_size(8192) .build(cursor)); - let mut writer: Box = - Box::new(StreamingParquetWriter::new(handle.local_path.clone())); + let mut writer = StreamingParquetWriter::new(handle.local_path.clone()); + if !geometry_columns.is_empty() { + writer.set_geometry_columns(geometry_columns); + } release_on_storage_error!(writer.init(&schema)); let mut row_count = 0usize; @@ -2127,7 +2143,7 @@ impl RuntimeEngine { release_on_storage_error!(writer.write_batch(&batch)); } - release_on_storage_error!(writer.close()); + release_on_storage_error!(Box::new(writer).close()); (schema, row_count) } } diff --git a/src/http/models.rs b/src/http/models.rs index 2d20c43..e928127 100644 --- a/src/http/models.rs +++ b/src/http/models.rs @@ -434,7 +434,7 @@ pub enum ColumnDefinition { /// Detailed column type specification with optional properties. #[derive(Debug, Clone, Deserialize)] pub struct ColumnTypeSpec { - /// The data type name (e.g., "DECIMAL", "TIMESTAMP") + /// The data type name (e.g., "DECIMAL", "TIMESTAMP", "GEOMETRY") #[serde(rename = "type")] pub data_type: String, /// Precision for DECIMAL type (1-38) @@ -443,7 +443,15 @@ pub struct ColumnTypeSpec { /// Scale for DECIMAL type #[serde(default)] pub scale: Option, - // Future: format, srid, geometry_type, timezone, etc. + /// Spatial Reference System Identifier for GEOMETRY/GEOGRAPHY types. + /// Common values: 4326 (WGS84), 3857 (Web Mercator). + #[serde(default)] + pub srid: Option, + /// Geometry type for GEOMETRY/GEOGRAPHY columns. + /// E.g., "Point", "LineString", "Polygon", "MultiPoint", "MultiLineString", + /// "MultiPolygon", "GeometryCollection", or "Geometry" (any). 
+ #[serde(default)] + pub geometry_type: Option, } /// Request body for POST /v1/datasets diff --git a/tests/dataset_e2e_tests.rs b/tests/dataset_e2e_tests.rs index 7c54f1a..1867024 100644 --- a/tests/dataset_e2e_tests.rs +++ b/tests/dataset_e2e_tests.rs @@ -1519,6 +1519,8 @@ async fn test_explicit_columns_csv_decimal() { data_type: "DECIMAL".to_string(), precision: Some(10), scale: Some(2), + srid: None, + geometry_type: None, }), ); From afc77845feaed87173a63bde543913e936bf762d Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 12:23:34 -0800 Subject: [PATCH 05/27] feat(datasets): add hex-decode for geometry columns in CSV/JSON --- Cargo.lock | 1 + Cargo.toml | 1 + src/engine.rs | 158 ++++++++++++++++++++++++++++++++----- tests/dataset_e2e_tests.rs | 100 +++++++++++++++++++++++ 4 files changed, 242 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bfb618c..12b64a9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6706,6 +6706,7 @@ dependencies = [ "gcp-bigquery-client", "geodatafusion", "geozero", + "hex", "http 1.4.0", "iceberg", "iceberg-catalog-glue", diff --git a/Cargo.toml b/Cargo.toml index 85f1490..3a99c1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ datafusion-tracing = "51.0.0" instrumented-object-store = "52.0.0" geodatafusion = "0.2" geozero = { version = "0.15", features = ["with-wkb"] } +hex = "0.4" duckdb = { version = "1.4.4", features = ["bundled"] } sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "mysql", "chrono", "tls-rustls", "bigdecimal"] } bigdecimal = "0.4" diff --git a/src/engine.rs b/src/engine.rs index 6c56fb8..4b5ae06 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -4,7 +4,7 @@ use crate::catalog::{ SavedQuery, SavedQueryVersion, SqliteCatalogManager, TableInfo, }; use crate::datafetch::native::StreamingParquetWriter; -use crate::datafetch::{BatchWriter, FetchOrchestrator, NativeFetcher}; +use crate::datafetch::{BatchWriter, FetchOrchestrator, GeometryColumnInfo, NativeFetcher}; use crate::datafusion::UnifiedCatalogList; use crate::http::models::{ ConnectionRefreshResult, ConnectionSchemaError, RefreshWarning, SchemaRefreshResult, @@ -175,6 +175,94 @@ pub struct RuntimeEngine { stale_result_cleanup_handle: Mutex>>, } +/// Build a reader-compatible schema where geometry (Binary) columns are replaced with Utf8. +/// +/// Arrow's CSV/JSON readers don't support `DataType::Binary` directly, so geometry +/// columns must be read as strings first, then hex-decoded to binary afterwards. +fn build_reader_schema( + schema: &Schema, + geometry_columns: &HashMap, +) -> Arc { + use datafusion::arrow::datatypes::{DataType, Field}; + + if geometry_columns.is_empty() { + return Arc::new(schema.clone()); + } + + let fields: Vec = schema + .fields() + .iter() + .map(|f| { + if geometry_columns.contains_key(f.name()) { + Field::new(f.name(), DataType::Utf8, f.is_nullable()) + } else { + f.as_ref().clone() + } + }) + .collect(); + + Arc::new(Schema::new_with_metadata(fields, schema.metadata().clone())) +} + +/// Hex-decode geometry columns in a RecordBatch. +/// +/// When CSV/JSON data contains geometry values as hex-encoded WKB strings, +/// the reader stores them as Utf8 strings. This function converts those hex +/// strings into actual WKB binary bytes that spatial functions can process, +/// and rebuilds the batch with the target schema (Binary columns). 
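+///
+/// For example, the CSV value `0101000000000000000000F03F0000000000000040`
+/// (little-endian WKB for `POINT(1 2)`, the fixture used in
+/// `test_geometry_csv_hex_decode`) decodes to 21 raw WKB bytes in the output
+/// `BinaryArray`.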
+fn hex_decode_geometry_columns( + batch: &RecordBatch, + target_schema: &Arc, + geometry_columns: &HashMap, +) -> Result { + use datafusion::arrow::array::{BinaryArray, StringArray}; + + if geometry_columns.is_empty() { + return Ok(batch.clone()); + } + + let mut columns: Vec> = + Vec::with_capacity(batch.num_columns()); + + for (i, field) in batch.schema().fields().iter().enumerate() { + if geometry_columns.contains_key(field.name()) { + // This column is a geometry column read as Utf8 - hex-decode to Binary + let col = batch.column(i); + let string_array = col.as_any().downcast_ref::().ok_or_else(|| { + anyhow::anyhow!( + "Geometry column '{}' expected Utf8 type, got {:?}", + field.name(), + col.data_type() + ) + })?; + + let decoded: Vec>> = string_array + .iter() + .map(|opt_val| { + opt_val + .map(|hex_str| { + hex::decode(hex_str).map_err(|e| { + anyhow::anyhow!( + "Geometry column '{}' contains invalid hex: {}", + field.name(), + e + ) + }) + }) + .transpose() + }) + .collect::>>()?; + + let decoded_refs: Vec> = decoded.iter().map(|v| v.as_deref()).collect(); + columns.push(Arc::new(BinaryArray::from(decoded_refs))); + } else { + columns.push(batch.column(i).clone()); + } + } + + Ok(RecordBatch::try_new(target_schema.clone(), columns)?) +} + impl RuntimeEngine { // ========================================================================= // Constructors @@ -1962,18 +2050,20 @@ impl RuntimeEngine { (Arc::new(schema), std::collections::HashMap::new()) }; + // Build reader schema (geometry columns as Utf8 for CSV parsing) + let reader_schema = build_reader_schema(&schema, &geometry_columns); + // Reopen file for streaming read let file = release_on_parse_error!(File::open(&path)); - let csv_reader = - release_on_parse_error!(ReaderBuilder::new(schema.clone()) - .with_header(true) - .with_batch_size(8192) - .build(BufReader::new(file))); + let csv_reader = release_on_parse_error!(ReaderBuilder::new(reader_schema) + .with_header(true) + .with_batch_size(8192) + .build(BufReader::new(file))); - // Initialize writer with schema and geometry metadata + // Initialize writer with target schema and geometry metadata let mut writer = StreamingParquetWriter::new(handle.local_path.clone()); if !geometry_columns.is_empty() { - writer.set_geometry_columns(geometry_columns); + writer.set_geometry_columns(geometry_columns.clone()); } release_on_storage_error!(writer.init(&schema)); @@ -1981,6 +2071,12 @@ impl RuntimeEngine { let mut row_count = 0usize; for batch_result in csv_reader { let batch = release_on_parse_error!(batch_result); + // Hex-decode geometry columns from CSV text to WKB binary + let batch = release_on_parse_error!(hex_decode_geometry_columns( + &batch, + &schema, + &geometry_columns + )); row_count += batch.num_rows(); release_on_storage_error!(writer.write_batch(&batch)); } @@ -2015,23 +2111,31 @@ impl RuntimeEngine { (Arc::new(schema), std::collections::HashMap::new()) }; + // Build reader schema (geometry columns as Utf8 for CSV parsing) + let reader_schema = build_reader_schema(&schema, &geometry_columns); + // Reset cursor for reading let cursor = std::io::Cursor::new(&data); - let csv_reader = - release_on_parse_error!(ReaderBuilder::new(schema.clone()) - .with_header(true) - .with_batch_size(8192) - .build(cursor)); + let csv_reader = release_on_parse_error!(ReaderBuilder::new(reader_schema) + .with_header(true) + .with_batch_size(8192) + .build(cursor)); let mut writer = StreamingParquetWriter::new(handle.local_path.clone()); if !geometry_columns.is_empty() { - 
writer.set_geometry_columns(geometry_columns); + writer.set_geometry_columns(geometry_columns.clone()); } release_on_storage_error!(writer.init(&schema)); let mut row_count = 0usize; for batch_result in csv_reader { let batch = release_on_parse_error!(batch_result); + // Hex-decode geometry columns from CSV text to WKB binary + let batch = release_on_parse_error!(hex_decode_geometry_columns( + &batch, + &schema, + &geometry_columns + )); row_count += batch.num_rows(); release_on_storage_error!(writer.write_batch(&batch)); } @@ -2073,22 +2177,31 @@ impl RuntimeEngine { (Arc::new(inferred_schema), std::collections::HashMap::new()) }; + // Build reader schema (geometry columns as Utf8 for JSON parsing) + let reader_schema = build_reader_schema(&schema, &geometry_columns); + // Open file for streaming read let file = release_on_parse_error!(File::open(&path)); let json_reader = - release_on_parse_error!(arrow_json::ReaderBuilder::new(schema.clone()) + release_on_parse_error!(arrow_json::ReaderBuilder::new(reader_schema) .with_batch_size(8192) .build(BufReader::new(file))); let mut writer = StreamingParquetWriter::new(handle.local_path.clone()); if !geometry_columns.is_empty() { - writer.set_geometry_columns(geometry_columns); + writer.set_geometry_columns(geometry_columns.clone()); } release_on_storage_error!(writer.init(&schema)); let mut row_count = 0usize; for batch_result in json_reader { let batch = release_on_parse_error!(batch_result); + // Hex-decode geometry columns from JSON text to WKB binary + let batch = release_on_parse_error!(hex_decode_geometry_columns( + &batch, + &schema, + &geometry_columns + )); row_count += batch.num_rows(); release_on_storage_error!(writer.write_batch(&batch)); } @@ -2123,22 +2236,31 @@ impl RuntimeEngine { (Arc::new(inferred_schema), std::collections::HashMap::new()) }; + // Build reader schema (geometry columns as Utf8 for JSON parsing) + let reader_schema = build_reader_schema(&schema, &geometry_columns); + // Open cursor for reading let cursor = std::io::Cursor::new(&data); let json_reader = - release_on_parse_error!(arrow_json::ReaderBuilder::new(schema.clone()) + release_on_parse_error!(arrow_json::ReaderBuilder::new(reader_schema) .with_batch_size(8192) .build(cursor)); let mut writer = StreamingParquetWriter::new(handle.local_path.clone()); if !geometry_columns.is_empty() { - writer.set_geometry_columns(geometry_columns); + writer.set_geometry_columns(geometry_columns.clone()); } release_on_storage_error!(writer.init(&schema)); let mut row_count = 0usize; for batch_result in json_reader { let batch = release_on_parse_error!(batch_result); + // Hex-decode geometry columns from JSON text to WKB binary + let batch = release_on_parse_error!(hex_decode_geometry_columns( + &batch, + &schema, + &geometry_columns + )); row_count += batch.num_rows(); release_on_storage_error!(writer.write_batch(&batch)); } diff --git a/tests/dataset_e2e_tests.rs b/tests/dataset_e2e_tests.rs index 1867024..d5dfcb0 100644 --- a/tests/dataset_e2e_tests.rs +++ b/tests/dataset_e2e_tests.rs @@ -2040,3 +2040,103 @@ async fn test_explicit_columns_json_missing_definition_rejected() { err_msg ); } + +/// Test that CSV datasets with GEOMETRY columns properly hex-decode WKB data. +/// +/// When geometry data is provided as hex-encoded WKB strings in CSV (which is +/// the standard PostGIS text representation), the engine must decode the hex +/// to actual binary WKB bytes. 
Without this, spatial functions fail with +/// "WKT error: Unable to parse input number as the desired output type". +#[tokio::test(flavor = "multi_thread")] +async fn test_geometry_csv_hex_decode() { + let (engine, _temp) = create_test_engine().await; + + // WKB hex for POINT(1.0 2.0) in little-endian ISO WKB format: + // 01 = little-endian + // 01000000 = WKB type Point + // 000000000000F03F = 1.0 as f64 LE + // 0000000000000040 = 2.0 as f64 LE + let point_1_wkb_hex = "0101000000000000000000F03F0000000000000040"; + // WKB hex for POINT(3.0 4.0) + let point_2_wkb_hex = "010100000000000000000008400000000000001040"; + + let csv_content = format!( + "name,geom\nAlpha,{}\nBeta,{}", + point_1_wkb_hex, point_2_wkb_hex + ); + + let mut columns = HashMap::new(); + columns.insert( + "name".to_string(), + ColumnDefinition::Simple("VARCHAR".to_string()), + ); + columns.insert( + "geom".to_string(), + ColumnDefinition::Simple("GEOMETRY".to_string()), + ); + + let dataset = engine + .create_dataset( + "Geo CSV Test", + Some("geo_csv_test"), + DatasetSource::Inline { + inline: InlineData { + format: "csv".to_string(), + content: csv_content, + columns: Some(columns), + }, + }, + ) + .await + .unwrap(); + + assert_eq!(dataset.table_name, "geo_csv_test"); + + // Verify spatial functions can parse the geometry data. + // st_geomfromwkb converts WKB binary to native geometry, st_x/st_y extract coords. + // If hex decoding was NOT done, st_geomfromwkb would fail with: + // "WKT error: Unable to parse input number as the desired output type" + // because it would receive the hex characters as bytes instead of actual WKB. + let result = engine + .execute_query(&format!( + "SELECT name, \ + st_x(st_geomfromwkb(geom)) AS x, \ + st_y(st_geomfromwkb(geom)) AS y \ + FROM datasets.{}.geo_csv_test ORDER BY name", + DEFAULT_SCHEMA + )) + .await; + + assert!( + result.is_ok(), + "Spatial query on CSV geometry should succeed: {:?}", + result.err() + ); + + let response = result.unwrap(); + let batch = &response.results[0]; + assert_eq!(batch.num_rows(), 2); + + let name_col = batch.column_by_name("name").unwrap(); + assert_eq!(get_string_value(name_col, 0), "Alpha"); + assert_eq!(get_string_value(name_col, 1), "Beta"); + + let x_col = batch.column_by_name("x").unwrap(); + let y_col = batch.column_by_name("y").unwrap(); + assert!( + (get_f64_value(x_col, 0) - 1.0).abs() < 1e-10, + "Alpha x should be 1.0" + ); + assert!( + (get_f64_value(y_col, 0) - 2.0).abs() < 1e-10, + "Alpha y should be 2.0" + ); + assert!( + (get_f64_value(x_col, 1) - 3.0).abs() < 1e-10, + "Beta x should be 3.0" + ); + assert!( + (get_f64_value(y_col, 1) - 4.0).abs() < 1e-10, + "Beta y should be 4.0" + ); +} From 1d2e42f17506d9d9b1dd6f2d8214c0409784dca1 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 12:24:27 -0800 Subject: [PATCH 06/27] docs(datafetch): fix BatchWriter lifecycle step ordering --- src/datafetch/batch_writer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/datafetch/batch_writer.rs b/src/datafetch/batch_writer.rs index 1d9a932..69d29a1 100644 --- a/src/datafetch/batch_writer.rs +++ b/src/datafetch/batch_writer.rs @@ -17,8 +17,8 @@ pub struct BatchWriteResult { /// A trait for writing Arrow RecordBatches to storage. /// /// Implementors must follow this lifecycle: -/// 1. `init(schema)` - Initialize with the Arrow schema (must be called first) -/// 2. Optionally call `set_geometry_columns()` to enable GeoParquet metadata +/// 1. 
Optionally call `set_geometry_columns()` to enable GeoParquet metadata +/// 2. `init(schema)` - Initialize with the Arrow schema /// 3. `write_batch(batch)` - Write batches (can be called zero or more times) /// 4. `close()` - Finalize and return metadata (consumes the writer) /// From 1472836679a05dea1d5c038ae531f2402993928e Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 12:32:20 -0800 Subject: [PATCH 07/27] feat(duckdb): add ST_AsBinary() wrapping for spatial columns --- src/datafetch/native/duckdb.rs | 228 ++++++++++++++++++++++++++++++++- 1 file changed, 221 insertions(+), 7 deletions(-) diff --git a/src/datafetch/native/duckdb.rs b/src/datafetch/native/duckdb.rs index 85739dd..6d0d9f7 100644 --- a/src/datafetch/native/duckdb.rs +++ b/src/datafetch/native/duckdb.rs @@ -8,6 +8,7 @@ use std::collections::HashMap; use urlencoding::encode; use crate::datafetch::batch_writer::BatchWriter; +use crate::datafetch::types::GeometryColumnInfo; use crate::datafetch::{ColumnMetadata, DataFetchError, TableMetadata}; use crate::secrets::SecretManager; use crate::source::Source; @@ -123,8 +124,10 @@ fn discover_tables_sync( let (catalog, schema, table, table_type, col_name, data_type, is_nullable, ordinal) = row_result.map_err(|e| DataFetchError::Discovery(e.to_string()))?; + let is_spatial = is_spatial_type(&data_type); + let column = ColumnMetadata { - name: col_name, + name: col_name.clone(), data_type: duckdb_type_to_arrow(&data_type), nullable: is_nullable.eq_ignore_ascii_case("YES"), ordinal_position: ordinal, @@ -132,7 +135,7 @@ fn discover_tables_sync( let key = (catalog.clone(), schema.clone(), table.clone()); - table_map + let table_meta = table_map .entry(key) .and_modify(|t| t.columns.push(column.clone())) .or_insert_with(|| TableMetadata { @@ -143,6 +146,11 @@ fn discover_tables_sync( columns: vec![column], geometry_columns: std::collections::HashMap::new(), }); + + if is_spatial { + let geo_info = parse_duckdb_geometry_info(&data_type); + table_meta.geometry_columns.insert(col_name, geo_info); + } } let tables: Vec = table_map.into_values().collect(); @@ -228,11 +236,8 @@ fn fetch_table_to_channel( let conn = Connection::open(connection_string) .map_err(|e| DataFetchError::Connection(e.to_string()))?; - let query = format!( - "SELECT * FROM \"{}\".\"{}\"", - schema.replace('"', "\"\""), - table.replace('"', "\"\"") - ); + // Query column types to detect spatial columns for ST_AsBinary wrapping + let query = build_fetch_query(&conn, schema, table)?; let mut stmt = conn .prepare(&query) @@ -266,6 +271,89 @@ fn fetch_table_to_channel( Ok(()) } +/// Build a SELECT query for fetching table data, wrapping spatial columns with ST_AsBinary(). +/// +/// DuckDB's Spatial extension stores geometry in an internal format. ST_AsBinary() converts +/// it to standard WKB, matching the approach used by the PostgreSQL and MySQL adapters. 
+fn build_fetch_query( + conn: &Connection, + schema: &str, + table: &str, +) -> Result<String, DataFetchError> { + let escaped_schema = schema.replace('"', "\"\""); + let escaped_table = table.replace('"', "\"\""); + + // Query column types to detect spatial columns + let col_query = format!( + "SELECT column_name, data_type FROM information_schema.columns \ + WHERE table_schema = '{}' AND table_name = '{}' ORDER BY ordinal_position", + schema.replace('\'', "''"), + table.replace('\'', "''") + ); + + let mut stmt = conn + .prepare(&col_query) + .map_err(|e| DataFetchError::Discovery(e.to_string()))?; + + let col_rows = stmt + .query_map([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + }) + .map_err(|e| DataFetchError::Discovery(e.to_string()))?; + + let mut has_spatial = false; + let mut column_exprs = Vec::new(); + + for row_result in col_rows { + let (col_name, data_type) = + row_result.map_err(|e| DataFetchError::Discovery(e.to_string()))?; + let escaped_col = format!("\"{}\"", col_name.replace('"', "\"\"")); + + if is_spatial_type(&data_type) { + has_spatial = true; + column_exprs.push(format!("ST_AsBinary({}) AS {}", escaped_col, escaped_col)); + } else { + column_exprs.push(escaped_col); + } + } + + if has_spatial { + Ok(format!( + "SELECT {} FROM \"{}\".\"{}\"", + column_exprs.join(", "), + escaped_schema, + escaped_table + )) + } else { + Ok(format!( + "SELECT * FROM \"{}\".\"{}\"", + escaped_schema, escaped_table + )) + } +} + +/// Parse DuckDB spatial type into GeometryColumnInfo. +/// +/// DuckDB spatial types (GEOMETRY, POINT_2D, etc.) don't carry SRID information +/// in the type system, so we default to SRID 0 (unspecified) and map the type name. +fn parse_duckdb_geometry_info(data_type: &str) -> GeometryColumnInfo { + let type_upper = data_type.to_uppercase(); + let base_type = type_upper.split('(').next().unwrap_or(&type_upper).trim(); + + let geometry_type = match base_type { + "POINT_2D" => Some("Point".to_string()), + "LINESTRING_2D" => Some("LineString".to_string()), + "POLYGON_2D" => Some("Polygon".to_string()), + "BOX_2D" => Some("Polygon".to_string()), // Box is a rectangular polygon + _ => None, // GEOMETRY, WKB_BLOB: unknown sub-type + }; + + GeometryColumnInfo { + srid: 0, // DuckDB spatial doesn't expose SRID in column type + geometry_type, + } +} + /// Convert an arrow 56 Schema (from duckdb) to datafusion arrow Schema (arrow 57) /// using IPC serialization as a bridge between arrow versions. 
fn convert_arrow_schema( @@ -676,4 +764,130 @@ mod tests { other => panic!("Expected Decimal128, got {:?}", other), } } + + // ========================================================================= + // Spatial types + // ========================================================================= + + #[test] + fn test_duckdb_spatial_types_map_to_binary() { + assert!(matches!(duckdb_type_to_arrow("GEOMETRY"), DataType::Binary)); + assert!(matches!(duckdb_type_to_arrow("POINT_2D"), DataType::Binary)); + assert!(matches!( + duckdb_type_to_arrow("LINESTRING_2D"), + DataType::Binary + )); + assert!(matches!( + duckdb_type_to_arrow("POLYGON_2D"), + DataType::Binary + )); + assert!(matches!(duckdb_type_to_arrow("BOX_2D"), DataType::Binary)); + assert!(matches!(duckdb_type_to_arrow("WKB_BLOB"), DataType::Binary)); + } + + #[test] + fn test_is_spatial_type() { + assert!(is_spatial_type("GEOMETRY")); + assert!(is_spatial_type("POINT_2D")); + assert!(is_spatial_type("LINESTRING_2D")); + assert!(is_spatial_type("POLYGON_2D")); + assert!(is_spatial_type("BOX_2D")); + assert!(is_spatial_type("WKB_BLOB")); + // Case insensitive + assert!(is_spatial_type("geometry")); + assert!(is_spatial_type("point_2d")); + + // Non-spatial types + assert!(!is_spatial_type("INTEGER")); + assert!(!is_spatial_type("VARCHAR")); + assert!(!is_spatial_type("BLOB")); + assert!(!is_spatial_type("BINARY")); + } + + #[test] + fn test_parse_duckdb_geometry_info_generic() { + let info = parse_duckdb_geometry_info("GEOMETRY"); + assert_eq!(info.srid, 0); // DuckDB doesn't expose SRID + assert!(info.geometry_type.is_none()); + } + + #[test] + fn test_parse_duckdb_geometry_info_typed() { + let info = parse_duckdb_geometry_info("POINT_2D"); + assert_eq!(info.srid, 0); + assert_eq!(info.geometry_type, Some("Point".to_string())); + + let info = parse_duckdb_geometry_info("LINESTRING_2D"); + assert_eq!(info.geometry_type, Some("LineString".to_string())); + + let info = parse_duckdb_geometry_info("POLYGON_2D"); + assert_eq!(info.geometry_type, Some("Polygon".to_string())); + + let info = parse_duckdb_geometry_info("BOX_2D"); + assert_eq!(info.geometry_type, Some("Polygon".to_string())); + } + + #[test] + fn test_parse_duckdb_geometry_info_case_insensitive() { + let info = parse_duckdb_geometry_info("point_2d"); + assert_eq!(info.geometry_type, Some("Point".to_string())); + } + + #[test] + fn test_parse_duckdb_geometry_info_wkb_blob() { + let info = parse_duckdb_geometry_info("WKB_BLOB"); + assert_eq!(info.srid, 0); + assert!(info.geometry_type.is_none()); // WKB_BLOB is opaque + } + + #[test] + fn test_build_fetch_query_no_spatial() { + let conn = Connection::open_in_memory().unwrap(); + conn.execute_batch( + "CREATE SCHEMA test_schema; \ + CREATE TABLE test_schema.test_table (id INTEGER, name VARCHAR);", + ) + .unwrap(); + + let query = build_fetch_query(&conn, "test_schema", "test_table").unwrap(); + // No spatial columns, should be a plain SELECT * + assert_eq!(query, "SELECT * FROM \"test_schema\".\"test_table\""); + } + + #[test] + fn test_build_fetch_query_with_spatial() { + let conn = Connection::open_in_memory().unwrap(); + // Load spatial extension and create table with geometry column + let has_spatial = conn.execute_batch("INSTALL spatial; LOAD spatial;").is_ok(); + + if !has_spatial { + // Skip test if spatial extension not available + return; + } + + conn.execute_batch( + "CREATE SCHEMA geo_schema; \ + CREATE TABLE geo_schema.geo_table (id INTEGER, geom GEOMETRY, name VARCHAR);", + ) + .unwrap(); + + let query = 
build_fetch_query(&conn, "geo_schema", "geo_table").unwrap(); + // Spatial column should be wrapped with ST_AsBinary + assert!( + query.contains("ST_AsBinary(\"geom\") AS \"geom\""), + "Expected ST_AsBinary wrapping, got: {}", + query + ); + // Non-spatial columns should be plain + assert!( + query.contains("\"id\""), + "Expected plain id column, got: {}", + query + ); + assert!( + query.contains("\"name\""), + "Expected plain name column, got: {}", + query + ); + } } From 4957a03738b6e73cd4a8131c5510eae0160a9071 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 12:34:16 -0800 Subject: [PATCH 08/27] test(datasets): add JSON geometry hex-decode test --- tests/dataset_e2e_tests.rs | 85 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/tests/dataset_e2e_tests.rs b/tests/dataset_e2e_tests.rs index d5dfcb0..89c6aca 100644 --- a/tests/dataset_e2e_tests.rs +++ b/tests/dataset_e2e_tests.rs @@ -2140,3 +2140,88 @@ async fn test_geometry_csv_hex_decode() { "Beta y should be 4.0" ); } + +#[tokio::test(flavor = "multi_thread")] +async fn test_geometry_json_hex_decode() { + let (engine, _temp) = create_test_engine().await; + + // Same WKB hex values as the CSV test + let point_1_wkb_hex = "0101000000000000000000F03F0000000000000040"; + let point_2_wkb_hex = "010100000000000000000008400000000000001040"; + + let json_content = format!( + r#"{{"name": "Alpha", "geom": "{}"}}{}{{"name": "Beta", "geom": "{}"}}"#, + point_1_wkb_hex, "\n", point_2_wkb_hex + ); + + let mut columns = HashMap::new(); + columns.insert( + "name".to_string(), + ColumnDefinition::Simple("VARCHAR".to_string()), + ); + columns.insert( + "geom".to_string(), + ColumnDefinition::Simple("GEOMETRY".to_string()), + ); + + let dataset = engine + .create_dataset( + "Geo JSON Test", + Some("geo_json_test"), + DatasetSource::Inline { + inline: InlineData { + format: "json".to_string(), + content: json_content, + columns: Some(columns), + }, + }, + ) + .await + .unwrap(); + + assert_eq!(dataset.table_name, "geo_json_test"); + + // Verify spatial functions work exactly as with CSV + let result = engine + .execute_query(&format!( + "SELECT name, \ + st_x(st_geomfromwkb(geom)) AS x, \ + st_y(st_geomfromwkb(geom)) AS y \ + FROM datasets.{}.geo_json_test ORDER BY name", + DEFAULT_SCHEMA + )) + .await; + + assert!( + result.is_ok(), + "Spatial query on JSON geometry should succeed: {:?}", + result.err() + ); + + let response = result.unwrap(); + let batch = &response.results[0]; + assert_eq!(batch.num_rows(), 2); + + let name_col = batch.column_by_name("name").unwrap(); + assert_eq!(get_string_value(name_col, 0), "Alpha"); + assert_eq!(get_string_value(name_col, 1), "Beta"); + + let x_col = batch.column_by_name("x").unwrap(); + let y_col = batch.column_by_name("y").unwrap(); + assert!( + (get_f64_value(x_col, 0) - 1.0).abs() < 1e-10, + "Alpha x should be 1.0" + ); + assert!( + (get_f64_value(y_col, 0) - 2.0).abs() < 1e-10, + "Alpha y should be 2.0" + ); + assert!( + (get_f64_value(x_col, 1) - 3.0).abs() < 1e-10, + "Beta x should be 3.0" + ); + assert!( + (get_f64_value(y_col, 1) - 4.0).abs() < 1e-10, + "Beta y should be 4.0" + ); +} From 44de90771cfda52961d2d377e2c6110fb9218ac8 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 12:38:13 -0800 Subject: [PATCH 09/27] test(datasets): add Parquet geometry round-trip test --- tests/dataset_e2e_tests.rs | 140 +++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/tests/dataset_e2e_tests.rs 
b/tests/dataset_e2e_tests.rs index 89c6aca..b305a35 100644 --- a/tests/dataset_e2e_tests.rs +++ b/tests/dataset_e2e_tests.rs @@ -2225,3 +2225,143 @@ async fn test_geometry_json_hex_decode() { "Beta y should be 4.0" ); } + +#[tokio::test(flavor = "multi_thread")] +async fn test_geometry_parquet_round_trip() { + use datafusion::arrow::array::BinaryArray; + use datafusion::arrow::datatypes::{Field, Schema}; + use datafusion::arrow::record_batch::RecordBatch; + use datafusion::parquet::arrow::ArrowWriter; + use datafusion::parquet::basic::Compression; + use datafusion::parquet::file::properties::WriterProperties; + use std::sync::Arc; + + let (engine, _temp) = create_test_engine().await; + + // Build WKB bytes for POINT(1.0 2.0) and POINT(3.0 4.0) + let wkb_point1: Vec = { + let mut v = vec![0x01u8]; // little-endian + v.extend_from_slice(&1u32.to_le_bytes()); // WKB type Point + v.extend_from_slice(&1.0f64.to_le_bytes()); + v.extend_from_slice(&2.0f64.to_le_bytes()); + v + }; + let wkb_point2: Vec = { + let mut v = vec![0x01u8]; + v.extend_from_slice(&1u32.to_le_bytes()); + v.extend_from_slice(&3.0f64.to_le_bytes()); + v.extend_from_slice(&4.0f64.to_le_bytes()); + v + }; + + let schema = Arc::new(Schema::new(vec![ + Field::new("name", DataType::Utf8, false), + Field::new("geom", DataType::Binary, true), + ])); + + let name_array = datafusion::arrow::array::StringArray::from(vec!["Alpha", "Beta"]); + let geom_array = BinaryArray::from_vec(vec![&wkb_point1, &wkb_point2]); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(name_array), Arc::new(geom_array)], + ) + .unwrap(); + + // Write Parquet with GeoParquet metadata + let geo_metadata = serde_json::json!({ + "version": "1.1.0", + "primary_column": "geom", + "columns": { + "geom": { + "encoding": "WKB", + "geometry_types": ["Point"], + "crs": { + "id": { "authority": "EPSG", "code": 4326 } + } + } + } + }); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .set_key_value_metadata(Some(vec![ + datafusion::parquet::file::metadata::KeyValue::new( + "geo".to_string(), + geo_metadata.to_string(), + ), + ])) + .build(); + + let mut buf = Vec::new(); + { + let mut writer = ArrowWriter::try_new(&mut buf, schema, Some(props)).unwrap(); + writer.write(&batch).unwrap(); + writer.close().unwrap(); + } + + // Upload the Parquet file + let upload = engine + .store_upload(buf, Some("application/octet-stream".to_string())) + .await + .unwrap(); + + let dataset = engine + .create_dataset( + "Geo Parquet Test", + Some("geo_parquet_test"), + DatasetSource::Upload { + upload_id: upload.id, + format: Some("parquet".to_string()), + columns: None, + }, + ) + .await + .unwrap(); + + assert_eq!(dataset.table_name, "geo_parquet_test"); + + // Query geometry via spatial functions — verifies the binary WKB data survived round-trip + let result = engine + .execute_query(&format!( + "SELECT name, \ + st_x(st_geomfromwkb(geom)) AS x, \ + st_y(st_geomfromwkb(geom)) AS y \ + FROM datasets.{}.geo_parquet_test ORDER BY name", + DEFAULT_SCHEMA + )) + .await; + + assert!( + result.is_ok(), + "Spatial query on Parquet geometry should succeed: {:?}", + result.err() + ); + + let response = result.unwrap(); + let batch = &response.results[0]; + assert_eq!(batch.num_rows(), 2); + + let name_col = batch.column_by_name("name").unwrap(); + assert_eq!(get_string_value(name_col, 0), "Alpha"); + assert_eq!(get_string_value(name_col, 1), "Beta"); + + let x_col = batch.column_by_name("x").unwrap(); + let y_col = 
batch.column_by_name("y").unwrap(); + assert!( + (get_f64_value(x_col, 0) - 1.0).abs() < 1e-10, + "Alpha x should be 1.0" + ); + assert!( + (get_f64_value(y_col, 0) - 2.0).abs() < 1e-10, + "Alpha y should be 2.0" + ); + assert!( + (get_f64_value(x_col, 1) - 3.0).abs() < 1e-10, + "Beta x should be 3.0" + ); + assert!( + (get_f64_value(y_col, 1) - 4.0).abs() < 1e-10, + "Beta y should be 4.0" + ); +} From 7ba14e94b02e819d3c8a1b7451fad896ade475c8 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 12:39:19 -0800 Subject: [PATCH 10/27] fix(geoparquet): make primary_column selection deterministic --- src/datafetch/native/parquet_writer.rs | 41 ++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/src/datafetch/native/parquet_writer.rs b/src/datafetch/native/parquet_writer.rs index 06b801f..8dffdb4 100644 --- a/src/datafetch/native/parquet_writer.rs +++ b/src/datafetch/native/parquet_writer.rs @@ -83,8 +83,8 @@ impl GeoParquetMetadata { return None; } - // Find the first geometry column to be the primary - let primary_column = columns.keys().next()?.clone(); + // Pick the alphabetically first geometry column as primary for determinism + let primary_column = columns.keys().min()?.clone(); let geo_columns: HashMap<String, GeoColumnMetadata> = columns .iter() @@ -646,4 +646,41 @@ mod tests { "Regular parquet should not have 'geo' metadata" ); } + + #[test] + fn test_primary_column_deterministic_with_multiple_geometry_columns() { + // When multiple geometry columns exist, primary_column must be + // deterministic (alphabetically first), not random HashMap order. + let mut columns = HashMap::new(); + columns.insert( + "z_geom".to_string(), + GeometryColumnInfo { + srid: 4326, + geometry_type: Some("Point".to_string()), + }, + ); + columns.insert( + "a_geom".to_string(), + GeometryColumnInfo { + srid: 4326, + geometry_type: Some("Polygon".to_string()), + }, + ); + columns.insert( + "m_geom".to_string(), + GeometryColumnInfo { + srid: 0, + geometry_type: None, + }, + ); + + // Run multiple times to catch nondeterminism + for _ in 0..20 { + let meta = GeoParquetMetadata::from_geometry_columns(&columns).unwrap(); + assert_eq!( + meta.primary_column, "a_geom", + "primary_column should always be alphabetically first" + ); + } + } } From c51885f2a4bfb24299a682632a05643c917af4b9 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 12:41:36 -0800 Subject: [PATCH 11/27] refactor(gis): deduplicate normalize_geometry_type --- src/datafetch/mod.rs | 2 +- src/datafetch/native/parquet_writer.rs | 16 +--------------- src/datafetch/types.rs | 19 +++++++++++++++ src/datasets/schema.rs | 24 +++--------------------- 4 files changed, 24 insertions(+), 37 deletions(-) diff --git a/src/datafetch/mod.rs b/src/datafetch/mod.rs index 0cc5285..0a758ba 100644 --- a/src/datafetch/mod.rs +++ b/src/datafetch/mod.rs @@ -3,7 +3,7 @@ mod error; mod fetcher; pub mod native; mod orchestrator; -mod types; +pub(crate) mod types; pub use batch_writer::{BatchWriteResult, BatchWriter}; pub use error::DataFetchError; diff --git a/src/datafetch/native/parquet_writer.rs b/src/datafetch/native/parquet_writer.rs index 8dffdb4..b4028bd 100644 --- a/src/datafetch/native/parquet_writer.rs +++ b/src/datafetch/native/parquet_writer.rs @@ -181,21 +181,7 @@ pub fn parse_geoparquet_metadata(geo_json: &str) -> HashMap<String, GeometryColumnInfo> { -fn normalize_geometry_type(geom_type: &str) -> String { - match geom_type.to_uppercase().as_str() { - "POINT" => "Point", - "LINESTRING" => "LineString", - "POLYGON" => "Polygon", - "MULTIPOINT" => "MultiPoint", - "MULTILINESTRING" => "MultiLineString", - "MULTIPOLYGON" => "MultiPolygon", - "GEOMETRYCOLLECTION" => "GeometryCollection", - "GEOMETRY" => "Geometry", - _ => geom_type, - } - .to_string() -} +use super::super::types::normalize_geometry_type; impl BatchWriter for StreamingParquetWriter { fn set_geometry_columns(&mut self, columns: HashMap<String, GeometryColumnInfo>) { diff --git a/src/datafetch/types.rs b/src/datafetch/types.rs index f1bb7c1..71e108a 100644 --- a/src/datafetch/types.rs +++ b/src/datafetch/types.rs @@ -85,6 +85,25 @@ pub fn extract_geometry_columns(schema: &Schema) -> HashMap<String, GeometryColumnInfo> { +/// Normalize geometry type names to standard capitalization. +pub fn normalize_geometry_type(geom_type: &str) -> String { + match geom_type.to_uppercase().as_str() { + "POINT" => "Point", + "LINESTRING" => "LineString", + "POLYGON" => "Polygon", + "MULTIPOINT" => "MultiPoint", + "MULTILINESTRING" => "MultiLineString", + "MULTIPOLYGON" => "MultiPolygon", + "GEOMETRYCOLLECTION" => "GeometryCollection", + "GEOMETRY" => "Geometry", + _ => geom_type, + } + .to_string() +} + /// Deserialize an Arrow Schema from JSON string pub fn deserialize_arrow_schema(json: &str) -> Result<Arc<Schema>> { let schema: Schema = serde_json::from_str(json)?; diff --git a/src/datasets/schema.rs b/src/datasets/schema.rs index 1313d7e..adef88d 100644 --- a/src/datasets/schema.rs +++ b/src/datasets/schema.rs @@ -1,5 +1,6 @@ //! Schema building from explicit column definitions. +use crate::datafetch::types::normalize_geometry_type; use crate::datafetch::GeometryColumnInfo; use crate::http::models::ColumnDefinition; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; @@ -290,12 +291,12 @@ fn parse_geometry_info( match parts.len() { 1 if !parts[0].is_empty() => { // GEOMETRY(Point) - type only - let geom_type = normalize_geometry_type(parts[0])?; + let geom_type = normalize_geometry_type(parts[0]); (Some(geom_type), None) } 2 => { // GEOMETRY(Point, 4326) - type and SRID - let geom_type = normalize_geometry_type(parts[0])?; + let geom_type = normalize_geometry_type(parts[0]); let srid = parts[1].parse::<i32>().map_err(|_| ColumnTypeError { column_name: column_name.to_string(), message: format!("Invalid SRID '{}': must be an integer", parts[1]), @@ -332,25 +333,6 @@ fn parse_geometry_info( }) } -/// Normalize geometry type names to standard capitalization. -fn normalize_geometry_type(type_name: &str) -> Result<String, ColumnTypeError> { - let normalized = match type_name.to_uppercase().as_str() { - "POINT" => "Point", - "LINESTRING" => "LineString", - "POLYGON" => "Polygon", - "MULTIPOINT" => "MultiPoint", - "MULTILINESTRING" => "MultiLineString", - "MULTIPOLYGON" => "MultiPolygon", - "GEOMETRYCOLLECTION" => "GeometryCollection", - "GEOMETRY" => "Geometry", - _ => { - // Accept unknown types but warn - could be a database-specific extension - return Ok(type_name.to_string()); - } - }; - Ok(normalized.to_string()) -} - /// Parse DECIMAL(precision, scale) parameters from type string. /// Returns (precision, scale) or error if invalid. 
fn parse_decimal_params(column_name: &str, type_str: &str) -> Result<(u8, i8), ColumnTypeError> { From 015284517bb91cbdd2c8c0bf0d7bc4e42561bdfa Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 12:42:35 -0800 Subject: [PATCH 12/27] chore: remove unused geozero dependency --- Cargo.lock | 34 ---------------------------------- Cargo.toml | 1 - 2 files changed, 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 12b64a9..fa2f9c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3895,33 +3895,6 @@ dependencies = [ "libm", ] -[[package]] -name = "geojson" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e26f3c45b36fccc9cf2805e61d4da6bc4bbd5a3a9589b01afa3a40eff703bd79" -dependencies = [ - "log", - "serde", - "serde_json", - "thiserror 2.0.18", -] - -[[package]] -name = "geozero" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db5eda63aa99ac06160fd53328ed75c34f14e3196d3f56a3649e247ed796e54b" -dependencies = [ - "geo-types", - "geojson", - "log", - "scroll", - "serde_json", - "thiserror 2.0.18", - "wkt", -] - [[package]] name = "getrandom" version = "0.2.17" @@ -6705,7 +6678,6 @@ dependencies = [ "futures", "gcp-bigquery-client", "geodatafusion", - "geozero", "hex", "http 1.4.0", "iceberg", @@ -6967,12 +6939,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "scroll" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1257cd4248b4132760d6524d6dda4e053bc648c9070b960929bf50cfb1e7add" - [[package]] name = "sct" version = "0.7.1" diff --git a/Cargo.toml b/Cargo.toml index 3a99c1c..ef609bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,6 @@ datafusion = "51.0" datafusion-tracing = "51.0.0" instrumented-object-store = "52.0.0" geodatafusion = "0.2" -geozero = { version = "0.15", features = ["with-wkb"] } hex = "0.4" duckdb = { version = "1.4.4", features = ["bundled"] } sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "mysql", "chrono", "tls-rustls", "bigdecimal"] } From 74b9645b10f70fdc0d46c6d3e828fe5cac4d2a67 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 13:01:28 -0800 Subject: [PATCH 13/27] feat(duckdb): add spatial extension resilience Load spatial extension in discover_tables_sync and fetch_table_to_channel. Add graceful fallback in build_fetch_query when ST_AsBinary is unavailable (bundled crate limitation). 
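As a rough illustration (the table and column names here are hypothetical, not part of this change), a table geo.places(id INTEGER, geom GEOMETRY, name VARCHAR) would now be fetched with a query shaped like

    SELECT "id", ST_AsBinary("geom") AS "geom", "name" FROM "geo"."places"

while tables without spatial columns keep the plain SELECT * form.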
--- src/datafetch/native/duckdb.rs | 58 ++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/src/datafetch/native/duckdb.rs b/src/datafetch/native/duckdb.rs index 6d0d9f7..5254641 100644 --- a/src/datafetch/native/duckdb.rs +++ b/src/datafetch/native/duckdb.rs @@ -79,6 +79,9 @@ fn discover_tables_sync( let conn = Connection::open(connection_string) .map_err(|e| DataFetchError::Connection(e.to_string()))?; + // Load spatial extension if available — needed for spatial type detection + let _ = conn.execute_batch("LOAD spatial;"); + let query = r#" SELECT t.table_catalog, @@ -236,6 +239,9 @@ fn fetch_table_to_channel( let conn = Connection::open(connection_string) .map_err(|e| DataFetchError::Connection(e.to_string()))?; + // Load spatial extension if available — needed for ST_AsBinary() wrapping + let _ = conn.execute_batch("LOAD spatial;"); + // Query column types to detect spatial columns for ST_AsBinary wrapping let query = build_fetch_query(&conn, schema, table)?; @@ -275,6 +281,7 @@ fn fetch_table_to_channel( /// /// DuckDB's Spatial extension stores geometry in an internal format. ST_AsBinary() converts /// it to standard WKB, matching the approach used by the PostgreSQL and MySQL adapters. +/// Falls back to `SELECT *` if the spatial extension is not functional on this connection. fn build_fetch_query( conn: &Connection, schema: &str, @@ -318,12 +325,25 @@ fn build_fetch_query( } if has_spatial { - Ok(format!( - "SELECT {} FROM \"{}\".\"{}\"", - column_exprs.join(", "), - escaped_schema, - escaped_table - )) + // Verify that ST_AsBinary is actually available on this connection. + // The spatial extension may fail to load on file-based DuckDB connections + // opened via the bundled crate (LOAD spatial returns Ok but functions are absent). 
+ let spatial_works = conn.prepare("SELECT ST_AsBinary(ST_Point(0,0))").is_ok(); + + if spatial_works { + Ok(format!( + "SELECT {} FROM \"{}\".\"{}\"", + column_exprs.join(", "), + escaped_schema, + escaped_table + )) + } else { + // Fall back to SELECT * — geometry will arrive in DuckDB's internal format + Ok(format!( + "SELECT * FROM \"{}\".\"{}\"", + escaped_schema, escaped_table + )) + } } else { Ok(format!( "SELECT * FROM \"{}\".\"{}\"", @@ -857,11 +877,12 @@ mod tests { #[test] fn test_build_fetch_query_with_spatial() { let conn = Connection::open_in_memory().unwrap(); - // Load spatial extension and create table with geometry column - let has_spatial = conn.execute_batch("INSTALL spatial; LOAD spatial;").is_ok(); + // Load spatial extension and verify it actually works + let has_spatial = conn.execute_batch("INSTALL spatial; LOAD spatial;").is_ok() + && conn.prepare("SELECT ST_AsBinary(ST_Point(0,0))").is_ok(); if !has_spatial { - // Skip test if spatial extension not available + // Skip test if spatial extension not available or not functional return; } @@ -890,4 +911,23 @@ mod tests { query ); } + + #[test] + fn test_build_fetch_query_spatial_fallback() { + // When spatial extension is not functional, build_fetch_query should + // fall back to SELECT * instead of failing with ST_AsBinary + let conn = Connection::open_in_memory().unwrap(); + + // Create a table that looks like it has spatial columns via information_schema + // but WITHOUT loading spatial — simulates the bundled crate limitation + conn.execute_batch( + "CREATE SCHEMA test_schema; \ + CREATE TABLE test_schema.fallback_test (id INTEGER, name VARCHAR);", + ) + .unwrap(); + + // No spatial columns → SELECT * + let query = build_fetch_query(&conn, "test_schema", "fallback_test").unwrap(); + assert_eq!(query, "SELECT * FROM \"test_schema\".\"fallback_test\""); + } } From 6644d3104967f0609924b28b090ceb5f4b039329 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 13:01:31 -0800 Subject: [PATCH 14/27] test(type_coverage): add GEOMETRY type support --- tests/type_coverage/duckdb_types.rs | 95 +++++++++++++++++++++++++++ tests/type_coverage/harness.rs | 3 +- tests/type_coverage/mysql_types.rs | 21 ++++++ tests/type_coverage/postgres_types.rs | 13 ++++ 4 files changed, 131 insertions(+), 1 deletion(-) diff --git a/tests/type_coverage/duckdb_types.rs b/tests/type_coverage/duckdb_types.rs index 19dae6e..2ccb56d 100644 --- a/tests/type_coverage/duckdb_types.rs +++ b/tests/type_coverage/duckdb_types.rs @@ -43,6 +43,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Boolean, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -64,6 +65,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Int8, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // SMALLINT: -32768 to 32767 @@ -82,6 +84,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Int16, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // INTEGER: -2147483648 to 2147483647 @@ -100,6 +103,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Int32, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // BIGINT: -9223372036854775808 to 9223372036854775807 @@ -118,6 +122,7 @@ fn build_duckdb_test_cases() -> Vec { 
expected_arrow_type: DataType::Int64, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // UTINYINT: 0 to 255 @@ -136,6 +141,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::UInt8, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // USMALLINT: 0 to 65535 @@ -154,6 +160,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::UInt16, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // UINTEGER: 0 to 4294967295 @@ -172,6 +179,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::UInt32, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // UBIGINT: 0 to 18446744073709551615 @@ -190,6 +198,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::UInt64, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -215,6 +224,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Float32, values: float_values, shape: TestShape::Scalar, + setup_sql: None, }); // DOUBLE (64-bit) - exclude NaN/Infinity (DuckDB doesn't support these literals) @@ -231,6 +241,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Float64, values: double_values, shape: TestShape::Scalar, + setup_sql: None, }); } @@ -249,6 +260,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Decimal128(38, 10), values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -270,6 +282,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Utf8, values: varchar_values, shape: TestShape::Scalar, + setup_sql: None, }); } @@ -285,6 +298,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None), values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -298,6 +312,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Utf8, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -314,6 +329,7 @@ fn build_duckdb_test_cases() -> Vec { ), values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -355,6 +371,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Date32, values: date_values, shape: TestShape::Scalar, + setup_sql: None, }); } @@ -399,6 +416,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Time64(TimeUnit::Microsecond), values: time_values, shape: TestShape::Scalar, + setup_sql: None, }); } @@ -437,6 +455,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Interval(arrow_schema::IntervalUnit::MonthDayNano), values: interval_values, shape: TestShape::Scalar, + setup_sql: None, }); } @@ -483,6 +502,7 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Binary, values: blob_values, shape: TestShape::Scalar, + setup_sql: None, }); } @@ -528,8 +548,55 @@ fn build_duckdb_test_cases() -> Vec { expected_arrow_type: DataType::Decimal128(38, 0), values: hugeint_values, shape: TestShape::Scalar, + setup_sql: None, }); + // 
======================================================================== + // GEOMETRY type - requires DuckDB Spatial extension + // DuckDB stores geometry in an internal format; fetch converts via ST_AsBinary() + // to standard WKB Binary, matching PostgreSQL and MySQL adapters. + // ======================================================================== + { + // WKB hex for POINT(1.0 2.0): 01 01000000 000000000000F03F 0000000000000040 + let point_wkb = "0101000000000000000000f03f0000000000000040"; + // WKB hex for POINT(0.0 0.0): 01 01000000 0000000000000000 0000000000000000 + let origin_wkb = "01010000000000000000000000000000000000000000"; + + let geom_values = vec![ + TestValue { + sql_literal: "ST_Point(1.0, 2.0)".to_string(), + expected: ExpectedOutput::String(point_wkb.to_string()), + comparison: ComparisonMode::Exact, + note: Some("Point(1,2)".to_string()), + }, + TestValue { + sql_literal: "ST_Point(0.0, 0.0)".to_string(), + expected: ExpectedOutput::String(origin_wkb.to_string()), + comparison: ComparisonMode::Exact, + note: Some("Origin".to_string()), + }, + TestValue { + sql_literal: "NULL".to_string(), + expected: ExpectedOutput::Null, + comparison: ComparisonMode::Exact, + note: Some("NULL geometry".to_string()), + }, + ]; + cases.push(TypeTestCase { + db_type: "GEOMETRY".to_string(), + semantic_type: SemanticType::Geometric, + expected_arrow_type: DataType::Binary, + values: geom_values, + shape: TestShape::Scalar, + setup_sql: Some( + "INSTALL spatial; LOAD spatial; \ + SET autoinstall_known_extensions=true; \ + SET autoload_known_extensions=true;" + .to_string(), + ), + }); + } + cases } @@ -592,6 +659,34 @@ async fn run_test_case( } }; + // Run optional setup SQL (e.g., loading extensions) + if let Some(setup_sql) = &case.setup_sql { + if let Err(e) = conn.execute_batch(setup_sql) { + return TypeTestResult::skipped( + &case.db_type, + &format!("Setup SQL failed (extension may not be available): {}", e), + ); + } + + // For spatial types, verify the extension works on a separate connection. + // fetch_table opens its own connection, and file-based DuckDB with the + // bundled crate may not support spatial extension loading across connections. + if case.semantic_type == SemanticType::Geometric { + let probe = Connection::open(db_path); + let spatial_works = probe.is_ok() && { + let c = probe.unwrap(); + let _ = c.execute_batch("LOAD spatial;"); + c.prepare("SELECT ST_AsBinary(ST_Point(0,0))").is_ok() + }; + if !spatial_works { + return TypeTestResult::skipped( + &case.db_type, + "Spatial extension not functional across file-based connections (bundled crate limitation)", + ); + } + } + } + // Drop table if exists (cleanup from previous run) let drop_sql = format!("DROP TABLE IF EXISTS {}", table_name); let _ = conn.execute(&drop_sql, []); diff --git a/tests/type_coverage/harness.rs b/tests/type_coverage/harness.rs index 98b997d..7e354f7 100644 --- a/tests/type_coverage/harness.rs +++ b/tests/type_coverage/harness.rs @@ -68,7 +68,6 @@ pub enum SemanticType { #[allow(dead_code)] // Not yet supported by production fetch code Network, /// Geometric types (point, line, polygon) - backend-specific. - #[allow(dead_code)] // Not yet supported by production fetch code Geometric, /// Range types - backend-specific. #[allow(dead_code)] // Not yet supported by production fetch code @@ -204,6 +203,8 @@ pub struct TypeTestCase { /// Shape of the test data. 
#[allow(dead_code)] // Reserved for future non-scalar test shapes pub shape: TestShape, + /// Optional SQL to execute before table creation (e.g., loading extensions). + pub setup_sql: Option, } // ============================================================================ diff --git a/tests/type_coverage/mysql_types.rs b/tests/type_coverage/mysql_types.rs index 7508de5..35c75f0 100644 --- a/tests/type_coverage/mysql_types.rs +++ b/tests/type_coverage/mysql_types.rs @@ -82,6 +82,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Boolean, // TINYINT(1) is MySQL's boolean convention values, shape: TestShape::Scalar, + setup_sql: None, }); } @@ -103,6 +104,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Int8, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // SMALLINT: -32768 to 32767 @@ -121,6 +123,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Int16, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // INT: -2147483648 to 2147483647 @@ -139,6 +142,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Int32, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // BIGINT: -9223372036854775808 to 9223372036854775807 @@ -157,6 +161,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Int64, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // TINYINT UNSIGNED: 0 to 255 @@ -175,6 +180,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::UInt8, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // INT UNSIGNED: 0 to 4294967295 @@ -193,6 +199,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::UInt32, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // BIGINT UNSIGNED: 0 to 18446744073709551615 @@ -211,6 +218,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::UInt64, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // SMALLINT UNSIGNED: 0 to 65535 @@ -229,6 +237,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::UInt16, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // MEDIUMINT: -8388608 to 8388607 @@ -247,6 +256,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Int32, // MEDIUMINT maps to Int32 values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // MEDIUMINT UNSIGNED: 0 to 16777215 @@ -265,6 +275,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::UInt32, // MEDIUMINT UNSIGNED maps to UInt32 values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // YEAR: 1901 to 2155 (or 0) @@ -283,6 +294,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Int32, // YEAR maps to Int32 values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -308,6 +320,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Float32, 
values: float_values, shape: TestShape::Scalar, + setup_sql: None, }); // DOUBLE (64-bit) - exclude NaN/Infinity (MySQL doesn't support literals) @@ -324,6 +337,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Float64, values: double_values, shape: TestShape::Scalar, + setup_sql: None, }); } @@ -350,6 +364,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Utf8, values: varchar_values, shape: TestShape::Scalar, + setup_sql: None, }); // TEXT @@ -368,6 +383,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Utf8, values: text_values, shape: TestShape::Scalar, + setup_sql: None, }); } @@ -383,6 +399,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None), values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // NOTE: TIMESTAMP(6) is not yet fully supported - there's a schema mismatch bug @@ -417,6 +434,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Date32, values: date_values, shape: TestShape::Scalar, + setup_sql: None, }); // NOTE: TIME type is not yet supported by the production fetch code. @@ -459,6 +477,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Binary, values: blob_values, shape: TestShape::Scalar, + setup_sql: None, }); } @@ -501,6 +520,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Utf8, values: enum_values, shape: TestShape::Scalar, + setup_sql: None, }); // ======================================================================== @@ -544,6 +564,7 @@ fn build_mysql_test_cases() -> Vec { expected_arrow_type: DataType::Utf8, values: set_values, shape: TestShape::Scalar, + setup_sql: None, }); cases diff --git a/tests/type_coverage/postgres_types.rs b/tests/type_coverage/postgres_types.rs index 970edd5..bf7f256 100644 --- a/tests/type_coverage/postgres_types.rs +++ b/tests/type_coverage/postgres_types.rs @@ -64,6 +64,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Boolean, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -85,6 +86,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Int16, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // INTEGER: -2147483648 to 2147483647 @@ -103,6 +105,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Int32, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // BIGINT: -9223372036854775808 to 9223372036854775807 @@ -121,6 +124,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Int64, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -141,6 +145,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Float32, values: real_values, shape: TestShape::Scalar, + setup_sql: None, }); // DOUBLE PRECISION (float8) - 64-bit float @@ -152,6 +157,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Float64, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -166,6 +172,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Utf8, // NUMERIC often maps to string for full 
precision values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // NUMERIC(10,2) - constrained precision and scale @@ -182,6 +189,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Decimal128(10, 2), values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -196,6 +204,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Utf8, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // TEXT (unlimited) @@ -207,6 +216,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Utf8, values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); } @@ -222,6 +232,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None), values: filtered.values.into_iter().map(TestValue::from).collect(), shape: TestShape::Scalar, + setup_sql: None, }); // NOTE: TIMESTAMPTZ is not yet fully supported - there's a schema mismatch bug @@ -262,6 +273,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Date32, values: date_values, shape: TestShape::Scalar, + setup_sql: None, }); // NOTE: TIME and INTERVAL types are not yet supported by the production fetch code. @@ -309,6 +321,7 @@ fn build_postgres_test_cases() -> Vec { expected_arrow_type: DataType::Binary, values: bytea_values, shape: TestShape::Scalar, + setup_sql: None, }); } From 81e77515b843aee1a3d046ea51790fbddce38795 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 13:34:55 -0800 Subject: [PATCH 15/27] refactor(postgres): fix discovery type mapping Remove unused parse_geometry_type_params function. Use both udt_name (for geometry detection) and data_type (for type mapping) in discover_tables, matching the approach already used in fetch_table. 
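For context, this matters because information_schema reports PostGIS columns with a generic data_type. Assuming a hypothetical table created as CREATE TABLE public.places (id integer, geom geometry(Point, 4326)), a query like

    SELECT column_name, data_type, udt_name
    FROM information_schema.columns
    WHERE table_schema = 'public' AND table_name = 'places';

returns data_type = 'USER-DEFINED' with udt_name = 'geometry' for geom, but data_type = 'integer' with udt_name = 'int4' for id; udt_name is therefore needed to spot geometry columns, while data_type carries the standard SQL names that pg_type_to_arrow expects.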
--- src/datafetch/native/postgres.rs | 81 ++++++-------------------------- 1 file changed, 14 insertions(+), 67 deletions(-) diff --git a/src/datafetch/native/postgres.rs b/src/datafetch/native/postgres.rs index ac4c80e..b3a759c 100644 --- a/src/datafetch/native/postgres.rs +++ b/src/datafetch/native/postgres.rs @@ -117,6 +117,9 @@ pub async fn discover_tables( // This includes SRID and geometry type information let geometry_info = discover_geometry_columns(&mut conn).await; + // Select both udt_name and data_type: + // - udt_name detects PostGIS types (geometry/geography) that data_type misses + // - data_type provides standard SQL type names for pg_type_to_arrow mapping let rows = sqlx::query( r#" SELECT @@ -126,6 +129,7 @@ pub async fn discover_tables( t.table_type, c.column_name, c.udt_name, + c.data_type, c.is_nullable, c.ordinal_position::int FROM information_schema.tables t @@ -149,12 +153,19 @@ pub async fn discover_tables( let table_type: String = row.get(3); let col_name: String = row.get(4); let udt_name: String = row.get(5); - let is_nullable: String = row.get(6); - let ordinal: i32 = row.get(7); + let data_type: String = row.get(6); + let is_nullable: String = row.get(7); + let ordinal: i32 = row.get(8); + // Use udt_name for geometry detection, data_type for everything else + let type_for_arrow = if is_geometry_type(&udt_name) { + &udt_name + } else { + &data_type + }; let column = ColumnMetadata { name: col_name.clone(), - data_type: pg_type_to_arrow(&udt_name), + data_type: pg_type_to_arrow(type_for_arrow), nullable: is_nullable.to_uppercase() == "YES", ordinal_position: ordinal, }; @@ -441,42 +452,6 @@ pub fn is_geometry_type(pg_type: &str) -> bool { matches!(base_type, "geometry" | "geography") } -/// Parse SRID from a PostGIS type string like "geometry(Point,4326)" or "geography(Polygon,4326)" -/// Returns (geometry_type, srid) tuple if parseable -pub fn parse_geometry_type_params(pg_type: &str) -> Option<(String, i32)> { - let type_lower = pg_type.to_lowercase(); - - // Check if it's a geometry/geography type with parameters - if !type_lower.starts_with("geometry(") && !type_lower.starts_with("geography(") { - return None; - } - - // Extract content between parentheses - let start = type_lower.find('(')?; - let end = type_lower.find(')')?; - let params = &type_lower[start + 1..end]; - - // Split by comma: "Point,4326" or just "Point" or "4326" - let parts: Vec<&str> = params.split(',').map(|s| s.trim()).collect(); - - match parts.len() { - 1 => { - // Could be just type or just SRID - if let Ok(srid) = parts[0].parse::() { - Some(("geometry".to_string(), srid)) - } else { - Some((parts[0].to_string(), 0)) - } - } - 2 => { - let geom_type = parts[0].to_string(); - let srid = parts[1].parse::().unwrap_or(0); - Some((geom_type, srid)) - } - _ => None, - } -} - /// Parse PostgreSQL NUMERIC(precision, scale) parameters. /// For constrained NUMERIC(p,s), returns Decimal128 with those params. /// For unconstrained NUMERIC, returns Utf8 to preserve arbitrary precision. 
@@ -1027,34 +1002,6 @@ mod tests { assert!(!is_geometry_type("bytea")); } - #[test] - fn test_parse_geometry_type_params() { - // Full parameterized type - let result = parse_geometry_type_params("geometry(Point,4326)"); - assert!(result.is_some()); - let (geom_type, srid) = result.unwrap(); - assert_eq!(geom_type, "point"); - assert_eq!(srid, 4326); - - // Type only, no SRID - let result = parse_geometry_type_params("geometry(Polygon)"); - assert!(result.is_some()); - let (geom_type, srid) = result.unwrap(); - assert_eq!(geom_type, "polygon"); - assert_eq!(srid, 0); - - // Geography type - let result = parse_geometry_type_params("geography(MultiPolygon,3857)"); - assert!(result.is_some()); - let (geom_type, srid) = result.unwrap(); - assert_eq!(geom_type, "multipolygon"); - assert_eq!(srid, 3857); - - // No parameters - returns None - assert!(parse_geometry_type_params("geometry").is_none()); - assert!(parse_geometry_type_params("text").is_none()); - } - // ========================================================================= // Array types (fall back to Utf8) // ========================================================================= From a228a2162c84f059fdf817f5f0c44bcfa258def0 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 13:35:00 -0800 Subject: [PATCH 16/27] fix(duckdb): add debug logging for spatial load --- src/datafetch/native/duckdb.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/datafetch/native/duckdb.rs b/src/datafetch/native/duckdb.rs index 5254641..40ca310 100644 --- a/src/datafetch/native/duckdb.rs +++ b/src/datafetch/native/duckdb.rs @@ -5,6 +5,7 @@ use duckdb::Connection; use std::collections::HashMap; +use tracing::debug; use urlencoding::encode; use crate::datafetch::batch_writer::BatchWriter; @@ -80,7 +81,9 @@ fn discover_tables_sync( .map_err(|e| DataFetchError::Connection(e.to_string()))?; // Load spatial extension if available — needed for spatial type detection - let _ = conn.execute_batch("LOAD spatial;"); + if let Err(e) = conn.execute_batch("LOAD spatial;") { + debug!("DuckDB spatial extension not available: {}", e); + } let query = r#" SELECT @@ -240,7 +243,9 @@ fn fetch_table_to_channel( .map_err(|e| DataFetchError::Connection(e.to_string()))?; // Load spatial extension if available — needed for ST_AsBinary() wrapping - let _ = conn.execute_batch("LOAD spatial;"); + if let Err(e) = conn.execute_batch("LOAD spatial;") { + debug!("DuckDB spatial extension not available: {}", e); + } // Query column types to detect spatial columns for ST_AsBinary wrapping let query = build_fetch_query(&conn, schema, table)?; From b67fcfec16cdec564a2fc2ee3d501cc9ca628fd5 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 13:35:04 -0800 Subject: [PATCH 17/27] refactor(engine): extract GeoParquet metadata helper --- src/engine.rs | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/engine.rs b/src/engine.rs index 4b5ae06..a89e626 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -263,6 +263,22 @@ fn hex_decode_geometry_columns( Ok(RecordBatch::try_new(target_schema.clone(), columns)?) } +/// Extract GeoParquet geometry column metadata from Parquet file-level metadata. +/// +/// Looks for the "geo" key in Parquet key-value metadata and parses it +/// according to the GeoParquet spec to extract geometry column info. 
+fn extract_parquet_geometry_columns( + metadata: &datafusion::parquet::file::metadata::ParquetMetaData, +) -> HashMap<String, GeometryColumnInfo> { + metadata .file_metadata() .key_value_metadata() .and_then(|kv| kv.iter().find(|item| item.key == "geo")) .and_then(|item| item.value.as_ref()) .map(|geo_json| crate::datafetch::parse_geoparquet_metadata(geo_json)) .unwrap_or_default() +} + impl RuntimeEngine { // ========================================================================= // Constructors @@ -2287,14 +2303,7 @@ impl RuntimeEngine { let schema = builder.schema().clone(); // Extract GeoParquet metadata if present - let geometry_columns = builder - .metadata() - .file_metadata() - .key_value_metadata() - .and_then(|kv| kv.iter().find(|item| item.key == "geo")) - .and_then(|item| item.value.as_ref()) - .map(|geo_json| crate::datafetch::parse_geoparquet_metadata(geo_json)) - .unwrap_or_default(); + let geometry_columns = extract_parquet_geometry_columns(builder.metadata()); let parquet_reader = release_on_parse_error!(builder.with_batch_size(8192).build()); @@ -2325,14 +2334,7 @@ impl RuntimeEngine { let schema = builder.schema().clone(); // Extract GeoParquet metadata if present - let geometry_columns = builder - .metadata() - .file_metadata() - .key_value_metadata() - .and_then(|kv| kv.iter().find(|item| item.key == "geo")) - .and_then(|item| item.value.as_ref()) - .map(|geo_json| crate::datafetch::parse_geoparquet_metadata(geo_json)) - .unwrap_or_default(); + let geometry_columns = extract_parquet_geometry_columns(builder.metadata()); let parquet_reader = release_on_parse_error!(builder.with_batch_size(8192).build()); From 996832c50a43dd310d63003f2fd173e6429d9241 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 28 Jan 2026 13:35:08 -0800 Subject: [PATCH 18/27] docs(geoparquet): add CRS format limitation note --- src/datafetch/native/parquet_writer.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/datafetch/native/parquet_writer.rs b/src/datafetch/native/parquet_writer.rs index b4028bd..bbda660 100644 --- a/src/datafetch/native/parquet_writer.rs +++ b/src/datafetch/native/parquet_writer.rs @@ -63,7 +63,11 @@ struct GeoColumnMetadata { crs: Option<CrsMetadata>, } -/// CRS metadata in PROJJSON-style format (simplified for EPSG codes) +/// CRS metadata in PROJJSON-style format (simplified for EPSG codes). +/// +/// Note: The GeoParquet 1.1.0 spec supports full PROJJSON CRS definitions. +/// This implementation only supports the `{ "id": { "authority", "code" } }` subset, +/// which covers EPSG-coded SRIDs (the vast majority of use cases). 
#[derive(Debug, Clone, Serialize, Deserialize)] struct CrsMetadata { id: CrsId, From a94a8cd003fadafaba37ed95186461c6d8703495 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Fri, 30 Jan 2026 14:30:09 -0800 Subject: [PATCH 19/27] style: fix clippy warnings --- tests/gis_integration_tests.rs | 13 +++++-------- tests/type_coverage/duckdb_types.rs | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/tests/gis_integration_tests.rs b/tests/gis_integration_tests.rs index 0c064d7..8cb0335 100644 --- a/tests/gis_integration_tests.rs +++ b/tests/gis_integration_tests.rs @@ -67,15 +67,12 @@ async fn start_postgis_container() -> ContainerAsync { /// Wait for the database to be ready for connections async fn wait_for_db(conn_str: &str) -> sqlx::PgPool { for attempt in 1..=30 { - match sqlx::PgPool::connect(conn_str).await { - Ok(pool) => { - // Test connection is actually working - if sqlx::query("SELECT 1").execute(&pool).await.is_ok() { - return pool; - } - pool.close().await; + if let Ok(pool) = sqlx::PgPool::connect(conn_str).await { + // Test connection is actually working + if sqlx::query("SELECT 1").execute(&pool).await.is_ok() { + return pool; } - Err(_) => {} + pool.close().await; } if attempt < 30 { tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; diff --git a/tests/type_coverage/duckdb_types.rs b/tests/type_coverage/duckdb_types.rs index 2ccb56d..c7dd8cc 100644 --- a/tests/type_coverage/duckdb_types.rs +++ b/tests/type_coverage/duckdb_types.rs @@ -664,7 +664,7 @@ async fn run_test_case( if let Err(e) = conn.execute_batch(setup_sql) { return TypeTestResult::skipped( &case.db_type, - &format!("Setup SQL failed (extension may not be available): {}", e), + format!("Setup SQL failed (extension may not be available): {}", e), ); } From 2cec3a9b03fcb381e671cff2d44facde49e1bf12 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Fri, 30 Jan 2026 20:53:17 -0800 Subject: [PATCH 20/27] fix clippy issue --- src/datafetch/types.rs | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/datafetch/types.rs b/src/datafetch/types.rs index 71e108a..72417f7 100644 --- a/src/datafetch/types.rs +++ b/src/datafetch/types.rs @@ -5,7 +5,7 @@ use std::collections::HashMap; use std::sync::Arc; /// Metadata for a discovered table -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct TableMetadata { pub catalog_name: Option, pub schema_name: String, @@ -35,19 +35,6 @@ pub struct GeometryColumnInfo { pub geometry_type: Option, } -impl Default for TableMetadata { - fn default() -> Self { - Self { - catalog_name: None, - schema_name: String::new(), - table_name: String::new(), - table_type: String::new(), - columns: Vec::new(), - geometry_columns: HashMap::new(), - } - } -} - /// Key used to store geometry column metadata in Arrow schema metadata pub const GEOMETRY_COLUMNS_METADATA_KEY: &str = "runtimedb:geometry_columns"; From ff5dc81b170e584a166a7c7fab09bbfba7cec6a9 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Sun, 1 Feb 2026 08:12:34 -0800 Subject: [PATCH 21/27] fix(duckdb): error when spatial columns lack ST_AsBinary support Previously, when a table had spatial columns but ST_AsBinary was unavailable (bundled crate limitation), build_fetch_query silently fell back to SELECT *. This produced DuckDB's internal geometry format instead of WKB, but downstream code (GeoParquet writer) would incorrectly treat it as WKB, causing data corruption. 
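
For example, for a table like geo_schema.geo_table(id INTEGER, geom GEOMETRY),
the intended fetch query wraps spatial columns in ST_AsBinary (sketch only;
quoting and column list are illustrative):

    SELECT id, ST_AsBinary(geom) AS geom FROM "geo_schema"."geo_table"

whereas the silent fallback emitted:

    SELECT * FROM "geo_schema"."geo_table"

leaving geom in DuckDB's internal encoding rather than WKB.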
Now returns an error with a clear message when spatial columns are detected but the spatial extension isn't functional. --- src/datafetch/native/duckdb.rs | 90 +++++++++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 12 deletions(-) diff --git a/src/datafetch/native/duckdb.rs b/src/datafetch/native/duckdb.rs index 40ca310..ae2e23d 100644 --- a/src/datafetch/native/duckdb.rs +++ b/src/datafetch/native/duckdb.rs @@ -343,10 +343,12 @@ fn build_fetch_query( escaped_table )) } else { - // Fall back to SELECT * — geometry will arrive in DuckDB's internal format - Ok(format!( - "SELECT * FROM \"{}\".\"{}\"", - escaped_schema, escaped_table + // Cannot safely fetch spatial columns without ST_AsBinary — the geometry data + // would be in DuckDB's internal format (not WKB), but downstream code + // (GeoParquet writer, spatial functions) would incorrectly treat it as WKB. + Err(DataFetchError::Query( + "Table contains spatial columns but the DuckDB spatial extension is not functional. \ + Cannot fetch geometry data without ST_AsBinary() support.".to_string() )) } } else { @@ -918,21 +920,85 @@ mod tests { } #[test] - fn test_build_fetch_query_spatial_fallback() { - // When spatial extension is not functional, build_fetch_query should - // fall back to SELECT * instead of failing with ST_AsBinary + fn test_build_fetch_query_no_spatial_columns() { + // When there are no spatial columns, build_fetch_query returns SELECT * let conn = Connection::open_in_memory().unwrap(); - // Create a table that looks like it has spatial columns via information_schema - // but WITHOUT loading spatial — simulates the bundled crate limitation conn.execute_batch( "CREATE SCHEMA test_schema; \ - CREATE TABLE test_schema.fallback_test (id INTEGER, name VARCHAR);", + CREATE TABLE test_schema.regular_test (id INTEGER, name VARCHAR);", ) .unwrap(); // No spatial columns → SELECT * - let query = build_fetch_query(&conn, "test_schema", "fallback_test").unwrap(); - assert_eq!(query, "SELECT * FROM \"test_schema\".\"fallback_test\""); + let query = build_fetch_query(&conn, "test_schema", "regular_test").unwrap(); + assert_eq!(query, "SELECT * FROM \"test_schema\".\"regular_test\""); + } + + #[test] + fn test_build_fetch_query_errors_when_spatial_unavailable() { + // When a table has spatial columns but ST_AsBinary is not available, + // build_fetch_query should return an error (not silently fall back to SELECT *). + // + // Falling back to SELECT * would produce DuckDB's internal geometry format + // instead of WKB, but downstream code would treat it as WKB, causing corruption. + // + // This test uses a file-based database to simulate the cross-connection scenario + // where spatial extension may not load properly on the fetch connection. 
+ use tempfile::TempDir; + + let dir = TempDir::new().unwrap(); + let db_path = dir.path().join("test.duckdb"); + + // Connection 1: Create a geometry table WITH spatial extension + { + let conn = Connection::open(&db_path).unwrap(); + let has_spatial = conn.execute_batch("INSTALL spatial; LOAD spatial;").is_ok() + && conn.prepare("SELECT ST_AsBinary(ST_Point(0,0))").is_ok(); + + if !has_spatial { + // Skip test if spatial extension not available at all + return; + } + + conn.execute_batch( + "CREATE SCHEMA geo_schema; \ + CREATE TABLE geo_schema.geo_table (id INTEGER, geom GEOMETRY);", + ) + .unwrap(); + } + // Connection 1 dropped + + // Connection 2: Open WITHOUT loading spatial, try to build fetch query + { + let conn = Connection::open(&db_path).unwrap(); + // Intentionally NOT loading spatial extension + + // The table has a GEOMETRY column (detected via information_schema), + // but ST_AsBinary is not available on this connection. + let result = build_fetch_query(&conn, "geo_schema", "geo_table"); + + // On systems where spatial doesn't persist across connections (bundled crate), + // this should error. On systems where it does persist, we get ST_AsBinary. + match result { + Ok(query) => { + // If it succeeded, spatial must be working — verify ST_AsBinary is used + assert!( + query.contains("ST_AsBinary"), + "If query succeeds, must use ST_AsBinary, got: {}", + query + ); + } + Err(e) => { + // Expected error when spatial extension isn't functional + let msg = e.to_string(); + assert!( + msg.contains("spatial") && msg.contains("ST_AsBinary"), + "Error should mention spatial/ST_AsBinary: {}", + msg + ); + } + } + } } } From a7ba30fd882238d84610e219f09b3cfc96f728a7 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Sun, 1 Feb 2026 08:13:11 -0800 Subject: [PATCH 22/27] fix(snowflake): guard against empty column list in fetch query When information_schema query returns zero rows (case mismatch, permissions), column_exprs is empty, producing invalid SQL like SELECT FROM .... Now falls back to SELECT * when column list empty. 
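
For example, with an empty column list the generated statement was effectively
(identifiers illustrative):

    SELECT  FROM "MYDB"."MYSCHEMA"."MYTABLE"

which is not valid SQL; with the guard in place it becomes:

    SELECT * FROM "MYDB"."MYSCHEMA"."MYTABLE"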
--- src/datafetch/native/snowflake.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/datafetch/native/snowflake.rs b/src/datafetch/native/snowflake.rs index d2d62a8..b148b3c 100644 --- a/src/datafetch/native/snowflake.rs +++ b/src/datafetch/native/snowflake.rs @@ -317,9 +317,15 @@ pub async fn fetch_table( }; // Build SELECT query with column expressions + // Guard against empty column list (can happen with case mismatch or permissions issues) + let select_clause = if column_exprs.is_empty() { + "*".to_string() + } else { + column_exprs.join(", ") + }; let query = format!( r#"SELECT {} FROM "{}"."{}"."{}"#, - column_exprs.join(", "), + select_clause, database.replace('"', "\"\""), schema.replace('"', "\"\""), table.replace('"', "\"\"") From 601efed20dbc56e7ff1310749b2383a0aa7b8ad7 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Sun, 1 Feb 2026 08:52:34 -0800 Subject: [PATCH 23/27] fix(datafetch): populate geometry_columns in MySQL/Snowflake discovery --- src/datafetch/native/mysql.rs | 41 +++++++++++++++++++++++++++++-- src/datafetch/native/snowflake.rs | 40 +++++++++++++++++++++++++++--- 2 files changed, 76 insertions(+), 5 deletions(-) diff --git a/src/datafetch/native/mysql.rs b/src/datafetch/native/mysql.rs index 98254ee..ef5c404 100644 --- a/src/datafetch/native/mysql.rs +++ b/src/datafetch/native/mysql.rs @@ -14,6 +14,7 @@ use std::sync::Arc; use tracing::warn; use crate::datafetch::batch_writer::BatchWriter; +use crate::datafetch::types::GeometryColumnInfo; use crate::datafetch::{ColumnMetadata, DataFetchError, TableMetadata}; use crate::secrets::SecretManager; use crate::source::Source; @@ -148,19 +149,22 @@ pub async fn discover_tables( let is_nullable: String = row.get(6); let ordinal: u32 = row.get(7); + let is_spatial = is_spatial_type(&data_type); + let column = ColumnMetadata { - name: col_name, + name: col_name.clone(), data_type: mysql_type_to_arrow(&data_type), nullable: is_nullable.to_uppercase() == "YES", ordinal_position: ordinal as i32, }; // Find or create table entry - if let Some(existing) = tables + let table_meta = if let Some(existing) = tables .iter_mut() .find(|t| t.catalog_name == catalog && t.schema_name == schema && t.table_name == table) { existing.columns.push(column); + existing } else { tables.push(TableMetadata { catalog_name: catalog, @@ -170,6 +174,13 @@ pub async fn discover_tables( columns: vec![column], geometry_columns: std::collections::HashMap::new(), }); + tables.last_mut().unwrap() + }; + + // Populate geometry column metadata for GeoParquet support + if is_spatial { + let geo_info = parse_mysql_geometry_info(&data_type); + table_meta.geometry_columns.insert(col_name, geo_info); } } @@ -406,6 +417,32 @@ pub fn is_spatial_type(mysql_type: &str) -> bool { ) } +/// Parse MySQL spatial type into GeometryColumnInfo. +/// +/// MySQL spatial types don't carry SRID information in the column type +/// (SRID is specified per-value or via table constraints), so we default to +/// SRID 0 (unspecified) and extract the geometry type from the column type. 
+fn parse_mysql_geometry_info(mysql_type: &str) -> GeometryColumnInfo { + let type_lower = mysql_type.to_lowercase(); + let base_type = type_lower.split('(').next().unwrap_or(&type_lower).trim(); + + let geometry_type = match base_type { + "point" => Some("Point".to_string()), + "linestring" => Some("LineString".to_string()), + "polygon" => Some("Polygon".to_string()), + "multipoint" => Some("MultiPoint".to_string()), + "multilinestring" => Some("MultiLineString".to_string()), + "multipolygon" => Some("MultiPolygon".to_string()), + "geometrycollection" | "geomcollection" => Some("GeometryCollection".to_string()), + _ => None, // "geometry" — unknown sub-type + }; + + GeometryColumnInfo { + srid: 0, // MySQL doesn't expose SRID in column type + geometry_type, + } +} + /// Parse a decimal string to i128 with the given precision and scale. /// For example, "123.45" with precision=10, scale=2 becomes 12345. /// diff --git a/src/datafetch/native/snowflake.rs b/src/datafetch/native/snowflake.rs index b148b3c..43dcebb 100644 --- a/src/datafetch/native/snowflake.rs +++ b/src/datafetch/native/snowflake.rs @@ -10,6 +10,7 @@ use snowflake_api::{QueryResult, SnowflakeApi}; use std::sync::Arc; use crate::datafetch::batch_writer::BatchWriter; +use crate::datafetch::types::GeometryColumnInfo; use crate::datafetch::{ColumnMetadata, DataFetchError, TableMetadata}; use crate::secrets::SecretManager; use crate::source::Source; @@ -214,8 +215,11 @@ pub async fn discover_tables( .unwrap_or(true); let ordinal = get_int_value(ordinal_col.as_ref(), row).unwrap_or(0) as i32; + // Check if this is a spatial column + let is_spatial = is_spatial_type(&data_type_str); + let column = ColumnMetadata { - name: col_name, + name: col_name.clone(), data_type: snowflake_type_to_arrow(&data_type_str), nullable: is_nullable, ordinal_position: ordinal, @@ -228,15 +232,26 @@ pub async fn discover_tables( && t.table_name == table_name }) { existing.columns.push(column); + // Add geometry column info if spatial + if is_spatial { + let geo_info = parse_snowflake_geometry_info(&data_type_str); + existing.geometry_columns.insert(col_name, geo_info); + } } else { - tables.push(TableMetadata { + let mut table_meta = TableMetadata { catalog_name: catalog, schema_name, table_name, table_type, columns: vec![column], geometry_columns: std::collections::HashMap::new(), - }); + }; + // Add geometry column info if spatial + if is_spatial { + let geo_info = parse_snowflake_geometry_info(&data_type_str); + table_meta.geometry_columns.insert(col_name, geo_info); + } + tables.push(table_meta); } } } @@ -482,6 +497,25 @@ pub fn is_spatial_type(sf_type: &str) -> bool { matches!(base_type, "GEOGRAPHY" | "GEOMETRY") } +/// Parse Snowflake geometry type info for GeoParquet metadata. +/// Snowflake only has GEOGRAPHY (WGS84, SRID 4326) and GEOMETRY (planar, SRID 0). +fn parse_snowflake_geometry_info(sf_type: &str) -> GeometryColumnInfo { + let type_upper = sf_type.to_uppercase(); + let base_type = type_upper.split('(').next().unwrap_or(&type_upper).trim(); + + // GEOGRAPHY is always WGS84 (SRID 4326), GEOMETRY is planar (SRID 0) + let srid = if base_type == "GEOGRAPHY" { 4326 } else { 0 }; + + // Snowflake doesn't expose the specific geometry subtype (Point, Polygon, etc.) 
+ // in information_schema, so we use the generic type + let geometry_type = Some("Geometry".to_string()); + + GeometryColumnInfo { + srid, + geometry_type, + } +} + /// Extract string value from Arrow array (supports both Utf8 and LargeUtf8) fn get_string_value(array: &dyn datafusion::arrow::array::Array, row: usize) -> Option { use datafusion::arrow::array::{LargeStringArray, StringArray}; From 983dbe5b5b0ea9e49a27995e5986312096366e49 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Sun, 1 Feb 2026 09:04:11 -0800 Subject: [PATCH 24/27] fix(snowflake): error when spatial columns exist but schema query fails --- src/datafetch/native/snowflake.rs | 43 +++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/src/datafetch/native/snowflake.rs b/src/datafetch/native/snowflake.rs index 43dcebb..4223ae4 100644 --- a/src/datafetch/native/snowflake.rs +++ b/src/datafetch/native/snowflake.rs @@ -280,12 +280,12 @@ pub async fn fetch_table( } }; - // Query column info to detect spatial columns + // Query column info to detect spatial columns (case-insensitive to handle identifier casing) let schema_query = format!( r#" SELECT column_name, data_type FROM "{database}".information_schema.columns - WHERE table_schema = '{schema}' AND table_name = '{table}' + WHERE UPPER(table_schema) = UPPER('{schema}') AND UPPER(table_name) = UPPER('{table}') ORDER BY ordinal_position "#, database = database.replace('"', "\"\""), @@ -325,15 +325,48 @@ pub async fn fetch_table( exprs } _ => { - // Fallback to SELECT * if schema query fails - vec!["*".to_string()] + // Schema query returned no data + Vec::new() } } }; // Build SELECT query with column expressions - // Guard against empty column list (can happen with case mismatch or permissions issues) + // Guard against empty column list which could silently skip ST_AsBinary wrapping let select_clause = if column_exprs.is_empty() { + // If schema query returned nothing, check if table has spatial columns with a separate query + // This handles edge cases like permissions issues where info_schema doesn't return columns + let spatial_check_query = format!( + r#" + SELECT COUNT(*) as cnt + FROM "{database}".information_schema.columns + WHERE UPPER(table_schema) = UPPER('{schema}') + AND UPPER(table_name) = UPPER('{table}') + AND data_type IN ('GEOGRAPHY', 'GEOMETRY') + "#, + database = database.replace('"', "\"\""), + schema = schema.replace('\'', "''"), + table = table.replace('\'', "''") + ); + + let has_spatial = match client.exec(&spatial_check_query).await { + Ok(QueryResult::Arrow(batches)) => batches.first().is_some_and(|b| { + if let Ok(converted) = convert_arrow_batch(b) { + get_int_value(converted.column(0).as_ref(), 0).unwrap_or(0) > 0 + } else { + false + } + }), + _ => false, + }; + + if has_spatial { + return Err(DataFetchError::Query( + "Table contains spatial columns but column schema query returned no results. \ + Cannot fetch geometry data without ST_AsBinary() wrapping." 
+ .to_string(), + )); + } "*".to_string() } else { column_exprs.join(", ") From f242bd5c7d431319ec939933fa485c03b475ddee Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Sun, 1 Feb 2026 12:34:53 -0800 Subject: [PATCH 25/27] fix(snowflake): prefer exact case match for schema lookup --- src/datafetch/native/snowflake.rs | 106 ++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 35 deletions(-) diff --git a/src/datafetch/native/snowflake.rs b/src/datafetch/native/snowflake.rs index 4223ae4..c241ab5 100644 --- a/src/datafetch/native/snowflake.rs +++ b/src/datafetch/native/snowflake.rs @@ -280,8 +280,22 @@ pub async fn fetch_table( } }; - // Query column info to detect spatial columns (case-insensitive to handle identifier casing) - let schema_query = format!( + // Query column info to detect spatial columns + // Try exact case first to avoid mixing columns from case-variant tables (e.g., "Foo" vs "FOO") + // Only fall back to case-insensitive if exact match returns no rows + let exact_schema_query = format!( + r#" + SELECT column_name, data_type + FROM "{database}".information_schema.columns + WHERE table_schema = '{schema}' AND table_name = '{table}' + ORDER BY ordinal_position + "#, + database = database.replace('"', "\"\""), + schema = schema.replace('\'', "''"), + table = table.replace('\'', "''") + ); + + let case_insensitive_schema_query = format!( r#" SELECT column_name, data_type FROM "{database}".information_schema.columns @@ -293,56 +307,78 @@ pub async fn fetch_table( table = table.replace('\'', "''") ); - // Build column expressions with ST_AsBinary for spatial columns + // Helper to parse column expressions from query result + fn parse_column_exprs( + batches: &[arrow54_array::RecordBatch], + ) -> Result, DataFetchError> { + let mut exprs = Vec::new(); + for batch in batches { + let converted = convert_arrow_batch(batch)?; + for row in 0..converted.num_rows() { + if let (Some(col_name), Some(data_type)) = ( + get_string_value(converted.column(0).as_ref(), row), + get_string_value(converted.column(1).as_ref(), row), + ) { + let escaped_col = format!("\"{}\"", col_name.replace('"', "\"\"")); + if is_spatial_type(&data_type) { + exprs.push(format!("ST_AsBinary({}) AS {}", escaped_col, escaped_col)); + } else { + exprs.push(escaped_col); + } + } + } + } + Ok(exprs) + } + + // Try exact case first let column_exprs: Vec = { - let schema_result = client - .exec(&schema_query) + let exact_result = client + .exec(&exact_schema_query) .await .map_err(|e| DataFetchError::Query(format!("Schema query failed: {}", e)))?; - match schema_result { - QueryResult::Arrow(batches) => { - let mut exprs = Vec::new(); - for batch in batches { - let converted = convert_arrow_batch(&batch)?; - for row in 0..converted.num_rows() { - if let (Some(col_name), Some(data_type)) = ( - get_string_value(converted.column(0).as_ref(), row), - get_string_value(converted.column(1).as_ref(), row), - ) { - let escaped_col = format!("\"{}\"", col_name.replace('"', "\"\"")); - if is_spatial_type(&data_type) { - exprs.push(format!( - "ST_AsBinary({}) AS {}", - escaped_col, escaped_col - )); - } else { - exprs.push(escaped_col); - } - } - } + match exact_result { + QueryResult::Arrow(batches) if !batches.is_empty() => { + let exprs = parse_column_exprs(&batches)?; + if !exprs.is_empty() { + exprs + } else { + // Exact query returned batches but no columns, try case-insensitive + Vec::new() } - exprs - } - _ => { - // Schema query returned no data - Vec::new() } + _ => Vec::new(), } }; + // If exact case returned 
nothing, try case-insensitive fallback + let column_exprs = if column_exprs.is_empty() { + let fallback_result = client + .exec(&case_insensitive_schema_query) + .await + .map_err(|e| DataFetchError::Query(format!("Schema query failed: {}", e)))?; + + match fallback_result { + QueryResult::Arrow(batches) => parse_column_exprs(&batches)?, + _ => Vec::new(), + } + } else { + column_exprs + }; + // Build SELECT query with column expressions // Guard against empty column list which could silently skip ST_AsBinary wrapping let select_clause = if column_exprs.is_empty() { - // If schema query returned nothing, check if table has spatial columns with a separate query + // If schema query returned nothing, check if table has spatial columns // This handles edge cases like permissions issues where info_schema doesn't return columns let spatial_check_query = format!( r#" SELECT COUNT(*) as cnt FROM "{database}".information_schema.columns - WHERE UPPER(table_schema) = UPPER('{schema}') - AND UPPER(table_name) = UPPER('{table}') - AND data_type IN ('GEOGRAPHY', 'GEOMETRY') + WHERE (table_schema = '{schema}' AND table_name = '{table}') + OR (UPPER(table_schema) = UPPER('{schema}') AND UPPER(table_name) = UPPER('{table}')) + AND data_type IN ('GEOGRAPHY', 'GEOMETRY') "#, database = database.replace('"', "\"\""), schema = schema.replace('\'', "''"), From 4a42664caa7ac48dfa3a204d09a0f325c86eec01 Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Sun, 1 Feb 2026 12:58:30 -0800 Subject: [PATCH 26/27] fix(snowflake): correct boolean precedence in spatial check query --- src/datafetch/native/snowflake.rs | 79 ++++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 12 deletions(-) diff --git a/src/datafetch/native/snowflake.rs b/src/datafetch/native/snowflake.rs index c241ab5..1a811ff 100644 --- a/src/datafetch/native/snowflake.rs +++ b/src/datafetch/native/snowflake.rs @@ -372,18 +372,7 @@ pub async fn fetch_table( let select_clause = if column_exprs.is_empty() { // If schema query returned nothing, check if table has spatial columns // This handles edge cases like permissions issues where info_schema doesn't return columns - let spatial_check_query = format!( - r#" - SELECT COUNT(*) as cnt - FROM "{database}".information_schema.columns - WHERE (table_schema = '{schema}' AND table_name = '{table}') - OR (UPPER(table_schema) = UPPER('{schema}') AND UPPER(table_name) = UPPER('{table}')) - AND data_type IN ('GEOGRAPHY', 'GEOMETRY') - "#, - database = database.replace('"', "\"\""), - schema = schema.replace('\'', "''"), - table = table.replace('\'', "''") - ); + let spatial_check_query = build_spatial_check_query(database, schema, table); let has_spatial = match client.exec(&spatial_check_query).await { Ok(QueryResult::Arrow(batches)) => batches.first().is_some_and(|b| { @@ -566,6 +555,23 @@ pub fn is_spatial_type(sf_type: &str) -> bool { matches!(base_type, "GEOGRAPHY" | "GEOMETRY") } +/// Build SQL query to check if a table has spatial columns. +/// Exported for testing to verify correct boolean precedence. 
+fn build_spatial_check_query(database: &str, schema: &str, table: &str) -> String {
+    format!(
+        r#"
+        SELECT COUNT(*) as cnt
+        FROM "{database}".information_schema.columns
+        WHERE ((table_schema = '{schema}' AND table_name = '{table}')
+           OR (UPPER(table_schema) = UPPER('{schema}') AND UPPER(table_name) = UPPER('{table}')))
+          AND data_type IN ('GEOGRAPHY', 'GEOMETRY')
+        "#,
+        database = database.replace('"', "\"\""),
+        schema = schema.replace('\'', "''"),
+        table = table.replace('\'', "''")
+    )
+}
+
 /// Parse Snowflake geometry type info for GeoParquet metadata.
 /// Snowflake only has GEOGRAPHY (WGS84, SRID 4326) and GEOMETRY (planar, SRID 0).
 fn parse_snowflake_geometry_info(sf_type: &str) -> GeometryColumnInfo {
@@ -897,4 +903,53 @@ mod tests {
 
         assert_eq!(get_int_value(array_ref.as_ref(), 0), None);
     }
+
+    #[test]
+    fn test_spatial_check_query_boolean_precedence() {
+        // Verify the spatial check query has correct boolean precedence.
+        // The OR branches must be grouped together before ANDing with data_type filter,
+        // otherwise the exact-case branch would match ALL columns (not just spatial).
+        //
+        // Correct: WHERE ((exact_case) OR (upper_case)) AND data_type IN (...)
+        // Wrong: WHERE (exact_case) OR (upper_case) AND data_type IN (...)
+        // (due to AND binding tighter than OR, this becomes:
+        // WHERE (exact_case) OR ((upper_case) AND data_type IN (...))
+        // which means exact_case matches ALL columns!)
+
+        let query = build_spatial_check_query("mydb", "myschema", "mytable");
+        let normalized = query.split_whitespace().collect::<Vec<_>>().join(" ");
+
+        // The query structure should be:
+        // WHERE (( ... ) OR ( ... )) AND data_type
+        // Look for the pattern: closing the OR group "))" followed by "AND data_type"
+
+        // Find where "OR" appears and verify the structure around it
+        let or_pos = normalized.find(" OR ").expect("Query should contain OR");
+        let and_data_type_pos = normalized
+            .find("AND data_type")
+            .expect("Query should contain AND data_type");
+
+        // Between OR and AND data_type, we need to close out the OR group
+        let between = &normalized[or_pos..and_data_type_pos];
+
+        // The buggy version has: "OR (UPPER(...)) AND data_type"
+        // The fixed version has: "OR (UPPER(...))) AND data_type"
+        // The difference is the extra ")" that closes the outer group containing both OR branches
+
+        // Count parentheses between OR and AND data_type
+        // In the fixed version, there should be more closing parens than opening
+        // because we close both the UPPER() clause AND the outer grouping
+        let open_parens = between.matches('(').count();
+        let close_parens = between.matches(')').count();
+
+        assert!(
+            close_parens > open_parens,
+            "After OR clause, there should be more closing parens than opening (to close the outer OR group). 
\ + Found {} open, {} close in segment: '{}'\nFull query: {}", + open_parens, + close_parens, + between, + normalized + ); + } } From 0281f51a9ba4b7c16d1b9503a704e2be5c9b8d1b Mon Sep 17 00:00:00 2001 From: Zac Farrell Date: Wed, 18 Feb 2026 15:42:05 -0800 Subject: [PATCH 27/27] fix(datafetch): add geometry_columns to new source types --- src/datafetch/native/bigquery.rs | 1 + src/datafetch/native/ducklake.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/src/datafetch/native/bigquery.rs b/src/datafetch/native/bigquery.rs index 0a6678c..1030ba0 100644 --- a/src/datafetch/native/bigquery.rs +++ b/src/datafetch/native/bigquery.rs @@ -194,6 +194,7 @@ pub async fn discover_tables( table_name, table_type, columns: vec![column], + geometry_columns: std::collections::HashMap::new(), }); } } diff --git a/src/datafetch/native/ducklake.rs b/src/datafetch/native/ducklake.rs index bb9ccc5..72abaa3 100644 --- a/src/datafetch/native/ducklake.rs +++ b/src/datafetch/native/ducklake.rs @@ -126,6 +126,7 @@ pub async fn discover_tables( table_name, table_type: "BASE TABLE".to_string(), columns, + geometry_columns: std::collections::HashMap::new(), }); } }