diff --git a/Cargo.lock b/Cargo.lock index bfded3e..c2040d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -382,14 +382,16 @@ dependencies = [ "chrono", "datafusion", "futures", + "geo-postgis", + "geo-traits", "geoarrow", "geoarrow-schema", "pg_interval_2", "pgwire", + "postgis", "postgres-types", "rust_decimal", "tokio", - "wkb", ] [[package]] @@ -1700,6 +1702,7 @@ dependencies = [ "datafusion-pg-catalog", "env_logger", "futures", + "geodatafusion", "getset", "log", "pgwire", @@ -1815,6 +1818,16 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "earcutr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01" +dependencies = [ + "itertools 0.11.0", + "num-traits", +] + [[package]] name = "either" version = "1.15.0" @@ -1905,6 +1918,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float_next_after" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" + [[package]] name = "fnv" version = "1.0.7" @@ -2037,6 +2056,34 @@ dependencies = [ "version_check", ] +[[package]] +name = "geo" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fc1a1678e54befc9b4bcab6cd43b8e7f834ae8ea121118b0fd8c42747675b4a" +dependencies = [ + "earcutr", + "float_next_after", + "geo-types", + "geographiclib-rs", + "i_overlay", + "log", + "num-traits", + "robust", + "rstar", + "spade", +] + +[[package]] +name = "geo-postgis" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fdc8b3bd7e9f4c91b8e69b508cd8a5520b83bad3e4a94b8e08a2b184a152b8" +dependencies = [ + "geo-types", + "postgis", +] + [[package]] name = "geo-traits" version = "0.3.0" @@ -2048,12 +2095,14 @@ dependencies = [ [[package]] name = "geo-types" -version = "0.7.18" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24f8647af4005fa11da47cd56252c6ef030be8fa97bdbf355e7dfb6348f0a82c" +checksum = "75a4dcd69d35b2c87a7c83bce9af69fd65c9d68d3833a0ded568983928f3fc99" dependencies = [ "approx", "num-traits", + "rayon", + "rstar", "serde", ] @@ -2083,6 +2132,20 @@ dependencies = [ "wkt", ] +[[package]] +name = "geoarrow-expr-geo" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa84300361ce57fb875bcaa6e32b95b0aff5c6b1af692b936bdd58ff343f4394" +dependencies = [ + "arrow-array", + "arrow-buffer", + "geo", + "geo-traits", + "geoarrow-array", + "geoarrow-schema", +] + [[package]] name = "geoarrow-schema" version = "0.7.0" @@ -2096,6 +2159,44 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "geodatafusion" +version = "0.2.0" +source = "git+https://github.com/sunng87/geodatafusion?branch=feature%2Fdatafusion-52#5adca3dc3b4a16fa884c1a8843c29b35ec2278c1" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-schema", + "datafusion", + "geo", + "geo-traits", + "geoarrow-array", + "geoarrow-expr-geo", + "geoarrow-schema", + "geohash", + "thiserror 1.0.69", + "wkt", +] + +[[package]] +name = "geographiclib-rs" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f611040a2bb37eaa29a78a128d1e92a378a03e0b6e66ae27398d42b1ba9a7841" +dependencies = [ + "libm", +] + +[[package]] +name = "geohash" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fb94b1a65401d6cbf22958a9040aa364812c26674f841bee538b12c135db1e6" +dependencies = [ + "geo-types", + "libm", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -2149,6 +2250,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "hash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -2170,6 +2280,8 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ + "allocator-api2", + "equivalent", "foldhash 0.1.5", ] @@ -2184,6 +2296,16 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "heapless" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +dependencies = [ + "hash32", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.3.3" @@ -2230,6 +2352,49 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "i_float" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "010025c2c532c8d82e42d0b8bb5184afa449fa6f06c709ea9adcb16c49ae405b" +dependencies = [ + "libm", +] + +[[package]] +name = "i_key_sort" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9190f86706ca38ac8add223b2aed8b1330002b5cdbbce28fb58b10914d38fc27" + +[[package]] +name = "i_overlay" +version = "4.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fcccbd4e4274e0f80697f5fbc6540fdac533cce02f2081b328e68629cce24f9" +dependencies = [ + "i_float", + "i_key_sort", + "i_shape", + "i_tree", + "rayon", +] + +[[package]] +name = "i_shape" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ea154b742f7d43dae2897fcd5ead86bc7b5eefcedd305a7ebf9f69d44d61082" +dependencies = [ + "i_float", +] + +[[package]] +name = "i_tree" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e6d558e6d4c7b82bc51d9c771e7a927862a161a7d87bf2b0541450e0e20915" + [[package]] name = "iana-time-zone" version = "0.1.64" @@ -2384,6 +2549,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -3012,6 +3186,17 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "postgis" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b52406590b7a682cadd0f0339c43905eb323568e84a2e97e855ef92645e0ec09" +dependencies = [ + "byteorder", + "bytes", + "postgres-types", +] + [[package]] name = "postgres-protocol" version = "0.6.9" @@ -3390,6 +3575,23 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "robust" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e27ee8bb91ca0adcf0ecb116293afa12d393f9c2b9b9cd54d33e8078fe19839" + +[[package]] +name = "rstar" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "421400d13ccfd26dfa5858199c30a5d76f9c54e0dba7575273025b43c5175dbb" +dependencies = [ + "heapless", + "num-traits", + "smallvec", +] + [[package]] name = "rust_decimal" version = "1.40.0" @@ -3663,6 +3865,18 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "spade" +version = "2.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb313e1c8afee5b5647e00ee0fe6855e3d529eb863a0fdae1d60006c4d1e9990" +dependencies = [ + "hashbrown 0.15.5", + "num-traits", + "robust", + "smallvec", +] + [[package]] name = "spki" version = "0.7.3" diff --git a/arrow-pg/Cargo.toml b/arrow-pg/Cargo.toml index d0f658a..baf4995 100644 --- a/arrow-pg/Cargo.toml +++ b/arrow-pg/Cargo.toml @@ -13,10 +13,10 @@ readme = "../README.md" rust-version.workspace = true [features] -default = ["arrow"] +default = ["arrow", "postgis"] arrow = ["dep:arrow"] datafusion = ["dep:datafusion"] -geo = ["postgres-types/with-geo-types-0_7", "dep:geoarrow", "dep:geoarrow-schema", "dep:wkb"] +postgis = ["postgres-types/with-geo-types-0_7", "dep:geoarrow", "dep:geoarrow-schema", "dep:postgis", "dep:geo-postgis", "pgwire/pg-type-postgis", "dep:geo-traits"] [dependencies] arrow = { workspace = true, optional = true } @@ -31,7 +31,9 @@ pg_interval = { version = "0.5.1", package = "pg_interval_2" } pgwire = { workspace = true, default-features = false, features = ["server-api", "pg-ext-types"] } postgres-types.workspace = true rust_decimal.workspace = true -wkb = { version = "0.9", optional = true } +postgis = { version = "0.9", optional = true } +geo-postgis = { version = "0.2", optional = true } +geo-traits = { version = "0.3", optional = true } [dev-dependencies] async-trait = "0.1" diff --git a/arrow-pg/src/datatypes.rs b/arrow-pg/src/datatypes.rs index 66e28f9..24ae382 100644 --- a/arrow-pg/src/datatypes.rs +++ b/arrow-pg/src/datatypes.rs @@ -2,7 +2,7 @@ use std::sync::Arc; #[cfg(not(feature = "datafusion"))] use arrow::{datatypes::*, record_batch::RecordBatch}; -#[cfg(feature = "geo")] +#[cfg(feature = "postgis")] use arrow_schema::extension::ExtensionType; #[cfg(feature = "datafusion")] use datafusion::arrow::{datatypes::*, record_batch::RecordBatch}; @@ -124,8 +124,31 @@ pub fn field_into_pg_type(field: &Arc) -> PgWireResult { match field.extension_type_name() { // As of arrow 56, there are additional extension logical type that is // defined using field metadata, for instance, json or geo. - #[cfg(feature = "geo")] - Some(geoarrow_schema::PointType::NAME) => Ok(Type::POINT), + // + // TODO: there is no fixed Geometry/Geography type id, here we use text + // for placeholder. + #[cfg(feature = "postgis")] + Some(geoarrow_schema::PointType::NAME) => Ok(Type::TEXT), + #[cfg(feature = "postgis")] + Some(geoarrow_schema::LineStringType::NAME) => Ok(Type::TEXT), + #[cfg(feature = "postgis")] + Some(geoarrow_schema::PolygonType::NAME) => Ok(Type::TEXT), + #[cfg(feature = "postgis")] + Some(geoarrow_schema::MultiPointType::NAME) => Ok(Type::TEXT), + #[cfg(feature = "postgis")] + Some(geoarrow_schema::MultiLineStringType::NAME) => Ok(Type::TEXT), + #[cfg(feature = "postgis")] + Some(geoarrow_schema::MultiPolygonType::NAME) => Ok(Type::TEXT), + #[cfg(feature = "postgis")] + Some(geoarrow_schema::GeometryCollectionType::NAME) => Ok(Type::TEXT), + #[cfg(feature = "postgis")] + Some(geoarrow_schema::GeometryType::NAME) => Ok(Type::TEXT), + #[cfg(feature = "postgis")] + Some(geoarrow_schema::RectType::NAME) => Ok(Type::TEXT), + #[cfg(feature = "postgis")] + Some(geoarrow_schema::WktType::NAME) => Ok(Type::TEXT), + #[cfg(feature = "postgis")] + Some(geoarrow_schema::WkbType::NAME) => Ok(Type::TEXT), _ => into_pg_type(arrow_type), } diff --git a/arrow-pg/src/encoder.rs b/arrow-pg/src/encoder.rs index b703f8c..1ea611a 100644 --- a/arrow-pg/src/encoder.rs +++ b/arrow-pg/src/encoder.rs @@ -16,7 +16,7 @@ use rust_decimal::Decimal; use timezone::Tz; use crate::error::ToSqlError; -#[cfg(feature = "geo")] +#[cfg(feature = "postgis")] use crate::geo_encoder::encode_geo; use crate::list_encoder::encode_list; use crate::struct_encoder::encode_struct; @@ -234,7 +234,7 @@ pub fn encode_value( ) -> PgWireResult<()> { let arrow_type = arrow_field.data_type(); - #[cfg(feature = "geo")] + #[cfg(feature = "postgis")] if let Some(geoarrow_type) = geoarrow_schema::GeoArrowType::from_extension_field(arrow_field) .map_err(|e| PgWireError::ApiError(Box::new(e)))? { diff --git a/arrow-pg/src/geo_encoder.rs b/arrow-pg/src/geo_encoder.rs index d323f70..c1e6429 100644 --- a/arrow-pg/src/geo_encoder.rs +++ b/arrow-pg/src/geo_encoder.rs @@ -4,13 +4,91 @@ use std::sync::Arc; use arrow::datatypes::*; #[cfg(feature = "datafusion")] use datafusion::arrow::datatypes::*; -use geoarrow::array::AsGeoArrowArray; +use geo_postgis::ToPostgis; +use geo_traits::to_geo::{ + ToGeoGeometry, ToGeoGeometryCollection, ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPoint, + ToGeoMultiPolygon, ToGeoPoint, ToGeoPolygon, ToGeoRect, +}; +use geoarrow::array::{AsGeoArrowArray, GeoArrowArray, GeoArrowArrayAccessor}; use geoarrow_schema::GeoArrowType; use pgwire::api::results::FieldInfo; -use pgwire::error::PgWireResult; +use pgwire::error::{PgWireError, PgWireResult}; use crate::encoder::Encoder; +macro_rules! encode_geo_fn { + ( + $name:ident, + $array_type:ty, + $postgis_type:ty, + $($conversion:tt)+ + ) => { + fn $name( + encoder: &mut T, + array: &$array_type, + idx: usize, + pg_field: &FieldInfo, + ) -> PgWireResult<()> { + if array.is_null(idx) { + return encoder.encode_field(&None::<$postgis_type>, pg_field); + } + + let value = array + .value(idx) + .map_err(|e| PgWireError::ApiError(Box::new(e)))?; + + let converted_value = value $($conversion)+; + + encoder.encode_field(&converted_value, pg_field) + } + }; +} + +encode_geo_fn!(encode_point, geoarrow::array::PointArray, postgis::ewkb::Point, + .to_point().to_postgis_with_srid(None)); + +encode_geo_fn!(encode_linestring, geoarrow::array::LineStringArray, postgis::ewkb::LineString, + .to_line_string().to_postgis_with_srid(None)); + +encode_geo_fn!(encode_polygon, geoarrow::array::PolygonArray, postgis::ewkb::Polygon, + .to_polygon().to_postgis_with_srid(None)); + +encode_geo_fn!(encode_multipoint, geoarrow::array::MultiPointArray, postgis::ewkb::MultiPoint, + .to_multi_point().to_postgis_with_srid(None)); + +encode_geo_fn!(encode_multilinestring, geoarrow::array::MultiLineStringArray, postgis::ewkb::MultiLineString, + .to_multi_line_string().to_postgis_with_srid(None)); + +encode_geo_fn!(encode_multipolygon, geoarrow::array::MultiPolygonArray, postgis::ewkb::MultiPolygon, + .to_multi_polygon().to_postgis_with_srid(None)); + +encode_geo_fn!(encode_geometrycollection, geoarrow::array::GeometryCollectionArray, postgis::ewkb::GeometryCollection, + .to_geometry_collection().to_postgis_with_srid(None)); + +encode_geo_fn!(encode_rect, geoarrow::array::RectArray, postgis::ewkb::Polygon, + .to_rect().to_polygon().to_postgis_with_srid(None)); + +encode_geo_fn!(encode_wkt, geoarrow::array::WktArray, String, + .to_string()); + +encode_geo_fn!(encode_large_wkt, geoarrow::array::LargeWktArray, String, + .to_string()); + +encode_geo_fn!(encode_wkt_view, geoarrow::array::WktViewArray, String, + .to_string()); + +encode_geo_fn!(encode_wkb, geoarrow::array::WkbArray, Vec, + .buf().to_vec()); + +encode_geo_fn!(encode_large_wkb, geoarrow::array::LargeWkbArray, Vec, + .buf().to_vec()); + +encode_geo_fn!(encode_wkb_view, geoarrow::array::WkbViewArray, Vec, + .buf().to_vec()); + +encode_geo_fn!(encode_geometry, geoarrow::array::GeometryArray, postgis::ewkb::Geometry, + .to_geometry().to_postgis_with_srid(None)); + pub fn encode_geo( encoder: &mut T, geoarrow_type: GeoArrowType, @@ -20,20 +98,65 @@ pub fn encode_geo( pg_field: &FieldInfo, ) -> PgWireResult<()> { match geoarrow_type { - geoarrow_schema::GeoArrowType::Point(_) => { - let array: &geoarrow::array::PointArray = arr.as_point(); - encode_point(encoder, array, idx, pg_field)?; + GeoArrowType::Point(_) => { + let array = arr.as_point(); + encode_point(encoder, array, idx, pg_field) + } + GeoArrowType::LineString(_) => { + let array = arr.as_line_string(); + encode_linestring(encoder, array, idx, pg_field) + } + GeoArrowType::Polygon(_) => { + let array = arr.as_polygon(); + encode_polygon(encoder, array, idx, pg_field) + } + GeoArrowType::MultiPoint(_) => { + let array = arr.as_multi_point(); + encode_multipoint(encoder, array, idx, pg_field) + } + GeoArrowType::MultiLineString(_) => { + let array = arr.as_multi_line_string(); + encode_multilinestring(encoder, array, idx, pg_field) + } + GeoArrowType::MultiPolygon(_) => { + let array = arr.as_multi_polygon(); + encode_multipolygon(encoder, array, idx, pg_field) + } + GeoArrowType::GeometryCollection(_) => { + let array = arr.as_geometry_collection(); + encode_geometrycollection(encoder, array, idx, pg_field) + } + GeoArrowType::Rect(_) => { + let array = arr.as_rect(); + encode_rect(encoder, array, idx, pg_field) + } + GeoArrowType::Wkt(_) => { + let array = arr.as_wkt(); + encode_wkt(encoder, array, idx, pg_field) + } + GeoArrowType::WktView(_) => { + let array = arr.as_wkt_view(); + encode_wkt_view(encoder, array, idx, pg_field) + } + GeoArrowType::LargeWkt(_) => { + let array = arr.as_wkt(); + encode_large_wkt(encoder, array, idx, pg_field) + } + GeoArrowType::Wkb(_) => { + let array = arr.as_wkb(); + encode_wkb(encoder, array, idx, pg_field) + } + GeoArrowType::WkbView(_) => { + let array = arr.as_wkb_view(); + encode_wkb_view(encoder, array, idx, pg_field) + } + GeoArrowType::LargeWkb(_) => { + let array = arr.as_wkb(); + encode_large_wkb(encoder, array, idx, pg_field) + } + GeoArrowType::Geometry(_) => { + let array = arr.as_geometry(); + encode_geometry(encoder, array, idx, pg_field) } - _ => todo!("handle other geometry types"), } - Ok(()) -} - -fn encode_point( - _encoder: &mut T, - _array: &geoarrow::array::PointArray, - _idx: usize, - _pg_field: &FieldInfo, -) -> PgWireResult<()> { - todo!() } diff --git a/arrow-pg/src/lib.rs b/arrow-pg/src/lib.rs index 20a47cc..bf93307 100644 --- a/arrow-pg/src/lib.rs +++ b/arrow-pg/src/lib.rs @@ -6,7 +6,7 @@ pub mod datatypes; pub mod encoder; mod error; -#[cfg(feature = "geo")] +#[cfg(feature = "postgis")] pub mod geo_encoder; pub mod list_encoder; pub mod row_encoder; diff --git a/datafusion-postgres/Cargo.toml b/datafusion-postgres/Cargo.toml index 1467239..637d90b 100644 --- a/datafusion-postgres/Cargo.toml +++ b/datafusion-postgres/Cargo.toml @@ -19,6 +19,8 @@ async-trait = "0.1" chrono.workspace = true datafusion.workspace = true datafusion-pg-catalog = { path = "../datafusion-pg-catalog", version = "0.14.0" } +#geodatafusion = { version = "0.1.1", optional = true } +geodatafusion = { git = "https://github.com/sunng87/geodatafusion", branch = "feature/datafusion-52", optional = true } futures.workspace = true getset = "0.1" log = "0.4" @@ -32,3 +34,7 @@ rustls-pki-types = "1.14" [dev-dependencies] env_logger = "0.11" + +[features] +default = [] +postgis = ["geodatafusion", "arrow-pg/postgis"] diff --git a/datafusion-postgres/src/lib.rs b/datafusion-postgres/src/lib.rs index a83dfcb..d4254c3 100644 --- a/datafusion-postgres/src/lib.rs +++ b/datafusion-postgres/src/lib.rs @@ -86,6 +86,9 @@ pub async fn serve( session_context: Arc, opts: &ServerOptions, ) -> Result<(), std::io::Error> { + #[cfg(feature = "postgis")] + geodatafusion::register(&session_context); + // Create the handler factory with authentication let factory = Arc::new(HandlerFactory::new(session_context)); @@ -99,6 +102,9 @@ pub async fn serve_with_hooks( opts: &ServerOptions, hooks: Vec>, ) -> Result<(), std::io::Error> { + #[cfg(feature = "postgis")] + geodatafusion::register(&session_context); + // Create the handler factory with authentication let factory = Arc::new(HandlerFactory::new_with_hooks(session_context, hooks)); diff --git a/tests-integration/test.sh b/tests-integration/test.sh index a8caf34..044ca7e 100755 --- a/tests-integration/test.sh +++ b/tests-integration/test.sh @@ -5,7 +5,7 @@ set -e # Function to cleanup processes cleanup() { echo "🧹 Cleaning up processes..." - for pid in $CSV_PID $TRANSACTION_PID $PARQUET_PID $RBAC_PID $SSL_PID; do + for pid in $CSV_PID $TRANSACTION_PID $PARQUET_PID $RBAC_PID $SSL_PID $POSTGIS_PID; do if [ ! -z "$pid" ]; then kill -9 $pid 2>/dev/null || true fi @@ -38,7 +38,7 @@ echo "==================================================" # Build the project echo "Building datafusion-postgres..." cd .. -cargo build +cargo build --features datafusion-postgres/postgis cd tests-integration # Set up test environment @@ -144,6 +144,32 @@ else fi kill -9 $SSL_PID 2>/dev/null || true +sleep 3 + +# Test 6: PostGIS Spatial Functions +echo "" +echo "πŸ—ΊοΈ Test 6: PostGIS Spatial Functions" +echo "--------------------------------------" +wait_for_port 5437 +../target/debug/datafusion-postgres-cli -p 5437 --csv delhi:delhiclimate.csv & +POSTGIS_PID=$! +sleep 5 + +# Check if server is actually running +if ! ps -p $POSTGIS_PID > /dev/null 2>&1; then + echo "❌ PostGIS server failed to start" + exit 1 +fi + +if python3 test_postgis.py; then + echo "βœ… PostGIS test passed" +else + echo "❌ PostGIS test failed" + kill -9 $POSTGIS_PID 2>/dev/null || true + exit 1 +fi + +kill -9 $POSTGIS_PID 2>/dev/null || true echo "" echo "πŸŽ‰ All enhanced integration tests passed!" @@ -157,4 +183,5 @@ echo " βœ… Array types and complex data type support" echo " βœ… Improved pg_catalog system tables" echo " βœ… PostgreSQL function compatibility" echo " βœ… SSL/TLS encryption support" +echo " βœ… PostGIS spatial functions support" echo "" diff --git a/tests-integration/test_postgis.py b/tests-integration/test_postgis.py new file mode 100755 index 0000000..6334087 --- /dev/null +++ b/tests-integration/test_postgis.py @@ -0,0 +1,305 @@ +#!/usr/bin/env python3 +""" +Integration tests for PostGIS spatial functionality. +Tests typical PostGIS queries to ensure they succeed without protocol/client errors. +""" + +import psycopg + + +def main(): + print("πŸ—ΊοΈ Testing PostGIS Spatial Queries") + print("=" * 50) + + conn = psycopg.connect("host=127.0.0.1 port=5437 user=postgres dbname=public") + conn.autocommit = True + + with conn.cursor() as cur: + print("\nπŸ“‹ Test 2: Geometry Creation Functions") + test_geometry_creation(cur) + + print("\nπŸ“‹ Test 3: Geometry Output Functions") + test_geometry_output(cur) + + # print("\nπŸ“‹ Test 4: Spatial Reference System") + # test_spatial_reference_system(cur) + + print("\nπŸ“‹ Test 5: Spatial Measurement Functions") + test_spatial_measurements(cur) + + print("\nπŸ“‹ Test 6: Spatial Predicate Functions") + test_spatial_predicates(cur) + + print("\nπŸ“‹ Test 7: Spatial Analysis Functions") + test_spatial_analysis(cur) + + print("\nπŸ“‹ Test 8: Bounding Box Functions") + test_bounding_box(cur) + + conn.close() + print("\nβœ… All PostGIS tests passed!") + + +def test_geometry_creation(cur): + """Test geometry creation functions.""" + # Test ST_GeomFromText + cur.execute("SELECT ST_GeomFromText('POINT(1 1)')") + result = cur.fetchone()[0] + assert result is not None + print(" βœ“ ST_GeomFromText('POINT(1 1)')") + + # Test ST_Point + cur.execute("SELECT ST_Point(1, 2)") + result = cur.fetchone()[0] + assert result is not None + print(" βœ“ ST_Point(1, 2)") + + # Test ST_MakePoint + cur.execute("SELECT ST_MakePoint(1, 2, 3)") + result = cur.fetchone()[0] + assert result is not None + print(" βœ“ ST_MakePoint(1, 2, 3)") + + # Test ST_LineFromText + # cur.execute("SELECT ST_LineFromText('LINESTRING(0 0, 1 1, 2 2)')") + # result = cur.fetchone()[0] + # assert result is not None + # print(" βœ“ ST_LineFromText('LINESTRING(0 0, 1 1, 2 2)')") + + # # Test ST_PolygonFromText + # cur.execute("SELECT ST_PolygonFromText('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))')") + # result = cur.fetchone()[0] + # assert result is not None + # print(" βœ“ ST_PolygonFromText('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))')") + + # Test ST_GeomFromWKB + # try: + # cur.execute("SELECT ST_AsBinary(ST_Point(1, 2))") + # wkb = cur.fetchone()[0] + # cur.execute("SELECT ST_GeomFromWKB(%s)", [wkb]) + # result = cur.fetchone()[0] + # assert result is not None + # print(" βœ“ ST_GeomFromWKB() and ST_AsBinary()") + # except Exception as e: + # print(f" ℹ️ ST_GeomFromWKB() not fully supported: {type(e).__name__}") + + +def test_geometry_output(cur): + """Test geometry output/formatting functions.""" + # Test ST_AsText + cur.execute("SELECT ST_AsText(ST_Point(1, 2))") + result = cur.fetchone()[0] + assert result is not None + print(f" βœ“ ST_AsText(): {result}") + + # Test ST_AsBinary + cur.execute("SELECT ST_AsBinary(ST_Point(1, 2))") + result = cur.fetchone()[0] + assert result is not None + print(" βœ“ ST_AsBinary()") + + # # Test ST_AsEWKT + # try: + # cur.execute("SELECT ST_AsEWKT(ST_Point(1, 2))") + # result = cur.fetchone()[0] + # assert result is not None + # print(f" βœ“ ST_AsEWKT(): {result}") + # except Exception as e: + # print(f" ℹ️ ST_AsEWKT() not available: {type(e).__name__}") + + # Test ST_AsGeoJSON + # try: + # cur.execute("SELECT ST_AsGeoJSON(ST_Point(1, 2))") + # result = cur.fetchone()[0] + # assert result is not None + # print(f" βœ“ ST_AsGeoJSON(): {result[:50]}...") + # except Exception as e: + # print(f" ℹ️ ST_AsGeoJSON() not available: {type(e).__name__}") + + +# def test_spatial_reference_system(cur): +# """Test spatial reference system functions.""" +# # Test ST_SRID +# cur.execute("SELECT ST_SRID(ST_GeomFromText('POINT(1 1)', 4326))") +# srid = cur.fetchone()[0] +# assert srid == 4326 or srid is not None +# print(f" βœ“ ST_SRID(): {srid}") + +# # Test ST_SetSRID +# cur.execute("SELECT ST_SRID(ST_SetSRID(ST_Point(1, 2), 4326))") +# srid = cur.fetchone()[0] +# assert srid == 4326 or srid is not None +# print(f" βœ“ ST_SetSRID(): {srid}") + +# # Test spatial_ref_sys table +# try: +# cur.execute("SELECT count(*) FROM spatial_ref_sys LIMIT 1") +# count = cur.fetchone()[0] +# print(f" βœ“ spatial_ref_sys table: {count} records") +# except Exception as e: +# print(f" ℹ️ spatial_ref_sys table not available: {type(e).__name__}") + + +def test_spatial_measurements(cur): + """Test spatial measurement functions.""" + # Test ST_Distance + cur.execute("SELECT ST_Distance(ST_Point(0, 0), ST_Point(3, 4))") + distance = cur.fetchone()[0] + assert abs(distance - 5.0) < 0.01 + print(f" βœ“ ST_Distance(): {distance}") + + # Test ST_Length (line) + # cur.execute("SELECT ST_Length(ST_LineFromText('LINESTRING(0 0, 3 4)'))") + # length = cur.fetchone()[0] + # assert length is not None + # print(f" βœ“ ST_Length(): {length}") + + # Test ST_Area (polygon) + # cur.execute( + # "SELECT ST_Area(ST_PolygonFromText('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'))" + # ) + # area = cur.fetchone()[0] + # assert area is not None + # print(f" βœ“ ST_Area(): {area}") + + # Test ST_Perimeter (polygon) + # try: + # cur.execute( + # "SELECT ST_Perimeter(ST_PolygonFromText('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'))" + # ) + # perimeter = cur.fetchone()[0] + # assert perimeter is not None + # print(f" βœ“ ST_Perimeter(): {perimeter}") + # except Exception as e: + # print(f" ℹ️ ST_Perimeter() not available: {type(e).__name__}") + + # test ST_X, ST_Y + cur.execute("SELECT ST_X(ST_Point(5, 10)), ST_Y(ST_Point(5, 10))") + x, y = cur.fetchone() + assert x == 5 and y == 10 + print(f" βœ“ ST_X(), ST_Y(): ({x}, {y})") + + +def test_spatial_predicates(cur): + """Test spatial predicate functions.""" + # Test ST_Equals + cur.execute("SELECT ST_Equals(ST_Point(1, 1), ST_Point(1, 1))") + equals = cur.fetchone()[0] + assert equals is True + print(f" βœ“ ST_Equals(): {equals}") + + # Test ST_Intersects + cur.execute("SELECT ST_Intersects(ST_Point(0, 0), ST_Point(1, 1))") + intersects = cur.fetchone()[0] + assert intersects is not None + print(f" βœ“ ST_Intersects(): {intersects}") + + # Test ST_Contains + # cur.execute( + # "SELECT ST_Contains(ST_PolygonFromText('POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))'), ST_Point(1, 1))" + # ) + # contains = cur.fetchone()[0] + # assert contains is not None + # print(f" βœ“ ST_Contains(): {contains}") + + # Test ST_Within + # cur.execute( + # "SELECT ST_Within(ST_Point(1, 1), ST_PolygonFromText('POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))'))" + # ) + # within = cur.fetchone()[0] + # assert within is not None + # print(f" βœ“ ST_Within(): {within}") + + # Test ST_Touches + try: + cur.execute("SELECT ST_Touches(ST_Point(0, 0), ST_Point(0, 0))") + touches = cur.fetchone()[0] + print(f" βœ“ ST_Touches(): {touches}") + except Exception as e: + print(f" ℹ️ ST_Touches() not available: {type(e).__name__}") + + # Test ST_Disjoint + try: + cur.execute("SELECT ST_Disjoint(ST_Point(0, 0), ST_Point(10, 10))") + disjoint = cur.fetchone()[0] + print(f" βœ“ ST_Disjoint(): {disjoint}") + except Exception as e: + print(f" ℹ️ ST_Disjoint() not available: {type(e).__name__}") + + +def test_spatial_analysis(cur): + """Test spatial analysis functions.""" + # Test ST_Buffer + # try: + # cur.execute("SELECT ST_Buffer(ST_Point(0, 0), 1.0)") + # buffer = cur.fetchone()[0] + # assert buffer is not None + # print(" βœ“ ST_Buffer()") + # except Exception as e: + # print(f" ℹ️ ST_Buffer() not available: {type(e).__name__}") + + # Test ST_Union + # try: + # cur.execute("SELECT ST_Union(ST_Point(0, 0), ST_Point(1, 1))") + # union = cur.fetchone()[0] + # assert union is not None + # print(" βœ“ ST_Union()") + # except Exception as e: + # print(f" ℹ️ ST_Union() not available: {type(e).__name__}") + + # Test ST_Intersection + # try: + # cur.execute("SELECT ST_Intersection(ST_Point(0, 0), ST_Point(0, 0))") + # intersection = cur.fetchone()[0] + # assert intersection is not None + # print(" βœ“ ST_Intersection()") + # except Exception as e: + # print(f" ℹ️ ST_Intersection() not available: {type(e).__name__}") + + # Test ST_Difference + # try: + # cur.execute("SELECT ST_Difference(ST_Point(0, 0), ST_Point(1, 1))") + # difference = cur.fetchone()[0] + # assert difference is not None + # print(" βœ“ ST_Difference()") + # except Exception as e: + # print(f" ℹ️ ST_Difference() not available: {type(e).__name__}") + + # Test ST_ConvexHull + # try: + # cur.execute("SELECT ST_ConvexHull(ST_Point(0, 0))") + # hull = cur.fetchone()[0] + # assert hull is not None + # print(" βœ“ ST_ConvexHull()") + # except Exception as e: + # print(f" ℹ️ ST_ConvexHull() not available: {type(e).__name__}") + + +def test_bounding_box(cur): + """Test bounding box functions.""" + # Test ST_Envelope + # cur.execute( + # "SELECT ST_Envelope(ST_PolygonFromText('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'))" + # ) + # envelope = cur.fetchone()[0] + # assert envelope is not None + # print(" βœ“ ST_Envelope()") + + # Test ST_MinX, ST_MaxX, ST_MinY, ST_MaxY + try: + cur.execute( + "SELECT ST_XMin(ST_Point(5, 10)), ST_XMax(ST_Point(5, 10)), ST_YMin(ST_Point(5, 10)), ST_YMax(ST_Point(5, 10))" + ) + minx, maxx, miny, maxy = cur.fetchone() + assert minx == 5 and maxx == 5 and miny == 10 and maxy == 10 + print( + f" βœ“ ST_MinX(), ST_MaxX(), ST_MinY(), ST_MaxY(): ({minx}, {maxx}, {miny}, {maxy})" + ) + except Exception as e: + print( + f" ℹ️ Bounding box coordinate functions not available: {type(e).__name__}" + ) + + +if __name__ == "__main__": + main()