Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
bd55acd
feat(gis): add GIS/PostGIS query support with GeoParquet output
zfarrell Jan 27, 2026
b773cfe
feat(gis): preserve GeoParquet metadata in dataset uploads
zfarrell Jan 28, 2026
8ba255b
test(gis): fix integration tests for multi-arch Docker
zfarrell Jan 28, 2026
8bf86c6
feat(datasets): add GEOMETRY type support for column definitions
zfarrell Jan 28, 2026
afc7784
feat(datasets): add hex-decode for geometry columns in CSV/JSON
zfarrell Jan 28, 2026
1d2e42f
docs(datafetch): fix BatchWriter lifecycle step ordering
zfarrell Jan 28, 2026
1472836
feat(duckdb): add ST_AsBinary() wrapping for spatial columns
zfarrell Jan 28, 2026
4957a03
test(datasets): add JSON geometry hex-decode test
zfarrell Jan 28, 2026
44de907
test(datasets): add Parquet geometry round-trip test
zfarrell Jan 28, 2026
7ba14e9
fix(geoparquet): make primary_column selection deterministic
zfarrell Jan 28, 2026
c51885f
refactor(gis): deduplicate normalize_geometry_type
zfarrell Jan 28, 2026
0152845
chore: remove unused geozero dependency
zfarrell Jan 28, 2026
74b9645
feat(duckdb): add spatial extension resilience
zfarrell Jan 28, 2026
6644d31
test(type_coverage): add GEOMETRY type support
zfarrell Jan 28, 2026
81e7751
refactor(postgres): fix discovery type mapping
zfarrell Jan 28, 2026
a228a21
fix(duckdb): add debug logging for spatial load
zfarrell Jan 28, 2026
b67fcfe
refactor(engine): extract GeoParquet metadata helper
zfarrell Jan 28, 2026
996832c
docs(geoparquet): add CRS format limitation note
zfarrell Jan 28, 2026
a94a8cd
style: fix clippy warnings
zfarrell Jan 30, 2026
2cec3a9
fix clippy issue
zfarrell Jan 31, 2026
ff5dc81
fix(duckdb): error when spatial columns lack ST_AsBinary support
zfarrell Feb 1, 2026
a7ba30f
fix(snowflake): guard against empty column list in fetch query
zfarrell Feb 1, 2026
601efed
fix(datafetch): populate geometry_columns in MySQL/Snowflake discovery
zfarrell Feb 1, 2026
983dbe5
fix(snowflake): error when spatial columns exist but schema query fails
zfarrell Feb 1, 2026
f242bd5
fix(snowflake): prefer exact case match for schema lookup
zfarrell Feb 1, 2026
4a42664
fix(snowflake): correct boolean precedence in spatial check query
zfarrell Feb 1, 2026
0281f51
fix(datafetch): add geometry_columns to new source types
zfarrell Feb 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,002 changes: 1,263 additions & 739 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ path = "src/bin/server.rs"
datafusion = "51.0"
datafusion-tracing = "51.0.0"
instrumented-object-store = "52.0.0"
geodatafusion = "0.2"
hex = "0.4"
duckdb = { version = "1.4.4", features = ["bundled"] }
sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "sqlite", "mysql", "chrono", "tls-rustls", "bigdecimal"] }
bigdecimal = "0.4"
Expand Down
16 changes: 13 additions & 3 deletions src/datafetch/batch_writer.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use datafusion::arrow::datatypes::Schema;
use datafusion::arrow::record_batch::RecordBatch;
use std::collections::HashMap;

use super::types::GeometryColumnInfo;
use super::DataFetchError;

/// Summary returned when a BatchWriter is closed.
Expand All @@ -15,16 +17,24 @@ pub struct BatchWriteResult {
/// A trait for writing Arrow RecordBatches to storage.
///
/// Implementors must follow this lifecycle:
/// 1. `init(schema)` - Initialize with the Arrow schema (must be called first)
/// 2. `write_batch(batch)` - Write batches (can be called zero or more times)
/// 3. `close()` - Finalize and return metadata (consumes the writer)
/// 1. Optionally call `set_geometry_columns()` to enable GeoParquet metadata
/// 2. `init(schema)` - Initialize with the Arrow schema
/// 3. `write_batch(batch)` - Write batches (can be called zero or more times)
/// 4. `close()` - Finalize and return metadata (consumes the writer)
///
/// All methods are synchronous. When used in async contexts, callers should
/// ensure writes are batched to minimize blocking time.
pub trait BatchWriter: Send {
/// Initialize the writer with the schema for the data to be written.
///
/// Per the trait's lifecycle, this is called before any `write_batch()` call.
///
/// # Errors
///
/// Returns a [`DataFetchError`] if the writer cannot be initialized.
fn init(&mut self, schema: &Schema) -> Result<(), DataFetchError>;

/// Set geometry column metadata for GeoParquet support.
///
/// Calling this is optional. When used, it must be called before `init()`
/// for the metadata to be included.
///
/// `_columns` maps each geometry column's name to its [`GeometryColumnInfo`].
///
/// The default implementation is a no-op, so writers that do not produce
/// GeoParquet output can ignore it.
fn set_geometry_columns(&mut self, _columns: HashMap<String, GeometryColumnInfo>) {
// Intentionally empty: the map is dropped without being stored. The leading
// underscore on `_columns` marks the parameter as deliberately unused.
}

/// Write a single RecordBatch. May be called multiple times.
///
/// Per the trait's lifecycle, this is called after `init()` and may be
/// called zero or more times.
///
/// # Errors
///
/// Returns a [`DataFetchError`] if the batch cannot be written.
fn write_batch(&mut self, batch: &RecordBatch) -> Result<(), DataFetchError>;

Expand Down
9 changes: 6 additions & 3 deletions src/datafetch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@ mod error;
mod fetcher;
pub mod native;
mod orchestrator;
mod types;
pub(crate) mod types;

pub use batch_writer::{BatchWriteResult, BatchWriter};
pub use error::DataFetchError;
pub use fetcher::DataFetcher;
pub use native::{NativeFetcher, StreamingParquetWriter};
pub use native::{parse_geoparquet_metadata, NativeFetcher, StreamingParquetWriter};
pub use orchestrator::FetchOrchestrator;
pub use types::{deserialize_arrow_schema, ColumnMetadata, TableMetadata};
pub use types::{
deserialize_arrow_schema, extract_geometry_columns, ColumnMetadata, GeometryColumnInfo,
TableMetadata, GEOMETRY_COLUMNS_METADATA_KEY,
};
1 change: 1 addition & 0 deletions src/datafetch/native/bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ pub async fn discover_tables(
table_name,
table_type,
columns: vec![column],
geometry_columns: std::collections::HashMap::new(),
});
}
}
Expand Down
Loading
Loading