diff --git a/crates/integrations/datafusion/src/schema.rs b/crates/integrations/datafusion/src/schema.rs
index 022964ba6d..134da010de 100644
--- a/crates/integrations/datafusion/src/schema.rs
+++ b/crates/integrations/datafusion/src/schema.rs
@@ -21,7 +21,7 @@ use std::sync::Arc;
 use async_trait::async_trait;
 use dashmap::DashMap;
 use datafusion::catalog::SchemaProvider;
-use datafusion::datasource::TableProvider;
+use datafusion::datasource::{MemTable, TableProvider};
 use datafusion::error::{DataFusionError, Result as DFResult};
 use datafusion::execution::TaskContext;
 use datafusion::prelude::SessionContext;
@@ -31,7 +31,7 @@ use iceberg::arrow::arrow_schema_to_schema_auto_assign_ids;
 use iceberg::inspect::MetadataTableType;
 use iceberg::{Catalog, Error, ErrorKind, NamespaceIdent, Result, TableCreation};
 
-use crate::table::IcebergTableProvider;
+use crate::table::{IcebergStaticTableProvider, IcebergTableProvider};
 use crate::to_datafusion_error;
 
 /// Represents a [`SchemaProvider`] for the Iceberg [`Catalog`], managing
@@ -150,6 +150,13 @@ impl SchemaProvider for IcebergSchemaProvider {
         name: String,
         table: Arc<dyn TableProvider>,
     ) -> DFResult<Option<Arc<dyn TableProvider>>> {
+        // Reject unsupported table types
+        if !is_iceberg_or_mem_table(&table) {
+            return Err(DataFusionError::Execution(format!(
+                "Cannot register a non-Iceberg table: {table:?}"
+            )));
+        }
+
         // Check if table already exists
         if self.table_exist(name.as_str()) {
             return Err(DataFusionError::Execution(format!(
@@ -213,6 +220,19 @@ impl SchemaProvider for IcebergSchemaProvider {
     }
 }
 
+/// Checks if a TableProvider is an Iceberg table (either catalog-backed or static) or a MemTable.
+fn is_iceberg_or_mem_table(table: &Arc<dyn TableProvider>) -> bool {
+    table
+        .as_any()
+        .downcast_ref::<IcebergTableProvider>()
+        .is_some()
+        || table
+            .as_any()
+            .downcast_ref::<IcebergStaticTableProvider>()
+            .is_some()
+        || table.as_any().downcast_ref::<MemTable>().is_some()
+}
+
 /// Verifies that a table provider contains no data by scanning with LIMIT 1.
 /// Returns an error if the table has any rows.
 async fn ensure_table_is_empty(table: &Arc<dyn TableProvider>) -> Result<()> {
diff --git a/crates/integrations/datafusion/src/table/table_provider_factory.rs b/crates/integrations/datafusion/src/table/table_provider_factory.rs
index 8cae597b7b..43b241f1ef 100644
--- a/crates/integrations/datafusion/src/table/table_provider_factory.rs
+++ b/crates/integrations/datafusion/src/table/table_provider_factory.rs
@@ -26,14 +26,54 @@ use datafusion::logical_expr::CreateExternalTable;
 use datafusion::sql::TableReference;
 use iceberg::io::FileIO;
 use iceberg::table::StaticTable;
-use iceberg::{Error, ErrorKind, Result, TableIdent};
+use iceberg::{Catalog, Error, ErrorKind, NamespaceIdent, Result, TableIdent};
 
-use super::IcebergStaticTableProvider;
+use super::{IcebergStaticTableProvider, IcebergTableProvider};
 use crate::to_datafusion_error;
 
-/// A factory that implements DataFusion's `TableProviderFactory` to create `IcebergTableProvider` instances.
+/// A factory that implements DataFusion's `TableProviderFactory` to create Iceberg table providers.
 ///
-/// # Example
+/// This factory supports two modes of operation:
+///
+/// 1. **Catalog-backed mode**: When constructed with a catalog via `new_with_catalog()`, creates
+///    `IcebergTableProvider` instances backed by the catalog with full read/write support.
+///
+/// 2. **Static mode**: When constructed without a catalog via `new()`, creates static
+///    `IcebergStaticTableProvider` instances from metadata file paths (backward compatible behavior).
+///
+/// # Example (Catalog-backed Mode)
+///
+/// ```ignore
+/// use std::sync::Arc;
+///
+/// use datafusion::execution::session_state::SessionStateBuilder;
+/// use datafusion::prelude::*;
+/// use iceberg_datafusion::IcebergTableProviderFactory;
+///
+/// #[tokio::main]
+/// async fn main() {
+///     // Assume `catalog` is a pre-built Iceberg catalog (e.g., REST, Glue, HMS)
+///     let catalog: Arc<dyn Catalog> = /* ... */;
+///
+///     let mut state = SessionStateBuilder::new().with_default_features().build();
+///
+///     // Register the factory with an injected catalog
+///     state.table_factories_mut().insert(
+///         "ICEBERG".to_string(),
+///         Arc::new(IcebergTableProviderFactory::new_with_catalog(catalog)),
+///     );
+///
+///     let ctx = SessionContext::new_with_state(state);
+///
+///     // Create an external table backed by the catalog
+///     // The table name is used to look up the table in the catalog
+///     ctx.sql("CREATE EXTERNAL TABLE my_ns.my_table STORED AS ICEBERG LOCATION ''")
+///         .await
+///         .expect("Failed to create table");
+/// }
+/// ```
+///
+/// # Example (Static Mode)
 ///
 /// The following example demonstrates how to create an Iceberg external table using SQL in
 /// a DataFusion session with `IcebergTableProviderFactory`:
@@ -90,18 +130,47 @@ use crate::to_datafusion_error;
 /// # Note
 /// This factory is designed to work with the DataFusion query engine,
 /// specifically for handling Iceberg tables in external table commands.
-/// Currently, this implementation supports only reading Iceberg tables, with
-/// the creation of new tables not yet available.
+/// In static mode, only reading Iceberg tables is supported.
+/// In catalog-backed mode, both reading and writing are supported.
 ///
 /// # Errors
 /// An error will be returned if any unsupported feature, such as partition columns,
 /// order expressions, constraints, or column defaults, is detected in the table creation command.
-#[derive(Debug, Default)]
-pub struct IcebergTableProviderFactory {}
+#[derive(Debug, Clone)]
+pub struct IcebergTableProviderFactory {
+    /// Optional catalog for creating catalog-backed table providers.
+    /// When None, falls back to static table provider creation.
+    catalog: Option<Arc<dyn Catalog>>,
+}
 
 impl IcebergTableProviderFactory {
+    /// Creates a new factory without a catalog.
+    ///
+    /// Tables created by this factory will be static (read-only) providers
+    /// loaded from metadata file paths specified in LOCATION.
     pub fn new() -> Self {
-        Self {}
+        Self { catalog: None }
+    }
+
+    /// Creates a new factory with an injected catalog.
+    ///
+    /// Tables created by this factory will be catalog-backed providers
+    /// with full read/write support. The table name from the `CREATE EXTERNAL TABLE`
+    /// command will be used to look up the table in the catalog.
+    ///
+    /// # Arguments
+    ///
+    /// * `catalog` - An Iceberg catalog instance
+    pub fn new_with_catalog(catalog: Arc<dyn Catalog>) -> Self {
+        Self {
+            catalog: Some(catalog),
+        }
+    }
+}
+
+impl Default for IcebergTableProviderFactory {
+    fn default() -> Self {
+        Self::new()
+    }
 }
 
@@ -115,21 +184,48 @@ impl TableProviderFactory for IcebergTableProviderFactory {
         check_cmd(cmd).map_err(to_datafusion_error)?;
 
         let table_name = &cmd.name;
-        let metadata_file_path = &cmd.location;
-        let options = &cmd.options;
-
         let table_name_with_ns = complement_namespace_if_necessary(table_name);
 
-        let table = create_static_table(table_name_with_ns, metadata_file_path, options)
-            .await
-            .map_err(to_datafusion_error)?
-            .into_table();
-
-        let provider = IcebergStaticTableProvider::try_new_from_table(table)
-            .await
-            .map_err(to_datafusion_error)?;
-
-        Ok(Arc::new(provider))
+        match &self.catalog {
+            Some(catalog) => {
+                // Catalog-backed: create IcebergTableProvider
+                let (namespace, name) = parse_table_reference(&table_name_with_ns);
+                let table_ident = TableIdent::new(namespace.clone(), name.clone());
+
+                // Check if table exists before attempting to load
+                if !catalog
+                    .table_exists(&table_ident)
+                    .await
+                    .map_err(to_datafusion_error)?
+                {
+                    return Err(to_datafusion_error(Error::new(
+                        ErrorKind::TableNotFound,
+                        format!("Table '{table_ident}' not found in catalog"),
+                    )));
+                }
+
+                let provider = IcebergTableProvider::try_new(catalog.clone(), namespace, name)
+                    .await
+                    .map_err(to_datafusion_error)?;
+                Ok(Arc::new(provider))
+            }
+            None => {
+                // Static: create IcebergStaticTableProvider
+                let metadata_file_path = &cmd.location;
+                let options = &cmd.options;
+
+                let table = create_static_table(table_name_with_ns, metadata_file_path, options)
+                    .await
+                    .map_err(to_datafusion_error)?
+                    .into_table();
+
+                let provider = IcebergStaticTableProvider::try_new_from_table(table)
+                    .await
+                    .map_err(to_datafusion_error)?;
+
+                Ok(Arc::new(provider))
+            }
+        }
     }
 }
 
@@ -160,6 +256,31 @@ fn check_cmd(cmd: &CreateExternalTable) -> Result<()> {
     Ok(())
 }
 
+/// Parses a TableReference into namespace and table name components.
+///
+/// This function extracts the namespace and table name from a DataFusion `TableReference`,
+/// following these rules:
+/// - `Bare` names (e.g., `my_table`) use the "default" namespace
+/// - `Partial` names (e.g., `my_ns.my_table`) use the schema as the namespace
+/// - `Full` names (e.g., `catalog.my_ns.my_table`) use the schema as the namespace,
+///   ignoring the catalog component (which is the DataFusion catalog, not the Iceberg catalog)
+fn parse_table_reference(table_ref: &TableReference) -> (NamespaceIdent, String) {
+    match table_ref {
+        TableReference::Bare { table } => (
+            NamespaceIdent::new("default".to_string()),
+            table.to_string(),
+        ),
+        TableReference::Partial { schema, table } => {
+            (NamespaceIdent::new(schema.to_string()), table.to_string())
+        }
+        TableReference::Full { schema, table, .. } => {
+            // For fully qualified names, use schema as namespace
+            // (catalog is typically the DataFusion catalog, not Iceberg catalog)
+            (NamespaceIdent::new(schema.to_string()), table.to_string())
+        }
+    }
+}
+
 /// Complements the namespace of a table name if necessary.
 ///
 /// # Note
@@ -202,6 +323,10 @@ mod tests {
     use datafusion::parquet::arrow::PARQUET_FIELD_ID_META_KEY;
     use datafusion::prelude::SessionContext;
     use datafusion::sql::TableReference;
+    use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
+    use iceberg::spec::{NestedField, PrimitiveType, Schema as IcebergSchema, Type};
+    use iceberg::{CatalogBuilder, TableCreation};
+    use tempfile::TempDir;
 
     use super::*;
 
@@ -302,4 +427,203 @@
         assert_eq!(actual_schema.as_ref(), &expected_schema);
     }
+
+    #[test]
+    fn test_parse_table_reference() {
+        // Bare name should use "default" namespace
+        let table_ref = TableReference::bare("my_table");
+        let (namespace, table_name) = parse_table_reference(&table_ref);
+
+        assert_eq!(namespace.as_ref(), &vec!["default".to_string()]);
+        assert_eq!(table_name, "my_table");
+
+        // Partial name should use schema as namespace
+        let table_ref = TableReference::partial("my_namespace", "my_table");
+        let (namespace, table_name) = parse_table_reference(&table_ref);
+
+        assert_eq!(namespace.as_ref(), &vec!["my_namespace".to_string()]);
+        assert_eq!(table_name, "my_table");
+
+        // Full name should use schema as namespace, ignoring catalog
+        let table_ref = TableReference::full("my_catalog", "my_namespace", "my_table");
+        let (namespace, table_name) = parse_table_reference(&table_ref);
+
+        assert_eq!(namespace.as_ref(), &vec!["my_namespace".to_string()]);
+        assert_eq!(table_name, "my_table");
+    }
+
+    #[tokio::test]
+    async fn test_factory_with_catalog_creates_catalog_backed_provider() {
+        // Set up a memory catalog with a test table
+        let temp_dir = TempDir::new().unwrap();
+        let warehouse_path = temp_dir.path().to_str().unwrap().to_string();
+
+        let catalog = MemoryCatalogBuilder::default()
+            .load(
+                "memory",
+                HashMap::from([(MEMORY_CATALOG_WAREHOUSE.to_string(), warehouse_path.clone())]),
+            )
+            .await
+            .unwrap();
+
+        let namespace = iceberg::NamespaceIdent::new("test_ns".to_string());
+        catalog
+            .create_namespace(&namespace, HashMap::new())
+            .await
+            .unwrap();
+
+        let schema = IcebergSchema::builder()
+            .with_schema_id(0)
+            .with_fields(vec![
+                NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+                NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
+            ])
+            .build()
+            .unwrap();
+
+        let table_creation = TableCreation::builder()
+            .name("test_table".to_string())
+            .location(format!("{warehouse_path}/test_table"))
+            .schema(schema)
+            .properties(HashMap::new())
+            .build();
+
+        catalog
+            .create_table(&namespace, table_creation)
+            .await
+            .unwrap();
+
+        // Create factory with catalog
+        let factory = IcebergTableProviderFactory::new_with_catalog(Arc::new(catalog));
+
+        // Create external table command
+        let cmd = CreateExternalTable {
+            name: TableReference::partial("test_ns", "test_table"),
+            location: String::new(), // Location is ignored when catalog is present
+            schema: Arc::new(DFSchema::empty()),
+            file_type: "iceberg".to_string(),
+            options: Default::default(),
+            table_partition_cols: Default::default(),
+            order_exprs: Default::default(),
+            constraints: Constraints::default(),
+            column_defaults: Default::default(),
+            if_not_exists: Default::default(),
+            or_replace: false,
+            temporary: false,
+            definition: Default::default(),
+            unbounded: Default::default(),
+        };
+
+        let state = SessionStateBuilder::new().build();
+        let table_provider = factory
+            .create(&state, &cmd)
+            .await
+            .expect("create table failed");
+
+        // Verify the schema matches the catalog table
+        let schema = table_provider.schema();
+        assert_eq!(schema.fields().len(), 2);
+        assert_eq!(schema.field(0).name(), "id");
+        assert_eq!(schema.field(1).name(), "name");
+
+        // Verify it's a catalog-backed provider by checking it supports writes
+        // (IcebergStaticTableProvider does not support writes)
+        let ctx = SessionContext::new();
+        ctx.register_table("test_table", table_provider).unwrap();
+
+        // This should succeed for catalog-backed provider
+        let result = ctx.sql("INSERT INTO test_table VALUES (1, 'test')").await;
+        assert!(
+            result.is_ok(),
+            "Catalog-backed provider should support INSERT"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_factory_without_catalog_creates_static_provider() {
+        // Create factory without catalog (default)
+        let factory = IcebergTableProviderFactory::new();
+
+        let state = SessionStateBuilder::new().build();
+        let cmd = create_external_table_cmd();
+
+        let table_provider = factory
+            .create(&state, &cmd)
+            .await
+            .expect("create table failed");
+
+        // Verify the schema matches the static table
+        let expected_schema = table_metadata_v2_schema();
+        let actual_schema = table_provider.schema();
+        assert_eq!(actual_schema.as_ref(), &expected_schema);
+
+        // Verify it's a static provider by checking it rejects writes
+        let ctx = SessionContext::new();
+        ctx.register_table("static_table", table_provider).unwrap();
+
+        // This should fail for static provider
+        let result = ctx.sql("INSERT INTO static_table VALUES (1, 2, 3)").await;
+        // The error should occur during planning or execution
+        assert!(
+            result.is_err() || {
+                let df = result.unwrap();
+                df.collect().await.is_err()
+            },
+            "Static provider should reject INSERT"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_factory_with_catalog_returns_error_for_nonexistent_table() {
+        // Set up a memory catalog without any tables
+        let temp_dir = TempDir::new().unwrap();
+        let warehouse_path = temp_dir.path().to_str().unwrap().to_string();
+
+        let catalog = MemoryCatalogBuilder::default()
+            .load(
+                "memory",
+                HashMap::from([(MEMORY_CATALOG_WAREHOUSE.to_string(), warehouse_path)]),
+            )
+            .await
+            .unwrap();
+
+        let namespace = iceberg::NamespaceIdent::new("test_ns".to_string());
+        catalog
+            .create_namespace(&namespace, HashMap::new())
+            .await
+            .unwrap();
+
+        // Create factory with catalog
+        let factory = IcebergTableProviderFactory::new_with_catalog(Arc::new(catalog));
+
+        // Create external table command for a non-existent table
+        let cmd = CreateExternalTable {
+            name: TableReference::partial("test_ns", "nonexistent_table"),
+            location: String::new(),
+            schema: Arc::new(DFSchema::empty()),
+            file_type: "iceberg".to_string(),
+            options: Default::default(),
+            table_partition_cols: Default::default(),
+            order_exprs: Default::default(),
+            constraints: Constraints::default(),
+            column_defaults: Default::default(),
+            if_not_exists: Default::default(),
+            or_replace: false,
+            temporary: false,
+            definition: Default::default(),
+            unbounded: Default::default(),
+        };
+
+        let state = SessionStateBuilder::new().build();
+        let result = factory.create(&state, &cmd).await;
+
+        // Should return an error because the table doesn't exist
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        let err_msg = err.to_string();
+        assert!(
+            err_msg.contains("not found"),
+            "Error message should indicate table not found: {err_msg}",
+        );
+    }
 }
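Reviewer note: circling back to the `register_table` guard added in schema.rs at the top of this patch, the allow-list behavior can be exercised directly against a schema provider. Below is a minimal, untested sketch; the `staging` table name and the `"default"` schema lookup are hypothetical, and it assumes (per its doc comment) that `ensure_table_is_empty` runs on the same registration path:

```rust
use std::sync::Arc;

use datafusion::arrow::array::Int32Array;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::catalog::{CatalogProvider, SchemaProvider};
use datafusion::datasource::MemTable;
use datafusion::error::Result as DFResult;
use iceberg_datafusion::IcebergCatalogProvider;

async fn register_staging_table(provider: &IcebergCatalogProvider) -> DFResult<()> {
    // An empty single-column Arrow schema and batch to back the MemTable.
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(schema.clone(), vec![
        Arc::new(Int32Array::from(Vec::<i32>::new())),
    ])?;
    let mem_table = MemTable::try_new(schema, vec![vec![batch]])?;

    // Hypothetical lookup of an Iceberg namespace exposed as a DataFusion schema.
    let iceberg_schema = provider.schema("default").expect("schema exists");

    // Accepted: MemTable is on the allow-list in is_iceberg_or_mem_table, and an
    // empty batch should also pass the emptiness check (assumption, see above).
    iceberg_schema.register_table("staging".to_string(), Arc::new(mem_table))?;

    // Any other provider type would be rejected with
    // "Cannot register a non-Iceberg table: ...".
    Ok(())
}
```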
diff --git a/crates/sqllogictest/src/engine/datafusion.rs b/crates/sqllogictest/src/engine/datafusion.rs
index 487d8dc977..28bee54565 100644
--- a/crates/sqllogictest/src/engine/datafusion.rs
+++ b/crates/sqllogictest/src/engine/datafusion.rs
@@ -20,12 +20,13 @@ use std::path::{Path, PathBuf};
 use std::sync::Arc;
 
 use datafusion::catalog::CatalogProvider;
+use datafusion::execution::session_state::SessionStateBuilder;
 use datafusion::prelude::{SessionConfig, SessionContext};
 use datafusion_sqllogictest::DataFusion;
 use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
 use iceberg::spec::{NestedField, PrimitiveType, Schema, Transform, Type, UnboundPartitionSpec};
 use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation};
-use iceberg_datafusion::IcebergCatalogProvider;
+use iceberg_datafusion::{IcebergCatalogProvider, IcebergTableProviderFactory};
 use indicatif::ProgressBar;
 
 use crate::engine::{DatafusionCatalogConfig, EngineRunner, run_slt_with_runner};
@@ -62,12 +63,29 @@ impl DataFusionEngine {
         let session_config = SessionConfig::new()
             .with_target_partitions(4)
             .with_information_schema(true);
-        let ctx = SessionContext::new_with_config(session_config);
-        ctx.register_catalog(
-            "default",
-            Self::create_catalog(catalog_config.as_ref()).await?,
+
+        // Create the catalog first so we can share it with the factory
+        let (catalog_provider, iceberg_catalog) =
+            Self::create_catalog(catalog_config.as_ref()).await?;
+
+        // Build session state with the IcebergTableProviderFactory registered
+        let mut state = SessionStateBuilder::new()
+            .with_config(session_config)
+            .with_default_features()
+            .build();
+
+        // Register the IcebergTableProviderFactory with the injected catalog
+        // This enables CREATE EXTERNAL TABLE ... STORED AS ICEBERG to load tables from the catalog
+        state.table_factories_mut().insert(
+            "ICEBERG".to_string(),
+            Arc::new(IcebergTableProviderFactory::new_with_catalog(
+                iceberg_catalog,
+            )),
         );
 
+        let ctx = SessionContext::new_with_state(state);
+        ctx.register_catalog("default", catalog_provider);
+
         Ok(Self {
             test_data_path: PathBuf::from("testdata"),
             session_context: ctx,
@@ -76,7 +94,7 @@ impl DataFusionEngine {
 
     async fn create_catalog(
         _catalog_config: Option<&DatafusionCatalogConfig>,
-    ) -> anyhow::Result<Arc<dyn CatalogProvider>> {
+    ) -> anyhow::Result<(Arc<dyn CatalogProvider>, Arc<dyn Catalog>)> {
         // TODO: Use catalog_config to load different catalog types via iceberg-catalog-loader
         // See: https://github.com/apache/iceberg-rust/issues/1780
         let catalog = MemoryCatalogBuilder::default()
@@ -96,14 +114,21 @@ impl DataFusionEngine {
         // Create partitioned test table (unpartitioned tables are now created via SQL)
         Self::create_partitioned_table(&catalog, &namespace).await?;
 
-        Ok(Arc::new(
-            IcebergCatalogProvider::try_new(Arc::new(catalog)).await?,
-        ))
+        let catalog_arc = Arc::new(catalog);
+        let catalog_provider =
+            Arc::new(IcebergCatalogProvider::try_new(catalog_arc.clone()).await?);
+
+        Ok((catalog_provider, catalog_arc))
     }
 
     /// Create a partitioned test table with id, category, and value columns
     /// Partitioned by category using identity transform
-    /// TODO: this can be removed when we support CREATE EXTERNAL TABLE
+    ///
+    /// This table is created in the Iceberg catalog and can be accessed via:
+    /// 1. `default.default.test_partitioned_table` - auto-registered by IcebergCatalogProvider
+    /// 2. `CREATE EXTERNAL TABLE test_partitioned_table STORED AS ICEBERG LOCATION ''` - loaded via factory
+    ///
+    /// The insert_into.slt tests use approach #2 to demonstrate the CREATE EXTERNAL TABLE feature.
     async fn create_partitioned_table(
         catalog: &impl Catalog,
         namespace: &NamespaceIdent,
diff --git a/crates/sqllogictest/testdata/schedules/df_test.toml b/crates/sqllogictest/testdata/schedules/df_test.toml
index 1d7f42c8d4..e45ac6dbd2 100644
--- a/crates/sqllogictest/testdata/schedules/df_test.toml
+++ b/crates/sqllogictest/testdata/schedules/df_test.toml
@@ -18,13 +18,14 @@
 [engines]
 df = { type = "datafusion" }
 
+# create_table.slt must run first to create tables used by other tests
 [[steps]]
 engine = "df"
-slt = "df_test/show_tables.slt"
+slt = "df_test/create_table.slt"
 
 [[steps]]
 engine = "df"
-slt = "df_test/create_table.slt"
+slt = "df_test/show_tables.slt"
 
 [[steps]]
 engine = "df"
diff --git a/crates/sqllogictest/testdata/slts/df_test/create_table.slt b/crates/sqllogictest/testdata/slts/df_test/create_table.slt
index 2eab1b6bab..a566eb7526 100644
--- a/crates/sqllogictest/testdata/slts/df_test/create_table.slt
+++ b/crates/sqllogictest/testdata/slts/df_test/create_table.slt
@@ -15,6 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# =============================================================================
+# Test CREATE TABLE (catalog-backed table creation)
+# =============================================================================
+
 # Test CREATE TABLE with explicit schema
 statement ok
 CREATE TABLE default.default.empty_table (id INT NOT NULL, name STRING)
@@ -88,3 +92,79 @@ SELECT * FROM default.default.nullable_table
 ----
 1 Value
 2 NULL
+
+# =============================================================================
+# Test CREATE EXTERNAL TABLE (catalog-backed table loading)
+# =============================================================================
+
+# First, create a table in the catalog that we'll load via CREATE EXTERNAL TABLE
+statement ok
+CREATE TABLE default.default.source_table (id INT NOT NULL, data STRING)
+
+# Insert some data into the source table
+query I
+INSERT INTO default.default.source_table VALUES (1, 'first'), (2, 'second')
+----
+2
+
+# Verify source table data
+query IT rowsort
+SELECT * FROM default.default.source_table
+----
+1 first
+2 second
+
+# Create an external table that loads the existing catalog table
+# The table name in CREATE EXTERNAL TABLE is used to look up the table in the catalog
+# LOCATION is ignored when using catalog-backed factory
+statement ok
+CREATE EXTERNAL TABLE source_table STORED AS ICEBERG LOCATION ''
+
+# Query the external table - should see the same data as the catalog table
+query IT rowsort
+SELECT * FROM source_table
+----
+1 first
+2 second
+
+# Insert data via the external table (catalog-backed providers support writes)
+query I
+INSERT INTO source_table VALUES (3, 'third')
+----
+1
+
+# Verify the insert worked - query via external table
+query IT rowsort
+SELECT * FROM source_table
+----
+1 first
+2 second
+3 third
+
+# Verify the insert is visible via the original catalog table path
+query IT rowsort
+SELECT * FROM default.default.source_table
+----
+1 first
+2 second
+3 third
+
+# Test CREATE EXTERNAL TABLE with a bare table name
+statement ok
+CREATE TABLE default.default.ns_table (value INT NOT NULL)
+
+query I
+INSERT INTO default.default.ns_table VALUES (100), (200)
+----
+2
+
+# Create external table with bare name (uses "default" namespace)
+statement ok
+CREATE EXTERNAL TABLE ns_table STORED AS ICEBERG LOCATION ''
+
+# Query via external table
+query I rowsort
+SELECT * FROM ns_table
+----
+100
+200
diff --git a/crates/sqllogictest/testdata/slts/df_test/insert_into.slt b/crates/sqllogictest/testdata/slts/df_test/insert_into.slt
index 1e07844326..1a1f74433e 100644
--- a/crates/sqllogictest/testdata/slts/df_test/insert_into.slt
+++ b/crates/sqllogictest/testdata/slts/df_test/insert_into.slt
@@ -15,6 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# =============================================================================
+# Test INSERT INTO unpartitioned table (created via CREATE TABLE)
+# =============================================================================
+
 # Create unpartitioned test table
 statement ok
 CREATE TABLE default.default.test_unpartitioned_table (id INT NOT NULL, name STRING)
@@ -65,32 +69,45 @@ SELECT * FROM default.default.test_unpartitioned_table
 3 Charlie
 4 NULL
 
-# Test partitioned table - verify initially empty
+# =============================================================================
+# Test INSERT INTO partitioned table (loaded via CREATE EXTERNAL TABLE)
+# =============================================================================
+
+# The partitioned table 'test_partitioned_table' was pre-created in the catalog
+# by the test engine. We load it using CREATE EXTERNAL TABLE to demonstrate
+# the catalog-backed external table feature.
+
+# Load the partitioned table from the catalog using CREATE EXTERNAL TABLE
+# The table name is used to look up the table in the injected catalog
+statement ok
+CREATE EXTERNAL TABLE test_partitioned_table STORED AS ICEBERG LOCATION ''
+
+# Verify the table is initially empty
 query ITT rowsort
-SELECT * FROM default.default.test_partitioned_table
+SELECT * FROM test_partitioned_table
 ----
 
 # Insert single row into partitioned table
 query I
-INSERT INTO default.default.test_partitioned_table VALUES (1, 'electronics', 'laptop')
+INSERT INTO test_partitioned_table VALUES (1, 'electronics', 'laptop')
 ----
 1
 
 # Verify the inserted row in partitioned table
 query ITT rowsort
-SELECT * FROM default.default.test_partitioned_table
+SELECT * FROM test_partitioned_table
 ----
 1 electronics laptop
 
 # Insert multiple rows with different partition values
 query I
-INSERT INTO default.default.test_partitioned_table VALUES (2, 'electronics', 'phone'), (3, 'books', 'novel'), (4, 'books', 'textbook'), (5, 'clothing', 'shirt')
+INSERT INTO test_partitioned_table VALUES (2, 'electronics', 'phone'), (3, 'books', 'novel'), (4, 'books', 'textbook'), (5, 'clothing', 'shirt')
 ----
 4
 
 # Verify all rows in partitioned table
 query ITT rowsort
-SELECT * FROM default.default.test_partitioned_table
+SELECT * FROM test_partitioned_table
 ----
 1 electronics laptop
 2 electronics phone
@@ -100,13 +117,13 @@ SELECT * FROM default.default.test_partitioned_table
 
 # Insert with NULL value in optional column
 query I
-INSERT INTO default.default.test_partitioned_table VALUES (6, 'electronics', NULL)
+INSERT INTO test_partitioned_table VALUES (6, 'electronics', NULL)
 ----
 1
 
 # Verify NULL handling in partitioned table
 query ITT rowsort
-SELECT * FROM default.default.test_partitioned_table
+SELECT * FROM test_partitioned_table
 ----
 1 electronics laptop
 2 electronics phone
@@ -117,7 +134,7 @@ SELECT * FROM default.default.test_partitioned_table
 
 # Verify partition filtering works
 query ITT rowsort
-SELECT * FROM default.default.test_partitioned_table WHERE category = 'books'
+SELECT * FROM test_partitioned_table WHERE category = 'books'
 ----
 3 books novel
 4 books textbook
diff --git a/crates/sqllogictest/testdata/slts/df_test/show_tables.slt b/crates/sqllogictest/testdata/slts/df_test/show_tables.slt
index 770072f9dc..bb4c2c80c0 100644
--- a/crates/sqllogictest/testdata/slts/df_test/show_tables.slt
+++ b/crates/sqllogictest/testdata/slts/df_test/show_tables.slt
@@ -15,6 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# This test runs after create_table.slt, so it sees all tables created there
+# plus the pre-created test_partitioned_table
+
 query TTTT rowsort
 SHOW TABLES
 ----
@@ -25,9 +28,26 @@ datafusion information_schema routines VIEW
 datafusion information_schema schemata VIEW
 datafusion information_schema tables VIEW
 datafusion information_schema views VIEW
+datafusion public ns_table BASE TABLE
+datafusion public source_table BASE TABLE
+default default empty_table BASE TABLE
+default default empty_table$manifests BASE TABLE
+default default empty_table$snapshots BASE TABLE
+default default ns_table BASE TABLE
+default default ns_table$manifests BASE TABLE
+default default ns_table$snapshots BASE TABLE
+default default nullable_table BASE TABLE
+default default nullable_table$manifests BASE TABLE
+default default nullable_table$snapshots BASE TABLE
+default default source_table BASE TABLE
+default default source_table$manifests BASE TABLE
+default default source_table$snapshots BASE TABLE
 default default test_partitioned_table BASE TABLE
 default default test_partitioned_table$manifests BASE TABLE
 default default test_partitioned_table$snapshots BASE TABLE
+default default typed_table BASE TABLE
+default default typed_table$manifests BASE TABLE
+default default typed_table$snapshots BASE TABLE
 default information_schema columns VIEW
 default information_schema df_settings VIEW
 default information_schema parameters VIEW
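Reviewer note: taken together, the pieces above can be smoke-tested outside the sqllogictest harness. Here is a hedged end-to-end sketch; the warehouse path and table name are hypothetical, and it assumes `my_table` already exists in the catalog's `default` namespace with an INT and a STRING column (like `source_table` in create_table.slt):

```rust
use std::collections::HashMap;
use std::sync::Arc;

use datafusion::execution::session_state::SessionStateBuilder;
use datafusion::prelude::SessionContext;
use iceberg::CatalogBuilder;
use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
use iceberg_datafusion::IcebergTableProviderFactory;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // In-memory catalog over a hypothetical local warehouse path.
    let catalog = MemoryCatalogBuilder::default()
        .load(
            "memory",
            HashMap::from([(
                MEMORY_CATALOG_WAREHOUSE.to_string(),
                "/tmp/warehouse".to_string(),
            )]),
        )
        .await?;

    // Register the factory so CREATE EXTERNAL TABLE ... STORED AS ICEBERG
    // resolves table names against the injected catalog.
    let mut state = SessionStateBuilder::new().with_default_features().build();
    state.table_factories_mut().insert(
        "ICEBERG".to_string(),
        Arc::new(IcebergTableProviderFactory::new_with_catalog(Arc::new(
            catalog,
        ))),
    );
    let ctx = SessionContext::new_with_state(state);

    // Bare name resolves to the "default" namespace per parse_table_reference;
    // LOCATION is ignored in catalog-backed mode.
    ctx.sql("CREATE EXTERNAL TABLE my_table STORED AS ICEBERG LOCATION ''")
        .await?;

    // Catalog-backed providers support writes; collect() executes the INSERT.
    ctx.sql("INSERT INTO my_table VALUES (1, 'hello')")
        .await?
        .collect()
        .await?;

    ctx.sql("SELECT * FROM my_table").await?.show().await?;
    Ok(())
}
```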