From 4830a743dfd0eaa015ee2b79d540e782d41c3cea Mon Sep 17 00:00:00 2001 From: nathanmetzger Date: Thu, 15 Jan 2026 09:28:03 +0100 Subject: [PATCH 1/3] docs(datafusion): add user documentation for DataFusion integration Add comprehensive user documentation for the DataFusion integration that covers SQL-based table operations, catalog integration, and query optimization features. Changes: - Add datafusion.md documentation page with setup, SQL operations, metadata tables, partitioned tables, and configuration options - Add datafusion_integration.rs example with annotated code sections - Update SUMMARY.md to include new documentation page - Add required dependencies to examples crate Closes #2027 --- Cargo.lock | 3 + crates/examples/Cargo.toml | 7 + crates/examples/src/datafusion_integration.rs | 170 ++++++++++++++ website/src/SUMMARY.md | 1 + website/src/datafusion.md | 218 ++++++++++++++++++ 5 files changed, 399 insertions(+) create mode 100644 crates/examples/src/datafusion_integration.rs create mode 100644 website/src/datafusion.md diff --git a/Cargo.lock b/Cargo.lock index 3de43e685b..24bd3b5f39 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3540,9 +3540,12 @@ dependencies = [ name = "iceberg-examples" version = "0.8.0" dependencies = [ + "datafusion", "futures", "iceberg", "iceberg-catalog-rest", + "iceberg-datafusion", + "tempfile", "tokio", ] diff --git a/crates/examples/Cargo.toml b/crates/examples/Cargo.toml index c7874d9a17..3666c6c6f8 100644 --- a/crates/examples/Cargo.toml +++ b/crates/examples/Cargo.toml @@ -25,9 +25,12 @@ rust-version = { workspace = true } version = { workspace = true } [dependencies] +datafusion = { workspace = true } futures = { workspace = true } iceberg = { workspace = true } iceberg-catalog-rest = { workspace = true } +iceberg-datafusion = { workspace = true } +tempfile = { workspace = true } tokio = { workspace = true, features = ["full"] } [[example]] @@ -43,6 +46,10 @@ name = "oss-backend" path = "src/oss_backend.rs" required-features = ["storage-oss"] +[[example]] +name = "datafusion-integration" +path = "src/datafusion_integration.rs" + [features] default = [] storage-oss = ["iceberg/storage-oss"] diff --git a/crates/examples/src/datafusion_integration.rs b/crates/examples/src/datafusion_integration.rs new file mode 100644 index 0000000000..bc1a9d6d6b --- /dev/null +++ b/crates/examples/src/datafusion_integration.rs @@ -0,0 +1,170 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Example demonstrating DataFusion integration with Apache Iceberg. +//! +//! This example shows how to: +//! - Set up an Iceberg catalog with DataFusion +//! - Create tables using SQL +//! - Insert and query data +//! 
- Query metadata tables
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::session_state::SessionStateBuilder;
+use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
+use iceberg::{Catalog, CatalogBuilder, NamespaceIdent};
+use iceberg_datafusion::{IcebergCatalogProvider, IcebergTableProviderFactory};
+use tempfile::TempDir;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Create a temporary directory for the warehouse
+    let temp_dir = TempDir::new()?;
+    let warehouse_path = temp_dir.path().to_str().unwrap().to_string();
+
+    // ANCHOR: catalog_setup
+    // Create an in-memory Iceberg catalog
+    let iceberg_catalog = MemoryCatalogBuilder::default()
+        .load(
+            "memory",
+            HashMap::from([(MEMORY_CATALOG_WAREHOUSE.to_string(), warehouse_path)]),
+        )
+        .await?;
+
+    // Create a namespace for our tables
+    let namespace = NamespaceIdent::new("demo".to_string());
+    iceberg_catalog
+        .create_namespace(&namespace, HashMap::new())
+        .await?;
+
+    // Create the IcebergCatalogProvider and register it with DataFusion
+    let catalog_provider =
+        Arc::new(IcebergCatalogProvider::try_new(Arc::new(iceberg_catalog)).await?);
+
+    let ctx = SessionContext::new();
+    ctx.register_catalog("iceberg", catalog_provider);
+    // ANCHOR_END: catalog_setup
+
+    // ANCHOR: create_table
+    // Create a table using SQL
+    ctx.sql(
+        "CREATE TABLE iceberg.demo.users (
+            id INT NOT NULL,
+            name STRING NOT NULL,
+            email STRING
+        )",
+    )
+    .await?;
+
+    println!("Table 'users' created successfully.");
+    // ANCHOR_END: create_table
+
+    // ANCHOR: insert_data
+    // Insert data into the table
+    let result = ctx
+        .sql(
+            "INSERT INTO iceberg.demo.users VALUES
+            (1, 'Alice', 'alice@example.com'),
+            (2, 'Bob', 'bob@example.com'),
+            (3, 'Charlie', NULL)",
+        )
+        .await?
+        .collect()
+        .await?;
+
+    // The returned batch has a single "count" column holding the number of rows inserted
+    datafusion::arrow::util::pretty::print_batches(&result)?;
+    // ANCHOR_END: insert_data
+
+    // ANCHOR: query_data
+    // Query the data with filtering
+    println!("\nQuerying users with email:");
+    let df = ctx
+        .sql("SELECT id, name, email FROM iceberg.demo.users WHERE email IS NOT NULL")
+        .await?;
+
+    df.show().await?;
+
+    // Query with projection (only specific columns)
+    println!("\nQuerying only names:");
+    let df = ctx
+        .sql("SELECT name FROM iceberg.demo.users ORDER BY id")
+        .await?;
+
+    df.show().await?;
+    // ANCHOR_END: query_data
+
+    // ANCHOR: metadata_tables
+    // Query the snapshots metadata table
+    println!("\nTable snapshots:");
+    let df = ctx
+        .sql("SELECT snapshot_id, operation FROM iceberg.demo.users$snapshots")
+        .await?;
+
+    df.show().await?;
+
+    // Query the manifests metadata table
+    println!("\nTable manifests:");
+    let df = ctx
+        .sql("SELECT path, added_data_files_count FROM iceberg.demo.users$manifests")
+        .await?;
+
+    df.show().await?;
+    // ANCHOR_END: metadata_tables
+
+    println!("\nDataFusion integration example completed successfully!");
+
+    Ok(())
+}
+
+// ANCHOR: external_table_setup
+/// Example of setting up IcebergTableProviderFactory for external tables.
+///
+/// This allows reading existing Iceberg tables via `CREATE EXTERNAL TABLE` syntax.
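+///
+/// Note: tables registered this way are read-only. For write support, go
+/// through `IcebergCatalogProvider` instead.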
+#[allow(dead_code)]
+async fn setup_external_table_support() -> SessionContext {
+    // Create a session state with the Iceberg table factory registered
+    let mut state = SessionStateBuilder::new().with_default_features().build();
+
+    // Register the IcebergTableProviderFactory to handle "ICEBERG" file type
+    state.table_factories_mut().insert(
+        "ICEBERG".to_string(),
+        Arc::new(IcebergTableProviderFactory::new()),
+    );
+
+    SessionContext::new_with_state(state)
+}
+// ANCHOR_END: external_table_setup
+
+// ANCHOR: external_table_query
+/// Example SQL for creating and querying an external Iceberg table.
+///
+/// ```sql
+/// -- Create an external table from an existing Iceberg metadata file
+/// CREATE EXTERNAL TABLE my_table
+/// STORED AS ICEBERG
+/// LOCATION '/path/to/iceberg/metadata/v1.metadata.json';
+///
+/// -- Query the external table
+/// SELECT * FROM my_table WHERE column > 100;
+/// ```
+#[allow(dead_code)]
+fn external_table_sql_example() {}
+// ANCHOR_END: external_table_query
diff --git a/website/src/SUMMARY.md b/website/src/SUMMARY.md
index da82364f58..21159497ec 100644
--- a/website/src/SUMMARY.md
+++ b/website/src/SUMMARY.md
@@ -24,6 +24,7 @@
 - [Install](./install.md)
 - [Download](./download.md)
 - [API](./api.md)
+- [DataFusion Integration](./datafusion.md)
 
 # Developer Guide
 
diff --git a/website/src/datafusion.md b/website/src/datafusion.md
new file mode 100644
index 0000000000..eb5dbb4d7c
--- /dev/null
+++ b/website/src/datafusion.md
@@ -0,0 +1,218 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# DataFusion Integration
+
+The `iceberg-datafusion` crate provides integration between Apache Iceberg and [DataFusion](https://datafusion.apache.org/), enabling SQL queries on Iceberg tables.
+
+## Features
+
+- **SQL DDL/DML**: `CREATE TABLE`, `INSERT INTO`, `SELECT`
+- **Query Optimization**: Projection, filter, and LIMIT pushdown
+- **Metadata Tables**: Query snapshots and manifests
+- **Partitioned Tables**: Automatic partition routing for writes
+
+## Dependencies
+
+Add the following to your `Cargo.toml`:
+
+```toml
+[dependencies]
+iceberg = "0.8"
+iceberg-datafusion = "0.8"
+datafusion = "51"
+tokio = { version = "1", features = ["full"] }
+```
+
+## Catalog-Based Access
+
+The recommended way to use Iceberg with DataFusion is through `IcebergCatalogProvider`, which integrates an Iceberg catalog with DataFusion's catalog system.
+
+### Setup
+
+```rust,no_run,noplayground
+{{#rustdoc_include ../../crates/examples/src/datafusion_integration.rs:catalog_setup}}
+```
+
+### Creating Tables
+
+Once the catalog is registered, you can create tables using SQL:
+
+```rust,no_run,noplayground
+{{#rustdoc_include ../../crates/examples/src/datafusion_integration.rs:create_table}}
+```
+
+Supported column types include:
+- `INT`, `BIGINT` - Integer types
+- `FLOAT`, `DOUBLE` - Floating point types
+- `STRING` - String/text type
+- `BOOLEAN` - Boolean type
+- `DATE`, `TIMESTAMP` - Date/time types
+
+> **Note**: `CREATE TABLE AS SELECT` is not currently supported. Create the table first, then use `INSERT INTO`.
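+
+As a sketch of the two-step workaround (the `source` and `target` table names below are placeholders):
+
+```sql
+-- 1. Create the target table with the desired schema
+CREATE TABLE catalog.namespace.target (
+    id INT NOT NULL,
+    name STRING
+);
+
+-- 2. Populate it with a query over the source table
+INSERT INTO catalog.namespace.target
+SELECT id, name FROM catalog.namespace.source;
+```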
+ +### Inserting Data + +```rust,no_run,noplayground +{{#rustdoc_include ../../crates/examples/src/datafusion_integration.rs:insert_data}} +``` + +For nested structs, use the `named_struct()` function: + +```sql +INSERT INTO catalog.namespace.table +SELECT + 1 as id, + named_struct('street', '123 Main St', 'city', 'NYC') as address +``` + +### Querying Data + +```rust,no_run,noplayground +{{#rustdoc_include ../../crates/examples/src/datafusion_integration.rs:query_data}} +``` + +## Metadata Tables + +Iceberg metadata tables can be queried using the `$` syntax (following Flink convention): + +```rust,no_run,noplayground +{{#rustdoc_include ../../crates/examples/src/datafusion_integration.rs:metadata_tables}} +``` + +Available metadata tables: +- `table$snapshots` - Table snapshot history +- `table$manifests` - Manifest file information + +## File-Based Access (External Tables) + +For reading existing Iceberg tables without a catalog, use `IcebergTableProviderFactory`: + +```rust,no_run,noplayground +{{#rustdoc_include ../../crates/examples/src/datafusion_integration.rs:external_table_setup}} +``` + +Then create external tables via SQL: + +```sql +CREATE EXTERNAL TABLE my_table +STORED AS ICEBERG +LOCATION '/path/to/iceberg/metadata/v1.metadata.json'; + +SELECT * FROM my_table; +``` + +> **Note**: External tables are read-only. For write operations, use `IcebergCatalogProvider`. + +## Table Provider Types + +### IcebergTableProvider + +- Backed by an Iceberg catalog +- Automatically refreshes metadata on each operation +- Supports both read and write operations +- Use when you need the latest table state or write capability + +### IcebergStaticTableProvider + +- Fixed table snapshot at construction time +- No catalog round-trips (better performance) +- Read-only +- Use for time-travel queries or when consistency within a query is important + +## Partitioned Tables + +### Creating Partitioned Tables + +Partitioned tables must be created using the Iceberg catalog API (not SQL): + +```rust,no_run +use iceberg::spec::{Transform, UnboundPartitionSpec}; + +let partition_spec = UnboundPartitionSpec::builder() + .with_spec_id(0) + .add_partition_field(column_id, "partition_column", Transform::Identity)? 
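+    // `column_id` is the field ID of the source column in the table's schema.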
+ .build(); +``` + +Supported partition transforms: +- `Identity` - Partition by exact value +- `Year`, `Month`, `Day`, `Hour` - Time-based partitioning +- `Bucket(n)` - Hash partitioning into n buckets +- `Truncate(width)` - String/number truncation + +### Writing to Partitioned Tables + +When inserting into a partitioned table, data is automatically routed to the correct partition directories: + +```sql +INSERT INTO catalog.namespace.partitioned_table VALUES + (1, 'electronics', 'laptop'), + (2, 'books', 'novel'); +-- Data files will be created under: +-- data/category=electronics/ +-- data/category=books/ +``` + +### Write Modes + +Two write modes are available for partitioned tables: + +| Mode | Property Value | Description | +|------|---------------|-------------| +| **Fanout** (default) | `true` | Handles unsorted data, maintains open writers for all partitions | +| **Clustered** | `false` | Requires sorted input, more memory efficient | + +Configure via table property: +``` +write.datafusion.fanout.enabled = true +``` + +## Query Optimization + +The DataFusion integration supports several query optimizations: + +- **Projection Pushdown**: Only reads columns referenced in the query +- **Filter Pushdown**: Prunes data files using manifest statistics +- **LIMIT Pushdown**: Reduces the amount of data scanned + +These optimizations are applied automatically by the query planner. + +## Configuration Options + +| Property | Default | Description | +|----------|---------|-------------| +| `write.datafusion.fanout.enabled` | `true` | Use FanoutWriter (true) or ClusteredWriter (false) for partitioned writes | +| `write.target-file-size-bytes` | `536870912` (512MB) | Target size for data files | +| `write.format.default` | `parquet` | Default file format for new data files | + +## Current Limitations + +- `CREATE TABLE AS SELECT` is not supported +- Metadata tables are limited to `$snapshots` and `$manifests` +- `ALTER TABLE` and `DROP TABLE` via SQL are not supported (use catalog API) +- Schema evolution through SQL is not supported + +## Running the Example + +A complete example is available in the repository: + +```bash +cargo run -p iceberg-examples --example datafusion-integration +``` From 94a836d7df5fa0dae387f1c1199908c0353b07ad Mon Sep 17 00:00:00 2001 From: nathanmetzger Date: Fri, 16 Jan 2026 08:43:34 +0100 Subject: [PATCH 2/3] Address PR review feedback - Change catalog name from "iceberg" to "my_catalog" to clarify it has no special meaning - Move external table code to separate example file - Remove sections not suited for end-users: External Tables, Table Provider Types, Creating Partitioned Tables (Rust API), Query Optimization - Add clarification that table properties must be set via Iceberg catalog API --- crates/examples/Cargo.toml | 4 + .../examples/src/datafusion_external_table.rs | 80 +++++++++++++++++++ crates/examples/src/datafusion_integration.rs | 52 ++---------- website/src/datafusion.md | 68 +--------------- 4 files changed, 94 insertions(+), 110 deletions(-) create mode 100644 crates/examples/src/datafusion_external_table.rs diff --git a/crates/examples/Cargo.toml b/crates/examples/Cargo.toml index 3666c6c6f8..63c1fb1dc9 100644 --- a/crates/examples/Cargo.toml +++ b/crates/examples/Cargo.toml @@ -50,6 +50,10 @@ required-features = ["storage-oss"] name = "datafusion-integration" path = "src/datafusion_integration.rs" +[[example]] +name = "datafusion-external-table" +path = "src/datafusion_external_table.rs" + [features] default = [] storage-oss = 
["iceberg/storage-oss"] diff --git a/crates/examples/src/datafusion_external_table.rs b/crates/examples/src/datafusion_external_table.rs new file mode 100644 index 0000000000..dcd66515c8 --- /dev/null +++ b/crates/examples/src/datafusion_external_table.rs @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Example demonstrating external Iceberg table access via DataFusion. +//! +//! This example shows how to use `IcebergTableProviderFactory` to read +//! existing Iceberg tables via `CREATE EXTERNAL TABLE` syntax. +//! +//! Note: External tables are read-only. For write operations, use +//! `IcebergCatalogProvider` instead (see `datafusion_integration.rs`). + +use std::sync::Arc; + +use datafusion::execution::context::SessionContext; +use datafusion::execution::session_state::SessionStateBuilder; +use iceberg_datafusion::IcebergTableProviderFactory; + +// ANCHOR: external_table_setup +/// Set up a DataFusion session with IcebergTableProviderFactory registered. +/// +/// This allows reading existing Iceberg tables via `CREATE EXTERNAL TABLE` syntax. +fn setup_external_table_support() -> SessionContext { + // Create a session state with the Iceberg table factory registered + let mut state = SessionStateBuilder::new().with_default_features().build(); + + // Register the IcebergTableProviderFactory to handle "ICEBERG" file type + state.table_factories_mut().insert( + "ICEBERG".to_string(), + Arc::new(IcebergTableProviderFactory::new()), + ); + + SessionContext::new_with_state(state) +} +// ANCHOR_END: external_table_setup + +#[tokio::main] +async fn main() -> Result<(), Box> { + let ctx = setup_external_table_support(); + + // ANCHOR: external_table_query + // Example SQL for creating and querying an external Iceberg table: + // + // CREATE EXTERNAL TABLE my_table + // STORED AS ICEBERG + // LOCATION '/path/to/iceberg/metadata/v1.metadata.json'; + // + // SELECT * FROM my_table WHERE column > 100; + // ANCHOR_END: external_table_query + + println!("External table support configured."); + println!("Use CREATE EXTERNAL TABLE ... STORED AS ICEBERG to read existing tables."); + println!(); + println!("Example:"); + println!(" CREATE EXTERNAL TABLE my_table"); + println!(" STORED AS ICEBERG"); + println!(" LOCATION '/path/to/iceberg/metadata/v1.metadata.json';"); + + // This example requires an actual Iceberg table to query. 
+    // For a complete working example with table creation, see datafusion_integration.rs
+
+    // Verify the session is configured correctly
+    let tables = ctx.catalog_names();
+    println!("\nRegistered catalogs: {tables:?}");
+
+    Ok(())
+}
diff --git a/crates/examples/src/datafusion_integration.rs b/crates/examples/src/datafusion_integration.rs
index bc1a9d6d6b..b2c9ccad37 100644
--- a/crates/examples/src/datafusion_integration.rs
+++ b/crates/examples/src/datafusion_integration.rs
@@ -27,10 +27,9 @@ use std::collections::HashMap;
 use std::sync::Arc;
 
 use datafusion::execution::context::SessionContext;
-use datafusion::execution::session_state::SessionStateBuilder;
 use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
 use iceberg::{Catalog, CatalogBuilder, NamespaceIdent};
-use iceberg_datafusion::{IcebergCatalogProvider, IcebergTableProviderFactory};
+use iceberg_datafusion::IcebergCatalogProvider;
 use tempfile::TempDir;
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
@@ -59,13 +58,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         Arc::new(IcebergCatalogProvider::try_new(Arc::new(iceberg_catalog)).await?);
 
     let ctx = SessionContext::new();
-    ctx.register_catalog("iceberg", catalog_provider);
+    ctx.register_catalog("my_catalog", catalog_provider);
     // ANCHOR_END: catalog_setup
 
     // ANCHOR: create_table
     // Create a table using SQL
     ctx.sql(
-        "CREATE TABLE iceberg.demo.users (
+        "CREATE TABLE my_catalog.demo.users (
             id INT NOT NULL,
             name STRING NOT NULL,
             email STRING
@@ -80,7 +79,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Insert data into the table
     let result = ctx
         .sql(
-            "INSERT INTO iceberg.demo.users VALUES
+            "INSERT INTO my_catalog.demo.users VALUES
             (1, 'Alice', 'alice@example.com'),
             (2, 'Bob', 'bob@example.com'),
             (3, 'Charlie', NULL)",
@@ -97,7 +96,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Query the data with filtering
     println!("\nQuerying users with email:");
     let df = ctx
-        .sql("SELECT id, name, email FROM iceberg.demo.users WHERE email IS NOT NULL")
+        .sql("SELECT id, name, email FROM my_catalog.demo.users WHERE email IS NOT NULL")
         .await?;
 
     df.show().await?;
@@ -105,7 +104,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Query with projection (only specific columns)
     println!("\nQuerying only names:");
     let df = ctx
-        .sql("SELECT name FROM iceberg.demo.users ORDER BY id")
+        .sql("SELECT name FROM my_catalog.demo.users ORDER BY id")
         .await?;
 
     df.show().await?;
@@ -115,7 +114,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Query the snapshots metadata table
     println!("\nTable snapshots:");
     let df = ctx
-        .sql("SELECT snapshot_id, operation FROM iceberg.demo.users$snapshots")
+        .sql("SELECT snapshot_id, operation FROM my_catalog.demo.users$snapshots")
         .await?;
 
     df.show().await?;
@@ -123,7 +122,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Query the manifests metadata table
     println!("\nTable manifests:");
     let df = ctx
-        .sql("SELECT path, added_data_files_count FROM iceberg.demo.users$manifests")
+        .sql("SELECT path, added_data_files_count FROM my_catalog.demo.users$manifests")
        .await?;
 
     df.show().await?;
@@ -133,38 +132,3 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     println!("\nDataFusion integration example completed successfully!");
 
     Ok(())
 }
-
-// ANCHOR: external_table_setup
-/// Example of setting up IcebergTableProviderFactory for external tables.
-///
-/// This allows reading existing Iceberg tables via `CREATE EXTERNAL TABLE` syntax.
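-///
-/// Note: tables registered this way are read-only. For write support, go
-/// through `IcebergCatalogProvider` instead.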
-#[allow(dead_code)] -async fn setup_external_table_support() -> SessionContext { - // Create a session state with the Iceberg table factory registered - let mut state = SessionStateBuilder::new().with_default_features().build(); - - // Register the IcebergTableProviderFactory to handle "ICEBERG" file type - state.table_factories_mut().insert( - "ICEBERG".to_string(), - Arc::new(IcebergTableProviderFactory::new()), - ); - - SessionContext::new_with_state(state) -} -// ANCHOR_END: external_table_setup - -// ANCHOR: external_table_query -/// Example SQL for creating and querying an external Iceberg table. -/// -/// ```sql -/// -- Create an external table from an existing Iceberg metadata file -/// CREATE EXTERNAL TABLE my_table -/// STORED AS ICEBERG -/// LOCATION '/path/to/iceberg/metadata/v1.metadata.json'; -/// -/// -- Query the external table -/// SELECT * FROM my_table WHERE column > 100; -/// ``` -#[allow(dead_code)] -fn external_table_sql_example() {} -// ANCHOR_END: external_table_query diff --git a/website/src/datafusion.md b/website/src/datafusion.md index eb5dbb4d7c..128ebdae4f 100644 --- a/website/src/datafusion.md +++ b/website/src/datafusion.md @@ -24,7 +24,6 @@ The `iceberg-datafusion` crate provides integration between Apache Iceberg and [ ## Features - **SQL DDL/DML**: `CREATE TABLE`, `INSERT INTO`, `SELECT` -- **Query Optimization**: Projection, filter, and LIMIT pushdown - **Metadata Tables**: Query snapshots and manifests - **Partitioned Tables**: Automatic partition routing for writes @@ -100,63 +99,8 @@ Available metadata tables: - `table$snapshots` - Table snapshot history - `table$manifests` - Manifest file information -## File-Based Access (External Tables) - -For reading existing Iceberg tables without a catalog, use `IcebergTableProviderFactory`: - -```rust,no_run,noplayground -{{#rustdoc_include ../../crates/examples/src/datafusion_integration.rs:external_table_setup}} -``` - -Then create external tables via SQL: - -```sql -CREATE EXTERNAL TABLE my_table -STORED AS ICEBERG -LOCATION '/path/to/iceberg/metadata/v1.metadata.json'; - -SELECT * FROM my_table; -``` - -> **Note**: External tables are read-only. For write operations, use `IcebergCatalogProvider`. - -## Table Provider Types - -### IcebergTableProvider - -- Backed by an Iceberg catalog -- Automatically refreshes metadata on each operation -- Supports both read and write operations -- Use when you need the latest table state or write capability - -### IcebergStaticTableProvider - -- Fixed table snapshot at construction time -- No catalog round-trips (better performance) -- Read-only -- Use for time-travel queries or when consistency within a query is important - ## Partitioned Tables -### Creating Partitioned Tables - -Partitioned tables must be created using the Iceberg catalog API (not SQL): - -```rust,no_run -use iceberg::spec::{Transform, UnboundPartitionSpec}; - -let partition_spec = UnboundPartitionSpec::builder() - .with_spec_id(0) - .add_partition_field(column_id, "partition_column", Transform::Identity)? 
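-    // `column_id` is the field ID of the source column in the table's schema.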
- .build(); -``` - -Supported partition transforms: -- `Identity` - Partition by exact value -- `Year`, `Month`, `Day`, `Hour` - Time-based partitioning -- `Bucket(n)` - Hash partitioning into n buckets -- `Truncate(width)` - String/number truncation - ### Writing to Partitioned Tables When inserting into a partitioned table, data is automatically routed to the correct partition directories: @@ -184,18 +128,10 @@ Configure via table property: write.datafusion.fanout.enabled = true ``` -## Query Optimization - -The DataFusion integration supports several query optimizations: - -- **Projection Pushdown**: Only reads columns referenced in the query -- **Filter Pushdown**: Prunes data files using manifest statistics -- **LIMIT Pushdown**: Reduces the amount of data scanned - -These optimizations are applied automatically by the query planner. - ## Configuration Options +These table properties control write behavior. They must be set when creating the table via the Iceberg catalog API, as DataFusion SQL does not support `ALTER TABLE` for property changes. + | Property | Default | Description | |----------|---------|-------------| | `write.datafusion.fanout.enabled` | `true` | Use FanoutWriter (true) or ClusteredWriter (false) for partitioned writes | From 7f6bfda54ac135d8470d4f89cbda65d29d54bade Mon Sep 17 00:00:00 2001 From: nathanmetzger Date: Fri, 16 Jan 2026 08:47:54 +0100 Subject: [PATCH 3/3] Remove Partitioned Tables section entirely --- website/src/datafusion.md | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/website/src/datafusion.md b/website/src/datafusion.md index 128ebdae4f..c3921720c6 100644 --- a/website/src/datafusion.md +++ b/website/src/datafusion.md @@ -25,7 +25,6 @@ The `iceberg-datafusion` crate provides integration between Apache Iceberg and [ - **SQL DDL/DML**: `CREATE TABLE`, `INSERT INTO`, `SELECT` - **Metadata Tables**: Query snapshots and manifests -- **Partitioned Tables**: Automatic partition routing for writes ## Dependencies @@ -99,42 +98,12 @@ Available metadata tables: - `table$snapshots` - Table snapshot history - `table$manifests` - Manifest file information -## Partitioned Tables - -### Writing to Partitioned Tables - -When inserting into a partitioned table, data is automatically routed to the correct partition directories: - -```sql -INSERT INTO catalog.namespace.partitioned_table VALUES - (1, 'electronics', 'laptop'), - (2, 'books', 'novel'); --- Data files will be created under: --- data/category=electronics/ --- data/category=books/ -``` - -### Write Modes - -Two write modes are available for partitioned tables: - -| Mode | Property Value | Description | -|------|---------------|-------------| -| **Fanout** (default) | `true` | Handles unsorted data, maintains open writers for all partitions | -| **Clustered** | `false` | Requires sorted input, more memory efficient | - -Configure via table property: -``` -write.datafusion.fanout.enabled = true -``` - ## Configuration Options These table properties control write behavior. They must be set when creating the table via the Iceberg catalog API, as DataFusion SQL does not support `ALTER TABLE` for property changes. | Property | Default | Description | |----------|---------|-------------| -| `write.datafusion.fanout.enabled` | `true` | Use FanoutWriter (true) or ClusteredWriter (false) for partitioned writes | | `write.target-file-size-bytes` | `536870912` (512MB) | Target size for data files | | `write.format.default` | `parquet` | Default file format for new data files |