From d090eb346f3cf0b366be038ded866a9c0600f95b Mon Sep 17 00:00:00 2001 From: Xander Date: Thu, 15 Jan 2026 10:50:08 +0000 Subject: [PATCH] Add Table Properties for Encryption Configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR introduces table-level encryption properties to enable configuration of encryption settings for Iceberg tables. These properties lay the groundwork for future encryption implementation while maintaining compatibility with the Java implementation's property names and structure. Table-level encryption is a critical security feature in Apache Iceberg's Java implementation. To support encryption in iceberg-rust and ensure interoperability between Java and Rust implementations, we need to start by adding the configuration properties that control encryption behavior. This PR adds the property definitions and parsing logic without implementing the actual encryption, keeping the change focused and reviewable. **Modified:** `crates/iceberg/src/spec/table_properties.rs` Added encryption-related properties to the `TableProperties` struct: - `PROPERTY_ENCRYPTION_KEY_ID` (`"encryption.key-id"`) - Master key ID for encrypting data encryption keys - `PROPERTY_ENCRYPTION_DEK_LENGTH` (`"encryption.data-key-length"`) - Data encryption key length (default: 16 bytes) - `PROPERTY_ENCRYPTION_AAD_LENGTH` (`"encryption.aad-length"`) - AAD prefix length for GCM (default: 16 bytes) - `PROPERTY_ENCRYPTION_KMS_TYPE` (`"encryption.kms-type"`) - KMS type (e.g., "aws", "gcp", "azure") All fields are `Option<T>`, as encryption is optional: - `encryption_key_id: Option<String>` - `encryption_dek_length: Option<usize>` - `encryption_aad_length: Option<usize>` - `encryption_kms_type: Option<String>` Extended `TryFrom<&HashMap<String, String>>` implementation to parse encryption properties. Property names match exactly with Java's implementation: - Java: `TableProperties.ENCRYPTION_TABLE_KEY` → Rust: `PROPERTY_ENCRYPTION_KEY_ID` - Java: `TableProperties.ENCRYPTION_DEK_LENGTH` → 
Rust: `PROPERTY_ENCRYPTION_DEK_LENGTH` - Java: `CatalogProperties.ENCRYPTION_KMS_TYPE` → Rust: `PROPERTY_ENCRYPTION_KMS_TYPE` **Note:** Java's `ENCRYPTION_KMS_IMPL` property (for custom KMS implementations via reflection) is intentionally not included since Rust doesn't support runtime reflection. KMS implementations will be selected based on the `encryption.kms-type` property with compiled-in implementations. Added comprehensive test coverage: 1. `test_table_properties_default`: Verifies encryption properties are None by default 2. `test_encryption_properties_valid`: Tests parsing all encryption properties with valid values 3. `test_encryption_properties_partial`: Tests partial encryption configuration 4. `test_encryption_properties_invalid_numeric`: Verifies invalid numeric values are handled gracefully (parsed as None) 5. `test_encryption_properties_with_other_properties`: Tests encryption properties alongside existing table properties All tests pass: ``` running 7 tests test spec::table_properties::tests::test_table_properties_default ... ok test spec::table_properties::tests::test_encryption_properties_partial ... ok test spec::table_properties::tests::test_encryption_properties_invalid_numeric ... ok test spec::table_properties::tests::test_encryption_properties_valid ... ok test spec::table_properties::tests::test_encryption_properties_with_other_properties ... ok test spec::table_properties::tests::test_table_properties_valid ... ok test spec::table_properties::tests::test_table_properties_invalid ... ok ``` 1. **Optional Fields**: All encryption properties are `Option` since encryption is an optional feature 2. **Silent Failure for Invalid Numbers**: Invalid numeric values for `dek_length` and `aad_length` are parsed as None rather than failing, matching the pattern for optional properties 3. **No Validation**: This PR doesn't validate property values (e.g., valid key lengths), leaving that for the encryption implementation 4. 
**No Custom KMS**: Omitted `encryption.kms-impl` property since Rust lacks reflection - KMS type selection will use `encryption.kms-type` with a factory pattern 5. **Independent PR**: No dependencies on other encryption code, can be merged independently This PR is part of a series to implement encryption support: - ✅ PR 1: Core encryption primitives (AES-GCM operations) - ✅ PR 2: Table properties for encryption (this PR) - PR 3: Key management interfaces - PR 4: EncryptionManager implementation - PR 5: Native Parquet encryption support - PR 6: Integration with Table and FileIO --- crates/iceberg/src/spec/table_properties.rs | 127 ++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 413604f51c..203cd6d4b8 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -51,6 +51,10 @@ pub struct TableProperties { pub write_target_file_size_bytes: usize, /// Whether to use `FanoutWriter` for partitioned tables. pub write_datafusion_fanout_enabled: bool, + /// Master key ID for encryption. When set, all data and manifest files will be encrypted. + pub encryption_key_id: Option, + /// Length of data encryption keys in bytes. + pub encryption_dek_length: Option, } impl TableProperties { @@ -144,6 +148,29 @@ impl TableProperties { pub const PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED: &str = "write.datafusion.fanout.enabled"; /// Default value for fanout writer enabled pub const PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT: bool = true; + + // Encryption properties + + /// Master key ID for encrypting data encryption keys. + /// + /// When set, enables table-level encryption where all data and manifest + /// files are encrypted using data encryption keys (DEKs) that are + /// themselves encrypted with this master key. 
+ pub const PROPERTY_ENCRYPTION_KEY_ID: &str = "encryption.key-id"; + + /// Length of data encryption keys in bytes. + /// + /// Controls the key size for AES encryption. The common value is 16 (AES-128), + /// which is the only encryption method currently supported for Parquet. + pub const PROPERTY_ENCRYPTION_DEK_LENGTH: &str = "encryption.data-key-length"; + /// Default length for data encryption keys (16 bytes = AES-128). + pub const PROPERTY_ENCRYPTION_DEK_LENGTH_DEFAULT: usize = 16; + + /// Default AAD (Additional Authenticated Data) length for GCM encryption. + /// + /// AAD provides additional context for authenticated encryption modes like AES-GCM. + /// This is hardcoded to 16 bytes for Java compatibility and is not configurable. + pub const PROPERTY_ENCRYPTION_AAD_LENGTH_DEFAULT: usize = 16; } impl TryFrom<&HashMap> for TableProperties { @@ -187,6 +214,13 @@ impl TryFrom<&HashMap> for TableProperties { TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED, TableProperties::PROPERTY_DATAFUSION_WRITE_FANOUT_ENABLED_DEFAULT, )?, + // Encryption properties - all optional + encryption_key_id: props + .get(TableProperties::PROPERTY_ENCRYPTION_KEY_ID) + .cloned(), + encryption_dek_length: props + .get(TableProperties::PROPERTY_ENCRYPTION_DEK_LENGTH) + .and_then(|v| v.parse().ok()), }) } } @@ -219,6 +253,9 @@ mod tests { table_properties.write_target_file_size_bytes, TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT ); + // Encryption properties should be None by default + assert_eq!(table_properties.encryption_key_id, None); + assert_eq!(table_properties.encryption_dek_length, None); } #[test] @@ -293,4 +330,94 @@ mod tests { "Invalid value for write.target-file-size-bytes: invalid digit found in string" )); } + + #[test] + fn test_encryption_properties_valid() { + let props = HashMap::from([ + ( + TableProperties::PROPERTY_ENCRYPTION_KEY_ID.to_string(), + "test-key-123".to_string(), + ), + ( + 
TableProperties::PROPERTY_ENCRYPTION_DEK_LENGTH.to_string(), + "32".to_string(), + ), + ]); + let table_properties = TableProperties::try_from(&props).unwrap(); + assert_eq!( + table_properties.encryption_key_id, + Some("test-key-123".to_string()) + ); + assert_eq!(table_properties.encryption_dek_length, Some(32)); + } + + #[test] + fn test_encryption_properties_partial() { + // Test with only the key ID set, not the DEK length + let props = HashMap::from([( + TableProperties::PROPERTY_ENCRYPTION_KEY_ID.to_string(), + "my-master-key".to_string(), + )]); + let table_properties = TableProperties::try_from(&props).unwrap(); + assert_eq!( + table_properties.encryption_key_id, + Some("my-master-key".to_string()) + ); + assert_eq!(table_properties.encryption_dek_length, None); + } + + #[test] + fn test_encryption_properties_invalid_numeric() { + // Test that invalid numeric values are silently ignored (parsed as None) + let props = HashMap::from([ + ( + TableProperties::PROPERTY_ENCRYPTION_KEY_ID.to_string(), + "key-456".to_string(), + ), + ( + TableProperties::PROPERTY_ENCRYPTION_DEK_LENGTH.to_string(), + "not-a-number".to_string(), + ), + ]); + let table_properties = TableProperties::try_from(&props).unwrap(); + assert_eq!( + table_properties.encryption_key_id, + Some("key-456".to_string()) + ); + // Invalid numeric values should be parsed as None + assert_eq!(table_properties.encryption_dek_length, None); + } + + #[test] + fn test_encryption_properties_with_other_properties() { + // Test encryption properties alongside other table properties + let props = HashMap::from([ + ( + TableProperties::PROPERTY_COMMIT_NUM_RETRIES.to_string(), + "8".to_string(), + ), + ( + TableProperties::PROPERTY_DEFAULT_FILE_FORMAT.to_string(), + "orc".to_string(), + ), + ( + TableProperties::PROPERTY_ENCRYPTION_KEY_ID.to_string(), + "combined-test-key".to_string(), + ), + ( + TableProperties::PROPERTY_ENCRYPTION_DEK_LENGTH.to_string(), + "16".to_string(), + ), + ]); + let table_properties = 
TableProperties::try_from(&props).unwrap(); + // Check regular properties + assert_eq!(table_properties.commit_num_retries, 8); + assert_eq!(table_properties.write_format_default, "orc".to_string()); + // Check encryption properties + assert_eq!( + table_properties.encryption_key_id, + Some("combined-test-key".to_string()) + ); + assert_eq!(table_properties.encryption_dek_length, Some(16)); + } }