From 364470b41ed6207862d1ff9bd789585cc8dad2e5 Mon Sep 17 00:00:00 2001 From: Roman Nikitin Date: Wed, 17 Dec 2025 11:37:13 +0000 Subject: [PATCH] fix: Handle negative block counts in Avro map/header parsing Per the Avro specification, when reading maps (and arrays), a negative block count means that its absolute value is the item count and that the count is followed by a long giving the block's size in bytes, which lets readers skip the block quickly. Previously, the code cast the signed zigzag-decoded value directly to usize, causing a negative value like -8 to become 18,446,744,073,709,551,608 on 64-bit systems, which triggered a hash table capacity overflow panic. Also skips parsing of 'default' field values, since the current implementation incorrectly expects them to be Schema types rather than actual default values; as a result, every field's default is now read as None. Fixes reading of Apache Iceberg manifest files, which use this encoding. --- src/read/mod.rs | 14 ++++++++++++-- src/schema/de.rs | 6 +++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/read/mod.rs b/src/read/mod.rs index a62119e..b2ab900 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -42,11 +42,21 @@ macro_rules! read_header { let mut items = HashMap::new(); loop { - let len = zigzag_i64($reader)$($_await)*.map_err(|_| Error::OutOfSpec)? as usize; - if len == 0 { + let len_signed = zigzag_i64($reader)$($_await)*.map_err(|_| Error::OutOfSpec)?; + if len_signed == 0 { break Ok(items); } + // Per Avro spec: if count is negative, its absolute value is used, + // and the count is followed by a long block size (for fast skipping).
+ let len = if len_signed < 0 { + // Read and discard the block size + let _block_size = zigzag_i64($reader)$($_await)*.map_err(|_| Error::OutOfSpec)?; + (-len_signed) as usize + } else { + len_signed as usize + }; + items.reserve(len); for _ in 0..len { let key = _read_binary($reader)$($_await)*?; diff --git a/src/schema/de.rs b/src/schema/de.rs index 356c842..9596611 100644 --- a/src/schema/de.rs +++ b/src/schema/de.rs @@ -345,13 +345,17 @@ impl<'de> Visitor<'de> for FieldVisitor { map.insert(key, value); } + // Remove 'default' from the map - it's a value, not a schema, so we skip it + // The avro-schema crate incorrectly expects it to be a Schema type + let _ = map.remove("default"); + Ok(Field { name: remove_string(&mut map, "name")? .ok_or_else(|| serde::de::Error::custom("name is required in enum"))?, doc: remove_string(&mut map, "doc")?, schema: to_schema(&mut map, "type")? .ok_or_else(|| serde::de::Error::custom("type is required in Field"))?, - default: to_schema(&mut map, "default")?, + default: None, // Skip default value parsing - it's not properly supported order: to_order(&mut map, "order")?, aliases: remove_vec_string(&mut map, "aliases")?, })