From c34a33681916a4ade05aa6424980775b9fbdfcb7 Mon Sep 17 00:00:00 2001 From: dttung2905 Date: Mon, 15 Dec 2025 19:17:24 +0000 Subject: [PATCH 1/8] Add duplicate field ID checking mechanism Signed-off-by: dttung2905 --- manifest_test.go | 2 +- schema.go | 13 +++++- schema_test.go | 101 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 3 deletions(-) diff --git a/manifest_test.go b/manifest_test.go index 1192474ee..101294943 100644 --- a/manifest_test.go +++ b/manifest_test.go @@ -463,7 +463,7 @@ var ( NestedField{ID: 16, Name: "improvement_surcharge", Type: PrimitiveTypes.Float64, Required: false}, NestedField{ID: 17, Name: "total_amount", Type: PrimitiveTypes.Float64, Required: true}, NestedField{ID: 18, Name: "congestion_surcharge", Type: PrimitiveTypes.Float64, Required: false}, - NestedField{ID: 19, Name: "VendorID", Type: PrimitiveTypes.Int32, Required: false}, + NestedField{ID: 19, Name: "vendor_id_alt", Type: PrimitiveTypes.Int32, Required: false}, ) ) diff --git a/schema.go b/schema.go index a13b8a325..e84a6063e 100644 --- a/schema.go +++ b/schema.go @@ -86,6 +86,11 @@ func (s *Schema) init() { s.lazyNameMapping = sync.OnceValue(func() NameMapping { return createMappingFromSchema(s) }) + + // Validate that the schema does not contain duplicate field IDs. + if _, err := IndexNameByID(s); err != nil { + panic(err) + } } func (s *Schema) String() string { @@ -812,8 +817,12 @@ type indexByName struct { func (i *indexByName) ByID() map[int]string { idToName := make(map[int]string) - for k, v := range i.index { - idToName[v] = k + for name, id := range i.index { + if existingName, ok := idToName[id]; ok && existingName != name { + panic(fmt.Errorf("%w: multiple fields for id %d: %s and %s", + ErrInvalidSchema, id, existingName, name)) + } + idToName[id] = name } return idToName diff --git a/schema_test.go b/schema_test.go index c4ea21a6d..064cc260d 100644 --- a/schema_test.go +++ b/schema_test.go @@ -19,6 +19,7 @@ package iceberg_test import ( "encoding/json" + "fmt" "strings" "testing" @@ -922,3 +923,103 @@ func TestHighestFieldIDListType(t *testing.T) { ) assert.Equal(t, 2, tableSchema.HighestFieldID()) } + +// TestSchemaDuplicateFieldIDPanics tests that creating a schema with duplicate field IDs +// panics, matching the Java implementation behavior. +// This includes the exact scenario from GitHub issue #593. +func TestSchemaDuplicateFieldIDPanics(t *testing.T) { + tests := []struct { + name string + setupSchema func() + expectedID int + expectedNames []string + }{ + { + name: "nested struct fields with duplicate ID", + setupSchema: func() { + iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 5, + Name: "struct", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 6, Name: "inner_op", Type: iceberg.PrimitiveTypes.String}, + {ID: 6, Name: "inner_req", Type: iceberg.PrimitiveTypes.String, Required: true}, + }, + }, + Required: true, + }, + ) + }, + expectedID: 6, + expectedNames: []string{"struct.inner_op", "struct.inner_req"}, + }, + { + name: "top-level fields with duplicate ID", + setupSchema: func() { + iceberg.NewSchema(0, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 1, Name: "bar", Type: iceberg.PrimitiveTypes.Int32}, + ) + }, + expectedID: 1, + expectedNames: []string{"foo", "bar"}, + }, + { + name: "duplicate ID from JSON deserialization", + setupSchema: func() { + testFieldsStr := `[ + {"id":1,"name":"foo","type":"string","required":false}, + {"id":1,"name":"bar","type":"int","required":true} + ]` + _, _ = iceberg.NewSchemaFromJsonFields(1, testFieldsStr) + }, + expectedID: 1, + expectedNames: []string{"foo", "bar"}, + }, + { + name: "nested struct with duplicate ID", + setupSchema: func() { + iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 5, + Name: "person", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 10, Name: "name", Type: iceberg.PrimitiveTypes.String}, + {ID: 10, Name: "age", Type: iceberg.PrimitiveTypes.Int32}, + }, + }, + }, + ) + }, + expectedID: 10, + expectedNames: []string{"person.name", "person.age"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + panicked := false + var panicVal interface{} + func() { + defer func() { + if r := recover(); r != nil { + panicked = true + panicVal = r + } + }() + tt.setupSchema() + }() + + assert.True(t, panicked, "expected panic but none occurred") + err, ok := panicVal.(error) + require.True(t, ok, "panic value should be an error") + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.Contains(t, err.Error(), fmt.Sprintf("multiple fields for id %d", tt.expectedID)) + for _, expectedName := range tt.expectedNames { + assert.Contains(t, err.Error(), expectedName) + } + }) + } +} From fc8bb0e45a3999cd2a7178339dd4b3426ce56535 Mon Sep 17 00:00:00 2001 From: dttung2905 Date: Thu, 25 Dec 2025 22:12:22 +0000 Subject: [PATCH 2/8] Small refactoring for the init() func Signed-off-by: dttung2905 --- exprs_test.go | 27 +- manifest.go | 2 +- manifest_test.go | 30 +- name_mapping.go | 2 +- partitions_bench_test.go | 29 +- partitions_test.go | 12 +- schema.go | 51 ++- schema_test.go | 416 ++++++++++++++++++--- table/arrow_utils.go | 17 +- table/arrow_utils_internal_test.go | 13 +- table/evaluators.go | 5 +- table/evaluators_test.go | 45 ++- table/metadata_builder_internal_test.go | 69 ++-- table/partitioned_throughput_bench_test.go | 15 +- table/scanner.go | 5 +- table/snapshots_internal_test.go | 24 +- table/table_test.go | 29 +- table/update_schema.go | 13 +- table/update_schema_test.go | 59 +-- transforms_test.go | 2 +- types_test.go | 3 +- view/metadata_builder.go | 6 +- visitors_test.go | 48 ++- 23 files changed, 711 insertions(+), 211 deletions(-) diff --git a/exprs_test.go b/exprs_test.go index 1bb9c3f09..c39287503 100644 --- a/exprs_test.go +++ b/exprs_test.go @@ -68,18 +68,22 @@ func TestUnaryExpr(t *testing.T) { assert.True(t, n.Equals(exp)) }) - sc := iceberg.NewSchema(1, iceberg.NestedField{ + sc, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 2, Name: "a", Type: iceberg.PrimitiveTypes.Int32, }) - sc2 := iceberg.NewSchema(1, iceberg.NestedField{ + require.NoError(t, err) + sc2, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 2, Name: "a", Type: iceberg.PrimitiveTypes.Float64, }) - sc3 := iceberg.NewSchema(1, iceberg.NestedField{ + require.NoError(t, err) + sc3, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 2, Name: "a", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) - sc4 := iceberg.NewSchema(1, iceberg.NestedField{ + require.NoError(t, err) + sc4, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 2, Name: "a", Type: iceberg.PrimitiveTypes.Float32, Required: true, }) + require.NoError(t, err) t.Run("isnull and notnull", func(t *testing.T) { t.Run("bind", func(t *testing.T) { @@ -202,7 +206,7 @@ func TestRefBindingCaseSensitive(t *testing.T) { } func TestRefTypes(t *testing.T) { - sc := iceberg.NewSchema(1, + sc, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "b", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 3, Name: "c", Type: iceberg.PrimitiveTypes.Int64}, @@ -215,7 +219,9 @@ func TestRefTypes(t *testing.T) { iceberg.NestedField{ID: 10, Name: "j", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 11, Name: "k", Type: iceberg.PrimitiveTypes.Binary}, iceberg.NestedField{ID: 12, Name: "l", Type: iceberg.PrimitiveTypes.UUID}, - iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}) + iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}, + ) + require.NoError(t, err) t.Run("bind term", func(t *testing.T) { for i := 0; i < sc.NumFields(); i++ { @@ -635,7 +641,7 @@ func TestBoundReferenceToString(t *testing.T) { } func TestToString(t *testing.T) { - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "b", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "c", Type: iceberg.PrimitiveTypes.String}, @@ -647,7 +653,9 @@ func TestToString(t *testing.T) { iceberg.NestedField{ID: 9, Name: "i", Type: iceberg.PrimitiveTypes.UUID}, iceberg.NestedField{ID: 10, Name: "j", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 11, Name: "k", Type: iceberg.PrimitiveTypes.Bool}, - iceberg.NestedField{ID: 12, Name: "l", Type: iceberg.PrimitiveTypes.Binary}) + iceberg.NestedField{ID: 12, Name: "l", Type: iceberg.PrimitiveTypes.Binary}, + ) + require.NoError(t, err) null := iceberg.IsNull(iceberg.Reference("a")) nan := iceberg.IsNaN(iceberg.Reference("g")) @@ -770,10 +778,11 @@ func TestToString(t *testing.T) { } func TestBindAboveBelowIntMax(t *testing.T) { - sc := iceberg.NewSchema(1, + sc, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 2, Name: "b", Type: iceberg.PrimitiveTypes.Float32}, ) + require.NoError(t, err) ref, ref2 := iceberg.Reference("a"), iceberg.Reference("b") above, below := int64(math.MaxInt32)+1, int64(math.MinInt32)-1 diff --git a/manifest.go b/manifest.go index 5e591a99d..5e4f18659 100644 --- a/manifest.go +++ b/manifest.go @@ -2312,7 +2312,7 @@ type ManifestEntry interface { wrap(status ManifestEntryStatus, snapshotID, seqNum, fileSeqNum *int64, datafile DataFile) ManifestEntry } -var PositionalDeleteSchema = NewSchema(0, +var PositionalDeleteSchema = MustNewSchema(0, NestedField{ID: 2147483546, Type: PrimitiveTypes.String, Name: "file_path", Required: true}, NestedField{ID: 2147483545, Type: PrimitiveTypes.Int32, Name: "pos", Required: true}, ) diff --git a/manifest_test.go b/manifest_test.go index 101294943..523c2462f 100644 --- a/manifest_test.go +++ b/manifest_test.go @@ -20,6 +20,7 @@ package iceberg import ( "bytes" "errors" + "fmt" "io" "testing" "time" @@ -444,7 +445,12 @@ var ( }, } - testSchema = NewSchema(0, + testSchema *Schema +) + +func init() { + var err error + testSchema, err = NewSchema(0, NestedField{ID: 1, Name: "VendorID", Type: PrimitiveTypes.Int32, Required: true}, NestedField{ID: 2, Name: "tpep_pickup_datetime", Type: PrimitiveTypes.Timestamp, Required: true}, NestedField{ID: 3, Name: "tpep_dropoff_datetime", Type: PrimitiveTypes.Timestamp, Required: true}, @@ -465,7 +471,10 @@ var ( NestedField{ID: 18, Name: "congestion_surcharge", Type: PrimitiveTypes.Float64, Required: false}, NestedField{ID: 19, Name: "vendor_id_alt", Type: PrimitiveTypes.Int32, Required: false}, ) -) + if err != nil { + panic(fmt.Sprintf("failed to create testSchema: %v", err)) + } +} type ManifestTestSuite struct { suite.Suite @@ -944,10 +953,16 @@ func (m *ManifestTestSuite) TestReadManifestIncompleteSchema() { file, err := WriteManifest( "s3://bucket/namespace/table/metadata/abcd-0123.avro", &buf, 2, partitionSpec, - NewSchema(123, - NestedField{ID: 1, Name: "id", Type: Int64Type{}}, - NestedField{ID: 2, Name: "name", Type: StringType{}}, - ), + func() *Schema { + sch, err := NewSchema(123, + NestedField{ID: 1, Name: "id", Type: Int64Type{}}, + NestedField{ID: 2, Name: "name", Type: StringType{}}, + ) + if err != nil { + panic(fmt.Sprintf("failed to create schema: %v", err)) + } + return sch + }(), snapshotID, []ManifestEntry{NewManifestEntry( EntryStatusADDED, @@ -1362,7 +1377,8 @@ func (m *ManifestTestSuite) TestManifestEntryBuilder() { } func (m *ManifestTestSuite) TestManifestWriterMeta() { - sch := NewSchema(0, NestedField{ID: 0, Name: "test01", Type: StringType{}}) + sch, err := NewSchema(0, NestedField{ID: 0, Name: "test01", Type: StringType{}}) + m.Require().NoError(err) w, err := NewManifestWriter(2, io.Discard, *UnpartitionedSpec, sch, 1) m.Require().NoError(err) md, err := w.meta() diff --git a/name_mapping.go b/name_mapping.go index e87f6c5e3..dd7c3640c 100644 --- a/name_mapping.go +++ b/name_mapping.go @@ -432,7 +432,7 @@ func ApplyNameMapping(schemaWithoutIDs *Schema, nameMapping NameMapping) (*Schem } return NewSchema(schemaWithoutIDs.ID, - top.Type.(*StructType).FieldList...), nil + top.Type.(*StructType).FieldList...) } type createMapping struct{} diff --git a/partitions_bench_test.go b/partitions_bench_test.go index e170b981d..12f98b8cf 100644 --- a/partitions_bench_test.go +++ b/partitions_bench_test.go @@ -27,13 +27,16 @@ import ( // which uses cached URL-escaped field names and strings.Builder for efficient // string concatenation. func BenchmarkPartitionToPath(b *testing.B) { - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "other_str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 4, Name: "date", Type: iceberg.PrimitiveTypes.Date, Required: true}, iceberg.NestedField{ID: 5, Name: "ts", Type: iceberg.PrimitiveTypes.Timestamp, Required: true}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Create a partition spec with fields that need URL escaping spec := iceberg.NewPartitionSpecID(1, @@ -88,7 +91,10 @@ func BenchmarkPartitionToPathManyFields(b *testing.B) { recordValues = append(recordValues, "value_"+string(rune('a'+i-1))) } - schema := iceberg.NewSchema(0, fields...) + schema, err := iceberg.NewSchema(0, fields...) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } spec := iceberg.NewPartitionSpecID(1, partitionFields...) record := partitionRecord(recordValues) @@ -103,11 +109,14 @@ func BenchmarkPartitionToPathManyFields(b *testing.B) { // The first call should be slower (builds the type), subsequent calls should // be faster (uses cache). func BenchmarkPartitionType(b *testing.B) { - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "bool", Type: iceberg.PrimitiveTypes.Bool, Required: false}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } spec := iceberg.NewPartitionSpecID(1, iceberg.PartitionField{ @@ -136,10 +145,14 @@ func BenchmarkPartitionType(b *testing.B) { func BenchmarkPartitionTypeMultipleSchemas(b *testing.B) { schemas := make([]*iceberg.Schema, 10) for i := range schemas { - schemas[i] = iceberg.NewSchema(i, + var err error + schemas[i], err = iceberg.NewSchema(i, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } } spec := iceberg.NewPartitionSpecID(1, @@ -164,11 +177,17 @@ func BenchmarkPartitionTypeMultipleSchemas(b *testing.B) { // BenchmarkPartitionToPathRepeated benchmarks repeated calls to PartitionToPath // with the same spec to verify that cached escaped names provide performance benefits. func BenchmarkPartitionToPathRepeated(b *testing.B) { - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "other_str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Use field names that require URL escaping to maximize the benefit of caching spec := iceberg.NewPartitionSpecID(1, diff --git a/partitions_test.go b/partitions_test.go index 5ad963003..aa788557d 100644 --- a/partitions_test.go +++ b/partitions_test.go @@ -163,10 +163,12 @@ func (p partitionRecord) Get(pos int) any { return p[pos] } func (p partitionRecord) Set(pos int, val any) { p[pos] = val } func TestPartitionSpecToPath(t *testing.T) { - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "other_str", Type: iceberg.PrimitiveTypes.String}, - iceberg.NestedField{ID: 3, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}) + iceberg.NestedField{ID: 3, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + ) + require.NoError(t, err) spec := iceberg.NewPartitionSpecID(3, iceberg.PartitionField{ @@ -191,10 +193,12 @@ func TestPartitionSpecToPath(t *testing.T) { } func TestGetPartitionFieldName(t *testing.T) { - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, - iceberg.NestedField{ID: 3, Name: "ts", Type: iceberg.PrimitiveTypes.Timestamp}) + iceberg.NestedField{ID: 3, Name: "ts", Type: iceberg.PrimitiveTypes.Timestamp}, + ) + require.NoError(t, err) tests := []struct { field iceberg.PartitionField diff --git a/schema.go b/schema.go index e84a6063e..9ee05296a 100644 --- a/schema.go +++ b/schema.go @@ -60,26 +60,39 @@ func NewSchemaFromJsonFields(id int, jsonFieldsStr string) (*Schema, error) { return nil, fmt.Errorf("failed to parse schema JSON: %w", err) } - return NewSchema(id, fields...), nil + return NewSchema(id, fields...) } // NewSchema constructs a new schema with the provided ID // and list of fields. -func NewSchema(id int, fields ...NestedField) *Schema { +func NewSchema(id int, fields ...NestedField) (*Schema, error) { return NewSchemaWithIdentifiers(id, []int{}, fields...) } // NewSchemaWithIdentifiers constructs a new schema with the provided ID // and fields, along with a slice of field IDs to be listed as identifier // fields. -func NewSchemaWithIdentifiers(id int, identifierIDs []int, fields ...NestedField) *Schema { +func NewSchemaWithIdentifiers(id int, identifierIDs []int, fields ...NestedField) (*Schema, error) { s := &Schema{ID: id, fields: fields, IdentifierFieldIDs: identifierIDs} - s.init() + if err := s.init(); err != nil { + return nil, err + } + return s, nil +} + +// MustNewSchema is a helper that panics if NewSchema returns an error. +// It is intended for use in variable initializations where the schema +// is known to be valid at compile time. +func MustNewSchema(id int, fields ...NestedField) *Schema { + s, err := NewSchema(id, fields...) + if err != nil { + panic(err) + } return s } -func (s *Schema) init() { +func (s *Schema) init() error { s.lazyIDToParent = sync.OnceValues(func() (map[int]int, error) { return IndexParents(s) }) @@ -89,8 +102,10 @@ func (s *Schema) init() { // Validate that the schema does not contain duplicate field IDs. if _, err := IndexNameByID(s); err != nil { - panic(err) + return err } + + return nil } func (s *Schema) String() string { @@ -228,13 +243,15 @@ func (s *Schema) UnmarshalJSON(b []byte) error { return err } - s.init() - s.fields = aux.Fields if s.IdentifierFieldIDs == nil { s.IdentifierFieldIDs = []int{} } + if err := s.init(); err != nil { + return err + } + return nil } @@ -780,6 +797,7 @@ func IndexByName(schema *Schema) (map[string]int, error) { shortNameId: make(map[string]int), fieldNames: make([]string, 0), shortFieldNames: make([]string, 0), + idToName: make(map[int]string), } if _, err := Visit(schema, indexer); err != nil { return nil, err @@ -799,6 +817,7 @@ func IndexNameByID(schema *Schema) (map[int]string, error) { shortNameId: make(map[string]int), fieldNames: make([]string, 0), shortFieldNames: make([]string, 0), + idToName: make(map[int]string), } if _, err := Visit(schema, indexer); err != nil { return nil, err @@ -813,15 +832,12 @@ type indexByName struct { combinedIndex map[string]int fieldNames []string shortFieldNames []string + idToName map[int]string } func (i *indexByName) ByID() map[int]string { idToName := make(map[int]string) for name, id := range i.index { - if existingName, ok := idToName[id]; ok && existingName != name { - panic(fmt.Errorf("%w: multiple fields for id %d: %s and %s", - ErrInvalidSchema, id, existingName, name)) - } idToName[id] = name } @@ -847,6 +863,13 @@ func (i *indexByName) addField(name string, fieldID int) { ErrInvalidSchema, fullName, i.index[fullName], fieldID)) } + if existingName, ok := i.idToName[fieldID]; ok && existingName != fullName { + panic(fmt.Errorf("%w: Multiple entries with same key: %d=%s and %d=%s", + ErrInvalidSchema, fieldID, existingName, fieldID, fullName)) + } + + // Track the ID -> name mapping + i.idToName[fieldID] = fullName i.index[fullName] = fieldID if len(i.shortFieldNames) > 0 { shortName := strings.Join(i.shortFieldNames, ".") + "." + name @@ -1323,7 +1346,7 @@ func AssignFreshSchemaIDs(sc *Schema, nextID func() int) (*Schema, error) { } } - return NewSchemaWithIdentifiers(0, newIdentifierIDs, fields...), nil + return NewSchemaWithIdentifiers(0, newIdentifierIDs, fields...) } type SchemaWithPartnerVisitor[T, P any] interface { @@ -1538,7 +1561,7 @@ func SanitizeColumnNames(sc *Schema) (*Schema, error) { } return NewSchemaWithIdentifiers(sc.ID, sc.IdentifierFieldIDs, - result.Type.(*StructType).FieldList...), nil + result.Type.(*StructType).FieldList...) } type sanitizeColumnNameVisitor struct{} diff --git a/schema_test.go b/schema_test.go index 064cc260d..65f36e29a 100644 --- a/schema_test.go +++ b/schema_test.go @@ -30,7 +30,13 @@ import ( ) var ( - tableSchemaNested = iceberg.NewSchemaWithIdentifiers(1, + tableSchemaNested *iceberg.Schema + tableSchemaSimple *iceberg.Schema +) + +func init() { + var err error + tableSchemaNested, err = iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false, @@ -109,14 +115,20 @@ var ( Required: false, }, ) + if err != nil { + panic(fmt.Sprintf("failed to create tableSchemaNested: %v", err)) + } - tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, + tableSchemaSimple, err = iceberg.NewSchemaWithIdentifiers(1, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, ) -) + if err != nil { + panic(fmt.Sprintf("failed to create tableSchemaSimple: %v", err)) + } +} func TestSchemaFromJson(t *testing.T) { testFieldsStr := `[ @@ -447,26 +459,32 @@ func TestPruneColumnsString(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{1: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchemaWithIdentifiers(1, []int{1}, - iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}))) + expected, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsStringFull(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{1: {}}, true) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchemaWithIdentifiers(1, []int{1}, - iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}))) + expected, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsList(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{5: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 4, Name: "qux", Required: true, Type: &iceberg.ListType{ ElementID: 5, Element: iceberg.PrimitiveTypes.String, ElementRequired: true, - }}))) + }}) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsListItself(t *testing.T) { @@ -480,17 +498,19 @@ func TestPruneColumnsListFull(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{5: {}}, true) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 4, Name: "qux", Required: true, Type: &iceberg.ListType{ ElementID: 5, Element: iceberg.PrimitiveTypes.String, ElementRequired: true, - }}))) + }}) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsMap(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{9: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 6, Name: "quux", @@ -508,7 +528,9 @@ func TestPruneColumnsMap(t *testing.T) { }, ValueRequired: true, }, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsMapItself(t *testing.T) { @@ -521,7 +543,7 @@ func TestPruneColumnsMapFull(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{9: {}}, true) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 6, Name: "quux", @@ -539,14 +561,16 @@ func TestPruneColumnsMapFull(t *testing.T) { }, ValueRequired: true, }, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsMapKey(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{10: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 6, Name: "quux", @@ -564,14 +588,16 @@ func TestPruneColumnsMapKey(t *testing.T) { }, ValueRequired: true, }, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsStruct(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{16: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 15, Name: "person", @@ -581,14 +607,16 @@ func TestPruneColumnsStruct(t *testing.T) { ID: 16, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, }}, }, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsStructFull(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{16: {}}, true) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 15, Name: "person", @@ -598,39 +626,47 @@ func TestPruneColumnsStructFull(t *testing.T) { ID: 16, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, }}, }, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsEmptyStruct(t *testing.T) { - schemaEmptyStruct := iceberg.NewSchema(0, iceberg.NestedField{ + schemaEmptyStruct, err := iceberg.NewSchema(0, iceberg.NestedField{ ID: 15, Name: "person", Type: &iceberg.StructType{}, Required: false, }) + require.NoError(t, err) sc, err := iceberg.PruneColumns(schemaEmptyStruct, map[int]iceberg.Void{15: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(0, + expected, err := iceberg.NewSchema(0, iceberg.NestedField{ ID: 15, Name: "person", Type: &iceberg.StructType{}, Required: false, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsEmptyStructFull(t *testing.T) { - schemaEmptyStruct := iceberg.NewSchema(0, iceberg.NestedField{ + schemaEmptyStruct, err := iceberg.NewSchema(0, iceberg.NestedField{ ID: 15, Name: "person", Type: &iceberg.StructType{}, Required: false, }) + require.NoError(t, err) sc, err := iceberg.PruneColumns(schemaEmptyStruct, map[int]iceberg.Void{15: {}}, true) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(0, + expected, err := iceberg.NewSchema(0, iceberg.NestedField{ ID: 15, Name: "person", Type: &iceberg.StructType{}, Required: false, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsStructInMap(t *testing.T) { - nestedSchema := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + nestedSchema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 6, Name: "id_to_person", @@ -648,11 +684,12 @@ func TestPruneColumnsStructInMap(t *testing.T) { ValueRequired: true, }, }) + require.NoError(t, err) sc, err := iceberg.PruneColumns(nestedSchema, map[int]iceberg.Void{11: {}}, false) require.NoError(t, err) - expected := iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 6, Name: "id_to_person", @@ -669,12 +706,13 @@ func TestPruneColumnsStructInMap(t *testing.T) { ValueRequired: true, }, }) + require.NoError(t, err) assert.Truef(t, sc.Equals(expected), "expected: %s\ngot: %s", expected, sc) } func TestPruneColumnsStructInMapFull(t *testing.T) { - nestedSchema := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + nestedSchema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 6, Name: "id_to_person", @@ -692,11 +730,12 @@ func TestPruneColumnsStructInMapFull(t *testing.T) { ValueRequired: true, }, }) + require.NoError(t, err) sc, err := iceberg.PruneColumns(nestedSchema, map[int]iceberg.Void{11: {}}, true) require.NoError(t, err) - expected := iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 6, Name: "id_to_person", @@ -713,6 +752,7 @@ func TestPruneColumnsStructInMapFull(t *testing.T) { ValueRequired: true, }, }) + require.NoError(t, err) assert.Truef(t, sc.Equals(expected), "expected: %s\ngot: %s", expected, sc) } @@ -910,7 +950,7 @@ func TestHighestFieldID(t *testing.T) { // TestHighestFieldIDListType tests that HighestFieldID correctly computes // the highest field ID in a schema that includes a ListType as the final field. func TestHighestFieldIDListType(t *testing.T) { - tableSchema := iceberg.NewSchemaWithIdentifiers(1, + tableSchema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "list_field", Type: &iceberg.ListType{ @@ -921,23 +961,24 @@ func TestHighestFieldIDListType(t *testing.T) { Required: true, }, ) + require.NoError(t, err) assert.Equal(t, 2, tableSchema.HighestFieldID()) } -// TestSchemaDuplicateFieldIDPanics tests that creating a schema with duplicate field IDs -// panics, matching the Java implementation behavior. +// TestSchemaDuplicateFieldIDReturnsError tests that creating a schema with duplicate field IDs +// returns an error, matching the Java implementation behavior. // This includes the exact scenario from GitHub issue #593. -func TestSchemaDuplicateFieldIDPanics(t *testing.T) { +func TestSchemaDuplicateFieldIDReturnsError(t *testing.T) { tests := []struct { name string - setupSchema func() + setupSchema func() (*iceberg.Schema, error) expectedID int expectedNames []string }{ { name: "nested struct fields with duplicate ID", - setupSchema: func() { - iceberg.NewSchema(0, + setupSchema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, iceberg.NestedField{ ID: 5, Name: "struct", @@ -956,8 +997,8 @@ func TestSchemaDuplicateFieldIDPanics(t *testing.T) { }, { name: "top-level fields with duplicate ID", - setupSchema: func() { - iceberg.NewSchema(0, + setupSchema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 1, Name: "bar", Type: iceberg.PrimitiveTypes.Int32}, ) @@ -967,20 +1008,20 @@ func TestSchemaDuplicateFieldIDPanics(t *testing.T) { }, { name: "duplicate ID from JSON deserialization", - setupSchema: func() { + setupSchema: func() (*iceberg.Schema, error) { testFieldsStr := `[ {"id":1,"name":"foo","type":"string","required":false}, {"id":1,"name":"bar","type":"int","required":true} ]` - _, _ = iceberg.NewSchemaFromJsonFields(1, testFieldsStr) + return iceberg.NewSchemaFromJsonFields(1, testFieldsStr) }, expectedID: 1, expectedNames: []string{"foo", "bar"}, }, { name: "nested struct with duplicate ID", - setupSchema: func() { - iceberg.NewSchema(0, + setupSchema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, iceberg.NestedField{ ID: 5, Name: "person", @@ -1000,26 +1041,281 @@ func TestSchemaDuplicateFieldIDPanics(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - panicked := false - var panicVal interface{} - func() { - defer func() { - if r := recover(); r != nil { - panicked = true - panicVal = r - } - }() - tt.setupSchema() - }() - - assert.True(t, panicked, "expected panic but none occurred") - err, ok := panicVal.(error) - require.True(t, ok, "panic value should be an error") + _, err := tt.setupSchema() + require.Error(t, err) assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) - assert.Contains(t, err.Error(), fmt.Sprintf("multiple fields for id %d", tt.expectedID)) + assert.Contains(t, err.Error(), fmt.Sprintf("Multiple entries with same key: %d=", tt.expectedID)) for _, expectedName := range tt.expectedNames { assert.Contains(t, err.Error(), expectedName) } }) } } + +// TestSchemaValidSchemasNoError tests that valid schemas with unique IDs don't return errors +func TestSchemaValidSchemasNoError(t *testing.T) { + tests := []struct { + name string + schema func() (*iceberg.Schema, error) + }{ + { + name: "simple schema with unique IDs", + schema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32}, + iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, + ) + }, + }, + { + name: "nested struct with unique IDs", + schema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "struct", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 2, Name: "inner1", Type: iceberg.PrimitiveTypes.String}, + {ID: 3, Name: "inner2", Type: iceberg.PrimitiveTypes.Int32}, + }, + }, + }, + ) + }, + }, + { + name: "list with unique element ID", + schema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "list_field", + Type: &iceberg.ListType{ + ElementID: 2, + Element: iceberg.PrimitiveTypes.String, + ElementRequired: true, + }, + }, + ) + }, + }, + { + name: "map with unique key and value IDs", + schema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "map_field", + Type: &iceberg.MapType{ + KeyID: 2, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 3, + ValueType: iceberg.PrimitiveTypes.Int32, + ValueRequired: true, + }, + }, + ) + }, + }, + { + name: "complex nested structure with unique IDs", + schema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "outer", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + { + ID: 2, + Name: "nested_list", + Type: &iceberg.ListType{ + ElementID: 3, + Element: iceberg.PrimitiveTypes.String, + ElementRequired: true, + }, + }, + { + ID: 4, + Name: "nested_map", + Type: &iceberg.MapType{ + KeyID: 5, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 6, + ValueType: iceberg.PrimitiveTypes.Int32, + ValueRequired: true, + }, + }, + }, + }, + }, + ) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + schema, err := tt.schema() + require.NoError(t, err) + assert.NotNil(t, schema) + }) + } +} + +// TestSchemaDuplicateIDInListElement tests duplicate IDs in list elements +func TestSchemaDuplicateIDInListElement(t *testing.T) { + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "list1", + Type: &iceberg.ListType{ + ElementID: 5, + Element: iceberg.PrimitiveTypes.String, + ElementRequired: true, + }, + }, + iceberg.NestedField{ + ID: 2, + Name: "list2", + Type: &iceberg.ListType{ + ElementID: 5, // Duplicate ID + Element: iceberg.PrimitiveTypes.Int32, + ElementRequired: true, + }, + }, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.Contains(t, err.Error(), "Multiple entries with same key: 5=") +} + +// TestSchemaDuplicateIDInMapKey tests duplicate IDs in map keys +func TestSchemaDuplicateIDInMapKey(t *testing.T) { + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "map1", + Type: &iceberg.MapType{ + KeyID: 5, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 6, + ValueType: iceberg.PrimitiveTypes.Int32, + ValueRequired: true, + }, + }, + iceberg.NestedField{ + ID: 2, + Name: "map2", + Type: &iceberg.MapType{ + KeyID: 5, // Duplicate ID + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 7, + ValueType: iceberg.PrimitiveTypes.Bool, + ValueRequired: true, + }, + }, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.Contains(t, err.Error(), "Multiple entries with same key: 5=") +} + +// TestSchemaDuplicateIDInMapValue tests duplicate IDs in map values +func TestSchemaDuplicateIDInMapValue(t *testing.T) { + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "map1", + Type: &iceberg.MapType{ + KeyID: 5, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 6, + ValueType: iceberg.PrimitiveTypes.Int32, + ValueRequired: true, + }, + }, + iceberg.NestedField{ + ID: 2, + Name: "map2", + Type: &iceberg.MapType{ + KeyID: 7, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 6, // Duplicate ID + ValueType: iceberg.PrimitiveTypes.Bool, + ValueRequired: true, + }, + }, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.Contains(t, err.Error(), "Multiple entries with same key: 6=") +} + +// TestSchemaDuplicateIDAcrossNestingLevels tests duplicate IDs across different nesting levels +func TestSchemaDuplicateIDAcrossNestingLevels(t *testing.T) { + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "top_level", + Type: iceberg.PrimitiveTypes.String, + }, + iceberg.NestedField{ + ID: 2, + Name: "nested", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 1, Name: "inner", Type: iceberg.PrimitiveTypes.Int32}, // Duplicate of top-level ID + }, + }, + }, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.Contains(t, err.Error(), "Multiple entries with same key: 1=") +} + +// TestSchemaErrorPropagation tests that errors are properly propagated from constructors +func TestSchemaErrorPropagation(t *testing.T) { + // Test NewSchema error propagation + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 1, Name: "bar", Type: iceberg.PrimitiveTypes.Int32}, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + + // Test NewSchemaWithIdentifiers error propagation + _, err = iceberg.NewSchemaWithIdentifiers(0, []int{1}, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 1, Name: "bar", Type: iceberg.PrimitiveTypes.Int32}, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + + // Test NewSchemaFromJsonFields error propagation + _, err = iceberg.NewSchemaFromJsonFields(1, `[ + {"id":1,"name":"foo","type":"string","required":false}, + {"id":1,"name":"bar","type":"int","required":true} + ]`) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) +} + +// TestSchemaErrorMessageFormat tests that error messages match Java format exactly +func TestSchemaErrorMessageFormat(t *testing.T) { + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ID: 5, Name: "field1", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 5, Name: "field2", Type: iceberg.PrimitiveTypes.Int32}, + ) + require.Error(t, err) + + // Check for Java-compatible format: "Multiple entries with same key: ID=name1 and ID=name2" + errMsg := err.Error() + assert.Contains(t, errMsg, "Multiple entries with same key") + assert.Contains(t, errMsg, "5=field1") + assert.Contains(t, errMsg, "5=field2") + assert.Contains(t, errMsg, " and ") +} diff --git a/table/arrow_utils.go b/table/arrow_utils.go index 0dffa74a9..7ff518646 100644 --- a/table/arrow_utils.go +++ b/table/arrow_utils.go @@ -405,7 +405,11 @@ func ArrowSchemaToIceberg(sc *arrow.Schema, downcastNsTimestamp bool, nameMappin return nil, err } - return iceberg.NewSchema(0, out.Type.(*iceberg.StructType).FieldList...), nil + schema, err := iceberg.NewSchema(0, out.Type.(*iceberg.StructType).FieldList...) + if err != nil { + return nil, err + } + return schema, nil case nameMapping != nil: schemaWithoutIDs, err := arrowToSchemaWithoutIDs(sc, downcastNsTimestamp) if err != nil { @@ -437,7 +441,10 @@ func arrowToSchemaWithoutIDs(sc *arrow.Schema, downcastNsTimestamp bool) (*icebe return nil, err } - schemaWithoutIDs := iceberg.NewSchema(0, withoutIDs.Type.(*iceberg.StructType).FieldList...) + schemaWithoutIDs, err := iceberg.NewSchema(0, withoutIDs.Type.(*iceberg.StructType).FieldList...) + if err != nil { + return nil, err + } return schemaWithoutIDs, nil } @@ -644,7 +651,11 @@ func SchemaToArrowSchema(sc *iceberg.Schema, metadata map[string]string, include // For dealing with nested fields (List, Struct, Map) if includeFieldIDs is true, then // the child fields will contain a metadata key PARQUET:field_id set to the field id. func TypeToArrowType(t iceberg.Type, includeFieldIDs bool, useLargeTypes bool) (arrow.DataType, error) { - top, err := iceberg.Visit(iceberg.NewSchema(0, iceberg.NestedField{Type: t}), + tempSchema, err := iceberg.NewSchema(0, iceberg.NestedField{Type: t}) + if err != nil { + return nil, err + } + top, err := iceberg.Visit(tempSchema, convertToArrow{includeFieldIDs: includeFieldIDs, useLargeTypes: useLargeTypes}) if err != nil { return nil, err diff --git a/table/arrow_utils_internal_test.go b/table/arrow_utils_internal_test.go index d32298225..a02cbd13a 100644 --- a/table/arrow_utils_internal_test.go +++ b/table/arrow_utils_internal_test.go @@ -20,6 +20,7 @@ package table import ( "bytes" "cmp" + "fmt" "math" "slices" "testing" @@ -358,7 +359,11 @@ func TestFileMetrics(t *testing.T) { suite.Run(t, new(FileStatsMetricsSuite)) } -var tableSchemaNested = iceberg.NewSchemaWithIdentifiers(1, +var tableSchemaNested *iceberg.Schema + +func init() { + var err error + tableSchemaNested, err = iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false, @@ -414,7 +419,11 @@ var tableSchemaNested = iceberg.NewSchemaWithIdentifiers(1, }, Required: false, }, -) + ) + if err != nil { + panic(fmt.Sprintf("failed to create tableSchemaNested: %v", err)) + } +} func TestStatsTypes(t *testing.T) { statsCols, err := iceberg.PreOrderVisit(tableSchemaNested, diff --git a/table/evaluators.go b/table/evaluators.go index fd751632d..c6731eec9 100644 --- a/table/evaluators.go +++ b/table/evaluators.go @@ -38,7 +38,10 @@ const ( // the stats provided in the partitions (UpperBound/LowerBound/ContainsNull/ContainsNaN). func newManifestEvaluator(spec iceberg.PartitionSpec, schema *iceberg.Schema, partitionFilter iceberg.BooleanExpression, caseSensitive bool) (func(iceberg.ManifestFile) (bool, error), error) { partType := spec.PartitionType(schema) - partSchema := iceberg.NewSchema(0, partType.FieldList...) + partSchema, err := iceberg.NewSchema(0, partType.FieldList...) + if err != nil { + return nil, fmt.Errorf("failed to create partition schema: %w", err) + } filter, err := iceberg.RewriteNotExpr(partitionFilter) if err != nil { return nil, err diff --git a/table/evaluators_test.go b/table/evaluators_test.go index cec68b430..540960def 100644 --- a/table/evaluators_test.go +++ b/table/evaluators_test.go @@ -18,6 +18,7 @@ package table import ( + "fmt" "math" "testing" @@ -31,17 +32,11 @@ const ( IntMinValue, IntMaxValue int32 = 30, 79 ) -func TestManifestEvaluator(t *testing.T) { - var ( - IntMin, IntMax = []byte{byte(IntMinValue), 0x00, 0x00, 0x00}, []byte{byte(IntMaxValue), 0x00, 0x00, 0x00} - StringMin, StringMax = []byte("a"), []byte("z") - FloatMin, _ = iceberg.Float32Literal(0).MarshalBinary() - FloatMax, _ = iceberg.Float32Literal(20).MarshalBinary() - DblMin, _ = iceberg.Float64Literal(0).MarshalBinary() - DblMax, _ = iceberg.Float64Literal(20).MarshalBinary() - NanTrue, NanFalse = true, false +var testSchema *iceberg.Schema - testSchema = iceberg.NewSchema(1, +func init() { + var err error + testSchema, err = iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, @@ -95,6 +90,20 @@ func TestManifestEvaluator(t *testing.T) { Type: iceberg.PrimitiveTypes.Binary, Required: false, }, ) + if err != nil { + panic(fmt.Sprintf("failed to create testSchema: %v", err)) + } + } + +func TestManifestEvaluator(t *testing.T) { + var ( + IntMin, IntMax = []byte{byte(IntMinValue), 0x00, 0x00, 0x00}, []byte{byte(IntMaxValue), 0x00, 0x00, 0x00} + StringMin, StringMax = []byte("a"), []byte("z") + FloatMin, _ = iceberg.Float32Literal(0).MarshalBinary() + FloatMax, _ = iceberg.Float32Literal(20).MarshalBinary() + DblMin, _ = iceberg.Float64Literal(0).MarshalBinary() + DblMax, _ = iceberg.Float64Literal(20).MarshalBinary() + NanTrue, NanFalse = true, false ) partFields := make([]iceberg.PartitionField, 0, testSchema.NumFields()) @@ -720,12 +729,16 @@ type ProjectionTestSuite struct { } func (*ProjectionTestSuite) schema() *iceberg.Schema { - return iceberg.NewSchema(0, + sch, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64}, iceberg.NestedField{ID: 2, Name: "data", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "event_date", Type: iceberg.PrimitiveTypes.Date}, iceberg.NestedField{ID: 4, Name: "event_ts", Type: iceberg.PrimitiveTypes.Timestamp}, ) + if err != nil { + panic(fmt.Sprintf("failed to create schema: %v", err)) + } + return sch } func (*ProjectionTestSuite) emptySpec() iceberg.PartitionSpec { @@ -1119,7 +1132,8 @@ type InclusiveMetricsTestSuite struct { } func (suite *InclusiveMetricsTestSuite) SetupSuite() { - suite.schemaDataFile = iceberg.NewSchema(0, + var err error + suite.schemaDataFile, err = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 2, Name: "no_stats", Type: iceberg.PrimitiveTypes.Int32, Required: false}, iceberg.NestedField{ID: 3, Name: "required", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -1135,6 +1149,7 @@ func (suite *InclusiveMetricsTestSuite) SetupSuite() { iceberg.NestedField{ID: 13, Name: "no_nan_stats", Type: iceberg.PrimitiveTypes.Float64}, iceberg.NestedField{ID: 14, Name: "some_empty", Type: iceberg.PrimitiveTypes.String}, ) + suite.Require().NoError(err) var ( IntMin, _ = iceberg.Int32Literal(IntMinValue).MarshalBinary() @@ -1380,8 +1395,9 @@ func (suite *InclusiveMetricsTestSuite) TestMissingColumn() { } func (suite *InclusiveMetricsTestSuite) TestMissingStats() { - noStatsSchema := iceberg.NewSchema(0, + noStatsSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 2, Name: "no_stats", Type: iceberg.PrimitiveTypes.Float64}) + suite.Require().NoError(err) noStatsFile := &mockDataFile{ path: "file_1.parquet", @@ -2310,8 +2326,9 @@ func (suite *StrictMetricsTestSuite) TestMissingColumn() { } func (suite *StrictMetricsTestSuite) TestMissingStats() { - noStatsSchema := iceberg.NewSchema(0, + noStatsSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 2, Name: "no_stats", Type: iceberg.PrimitiveTypes.Float64}) + suite.Require().NoError(err) noStatsFile := &mockDataFile{ path: "file_1.parquet", diff --git a/table/metadata_builder_internal_test.go b/table/metadata_builder_internal_test.go index 8fe671235..6680db679 100644 --- a/table/metadata_builder_internal_test.go +++ b/table/metadata_builder_internal_test.go @@ -28,12 +28,16 @@ import ( ) func schema() iceberg.Schema { - return *iceberg.NewSchema( + sch, err := iceberg.NewSchema( 0, iceberg.NestedField{ID: 1, Name: "x", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, iceberg.NestedField{ID: 3, Name: "z", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + if err != nil { + panic(fmt.Sprintf("failed to create schema: %v", err)) + } + return *sch } func sortOrder() SortOrder { @@ -151,7 +155,7 @@ func TestBuildUnpartitionedUnsorted(t *testing.T) { func TestReassignIds(t *testing.T) { TestLocation := "file:///tmp/iceberg-test" - schema := iceberg.NewSchema(10, iceberg.NestedField{ + schema, err := iceberg.NewSchema(10, iceberg.NestedField{ ID: 11, Name: "a", Type: iceberg.PrimitiveTypes.Int64, @@ -208,7 +212,7 @@ func TestReassignIds(t *testing.T) { require.NoError(t, err) require.NotNil(t, meta) - expectedSchema := iceberg.NewSchema(0, iceberg.NestedField{ + expectedSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.Int64, @@ -889,8 +893,9 @@ func TestConstructDefaultMainBranch(t *testing.T) { func TestAddIncompatibleCurrentSchemaFails(t *testing.T) { builder := builderWithoutChanges(2) - addedSchema := iceberg.NewSchema(1) - err := builder.AddSchema(addedSchema) + addedSchema, err := iceberg.NewSchema(1) + require.NoError(t, err) + err = builder.AddSchema(addedSchema) require.NoError(t, err) err = builder.SetCurrentSchemaID(1) require.NoError(t, err) @@ -944,10 +949,11 @@ func TestRemoveSchemas(t *testing.T) { // Java: TestTableMetadata.testUpdateSchema func TestUpdateSchema(t *testing.T) { // Test schema updates and evolution - schema1 := iceberg.NewSchema( + schema1, err := iceberg.NewSchema( 0, iceberg.NestedField{ID: 1, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, ) + require.NoError(t, err) meta, err := NewMetadata( schema1, @@ -963,11 +969,12 @@ func TestUpdateSchema(t *testing.T) { require.Equal(t, 1, meta.LastColumnID()) // Update schema by adding a field - schema2 := iceberg.NewSchema( + schema2, err := iceberg.NewSchema( 1, iceberg.NestedField{ID: 1, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, iceberg.NestedField{ID: 2, Name: "x", Type: iceberg.PrimitiveTypes.String, Required: true}, ) + require.NoError(t, err) builder, err := MetadataBuilderFromBase(meta, "") require.NoError(t, err) @@ -1053,12 +1060,13 @@ func TestRemoveReservedPropertiesFails(t *testing.T) { func TestIdsAreReassignedForNewMetadata(t *testing.T) { // Create schema with ID 10 (should be reassigned to 0) - tableSchema := iceberg.NewSchema( + tableSchema, err := iceberg.NewSchema( 10, iceberg.NestedField{ID: 1, Name: "x", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, iceberg.NestedField{ID: 3, Name: "z", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + require.NoError(t, err) partitionSpec := partitionSpec() sortOrder := sortOrder() @@ -1131,7 +1139,9 @@ func TestNewMetadataChanges(t *testing.T) { } func TestNewMetadataChangesUnpartitionedUnsorted(t *testing.T) { - tableSchema := *iceberg.NewSchema(0) + tableSchemaPtr, err := iceberg.NewSchema(0) + require.NoError(t, err) + tableSchema := *tableSchemaPtr partitionSpec := *iceberg.UnpartitionedSpec sortOrder := UnsortedSortOrder @@ -1180,13 +1190,14 @@ func TestNewMetadataChangesUnpartitionedUnsorted(t *testing.T) { func TestSetCurrentSchemaChangeIsMinusOneIfSchemaWasAddedInThisChange(t *testing.T) { builder := builderWithoutChanges(2) - addedSchema := iceberg.NewSchema( + addedSchema, err := iceberg.NewSchema( 1, iceberg.NestedField{ID: 1, Name: "x", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 3, Name: "z", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 4, Name: "a", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + require.NoError(t, err) err := builder.AddSchema(addedSchema) require.NoError(t, err) @@ -1542,7 +1553,7 @@ func TestAddSnapshotV3AcceptsFirstRowIDEqualToNextRowID(t *testing.T) { } func generateTypeSchema(typ iceberg.Type) *iceberg.Schema { - sc := iceberg.NewSchema(0, + sc, err := iceberg.NewSchema(0, iceberg.NestedField{ Type: iceberg.Int64Type{}, ID: 1, Name: "id", Required: true, }, @@ -1589,107 +1600,121 @@ func generateTypeSchema(typ iceberg.Type) *iceberg.Schema { Required: true, }, ) + if err != nil { + panic(fmt.Sprintf("failed to create generateTypeSchema: %v", err)) + } return sc } func TestUnknownTypeValidation(t *testing.T) { t.Run("ValidUnknownTypeInFields", func(t *testing.T) { - validSchema := iceberg.NewSchema(1, + validSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "unknown_field", Type: iceberg.UnknownType{}, Required: false}, ) + require.NoError(t, err) err := checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid unknown type schema should pass validation") }) t.Run("InvalidRequiredUnknownType", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "invalid_unknown", Type: iceberg.UnknownType{}, Required: true}, ) + require.NoError(t, err) err := checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is required") require.ErrorContains(t, err, "must be optional") }) t.Run("InvalidInitialDefaultUnknownType", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "invalid_unknown", Type: iceberg.UnknownType{}, Required: false, InitialDefault: "invalid"}, ) + require.NoError(t, err) err := checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type has non-null initial-default") require.ErrorContains(t, err, "must have null initial-default") }) t.Run("InvalidWriteDefaultUnknownType", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "invalid_unknown", Type: iceberg.UnknownType{}, Required: false, WriteDefault: "invalid"}, ) + require.NoError(t, err) err := checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type has non-null write-default") require.ErrorContains(t, err, "must have null write-default") }) t.Run("ValidNestedUnknownType", func(t *testing.T) { - validSchema := iceberg.NewSchema(1, + validSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "nested", Type: &iceberg.StructType{FieldList: []iceberg.NestedField{ {ID: 3, Name: "unknown_field", Type: iceberg.UnknownType{}, Required: false}, }}, Required: false}, ) + require.NoError(t, err) err := checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid nested unknown type schema should pass validation") }) t.Run("InvalidRequiredNestedUnknownType", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 2, Name: "nested", Type: &iceberg.StructType{FieldList: []iceberg.NestedField{ {ID: 3, Name: "unknown_field", Type: iceberg.UnknownType{}, Required: true}, }}, Required: false}, ) + require.NoError(t, err) err := checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is required") require.ErrorContains(t, err, "must be optional") }) t.Run("ValidListWithUnknownType", func(t *testing.T) { - validSchema := iceberg.NewSchema(1, + validSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "list", Type: &iceberg.ListType{ElementID: 3, Element: iceberg.UnknownType{}, ElementRequired: false}, Required: false}, ) + require.NoError(t, err) err := checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid list with unknown type schema should pass validation") }) t.Run("InvalidRequiredListElement", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "list", Type: &iceberg.ListType{ElementID: 3, Element: iceberg.UnknownType{}, ElementRequired: true}, Required: false}, ) + require.NoError(t, err) err := checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is required") require.ErrorContains(t, err, "must be optional") }) t.Run("ValidMapWithUnknownType", func(t *testing.T) { - validSchema := iceberg.NewSchema(1, + validSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "map", Type: &iceberg.MapType{KeyID: 3, KeyType: iceberg.StringType{}, ValueID: 4, ValueType: iceberg.UnknownType{}, ValueRequired: false}, Required: false}, ) + require.NoError(t, err) err := checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid map with unknown type schema should pass validation") }) t.Run("InvalidUnknownMapKey", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 2, Name: "invalid_map", Type: &iceberg.MapType{KeyID: 3, KeyType: iceberg.UnknownType{}, ValueID: 4, ValueType: iceberg.StringType{}, ValueRequired: false}, Required: true}, ) + require.NoError(t, err) err := checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is used as map key") require.ErrorContains(t, err, "must be optional") }) t.Run("InvalidUnknownMapValue", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 2, Name: "invalid_map", Type: &iceberg.MapType{KeyID: 3, KeyType: iceberg.StringType{}, ValueID: 4, ValueType: iceberg.UnknownType{}, ValueRequired: true}, Required: false}, ) + require.NoError(t, err) err := checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is used as map value") require.ErrorContains(t, err, "must be optional") diff --git a/table/partitioned_throughput_bench_test.go b/table/partitioned_throughput_bench_test.go index 2ba1a98d4..f87ae4f80 100644 --- a/table/partitioned_throughput_bench_test.go +++ b/table/partitioned_throughput_bench_test.go @@ -126,7 +126,7 @@ func BenchmarkPartitionedWriteThroughput_Simple(b *testing.B) { mem := memory.DefaultAllocator // Define Iceberg schema with only primitive types - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "ts", Type: iceberg.PrimitiveTypes.TimestampTz, Required: true}, iceberg.NestedField{ID: 3, Name: "host", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -134,6 +134,9 @@ func BenchmarkPartitionedWriteThroughput_Simple(b *testing.B) { iceberg.NestedField{ID: 5, Name: "bytes_sent", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 6, Name: "user_agent", Type: iceberg.PrimitiveTypes.String, Required: true}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Define Arrow schema (must match Iceberg schema types) arrSchema := arrow.NewSchema([]arrow.Field{ @@ -193,7 +196,7 @@ func BenchmarkPartitionedWriteThroughput_ListPrimitive(b *testing.B) { mem := memory.DefaultAllocator // Define Iceberg schema with list - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "ts", Type: iceberg.PrimitiveTypes.TimestampTz, Required: true}, iceberg.NestedField{ID: 3, Name: "host", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -206,6 +209,9 @@ func BenchmarkPartitionedWriteThroughput_ListPrimitive(b *testing.B) { ElementRequired: true, }, Required: false}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Define Arrow schema arrSchema := arrow.NewSchema([]arrow.Field{ @@ -297,7 +303,7 @@ func BenchmarkPartitionedWriteThroughput_ListStruct(b *testing.B) { } // Define Iceberg schema with complex nested types - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "ts", Type: iceberg.PrimitiveTypes.TimestampTz, Required: true}, iceberg.NestedField{ID: 3, Name: "host", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -306,6 +312,9 @@ func BenchmarkPartitionedWriteThroughput_ListStruct(b *testing.B) { iceberg.NestedField{ID: 6, Name: "user_agent", Type: iceberg.PrimitiveTypes.String, Required: true}, iceberg.NestedField{ID: 7, Name: "resources", Type: resourcesListType, Required: false}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Define Arrow schema arrSchema := arrow.NewSchema([]arrow.Field{ diff --git a/table/scanner.go b/table/scanner.go index 8dfc238e1..c8ddddcb5 100644 --- a/table/scanner.go +++ b/table/scanner.go @@ -253,7 +253,10 @@ func (scan *Scan) buildManifestEvaluator(specID int) (func(iceberg.ManifestFile) func (scan *Scan) buildPartitionEvaluator(specID int) func(iceberg.DataFile) (bool, error) { spec := scan.metadata.PartitionSpecs()[specID] partType := spec.PartitionType(scan.metadata.CurrentSchema()) - partSchema := iceberg.NewSchema(0, partType.FieldList...) + partSchema, err := iceberg.NewSchema(0, partType.FieldList...) + if err != nil { + return nil, fmt.Errorf("failed to create partition schema: %w", err) + } partExpr := scan.partitionFilters.Get(specID) return func(d iceberg.DataFile) (bool, error) { diff --git a/table/snapshots_internal_test.go b/table/snapshots_internal_test.go index 796642f93..77797b36b 100644 --- a/table/snapshots_internal_test.go +++ b/table/snapshots_internal_test.go @@ -18,6 +18,7 @@ package table import ( + "fmt" "testing" "github.com/apache/iceberg-go" @@ -25,12 +26,20 @@ import ( "github.com/stretchr/testify/require" ) -var tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, - []int{2}, - iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, - iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, - iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, -) +var tableSchemaSimple *iceberg.Schema + +func init() { + var err error + tableSchemaSimple, err = iceberg.NewSchemaWithIdentifiers(1, + []int{2}, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, + ) + if err != nil { + panic(fmt.Sprintf("failed to create tableSchemaSimple: %v", err)) + } +} func TestSnapshotSummaryCollector(t *testing.T) { var ssc SnapshotSummaryCollector @@ -53,11 +62,12 @@ func TestSnapshotSummaryCollectorWithPartition(t *testing.T) { var ssc SnapshotSummaryCollector assert.Equal(t, iceberg.Properties{}, ssc.build()) - sc := iceberg.NewSchema(0, + sc, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "bool_field", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "string_field", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "int_field", Type: iceberg.PrimitiveTypes.Int32}, ) + require.NoError(t, err) spec := iceberg.NewPartitionSpec(iceberg.PartitionField{ Name: "int_field", SourceID: 3, FieldID: 1001, Transform: iceberg.IdentityTransform{}, }) diff --git a/table/table_test.go b/table/table_test.go index cf5ae1baf..4b565ab8d 100644 --- a/table/table_test.go +++ b/table/table_test.go @@ -153,11 +153,13 @@ func (t *TableTestSuite) TestNewTableFromReadFileGzipped() { } func (t *TableTestSuite) TestSchema() { - t.True(t.tbl.Schema().Equals(iceberg.NewSchemaWithIdentifiers(1, []int{1, 2}, + expectedSchema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1, 2}, iceberg.NestedField{ID: 1, Name: "x", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, iceberg.NestedField{ID: 3, Name: "z", Type: iceberg.PrimitiveTypes.Int64, Required: true}, - ))) + ) + t.Require().NoError(err) + t.True(t.tbl.Schema().Equals(expectedSchema)) } func (t *TableTestSuite) TestPartitionSpec() { @@ -239,11 +241,13 @@ func (t *TableWritingTestSuite) SetupSuite() { t.ctx = context.Background() mem := memory.DefaultAllocator - t.tableSchema = iceberg.NewSchema(0, + var err error + t.tableSchema, err = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 4, Name: "baz", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 10, Name: "qux", Type: iceberg.PrimitiveTypes.Date}) + t.Require().NoError(err) t.arrSchema = arrow.NewSchema([]arrow.Field{ {Name: "foo", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, @@ -294,7 +298,8 @@ func (t *TableWritingTestSuite) SetupSuite() { }) t.Require().NoError(err) - t.tableSchemaPromotedTypes = iceberg.NewSchema(0, + var err2 error + t.tableSchemaPromotedTypes, err2 = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "long", Type: iceberg.PrimitiveTypes.Int64}, iceberg.NestedField{ ID: 2, Name: "list", @@ -312,6 +317,7 @@ func (t *TableWritingTestSuite) SetupSuite() { Required: true, }, iceberg.NestedField{ID: 7, Name: "double", Type: iceberg.PrimitiveTypes.Float64}) + t.Require().NoError(err2) // arrow-go needs to implement cast_extension for [16]byte -> uuid // iceberg.NestedField{ID: 8, Name: "uuid", Type: iceberg.PrimitiveTypes.UUID}) @@ -620,8 +626,9 @@ func (t *TableWritingTestSuite) TestAddFilesToPartitionedTableFailsLowerAndUpper func (t *TableWritingTestSuite) TestAddFilesWithLargeAndRegular() { ident := table.Identifier{"default", "unpartitioned_with_large_types_v" + strconv.Itoa(t.formatVersion)} - ice := iceberg.NewSchema(0, + ice, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: true}) + t.Require().NoError(err) arrowSchema := arrow.NewSchema([]arrow.Field{ {Name: "foo", Type: arrow.BinaryTypes.String}, @@ -1310,7 +1317,7 @@ func (t *TableWritingTestSuite) TestWriteSpecialCharacterColumn() { ident := table.Identifier{"default", "write_special_character_column"} colNameWithSpecialChar := "letter/abc" - s := iceberg.NewSchema(0, + s, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: colNameWithSpecialChar, Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "id", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 3, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -1322,6 +1329,7 @@ func (t *TableWritingTestSuite) TestWriteSpecialCharacterColumn() { {ID: 8, Name: colNameWithSpecialChar, Type: iceberg.PrimitiveTypes.String, Required: true}, }, }}) + t.Require().NoError(err) arrowSchema := arrow.NewSchema([]arrow.Field{ {Name: colNameWithSpecialChar, Type: arrow.BinaryTypes.String, Nullable: true}, @@ -1405,7 +1413,7 @@ func (t *TableWritingTestSuite) createTableWithProps(identifier table.Identifier } func tableSchema() *iceberg.Schema { - return iceberg.NewSchema(0, + sch, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "bool", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "string", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "string_long", Type: iceberg.PrimitiveTypes.String}, @@ -1424,6 +1432,10 @@ func tableSchema() *iceberg.Schema { iceberg.NestedField{ID: 16, Name: "med_dec", Type: iceberg.DecimalTypeOf(16, 2)}, iceberg.NestedField{ID: 17, Name: "large_dec", Type: iceberg.DecimalTypeOf(24, 2)}, ) + if err != nil { + panic(fmt.Sprintf("failed to create tableSchema: %v", err)) + } + return sch } func arrowTableWithNull() arrow.Table { @@ -1865,7 +1877,7 @@ func TestWriteMapType(t *testing.T) { ctx := compute.WithAllocator(context.Background(), mem) cat.CreateNamespace(ctx, catalog.ToIdentifier("default"), nil) - iceSch := iceberg.NewSchema(1, + iceSch, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.String, Required: true, }, @@ -1878,6 +1890,7 @@ func TestWriteMapType(t *testing.T) { ValueRequired: false, }, }) + require.NoError(t, err) ident := catalog.ToIdentifier("default", "repro_map") tbl, err := cat.CreateTable(ctx, ident, iceSch, catalog.WithLocation(loc)) diff --git a/table/update_schema.go b/table/update_schema.go index 0492f0855..bf08d3d8d 100644 --- a/table/update_schema.go +++ b/table/update_schema.go @@ -328,7 +328,11 @@ func (u *UpdateSchema) addColumn(path []string, fieldType iceberg.Type, doc stri field.WriteDefault = defaultValue.Any() } - sch, err := iceberg.AssignFreshSchemaIDs(iceberg.NewSchema(0, field), u.assignNewColumnID) + tempSchema, err := iceberg.NewSchema(0, field) + if err != nil { + return err + } + sch, err := iceberg.AssignFreshSchemaIDs(tempSchema, u.assignNewColumnID) if err != nil { return fmt.Errorf("failed to assign field id: %w", err) } @@ -686,7 +690,10 @@ func (u *UpdateSchema) Apply() (*iceberg.Schema, error) { } identifierFieldIDs := make([]int, 0) - newSchema := iceberg.NewSchema(0, st.(*iceberg.StructType).FieldList...) + newSchema, err := iceberg.NewSchema(0, st.(*iceberg.StructType).FieldList...) + if err != nil { + return nil, fmt.Errorf("error creating schema: %w", err) + } for name := range u.identifierFieldNames { var field iceberg.NestedField var ok bool @@ -708,7 +715,7 @@ func (u *UpdateSchema) Apply() (*iceberg.Schema, error) { }).ID } - return iceberg.NewSchemaWithIdentifiers(nextSchemaID, identifierFieldIDs, st.(*iceberg.StructType).FieldList...), nil + return iceberg.NewSchemaWithIdentifiers(nextSchemaID, identifierFieldIDs, st.(*iceberg.StructType).FieldList...) } func (u *UpdateSchema) Commit() error { diff --git a/table/update_schema_test.go b/table/update_schema_test.go index 8f35363d6..0bac381e1 100644 --- a/table/update_schema_test.go +++ b/table/update_schema_test.go @@ -18,35 +18,44 @@ package table import ( + "fmt" "testing" "github.com/apache/iceberg-go" "github.com/stretchr/testify/assert" ) -var originalSchema = iceberg.NewSchema(1, - iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, Doc: ""}, - iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, - iceberg.NestedField{ID: 3, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: false, Doc: ""}, - iceberg.NestedField{ID: 4, Name: "address", Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{ - {ID: 5, Name: "city", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, - {ID: 6, Name: "zip", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, - }, - }, Required: false, Doc: ""}, - iceberg.NestedField{ID: 7, Name: "tags", Type: &iceberg.ListType{ - ElementID: 8, - Element: iceberg.PrimitiveTypes.String, - ElementRequired: false, - }, Required: false, Doc: ""}, - iceberg.NestedField{ID: 9, Name: "properties", Type: &iceberg.MapType{ - KeyID: 10, - KeyType: iceberg.PrimitiveTypes.String, - ValueID: 11, - ValueType: iceberg.PrimitiveTypes.String, - ValueRequired: false, - }, Required: false, Doc: ""}, -) +var originalSchema *iceberg.Schema + +func init() { + var err error + originalSchema, err = iceberg.NewSchema(1, + iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, Doc: ""}, + iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, + iceberg.NestedField{ID: 3, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: false, Doc: ""}, + iceberg.NestedField{ID: 4, Name: "address", Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 5, Name: "city", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, + {ID: 6, Name: "zip", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, + }, + }, Required: false, Doc: ""}, + iceberg.NestedField{ID: 7, Name: "tags", Type: &iceberg.ListType{ + ElementID: 8, + Element: iceberg.PrimitiveTypes.String, + ElementRequired: false, + }, Required: false, Doc: ""}, + iceberg.NestedField{ID: 9, Name: "properties", Type: &iceberg.MapType{ + KeyID: 10, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 11, + ValueType: iceberg.PrimitiveTypes.String, + ValueRequired: false, + }, Required: false, Doc: ""}, + ) + if err != nil { + panic(fmt.Sprintf("failed to create originalSchema: %v", err)) + } +} var testMetadata, _ = NewMetadata(originalSchema, nil, UnsortedSortOrder, "", nil) @@ -400,7 +409,7 @@ func TestApplyChanges(t *testing.T) { }) t.Run("test apply changes on add field that delete in same time", func(t *testing.T) { - originalSchema := iceberg.NewSchema(1, + originalSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, Doc: ""}, iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, iceberg.NestedField{ID: 3, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: false, Doc: ""}, @@ -812,7 +821,7 @@ func TestSetIdentifierField(t *testing.T) { t.Run("test set identifier field replaces existing identifier fields", func(t *testing.T) { // Create a schema with existing identifier fields - schemaWithIdentifiers := iceberg.NewSchemaWithIdentifiers(1, []int{1}, // id is initially an identifier + schemaWithIdentifiers, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, // id is initially an identifier iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, Doc: ""}, iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, iceberg.NestedField{ID: 3, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: false, Doc: ""}, diff --git a/transforms_test.go b/transforms_test.go index a6ee85d96..0e8a40266 100644 --- a/transforms_test.go +++ b/transforms_test.go @@ -196,7 +196,7 @@ func TestManifestPartitionVals(t *testing.T) { require.True(t, result.Valid) assert.Equal(t, tt.expectResult, result.Val) - schema := iceberg.NewSchema(0, iceberg.NestedField{ + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ Name: "abc", ID: 1, Type: tt.input.Type(), diff --git a/types_test.go b/types_test.go index 8649c259c..f8daca012 100644 --- a/types_test.go +++ b/types_test.go @@ -261,7 +261,7 @@ func TestUnknownTypeInNestedStructs(t *testing.T) { ValueRequired: false, } - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "top", Type: iceberg.UnknownType{}, Required: false}, iceberg.NestedField{ID: 3, Name: "arr", Type: listType, Required: false}, @@ -281,6 +281,7 @@ func TestUnknownTypeInNestedStructs(t *testing.T) { }, Required: true}, iceberg.NestedField{ID: 14, Name: "standalone_map", Type: standaloneMapType, Required: false}, ) + require.NoError(t, err) out, err := json.Marshal(schema) require.NoError(t, err) diff --git a/view/metadata_builder.go b/view/metadata_builder.go index 85b21b4df..c29ef859d 100644 --- a/view/metadata_builder.go +++ b/view/metadata_builder.go @@ -227,7 +227,11 @@ func (b *MetadataBuilder) addSchema(schema *iceberg.Schema) (int, error) { newSchema := schema // Build a fresh schema if we reset the ID if schema.ID != newSchemaID { - newSchema = iceberg.NewSchemaWithIdentifiers(newSchemaID, schema.IdentifierFieldIDs, schema.Fields()...) + var err error + newSchema, err = iceberg.NewSchemaWithIdentifiers(newSchemaID, schema.IdentifierFieldIDs, schema.Fields()...) + if err != nil { + return 0, fmt.Errorf("error creating schema with new ID: %w", err) + } } b.schemaList = append(b.schemaList, newSchema) diff --git a/visitors_test.go b/visitors_test.go index ed18b48cd..df9c084a2 100644 --- a/visitors_test.go +++ b/visitors_test.go @@ -18,6 +18,7 @@ package iceberg_test import ( + "fmt" "math" "strings" "testing" @@ -287,20 +288,24 @@ func rowOf(vals ...any) rowTester { return rowTester(vals) } -var testSchema = iceberg.NewSchema(1, - iceberg.NestedField{ - ID: 13, Name: "x", - Type: iceberg.PrimitiveTypes.Int32, Required: true, - }, - iceberg.NestedField{ - ID: 14, Name: "y", - Type: iceberg.PrimitiveTypes.Float64, Required: true, - }, - iceberg.NestedField{ - ID: 15, Name: "z", - Type: iceberg.PrimitiveTypes.Int32, - }, - iceberg.NestedField{ +var testSchema *iceberg.Schema + +func init() { + var err error + testSchema, err = iceberg.NewSchema(1, + iceberg.NestedField{ + ID: 13, Name: "x", + Type: iceberg.PrimitiveTypes.Int32, Required: true, + }, + iceberg.NestedField{ + ID: 14, Name: "y", + Type: iceberg.PrimitiveTypes.Float64, Required: true, + }, + iceberg.NestedField{ + ID: 15, Name: "z", + Type: iceberg.PrimitiveTypes.Int32, + }, + iceberg.NestedField{ ID: 16, Name: "s1", Type: &iceberg.StructType{ FieldList: []iceberg.NestedField{{ @@ -324,7 +329,7 @@ var testSchema = iceberg.NewSchema(1, }}, }, }, - iceberg.NestedField{ID: 21, Name: "s5", Type: &iceberg.StructType{ + iceberg.NestedField{ID: 21, Name: "s5", Type: &iceberg.StructType{ FieldList: []iceberg.NestedField{{ ID: 22, Name: "s6", Required: true, Type: &iceberg.StructType{ FieldList: []iceberg.NestedField{{ @@ -333,7 +338,12 @@ var testSchema = iceberg.NewSchema(1, }, }}, }}, - iceberg.NestedField{ID: 24, Name: "s", Type: iceberg.PrimitiveTypes.String}) + iceberg.NestedField{ID: 24, Name: "s", Type: iceberg.PrimitiveTypes.String}, + ) + if err != nil { + panic(fmt.Sprintf("failed to create testSchema: %v", err)) + } +} func TestExprEvaluator(t *testing.T) { type testCase struct { @@ -543,7 +553,7 @@ func TestExprEvaluator(t *testing.T) { } func TestEvaluatorCmpTypes(t *testing.T) { - sc := iceberg.NewSchema(1, + sc, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "b", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 3, Name: "c", Type: iceberg.PrimitiveTypes.Int64}, @@ -556,7 +566,9 @@ func TestEvaluatorCmpTypes(t *testing.T) { iceberg.NestedField{ID: 10, Name: "j", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 11, Name: "k", Type: iceberg.PrimitiveTypes.Binary}, iceberg.NestedField{ID: 12, Name: "l", Type: iceberg.PrimitiveTypes.UUID}, - iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}) + iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}, + ) + require.NoError(t, err) rowData := rowOf(true, 5, 5, float32(5.0), float64(5.0), From 6327eb56752eef086158e1c5ef6735cc2d7a0429 Mon Sep 17 00:00:00 2001 From: dttung2905 Date: Wed, 7 Jan 2026 23:05:22 +0000 Subject: [PATCH 3/8] Fix CI Signed-off-by: dttung2905 --- catalog/glue/glue_test.go | 17 ++++-- catalog/glue/schema_test.go | 76 +++++++++++++------------ catalog/rest/rest_test.go | 11 ++-- catalog/sql/sql_test.go | 2 +- schema.go | 11 ++++ table/arrow_utils_test.go | 6 +- table/evaluators_test.go | 10 +++- table/metadata_builder_internal_test.go | 24 ++++---- table/scanner.go | 4 +- table/substrait/substrait_test.go | 8 ++- table/table_test.go | 2 - table/update_spec_test.go | 2 +- view/metadata_builder_test.go | 6 +- 13 files changed, 105 insertions(+), 74 deletions(-) diff --git a/catalog/glue/glue_test.go b/catalog/glue/glue_test.go index c05757f5b..0db4c48de 100644 --- a/catalog/glue/glue_test.go +++ b/catalog/glue/glue_test.go @@ -172,7 +172,7 @@ var testNonIcebergGlueTable = types.Table{ }, } -var testSchema = iceberg.NewSchemaWithIdentifiers(0, []int{}, +var testSchema = iceberg.MustNewSchemaWithIdentifiers(0, []int{}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}) @@ -1005,10 +1005,11 @@ func TestGlueCreateTableInvalidMetadataRollback(t *testing.T) { func TestGlueCreateTableRollbackOnInvalidMetadata(t *testing.T) { assert := require.New(t) mockGlueSvc := &mockGlueClient{} - schema := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + schema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.StringType{}, Required: true}, ) + assert.NoError(err) mockGlueSvc.On("CreateTable", mock.Anything, mock.Anything, mock.Anything).Return(&glue.CreateTableOutput{}, nil) mockGlueSvc.On("GetTable", mock.Anything, &glue.GetTableInput{ DatabaseName: aws.String("test_database"), @@ -1026,7 +1027,7 @@ func TestGlueCreateTableRollbackOnInvalidMetadata(t *testing.T) { glueSvc: mockGlueSvc, awsCfg: &aws.Config{}, } - _, err := glueCatalog.CreateTable(context.TODO(), + _, err = glueCatalog.CreateTable(context.TODO(), TableIdentifier("test_database", "test_rollback_table"), schema, catalog.WithLocation("s3://non-existent-test-bucket")) @@ -1094,10 +1095,11 @@ func TestAlterTableIntegration(t *testing.T) { metadataLocation := os.Getenv("TEST_TABLE_LOCATION") tbName := fmt.Sprintf("table_%d", time.Now().UnixNano()) tbIdent := TableIdentifier(dbName, tbName) - schema := iceberg.NewSchemaWithIdentifiers(0, []int{}, + schema, err := iceberg.NewSchemaWithIdentifiers(0, []int{}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}) + assert.NoError(err) awsCfg, err := config.LoadDefaultConfig(context.TODO(), config.WithClientLogMode(aws.LogRequest|aws.LogResponse)) assert.NoError(err) @@ -1169,7 +1171,9 @@ func TestAlterTableIntegration(t *testing.T) { } newFields := append(currentSchema.Fields(), addField) // add column 'new_col' newFields = append(newFields[:1], newFields[2:]...) // drop column 'bar' - updateColumns := table.NewAddSchemaUpdate(iceberg.NewSchemaWithIdentifiers(newSchemaId, currentSchema.IdentifierFieldIDs, newFields...)) + newSchema, err := iceberg.NewSchemaWithIdentifiers(newSchemaId, currentSchema.IdentifierFieldIDs, newFields...) + require.NoError(t, err) + updateColumns := table.NewAddSchemaUpdate(newSchema) setSchema := table.NewSetCurrentSchemaUpdate(newSchemaId) _, _, err = ctlg.CommitTable( @@ -1342,9 +1346,10 @@ func TestCommitTableOptimisticLockingIntegration(t *testing.T) { tbName := fmt.Sprintf("optimistic_lock_test_%d", time.Now().UnixNano()) tbIdent := TableIdentifier(dbName, tbName) - schema := iceberg.NewSchemaWithIdentifiers(0, []int{}, + schema, err := iceberg.NewSchemaWithIdentifiers(0, []int{}, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64}, iceberg.NestedField{ID: 2, Name: "data", Type: iceberg.PrimitiveTypes.String}) + assert.NoError(err) awsCfg, err := config.LoadDefaultConfig(context.TODO(), config.WithClientLogMode(aws.LogRequest|aws.LogResponse)) assert.NoError(err) diff --git a/catalog/glue/schema_test.go b/catalog/glue/schema_test.go index 04f5dd74f..a727c310f 100644 --- a/catalog/glue/schema_test.go +++ b/catalog/glue/schema_test.go @@ -349,7 +349,8 @@ func TestSchemaToGlueColumns(t *testing.T) { Doc: "User tags", }, } - schema := iceberg.NewSchema(1, fields...) + schema, err := iceberg.NewSchema(1, fields...) + require.NoError(t, err) columns := schemaToGlueColumns(schema, true) assert.Equal(t, 4, len(columns)) assert.Equal(t, "id", aws.ToString(columns[0].Name)) @@ -365,42 +366,43 @@ func TestSchemaToGlueColumns(t *testing.T) { } func TestSchemasToGlueColumns(t *testing.T) { - schemas := []*iceberg.Schema{ - iceberg.NewSchema(0, - iceberg.NestedField{ - ID: 1, - Name: "id", - Type: iceberg.Int64Type{}, - Required: true, - }, - iceberg.NestedField{ - ID: 2, - Name: "name", - Type: iceberg.StringType{}, - Required: true, - }, - iceberg.NestedField{ - ID: 3, - Name: "address", - Type: iceberg.StringType{}, - Required: false, - }, - ), - iceberg.NewSchema(1, - iceberg.NestedField{ - ID: 1, - Name: "id", - Type: iceberg.Int64Type{}, - Required: true, - }, - iceberg.NestedField{ - ID: 2, - Name: "name", - Type: iceberg.StringType{}, - Required: true, - }, - ), - } + schema0, err := iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "id", + Type: iceberg.Int64Type{}, + Required: true, + }, + iceberg.NestedField{ + ID: 2, + Name: "name", + Type: iceberg.StringType{}, + Required: true, + }, + iceberg.NestedField{ + ID: 3, + Name: "address", + Type: iceberg.StringType{}, + Required: false, + }, + ) + require.NoError(t, err) + schema1, err := iceberg.NewSchema(1, + iceberg.NestedField{ + ID: 1, + Name: "id", + Type: iceberg.Int64Type{}, + Required: true, + }, + iceberg.NestedField{ + ID: 2, + Name: "name", + Type: iceberg.StringType{}, + Required: true, + }, + ) + require.NoError(t, err) + schemas := []*iceberg.Schema{schema0, schema1} expectedColumns := []types.Column{ { diff --git a/catalog/rest/rest_test.go b/catalog/rest/rest_test.go index 91ae11c83..98c7b6e76 100644 --- a/catalog/rest/rest_test.go +++ b/catalog/rest/rest_test.go @@ -1150,7 +1150,7 @@ var ( } }`, exampleTableMetadataNoSnapshotV1) - tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, []int{2}, + tableSchemaSimple = iceberg.MustNewSchemaWithIdentifiers(1, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: false}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false}, @@ -2329,12 +2329,13 @@ func (r *RestCatalogSuite) TestCreateView200() { ns := "ns" viewName := "view" identifier := table.Identifier{ns, viewName} - schema := iceberg.NewSchemaWithIdentifiers(0, []int{1}, iceberg.NestedField{ + schema, err := iceberg.NewSchemaWithIdentifiers(0, []int{1}, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + r.Require().NoError(err) config := iceberg.Properties{ "comment": "Example view created via REST catalog", "owner": "admin", @@ -2384,12 +2385,13 @@ func (r *RestCatalogSuite) TestCreateView409() { ns := "ns" viewName := "view" identifier := table.Identifier{ns, viewName} - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + r.Require().NoError(err) sql := "SELECT * FROM table" reprs := []view.Representation{view.NewRepresentation(sql, "default")} version, err := view.NewVersion(1, 1, reprs, table.Identifier{ns}) @@ -2418,12 +2420,13 @@ func (r *RestCatalogSuite) TestCreateView404() { ns := "ns" viewName := "view" identifier := table.Identifier{ns, viewName} - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + r.Require().NoError(err) sql := "SELECT * FROM table" reprs := []view.Representation{ view.NewRepresentation(sql, "spark"), diff --git a/catalog/sql/sql_test.go b/catalog/sql/sql_test.go index 6ac0fdb86..8686cc126 100644 --- a/catalog/sql/sql_test.go +++ b/catalog/sql/sql_test.go @@ -42,7 +42,7 @@ import ( "github.com/uptrace/bun/driver/sqliteshim" ) -var tableSchemaNested = iceberg.NewSchemaWithIdentifiers(1, +var tableSchemaNested = iceberg.MustNewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false, diff --git a/schema.go b/schema.go index 9ee05296a..e493604ac 100644 --- a/schema.go +++ b/schema.go @@ -92,6 +92,17 @@ func MustNewSchema(id int, fields ...NestedField) *Schema { return s } +// MustNewSchemaWithIdentifiers is a helper that panics if NewSchemaWithIdentifiers returns an error. +// It is intended for use in variable initializations where the schema +// is known to be valid at compile time. +func MustNewSchemaWithIdentifiers(id int, identifierIDs []int, fields ...NestedField) *Schema { + s, err := NewSchemaWithIdentifiers(id, identifierIDs, fields...) + if err != nil { + panic(err) + } + return s +} + func (s *Schema) init() error { s.lazyIDToParent = sync.OnceValues(func() (map[int]int, error) { return IndexParents(s) diff --git a/table/arrow_utils_test.go b/table/arrow_utils_test.go index 0f41b22cb..d9a63f542 100644 --- a/table/arrow_utils_test.go +++ b/table/arrow_utils_test.go @@ -277,7 +277,7 @@ func TestArrowSchemaToIceberg(t *testing.T) { func makeID(v int) *int { return &v } var ( - icebergSchemaNested = iceberg.NewSchema(0, + icebergSchemaNested = iceberg.MustNewSchema(0, iceberg.NestedField{ ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: true, }, @@ -334,7 +334,7 @@ var ( }, ) - icebergSchemaSimple = iceberg.NewSchema(0, + icebergSchemaSimple = iceberg.MustNewSchema(0, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, @@ -467,7 +467,7 @@ var ( {Name: "timestamptz_s_0000", Type: &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "UTC"}, Nullable: true}, }, nil) - TableSchemaWithAllMicrosecondsTimestampPrec = iceberg.NewSchema(0, + TableSchemaWithAllMicrosecondsTimestampPrec = iceberg.MustNewSchema(0, iceberg.NestedField{ID: 1, Name: "timestamp_s", Type: iceberg.PrimitiveTypes.Timestamp}, iceberg.NestedField{ID: 2, Name: "timestamptz_s", Type: iceberg.PrimitiveTypes.TimestampTz}, iceberg.NestedField{ID: 3, Name: "timestamp_ms", Type: iceberg.PrimitiveTypes.Timestamp}, diff --git a/table/evaluators_test.go b/table/evaluators_test.go index 540960def..ecf02f485 100644 --- a/table/evaluators_test.go +++ b/table/evaluators_test.go @@ -1221,13 +1221,14 @@ func (suite *InclusiveMetricsTestSuite) SetupSuite() { }, } - suite.schemaDataFileNan = iceberg.NewSchema(0, + suite.schemaDataFileNan, err = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "all_nan", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 2, Name: "max_nan", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 3, Name: "min_max_nan", Type: iceberg.PrimitiveTypes.Float32}, iceberg.NestedField{ID: 4, Name: "all_nan_null_bounds", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 5, Name: "some_nan_correct_bounds", Type: iceberg.PrimitiveTypes.Float32}, ) + suite.Require().NoError(err) suite.dataFileNan = &mockDataFile{ path: "file.avro", @@ -2054,7 +2055,8 @@ type StrictMetricsTestSuite struct { } func (suite *StrictMetricsTestSuite) SetupSuite() { - suite.schemaDataFile = iceberg.NewSchema(0, + var err error + suite.schemaDataFile, err = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 2, Name: "no_stats", Type: iceberg.PrimitiveTypes.Int32, Required: false}, iceberg.NestedField{ID: 3, Name: "required", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -2070,6 +2072,7 @@ func (suite *StrictMetricsTestSuite) SetupSuite() { iceberg.NestedField{ID: 13, Name: "nan_and_null_only", Type: iceberg.PrimitiveTypes.Float64}, iceberg.NestedField{ID: 14, Name: "no_nan_stats", Type: iceberg.PrimitiveTypes.Float64}, ) + suite.Require().NoError(err) var ( IntMin, _ = iceberg.Int32Literal(IntMinValue).MarshalBinary() @@ -2131,13 +2134,14 @@ func (suite *StrictMetricsTestSuite) SetupSuite() { }, } - suite.schemaDataFileNan = iceberg.NewSchema(0, + suite.schemaDataFileNan, err = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "all_nan", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 2, Name: "max_nan", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 3, Name: "min_max_nan", Type: iceberg.PrimitiveTypes.Float32}, iceberg.NestedField{ID: 4, Name: "all_nan_null_bounds", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 5, Name: "some_nan_correct_bounds", Type: iceberg.PrimitiveTypes.Float32}, ) + suite.Require().NoError(err) suite.dataFileNan = &mockDataFile{ path: "file.avro", diff --git a/table/metadata_builder_internal_test.go b/table/metadata_builder_internal_test.go index 6680db679..162cbe0ec 100644 --- a/table/metadata_builder_internal_test.go +++ b/table/metadata_builder_internal_test.go @@ -1199,7 +1199,7 @@ func TestSetCurrentSchemaChangeIsMinusOneIfSchemaWasAddedInThisChange(t *testing ) require.NoError(t, err) - err := builder.AddSchema(addedSchema) + err = builder.AddSchema(addedSchema) require.NoError(t, err) err = builder.SetCurrentSchemaID(1) @@ -1614,7 +1614,7 @@ func TestUnknownTypeValidation(t *testing.T) { iceberg.NestedField{ID: 2, Name: "unknown_field", Type: iceberg.UnknownType{}, Required: false}, ) require.NoError(t, err) - err := checkSchemaCompatibility(validSchema, 3) + err = checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid unknown type schema should pass validation") }) t.Run("InvalidRequiredUnknownType", func(t *testing.T) { @@ -1622,7 +1622,7 @@ func TestUnknownTypeValidation(t *testing.T) { iceberg.NestedField{ID: 1, Name: "invalid_unknown", Type: iceberg.UnknownType{}, Required: true}, ) require.NoError(t, err) - err := checkSchemaCompatibility(invalidSchema, 3) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is required") require.ErrorContains(t, err, "must be optional") }) @@ -1631,7 +1631,7 @@ func TestUnknownTypeValidation(t *testing.T) { iceberg.NestedField{ID: 1, Name: "invalid_unknown", Type: iceberg.UnknownType{}, Required: false, InitialDefault: "invalid"}, ) require.NoError(t, err) - err := checkSchemaCompatibility(invalidSchema, 3) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type has non-null initial-default") require.ErrorContains(t, err, "must have null initial-default") }) @@ -1640,7 +1640,7 @@ func TestUnknownTypeValidation(t *testing.T) { iceberg.NestedField{ID: 1, Name: "invalid_unknown", Type: iceberg.UnknownType{}, Required: false, WriteDefault: "invalid"}, ) require.NoError(t, err) - err := checkSchemaCompatibility(invalidSchema, 3) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type has non-null write-default") require.ErrorContains(t, err, "must have null write-default") }) @@ -1653,7 +1653,7 @@ func TestUnknownTypeValidation(t *testing.T) { }}, Required: false}, ) require.NoError(t, err) - err := checkSchemaCompatibility(validSchema, 3) + err = checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid nested unknown type schema should pass validation") }) @@ -1664,7 +1664,7 @@ func TestUnknownTypeValidation(t *testing.T) { }}, Required: false}, ) require.NoError(t, err) - err := checkSchemaCompatibility(invalidSchema, 3) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is required") require.ErrorContains(t, err, "must be optional") }) @@ -1675,7 +1675,7 @@ func TestUnknownTypeValidation(t *testing.T) { iceberg.NestedField{ID: 2, Name: "list", Type: &iceberg.ListType{ElementID: 3, Element: iceberg.UnknownType{}, ElementRequired: false}, Required: false}, ) require.NoError(t, err) - err := checkSchemaCompatibility(validSchema, 3) + err = checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid list with unknown type schema should pass validation") }) @@ -1685,7 +1685,7 @@ func TestUnknownTypeValidation(t *testing.T) { iceberg.NestedField{ID: 2, Name: "list", Type: &iceberg.ListType{ElementID: 3, Element: iceberg.UnknownType{}, ElementRequired: true}, Required: false}, ) require.NoError(t, err) - err := checkSchemaCompatibility(invalidSchema, 3) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is required") require.ErrorContains(t, err, "must be optional") }) @@ -1696,7 +1696,7 @@ func TestUnknownTypeValidation(t *testing.T) { iceberg.NestedField{ID: 2, Name: "map", Type: &iceberg.MapType{KeyID: 3, KeyType: iceberg.StringType{}, ValueID: 4, ValueType: iceberg.UnknownType{}, ValueRequired: false}, Required: false}, ) require.NoError(t, err) - err := checkSchemaCompatibility(validSchema, 3) + err = checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid map with unknown type schema should pass validation") }) @@ -1705,7 +1705,7 @@ func TestUnknownTypeValidation(t *testing.T) { iceberg.NestedField{ID: 2, Name: "invalid_map", Type: &iceberg.MapType{KeyID: 3, KeyType: iceberg.UnknownType{}, ValueID: 4, ValueType: iceberg.StringType{}, ValueRequired: false}, Required: true}, ) require.NoError(t, err) - err := checkSchemaCompatibility(invalidSchema, 3) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is used as map key") require.ErrorContains(t, err, "must be optional") }) @@ -1715,7 +1715,7 @@ func TestUnknownTypeValidation(t *testing.T) { iceberg.NestedField{ID: 2, Name: "invalid_map", Type: &iceberg.MapType{KeyID: 3, KeyType: iceberg.StringType{}, ValueID: 4, ValueType: iceberg.UnknownType{}, ValueRequired: true}, Required: false}, ) require.NoError(t, err) - err := checkSchemaCompatibility(invalidSchema, 3) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is used as map value") require.ErrorContains(t, err, "must be optional") }) diff --git a/table/scanner.go b/table/scanner.go index c8ddddcb5..f93524c3c 100644 --- a/table/scanner.go +++ b/table/scanner.go @@ -255,7 +255,9 @@ func (scan *Scan) buildPartitionEvaluator(specID int) func(iceberg.DataFile) (bo partType := spec.PartitionType(scan.metadata.CurrentSchema()) partSchema, err := iceberg.NewSchema(0, partType.FieldList...) if err != nil { - return nil, fmt.Errorf("failed to create partition schema: %w", err) + return func(d iceberg.DataFile) (bool, error) { + return false, fmt.Errorf("failed to create partition schema: %w", err) + } } partExpr := scan.partitionFilters.Get(specID) diff --git a/table/substrait/substrait_test.go b/table/substrait/substrait_test.go index 26ee87852..4c3e71681 100644 --- a/table/substrait/substrait_test.go +++ b/table/substrait/substrait_test.go @@ -29,7 +29,7 @@ import ( ) func TestRefTypes(t *testing.T) { - sc := iceberg.NewSchema(1, + sc, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "b", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 3, Name: "c", Type: iceberg.PrimitiveTypes.Int64}, @@ -37,6 +37,8 @@ func TestRefTypes(t *testing.T) { iceberg.NestedField{ID: 5, Name: "e", Type: iceberg.PrimitiveTypes.Float64}, iceberg.NestedField{ID: 6, Name: "f", Type: iceberg.PrimitiveTypes.Date}, iceberg.NestedField{ID: 7, Name: "g", Type: iceberg.PrimitiveTypes.Time}, + ) + require.NoError(t, err) iceberg.NestedField{ID: 8, Name: "h", Type: iceberg.PrimitiveTypes.Timestamp}, iceberg.NestedField{ID: 9, Name: "i", Type: iceberg.DecimalTypeOf(9, 2)}, iceberg.NestedField{ID: 10, Name: "j", Type: iceberg.PrimitiveTypes.String}, @@ -80,14 +82,14 @@ func TestRefTypes(t *testing.T) { } var ( - tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, + tableSchemaSimple = iceberg.MustNewSchemaWithIdentifiers(1, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, ) - doubleSchema = iceberg.NewSchema(1, + doubleSchema = iceberg.MustNewSchema(1, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.Float64}) ) diff --git a/table/table_test.go b/table/table_test.go index 4b565ab8d..7b265a332 100644 --- a/table/table_test.go +++ b/table/table_test.go @@ -50,7 +50,6 @@ import ( "github.com/pterm/pterm" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" - "github.com/uptrace/bun/driver/sqliteshim" ) type TableTestSuite struct { @@ -256,7 +255,6 @@ func (t *TableWritingTestSuite) SetupSuite() { {Name: "qux", Type: arrow.PrimitiveTypes.Date32, Nullable: true}, }, nil) - var err error t.arrTbl, err = array.TableFromJSON(mem, t.arrSchema, []string{ `[{"foo": true, "bar": "bar_string", "baz": 123, "qux": "2024-03-07"}]`, }) diff --git a/table/update_spec_test.go b/table/update_spec_test.go index ea1131119..bf2d143fd 100644 --- a/table/update_spec_test.go +++ b/table/update_spec_test.go @@ -25,7 +25,7 @@ import ( "github.com/stretchr/testify/assert" ) -var testSchema = iceberg.NewSchema(1, +var testSchema = iceberg.MustNewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Required: true, Type: iceberg.PrimitiveTypes.Int64}, iceberg.NestedField{ID: 2, Name: "name", Required: true, Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "ts", Required: false, Type: iceberg.PrimitiveTypes.Timestamp}, diff --git a/view/metadata_builder_test.go b/view/metadata_builder_test.go index 418fb05d0..afddba4d9 100644 --- a/view/metadata_builder_test.go +++ b/view/metadata_builder_test.go @@ -66,7 +66,11 @@ func newTestSchema(schemaID int, optFieldName ...string) *iceberg.Schema { fieldName = optFieldName[0] } - return iceberg.NewSchema(schemaID, iceberg.NestedField{ID: 1, Name: fieldName, Type: iceberg.PrimitiveTypes.Int64}) + schema, err := iceberg.NewSchema(schemaID, iceberg.NestedField{ID: 1, Name: fieldName, Type: iceberg.PrimitiveTypes.Int64}) + if err != nil { + return nil + } + return schema } func stripErr[T any](v T, _ error) T { From 374066811776494137db1bf4fa13deb4142e0cf0 Mon Sep 17 00:00:00 2001 From: dttung2905 Date: Sun, 25 Jan 2026 12:39:04 +0000 Subject: [PATCH 4/8] Fix errors Signed-off-by: dttung2905 --- catalog/glue/schema_test.go | 1 + catalog/internal/utils.go | 6 ++- catalog/rest/rest_integration_test.go | 2 +- catalog/sql/sql_integration_test.go | 4 +- catalog/sql/sql_test.go | 20 ++++++--- io/azure_integration_test.go | 4 +- io/gcs_integration_test.go | 2 +- io/s3_integration_test.go | 4 +- table/arrow_utils.go | 10 ++++- table/internal/utils_test.go | 6 ++- table/metadata_internal_test.go | 50 ++++++++++++++-------- table/orphan_cleanup_integration_test.go | 2 +- table/partitioned_fanout_writer_test.go | 3 +- table/partitioned_throughput_bench_test.go | 10 ++++- table/substrait/substrait_test.go | 6 +-- table/time_travel_test.go | 5 ++- table/transaction_test.go | 9 ++-- table/updates_test.go | 20 +++++---- 18 files changed, 109 insertions(+), 55 deletions(-) diff --git a/catalog/glue/schema_test.go b/catalog/glue/schema_test.go index a727c310f..4ed58adf0 100644 --- a/catalog/glue/schema_test.go +++ b/catalog/glue/schema_test.go @@ -25,6 +25,7 @@ import ( "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/glue/types" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestIcebergTypeToGlueType(t *testing.T) { diff --git a/catalog/internal/utils.go b/catalog/internal/utils.go index 6e1f2b393..c9a26b82f 100644 --- a/catalog/internal/utils.go +++ b/catalog/internal/utils.go @@ -201,7 +201,11 @@ func UpdateAndStageTable(ctx context.Context, current *table.Table, ident table. metadataLoc = current.MetadataLocation() } else { var err error - baseMeta, err = table.NewMetadata(iceberg.NewSchema(0), nil, table.UnsortedSortOrder, "", nil) + emptySchema, err := iceberg.NewSchema(0) + if err != nil { + return nil, err + } + baseMeta, err = table.NewMetadata(emptySchema, nil, table.UnsortedSortOrder, "", nil) if err != nil { return nil, err } diff --git a/catalog/rest/rest_integration_test.go b/catalog/rest/rest_integration_test.go index 17fe20263..a99b67dc4 100644 --- a/catalog/rest/rest_integration_test.go +++ b/catalog/rest/rest_integration_test.go @@ -65,7 +65,7 @@ func (s *RestIntegrationSuite) SetupTest() { s.cat = s.loadCatalog(s.ctx) } -var tableSchemaNested = iceberg.NewSchemaWithIdentifiers(1, +var tableSchemaNested = iceberg.MustNewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: true, diff --git a/catalog/sql/sql_integration_test.go b/catalog/sql/sql_integration_test.go index 5e879e9dc..13d82689c 100644 --- a/catalog/sql/sql_integration_test.go +++ b/catalog/sql/sql_integration_test.go @@ -51,7 +51,7 @@ const ( location = "s3://warehouse/iceberg" ) -var tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, []int{2}, +var tableSchemaSimple = iceberg.MustNewSchemaWithIdentifiers(1, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: false}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false}, @@ -92,7 +92,7 @@ func (s *SQLIntegrationSuite) TearDownTest() { } } -var tableSchemaNestedTest = iceberg.NewSchemaWithIdentifiers(1, +var tableSchemaNestedTest = iceberg.MustNewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: true, diff --git a/catalog/sql/sql_test.go b/catalog/sql/sql_test.go index 8686cc126..fdb9693eb 100644 --- a/catalog/sql/sql_test.go +++ b/catalog/sql/sql_test.go @@ -1058,9 +1058,10 @@ func (s *SqliteCatalogTestSuite) TestCreateView() { s.Require().NoError(db.CreateNamespace(context.Background(), []string{nsName}, nil)) viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) s.Require().NoError(db.CreateView(context.Background(), []string{nsName, viewName}, schema, viewSQL, nil)) exists, err := db.CheckViewExists(context.Background(), []string{nsName, viewName}) @@ -1077,9 +1078,10 @@ func (s *SqliteCatalogTestSuite) TestDropView() { s.Require().NoError(db.CreateNamespace(context.Background(), []string{nsName}, nil)) viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) s.Require().NoError(db.CreateView(context.Background(), []string{nsName, viewName}, schema, viewSQL, nil)) exists, err := db.CheckViewExists(context.Background(), []string{nsName, viewName}) @@ -1106,9 +1108,10 @@ func (s *SqliteCatalogTestSuite) TestDropViewWithInvalidMetadataLocation() { s.Require().NoError(db.CreateNamespace(context.Background(), []string{nsName}, nil)) viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) s.Require().NoError(db.CreateView(context.Background(), []string{nsName, viewName}, schema, viewSQL, nil)) // Manually update the metadata location to a URL with an unsupported scheme @@ -1116,7 +1119,7 @@ func (s *SqliteCatalogTestSuite) TestDropViewWithInvalidMetadataLocation() { sqldb := s.getDB() defer sqldb.Close() - _, err := sqldb.Exec( + _, err = sqldb.Exec( "UPDATE iceberg_tables SET metadata_location = ? WHERE table_namespace = ? AND table_name = ?", "unsupported-scheme://bucket/metadata.json", nsName, @@ -1142,9 +1145,10 @@ func (s *SqliteCatalogTestSuite) TestCheckViewExists() { s.False(exists) viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) s.Require().NoError(db.CreateView(context.Background(), []string{nsName, viewName}, schema, viewSQL, nil)) exists, err = db.CheckViewExists(context.Background(), []string{nsName, viewName}) @@ -1162,9 +1166,10 @@ func (s *SqliteCatalogTestSuite) TestListViews() { viewNames := []string{tableName(), tableName(), tableName()} for _, viewName := range viewNames { viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) s.Require().NoError(db.CreateView(context.Background(), []string{nsName, viewName}, schema, viewSQL, nil)) } @@ -1199,9 +1204,10 @@ func (s *SqliteCatalogTestSuite) TestLoadView() { s.Require().NoError(db.CreateNamespace(context.Background(), []string{nsName}, nil)) viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) props := iceberg.Properties{ "comment": "Test view", "owner": "test-user", diff --git a/io/azure_integration_test.go b/io/azure_integration_test.go index cfe5ecb6b..217844a47 100644 --- a/io/azure_integration_test.go +++ b/io/azure_integration_test.go @@ -79,7 +79,7 @@ func (s *AzureBlobIOTestSuite) TestAzureBlobWarehouseKey() { tbl, err := c.CreateTable(s.ctx, catalog.ToIdentifier("iceberg-test-azure", "test-table-azure"), - iceberg.NewSchema(0, iceberg.NestedField{ + iceberg.MustNewSchema(0, iceberg.NestedField{ Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, ID: 1, }), catalog.WithLocation(fmt.Sprintf("abfs://%s@%s.dfs.core.windows.net/iceberg/%s", containerName, accountName, path))) s.Require().NoError(err) @@ -111,7 +111,7 @@ func (s *AzureBlobIOTestSuite) TestAzuriteWarehouseConnectionString() { tbl, err := c.CreateTable(s.ctx, catalog.ToIdentifier("iceberg-test-azure", "test-table-azure"), - iceberg.NewSchema(0, iceberg.NestedField{ + iceberg.MustNewSchema(0, iceberg.NestedField{ Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, ID: 1, }), catalog.WithLocation(fmt.Sprintf("abfs://%s@%s.dfs.core.windows.net/iceberg/%s", containerName, accountName, path))) s.Require().NoError(err) diff --git a/io/gcs_integration_test.go b/io/gcs_integration_test.go index f2c0556c3..70f84ef91 100644 --- a/io/gcs_integration_test.go +++ b/io/gcs_integration_test.go @@ -126,7 +126,7 @@ func (s *GCSIOTestSuite) TestGCSWarehouse() { tbl, err := c.CreateTable(s.ctx, catalog.ToIdentifier("iceberg-test-gcs", "test-table-gcs"), - iceberg.NewSchema(0, iceberg.NestedField{ + iceberg.MustNewSchema(0, iceberg.NestedField{ Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, ID: 1, }), catalog.WithLocation(fmt.Sprintf("gs://%s/iceberg/%s", gcsBucketName, path))) s.Require().NoError(err) diff --git a/io/s3_integration_test.go b/io/s3_integration_test.go index 0e51ad029..7e16a74c1 100644 --- a/io/s3_integration_test.go +++ b/io/s3_integration_test.go @@ -58,7 +58,7 @@ func TestMinioWarehouse(t *testing.T) { tbl, err := c.CreateTable(ctx, catalog.ToIdentifier("iceberg-test-2", "test-table-2"), - iceberg.NewSchema(0, iceberg.NestedField{ + iceberg.MustNewSchema(0, iceberg.NestedField{ Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, ID: 1, }), catalog.WithLocation("s3a://warehouse/iceberg/iceberg-test-2/test-table-2")) require.NoError(t, err) @@ -90,7 +90,7 @@ func TestMinioWarehouseNoLocation(t *testing.T) { tbl, err := c.CreateTable(ctx, catalog.ToIdentifier("iceberg-test-2", "test-table-2"), - iceberg.NewSchema(0, iceberg.NestedField{ + iceberg.MustNewSchema(0, iceberg.NestedField{ Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, ID: 1, })) require.NoError(t, err) diff --git a/table/arrow_utils.go b/table/arrow_utils.go index 7ff518646..1bc1a73fc 100644 --- a/table/arrow_utils.go +++ b/table/arrow_utils.go @@ -433,9 +433,17 @@ func ArrowSchemaToIcebergWithFreshIDs(sc *arrow.Schema, downcastNsTimestamp bool } func arrowToSchemaWithoutIDs(sc *arrow.Schema, downcastNsTimestamp bool) (*iceberg.Schema, error) { + // Use a counter to assign unique temporary IDs starting from -1 + // These IDs will be replaced by ApplyNameMapping, but we need unique IDs + // to pass schema validation + nextTempID := -1 withoutIDs, err := VisitArrowSchema(sc, convertToIceberg{ downcastTimestamp: downcastNsTimestamp, - fieldID: func(_ arrow.Field) int { return -1 }, + fieldID: func(_ arrow.Field) int { + id := nextTempID + nextTempID-- + return id + }, }) if err != nil { return nil, err diff --git a/table/internal/utils_test.go b/table/internal/utils_test.go index 246644417..a152c6ed0 100644 --- a/table/internal/utils_test.go +++ b/table/internal/utils_test.go @@ -111,11 +111,12 @@ func (m *mockStatsAgg) MinAsBytes() ([]byte, error) { return nil, func (m *mockStatsAgg) MaxAsBytes() ([]byte, error) { return nil, nil } func TestPartitionValue_LinearTransforms(t *testing.T) { - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "ts_col", Type: iceberg.TimestampType{}, }) + require.NoError(t, err) cases := []struct { name string @@ -164,11 +165,12 @@ func TestPartitionValue_LinearTransforms(t *testing.T) { } func TestPartitionValue_MismatchPanics(t *testing.T) { - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "ts_col", Type: iceberg.TimestampType{}, }) + require.NoError(t, err) partitionField := iceberg.PartitionField{ SourceID: 1, FieldID: 100, diff --git a/table/metadata_internal_test.go b/table/metadata_internal_test.go index 5dd270986..31e93e30e 100644 --- a/table/metadata_internal_test.go +++ b/table/metadata_internal_test.go @@ -194,12 +194,13 @@ func TestMetadataV1Parsing(t *testing.T) { assert.Equal(t, int64(1602638573874), meta.LastUpdatedMillis()) assert.Equal(t, 3, meta.LastColumnID()) - expected := iceberg.NewSchema( + expected, err := iceberg.NewSchema( 0, iceberg.NestedField{ID: 1, Name: "x", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, iceberg.NestedField{ID: 3, Name: "z", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + require.NoError(t, err) assert.True(t, slices.EqualFunc([]*iceberg.Schema{expected}, meta.Schemas(), func(s1, s2 *iceberg.Schema) bool { return s1.Equals(s2) @@ -308,9 +309,10 @@ func TestMetadataV3Parsing(t *testing.T) { func TestMetadataV3Builder(t *testing.T) { // Test creating v3 metadata with builder - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + require.NoError(t, err) builder, err := NewMetadataBuilder(3) require.NoError(t, err) @@ -715,12 +717,13 @@ func TestV1WriteMetadataToV2(t *testing.T) { } func TestNewMetadataWithExplicitV1Format(t *testing.T) { - schema := iceberg.NewSchemaWithIdentifiers(10, + schema, err := iceberg.NewSchemaWithIdentifiers(10, []int{22}, iceberg.NestedField{ID: 10, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}, iceberg.NestedField{ID: 22, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 33, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false}, ) + require.NoError(t, err) partitionSpec := iceberg.NewPartitionSpecID(10, iceberg.PartitionField{SourceID: 22, FieldID: 1022, Transform: iceberg.IdentityTransform{}, Name: "bar"}) @@ -735,10 +738,12 @@ func TestNewMetadataWithExplicitV1Format(t *testing.T) { actual, err := NewMetadata(schema, &partitionSpec, sortOrder, "s3://some_v1_location/", iceberg.Properties{PropertyFormatVersion: "1"}) require.NoError(t, err) - expectedSchema := iceberg.NewSchemaWithIdentifiers(0, []int{2}, + expectedSchema, err := iceberg.NewSchemaWithIdentifiers(0, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, - iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}) + iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, + ) + require.NoError(t, err) expectedSpec := iceberg.NewPartitionSpec( iceberg.PartitionField{SourceID: 2, FieldID: 1000, Transform: iceberg.IdentityTransform{}, Name: "bar"}) @@ -773,12 +778,13 @@ func TestNewMetadataWithExplicitV1Format(t *testing.T) { } func TestNewMetadataV2Format(t *testing.T) { - schema := iceberg.NewSchemaWithIdentifiers(10, + schema, err := iceberg.NewSchemaWithIdentifiers(10, []int{22}, iceberg.NestedField{ID: 10, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}, iceberg.NestedField{ID: 22, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 33, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false}, ) + require.NoError(t, err) partitionSpec := iceberg.NewPartitionSpecID(10, iceberg.PartitionField{SourceID: 22, FieldID: 1022, Transform: iceberg.IdentityTransform{}, Name: "bar"}) @@ -795,10 +801,12 @@ func TestNewMetadataV2Format(t *testing.T) { actual, err := NewMetadataWithUUID(schema, &partitionSpec, sortOrder, "s3://some_v1_location/", nil, tableUUID) require.NoError(t, err) - expectedSchema := iceberg.NewSchemaWithIdentifiers(0, []int{2}, + expectedSchema, err := iceberg.NewSchemaWithIdentifiers(0, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, - iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}) + iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, + ) + require.NoError(t, err) expectedSpec := iceberg.NewPartitionSpec( iceberg.PartitionField{SourceID: 2, FieldID: 1000, Transform: iceberg.IdentityTransform{}, Name: "bar"}) @@ -831,8 +839,9 @@ func TestNewMetadataV2Format(t *testing.T) { } func TestMetadataV1Serialize(t *testing.T) { - sc := iceberg.NewSchema(0, + sc, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "int", Type: iceberg.PrimitiveTypes.Int32}) + require.NoError(t, err) toserialize := &metadataV1{ commonMetadata: commonMetadata{ FormatVersion: 1, @@ -923,8 +932,9 @@ func TestMetadataV1Serialize(t *testing.T) { } func TestMetadataV2Serialize(t *testing.T) { - sc := iceberg.NewSchema(0, + sc, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "int", Type: iceberg.PrimitiveTypes.Int32}) + require.NoError(t, err) toserialize := &metadataV2{ LastSeqNum: 1, commonMetadata: commonMetadata{ @@ -1032,9 +1042,10 @@ func TestMetadataBuilderSetDefaultSpecIDLastPartition(t *testing.T) { func TestMetadataBuilderSetLastAddedSchema(t *testing.T) { builder, err := NewMetadataBuilder(2) assert.NoError(t, err) - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema)) assert.NoError(t, builder.SetCurrentSchemaID(-1)) @@ -1057,19 +1068,22 @@ func TestMetadataBuilderSchemaIncreasingNumbering(t *testing.T) { builder, err := NewMetadataBuilder(2) assert.NoError(t, err) assert.NoError(t, builder.SetFormatVersion(2)) - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema)) - schema = iceberg.NewSchema(3, + schema, err = iceberg.NewSchema(3, iceberg.NestedField{ID: 3, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema)) - schema = iceberg.NewSchema(2, + schema, err = iceberg.NewSchema(2, iceberg.NestedField{ID: 4, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema)) assert.Equal(t, 1, builder.schemaList[0].ID) @@ -1080,13 +1094,15 @@ func TestMetadataBuilderSchemaIncreasingNumbering(t *testing.T) { func TestMetadataBuilderReuseSchema(t *testing.T) { builder, err := NewMetadataBuilder(2) assert.NoError(t, err) - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema)) - schema2 := iceberg.NewSchema(15, + schema2, err := iceberg.NewSchema(15, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema2)) assert.Equal(t, len(builder.schemaList), 1) assert.Equal(t, *builder.lastAddedSchemaID, 1) @@ -1328,7 +1344,7 @@ func TestTableDataV2NoSnapshots(t *testing.T) { ], "default-sort-order-id": 0 }` - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ Type: iceberg.FixedTypeOf(1), ID: 1, Name: "struct_name", diff --git a/table/orphan_cleanup_integration_test.go b/table/orphan_cleanup_integration_test.go index bc4d3883e..1df063195 100644 --- a/table/orphan_cleanup_integration_test.go +++ b/table/orphan_cleanup_integration_test.go @@ -53,7 +53,7 @@ const ( OrphanFilePrefix = "orphan_" ) -var tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, +var tableSchemaSimple = iceberg.MustNewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true, diff --git a/table/partitioned_fanout_writer_test.go b/table/partitioned_fanout_writer_test.go index b7e0f6d8b..3a3def114 100644 --- a/table/partitioned_fanout_writer_test.go +++ b/table/partitioned_fanout_writer_test.go @@ -308,7 +308,7 @@ func (s *FanoutWriterTestSuite) TestVoidTransform() { } func (s *FanoutWriterTestSuite) TestPartitionedLogicalTypesRequireIntFieldIDCase() { - icebergSchema := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + icebergSchema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "decimal_col", Type: iceberg.DecimalTypeOf(10, 6), Required: true}, iceberg.NestedField{ID: 3, Name: "time_col", Type: iceberg.PrimitiveTypes.Time, Required: true}, @@ -317,6 +317,7 @@ func (s *FanoutWriterTestSuite) TestPartitionedLogicalTypesRequireIntFieldIDCase iceberg.NestedField{ID: 6, Name: "uuid_col", Type: iceberg.PrimitiveTypes.UUID, Required: true}, iceberg.NestedField{ID: 7, Name: "date_col", Type: iceberg.PrimitiveTypes.Date, Required: true}, ) + s.Require().NoError(err) spec := iceberg.NewPartitionSpec( iceberg.PartitionField{SourceID: 2, FieldID: 4008, Transform: iceberg.IdentityTransform{}, Name: "decimal_col"}, diff --git a/table/partitioned_throughput_bench_test.go b/table/partitioned_throughput_bench_test.go index f87ae4f80..c22249b06 100644 --- a/table/partitioned_throughput_bench_test.go +++ b/table/partitioned_throughput_bench_test.go @@ -406,7 +406,7 @@ func BenchmarkPartitionedWriteThroughput_MapPrimitive(b *testing.B) { mem := memory.DefaultAllocator // Define Iceberg schema with list - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "ts", Type: iceberg.PrimitiveTypes.TimestampTz, Required: true}, iceberg.NestedField{ID: 3, Name: "host", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -421,6 +421,9 @@ func BenchmarkPartitionedWriteThroughput_MapPrimitive(b *testing.B) { ValueRequired: false, }, Required: false}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Define Arrow schema arrSchema := arrow.NewSchema([]arrow.Field{ @@ -499,12 +502,15 @@ func BenchmarkPartitionedWriteThroughput_PartitionCount(b *testing.B) { ctx := context.Background() // Define Iceberg schema - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "ts", Type: iceberg.PrimitiveTypes.TimestampTz, Required: true}, iceberg.NestedField{ID: 3, Name: "partition_key", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 4, Name: "value", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Define Arrow schema arrSchema := arrow.NewSchema([]arrow.Field{ diff --git a/table/substrait/substrait_test.go b/table/substrait/substrait_test.go index 4c3e71681..0a4497d7d 100644 --- a/table/substrait/substrait_test.go +++ b/table/substrait/substrait_test.go @@ -37,14 +37,14 @@ func TestRefTypes(t *testing.T) { iceberg.NestedField{ID: 5, Name: "e", Type: iceberg.PrimitiveTypes.Float64}, iceberg.NestedField{ID: 6, Name: "f", Type: iceberg.PrimitiveTypes.Date}, iceberg.NestedField{ID: 7, Name: "g", Type: iceberg.PrimitiveTypes.Time}, - ) - require.NoError(t, err) iceberg.NestedField{ID: 8, Name: "h", Type: iceberg.PrimitiveTypes.Timestamp}, iceberg.NestedField{ID: 9, Name: "i", Type: iceberg.DecimalTypeOf(9, 2)}, iceberg.NestedField{ID: 10, Name: "j", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 11, Name: "k", Type: iceberg.PrimitiveTypes.Binary}, iceberg.NestedField{ID: 12, Name: "l", Type: iceberg.PrimitiveTypes.UUID}, - iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}) + iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}, + ) + require.NoError(t, err) tests := []struct { name string diff --git a/table/time_travel_test.go b/table/time_travel_test.go index d20548d1e..26e4ae3bf 100644 --- a/table/time_travel_test.go +++ b/table/time_travel_test.go @@ -267,9 +267,12 @@ func TestSnapshotAsOfEdgeCases(t *testing.T) { // createTestMetadata creates metadata with custom snapshots and logs for testing func createTestMetadata(snapshots []Snapshot, snapshotLog []SnapshotLogEntry) (Metadata, error) { - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + if err != nil { + return nil, err + } // Create basic metadata meta, err := NewMetadata(schema, iceberg.UnpartitionedSpec, UnsortedSortOrder, diff --git a/table/transaction_test.go b/table/transaction_test.go index a0269cb3f..bb061967a 100644 --- a/table/transaction_test.go +++ b/table/transaction_test.go @@ -70,11 +70,12 @@ func (s *SparkIntegrationTestSuite) SetupTest() { } func (s *SparkIntegrationTestSuite) TestSetProperties() { - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Int32}, ) + s.Require().NoError(err) tbl, err := s.cat.CreateTable(s.ctx, catalog.ToIdentifier("default", "go_test_set_properties"), icebergSchema) s.Require().NoError(err) @@ -172,7 +173,7 @@ func (s *SparkIntegrationTestSuite) TestAddFile() { } func (s *SparkIntegrationTestSuite) TestDifferentDataTypes() { - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "bool", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "string", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "string_long", Type: iceberg.PrimitiveTypes.String}, @@ -196,6 +197,7 @@ func (s *SparkIntegrationTestSuite) TestDifferentDataTypes() { }, }, ) + s.Require().NoError(err) arrowSchema, err := table.SchemaToArrowSchema(icebergSchema, nil, true, false) s.Require().NoError(err) @@ -320,11 +322,12 @@ func (s *SparkIntegrationTestSuite) TestDifferentDataTypes() { } func (s *SparkIntegrationTestSuite) TestUpdateSpec() { - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Int32}, ) + s.Require().NoError(err) partitionSpec := iceberg.NewPartitionSpec( iceberg.PartitionField{SourceID: 2, FieldID: 1000, Transform: iceberg.TruncateTransform{Width: 5}, Name: "bar_truncate"}, diff --git a/table/updates_test.go b/table/updates_test.go index 831abb4b6..369f39dfe 100644 --- a/table/updates_test.go +++ b/table/updates_test.go @@ -168,15 +168,19 @@ func TestUnmarshalUpdates(t *testing.T) { "schema-id": 1 } ]`), - expected: Updates{ - NewAddSchemaUpdate(iceberg.NewSchema(1, + expected: func() Updates { + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: true}, - )), - NewAddPartitionSpecUpdate( - &spec, false), - NewAddSortOrderUpdate(&sortOrder), - NewSetCurrentSchemaUpdate(1), - }, + ) + require.NoError(t, err) + return Updates{ + NewAddSchemaUpdate(schema), + NewAddPartitionSpecUpdate( + &spec, false), + NewAddSortOrderUpdate(&sortOrder), + NewSetCurrentSchemaUpdate(1), + } + }(), expectedErr: false, }, { From 035832697cdec1a74fa0087d33597aa24bc47642 Mon Sep 17 00:00:00 2001 From: dttung2905 Date: Sun, 25 Jan 2026 17:34:30 +0000 Subject: [PATCH 5/8] Fix errors Signed-off-by: dttung2905 --- table/metadata_schema_compatibility_test.go | 5 +++-- table/snapshot_producers_test.go | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/table/metadata_schema_compatibility_test.go b/table/metadata_schema_compatibility_test.go index eb67fe54f..2c19c8eb6 100644 --- a/table/metadata_schema_compatibility_test.go +++ b/table/metadata_schema_compatibility_test.go @@ -36,9 +36,10 @@ func TestNewMetadata_DuplicateValues(t *testing.T) { Type: iceberg.PrimitiveTypes.String, } } - icebergSchema := iceberg.NewSchema(1, fields...) + icebergSchema, err := iceberg.NewSchema(1, fields...) + require.NoError(t, err) - _, err := NewMetadata( + _, err = NewMetadata( icebergSchema, &iceberg.PartitionSpec{}, UnsortedSortOrder, diff --git a/table/snapshot_producers_test.go b/table/snapshot_producers_test.go index 1c6922f14..c5a160540 100644 --- a/table/snapshot_producers_test.go +++ b/table/snapshot_producers_test.go @@ -138,7 +138,7 @@ func newTestDataFile(t *testing.T, spec iceberg.PartitionSpec, path string, part } func simpleSchema() *iceberg.Schema { - return iceberg.NewSchema(0, iceberg.NestedField{ + return iceberg.MustNewSchema(0, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) } From 907fb9b5aef71a4e248c56bf49f6d04c34297953 Mon Sep 17 00:00:00 2001 From: dttung2905 Date: Sun, 25 Jan 2026 18:46:14 +0000 Subject: [PATCH 6/8] Fix errors Signed-off-by: dttung2905 --- table/metadata_schema_compatibility_test.go | 16 ++------- table/table_test.go | 1 + table/update_schema.go | 37 ++++++++++++++++++++- table/update_schema_test.go | 14 +++++--- 4 files changed, 49 insertions(+), 19 deletions(-) diff --git a/table/metadata_schema_compatibility_test.go b/table/metadata_schema_compatibility_test.go index 2c19c8eb6..761449264 100644 --- a/table/metadata_schema_compatibility_test.go +++ b/table/metadata_schema_compatibility_test.go @@ -36,18 +36,8 @@ func TestNewMetadata_DuplicateValues(t *testing.T) { Type: iceberg.PrimitiveTypes.String, } } - icebergSchema, err := iceberg.NewSchema(1, fields...) - require.NoError(t, err) - - _, err = NewMetadata( - icebergSchema, - &iceberg.PartitionSpec{}, - UnsortedSortOrder, - "", - iceberg.Properties{}, - ) - - require.Error(t, err) - assert.Contains(t, err.Error(), "invalid schema: error encountered during schema visitor") + _, err := iceberg.NewSchema(1, fields...) + require.Error(t, err, "NewSchema should reject schemas with duplicate field names") + assert.Contains(t, err.Error(), "error encountered during schema visitor") assert.Contains(t, err.Error(), "multiple fields for name foo") } diff --git a/table/table_test.go b/table/table_test.go index 7b265a332..a17c12607 100644 --- a/table/table_test.go +++ b/table/table_test.go @@ -50,6 +50,7 @@ import ( "github.com/pterm/pterm" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/uptrace/bun/driver/sqliteshim" ) type TableTestSuite struct { diff --git a/table/update_schema.go b/table/update_schema.go index bf08d3d8d..2481b78e0 100644 --- a/table/update_schema.go +++ b/table/update_schema.go @@ -219,6 +219,36 @@ func (u *UpdateSchema) assignNewColumnID() int { return u.lastColumnID } +// normalizeTypeForSchemaCreation ensures nested types have temporary non-zero IDs +// to avoid schema validation conflicts. These IDs will be replaced by AssignFreshSchemaIDs. +// The field itself uses ID 1, so nested type IDs start from 2 to avoid conflicts. +func normalizeTypeForSchemaCreation(fieldType iceberg.Type) iceberg.Type { + switch t := fieldType.(type) { + case *iceberg.ListType: + if t.ElementID == 0 { + return &iceberg.ListType{ + ElementID: 2, // temporary ID (field uses 1), will be reassigned + Element: t.Element, + ElementRequired: t.ElementRequired, + } + } + return t + case *iceberg.MapType: + if t.KeyID == 0 || t.ValueID == 0 { + return &iceberg.MapType{ + KeyID: 2, // temporary ID (field uses 1), will be reassigned + KeyType: t.KeyType, + ValueID: 3, // temporary ID, will be reassigned + ValueType: t.ValueType, + ValueRequired: t.ValueRequired, + } + } + return t + default: + return t + } +} + func (u *UpdateSchema) findField(name string) (iceberg.NestedField, bool) { if u.caseSensitive { return u.schema.FindFieldByName(name) @@ -317,9 +347,14 @@ func (u *UpdateSchema) addColumn(path []string, fieldType iceberg.Type, doc stri } } + // Normalize nested types to have temporary non-zero IDs to avoid schema validation conflicts. + // These IDs will be replaced by AssignFreshSchemaIDs. + normalizedType := normalizeTypeForSchemaCreation(fieldType) + field := iceberg.NestedField{ + ID: 1, // temporary ID to avoid validation conflict with nested type IDs Name: name, - Type: fieldType, + Type: normalizedType, Required: required, Doc: doc, } diff --git a/table/update_schema_test.go b/table/update_schema_test.go index 0bac381e1..a019c6f28 100644 --- a/table/update_schema_test.go +++ b/table/update_schema_test.go @@ -28,8 +28,7 @@ import ( var originalSchema *iceberg.Schema func init() { - var err error - originalSchema, err = iceberg.NewSchema(1, + originalSchema = iceberg.MustNewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, Doc: ""}, iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, iceberg.NestedField{ID: 3, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: false, Doc: ""}, @@ -52,13 +51,18 @@ func init() { ValueRequired: false, }, Required: false, Doc: ""}, ) +} + +var testMetadata Metadata + +func init() { + var err error + testMetadata, err = NewMetadata(originalSchema, nil, UnsortedSortOrder, "", nil) if err != nil { - panic(fmt.Sprintf("failed to create originalSchema: %v", err)) + panic(fmt.Sprintf("failed to create testMetadata: %v", err)) } } -var testMetadata, _ = NewMetadata(originalSchema, nil, UnsortedSortOrder, "", nil) - func TestAddColumn(t *testing.T) { t.Run("test update schema with add primitive type on top level", func(t *testing.T) { table := New([]string{"id"}, testMetadata, "", nil, nil) From da731418603d7f96e919d808ff4ec32f4c9f43a1 Mon Sep 17 00:00:00 2001 From: dttung2905 Date: Sun, 25 Jan 2026 21:48:23 +0000 Subject: [PATCH 7/8] Fix errors Signed-off-by: dttung2905 --- manifest_test.go | 1 + schema.go | 2 + table/arrow_utils_internal_test.go | 92 ++++++++++++------------ table/evaluators_test.go | 112 ++++++++++++++--------------- visitors_test.go | 52 +++++++------- 5 files changed, 131 insertions(+), 128 deletions(-) diff --git a/manifest_test.go b/manifest_test.go index 523c2462f..0da1479de 100644 --- a/manifest_test.go +++ b/manifest_test.go @@ -961,6 +961,7 @@ func (m *ManifestTestSuite) TestReadManifestIncompleteSchema() { if err != nil { panic(fmt.Sprintf("failed to create schema: %v", err)) } + return sch }(), snapshotID, diff --git a/schema.go b/schema.go index e493604ac..6e033e58d 100644 --- a/schema.go +++ b/schema.go @@ -89,6 +89,7 @@ func MustNewSchema(id int, fields ...NestedField) *Schema { if err != nil { panic(err) } + return s } @@ -100,6 +101,7 @@ func MustNewSchemaWithIdentifiers(id int, identifierIDs []int, fields ...NestedF if err != nil { panic(err) } + return s } diff --git a/table/arrow_utils_internal_test.go b/table/arrow_utils_internal_test.go index a02cbd13a..4208dcb23 100644 --- a/table/arrow_utils_internal_test.go +++ b/table/arrow_utils_internal_test.go @@ -364,61 +364,61 @@ var tableSchemaNested *iceberg.Schema func init() { var err error tableSchemaNested, err = iceberg.NewSchemaWithIdentifiers(1, - []int{1}, - iceberg.NestedField{ - ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false, - }, - iceberg.NestedField{ - ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true, - }, - iceberg.NestedField{ - ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false, - }, - iceberg.NestedField{ - ID: 4, Name: "qux", Required: true, Type: &iceberg.ListType{ - ElementID: 5, Element: iceberg.PrimitiveTypes.String, ElementRequired: true, + []int{1}, + iceberg.NestedField{ + ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false, }, - }, - iceberg.NestedField{ - ID: 6, Name: "quux", - Type: &iceberg.MapType{ - KeyID: 7, - KeyType: iceberg.PrimitiveTypes.String, - ValueID: 8, - ValueType: &iceberg.MapType{ - KeyID: 9, - KeyType: iceberg.PrimitiveTypes.String, - ValueID: 10, - ValueType: iceberg.PrimitiveTypes.Int32, + iceberg.NestedField{ + ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true, + }, + iceberg.NestedField{ + ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false, + }, + iceberg.NestedField{ + ID: 4, Name: "qux", Required: true, Type: &iceberg.ListType{ + ElementID: 5, Element: iceberg.PrimitiveTypes.String, ElementRequired: true, + }, + }, + iceberg.NestedField{ + ID: 6, Name: "quux", + Type: &iceberg.MapType{ + KeyID: 7, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 8, + ValueType: &iceberg.MapType{ + KeyID: 9, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 10, + ValueType: iceberg.PrimitiveTypes.Int32, + ValueRequired: true, + }, ValueRequired: true, }, - ValueRequired: true, + Required: true, }, - Required: true, - }, - iceberg.NestedField{ - ID: 11, Name: "location", Type: &iceberg.ListType{ - ElementID: 12, Element: &iceberg.StructType{ - FieldList: []iceberg.NestedField{ - {ID: 13, Name: "latitude", Type: iceberg.PrimitiveTypes.Float32, Required: false}, - {ID: 14, Name: "longitude", Type: iceberg.PrimitiveTypes.Float32, Required: false}, + iceberg.NestedField{ + ID: 11, Name: "location", Type: &iceberg.ListType{ + ElementID: 12, Element: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 13, Name: "latitude", Type: iceberg.PrimitiveTypes.Float32, Required: false}, + {ID: 14, Name: "longitude", Type: iceberg.PrimitiveTypes.Float32, Required: false}, + }, }, + ElementRequired: true, }, - ElementRequired: true, + Required: true, }, - Required: true, - }, - iceberg.NestedField{ - ID: 15, - Name: "person", - Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{ - {ID: 16, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false}, - {ID: 17, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + iceberg.NestedField{ + ID: 15, + Name: "person", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 16, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false}, + {ID: 17, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + }, }, + Required: false, }, - Required: false, - }, ) if err != nil { panic(fmt.Sprintf("failed to create tableSchemaNested: %v", err)) diff --git a/table/evaluators_test.go b/table/evaluators_test.go index ecf02f485..1ab9bc701 100644 --- a/table/evaluators_test.go +++ b/table/evaluators_test.go @@ -37,63 +37,63 @@ var testSchema *iceberg.Schema func init() { var err error testSchema, err = iceberg.NewSchema(1, - iceberg.NestedField{ - ID: 1, Name: "id", - Type: iceberg.PrimitiveTypes.Int32, Required: true, - }, - iceberg.NestedField{ - ID: 2, Name: "all_nulls_missing_nan", - Type: iceberg.PrimitiveTypes.String, Required: false, - }, - iceberg.NestedField{ - ID: 3, Name: "some_nulls", - Type: iceberg.PrimitiveTypes.String, Required: false, - }, - iceberg.NestedField{ - ID: 4, Name: "no_nulls", - Type: iceberg.PrimitiveTypes.String, Required: false, - }, - iceberg.NestedField{ - ID: 5, Name: "float", - Type: iceberg.PrimitiveTypes.Float32, Required: false, - }, - iceberg.NestedField{ - ID: 6, Name: "all_nulls_double", - Type: iceberg.PrimitiveTypes.Float64, Required: false, - }, - iceberg.NestedField{ - ID: 7, Name: "all_nulls_no_nans", - Type: iceberg.PrimitiveTypes.Float32, Required: false, - }, - iceberg.NestedField{ - ID: 8, Name: "all_nans", - Type: iceberg.PrimitiveTypes.Float64, Required: false, - }, - iceberg.NestedField{ - ID: 9, Name: "both_nan_and_null", - Type: iceberg.PrimitiveTypes.Float32, Required: false, - }, - iceberg.NestedField{ - ID: 10, Name: "no_nan_or_null", - Type: iceberg.PrimitiveTypes.Float64, Required: false, - }, - iceberg.NestedField{ - ID: 11, Name: "all_nulls_missing_nan_float", - Type: iceberg.PrimitiveTypes.Float32, Required: false, - }, - iceberg.NestedField{ - ID: 12, Name: "all_same_value_or_null", - Type: iceberg.PrimitiveTypes.String, Required: false, - }, - iceberg.NestedField{ - ID: 13, Name: "no_nulls_same_value_a", - Type: iceberg.PrimitiveTypes.Binary, Required: false, - }, - ) - if err != nil { - panic(fmt.Sprintf("failed to create testSchema: %v", err)) - } + iceberg.NestedField{ + ID: 1, Name: "id", + Type: iceberg.PrimitiveTypes.Int32, Required: true, + }, + iceberg.NestedField{ + ID: 2, Name: "all_nulls_missing_nan", + Type: iceberg.PrimitiveTypes.String, Required: false, + }, + iceberg.NestedField{ + ID: 3, Name: "some_nulls", + Type: iceberg.PrimitiveTypes.String, Required: false, + }, + iceberg.NestedField{ + ID: 4, Name: "no_nulls", + Type: iceberg.PrimitiveTypes.String, Required: false, + }, + iceberg.NestedField{ + ID: 5, Name: "float", + Type: iceberg.PrimitiveTypes.Float32, Required: false, + }, + iceberg.NestedField{ + ID: 6, Name: "all_nulls_double", + Type: iceberg.PrimitiveTypes.Float64, Required: false, + }, + iceberg.NestedField{ + ID: 7, Name: "all_nulls_no_nans", + Type: iceberg.PrimitiveTypes.Float32, Required: false, + }, + iceberg.NestedField{ + ID: 8, Name: "all_nans", + Type: iceberg.PrimitiveTypes.Float64, Required: false, + }, + iceberg.NestedField{ + ID: 9, Name: "both_nan_and_null", + Type: iceberg.PrimitiveTypes.Float32, Required: false, + }, + iceberg.NestedField{ + ID: 10, Name: "no_nan_or_null", + Type: iceberg.PrimitiveTypes.Float64, Required: false, + }, + iceberg.NestedField{ + ID: 11, Name: "all_nulls_missing_nan_float", + Type: iceberg.PrimitiveTypes.Float32, Required: false, + }, + iceberg.NestedField{ + ID: 12, Name: "all_same_value_or_null", + Type: iceberg.PrimitiveTypes.String, Required: false, + }, + iceberg.NestedField{ + ID: 13, Name: "no_nulls_same_value_a", + Type: iceberg.PrimitiveTypes.Binary, Required: false, + }, + ) + if err != nil { + panic(fmt.Sprintf("failed to create testSchema: %v", err)) } +} func TestManifestEvaluator(t *testing.T) { var ( diff --git a/visitors_test.go b/visitors_test.go index df9c084a2..7bf3859cd 100644 --- a/visitors_test.go +++ b/visitors_test.go @@ -306,38 +306,38 @@ func init() { Type: iceberg.PrimitiveTypes.Int32, }, iceberg.NestedField{ - ID: 16, Name: "s1", - Type: &iceberg.StructType{ + ID: 16, Name: "s1", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{{ + ID: 17, Name: "s2", Required: true, + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{{ + ID: 18, Name: "s3", Required: true, + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{{ + ID: 19, Name: "s4", Required: true, + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{{ + ID: 20, Name: "i", Required: true, + Type: iceberg.PrimitiveTypes.Int32, + }}, + }, + }}, + }, + }}, + }, + }}, + }, + }, + iceberg.NestedField{ID: 21, Name: "s5", Type: &iceberg.StructType{ FieldList: []iceberg.NestedField{{ - ID: 17, Name: "s2", Required: true, - Type: &iceberg.StructType{ + ID: 22, Name: "s6", Required: true, Type: &iceberg.StructType{ FieldList: []iceberg.NestedField{{ - ID: 18, Name: "s3", Required: true, - Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{{ - ID: 19, Name: "s4", Required: true, - Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{{ - ID: 20, Name: "i", Required: true, - Type: iceberg.PrimitiveTypes.Int32, - }}, - }, - }}, - }, + ID: 23, Name: "f", Required: true, Type: iceberg.PrimitiveTypes.Float32, }}, }, }}, - }, - }, - iceberg.NestedField{ID: 21, Name: "s5", Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{{ - ID: 22, Name: "s6", Required: true, Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{{ - ID: 23, Name: "f", Required: true, Type: iceberg.PrimitiveTypes.Float32, - }}, - }, }}, - }}, iceberg.NestedField{ID: 24, Name: "s", Type: iceberg.PrimitiveTypes.String}, ) if err != nil { From 43e5602d35d4b16ee45a820fd5729ee90d175db4 Mon Sep 17 00:00:00 2001 From: dttung2905 Date: Mon, 26 Jan 2026 18:47:08 +0000 Subject: [PATCH 8/8] Fix errors Signed-off-by: dttung2905 --- schema_test.go | 1 + table/arrow_utils.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/schema_test.go b/schema_test.go index 65f36e29a..261e6b204 100644 --- a/schema_test.go +++ b/schema_test.go @@ -1013,6 +1013,7 @@ func TestSchemaDuplicateFieldIDReturnsError(t *testing.T) { {"id":1,"name":"foo","type":"string","required":false}, {"id":1,"name":"bar","type":"int","required":true} ]` + return iceberg.NewSchemaFromJsonFields(1, testFieldsStr) }, expectedID: 1, diff --git a/table/arrow_utils.go b/table/arrow_utils.go index 1bc1a73fc..22c8849cf 100644 --- a/table/arrow_utils.go +++ b/table/arrow_utils.go @@ -409,6 +409,7 @@ func ArrowSchemaToIceberg(sc *arrow.Schema, downcastNsTimestamp bool, nameMappin if err != nil { return nil, err } + return schema, nil case nameMapping != nil: schemaWithoutIDs, err := arrowToSchemaWithoutIDs(sc, downcastNsTimestamp) @@ -442,6 +443,7 @@ func arrowToSchemaWithoutIDs(sc *arrow.Schema, downcastNsTimestamp bool) (*icebe fieldID: func(_ arrow.Field) int { id := nextTempID nextTempID-- + return id }, })