diff --git a/catalog/glue/glue_test.go b/catalog/glue/glue_test.go index c05757f5b..0db4c48de 100644 --- a/catalog/glue/glue_test.go +++ b/catalog/glue/glue_test.go @@ -172,7 +172,7 @@ var testNonIcebergGlueTable = types.Table{ }, } -var testSchema = iceberg.NewSchemaWithIdentifiers(0, []int{}, +var testSchema = iceberg.MustNewSchemaWithIdentifiers(0, []int{}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}) @@ -1005,10 +1005,11 @@ func TestGlueCreateTableInvalidMetadataRollback(t *testing.T) { func TestGlueCreateTableRollbackOnInvalidMetadata(t *testing.T) { assert := require.New(t) mockGlueSvc := &mockGlueClient{} - schema := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + schema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.StringType{}, Required: true}, ) + assert.NoError(err) mockGlueSvc.On("CreateTable", mock.Anything, mock.Anything, mock.Anything).Return(&glue.CreateTableOutput{}, nil) mockGlueSvc.On("GetTable", mock.Anything, &glue.GetTableInput{ DatabaseName: aws.String("test_database"), @@ -1026,7 +1027,7 @@ func TestGlueCreateTableRollbackOnInvalidMetadata(t *testing.T) { glueSvc: mockGlueSvc, awsCfg: &aws.Config{}, } - _, err := glueCatalog.CreateTable(context.TODO(), + _, err = glueCatalog.CreateTable(context.TODO(), TableIdentifier("test_database", "test_rollback_table"), schema, catalog.WithLocation("s3://non-existent-test-bucket")) @@ -1094,10 +1095,11 @@ func TestAlterTableIntegration(t *testing.T) { metadataLocation := os.Getenv("TEST_TABLE_LOCATION") tbName := fmt.Sprintf("table_%d", time.Now().UnixNano()) tbIdent := TableIdentifier(dbName, tbName) - schema := iceberg.NewSchemaWithIdentifiers(0, []int{}, + schema, err := iceberg.NewSchemaWithIdentifiers(0, []int{}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}) + assert.NoError(err) awsCfg, err := config.LoadDefaultConfig(context.TODO(), config.WithClientLogMode(aws.LogRequest|aws.LogResponse)) assert.NoError(err) @@ -1169,7 +1171,9 @@ func TestAlterTableIntegration(t *testing.T) { } newFields := append(currentSchema.Fields(), addField) // add column 'new_col' newFields = append(newFields[:1], newFields[2:]...) // drop column 'bar' - updateColumns := table.NewAddSchemaUpdate(iceberg.NewSchemaWithIdentifiers(newSchemaId, currentSchema.IdentifierFieldIDs, newFields...)) + newSchema, err := iceberg.NewSchemaWithIdentifiers(newSchemaId, currentSchema.IdentifierFieldIDs, newFields...) + require.NoError(t, err) + updateColumns := table.NewAddSchemaUpdate(newSchema) setSchema := table.NewSetCurrentSchemaUpdate(newSchemaId) _, _, err = ctlg.CommitTable( @@ -1342,9 +1346,10 @@ func TestCommitTableOptimisticLockingIntegration(t *testing.T) { tbName := fmt.Sprintf("optimistic_lock_test_%d", time.Now().UnixNano()) tbIdent := TableIdentifier(dbName, tbName) - schema := iceberg.NewSchemaWithIdentifiers(0, []int{}, + schema, err := iceberg.NewSchemaWithIdentifiers(0, []int{}, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64}, iceberg.NestedField{ID: 2, Name: "data", Type: iceberg.PrimitiveTypes.String}) + assert.NoError(err) awsCfg, err := config.LoadDefaultConfig(context.TODO(), config.WithClientLogMode(aws.LogRequest|aws.LogResponse)) assert.NoError(err) diff --git a/catalog/glue/schema_test.go b/catalog/glue/schema_test.go index 04f5dd74f..4ed58adf0 100644 --- a/catalog/glue/schema_test.go +++ b/catalog/glue/schema_test.go @@ -25,6 +25,7 @@ import ( "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/glue/types" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestIcebergTypeToGlueType(t *testing.T) { @@ -349,7 +350,8 @@ func TestSchemaToGlueColumns(t *testing.T) { Doc: "User tags", }, } - schema := iceberg.NewSchema(1, fields...) + schema, err := iceberg.NewSchema(1, fields...) + require.NoError(t, err) columns := schemaToGlueColumns(schema, true) assert.Equal(t, 4, len(columns)) assert.Equal(t, "id", aws.ToString(columns[0].Name)) @@ -365,42 +367,43 @@ func TestSchemaToGlueColumns(t *testing.T) { } func TestSchemasToGlueColumns(t *testing.T) { - schemas := []*iceberg.Schema{ - iceberg.NewSchema(0, - iceberg.NestedField{ - ID: 1, - Name: "id", - Type: iceberg.Int64Type{}, - Required: true, - }, - iceberg.NestedField{ - ID: 2, - Name: "name", - Type: iceberg.StringType{}, - Required: true, - }, - iceberg.NestedField{ - ID: 3, - Name: "address", - Type: iceberg.StringType{}, - Required: false, - }, - ), - iceberg.NewSchema(1, - iceberg.NestedField{ - ID: 1, - Name: "id", - Type: iceberg.Int64Type{}, - Required: true, - }, - iceberg.NestedField{ - ID: 2, - Name: "name", - Type: iceberg.StringType{}, - Required: true, - }, - ), - } + schema0, err := iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "id", + Type: iceberg.Int64Type{}, + Required: true, + }, + iceberg.NestedField{ + ID: 2, + Name: "name", + Type: iceberg.StringType{}, + Required: true, + }, + iceberg.NestedField{ + ID: 3, + Name: "address", + Type: iceberg.StringType{}, + Required: false, + }, + ) + require.NoError(t, err) + schema1, err := iceberg.NewSchema(1, + iceberg.NestedField{ + ID: 1, + Name: "id", + Type: iceberg.Int64Type{}, + Required: true, + }, + iceberg.NestedField{ + ID: 2, + Name: "name", + Type: iceberg.StringType{}, + Required: true, + }, + ) + require.NoError(t, err) + schemas := []*iceberg.Schema{schema0, schema1} expectedColumns := []types.Column{ { diff --git a/catalog/internal/utils.go b/catalog/internal/utils.go index 6e1f2b393..c9a26b82f 100644 --- a/catalog/internal/utils.go +++ b/catalog/internal/utils.go @@ -201,7 +201,11 @@ func UpdateAndStageTable(ctx context.Context, current *table.Table, ident table. metadataLoc = current.MetadataLocation() } else { var err error - baseMeta, err = table.NewMetadata(iceberg.NewSchema(0), nil, table.UnsortedSortOrder, "", nil) + emptySchema, err := iceberg.NewSchema(0) + if err != nil { + return nil, err + } + baseMeta, err = table.NewMetadata(emptySchema, nil, table.UnsortedSortOrder, "", nil) if err != nil { return nil, err } diff --git a/catalog/rest/rest_integration_test.go b/catalog/rest/rest_integration_test.go index 17fe20263..a99b67dc4 100644 --- a/catalog/rest/rest_integration_test.go +++ b/catalog/rest/rest_integration_test.go @@ -65,7 +65,7 @@ func (s *RestIntegrationSuite) SetupTest() { s.cat = s.loadCatalog(s.ctx) } -var tableSchemaNested = iceberg.NewSchemaWithIdentifiers(1, +var tableSchemaNested = iceberg.MustNewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: true, diff --git a/catalog/rest/rest_test.go b/catalog/rest/rest_test.go index 91ae11c83..98c7b6e76 100644 --- a/catalog/rest/rest_test.go +++ b/catalog/rest/rest_test.go @@ -1150,7 +1150,7 @@ var ( } }`, exampleTableMetadataNoSnapshotV1) - tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, []int{2}, + tableSchemaSimple = iceberg.MustNewSchemaWithIdentifiers(1, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: false}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false}, @@ -2329,12 +2329,13 @@ func (r *RestCatalogSuite) TestCreateView200() { ns := "ns" viewName := "view" identifier := table.Identifier{ns, viewName} - schema := iceberg.NewSchemaWithIdentifiers(0, []int{1}, iceberg.NestedField{ + schema, err := iceberg.NewSchemaWithIdentifiers(0, []int{1}, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + r.Require().NoError(err) config := iceberg.Properties{ "comment": "Example view created via REST catalog", "owner": "admin", @@ -2384,12 +2385,13 @@ func (r *RestCatalogSuite) TestCreateView409() { ns := "ns" viewName := "view" identifier := table.Identifier{ns, viewName} - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + r.Require().NoError(err) sql := "SELECT * FROM table" reprs := []view.Representation{view.NewRepresentation(sql, "default")} version, err := view.NewVersion(1, 1, reprs, table.Identifier{ns}) @@ -2418,12 +2420,13 @@ func (r *RestCatalogSuite) TestCreateView404() { ns := "ns" viewName := "view" identifier := table.Identifier{ns, viewName} - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + r.Require().NoError(err) sql := "SELECT * FROM table" reprs := []view.Representation{ view.NewRepresentation(sql, "spark"), diff --git a/catalog/sql/sql_integration_test.go b/catalog/sql/sql_integration_test.go index 5e879e9dc..13d82689c 100644 --- a/catalog/sql/sql_integration_test.go +++ b/catalog/sql/sql_integration_test.go @@ -51,7 +51,7 @@ const ( location = "s3://warehouse/iceberg" ) -var tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, []int{2}, +var tableSchemaSimple = iceberg.MustNewSchemaWithIdentifiers(1, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: false}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false}, @@ -92,7 +92,7 @@ func (s *SQLIntegrationSuite) TearDownTest() { } } -var tableSchemaNestedTest = iceberg.NewSchemaWithIdentifiers(1, +var tableSchemaNestedTest = iceberg.MustNewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: true, diff --git a/catalog/sql/sql_test.go b/catalog/sql/sql_test.go index 6ac0fdb86..fdb9693eb 100644 --- a/catalog/sql/sql_test.go +++ b/catalog/sql/sql_test.go @@ -42,7 +42,7 @@ import ( "github.com/uptrace/bun/driver/sqliteshim" ) -var tableSchemaNested = iceberg.NewSchemaWithIdentifiers(1, +var tableSchemaNested = iceberg.MustNewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false, @@ -1058,9 +1058,10 @@ func (s *SqliteCatalogTestSuite) TestCreateView() { s.Require().NoError(db.CreateNamespace(context.Background(), []string{nsName}, nil)) viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) s.Require().NoError(db.CreateView(context.Background(), []string{nsName, viewName}, schema, viewSQL, nil)) exists, err := db.CheckViewExists(context.Background(), []string{nsName, viewName}) @@ -1077,9 +1078,10 @@ func (s *SqliteCatalogTestSuite) TestDropView() { s.Require().NoError(db.CreateNamespace(context.Background(), []string{nsName}, nil)) viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) s.Require().NoError(db.CreateView(context.Background(), []string{nsName, viewName}, schema, viewSQL, nil)) exists, err := db.CheckViewExists(context.Background(), []string{nsName, viewName}) @@ -1106,9 +1108,10 @@ func (s *SqliteCatalogTestSuite) TestDropViewWithInvalidMetadataLocation() { s.Require().NoError(db.CreateNamespace(context.Background(), []string{nsName}, nil)) viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) s.Require().NoError(db.CreateView(context.Background(), []string{nsName, viewName}, schema, viewSQL, nil)) // Manually update the metadata location to a URL with an unsupported scheme @@ -1116,7 +1119,7 @@ func (s *SqliteCatalogTestSuite) TestDropViewWithInvalidMetadataLocation() { sqldb := s.getDB() defer sqldb.Close() - _, err := sqldb.Exec( + _, err = sqldb.Exec( "UPDATE iceberg_tables SET metadata_location = ? WHERE table_namespace = ? AND table_name = ?", "unsupported-scheme://bucket/metadata.json", nsName, @@ -1142,9 +1145,10 @@ func (s *SqliteCatalogTestSuite) TestCheckViewExists() { s.False(exists) viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) s.Require().NoError(db.CreateView(context.Background(), []string{nsName, viewName}, schema, viewSQL, nil)) exists, err = db.CheckViewExists(context.Background(), []string{nsName, viewName}) @@ -1162,9 +1166,10 @@ func (s *SqliteCatalogTestSuite) TestListViews() { viewNames := []string{tableName(), tableName(), tableName()} for _, viewName := range viewNames { viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) s.Require().NoError(db.CreateView(context.Background(), []string{nsName, viewName}, schema, viewSQL, nil)) } @@ -1199,9 +1204,10 @@ func (s *SqliteCatalogTestSuite) TestLoadView() { s.Require().NoError(db.CreateNamespace(context.Background(), []string{nsName}, nil)) viewSQL := "SELECT * FROM test_table" - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) + s.Require().NoError(err) props := iceberg.Properties{ "comment": "Test view", "owner": "test-user", diff --git a/exprs_test.go b/exprs_test.go index 1bb9c3f09..c39287503 100644 --- a/exprs_test.go +++ b/exprs_test.go @@ -68,18 +68,22 @@ func TestUnaryExpr(t *testing.T) { assert.True(t, n.Equals(exp)) }) - sc := iceberg.NewSchema(1, iceberg.NestedField{ + sc, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 2, Name: "a", Type: iceberg.PrimitiveTypes.Int32, }) - sc2 := iceberg.NewSchema(1, iceberg.NestedField{ + require.NoError(t, err) + sc2, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 2, Name: "a", Type: iceberg.PrimitiveTypes.Float64, }) - sc3 := iceberg.NewSchema(1, iceberg.NestedField{ + require.NoError(t, err) + sc3, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 2, Name: "a", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) - sc4 := iceberg.NewSchema(1, iceberg.NestedField{ + require.NoError(t, err) + sc4, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 2, Name: "a", Type: iceberg.PrimitiveTypes.Float32, Required: true, }) + require.NoError(t, err) t.Run("isnull and notnull", func(t *testing.T) { t.Run("bind", func(t *testing.T) { @@ -202,7 +206,7 @@ func TestRefBindingCaseSensitive(t *testing.T) { } func TestRefTypes(t *testing.T) { - sc := iceberg.NewSchema(1, + sc, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "b", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 3, Name: "c", Type: iceberg.PrimitiveTypes.Int64}, @@ -215,7 +219,9 @@ func TestRefTypes(t *testing.T) { iceberg.NestedField{ID: 10, Name: "j", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 11, Name: "k", Type: iceberg.PrimitiveTypes.Binary}, iceberg.NestedField{ID: 12, Name: "l", Type: iceberg.PrimitiveTypes.UUID}, - iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}) + iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}, + ) + require.NoError(t, err) t.Run("bind term", func(t *testing.T) { for i := 0; i < sc.NumFields(); i++ { @@ -635,7 +641,7 @@ func TestBoundReferenceToString(t *testing.T) { } func TestToString(t *testing.T) { - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "b", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "c", Type: iceberg.PrimitiveTypes.String}, @@ -647,7 +653,9 @@ func TestToString(t *testing.T) { iceberg.NestedField{ID: 9, Name: "i", Type: iceberg.PrimitiveTypes.UUID}, iceberg.NestedField{ID: 10, Name: "j", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 11, Name: "k", Type: iceberg.PrimitiveTypes.Bool}, - iceberg.NestedField{ID: 12, Name: "l", Type: iceberg.PrimitiveTypes.Binary}) + iceberg.NestedField{ID: 12, Name: "l", Type: iceberg.PrimitiveTypes.Binary}, + ) + require.NoError(t, err) null := iceberg.IsNull(iceberg.Reference("a")) nan := iceberg.IsNaN(iceberg.Reference("g")) @@ -770,10 +778,11 @@ func TestToString(t *testing.T) { } func TestBindAboveBelowIntMax(t *testing.T) { - sc := iceberg.NewSchema(1, + sc, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 2, Name: "b", Type: iceberg.PrimitiveTypes.Float32}, ) + require.NoError(t, err) ref, ref2 := iceberg.Reference("a"), iceberg.Reference("b") above, below := int64(math.MaxInt32)+1, int64(math.MinInt32)-1 diff --git a/io/azure_integration_test.go b/io/azure_integration_test.go index cfe5ecb6b..217844a47 100644 --- a/io/azure_integration_test.go +++ b/io/azure_integration_test.go @@ -79,7 +79,7 @@ func (s *AzureBlobIOTestSuite) TestAzureBlobWarehouseKey() { tbl, err := c.CreateTable(s.ctx, catalog.ToIdentifier("iceberg-test-azure", "test-table-azure"), - iceberg.NewSchema(0, iceberg.NestedField{ + iceberg.MustNewSchema(0, iceberg.NestedField{ Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, ID: 1, }), catalog.WithLocation(fmt.Sprintf("abfs://%s@%s.dfs.core.windows.net/iceberg/%s", containerName, accountName, path))) s.Require().NoError(err) @@ -111,7 +111,7 @@ func (s *AzureBlobIOTestSuite) TestAzuriteWarehouseConnectionString() { tbl, err := c.CreateTable(s.ctx, catalog.ToIdentifier("iceberg-test-azure", "test-table-azure"), - iceberg.NewSchema(0, iceberg.NestedField{ + iceberg.MustNewSchema(0, iceberg.NestedField{ Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, ID: 1, }), catalog.WithLocation(fmt.Sprintf("abfs://%s@%s.dfs.core.windows.net/iceberg/%s", containerName, accountName, path))) s.Require().NoError(err) diff --git a/io/gcs_integration_test.go b/io/gcs_integration_test.go index f2c0556c3..70f84ef91 100644 --- a/io/gcs_integration_test.go +++ b/io/gcs_integration_test.go @@ -126,7 +126,7 @@ func (s *GCSIOTestSuite) TestGCSWarehouse() { tbl, err := c.CreateTable(s.ctx, catalog.ToIdentifier("iceberg-test-gcs", "test-table-gcs"), - iceberg.NewSchema(0, iceberg.NestedField{ + iceberg.MustNewSchema(0, iceberg.NestedField{ Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, ID: 1, }), catalog.WithLocation(fmt.Sprintf("gs://%s/iceberg/%s", gcsBucketName, path))) s.Require().NoError(err) diff --git a/io/s3_integration_test.go b/io/s3_integration_test.go index 0e51ad029..7e16a74c1 100644 --- a/io/s3_integration_test.go +++ b/io/s3_integration_test.go @@ -58,7 +58,7 @@ func TestMinioWarehouse(t *testing.T) { tbl, err := c.CreateTable(ctx, catalog.ToIdentifier("iceberg-test-2", "test-table-2"), - iceberg.NewSchema(0, iceberg.NestedField{ + iceberg.MustNewSchema(0, iceberg.NestedField{ Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, ID: 1, }), catalog.WithLocation("s3a://warehouse/iceberg/iceberg-test-2/test-table-2")) require.NoError(t, err) @@ -90,7 +90,7 @@ func TestMinioWarehouseNoLocation(t *testing.T) { tbl, err := c.CreateTable(ctx, catalog.ToIdentifier("iceberg-test-2", "test-table-2"), - iceberg.NewSchema(0, iceberg.NestedField{ + iceberg.MustNewSchema(0, iceberg.NestedField{ Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, ID: 1, })) require.NoError(t, err) diff --git a/manifest.go b/manifest.go index 5e591a99d..5e4f18659 100644 --- a/manifest.go +++ b/manifest.go @@ -2312,7 +2312,7 @@ type ManifestEntry interface { wrap(status ManifestEntryStatus, snapshotID, seqNum, fileSeqNum *int64, datafile DataFile) ManifestEntry } -var PositionalDeleteSchema = NewSchema(0, +var PositionalDeleteSchema = MustNewSchema(0, NestedField{ID: 2147483546, Type: PrimitiveTypes.String, Name: "file_path", Required: true}, NestedField{ID: 2147483545, Type: PrimitiveTypes.Int32, Name: "pos", Required: true}, ) diff --git a/manifest_test.go b/manifest_test.go index 1192474ee..0da1479de 100644 --- a/manifest_test.go +++ b/manifest_test.go @@ -20,6 +20,7 @@ package iceberg import ( "bytes" "errors" + "fmt" "io" "testing" "time" @@ -444,7 +445,12 @@ var ( }, } - testSchema = NewSchema(0, + testSchema *Schema +) + +func init() { + var err error + testSchema, err = NewSchema(0, NestedField{ID: 1, Name: "VendorID", Type: PrimitiveTypes.Int32, Required: true}, NestedField{ID: 2, Name: "tpep_pickup_datetime", Type: PrimitiveTypes.Timestamp, Required: true}, NestedField{ID: 3, Name: "tpep_dropoff_datetime", Type: PrimitiveTypes.Timestamp, Required: true}, @@ -463,9 +469,12 @@ var ( NestedField{ID: 16, Name: "improvement_surcharge", Type: PrimitiveTypes.Float64, Required: false}, NestedField{ID: 17, Name: "total_amount", Type: PrimitiveTypes.Float64, Required: true}, NestedField{ID: 18, Name: "congestion_surcharge", Type: PrimitiveTypes.Float64, Required: false}, - NestedField{ID: 19, Name: "VendorID", Type: PrimitiveTypes.Int32, Required: false}, + NestedField{ID: 19, Name: "vendor_id_alt", Type: PrimitiveTypes.Int32, Required: false}, ) -) + if err != nil { + panic(fmt.Sprintf("failed to create testSchema: %v", err)) + } +} type ManifestTestSuite struct { suite.Suite @@ -944,10 +953,17 @@ func (m *ManifestTestSuite) TestReadManifestIncompleteSchema() { file, err := WriteManifest( "s3://bucket/namespace/table/metadata/abcd-0123.avro", &buf, 2, partitionSpec, - NewSchema(123, - NestedField{ID: 1, Name: "id", Type: Int64Type{}}, - NestedField{ID: 2, Name: "name", Type: StringType{}}, - ), + func() *Schema { + sch, err := NewSchema(123, + NestedField{ID: 1, Name: "id", Type: Int64Type{}}, + NestedField{ID: 2, Name: "name", Type: StringType{}}, + ) + if err != nil { + panic(fmt.Sprintf("failed to create schema: %v", err)) + } + + return sch + }(), snapshotID, []ManifestEntry{NewManifestEntry( EntryStatusADDED, @@ -1362,7 +1378,8 @@ func (m *ManifestTestSuite) TestManifestEntryBuilder() { } func (m *ManifestTestSuite) TestManifestWriterMeta() { - sch := NewSchema(0, NestedField{ID: 0, Name: "test01", Type: StringType{}}) + sch, err := NewSchema(0, NestedField{ID: 0, Name: "test01", Type: StringType{}}) + m.Require().NoError(err) w, err := NewManifestWriter(2, io.Discard, *UnpartitionedSpec, sch, 1) m.Require().NoError(err) md, err := w.meta() diff --git a/name_mapping.go b/name_mapping.go index e87f6c5e3..dd7c3640c 100644 --- a/name_mapping.go +++ b/name_mapping.go @@ -432,7 +432,7 @@ func ApplyNameMapping(schemaWithoutIDs *Schema, nameMapping NameMapping) (*Schem } return NewSchema(schemaWithoutIDs.ID, - top.Type.(*StructType).FieldList...), nil + top.Type.(*StructType).FieldList...) } type createMapping struct{} diff --git a/partitions_bench_test.go b/partitions_bench_test.go index e170b981d..12f98b8cf 100644 --- a/partitions_bench_test.go +++ b/partitions_bench_test.go @@ -27,13 +27,16 @@ import ( // which uses cached URL-escaped field names and strings.Builder for efficient // string concatenation. func BenchmarkPartitionToPath(b *testing.B) { - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "other_str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 4, Name: "date", Type: iceberg.PrimitiveTypes.Date, Required: true}, iceberg.NestedField{ID: 5, Name: "ts", Type: iceberg.PrimitiveTypes.Timestamp, Required: true}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Create a partition spec with fields that need URL escaping spec := iceberg.NewPartitionSpecID(1, @@ -88,7 +91,10 @@ func BenchmarkPartitionToPathManyFields(b *testing.B) { recordValues = append(recordValues, "value_"+string(rune('a'+i-1))) } - schema := iceberg.NewSchema(0, fields...) + schema, err := iceberg.NewSchema(0, fields...) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } spec := iceberg.NewPartitionSpecID(1, partitionFields...) record := partitionRecord(recordValues) @@ -103,11 +109,14 @@ func BenchmarkPartitionToPathManyFields(b *testing.B) { // The first call should be slower (builds the type), subsequent calls should // be faster (uses cache). func BenchmarkPartitionType(b *testing.B) { - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "bool", Type: iceberg.PrimitiveTypes.Bool, Required: false}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } spec := iceberg.NewPartitionSpecID(1, iceberg.PartitionField{ @@ -136,10 +145,14 @@ func BenchmarkPartitionType(b *testing.B) { func BenchmarkPartitionTypeMultipleSchemas(b *testing.B) { schemas := make([]*iceberg.Schema, 10) for i := range schemas { - schemas[i] = iceberg.NewSchema(i, + var err error + schemas[i], err = iceberg.NewSchema(i, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } } spec := iceberg.NewPartitionSpecID(1, @@ -164,11 +177,17 @@ func BenchmarkPartitionTypeMultipleSchemas(b *testing.B) { // BenchmarkPartitionToPathRepeated benchmarks repeated calls to PartitionToPath // with the same spec to verify that cached escaped names provide performance benefits. func BenchmarkPartitionToPathRepeated(b *testing.B) { - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "other_str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Use field names that require URL escaping to maximize the benefit of caching spec := iceberg.NewPartitionSpecID(1, diff --git a/partitions_test.go b/partitions_test.go index 5ad963003..aa788557d 100644 --- a/partitions_test.go +++ b/partitions_test.go @@ -163,10 +163,12 @@ func (p partitionRecord) Get(pos int) any { return p[pos] } func (p partitionRecord) Set(pos int, val any) { p[pos] = val } func TestPartitionSpecToPath(t *testing.T) { - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "other_str", Type: iceberg.PrimitiveTypes.String}, - iceberg.NestedField{ID: 3, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}) + iceberg.NestedField{ID: 3, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + ) + require.NoError(t, err) spec := iceberg.NewPartitionSpecID(3, iceberg.PartitionField{ @@ -191,10 +193,12 @@ func TestPartitionSpecToPath(t *testing.T) { } func TestGetPartitionFieldName(t *testing.T) { - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "str", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "int", Type: iceberg.PrimitiveTypes.Int32, Required: true}, - iceberg.NestedField{ID: 3, Name: "ts", Type: iceberg.PrimitiveTypes.Timestamp}) + iceberg.NestedField{ID: 3, Name: "ts", Type: iceberg.PrimitiveTypes.Timestamp}, + ) + require.NoError(t, err) tests := []struct { field iceberg.PartitionField diff --git a/schema.go b/schema.go index a13b8a325..6e033e58d 100644 --- a/schema.go +++ b/schema.go @@ -60,32 +60,65 @@ func NewSchemaFromJsonFields(id int, jsonFieldsStr string) (*Schema, error) { return nil, fmt.Errorf("failed to parse schema JSON: %w", err) } - return NewSchema(id, fields...), nil + return NewSchema(id, fields...) } // NewSchema constructs a new schema with the provided ID // and list of fields. -func NewSchema(id int, fields ...NestedField) *Schema { +func NewSchema(id int, fields ...NestedField) (*Schema, error) { return NewSchemaWithIdentifiers(id, []int{}, fields...) } // NewSchemaWithIdentifiers constructs a new schema with the provided ID // and fields, along with a slice of field IDs to be listed as identifier // fields. -func NewSchemaWithIdentifiers(id int, identifierIDs []int, fields ...NestedField) *Schema { +func NewSchemaWithIdentifiers(id int, identifierIDs []int, fields ...NestedField) (*Schema, error) { s := &Schema{ID: id, fields: fields, IdentifierFieldIDs: identifierIDs} - s.init() + if err := s.init(); err != nil { + return nil, err + } + + return s, nil +} + +// MustNewSchema is a helper that panics if NewSchema returns an error. +// It is intended for use in variable initializations where the schema +// is known to be valid at compile time. +func MustNewSchema(id int, fields ...NestedField) *Schema { + s, err := NewSchema(id, fields...) + if err != nil { + panic(err) + } + + return s +} + +// MustNewSchemaWithIdentifiers is a helper that panics if NewSchemaWithIdentifiers returns an error. +// It is intended for use in variable initializations where the schema +// is known to be valid at compile time. +func MustNewSchemaWithIdentifiers(id int, identifierIDs []int, fields ...NestedField) *Schema { + s, err := NewSchemaWithIdentifiers(id, identifierIDs, fields...) + if err != nil { + panic(err) + } return s } -func (s *Schema) init() { +func (s *Schema) init() error { s.lazyIDToParent = sync.OnceValues(func() (map[int]int, error) { return IndexParents(s) }) s.lazyNameMapping = sync.OnceValue(func() NameMapping { return createMappingFromSchema(s) }) + + // Validate that the schema does not contain duplicate field IDs. + if _, err := IndexNameByID(s); err != nil { + return err + } + + return nil } func (s *Schema) String() string { @@ -223,13 +256,15 @@ func (s *Schema) UnmarshalJSON(b []byte) error { return err } - s.init() - s.fields = aux.Fields if s.IdentifierFieldIDs == nil { s.IdentifierFieldIDs = []int{} } + if err := s.init(); err != nil { + return err + } + return nil } @@ -775,6 +810,7 @@ func IndexByName(schema *Schema) (map[string]int, error) { shortNameId: make(map[string]int), fieldNames: make([]string, 0), shortFieldNames: make([]string, 0), + idToName: make(map[int]string), } if _, err := Visit(schema, indexer); err != nil { return nil, err @@ -794,6 +830,7 @@ func IndexNameByID(schema *Schema) (map[int]string, error) { shortNameId: make(map[string]int), fieldNames: make([]string, 0), shortFieldNames: make([]string, 0), + idToName: make(map[int]string), } if _, err := Visit(schema, indexer); err != nil { return nil, err @@ -808,12 +845,13 @@ type indexByName struct { combinedIndex map[string]int fieldNames []string shortFieldNames []string + idToName map[int]string } func (i *indexByName) ByID() map[int]string { idToName := make(map[int]string) - for k, v := range i.index { - idToName[v] = k + for name, id := range i.index { + idToName[id] = name } return idToName @@ -838,6 +876,13 @@ func (i *indexByName) addField(name string, fieldID int) { ErrInvalidSchema, fullName, i.index[fullName], fieldID)) } + if existingName, ok := i.idToName[fieldID]; ok && existingName != fullName { + panic(fmt.Errorf("%w: Multiple entries with same key: %d=%s and %d=%s", + ErrInvalidSchema, fieldID, existingName, fieldID, fullName)) + } + + // Track the ID -> name mapping + i.idToName[fieldID] = fullName i.index[fullName] = fieldID if len(i.shortFieldNames) > 0 { shortName := strings.Join(i.shortFieldNames, ".") + "." + name @@ -1314,7 +1359,7 @@ func AssignFreshSchemaIDs(sc *Schema, nextID func() int) (*Schema, error) { } } - return NewSchemaWithIdentifiers(0, newIdentifierIDs, fields...), nil + return NewSchemaWithIdentifiers(0, newIdentifierIDs, fields...) } type SchemaWithPartnerVisitor[T, P any] interface { @@ -1529,7 +1574,7 @@ func SanitizeColumnNames(sc *Schema) (*Schema, error) { } return NewSchemaWithIdentifiers(sc.ID, sc.IdentifierFieldIDs, - result.Type.(*StructType).FieldList...), nil + result.Type.(*StructType).FieldList...) } type sanitizeColumnNameVisitor struct{} diff --git a/schema_test.go b/schema_test.go index c4ea21a6d..261e6b204 100644 --- a/schema_test.go +++ b/schema_test.go @@ -19,6 +19,7 @@ package iceberg_test import ( "encoding/json" + "fmt" "strings" "testing" @@ -29,7 +30,13 @@ import ( ) var ( - tableSchemaNested = iceberg.NewSchemaWithIdentifiers(1, + tableSchemaNested *iceberg.Schema + tableSchemaSimple *iceberg.Schema +) + +func init() { + var err error + tableSchemaNested, err = iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false, @@ -108,14 +115,20 @@ var ( Required: false, }, ) + if err != nil { + panic(fmt.Sprintf("failed to create tableSchemaNested: %v", err)) + } - tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, + tableSchemaSimple, err = iceberg.NewSchemaWithIdentifiers(1, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, ) -) + if err != nil { + panic(fmt.Sprintf("failed to create tableSchemaSimple: %v", err)) + } +} func TestSchemaFromJson(t *testing.T) { testFieldsStr := `[ @@ -446,26 +459,32 @@ func TestPruneColumnsString(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{1: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchemaWithIdentifiers(1, []int{1}, - iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}))) + expected, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsStringFull(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{1: {}}, true) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchemaWithIdentifiers(1, []int{1}, - iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}))) + expected, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsList(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{5: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 4, Name: "qux", Required: true, Type: &iceberg.ListType{ ElementID: 5, Element: iceberg.PrimitiveTypes.String, ElementRequired: true, - }}))) + }}) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsListItself(t *testing.T) { @@ -479,17 +498,19 @@ func TestPruneColumnsListFull(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{5: {}}, true) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 4, Name: "qux", Required: true, Type: &iceberg.ListType{ ElementID: 5, Element: iceberg.PrimitiveTypes.String, ElementRequired: true, - }}))) + }}) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsMap(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{9: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 6, Name: "quux", @@ -507,7 +528,9 @@ func TestPruneColumnsMap(t *testing.T) { }, ValueRequired: true, }, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsMapItself(t *testing.T) { @@ -520,7 +543,7 @@ func TestPruneColumnsMapFull(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{9: {}}, true) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 6, Name: "quux", @@ -538,14 +561,16 @@ func TestPruneColumnsMapFull(t *testing.T) { }, ValueRequired: true, }, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsMapKey(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{10: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 6, Name: "quux", @@ -563,14 +588,16 @@ func TestPruneColumnsMapKey(t *testing.T) { }, ValueRequired: true, }, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsStruct(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{16: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 15, Name: "person", @@ -580,14 +607,16 @@ func TestPruneColumnsStruct(t *testing.T) { ID: 16, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, }}, }, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsStructFull(t *testing.T) { sc, err := iceberg.PruneColumns(tableSchemaNested, map[int]iceberg.Void{16: {}}, true) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 15, Name: "person", @@ -597,39 +626,47 @@ func TestPruneColumnsStructFull(t *testing.T) { ID: 16, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, }}, }, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsEmptyStruct(t *testing.T) { - schemaEmptyStruct := iceberg.NewSchema(0, iceberg.NestedField{ + schemaEmptyStruct, err := iceberg.NewSchema(0, iceberg.NestedField{ ID: 15, Name: "person", Type: &iceberg.StructType{}, Required: false, }) + require.NoError(t, err) sc, err := iceberg.PruneColumns(schemaEmptyStruct, map[int]iceberg.Void{15: {}}, false) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(0, + expected, err := iceberg.NewSchema(0, iceberg.NestedField{ ID: 15, Name: "person", Type: &iceberg.StructType{}, Required: false, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsEmptyStructFull(t *testing.T) { - schemaEmptyStruct := iceberg.NewSchema(0, iceberg.NestedField{ + schemaEmptyStruct, err := iceberg.NewSchema(0, iceberg.NestedField{ ID: 15, Name: "person", Type: &iceberg.StructType{}, Required: false, }) + require.NoError(t, err) sc, err := iceberg.PruneColumns(schemaEmptyStruct, map[int]iceberg.Void{15: {}}, true) require.NoError(t, err) - assert.True(t, sc.Equals(iceberg.NewSchema(0, + expected, err := iceberg.NewSchema(0, iceberg.NestedField{ ID: 15, Name: "person", Type: &iceberg.StructType{}, Required: false, - }))) + }) + require.NoError(t, err) + assert.True(t, sc.Equals(expected)) } func TestPruneColumnsStructInMap(t *testing.T) { - nestedSchema := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + nestedSchema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 6, Name: "id_to_person", @@ -647,11 +684,12 @@ func TestPruneColumnsStructInMap(t *testing.T) { ValueRequired: true, }, }) + require.NoError(t, err) sc, err := iceberg.PruneColumns(nestedSchema, map[int]iceberg.Void{11: {}}, false) require.NoError(t, err) - expected := iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 6, Name: "id_to_person", @@ -668,12 +706,13 @@ func TestPruneColumnsStructInMap(t *testing.T) { ValueRequired: true, }, }) + require.NoError(t, err) assert.Truef(t, sc.Equals(expected), "expected: %s\ngot: %s", expected, sc) } func TestPruneColumnsStructInMapFull(t *testing.T) { - nestedSchema := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + nestedSchema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 6, Name: "id_to_person", @@ -691,11 +730,12 @@ func TestPruneColumnsStructInMapFull(t *testing.T) { ValueRequired: true, }, }) + require.NoError(t, err) sc, err := iceberg.PruneColumns(nestedSchema, map[int]iceberg.Void{11: {}}, true) require.NoError(t, err) - expected := iceberg.NewSchema(1, + expected, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 6, Name: "id_to_person", @@ -712,6 +752,7 @@ func TestPruneColumnsStructInMapFull(t *testing.T) { ValueRequired: true, }, }) + require.NoError(t, err) assert.Truef(t, sc.Equals(expected), "expected: %s\ngot: %s", expected, sc) } @@ -909,7 +950,7 @@ func TestHighestFieldID(t *testing.T) { // TestHighestFieldIDListType tests that HighestFieldID correctly computes // the highest field ID in a schema that includes a ListType as the final field. func TestHighestFieldIDListType(t *testing.T) { - tableSchema := iceberg.NewSchemaWithIdentifiers(1, + tableSchema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "list_field", Type: &iceberg.ListType{ @@ -920,5 +961,362 @@ func TestHighestFieldIDListType(t *testing.T) { Required: true, }, ) + require.NoError(t, err) assert.Equal(t, 2, tableSchema.HighestFieldID()) } + +// TestSchemaDuplicateFieldIDReturnsError tests that creating a schema with duplicate field IDs +// returns an error, matching the Java implementation behavior. +// This includes the exact scenario from GitHub issue #593. +func TestSchemaDuplicateFieldIDReturnsError(t *testing.T) { + tests := []struct { + name string + setupSchema func() (*iceberg.Schema, error) + expectedID int + expectedNames []string + }{ + { + name: "nested struct fields with duplicate ID", + setupSchema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 5, + Name: "struct", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 6, Name: "inner_op", Type: iceberg.PrimitiveTypes.String}, + {ID: 6, Name: "inner_req", Type: iceberg.PrimitiveTypes.String, Required: true}, + }, + }, + Required: true, + }, + ) + }, + expectedID: 6, + expectedNames: []string{"struct.inner_op", "struct.inner_req"}, + }, + { + name: "top-level fields with duplicate ID", + setupSchema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 1, Name: "bar", Type: iceberg.PrimitiveTypes.Int32}, + ) + }, + expectedID: 1, + expectedNames: []string{"foo", "bar"}, + }, + { + name: "duplicate ID from JSON deserialization", + setupSchema: func() (*iceberg.Schema, error) { + testFieldsStr := `[ + {"id":1,"name":"foo","type":"string","required":false}, + {"id":1,"name":"bar","type":"int","required":true} + ]` + + return iceberg.NewSchemaFromJsonFields(1, testFieldsStr) + }, + expectedID: 1, + expectedNames: []string{"foo", "bar"}, + }, + { + name: "nested struct with duplicate ID", + setupSchema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 5, + Name: "person", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 10, Name: "name", Type: iceberg.PrimitiveTypes.String}, + {ID: 10, Name: "age", Type: iceberg.PrimitiveTypes.Int32}, + }, + }, + }, + ) + }, + expectedID: 10, + expectedNames: []string{"person.name", "person.age"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := tt.setupSchema() + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.Contains(t, err.Error(), fmt.Sprintf("Multiple entries with same key: %d=", tt.expectedID)) + for _, expectedName := range tt.expectedNames { + assert.Contains(t, err.Error(), expectedName) + } + }) + } +} + +// TestSchemaValidSchemasNoError tests that valid schemas with unique IDs don't return errors +func TestSchemaValidSchemasNoError(t *testing.T) { + tests := []struct { + name string + schema func() (*iceberg.Schema, error) + }{ + { + name: "simple schema with unique IDs", + schema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32}, + iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, + ) + }, + }, + { + name: "nested struct with unique IDs", + schema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "struct", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 2, Name: "inner1", Type: iceberg.PrimitiveTypes.String}, + {ID: 3, Name: "inner2", Type: iceberg.PrimitiveTypes.Int32}, + }, + }, + }, + ) + }, + }, + { + name: "list with unique element ID", + schema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "list_field", + Type: &iceberg.ListType{ + ElementID: 2, + Element: iceberg.PrimitiveTypes.String, + ElementRequired: true, + }, + }, + ) + }, + }, + { + name: "map with unique key and value IDs", + schema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "map_field", + Type: &iceberg.MapType{ + KeyID: 2, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 3, + ValueType: iceberg.PrimitiveTypes.Int32, + ValueRequired: true, + }, + }, + ) + }, + }, + { + name: "complex nested structure with unique IDs", + schema: func() (*iceberg.Schema, error) { + return iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "outer", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + { + ID: 2, + Name: "nested_list", + Type: &iceberg.ListType{ + ElementID: 3, + Element: iceberg.PrimitiveTypes.String, + ElementRequired: true, + }, + }, + { + ID: 4, + Name: "nested_map", + Type: &iceberg.MapType{ + KeyID: 5, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 6, + ValueType: iceberg.PrimitiveTypes.Int32, + ValueRequired: true, + }, + }, + }, + }, + }, + ) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + schema, err := tt.schema() + require.NoError(t, err) + assert.NotNil(t, schema) + }) + } +} + +// TestSchemaDuplicateIDInListElement tests duplicate IDs in list elements +func TestSchemaDuplicateIDInListElement(t *testing.T) { + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "list1", + Type: &iceberg.ListType{ + ElementID: 5, + Element: iceberg.PrimitiveTypes.String, + ElementRequired: true, + }, + }, + iceberg.NestedField{ + ID: 2, + Name: "list2", + Type: &iceberg.ListType{ + ElementID: 5, // Duplicate ID + Element: iceberg.PrimitiveTypes.Int32, + ElementRequired: true, + }, + }, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.Contains(t, err.Error(), "Multiple entries with same key: 5=") +} + +// TestSchemaDuplicateIDInMapKey tests duplicate IDs in map keys +func TestSchemaDuplicateIDInMapKey(t *testing.T) { + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "map1", + Type: &iceberg.MapType{ + KeyID: 5, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 6, + ValueType: iceberg.PrimitiveTypes.Int32, + ValueRequired: true, + }, + }, + iceberg.NestedField{ + ID: 2, + Name: "map2", + Type: &iceberg.MapType{ + KeyID: 5, // Duplicate ID + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 7, + ValueType: iceberg.PrimitiveTypes.Bool, + ValueRequired: true, + }, + }, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.Contains(t, err.Error(), "Multiple entries with same key: 5=") +} + +// TestSchemaDuplicateIDInMapValue tests duplicate IDs in map values +func TestSchemaDuplicateIDInMapValue(t *testing.T) { + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "map1", + Type: &iceberg.MapType{ + KeyID: 5, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 6, + ValueType: iceberg.PrimitiveTypes.Int32, + ValueRequired: true, + }, + }, + iceberg.NestedField{ + ID: 2, + Name: "map2", + Type: &iceberg.MapType{ + KeyID: 7, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 6, // Duplicate ID + ValueType: iceberg.PrimitiveTypes.Bool, + ValueRequired: true, + }, + }, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.Contains(t, err.Error(), "Multiple entries with same key: 6=") +} + +// TestSchemaDuplicateIDAcrossNestingLevels tests duplicate IDs across different nesting levels +func TestSchemaDuplicateIDAcrossNestingLevels(t *testing.T) { + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ + ID: 1, + Name: "top_level", + Type: iceberg.PrimitiveTypes.String, + }, + iceberg.NestedField{ + ID: 2, + Name: "nested", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 1, Name: "inner", Type: iceberg.PrimitiveTypes.Int32}, // Duplicate of top-level ID + }, + }, + }, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + assert.Contains(t, err.Error(), "Multiple entries with same key: 1=") +} + +// TestSchemaErrorPropagation tests that errors are properly propagated from constructors +func TestSchemaErrorPropagation(t *testing.T) { + // Test NewSchema error propagation + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 1, Name: "bar", Type: iceberg.PrimitiveTypes.Int32}, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + + // Test NewSchemaWithIdentifiers error propagation + _, err = iceberg.NewSchemaWithIdentifiers(0, []int{1}, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 1, Name: "bar", Type: iceberg.PrimitiveTypes.Int32}, + ) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) + + // Test NewSchemaFromJsonFields error propagation + _, err = iceberg.NewSchemaFromJsonFields(1, `[ + {"id":1,"name":"foo","type":"string","required":false}, + {"id":1,"name":"bar","type":"int","required":true} + ]`) + require.Error(t, err) + assert.ErrorIs(t, err, iceberg.ErrInvalidSchema) +} + +// TestSchemaErrorMessageFormat tests that error messages match Java format exactly +func TestSchemaErrorMessageFormat(t *testing.T) { + _, err := iceberg.NewSchema(0, + iceberg.NestedField{ID: 5, Name: "field1", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 5, Name: "field2", Type: iceberg.PrimitiveTypes.Int32}, + ) + require.Error(t, err) + + // Check for Java-compatible format: "Multiple entries with same key: ID=name1 and ID=name2" + errMsg := err.Error() + assert.Contains(t, errMsg, "Multiple entries with same key") + assert.Contains(t, errMsg, "5=field1") + assert.Contains(t, errMsg, "5=field2") + assert.Contains(t, errMsg, " and ") +} diff --git a/table/arrow_utils.go b/table/arrow_utils.go index 0dffa74a9..22c8849cf 100644 --- a/table/arrow_utils.go +++ b/table/arrow_utils.go @@ -405,7 +405,12 @@ func ArrowSchemaToIceberg(sc *arrow.Schema, downcastNsTimestamp bool, nameMappin return nil, err } - return iceberg.NewSchema(0, out.Type.(*iceberg.StructType).FieldList...), nil + schema, err := iceberg.NewSchema(0, out.Type.(*iceberg.StructType).FieldList...) + if err != nil { + return nil, err + } + + return schema, nil case nameMapping != nil: schemaWithoutIDs, err := arrowToSchemaWithoutIDs(sc, downcastNsTimestamp) if err != nil { @@ -429,15 +434,27 @@ func ArrowSchemaToIcebergWithFreshIDs(sc *arrow.Schema, downcastNsTimestamp bool } func arrowToSchemaWithoutIDs(sc *arrow.Schema, downcastNsTimestamp bool) (*iceberg.Schema, error) { + // Use a counter to assign unique temporary IDs starting from -1 + // These IDs will be replaced by ApplyNameMapping, but we need unique IDs + // to pass schema validation + nextTempID := -1 withoutIDs, err := VisitArrowSchema(sc, convertToIceberg{ downcastTimestamp: downcastNsTimestamp, - fieldID: func(_ arrow.Field) int { return -1 }, + fieldID: func(_ arrow.Field) int { + id := nextTempID + nextTempID-- + + return id + }, }) if err != nil { return nil, err } - schemaWithoutIDs := iceberg.NewSchema(0, withoutIDs.Type.(*iceberg.StructType).FieldList...) + schemaWithoutIDs, err := iceberg.NewSchema(0, withoutIDs.Type.(*iceberg.StructType).FieldList...) + if err != nil { + return nil, err + } return schemaWithoutIDs, nil } @@ -644,7 +661,11 @@ func SchemaToArrowSchema(sc *iceberg.Schema, metadata map[string]string, include // For dealing with nested fields (List, Struct, Map) if includeFieldIDs is true, then // the child fields will contain a metadata key PARQUET:field_id set to the field id. func TypeToArrowType(t iceberg.Type, includeFieldIDs bool, useLargeTypes bool) (arrow.DataType, error) { - top, err := iceberg.Visit(iceberg.NewSchema(0, iceberg.NestedField{Type: t}), + tempSchema, err := iceberg.NewSchema(0, iceberg.NestedField{Type: t}) + if err != nil { + return nil, err + } + top, err := iceberg.Visit(tempSchema, convertToArrow{includeFieldIDs: includeFieldIDs, useLargeTypes: useLargeTypes}) if err != nil { return nil, err diff --git a/table/arrow_utils_internal_test.go b/table/arrow_utils_internal_test.go index d32298225..4208dcb23 100644 --- a/table/arrow_utils_internal_test.go +++ b/table/arrow_utils_internal_test.go @@ -20,6 +20,7 @@ package table import ( "bytes" "cmp" + "fmt" "math" "slices" "testing" @@ -358,63 +359,71 @@ func TestFileMetrics(t *testing.T) { suite.Run(t, new(FileStatsMetricsSuite)) } -var tableSchemaNested = iceberg.NewSchemaWithIdentifiers(1, - []int{1}, - iceberg.NestedField{ - ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false, - }, - iceberg.NestedField{ - ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true, - }, - iceberg.NestedField{ - ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false, - }, - iceberg.NestedField{ - ID: 4, Name: "qux", Required: true, Type: &iceberg.ListType{ - ElementID: 5, Element: iceberg.PrimitiveTypes.String, ElementRequired: true, +var tableSchemaNested *iceberg.Schema + +func init() { + var err error + tableSchemaNested, err = iceberg.NewSchemaWithIdentifiers(1, + []int{1}, + iceberg.NestedField{ + ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false, }, - }, - iceberg.NestedField{ - ID: 6, Name: "quux", - Type: &iceberg.MapType{ - KeyID: 7, - KeyType: iceberg.PrimitiveTypes.String, - ValueID: 8, - ValueType: &iceberg.MapType{ - KeyID: 9, - KeyType: iceberg.PrimitiveTypes.String, - ValueID: 10, - ValueType: iceberg.PrimitiveTypes.Int32, + iceberg.NestedField{ + ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true, + }, + iceberg.NestedField{ + ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false, + }, + iceberg.NestedField{ + ID: 4, Name: "qux", Required: true, Type: &iceberg.ListType{ + ElementID: 5, Element: iceberg.PrimitiveTypes.String, ElementRequired: true, + }, + }, + iceberg.NestedField{ + ID: 6, Name: "quux", + Type: &iceberg.MapType{ + KeyID: 7, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 8, + ValueType: &iceberg.MapType{ + KeyID: 9, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 10, + ValueType: iceberg.PrimitiveTypes.Int32, + ValueRequired: true, + }, ValueRequired: true, }, - ValueRequired: true, + Required: true, }, - Required: true, - }, - iceberg.NestedField{ - ID: 11, Name: "location", Type: &iceberg.ListType{ - ElementID: 12, Element: &iceberg.StructType{ - FieldList: []iceberg.NestedField{ - {ID: 13, Name: "latitude", Type: iceberg.PrimitiveTypes.Float32, Required: false}, - {ID: 14, Name: "longitude", Type: iceberg.PrimitiveTypes.Float32, Required: false}, + iceberg.NestedField{ + ID: 11, Name: "location", Type: &iceberg.ListType{ + ElementID: 12, Element: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 13, Name: "latitude", Type: iceberg.PrimitiveTypes.Float32, Required: false}, + {ID: 14, Name: "longitude", Type: iceberg.PrimitiveTypes.Float32, Required: false}, + }, }, + ElementRequired: true, }, - ElementRequired: true, + Required: true, }, - Required: true, - }, - iceberg.NestedField{ - ID: 15, - Name: "person", - Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{ - {ID: 16, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false}, - {ID: 17, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + iceberg.NestedField{ + ID: 15, + Name: "person", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 16, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false}, + {ID: 17, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + }, }, + Required: false, }, - Required: false, - }, -) + ) + if err != nil { + panic(fmt.Sprintf("failed to create tableSchemaNested: %v", err)) + } +} func TestStatsTypes(t *testing.T) { statsCols, err := iceberg.PreOrderVisit(tableSchemaNested, diff --git a/table/arrow_utils_test.go b/table/arrow_utils_test.go index 0f41b22cb..d9a63f542 100644 --- a/table/arrow_utils_test.go +++ b/table/arrow_utils_test.go @@ -277,7 +277,7 @@ func TestArrowSchemaToIceberg(t *testing.T) { func makeID(v int) *int { return &v } var ( - icebergSchemaNested = iceberg.NewSchema(0, + icebergSchemaNested = iceberg.MustNewSchema(0, iceberg.NestedField{ ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: true, }, @@ -334,7 +334,7 @@ var ( }, ) - icebergSchemaSimple = iceberg.NewSchema(0, + icebergSchemaSimple = iceberg.MustNewSchema(0, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, @@ -467,7 +467,7 @@ var ( {Name: "timestamptz_s_0000", Type: &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "UTC"}, Nullable: true}, }, nil) - TableSchemaWithAllMicrosecondsTimestampPrec = iceberg.NewSchema(0, + TableSchemaWithAllMicrosecondsTimestampPrec = iceberg.MustNewSchema(0, iceberg.NestedField{ID: 1, Name: "timestamp_s", Type: iceberg.PrimitiveTypes.Timestamp}, iceberg.NestedField{ID: 2, Name: "timestamptz_s", Type: iceberg.PrimitiveTypes.TimestampTz}, iceberg.NestedField{ID: 3, Name: "timestamp_ms", Type: iceberg.PrimitiveTypes.Timestamp}, diff --git a/table/evaluators.go b/table/evaluators.go index fd751632d..c6731eec9 100644 --- a/table/evaluators.go +++ b/table/evaluators.go @@ -38,7 +38,10 @@ const ( // the stats provided in the partitions (UpperBound/LowerBound/ContainsNull/ContainsNaN). func newManifestEvaluator(spec iceberg.PartitionSpec, schema *iceberg.Schema, partitionFilter iceberg.BooleanExpression, caseSensitive bool) (func(iceberg.ManifestFile) (bool, error), error) { partType := spec.PartitionType(schema) - partSchema := iceberg.NewSchema(0, partType.FieldList...) + partSchema, err := iceberg.NewSchema(0, partType.FieldList...) + if err != nil { + return nil, fmt.Errorf("failed to create partition schema: %w", err) + } filter, err := iceberg.RewriteNotExpr(partitionFilter) if err != nil { return nil, err diff --git a/table/evaluators_test.go b/table/evaluators_test.go index cec68b430..196d9b5c9 100644 --- a/table/evaluators_test.go +++ b/table/evaluators_test.go @@ -18,6 +18,7 @@ package table import ( + "fmt" "math" "testing" @@ -31,6 +32,69 @@ const ( IntMinValue, IntMaxValue int32 = 30, 79 ) +var testSchema *iceberg.Schema + +func init() { + var err error + testSchema, err = iceberg.NewSchema(1, + iceberg.NestedField{ + ID: 1, Name: "id", + Type: iceberg.PrimitiveTypes.Int32, Required: true, + }, + iceberg.NestedField{ + ID: 2, Name: "all_nulls_missing_nan", + Type: iceberg.PrimitiveTypes.String, Required: false, + }, + iceberg.NestedField{ + ID: 3, Name: "some_nulls", + Type: iceberg.PrimitiveTypes.String, Required: false, + }, + iceberg.NestedField{ + ID: 4, Name: "no_nulls", + Type: iceberg.PrimitiveTypes.String, Required: false, + }, + iceberg.NestedField{ + ID: 5, Name: "float", + Type: iceberg.PrimitiveTypes.Float32, Required: false, + }, + iceberg.NestedField{ + ID: 6, Name: "all_nulls_double", + Type: iceberg.PrimitiveTypes.Float64, Required: false, + }, + iceberg.NestedField{ + ID: 7, Name: "all_nulls_no_nans", + Type: iceberg.PrimitiveTypes.Float32, Required: false, + }, + iceberg.NestedField{ + ID: 8, Name: "all_nans", + Type: iceberg.PrimitiveTypes.Float64, Required: false, + }, + iceberg.NestedField{ + ID: 9, Name: "both_nan_and_null", + Type: iceberg.PrimitiveTypes.Float32, Required: false, + }, + iceberg.NestedField{ + ID: 10, Name: "no_nan_or_null", + Type: iceberg.PrimitiveTypes.Float64, Required: false, + }, + iceberg.NestedField{ + ID: 11, Name: "all_nulls_missing_nan_float", + Type: iceberg.PrimitiveTypes.Float32, Required: false, + }, + iceberg.NestedField{ + ID: 12, Name: "all_same_value_or_null", + Type: iceberg.PrimitiveTypes.String, Required: false, + }, + iceberg.NestedField{ + ID: 13, Name: "no_nulls_same_value_a", + Type: iceberg.PrimitiveTypes.Binary, Required: false, + }, + ) + if err != nil { + panic(fmt.Sprintf("failed to create testSchema: %v", err)) + } +} + func TestManifestEvaluator(t *testing.T) { var ( IntMin, IntMax = []byte{byte(IntMinValue), 0x00, 0x00, 0x00}, []byte{byte(IntMaxValue), 0x00, 0x00, 0x00} @@ -40,61 +104,6 @@ func TestManifestEvaluator(t *testing.T) { DblMin, _ = iceberg.Float64Literal(0).MarshalBinary() DblMax, _ = iceberg.Float64Literal(20).MarshalBinary() NanTrue, NanFalse = true, false - - testSchema = iceberg.NewSchema(1, - iceberg.NestedField{ - ID: 1, Name: "id", - Type: iceberg.PrimitiveTypes.Int32, Required: true, - }, - iceberg.NestedField{ - ID: 2, Name: "all_nulls_missing_nan", - Type: iceberg.PrimitiveTypes.String, Required: false, - }, - iceberg.NestedField{ - ID: 3, Name: "some_nulls", - Type: iceberg.PrimitiveTypes.String, Required: false, - }, - iceberg.NestedField{ - ID: 4, Name: "no_nulls", - Type: iceberg.PrimitiveTypes.String, Required: false, - }, - iceberg.NestedField{ - ID: 5, Name: "float", - Type: iceberg.PrimitiveTypes.Float32, Required: false, - }, - iceberg.NestedField{ - ID: 6, Name: "all_nulls_double", - Type: iceberg.PrimitiveTypes.Float64, Required: false, - }, - iceberg.NestedField{ - ID: 7, Name: "all_nulls_no_nans", - Type: iceberg.PrimitiveTypes.Float32, Required: false, - }, - iceberg.NestedField{ - ID: 8, Name: "all_nans", - Type: iceberg.PrimitiveTypes.Float64, Required: false, - }, - iceberg.NestedField{ - ID: 9, Name: "both_nan_and_null", - Type: iceberg.PrimitiveTypes.Float32, Required: false, - }, - iceberg.NestedField{ - ID: 10, Name: "no_nan_or_null", - Type: iceberg.PrimitiveTypes.Float64, Required: false, - }, - iceberg.NestedField{ - ID: 11, Name: "all_nulls_missing_nan_float", - Type: iceberg.PrimitiveTypes.Float32, Required: false, - }, - iceberg.NestedField{ - ID: 12, Name: "all_same_value_or_null", - Type: iceberg.PrimitiveTypes.String, Required: false, - }, - iceberg.NestedField{ - ID: 13, Name: "no_nulls_same_value_a", - Type: iceberg.PrimitiveTypes.Binary, Required: false, - }, - ) ) partFields := make([]iceberg.PartitionField, 0, testSchema.NumFields()) @@ -720,12 +729,17 @@ type ProjectionTestSuite struct { } func (*ProjectionTestSuite) schema() *iceberg.Schema { - return iceberg.NewSchema(0, + sch, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64}, iceberg.NestedField{ID: 2, Name: "data", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "event_date", Type: iceberg.PrimitiveTypes.Date}, iceberg.NestedField{ID: 4, Name: "event_ts", Type: iceberg.PrimitiveTypes.Timestamp}, ) + if err != nil { + panic(fmt.Sprintf("failed to create schema: %v", err)) + } + + return sch } func (*ProjectionTestSuite) emptySpec() iceberg.PartitionSpec { @@ -1119,7 +1133,8 @@ type InclusiveMetricsTestSuite struct { } func (suite *InclusiveMetricsTestSuite) SetupSuite() { - suite.schemaDataFile = iceberg.NewSchema(0, + var err error + suite.schemaDataFile, err = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 2, Name: "no_stats", Type: iceberg.PrimitiveTypes.Int32, Required: false}, iceberg.NestedField{ID: 3, Name: "required", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -1135,6 +1150,7 @@ func (suite *InclusiveMetricsTestSuite) SetupSuite() { iceberg.NestedField{ID: 13, Name: "no_nan_stats", Type: iceberg.PrimitiveTypes.Float64}, iceberg.NestedField{ID: 14, Name: "some_empty", Type: iceberg.PrimitiveTypes.String}, ) + suite.Require().NoError(err) var ( IntMin, _ = iceberg.Int32Literal(IntMinValue).MarshalBinary() @@ -1206,13 +1222,14 @@ func (suite *InclusiveMetricsTestSuite) SetupSuite() { }, } - suite.schemaDataFileNan = iceberg.NewSchema(0, + suite.schemaDataFileNan, err = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "all_nan", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 2, Name: "max_nan", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 3, Name: "min_max_nan", Type: iceberg.PrimitiveTypes.Float32}, iceberg.NestedField{ID: 4, Name: "all_nan_null_bounds", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 5, Name: "some_nan_correct_bounds", Type: iceberg.PrimitiveTypes.Float32}, ) + suite.Require().NoError(err) suite.dataFileNan = &mockDataFile{ path: "file.avro", @@ -1380,8 +1397,9 @@ func (suite *InclusiveMetricsTestSuite) TestMissingColumn() { } func (suite *InclusiveMetricsTestSuite) TestMissingStats() { - noStatsSchema := iceberg.NewSchema(0, + noStatsSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 2, Name: "no_stats", Type: iceberg.PrimitiveTypes.Float64}) + suite.Require().NoError(err) noStatsFile := &mockDataFile{ path: "file_1.parquet", @@ -2038,7 +2056,8 @@ type StrictMetricsTestSuite struct { } func (suite *StrictMetricsTestSuite) SetupSuite() { - suite.schemaDataFile = iceberg.NewSchema(0, + var err error + suite.schemaDataFile, err = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 2, Name: "no_stats", Type: iceberg.PrimitiveTypes.Int32, Required: false}, iceberg.NestedField{ID: 3, Name: "required", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -2054,6 +2073,7 @@ func (suite *StrictMetricsTestSuite) SetupSuite() { iceberg.NestedField{ID: 13, Name: "nan_and_null_only", Type: iceberg.PrimitiveTypes.Float64}, iceberg.NestedField{ID: 14, Name: "no_nan_stats", Type: iceberg.PrimitiveTypes.Float64}, ) + suite.Require().NoError(err) var ( IntMin, _ = iceberg.Int32Literal(IntMinValue).MarshalBinary() @@ -2115,13 +2135,14 @@ func (suite *StrictMetricsTestSuite) SetupSuite() { }, } - suite.schemaDataFileNan = iceberg.NewSchema(0, + suite.schemaDataFileNan, err = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "all_nan", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 2, Name: "max_nan", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 3, Name: "min_max_nan", Type: iceberg.PrimitiveTypes.Float32}, iceberg.NestedField{ID: 4, Name: "all_nan_null_bounds", Type: iceberg.PrimitiveTypes.Float64, Required: true}, iceberg.NestedField{ID: 5, Name: "some_nan_correct_bounds", Type: iceberg.PrimitiveTypes.Float32}, ) + suite.Require().NoError(err) suite.dataFileNan = &mockDataFile{ path: "file.avro", @@ -2310,8 +2331,9 @@ func (suite *StrictMetricsTestSuite) TestMissingColumn() { } func (suite *StrictMetricsTestSuite) TestMissingStats() { - noStatsSchema := iceberg.NewSchema(0, + noStatsSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 2, Name: "no_stats", Type: iceberg.PrimitiveTypes.Float64}) + suite.Require().NoError(err) noStatsFile := &mockDataFile{ path: "file_1.parquet", diff --git a/table/internal/utils_test.go b/table/internal/utils_test.go index 246644417..a152c6ed0 100644 --- a/table/internal/utils_test.go +++ b/table/internal/utils_test.go @@ -111,11 +111,12 @@ func (m *mockStatsAgg) MinAsBytes() ([]byte, error) { return nil, func (m *mockStatsAgg) MaxAsBytes() ([]byte, error) { return nil, nil } func TestPartitionValue_LinearTransforms(t *testing.T) { - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "ts_col", Type: iceberg.TimestampType{}, }) + require.NoError(t, err) cases := []struct { name string @@ -164,11 +165,12 @@ func TestPartitionValue_LinearTransforms(t *testing.T) { } func TestPartitionValue_MismatchPanics(t *testing.T) { - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "ts_col", Type: iceberg.TimestampType{}, }) + require.NoError(t, err) partitionField := iceberg.PartitionField{ SourceID: 1, FieldID: 100, diff --git a/table/metadata_builder_internal_test.go b/table/metadata_builder_internal_test.go index 8fe671235..c1eb39fb0 100644 --- a/table/metadata_builder_internal_test.go +++ b/table/metadata_builder_internal_test.go @@ -28,12 +28,17 @@ import ( ) func schema() iceberg.Schema { - return *iceberg.NewSchema( + sch, err := iceberg.NewSchema( 0, iceberg.NestedField{ID: 1, Name: "x", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, iceberg.NestedField{ID: 3, Name: "z", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + if err != nil { + panic(fmt.Sprintf("failed to create schema: %v", err)) + } + + return *sch } func sortOrder() SortOrder { @@ -151,7 +156,7 @@ func TestBuildUnpartitionedUnsorted(t *testing.T) { func TestReassignIds(t *testing.T) { TestLocation := "file:///tmp/iceberg-test" - schema := iceberg.NewSchema(10, iceberg.NestedField{ + schema, err := iceberg.NewSchema(10, iceberg.NestedField{ ID: 11, Name: "a", Type: iceberg.PrimitiveTypes.Int64, @@ -208,7 +213,7 @@ func TestReassignIds(t *testing.T) { require.NoError(t, err) require.NotNil(t, meta) - expectedSchema := iceberg.NewSchema(0, iceberg.NestedField{ + expectedSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.Int64, @@ -889,8 +894,9 @@ func TestConstructDefaultMainBranch(t *testing.T) { func TestAddIncompatibleCurrentSchemaFails(t *testing.T) { builder := builderWithoutChanges(2) - addedSchema := iceberg.NewSchema(1) - err := builder.AddSchema(addedSchema) + addedSchema, err := iceberg.NewSchema(1) + require.NoError(t, err) + err = builder.AddSchema(addedSchema) require.NoError(t, err) err = builder.SetCurrentSchemaID(1) require.NoError(t, err) @@ -944,10 +950,11 @@ func TestRemoveSchemas(t *testing.T) { // Java: TestTableMetadata.testUpdateSchema func TestUpdateSchema(t *testing.T) { // Test schema updates and evolution - schema1 := iceberg.NewSchema( + schema1, err := iceberg.NewSchema( 0, iceberg.NestedField{ID: 1, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, ) + require.NoError(t, err) meta, err := NewMetadata( schema1, @@ -963,11 +970,12 @@ func TestUpdateSchema(t *testing.T) { require.Equal(t, 1, meta.LastColumnID()) // Update schema by adding a field - schema2 := iceberg.NewSchema( + schema2, err := iceberg.NewSchema( 1, iceberg.NestedField{ID: 1, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, iceberg.NestedField{ID: 2, Name: "x", Type: iceberg.PrimitiveTypes.String, Required: true}, ) + require.NoError(t, err) builder, err := MetadataBuilderFromBase(meta, "") require.NoError(t, err) @@ -1053,12 +1061,13 @@ func TestRemoveReservedPropertiesFails(t *testing.T) { func TestIdsAreReassignedForNewMetadata(t *testing.T) { // Create schema with ID 10 (should be reassigned to 0) - tableSchema := iceberg.NewSchema( + tableSchema, err := iceberg.NewSchema( 10, iceberg.NestedField{ID: 1, Name: "x", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, iceberg.NestedField{ID: 3, Name: "z", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + require.NoError(t, err) partitionSpec := partitionSpec() sortOrder := sortOrder() @@ -1131,7 +1140,9 @@ func TestNewMetadataChanges(t *testing.T) { } func TestNewMetadataChangesUnpartitionedUnsorted(t *testing.T) { - tableSchema := *iceberg.NewSchema(0) + tableSchemaPtr, err := iceberg.NewSchema(0) + require.NoError(t, err) + tableSchema := *tableSchemaPtr partitionSpec := *iceberg.UnpartitionedSpec sortOrder := UnsortedSortOrder @@ -1180,15 +1191,16 @@ func TestNewMetadataChangesUnpartitionedUnsorted(t *testing.T) { func TestSetCurrentSchemaChangeIsMinusOneIfSchemaWasAddedInThisChange(t *testing.T) { builder := builderWithoutChanges(2) - addedSchema := iceberg.NewSchema( + addedSchema, err := iceberg.NewSchema( 1, iceberg.NestedField{ID: 1, Name: "x", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 3, Name: "z", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 4, Name: "a", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + require.NoError(t, err) - err := builder.AddSchema(addedSchema) + err = builder.AddSchema(addedSchema) require.NoError(t, err) err = builder.SetCurrentSchemaID(1) @@ -1542,7 +1554,7 @@ func TestAddSnapshotV3AcceptsFirstRowIDEqualToNextRowID(t *testing.T) { } func generateTypeSchema(typ iceberg.Type) *iceberg.Schema { - sc := iceberg.NewSchema(0, + sc, err := iceberg.NewSchema(0, iceberg.NestedField{ Type: iceberg.Int64Type{}, ID: 1, Name: "id", Required: true, }, @@ -1589,108 +1601,122 @@ func generateTypeSchema(typ iceberg.Type) *iceberg.Schema { Required: true, }, ) + if err != nil { + panic(fmt.Sprintf("failed to create generateTypeSchema: %v", err)) + } return sc } func TestUnknownTypeValidation(t *testing.T) { t.Run("ValidUnknownTypeInFields", func(t *testing.T) { - validSchema := iceberg.NewSchema(1, + validSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "unknown_field", Type: iceberg.UnknownType{}, Required: false}, ) - err := checkSchemaCompatibility(validSchema, 3) + require.NoError(t, err) + err = checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid unknown type schema should pass validation") }) t.Run("InvalidRequiredUnknownType", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "invalid_unknown", Type: iceberg.UnknownType{}, Required: true}, ) - err := checkSchemaCompatibility(invalidSchema, 3) + require.NoError(t, err) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is required") require.ErrorContains(t, err, "must be optional") }) t.Run("InvalidInitialDefaultUnknownType", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "invalid_unknown", Type: iceberg.UnknownType{}, Required: false, InitialDefault: "invalid"}, ) - err := checkSchemaCompatibility(invalidSchema, 3) + require.NoError(t, err) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type has non-null initial-default") require.ErrorContains(t, err, "must have null initial-default") }) t.Run("InvalidWriteDefaultUnknownType", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "invalid_unknown", Type: iceberg.UnknownType{}, Required: false, WriteDefault: "invalid"}, ) - err := checkSchemaCompatibility(invalidSchema, 3) + require.NoError(t, err) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type has non-null write-default") require.ErrorContains(t, err, "must have null write-default") }) t.Run("ValidNestedUnknownType", func(t *testing.T) { - validSchema := iceberg.NewSchema(1, + validSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "nested", Type: &iceberg.StructType{FieldList: []iceberg.NestedField{ {ID: 3, Name: "unknown_field", Type: iceberg.UnknownType{}, Required: false}, }}, Required: false}, ) - err := checkSchemaCompatibility(validSchema, 3) + require.NoError(t, err) + err = checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid nested unknown type schema should pass validation") }) t.Run("InvalidRequiredNestedUnknownType", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 2, Name: "nested", Type: &iceberg.StructType{FieldList: []iceberg.NestedField{ {ID: 3, Name: "unknown_field", Type: iceberg.UnknownType{}, Required: true}, }}, Required: false}, ) - err := checkSchemaCompatibility(invalidSchema, 3) + require.NoError(t, err) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is required") require.ErrorContains(t, err, "must be optional") }) t.Run("ValidListWithUnknownType", func(t *testing.T) { - validSchema := iceberg.NewSchema(1, + validSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "list", Type: &iceberg.ListType{ElementID: 3, Element: iceberg.UnknownType{}, ElementRequired: false}, Required: false}, ) - err := checkSchemaCompatibility(validSchema, 3) + require.NoError(t, err) + err = checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid list with unknown type schema should pass validation") }) t.Run("InvalidRequiredListElement", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "list", Type: &iceberg.ListType{ElementID: 3, Element: iceberg.UnknownType{}, ElementRequired: true}, Required: false}, ) - err := checkSchemaCompatibility(invalidSchema, 3) + require.NoError(t, err) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is required") require.ErrorContains(t, err, "must be optional") }) t.Run("ValidMapWithUnknownType", func(t *testing.T) { - validSchema := iceberg.NewSchema(1, + validSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "map", Type: &iceberg.MapType{KeyID: 3, KeyType: iceberg.StringType{}, ValueID: 4, ValueType: iceberg.UnknownType{}, ValueRequired: false}, Required: false}, ) - err := checkSchemaCompatibility(validSchema, 3) + require.NoError(t, err) + err = checkSchemaCompatibility(validSchema, 3) require.NoError(t, err, "Valid map with unknown type schema should pass validation") }) t.Run("InvalidUnknownMapKey", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 2, Name: "invalid_map", Type: &iceberg.MapType{KeyID: 3, KeyType: iceberg.UnknownType{}, ValueID: 4, ValueType: iceberg.StringType{}, ValueRequired: false}, Required: true}, ) - err := checkSchemaCompatibility(invalidSchema, 3) + require.NoError(t, err) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is used as map key") require.ErrorContains(t, err, "must be optional") }) t.Run("InvalidUnknownMapValue", func(t *testing.T) { - invalidSchema := iceberg.NewSchema(1, + invalidSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 2, Name: "invalid_map", Type: &iceberg.MapType{KeyID: 3, KeyType: iceberg.StringType{}, ValueID: 4, ValueType: iceberg.UnknownType{}, ValueRequired: true}, Required: false}, ) - err := checkSchemaCompatibility(invalidSchema, 3) + require.NoError(t, err) + err = checkSchemaCompatibility(invalidSchema, 3) require.Error(t, err, "should error when unknown type is used as map value") require.ErrorContains(t, err, "must be optional") }) diff --git a/table/metadata_internal_test.go b/table/metadata_internal_test.go index 5dd270986..31e93e30e 100644 --- a/table/metadata_internal_test.go +++ b/table/metadata_internal_test.go @@ -194,12 +194,13 @@ func TestMetadataV1Parsing(t *testing.T) { assert.Equal(t, int64(1602638573874), meta.LastUpdatedMillis()) assert.Equal(t, 3, meta.LastColumnID()) - expected := iceberg.NewSchema( + expected, err := iceberg.NewSchema( 0, iceberg.NestedField{ID: 1, Name: "x", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, iceberg.NestedField{ID: 3, Name: "z", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + require.NoError(t, err) assert.True(t, slices.EqualFunc([]*iceberg.Schema{expected}, meta.Schemas(), func(s1, s2 *iceberg.Schema) bool { return s1.Equals(s2) @@ -308,9 +309,10 @@ func TestMetadataV3Parsing(t *testing.T) { func TestMetadataV3Builder(t *testing.T) { // Test creating v3 metadata with builder - schema := iceberg.NewSchema(0, + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + require.NoError(t, err) builder, err := NewMetadataBuilder(3) require.NoError(t, err) @@ -715,12 +717,13 @@ func TestV1WriteMetadataToV2(t *testing.T) { } func TestNewMetadataWithExplicitV1Format(t *testing.T) { - schema := iceberg.NewSchemaWithIdentifiers(10, + schema, err := iceberg.NewSchemaWithIdentifiers(10, []int{22}, iceberg.NestedField{ID: 10, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}, iceberg.NestedField{ID: 22, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 33, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false}, ) + require.NoError(t, err) partitionSpec := iceberg.NewPartitionSpecID(10, iceberg.PartitionField{SourceID: 22, FieldID: 1022, Transform: iceberg.IdentityTransform{}, Name: "bar"}) @@ -735,10 +738,12 @@ func TestNewMetadataWithExplicitV1Format(t *testing.T) { actual, err := NewMetadata(schema, &partitionSpec, sortOrder, "s3://some_v1_location/", iceberg.Properties{PropertyFormatVersion: "1"}) require.NoError(t, err) - expectedSchema := iceberg.NewSchemaWithIdentifiers(0, []int{2}, + expectedSchema, err := iceberg.NewSchemaWithIdentifiers(0, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, - iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}) + iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, + ) + require.NoError(t, err) expectedSpec := iceberg.NewPartitionSpec( iceberg.PartitionField{SourceID: 2, FieldID: 1000, Transform: iceberg.IdentityTransform{}, Name: "bar"}) @@ -773,12 +778,13 @@ func TestNewMetadataWithExplicitV1Format(t *testing.T) { } func TestNewMetadataV2Format(t *testing.T) { - schema := iceberg.NewSchemaWithIdentifiers(10, + schema, err := iceberg.NewSchemaWithIdentifiers(10, []int{22}, iceberg.NestedField{ID: 10, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: false}, iceberg.NestedField{ID: 22, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 33, Name: "baz", Type: iceberg.PrimitiveTypes.Bool, Required: false}, ) + require.NoError(t, err) partitionSpec := iceberg.NewPartitionSpecID(10, iceberg.PartitionField{SourceID: 22, FieldID: 1022, Transform: iceberg.IdentityTransform{}, Name: "bar"}) @@ -795,10 +801,12 @@ func TestNewMetadataV2Format(t *testing.T) { actual, err := NewMetadataWithUUID(schema, &partitionSpec, sortOrder, "s3://some_v1_location/", nil, tableUUID) require.NoError(t, err) - expectedSchema := iceberg.NewSchemaWithIdentifiers(0, []int{2}, + expectedSchema, err := iceberg.NewSchemaWithIdentifiers(0, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, - iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}) + iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, + ) + require.NoError(t, err) expectedSpec := iceberg.NewPartitionSpec( iceberg.PartitionField{SourceID: 2, FieldID: 1000, Transform: iceberg.IdentityTransform{}, Name: "bar"}) @@ -831,8 +839,9 @@ func TestNewMetadataV2Format(t *testing.T) { } func TestMetadataV1Serialize(t *testing.T) { - sc := iceberg.NewSchema(0, + sc, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "int", Type: iceberg.PrimitiveTypes.Int32}) + require.NoError(t, err) toserialize := &metadataV1{ commonMetadata: commonMetadata{ FormatVersion: 1, @@ -923,8 +932,9 @@ func TestMetadataV1Serialize(t *testing.T) { } func TestMetadataV2Serialize(t *testing.T) { - sc := iceberg.NewSchema(0, + sc, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "int", Type: iceberg.PrimitiveTypes.Int32}) + require.NoError(t, err) toserialize := &metadataV2{ LastSeqNum: 1, commonMetadata: commonMetadata{ @@ -1032,9 +1042,10 @@ func TestMetadataBuilderSetDefaultSpecIDLastPartition(t *testing.T) { func TestMetadataBuilderSetLastAddedSchema(t *testing.T) { builder, err := NewMetadataBuilder(2) assert.NoError(t, err) - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema)) assert.NoError(t, builder.SetCurrentSchemaID(-1)) @@ -1057,19 +1068,22 @@ func TestMetadataBuilderSchemaIncreasingNumbering(t *testing.T) { builder, err := NewMetadataBuilder(2) assert.NoError(t, err) assert.NoError(t, builder.SetFormatVersion(2)) - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema)) - schema = iceberg.NewSchema(3, + schema, err = iceberg.NewSchema(3, iceberg.NestedField{ID: 3, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema)) - schema = iceberg.NewSchema(2, + schema, err = iceberg.NewSchema(2, iceberg.NestedField{ID: 4, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema)) assert.Equal(t, 1, builder.schemaList[0].ID) @@ -1080,13 +1094,15 @@ func TestMetadataBuilderSchemaIncreasingNumbering(t *testing.T) { func TestMetadataBuilderReuseSchema(t *testing.T) { builder, err := NewMetadataBuilder(2) assert.NoError(t, err) - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema)) - schema2 := iceberg.NewSchema(15, + schema2, err := iceberg.NewSchema(15, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: true}, ) + require.NoError(t, err) assert.NoError(t, builder.AddSchema(schema2)) assert.Equal(t, len(builder.schemaList), 1) assert.Equal(t, *builder.lastAddedSchemaID, 1) @@ -1328,7 +1344,7 @@ func TestTableDataV2NoSnapshots(t *testing.T) { ], "default-sort-order-id": 0 }` - schema := iceberg.NewSchema(1, iceberg.NestedField{ + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ Type: iceberg.FixedTypeOf(1), ID: 1, Name: "struct_name", diff --git a/table/metadata_schema_compatibility_test.go b/table/metadata_schema_compatibility_test.go index eb67fe54f..761449264 100644 --- a/table/metadata_schema_compatibility_test.go +++ b/table/metadata_schema_compatibility_test.go @@ -36,17 +36,8 @@ func TestNewMetadata_DuplicateValues(t *testing.T) { Type: iceberg.PrimitiveTypes.String, } } - icebergSchema := iceberg.NewSchema(1, fields...) - - _, err := NewMetadata( - icebergSchema, - &iceberg.PartitionSpec{}, - UnsortedSortOrder, - "", - iceberg.Properties{}, - ) - - require.Error(t, err) - assert.Contains(t, err.Error(), "invalid schema: error encountered during schema visitor") + _, err := iceberg.NewSchema(1, fields...) + require.Error(t, err, "NewSchema should reject schemas with duplicate field names") + assert.Contains(t, err.Error(), "error encountered during schema visitor") assert.Contains(t, err.Error(), "multiple fields for name foo") } diff --git a/table/orphan_cleanup_integration_test.go b/table/orphan_cleanup_integration_test.go index bc4d3883e..1df063195 100644 --- a/table/orphan_cleanup_integration_test.go +++ b/table/orphan_cleanup_integration_test.go @@ -53,7 +53,7 @@ const ( OrphanFilePrefix = "orphan_" ) -var tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, +var tableSchemaSimple = iceberg.MustNewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true, diff --git a/table/partitioned_fanout_writer_test.go b/table/partitioned_fanout_writer_test.go index b7e0f6d8b..3a3def114 100644 --- a/table/partitioned_fanout_writer_test.go +++ b/table/partitioned_fanout_writer_test.go @@ -308,7 +308,7 @@ func (s *FanoutWriterTestSuite) TestVoidTransform() { } func (s *FanoutWriterTestSuite) TestPartitionedLogicalTypesRequireIntFieldIDCase() { - icebergSchema := iceberg.NewSchemaWithIdentifiers(1, []int{1}, + icebergSchema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "decimal_col", Type: iceberg.DecimalTypeOf(10, 6), Required: true}, iceberg.NestedField{ID: 3, Name: "time_col", Type: iceberg.PrimitiveTypes.Time, Required: true}, @@ -317,6 +317,7 @@ func (s *FanoutWriterTestSuite) TestPartitionedLogicalTypesRequireIntFieldIDCase iceberg.NestedField{ID: 6, Name: "uuid_col", Type: iceberg.PrimitiveTypes.UUID, Required: true}, iceberg.NestedField{ID: 7, Name: "date_col", Type: iceberg.PrimitiveTypes.Date, Required: true}, ) + s.Require().NoError(err) spec := iceberg.NewPartitionSpec( iceberg.PartitionField{SourceID: 2, FieldID: 4008, Transform: iceberg.IdentityTransform{}, Name: "decimal_col"}, diff --git a/table/partitioned_throughput_bench_test.go b/table/partitioned_throughput_bench_test.go index 2ba1a98d4..c22249b06 100644 --- a/table/partitioned_throughput_bench_test.go +++ b/table/partitioned_throughput_bench_test.go @@ -126,7 +126,7 @@ func BenchmarkPartitionedWriteThroughput_Simple(b *testing.B) { mem := memory.DefaultAllocator // Define Iceberg schema with only primitive types - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "ts", Type: iceberg.PrimitiveTypes.TimestampTz, Required: true}, iceberg.NestedField{ID: 3, Name: "host", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -134,6 +134,9 @@ func BenchmarkPartitionedWriteThroughput_Simple(b *testing.B) { iceberg.NestedField{ID: 5, Name: "bytes_sent", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 6, Name: "user_agent", Type: iceberg.PrimitiveTypes.String, Required: true}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Define Arrow schema (must match Iceberg schema types) arrSchema := arrow.NewSchema([]arrow.Field{ @@ -193,7 +196,7 @@ func BenchmarkPartitionedWriteThroughput_ListPrimitive(b *testing.B) { mem := memory.DefaultAllocator // Define Iceberg schema with list - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "ts", Type: iceberg.PrimitiveTypes.TimestampTz, Required: true}, iceberg.NestedField{ID: 3, Name: "host", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -206,6 +209,9 @@ func BenchmarkPartitionedWriteThroughput_ListPrimitive(b *testing.B) { ElementRequired: true, }, Required: false}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Define Arrow schema arrSchema := arrow.NewSchema([]arrow.Field{ @@ -297,7 +303,7 @@ func BenchmarkPartitionedWriteThroughput_ListStruct(b *testing.B) { } // Define Iceberg schema with complex nested types - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "ts", Type: iceberg.PrimitiveTypes.TimestampTz, Required: true}, iceberg.NestedField{ID: 3, Name: "host", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -306,6 +312,9 @@ func BenchmarkPartitionedWriteThroughput_ListStruct(b *testing.B) { iceberg.NestedField{ID: 6, Name: "user_agent", Type: iceberg.PrimitiveTypes.String, Required: true}, iceberg.NestedField{ID: 7, Name: "resources", Type: resourcesListType, Required: false}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Define Arrow schema arrSchema := arrow.NewSchema([]arrow.Field{ @@ -397,7 +406,7 @@ func BenchmarkPartitionedWriteThroughput_MapPrimitive(b *testing.B) { mem := memory.DefaultAllocator // Define Iceberg schema with list - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "ts", Type: iceberg.PrimitiveTypes.TimestampTz, Required: true}, iceberg.NestedField{ID: 3, Name: "host", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -412,6 +421,9 @@ func BenchmarkPartitionedWriteThroughput_MapPrimitive(b *testing.B) { ValueRequired: false, }, Required: false}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Define Arrow schema arrSchema := arrow.NewSchema([]arrow.Field{ @@ -490,12 +502,15 @@ func BenchmarkPartitionedWriteThroughput_PartitionCount(b *testing.B) { ctx := context.Background() // Define Iceberg schema - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "ts", Type: iceberg.PrimitiveTypes.TimestampTz, Required: true}, iceberg.NestedField{ID: 3, Name: "partition_key", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 4, Name: "value", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + if err != nil { + b.Fatalf("failed to create schema: %v", err) + } // Define Arrow schema arrSchema := arrow.NewSchema([]arrow.Field{ diff --git a/table/scanner.go b/table/scanner.go index 8dfc238e1..f93524c3c 100644 --- a/table/scanner.go +++ b/table/scanner.go @@ -253,7 +253,12 @@ func (scan *Scan) buildManifestEvaluator(specID int) (func(iceberg.ManifestFile) func (scan *Scan) buildPartitionEvaluator(specID int) func(iceberg.DataFile) (bool, error) { spec := scan.metadata.PartitionSpecs()[specID] partType := spec.PartitionType(scan.metadata.CurrentSchema()) - partSchema := iceberg.NewSchema(0, partType.FieldList...) + partSchema, err := iceberg.NewSchema(0, partType.FieldList...) + if err != nil { + return func(d iceberg.DataFile) (bool, error) { + return false, fmt.Errorf("failed to create partition schema: %w", err) + } + } partExpr := scan.partitionFilters.Get(specID) return func(d iceberg.DataFile) (bool, error) { diff --git a/table/snapshot_producers_test.go b/table/snapshot_producers_test.go index 1c6922f14..c5a160540 100644 --- a/table/snapshot_producers_test.go +++ b/table/snapshot_producers_test.go @@ -138,7 +138,7 @@ func newTestDataFile(t *testing.T, spec iceberg.PartitionSpec, path string, part } func simpleSchema() *iceberg.Schema { - return iceberg.NewSchema(0, iceberg.NestedField{ + return iceberg.MustNewSchema(0, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, }) } diff --git a/table/snapshots_internal_test.go b/table/snapshots_internal_test.go index 796642f93..77797b36b 100644 --- a/table/snapshots_internal_test.go +++ b/table/snapshots_internal_test.go @@ -18,6 +18,7 @@ package table import ( + "fmt" "testing" "github.com/apache/iceberg-go" @@ -25,12 +26,20 @@ import ( "github.com/stretchr/testify/require" ) -var tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, - []int{2}, - iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, - iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, - iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, -) +var tableSchemaSimple *iceberg.Schema + +func init() { + var err error + tableSchemaSimple, err = iceberg.NewSchemaWithIdentifiers(1, + []int{2}, + iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, + iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, + iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, + ) + if err != nil { + panic(fmt.Sprintf("failed to create tableSchemaSimple: %v", err)) + } +} func TestSnapshotSummaryCollector(t *testing.T) { var ssc SnapshotSummaryCollector @@ -53,11 +62,12 @@ func TestSnapshotSummaryCollectorWithPartition(t *testing.T) { var ssc SnapshotSummaryCollector assert.Equal(t, iceberg.Properties{}, ssc.build()) - sc := iceberg.NewSchema(0, + sc, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "bool_field", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "string_field", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "int_field", Type: iceberg.PrimitiveTypes.Int32}, ) + require.NoError(t, err) spec := iceberg.NewPartitionSpec(iceberg.PartitionField{ Name: "int_field", SourceID: 3, FieldID: 1001, Transform: iceberg.IdentityTransform{}, }) diff --git a/table/substrait/substrait_test.go b/table/substrait/substrait_test.go index 26ee87852..0a4497d7d 100644 --- a/table/substrait/substrait_test.go +++ b/table/substrait/substrait_test.go @@ -29,7 +29,7 @@ import ( ) func TestRefTypes(t *testing.T) { - sc := iceberg.NewSchema(1, + sc, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "b", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 3, Name: "c", Type: iceberg.PrimitiveTypes.Int64}, @@ -42,7 +42,9 @@ func TestRefTypes(t *testing.T) { iceberg.NestedField{ID: 10, Name: "j", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 11, Name: "k", Type: iceberg.PrimitiveTypes.Binary}, iceberg.NestedField{ID: 12, Name: "l", Type: iceberg.PrimitiveTypes.UUID}, - iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}) + iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}, + ) + require.NoError(t, err) tests := []struct { name string @@ -80,14 +82,14 @@ func TestRefTypes(t *testing.T) { } var ( - tableSchemaSimple = iceberg.NewSchemaWithIdentifiers(1, + tableSchemaSimple = iceberg.MustNewSchemaWithIdentifiers(1, []int{2}, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.Int32, Required: true}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Bool}, ) - doubleSchema = iceberg.NewSchema(1, + doubleSchema = iceberg.MustNewSchema(1, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.Float64}) ) diff --git a/table/table_test.go b/table/table_test.go index cf5ae1baf..55cc83adc 100644 --- a/table/table_test.go +++ b/table/table_test.go @@ -153,11 +153,13 @@ func (t *TableTestSuite) TestNewTableFromReadFileGzipped() { } func (t *TableTestSuite) TestSchema() { - t.True(t.tbl.Schema().Equals(iceberg.NewSchemaWithIdentifiers(1, []int{1, 2}, + expectedSchema, err := iceberg.NewSchemaWithIdentifiers(1, []int{1, 2}, iceberg.NestedField{ID: 1, Name: "x", Type: iceberg.PrimitiveTypes.Int64, Required: true}, iceberg.NestedField{ID: 2, Name: "y", Type: iceberg.PrimitiveTypes.Int64, Required: true, Doc: "comment"}, iceberg.NestedField{ID: 3, Name: "z", Type: iceberg.PrimitiveTypes.Int64, Required: true}, - ))) + ) + t.Require().NoError(err) + t.True(t.tbl.Schema().Equals(expectedSchema)) } func (t *TableTestSuite) TestPartitionSpec() { @@ -239,11 +241,13 @@ func (t *TableWritingTestSuite) SetupSuite() { t.ctx = context.Background() mem := memory.DefaultAllocator - t.tableSchema = iceberg.NewSchema(0, + var err error + t.tableSchema, err = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 4, Name: "baz", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 10, Name: "qux", Type: iceberg.PrimitiveTypes.Date}) + t.Require().NoError(err) t.arrSchema = arrow.NewSchema([]arrow.Field{ {Name: "foo", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, @@ -252,7 +256,6 @@ func (t *TableWritingTestSuite) SetupSuite() { {Name: "qux", Type: arrow.PrimitiveTypes.Date32, Nullable: true}, }, nil) - var err error t.arrTbl, err = array.TableFromJSON(mem, t.arrSchema, []string{ `[{"foo": true, "bar": "bar_string", "baz": 123, "qux": "2024-03-07"}]`, }) @@ -294,7 +297,8 @@ func (t *TableWritingTestSuite) SetupSuite() { }) t.Require().NoError(err) - t.tableSchemaPromotedTypes = iceberg.NewSchema(0, + var err2 error + t.tableSchemaPromotedTypes, err2 = iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "long", Type: iceberg.PrimitiveTypes.Int64}, iceberg.NestedField{ ID: 2, Name: "list", @@ -312,6 +316,7 @@ func (t *TableWritingTestSuite) SetupSuite() { Required: true, }, iceberg.NestedField{ID: 7, Name: "double", Type: iceberg.PrimitiveTypes.Float64}) + t.Require().NoError(err2) // arrow-go needs to implement cast_extension for [16]byte -> uuid // iceberg.NestedField{ID: 8, Name: "uuid", Type: iceberg.PrimitiveTypes.UUID}) @@ -620,8 +625,9 @@ func (t *TableWritingTestSuite) TestAddFilesToPartitionedTableFailsLowerAndUpper func (t *TableWritingTestSuite) TestAddFilesWithLargeAndRegular() { ident := table.Identifier{"default", "unpartitioned_with_large_types_v" + strconv.Itoa(t.formatVersion)} - ice := iceberg.NewSchema(0, + ice, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.String, Required: true}) + t.Require().NoError(err) arrowSchema := arrow.NewSchema([]arrow.Field{ {Name: "foo", Type: arrow.BinaryTypes.String}, @@ -1310,7 +1316,7 @@ func (t *TableWritingTestSuite) TestWriteSpecialCharacterColumn() { ident := table.Identifier{"default", "write_special_character_column"} colNameWithSpecialChar := "letter/abc" - s := iceberg.NewSchema(0, + s, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: colNameWithSpecialChar, Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 2, Name: "id", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 3, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: true}, @@ -1322,6 +1328,7 @@ func (t *TableWritingTestSuite) TestWriteSpecialCharacterColumn() { {ID: 8, Name: colNameWithSpecialChar, Type: iceberg.PrimitiveTypes.String, Required: true}, }, }}) + t.Require().NoError(err) arrowSchema := arrow.NewSchema([]arrow.Field{ {Name: colNameWithSpecialChar, Type: arrow.BinaryTypes.String, Nullable: true}, @@ -1405,7 +1412,7 @@ func (t *TableWritingTestSuite) createTableWithProps(identifier table.Identifier } func tableSchema() *iceberg.Schema { - return iceberg.NewSchema(0, + sch, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "bool", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "string", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "string_long", Type: iceberg.PrimitiveTypes.String}, @@ -1424,6 +1431,11 @@ func tableSchema() *iceberg.Schema { iceberg.NestedField{ID: 16, Name: "med_dec", Type: iceberg.DecimalTypeOf(16, 2)}, iceberg.NestedField{ID: 17, Name: "large_dec", Type: iceberg.DecimalTypeOf(24, 2)}, ) + if err != nil { + panic(fmt.Sprintf("failed to create tableSchema: %v", err)) + } + + return sch } func arrowTableWithNull() arrow.Table { @@ -1865,7 +1877,7 @@ func TestWriteMapType(t *testing.T) { ctx := compute.WithAllocator(context.Background(), mem) cat.CreateNamespace(ctx, catalog.ToIdentifier("default"), nil) - iceSch := iceberg.NewSchema(1, + iceSch, err := iceberg.NewSchema(1, iceberg.NestedField{ ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.String, Required: true, }, @@ -1878,6 +1890,7 @@ func TestWriteMapType(t *testing.T) { ValueRequired: false, }, }) + require.NoError(t, err) ident := catalog.ToIdentifier("default", "repro_map") tbl, err := cat.CreateTable(ctx, ident, iceSch, catalog.WithLocation(loc)) diff --git a/table/time_travel_test.go b/table/time_travel_test.go index d20548d1e..26e4ae3bf 100644 --- a/table/time_travel_test.go +++ b/table/time_travel_test.go @@ -267,9 +267,12 @@ func TestSnapshotAsOfEdgeCases(t *testing.T) { // createTestMetadata creates metadata with custom snapshots and logs for testing func createTestMetadata(snapshots []Snapshot, snapshotLog []SnapshotLogEntry) (Metadata, error) { - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true}, ) + if err != nil { + return nil, err + } // Create basic metadata meta, err := NewMetadata(schema, iceberg.UnpartitionedSpec, UnsortedSortOrder, diff --git a/table/transaction_test.go b/table/transaction_test.go index a0269cb3f..bb061967a 100644 --- a/table/transaction_test.go +++ b/table/transaction_test.go @@ -70,11 +70,12 @@ func (s *SparkIntegrationTestSuite) SetupTest() { } func (s *SparkIntegrationTestSuite) TestSetProperties() { - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Int32}, ) + s.Require().NoError(err) tbl, err := s.cat.CreateTable(s.ctx, catalog.ToIdentifier("default", "go_test_set_properties"), icebergSchema) s.Require().NoError(err) @@ -172,7 +173,7 @@ func (s *SparkIntegrationTestSuite) TestAddFile() { } func (s *SparkIntegrationTestSuite) TestDifferentDataTypes() { - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "bool", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "string", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "string_long", Type: iceberg.PrimitiveTypes.String}, @@ -196,6 +197,7 @@ func (s *SparkIntegrationTestSuite) TestDifferentDataTypes() { }, }, ) + s.Require().NoError(err) arrowSchema, err := table.SchemaToArrowSchema(icebergSchema, nil, true, false) s.Require().NoError(err) @@ -320,11 +322,12 @@ func (s *SparkIntegrationTestSuite) TestDifferentDataTypes() { } func (s *SparkIntegrationTestSuite) TestUpdateSpec() { - icebergSchema := iceberg.NewSchema(0, + icebergSchema, err := iceberg.NewSchema(0, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Int32}, ) + s.Require().NoError(err) partitionSpec := iceberg.NewPartitionSpec( iceberg.PartitionField{SourceID: 2, FieldID: 1000, Transform: iceberg.TruncateTransform{Width: 5}, Name: "bar_truncate"}, diff --git a/table/update_schema.go b/table/update_schema.go index 0492f0855..15403d443 100644 --- a/table/update_schema.go +++ b/table/update_schema.go @@ -219,6 +219,38 @@ func (u *UpdateSchema) assignNewColumnID() int { return u.lastColumnID } +// normalizeTypeForSchemaCreation ensures nested types have temporary non-zero IDs +// to avoid schema validation conflicts. These IDs will be replaced by AssignFreshSchemaIDs. +// The field itself uses ID 1, so nested type IDs start from 2 to avoid conflicts. +func normalizeTypeForSchemaCreation(fieldType iceberg.Type) iceberg.Type { + switch t := fieldType.(type) { + case *iceberg.ListType: + if t.ElementID == 0 { + return &iceberg.ListType{ + ElementID: 2, // temporary ID (field uses 1), will be reassigned + Element: t.Element, + ElementRequired: t.ElementRequired, + } + } + + return t + case *iceberg.MapType: + if t.KeyID == 0 || t.ValueID == 0 { + return &iceberg.MapType{ + KeyID: 2, // temporary ID (field uses 1), will be reassigned + KeyType: t.KeyType, + ValueID: 3, // temporary ID, will be reassigned + ValueType: t.ValueType, + ValueRequired: t.ValueRequired, + } + } + + return t + default: + return t + } +} + func (u *UpdateSchema) findField(name string) (iceberg.NestedField, bool) { if u.caseSensitive { return u.schema.FindFieldByName(name) @@ -317,9 +349,14 @@ func (u *UpdateSchema) addColumn(path []string, fieldType iceberg.Type, doc stri } } + // Normalize nested types to have temporary non-zero IDs to avoid schema validation conflicts. + // These IDs will be replaced by AssignFreshSchemaIDs. + normalizedType := normalizeTypeForSchemaCreation(fieldType) + field := iceberg.NestedField{ + ID: 1, // temporary ID to avoid validation conflict with nested type IDs Name: name, - Type: fieldType, + Type: normalizedType, Required: required, Doc: doc, } @@ -328,7 +365,11 @@ func (u *UpdateSchema) addColumn(path []string, fieldType iceberg.Type, doc stri field.WriteDefault = defaultValue.Any() } - sch, err := iceberg.AssignFreshSchemaIDs(iceberg.NewSchema(0, field), u.assignNewColumnID) + tempSchema, err := iceberg.NewSchema(0, field) + if err != nil { + return err + } + sch, err := iceberg.AssignFreshSchemaIDs(tempSchema, u.assignNewColumnID) if err != nil { return fmt.Errorf("failed to assign field id: %w", err) } @@ -686,7 +727,10 @@ func (u *UpdateSchema) Apply() (*iceberg.Schema, error) { } identifierFieldIDs := make([]int, 0) - newSchema := iceberg.NewSchema(0, st.(*iceberg.StructType).FieldList...) + newSchema, err := iceberg.NewSchema(0, st.(*iceberg.StructType).FieldList...) + if err != nil { + return nil, fmt.Errorf("error creating schema: %w", err) + } for name := range u.identifierFieldNames { var field iceberg.NestedField var ok bool @@ -708,7 +752,7 @@ func (u *UpdateSchema) Apply() (*iceberg.Schema, error) { }).ID } - return iceberg.NewSchemaWithIdentifiers(nextSchemaID, identifierFieldIDs, st.(*iceberg.StructType).FieldList...), nil + return iceberg.NewSchemaWithIdentifiers(nextSchemaID, identifierFieldIDs, st.(*iceberg.StructType).FieldList...) } func (u *UpdateSchema) Commit() error { diff --git a/table/update_schema_test.go b/table/update_schema_test.go index 8f35363d6..a019c6f28 100644 --- a/table/update_schema_test.go +++ b/table/update_schema_test.go @@ -18,37 +18,50 @@ package table import ( + "fmt" "testing" "github.com/apache/iceberg-go" "github.com/stretchr/testify/assert" ) -var originalSchema = iceberg.NewSchema(1, - iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, Doc: ""}, - iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, - iceberg.NestedField{ID: 3, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: false, Doc: ""}, - iceberg.NestedField{ID: 4, Name: "address", Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{ - {ID: 5, Name: "city", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, - {ID: 6, Name: "zip", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, - }, - }, Required: false, Doc: ""}, - iceberg.NestedField{ID: 7, Name: "tags", Type: &iceberg.ListType{ - ElementID: 8, - Element: iceberg.PrimitiveTypes.String, - ElementRequired: false, - }, Required: false, Doc: ""}, - iceberg.NestedField{ID: 9, Name: "properties", Type: &iceberg.MapType{ - KeyID: 10, - KeyType: iceberg.PrimitiveTypes.String, - ValueID: 11, - ValueType: iceberg.PrimitiveTypes.String, - ValueRequired: false, - }, Required: false, Doc: ""}, -) +var originalSchema *iceberg.Schema + +func init() { + originalSchema = iceberg.MustNewSchema(1, + iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, Doc: ""}, + iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, + iceberg.NestedField{ID: 3, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: false, Doc: ""}, + iceberg.NestedField{ID: 4, Name: "address", Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{ + {ID: 5, Name: "city", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, + {ID: 6, Name: "zip", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, + }, + }, Required: false, Doc: ""}, + iceberg.NestedField{ID: 7, Name: "tags", Type: &iceberg.ListType{ + ElementID: 8, + Element: iceberg.PrimitiveTypes.String, + ElementRequired: false, + }, Required: false, Doc: ""}, + iceberg.NestedField{ID: 9, Name: "properties", Type: &iceberg.MapType{ + KeyID: 10, + KeyType: iceberg.PrimitiveTypes.String, + ValueID: 11, + ValueType: iceberg.PrimitiveTypes.String, + ValueRequired: false, + }, Required: false, Doc: ""}, + ) +} + +var testMetadata Metadata -var testMetadata, _ = NewMetadata(originalSchema, nil, UnsortedSortOrder, "", nil) +func init() { + var err error + testMetadata, err = NewMetadata(originalSchema, nil, UnsortedSortOrder, "", nil) + if err != nil { + panic(fmt.Sprintf("failed to create testMetadata: %v", err)) + } +} func TestAddColumn(t *testing.T) { t.Run("test update schema with add primitive type on top level", func(t *testing.T) { @@ -400,7 +413,7 @@ func TestApplyChanges(t *testing.T) { }) t.Run("test apply changes on add field that delete in same time", func(t *testing.T) { - originalSchema := iceberg.NewSchema(1, + originalSchema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, Doc: ""}, iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, iceberg.NestedField{ID: 3, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: false, Doc: ""}, @@ -812,7 +825,7 @@ func TestSetIdentifierField(t *testing.T) { t.Run("test set identifier field replaces existing identifier fields", func(t *testing.T) { // Create a schema with existing identifier fields - schemaWithIdentifiers := iceberg.NewSchemaWithIdentifiers(1, []int{1}, // id is initially an identifier + schemaWithIdentifiers, err := iceberg.NewSchemaWithIdentifiers(1, []int{1}, // id is initially an identifier iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int32, Required: true, Doc: ""}, iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String, Required: false, Doc: ""}, iceberg.NestedField{ID: 3, Name: "age", Type: iceberg.PrimitiveTypes.Int32, Required: false, Doc: ""}, diff --git a/table/update_spec_test.go b/table/update_spec_test.go index ea1131119..bf2d143fd 100644 --- a/table/update_spec_test.go +++ b/table/update_spec_test.go @@ -25,7 +25,7 @@ import ( "github.com/stretchr/testify/assert" ) -var testSchema = iceberg.NewSchema(1, +var testSchema = iceberg.MustNewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Required: true, Type: iceberg.PrimitiveTypes.Int64}, iceberg.NestedField{ID: 2, Name: "name", Required: true, Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 3, Name: "ts", Required: false, Type: iceberg.PrimitiveTypes.Timestamp}, diff --git a/table/updates_test.go b/table/updates_test.go index 831abb4b6..92ce51b11 100644 --- a/table/updates_test.go +++ b/table/updates_test.go @@ -168,15 +168,20 @@ func TestUnmarshalUpdates(t *testing.T) { "schema-id": 1 } ]`), - expected: Updates{ - NewAddSchemaUpdate(iceberg.NewSchema(1, + expected: func() Updates { + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.StringType{}, Required: true}, - )), - NewAddPartitionSpecUpdate( - &spec, false), - NewAddSortOrderUpdate(&sortOrder), - NewSetCurrentSchemaUpdate(1), - }, + ) + require.NoError(t, err) + + return Updates{ + NewAddSchemaUpdate(schema), + NewAddPartitionSpecUpdate( + &spec, false), + NewAddSortOrderUpdate(&sortOrder), + NewSetCurrentSchemaUpdate(1), + } + }(), expectedErr: false, }, { diff --git a/transforms_test.go b/transforms_test.go index a6ee85d96..0e8a40266 100644 --- a/transforms_test.go +++ b/transforms_test.go @@ -196,7 +196,7 @@ func TestManifestPartitionVals(t *testing.T) { require.True(t, result.Valid) assert.Equal(t, tt.expectResult, result.Val) - schema := iceberg.NewSchema(0, iceberg.NestedField{ + schema, err := iceberg.NewSchema(0, iceberg.NestedField{ Name: "abc", ID: 1, Type: tt.input.Type(), diff --git a/types_test.go b/types_test.go index 8649c259c..f8daca012 100644 --- a/types_test.go +++ b/types_test.go @@ -261,7 +261,7 @@ func TestUnknownTypeInNestedStructs(t *testing.T) { ValueRequired: false, } - schema := iceberg.NewSchema(1, + schema, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.Int64Type{}, Required: true}, iceberg.NestedField{ID: 2, Name: "top", Type: iceberg.UnknownType{}, Required: false}, iceberg.NestedField{ID: 3, Name: "arr", Type: listType, Required: false}, @@ -281,6 +281,7 @@ func TestUnknownTypeInNestedStructs(t *testing.T) { }, Required: true}, iceberg.NestedField{ID: 14, Name: "standalone_map", Type: standaloneMapType, Required: false}, ) + require.NoError(t, err) out, err := json.Marshal(schema) require.NoError(t, err) diff --git a/view/metadata_builder.go b/view/metadata_builder.go index 85b21b4df..c29ef859d 100644 --- a/view/metadata_builder.go +++ b/view/metadata_builder.go @@ -227,7 +227,11 @@ func (b *MetadataBuilder) addSchema(schema *iceberg.Schema) (int, error) { newSchema := schema // Build a fresh schema if we reset the ID if schema.ID != newSchemaID { - newSchema = iceberg.NewSchemaWithIdentifiers(newSchemaID, schema.IdentifierFieldIDs, schema.Fields()...) + var err error + newSchema, err = iceberg.NewSchemaWithIdentifiers(newSchemaID, schema.IdentifierFieldIDs, schema.Fields()...) + if err != nil { + return 0, fmt.Errorf("error creating schema with new ID: %w", err) + } } b.schemaList = append(b.schemaList, newSchema) diff --git a/view/metadata_builder_test.go b/view/metadata_builder_test.go index 418fb05d0..6ee9c6cfd 100644 --- a/view/metadata_builder_test.go +++ b/view/metadata_builder_test.go @@ -66,7 +66,12 @@ func newTestSchema(schemaID int, optFieldName ...string) *iceberg.Schema { fieldName = optFieldName[0] } - return iceberg.NewSchema(schemaID, iceberg.NestedField{ID: 1, Name: fieldName, Type: iceberg.PrimitiveTypes.Int64}) + schema, err := iceberg.NewSchema(schemaID, iceberg.NestedField{ID: 1, Name: fieldName, Type: iceberg.PrimitiveTypes.Int64}) + if err != nil { + return nil + } + + return schema } func stripErr[T any](v T, _ error) T { diff --git a/visitors_test.go b/visitors_test.go index ed18b48cd..7bf3859cd 100644 --- a/visitors_test.go +++ b/visitors_test.go @@ -18,6 +18,7 @@ package iceberg_test import ( + "fmt" "math" "strings" "testing" @@ -287,53 +288,62 @@ func rowOf(vals ...any) rowTester { return rowTester(vals) } -var testSchema = iceberg.NewSchema(1, - iceberg.NestedField{ - ID: 13, Name: "x", - Type: iceberg.PrimitiveTypes.Int32, Required: true, - }, - iceberg.NestedField{ - ID: 14, Name: "y", - Type: iceberg.PrimitiveTypes.Float64, Required: true, - }, - iceberg.NestedField{ - ID: 15, Name: "z", - Type: iceberg.PrimitiveTypes.Int32, - }, - iceberg.NestedField{ - ID: 16, Name: "s1", - Type: &iceberg.StructType{ +var testSchema *iceberg.Schema + +func init() { + var err error + testSchema, err = iceberg.NewSchema(1, + iceberg.NestedField{ + ID: 13, Name: "x", + Type: iceberg.PrimitiveTypes.Int32, Required: true, + }, + iceberg.NestedField{ + ID: 14, Name: "y", + Type: iceberg.PrimitiveTypes.Float64, Required: true, + }, + iceberg.NestedField{ + ID: 15, Name: "z", + Type: iceberg.PrimitiveTypes.Int32, + }, + iceberg.NestedField{ + ID: 16, Name: "s1", + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{{ + ID: 17, Name: "s2", Required: true, + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{{ + ID: 18, Name: "s3", Required: true, + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{{ + ID: 19, Name: "s4", Required: true, + Type: &iceberg.StructType{ + FieldList: []iceberg.NestedField{{ + ID: 20, Name: "i", Required: true, + Type: iceberg.PrimitiveTypes.Int32, + }}, + }, + }}, + }, + }}, + }, + }}, + }, + }, + iceberg.NestedField{ID: 21, Name: "s5", Type: &iceberg.StructType{ FieldList: []iceberg.NestedField{{ - ID: 17, Name: "s2", Required: true, - Type: &iceberg.StructType{ + ID: 22, Name: "s6", Required: true, Type: &iceberg.StructType{ FieldList: []iceberg.NestedField{{ - ID: 18, Name: "s3", Required: true, - Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{{ - ID: 19, Name: "s4", Required: true, - Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{{ - ID: 20, Name: "i", Required: true, - Type: iceberg.PrimitiveTypes.Int32, - }}, - }, - }}, - }, + ID: 23, Name: "f", Required: true, Type: iceberg.PrimitiveTypes.Float32, }}, }, }}, - }, - }, - iceberg.NestedField{ID: 21, Name: "s5", Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{{ - ID: 22, Name: "s6", Required: true, Type: &iceberg.StructType{ - FieldList: []iceberg.NestedField{{ - ID: 23, Name: "f", Required: true, Type: iceberg.PrimitiveTypes.Float32, - }}, - }, }}, - }}, - iceberg.NestedField{ID: 24, Name: "s", Type: iceberg.PrimitiveTypes.String}) + iceberg.NestedField{ID: 24, Name: "s", Type: iceberg.PrimitiveTypes.String}, + ) + if err != nil { + panic(fmt.Sprintf("failed to create testSchema: %v", err)) + } +} func TestExprEvaluator(t *testing.T) { type testCase struct { @@ -543,7 +553,7 @@ func TestExprEvaluator(t *testing.T) { } func TestEvaluatorCmpTypes(t *testing.T) { - sc := iceberg.NewSchema(1, + sc, err := iceberg.NewSchema(1, iceberg.NestedField{ID: 1, Name: "a", Type: iceberg.PrimitiveTypes.Bool}, iceberg.NestedField{ID: 2, Name: "b", Type: iceberg.PrimitiveTypes.Int32}, iceberg.NestedField{ID: 3, Name: "c", Type: iceberg.PrimitiveTypes.Int64}, @@ -556,7 +566,9 @@ func TestEvaluatorCmpTypes(t *testing.T) { iceberg.NestedField{ID: 10, Name: "j", Type: iceberg.PrimitiveTypes.String}, iceberg.NestedField{ID: 11, Name: "k", Type: iceberg.PrimitiveTypes.Binary}, iceberg.NestedField{ID: 12, Name: "l", Type: iceberg.PrimitiveTypes.UUID}, - iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}) + iceberg.NestedField{ID: 13, Name: "m", Type: iceberg.FixedTypeOf(5)}, + ) + require.NoError(t, err) rowData := rowOf(true, 5, 5, float32(5.0), float64(5.0),