From 7de83f3ba8bff2d27fa76622ce50f7c06afb1af4 Mon Sep 17 00:00:00 2001 From: fcote Date: Thu, 30 Oct 2025 15:52:36 +0100 Subject: [PATCH] Add SkipAdditionalFields codec option Introduces a new CodecOption.SkipAdditionalFields that allows decoding textual Avro data with additional fields not defined in the schema. When enabled, unknown fields are skipped instead of causing errors. This is useful for schema evolution scenarios where newer data formats may include fields that older schemas don't define. The option defaults to false to maintain backward compatibility. Changes: - Add SkipAdditionalFields field to CodecOption - Update genericMapTextDecoder to skip unknown fields when enabled - Add advanceToNextValue helper to skip JSON values - Thread option through record and union decoders --- codec.go | 5 +++++ map.go | 28 +++++++++++++++++------- map_test.go | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++ record.go | 2 +- text.go | 31 +++++++++++++++++++++++++++ union.go | 4 +++- 6 files changed, 122 insertions(+), 10 deletions(-) diff --git a/codec.go b/codec.go index 192bcf61..a1415485 100644 --- a/codec.go +++ b/codec.go @@ -49,6 +49,11 @@ type CodecOption struct { // When true, the string literal "null" in textual Avro data will be coerced to Go's nil. // Primarily used to handle edge cases where some Avro implementations allow string representations of null. EnableStringNull bool + + // SkipAdditionalFields controls handling of additional fields during decoding. + // When true, any additional input fields not defined in the schema will be skipped during decoding. + // When false (default), any additional input fields not defined in the schema will result in an error. + SkipAdditionalFields bool } // Codec supports decoding binary and text Avro data to Go native data types, diff --git a/map.go b/map.go index 8bb885d4..6f90f7f2 100644 --- a/map.go +++ b/map.go @@ -141,7 +141,7 @@ func makeMapCodec(st map[string]*Codec, namespace string, schemaMap map[string]i return longBinaryFromNative(buf, 0) // append tailing 0 block count to signal end of Map }, nativeFromTextual: func(buf []byte) (interface{}, []byte, error) { - return genericMapTextDecoder(buf, valueCodec, nil) // codecFromKey == nil + return genericMapTextDecoder(buf, valueCodec, nil, cb.option.SkipAdditionalFields) // codecFromKey == nil }, textualFromNative: func(buf []byte, datum interface{}) ([]byte, error) { return genericMapTextEncoder(buf, datum, valueCodec, nil) @@ -155,7 +155,12 @@ func makeMapCodec(st map[string]*Codec, namespace string, schemaMap map[string]i // error if it encounters a map key that is not present in codecFromKey. If // codecFromKey is nil, every map value will be decoded using defaultCodec, if // possible. -func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[string]*Codec) (map[string]interface{}, []byte, error) { +func genericMapTextDecoder( + buf []byte, + defaultCodec *Codec, + codecFromKey map[string]*Codec, + skipAdditionalFields bool, +) (map[string]interface{}, []byte, error) { var value interface{} var err error var b byte @@ -191,7 +196,7 @@ func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[str if fieldCodec == nil { fieldCodec = defaultCodec } - if fieldCodec == nil { + if fieldCodec == nil && !skipAdditionalFields { return nil, nil, fmt.Errorf("cannot decode textual map: cannot determine codec: %q", key) } // decode colon @@ -202,12 +207,19 @@ func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[str if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { return nil, nil, io.ErrShortBuffer } - value, buf, err = fieldCodec.nativeFromTextual(buf) - if err != nil { - return nil, nil, fmt.Errorf("%s for key: %q", err, key) + if fieldCodec != nil { + value, buf, err = fieldCodec.nativeFromTextual(buf) + if err != nil { + return nil, nil, fmt.Errorf("%s for key: %q", err, key) + } + // set map value for key + mapValues[key] = value + } else { + // skipAdditionalFields is true and we have no codec + if buf, _ = advanceToNextValue(buf); len(buf) == 0 { + return nil, nil, io.ErrShortBuffer + } } - // set map value for key - mapValues[key] = value // either comma or closing curly brace if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { return nil, nil, io.ErrShortBuffer diff --git a/map_test.go b/map_test.go index d1b02d29..84ee3092 100644 --- a/map_test.go +++ b/map_test.go @@ -12,6 +12,7 @@ package goavro import ( "fmt" "log" + "reflect" "testing" ) @@ -149,6 +150,67 @@ func TestMapTextualReceiveSliceInt(t *testing.T) { testTextEncodeFail(t, `{"type":"map","values":"int"}`, map[int]int{42: 13}, "cannot create map[string]interface{}") } +// TestMapSkipAdditionalFields tests that when a map is used within a record, +// additional fields at the record level can be skipped when SkipAdditionalFields is enabled. +func TestMapSkipAdditionalFields(t *testing.T) { + // Note: For a plain map type, all string keys are valid, so we test + // the skip functionality in the context of a record containing a map + schema := `{ + "type": "record", + "name": "RecordWithMap", + "fields": [ + { + "name": "validMap", + "type": {"type": "map", "values": "int"} + } + ] + }` + + testSkipAdditionalFieldsFail(t, schema, false, []byte(`{"validMap": {}, "additionalField": 1}`)) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": 1}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": 1, "additionalField2": 2}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": true}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": "1"}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": {"nested": 1"}}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": [1, 2, 3]}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) +} + +func testSkipAdditionalFieldsFail(t *testing.T, schema string, skipAdditionalFields bool, input []byte) { + t.Helper() + option := &CodecOption{ + SkipAdditionalFields: skipAdditionalFields, + } + codec, err := NewCodecWithOptions(schema, option) + if err != nil { + t.Fatal(err) + } + _, _, err = codec.NativeFromTextual(input) + if err == nil { + t.Error("Expected error when decoding record with additional field, but got nil") + } +} + +func testSkipAdditionalFieldsPass(t *testing.T, schema string, skipAdditionalFields bool, input []byte, expected interface{}) { + t.Helper() + option := &CodecOption{ + SkipAdditionalFields: skipAdditionalFields, + } + codec, err := NewCodecWithOptions(schema, option) + if err != nil { + t.Fatal(err) + } + datum, remaining, err := codec.NativeFromTextual(input) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if len(remaining) != 0 { + t.Errorf("Expected no remaining bytes, got %d", len(remaining)) + } + if !reflect.DeepEqual(datum, expected) { + t.Errorf("Expected %v, got %v", expected, datum) + } +} + func ExampleMap() { codec, err := NewCodec(`{ "name": "r1", diff --git a/record.go b/record.go index 305dad68..763d58a5 100644 --- a/record.go +++ b/record.go @@ -200,7 +200,7 @@ func makeRecordCodec(st map[string]*Codec, enclosingNamespace string, schemaMap // NOTE: Setting `defaultCodec == nil` instructs genericMapTextDecoder // to return an error when a field name is not found in the // codecFromFieldName map. - mapValues, buf, err = genericMapTextDecoder(buf, nil, codecFromFieldName) + mapValues, buf, err = genericMapTextDecoder(buf, nil, codecFromFieldName, cb.option.SkipAdditionalFields) if err != nil { return nil, nil, fmt.Errorf("cannot decode textual record %q: %s", c.typeName, err) } diff --git a/text.go b/text.go index 10e53082..00225ed2 100644 --- a/text.go +++ b/text.go @@ -39,3 +39,34 @@ func advanceToNonWhitespace(buf []byte) ([]byte, error) { } return nil, io.ErrShortBuffer } + +var ( + valueBoundaries = map[byte]byte{ + '"': '"', + '{': '}', + '[': ']', + } +) + +func advanceToNextValue(buf []byte) ([]byte, error) { + for openingRune, closingRune := range valueBoundaries { + if buf[0] == openingRune { + for i := 1; i < len(buf); i++ { + if buf[i] == closingRune { + return buf[i+1:], nil + } + } + return nil, io.ErrShortBuffer + } + } + + // If we get here, then we are not in a string, map, or array. most likely a number or boolean + // We can just scan ahead to the next comma, closing brace, or closing bracket + for i := 1; i < len(buf); i++ { + if buf[i] == ',' || buf[i] == '}' || buf[i] == ']' { + return buf[i:], nil + } + } + + return nil, fmt.Errorf("expected: token; actual: %q", buf[0]) +} diff --git a/union.go b/union.go index ac5a18f4..95582717 100644 --- a/union.go +++ b/union.go @@ -24,6 +24,7 @@ type codecInfo struct { codecFromIndex []*Codec codecFromName map[string]*Codec indexFromName map[string]int + option *CodecOption } // Union wraps a datum value in a map for encoding as a Union, as required by @@ -83,6 +84,7 @@ func makeCodecInfo(st map[string]*Codec, enclosingNamespace string, schemaArray codecFromIndex: codecFromIndex, codecFromName: codecFromName, indexFromName: indexFromName, + option: cb.option, }, nil } @@ -151,7 +153,7 @@ func unionNativeFromTextual(cr *codecInfo) func(buf []byte) (interface{}, []byte var datum interface{} var err error - datum, buf, err = genericMapTextDecoder(buf, nil, cr.codecFromName) + datum, buf, err = genericMapTextDecoder(buf, nil, cr.codecFromName, cr.option.SkipAdditionalFields) if err != nil { return nil, nil, fmt.Errorf("cannot decode textual union: %s", err) }