From 5481310e542a200426aa6f729c83ca6d49ae8473 Mon Sep 17 00:00:00 2001 From: Irina Khismatullina Date: Wed, 16 Oct 2019 18:34:32 +0300 Subject: [PATCH 1/5] Add function for loading the model Signed-off-by: Irina Khismatullina --- bpe.go | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++ bpe_test.go | 44 +++++++++++++++++++++ go.mod | 5 +++ go.sum | 21 ++++++++++ main.go | 2 +- 5 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 bpe.go create mode 100644 bpe_test.go diff --git a/bpe.go b/bpe.go new file mode 100644 index 0000000..81419a6 --- /dev/null +++ b/bpe.go @@ -0,0 +1,110 @@ +package bpe + +import ( + "bufio" + "fmt" + "io" + + "github.com/sirupsen/logrus" +) + +type TokenId int32 + +type DecodedToken []TokenId + +type Rule struct { + left TokenId + right TokenId + result TokenId +} + +type SpecialTokens struct { + unk TokenId + pad TokenId + bos TokenId + eos TokenId +} + +type Model struct { + char2id map[rune]TokenId + id2char map[TokenId]rune + rules []Rule + recipe map[TokenId]DecodedToken + revRecipe map[string]TokenId + specialTokens SpecialTokens +} + +func NewModel(nRules int) *Model { + return &Model{ + make(map[rune]TokenId), + make(map[TokenId]rune), + make([]Rule, nRules), + make(map[TokenId]DecodedToken), + make(map[string]TokenId), + SpecialTokens{-1, -1, -1, -1}, + } +} + +func DecodedTokenToString(token DecodedToken, id2char map[TokenId]rune) (string, error) { + word := "" + for _, id := range token { + if char, ok := id2char[id]; ok { + word = word + string(char) + } else { + logrus.Fatalf("%d key not found in id2char", id) + } + } + return word, nil +} + +func ReadModel(reader io.Reader) (*Model, error) { + scanner := bufio.NewScanner(reader) + var nChars, nRules int + scanner.Scan() + _, err := fmt.Sscanf(scanner.Text(), "%d %d", &nChars, &nRules) + if err != nil { + logrus.Fatal("Wrong input format: ", err) + return &Model{}, err + } + model := NewModel(nRules) + model.rules = make([]Rule, nRules) + for i := 0; i 
< nChars; i++ { + var char rune + var charId TokenId + scanner.Scan() + _, err = fmt.Sscanf(scanner.Text(), "%d %d", &char, &charId) + if err != nil { + logrus.Fatal("Wrong input format: ", err) + return model, err + } + model.char2id[char] = charId + model.id2char[charId] = char + model.recipe[charId] = DecodedToken{charId} + model.revRecipe[string(char)] = charId + } + for i := 0; i < nRules; i++ { + var rule Rule + scanner.Scan() + _, err = fmt.Sscanf(scanner.Text(), "%d %d %d", &rule.left, &rule.right, &rule.result) + if err != nil { + logrus.Fatal("Wrong input format: ", err) + return model, err + } + model.rules[i] = rule + model.recipe[rule.result] = append(model.recipe[rule.left], model.recipe[rule.right]...) + resultString, err := DecodedTokenToString(model.recipe[rule.result], model.id2char) + if err != nil { + logrus.Fatal("Unexpected token id inside the rules: ", err) + return model, err + } + model.revRecipe[resultString] = rule.result + } + scanner.Scan() + _, err = fmt.Sscanf(scanner.Text(), "%d %d %d %d", &model.specialTokens.unk, + &model.specialTokens.pad, &model.specialTokens.bos, &model.specialTokens.eos) + if err != nil { + logrus.Fatal("Wrong input format: ", err) + return model, err + } + return model, nil +} diff --git a/bpe_test.go b/bpe_test.go new file mode 100644 index 0000000..0facad9 --- /dev/null +++ b/bpe_test.go @@ -0,0 +1,44 @@ +package bpe + +import ( + "github.com/stretchr/testify/require" + "strings" + "testing" +) + +func TestNewModel(t *testing.T) { + model := NewModel(10) + require.Equal(t, 10, len(model.rules)) +} + +func TestDecodedTokenToString(t *testing.T) { + id2char := map[TokenId]rune{1: []rune("a")[0], 2: []rune("b")[0], 3: []rune("c")[0]} + word, err := DecodedTokenToString(DecodedToken{1, 2, 1, 3, 3}, id2char) + require.NoError(t, err) + require.Equal(t, "abacc", word) +} + +func TestReadModel(t *testing.T) { + reader := strings.NewReader(`5 4 +99 6 +98 7 +95 4 +100 5 +97 8 +4 8 9 +4 6 10 +4 5 11 +4 7 12 +1 0 2 4`) 
+ expected := Model{ + map[rune]TokenId{97: 8, 98: 7, 99: 6, 100: 5, 95: 4}, + map[TokenId]rune{4: 95, 5: 100, 6: 99, 7: 98, 8: 97}, + []Rule{{4, 8, 9}, {4, 6, 10}, {4, 5, 11}, {4, 7, 12}}, + map[TokenId]DecodedToken{4: {4}, 5: {5}, 6: {6}, 7: {7}, 8: {8}, 9: {4, 8}, 10: {4, 6}, 11: {4, 5}, 12: {4, 7}}, + map[string]TokenId{"a": 8, "b": 7, "c": 6, "d": 5, "_": 4, + "_a": 9, "_b": 12, "_c": 10, "_d": 11}, + SpecialTokens{1, 0, 2, 4}, + } + model, _ := ReadModel(reader) + require.Equal(t, expected, *model) +} diff --git a/go.mod b/go.mod index 7d8e300..510e312 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,8 @@ module github.com/src-d/go-YouTokenToMe go 1.12 + +require ( + github.com/sirupsen/logrus v1.4.2 + github.com/stretchr/testify v1.4.0 +) diff --git a/go.sum b/go.sum index e69de29..3f2d26f 100644 --- a/go.sum +++ b/go.sum @@ -0,0 +1,21 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod 
h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/main.go b/main.go index 953cd3d..a779181 100644 --- a/main.go +++ b/main.go @@ -1,4 +1,4 @@ -package main +package bpe import "fmt" From 62d068077caa0cb4602a163b62b267651f1e5842 Mon Sep 17 00:00:00 2001 From: Irina Khismatullina Date: Thu, 17 Oct 2019 10:48:21 +0300 Subject: [PATCH 2/5] Fix style Signed-off-by: Irina Khismatullina --- bpe.go | 78 ++++++++++++++++++++++++++++------------------------- bpe_test.go | 23 ++++++++-------- go.sum | 21 --------------- main.go | 7 ----- 4 files changed, 54 insertions(+), 75 deletions(-) delete mode 100644 go.sum delete mode 100644 main.go diff --git a/bpe.go b/bpe.go index 81419a6..8552576 100644 --- a/bpe.go +++ b/bpe.go @@ -8,44 +8,50 @@ import ( "github.com/sirupsen/logrus" ) -type TokenId int32 +// TokenID is a numerical identifier of the subword token +type TokenID uint32 -type DecodedToken []TokenId +// EncodedToken is a sequence of subword token ids +type EncodedToken []TokenID -type Rule struct { - left TokenId - right TokenId - result TokenId +type rule struct { + left TokenID + right TokenID + result TokenID } -type SpecialTokens struct { - unk TokenId - pad TokenId - bos TokenId - eos TokenId +type specialTokens struct 
{ + unk int + pad int + bos int + eos int } +// Model is a Byte-Pair encoding model, which supports encoding and decoding text into sequences +// of most frequent subword tokens type Model struct { - char2id map[rune]TokenId - id2char map[TokenId]rune - rules []Rule - recipe map[TokenId]DecodedToken - revRecipe map[string]TokenId - specialTokens SpecialTokens + char2id map[rune]TokenID + id2char map[TokenID]rune + rules []rule + recipe map[TokenID]EncodedToken + revRecipe map[string]TokenID + specialTokens specialTokens } -func NewModel(nRules int) *Model { +func newModel(nRules int) *Model { return &Model{ - make(map[rune]TokenId), - make(map[TokenId]rune), - make([]Rule, nRules), - make(map[TokenId]DecodedToken), - make(map[string]TokenId), - SpecialTokens{-1, -1, -1, -1}, + make(map[rune]TokenID), + make(map[TokenID]rune), + make([]rule, nRules), + make(map[TokenID]EncodedToken), + make(map[string]TokenID), + specialTokens{-1, -1, -1, -1}, } } -func DecodedTokenToString(token DecodedToken, id2char map[TokenId]rune) (string, error) { +// DecodeToken converts the sequence of chars' ids into the string - +// sequence of the corresponding chars +func DecodeToken(token EncodedToken, id2char map[TokenID]rune) (string, error) { word := "" for _, id := range token { if char, ok := id2char[id]; ok { @@ -57,7 +63,8 @@ func DecodedTokenToString(token DecodedToken, id2char map[TokenId]rune) (string, return word, nil } -func ReadModel(reader io.Reader) (*Model, error) { +// ReadModelFromText loads the BPE model from the text dump +func ReadModelFromText(reader io.Reader) (*Model, error) { scanner := bufio.NewScanner(reader) var nChars, nRules int scanner.Scan() @@ -66,24 +73,23 @@ func ReadModel(reader io.Reader) (*Model, error) { logrus.Fatal("Wrong input format: ", err) return &Model{}, err } - model := NewModel(nRules) - model.rules = make([]Rule, nRules) + model := newModel(nRules) for i := 0; i < nChars; i++ { var char rune - var charId TokenId + var charID TokenID 
scanner.Scan() - _, err = fmt.Sscanf(scanner.Text(), "%d %d", &char, &charId) + _, err = fmt.Sscanf(scanner.Text(), "%d %d", &char, &charID) if err != nil { logrus.Fatal("Wrong input format: ", err) return model, err } - model.char2id[char] = charId - model.id2char[charId] = char - model.recipe[charId] = DecodedToken{charId} - model.revRecipe[string(char)] = charId + model.char2id[char] = charID + model.id2char[charID] = char + model.recipe[charID] = EncodedToken{charID} + model.revRecipe[string(char)] = charID } for i := 0; i < nRules; i++ { - var rule Rule + var rule rule scanner.Scan() _, err = fmt.Sscanf(scanner.Text(), "%d %d %d", &rule.left, &rule.right, &rule.result) if err != nil { @@ -92,7 +98,7 @@ func ReadModel(reader io.Reader) (*Model, error) { } model.rules[i] = rule model.recipe[rule.result] = append(model.recipe[rule.left], model.recipe[rule.right]...) - resultString, err := DecodedTokenToString(model.recipe[rule.result], model.id2char) + resultString, err := DecodeToken(model.recipe[rule.result], model.id2char) if err != nil { logrus.Fatal("Unexpected token id inside the rules: ", err) return model, err diff --git a/bpe_test.go b/bpe_test.go index 0facad9..659fc71 100644 --- a/bpe_test.go +++ b/bpe_test.go @@ -1,19 +1,20 @@ package bpe import ( - "github.com/stretchr/testify/require" "strings" "testing" + + "github.com/stretchr/testify/require" ) func TestNewModel(t *testing.T) { - model := NewModel(10) + model := newModel(10) require.Equal(t, 10, len(model.rules)) } func TestDecodedTokenToString(t *testing.T) { - id2char := map[TokenId]rune{1: []rune("a")[0], 2: []rune("b")[0], 3: []rune("c")[0]} - word, err := DecodedTokenToString(DecodedToken{1, 2, 1, 3, 3}, id2char) + id2char := map[TokenID]rune{1: []rune("a")[0], 2: []rune("b")[0], 3: []rune("c")[0]} + word, err := DecodeToken(EncodedToken{1, 2, 1, 3, 3}, id2char) require.NoError(t, err) require.Equal(t, "abacc", word) } @@ -31,14 +32,14 @@ func TestReadModel(t *testing.T) { 4 7 12 1 0 2 4`) 
expected := Model{ - map[rune]TokenId{97: 8, 98: 7, 99: 6, 100: 5, 95: 4}, - map[TokenId]rune{4: 95, 5: 100, 6: 99, 7: 98, 8: 97}, - []Rule{{4, 8, 9}, {4, 6, 10}, {4, 5, 11}, {4, 7, 12}}, - map[TokenId]DecodedToken{4: {4}, 5: {5}, 6: {6}, 7: {7}, 8: {8}, 9: {4, 8}, 10: {4, 6}, 11: {4, 5}, 12: {4, 7}}, - map[string]TokenId{"a": 8, "b": 7, "c": 6, "d": 5, "_": 4, + map[rune]TokenID{97: 8, 98: 7, 99: 6, 100: 5, 95: 4}, + map[TokenID]rune{4: 95, 5: 100, 6: 99, 7: 98, 8: 97}, + []rule{{4, 8, 9}, {4, 6, 10}, {4, 5, 11}, {4, 7, 12}}, + map[TokenID]EncodedToken{4: {4}, 5: {5}, 6: {6}, 7: {7}, 8: {8}, 9: {4, 8}, 10: {4, 6}, 11: {4, 5}, 12: {4, 7}}, + map[string]TokenID{"a": 8, "b": 7, "c": 6, "d": 5, "_": 4, "_a": 9, "_b": 12, "_c": 10, "_d": 11}, - SpecialTokens{1, 0, 2, 4}, + specialTokens{1, 0, 2, 4}, } - model, _ := ReadModel(reader) + model, _ := ReadModelFromText(reader) require.Equal(t, expected, *model) } diff --git a/go.sum b/go.sum deleted file mode 100644 index 3f2d26f..0000000 --- a/go.sum +++ /dev/null @@ -1,21 +0,0 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= -github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/stretchr/objx v0.1.0/go.mod 
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/main.go b/main.go deleted file mode 100644 index a779181..0000000 --- a/main.go +++ /dev/null @@ -1,7 +0,0 @@ -package bpe - -import "fmt" - -func main() { - fmt.Printf("Package for applying BPE") -} From 4dfc608a996a5d8031373bc59af27a5001ffefab Mon Sep 17 00:00:00 2001 From: Irina Khismatullina Date: Thu, 17 Oct 2019 16:41:23 +0300 Subject: [PATCH 3/5] Add model loading from binary Signed-off-by: Irina Khismatullina --- bpe.go | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++-- bpe_test.go | 60 +++++++++++++++++++++++++++-- 2 files changed, 161 insertions(+), 8 deletions(-) diff --git a/bpe.go b/bpe.go index 8552576..da09919 100644 --- a/bpe.go +++ b/bpe.go @@ -2,6 +2,7 @@ package bpe import ( "bufio" + "encoding/binary" "fmt" "io" @@ -21,10 +22,10 @@ type rule struct { } type specialTokens struct { - unk int - pad int - bos int - eos int + unk int32 + pad int32 + bos int32 + eos int32 } // Model is a Byte-Pair encoding model, which supports encoding and decoding text into sequences @@ 
-114,3 +115,103 @@ func ReadModelFromText(reader io.Reader) (*Model, error) { } return model, nil } + +func specialTokensToBin(specials specialTokens) []byte { + bytesArray := make([]byte, 16) + binary.BigEndian.PutUint32(bytesArray, uint32(specials.unk)) + binary.BigEndian.PutUint32(bytesArray[4:], uint32(specials.pad)) + binary.BigEndian.PutUint32(bytesArray[8:], uint32(specials.bos)) + binary.BigEndian.PutUint32(bytesArray[12:], uint32(specials.eos)) + return bytesArray +} + +func binToSpecialTokens(bytesArray []byte) specialTokens { + var s specialTokens + s.unk = int32(binary.BigEndian.Uint32(bytesArray)) + s.pad = int32(binary.BigEndian.Uint32(bytesArray[4:])) + s.bos = int32(binary.BigEndian.Uint32(bytesArray[8:])) + s.eos = int32(binary.BigEndian.Uint32(bytesArray[12:])) + return s +} + +func ruleToBin(rule rule) []byte { + bytesArray := make([]byte, 12) + binary.BigEndian.PutUint32(bytesArray, uint32(rule.left)) + binary.BigEndian.PutUint32(bytesArray[4:], uint32(rule.right)) + binary.BigEndian.PutUint32(bytesArray[8:], uint32(rule.result)) + return bytesArray +} + +func binToRule(bytesArray []byte) rule { + var r rule + r.left = TokenID(binary.BigEndian.Uint32(bytesArray)) + r.right = TokenID(binary.BigEndian.Uint32(bytesArray[4:])) + r.result = TokenID(binary.BigEndian.Uint32(bytesArray[8:])) + return r +} + +// ReadModelFromBinary loads the BPE model from the binary dump +func ReadModelFromBinary(reader io.Reader) (*Model, error) { + bytesReader := bufio.NewReader(reader) + buf := make([]byte, 4) + var nChars, nRules int + _, err := bytesReader.Read(buf) + if err != nil { + logrus.Fatal("Broken input: ", err) + return &Model{}, err + } + nChars = int(binary.BigEndian.Uint32(buf)) + _, err = bytesReader.Read(buf) + if err != nil { + logrus.Fatal("Broken input: ", err) + return &Model{}, err + } + nRules = int(binary.BigEndian.Uint32(buf)) + + model := newModel(nRules) + for i := 0; i < nChars; i++ { + var char rune + var charID TokenID + _, err = 
bytesReader.Read(buf) + if err != nil { + logrus.Fatal("Broken input: ", err) + return &Model{}, err + } + char = rune(binary.BigEndian.Uint32(buf)) + _, err = bytesReader.Read(buf) + if err != nil { + logrus.Fatal("Broken input: ", err) + return &Model{}, err + } + charID = TokenID(binary.BigEndian.Uint32(buf)) + model.char2id[char] = charID + model.id2char[charID] = char + model.recipe[charID] = EncodedToken{charID} + model.revRecipe[string(char)] = charID + } + ruleBuf := make([]byte, 12) + for i := 0; i < nRules; i++ { + _, err = bytesReader.Read(ruleBuf) + if err != nil { + logrus.Fatal("Broken input: ", err) + return &Model{}, err + } + rule := binToRule(ruleBuf) + model.rules[i] = rule + model.recipe[rule.result] = append(model.recipe[rule.left], model.recipe[rule.right]...) + resultString, err := DecodeToken(model.recipe[rule.result], model.id2char) + if err != nil { + logrus.Fatal("Unexpected token id inside the rules: ", err) + return model, err + } + model.revRecipe[resultString] = rule.result + } + specialTokensBuf := make([]byte, 16) + _, err = bytesReader.Read(specialTokensBuf) + if err != nil { + logrus.Fatal("Broken input: ", err) + return &Model{}, err + } + model.specialTokens = binToSpecialTokens(specialTokensBuf) + return model, nil +} diff --git a/bpe_test.go b/bpe_test.go index 659fc71..e36a5f9 100644 --- a/bpe_test.go +++ b/bpe_test.go @@ -1,6 +1,7 @@ package bpe import ( + "bytes" "strings" "testing" @@ -19,7 +20,7 @@ func TestDecodedTokenToString(t *testing.T) { require.Equal(t, "abacc", word) } -func TestReadModel(t *testing.T) { +func TestReadModelFromText(t *testing.T) { reader := strings.NewReader(`5 4 99 6 98 7 @@ -30,7 +31,7 @@ func TestReadModel(t *testing.T) { 4 6 10 4 5 11 4 7 12 -1 0 2 4`) +1 0 2 3`) expected := Model{ map[rune]TokenID{97: 8, 98: 7, 99: 6, 100: 5, 95: 4}, map[TokenID]rune{4: 95, 5: 100, 6: 99, 7: 98, 8: 97}, @@ -38,8 +39,59 @@ func TestReadModel(t *testing.T) { map[TokenID]EncodedToken{4: {4}, 5: {5}, 6: {6}, 7: 
{7}, 8: {8}, 9: {4, 8}, 10: {4, 6}, 11: {4, 5}, 12: {4, 7}}, map[string]TokenID{"a": 8, "b": 7, "c": 6, "d": 5, "_": 4, "_a": 9, "_b": 12, "_c": 10, "_d": 11}, - specialTokens{1, 0, 2, 4}, + specialTokens{1, 0, 2, 3}, } - model, _ := ReadModelFromText(reader) + model, err := ReadModelFromText(reader) + require.NoError(t, err) + require.Equal(t, expected, *model) +} + +func TestSpecialTokensToBin(t *testing.T) { + specials := specialTokens{1, 259, 2*256*256 + 37*256 + 2, -256 * 256 * 256 * 127} + bytesArray := []byte{0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 37, 2, 129, 0, 0, 0} + require.Equal(t, bytesArray, specialTokensToBin(specials)) +} + +func TestBinToSpecialTokens(t *testing.T) { + bytesArray := []byte{0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 37, 2, 129, 0, 0, 0} + specials := specialTokens{1, 259, 2*256*256 + 37*256 + 2, -256 * 256 * 256 * 127} + require.Equal(t, specials, binToSpecialTokens(bytesArray)) +} + +func TestRuleToBin(t *testing.T) { + rule := rule{1, 2, 257} + bytesArray := []byte{0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 1, 1} + require.Equal(t, bytesArray, ruleToBin(rule)) +} + +func TestBinToRule(t *testing.T) { + rule := rule{1, 2, 257} + bytesArray := []byte{0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 1, 1} + require.Equal(t, rule, binToRule(bytesArray)) +} + +func TestReadModelFromBinary(t *testing.T) { + reader := bytes.NewReader([]byte{0, 0, 0, 5, 0, 0, 0, 4, + 0, 0, 0, 99, 0, 0, 0, 6, + 0, 0, 0, 98, 0, 0, 0, 7, + 0, 0, 0, 95, 0, 0, 0, 4, + 0, 0, 0, 100, 0, 0, 0, 5, + 0, 0, 0, 97, 0, 0, 0, 8, + 0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 9, + 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 10, + 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 11, + 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 12, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3}) + expected := Model{ + map[rune]TokenID{97: 8, 98: 7, 99: 6, 100: 5, 95: 4}, + map[TokenID]rune{4: 95, 5: 100, 6: 99, 7: 98, 8: 97}, + []rule{{4, 8, 9}, {4, 6, 10}, {4, 5, 11}, {4, 7, 12}}, + map[TokenID]EncodedToken{4: {4}, 5: {5}, 6: {6}, 7: {7}, 8: {8}, 9: {4, 8}, 10: {4, 6}, 11: {4, 5}, 12: 
{4, 7}}, + map[string]TokenID{"a": 8, "b": 7, "c": 6, "d": 5, "_": 4, + "_a": 9, "_b": 12, "_c": 10, "_d": 11}, + specialTokens{1, 0, 2, 3}, + } + model, err := ReadModelFromBinary(reader) + require.NoError(t, err) require.Equal(t, expected, *model) } From 9bbc64d140540556aa1f1b393ff4a25202b34d45 Mon Sep 17 00:00:00 2001 From: Irina Khismatullina Date: Thu, 17 Oct 2019 16:43:01 +0300 Subject: [PATCH 4/5] Remove text model dump support Signed-off-by: Irina Khismatullina --- bpe.go | 53 ----------------------------------------------------- bpe_test.go | 27 --------------------------- 2 files changed, 80 deletions(-) diff --git a/bpe.go b/bpe.go index da09919..0af95ba 100644 --- a/bpe.go +++ b/bpe.go @@ -3,7 +3,6 @@ package bpe import ( "bufio" "encoding/binary" - "fmt" "io" "github.com/sirupsen/logrus" @@ -64,58 +63,6 @@ func DecodeToken(token EncodedToken, id2char map[TokenID]rune) (string, error) { return word, nil } -// ReadModelFromText loads the BPE model from the text dump -func ReadModelFromText(reader io.Reader) (*Model, error) { - scanner := bufio.NewScanner(reader) - var nChars, nRules int - scanner.Scan() - _, err := fmt.Sscanf(scanner.Text(), "%d %d", &nChars, &nRules) - if err != nil { - logrus.Fatal("Wrong input format: ", err) - return &Model{}, err - } - model := newModel(nRules) - for i := 0; i < nChars; i++ { - var char rune - var charID TokenID - scanner.Scan() - _, err = fmt.Sscanf(scanner.Text(), "%d %d", &char, &charID) - if err != nil { - logrus.Fatal("Wrong input format: ", err) - return model, err - } - model.char2id[char] = charID - model.id2char[charID] = char - model.recipe[charID] = EncodedToken{charID} - model.revRecipe[string(char)] = charID - } - for i := 0; i < nRules; i++ { - var rule rule - scanner.Scan() - _, err = fmt.Sscanf(scanner.Text(), "%d %d %d", &rule.left, &rule.right, &rule.result) - if err != nil { - logrus.Fatal("Wrong input format: ", err) - return model, err - } - model.rules[i] = rule - model.recipe[rule.result] = 
append(model.recipe[rule.left], model.recipe[rule.right]...) - resultString, err := DecodeToken(model.recipe[rule.result], model.id2char) - if err != nil { - logrus.Fatal("Unexpected token id inside the rules: ", err) - return model, err - } - model.revRecipe[resultString] = rule.result - } - scanner.Scan() - _, err = fmt.Sscanf(scanner.Text(), "%d %d %d %d", &model.specialTokens.unk, - &model.specialTokens.pad, &model.specialTokens.bos, &model.specialTokens.eos) - if err != nil { - logrus.Fatal("Wrong input format: ", err) - return model, err - } - return model, nil -} - func specialTokensToBin(specials specialTokens) []byte { bytesArray := make([]byte, 16) binary.BigEndian.PutUint32(bytesArray, uint32(specials.unk)) diff --git a/bpe_test.go b/bpe_test.go index e36a5f9..1a441bc 100644 --- a/bpe_test.go +++ b/bpe_test.go @@ -2,7 +2,6 @@ package bpe import ( "bytes" - "strings" "testing" "github.com/stretchr/testify/require" @@ -20,32 +19,6 @@ func TestDecodedTokenToString(t *testing.T) { require.Equal(t, "abacc", word) } -func TestReadModelFromText(t *testing.T) { - reader := strings.NewReader(`5 4 -99 6 -98 7 -95 4 -100 5 -97 8 -4 8 9 -4 6 10 -4 5 11 -4 7 12 -1 0 2 3`) - expected := Model{ - map[rune]TokenID{97: 8, 98: 7, 99: 6, 100: 5, 95: 4}, - map[TokenID]rune{4: 95, 5: 100, 6: 99, 7: 98, 8: 97}, - []rule{{4, 8, 9}, {4, 6, 10}, {4, 5, 11}, {4, 7, 12}}, - map[TokenID]EncodedToken{4: {4}, 5: {5}, 6: {6}, 7: {7}, 8: {8}, 9: {4, 8}, 10: {4, 6}, 11: {4, 5}, 12: {4, 7}}, - map[string]TokenID{"a": 8, "b": 7, "c": 6, "d": 5, "_": 4, - "_a": 9, "_b": 12, "_c": 10, "_d": 11}, - specialTokens{1, 0, 2, 3}, - } - model, err := ReadModelFromText(reader) - require.NoError(t, err) - require.Equal(t, expected, *model) -} - func TestSpecialTokensToBin(t *testing.T) { specials := specialTokens{1, 259, 2*256*256 + 37*256 + 2, -256 * 256 * 256 * 127} bytesArray := []byte{0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 37, 2, 129, 0, 0, 0} From 5a79713666b7b31ad37e737b3f9e38914dd9a51b Mon Sep 17 
00:00:00 2001 From: Irina Khismatullina Date: Tue, 22 Oct 2019 11:47:45 +0300 Subject: [PATCH 5/5] Expand tests Signed-off-by: Irina Khismatullina --- .travis.yml | 3 +- bpe.go | 94 ++++++++++++++++++++++++++++++----------------------- bpe_test.go | 87 +++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 130 insertions(+), 54 deletions(-) diff --git a/.travis.yml b/.travis.yml index 788ee8e..92b448d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,8 @@ install: script: - make install-dev-deps - make check-style - - make test + - make test-coverage + - make codecov matrix: fast_finish: true diff --git a/bpe.go b/bpe.go index 0af95ba..aca81fd 100644 --- a/bpe.go +++ b/bpe.go @@ -1,8 +1,8 @@ package bpe import ( - "bufio" "encoding/binary" + "errors" "io" "github.com/sirupsen/logrus" @@ -57,60 +57,66 @@ func DecodeToken(token EncodedToken, id2char map[TokenID]rune) (string, error) { if char, ok := id2char[id]; ok { word = word + string(char) } else { - logrus.Fatalf("%d key not found in id2char", id) + logrus.Errorf("Decode failure: %d token id has no corresponding char", id) + return "", errors.New("key not found in id2char") } } return word, nil } -func specialTokensToBin(specials specialTokens) []byte { +func (s specialTokens) toBinary() []byte { bytesArray := make([]byte, 16) - binary.BigEndian.PutUint32(bytesArray, uint32(specials.unk)) - binary.BigEndian.PutUint32(bytesArray[4:], uint32(specials.pad)) - binary.BigEndian.PutUint32(bytesArray[8:], uint32(specials.bos)) - binary.BigEndian.PutUint32(bytesArray[12:], uint32(specials.eos)) + binary.BigEndian.PutUint32(bytesArray, uint32(s.unk)) + binary.BigEndian.PutUint32(bytesArray[4:], uint32(s.pad)) + binary.BigEndian.PutUint32(bytesArray[8:], uint32(s.bos)) + binary.BigEndian.PutUint32(bytesArray[12:], uint32(s.eos)) return bytesArray } -func binToSpecialTokens(bytesArray []byte) specialTokens { +func binaryToSpecialTokens(bytesArray []byte) (specialTokens, error) { var s specialTokens + if 
len(bytesArray) < 16 { + logrus.Error("Bytes array length is too small") + return s, errors.New("bytes array is too small") + } s.unk = int32(binary.BigEndian.Uint32(bytesArray)) s.pad = int32(binary.BigEndian.Uint32(bytesArray[4:])) s.bos = int32(binary.BigEndian.Uint32(bytesArray[8:])) s.eos = int32(binary.BigEndian.Uint32(bytesArray[12:])) - return s + return s, nil } -func ruleToBin(rule rule) []byte { +func (r rule) toBinary() []byte { bytesArray := make([]byte, 12) - binary.BigEndian.PutUint32(bytesArray, uint32(rule.left)) - binary.BigEndian.PutUint32(bytesArray[4:], uint32(rule.right)) - binary.BigEndian.PutUint32(bytesArray[8:], uint32(rule.result)) + binary.BigEndian.PutUint32(bytesArray, uint32(r.left)) + binary.BigEndian.PutUint32(bytesArray[4:], uint32(r.right)) + binary.BigEndian.PutUint32(bytesArray[8:], uint32(r.result)) return bytesArray } -func binToRule(bytesArray []byte) rule { +func binaryToRule(bytesArray []byte) (rule, error) { var r rule + if len(bytesArray) < 12 { + logrus.Error("Bytes array length is too small") + return r, errors.New("bytes array is too small") + } r.left = TokenID(binary.BigEndian.Uint32(bytesArray)) r.right = TokenID(binary.BigEndian.Uint32(bytesArray[4:])) r.result = TokenID(binary.BigEndian.Uint32(bytesArray[8:])) - return r + return r, nil } -// ReadModelFromBinary loads the BPE model from the binary dump -func ReadModelFromBinary(reader io.Reader) (*Model, error) { - bytesReader := bufio.NewReader(reader) +// ReadModel loads the BPE model from the binary dump +func ReadModel(reader io.Reader) (*Model, error) { buf := make([]byte, 4) var nChars, nRules int - _, err := bytesReader.Read(buf) - if err != nil { - logrus.Fatal("Broken input: ", err) + if _, err := io.ReadFull(reader, buf); err != nil { + logrus.Error("Broken input: ", err) return &Model{}, err } nChars = int(binary.BigEndian.Uint32(buf)) - _, err = bytesReader.Read(buf) - if err != nil { - logrus.Fatal("Broken input: ", err) + if _, err := 
io.ReadFull(reader, buf); err != nil { + logrus.Error("Broken input: ", err) return &Model{}, err } nRules = int(binary.BigEndian.Uint32(buf)) @@ -119,15 +125,13 @@ func ReadModelFromBinary(reader io.Reader) (*Model, error) { for i := 0; i < nChars; i++ { var char rune var charID TokenID - _, err = bytesReader.Read(buf) - if err != nil { - logrus.Fatal("Broken input: ", err) + if _, err := io.ReadFull(reader, buf); err != nil { + logrus.Error("Broken input: ", err) return &Model{}, err } char = rune(binary.BigEndian.Uint32(buf)) - _, err = bytesReader.Read(buf) - if err != nil { - logrus.Fatal("Broken input: ", err) + if _, err := io.ReadFull(reader, buf); err != nil { + logrus.Error("Broken input: ", err) return &Model{}, err } charID = TokenID(binary.BigEndian.Uint32(buf)) @@ -138,27 +142,37 @@ func ReadModelFromBinary(reader io.Reader) (*Model, error) { } ruleBuf := make([]byte, 12) for i := 0; i < nRules; i++ { - _, err = bytesReader.Read(ruleBuf) - if err != nil { - logrus.Fatal("Broken input: ", err) + if _, err := io.ReadFull(reader, ruleBuf); err != nil { + logrus.Error("Broken input: ", err) return &Model{}, err } - rule := binToRule(ruleBuf) + rule, err := binaryToRule(ruleBuf) + if err != nil { + return model, err + } model.rules[i] = rule + if _, ok := model.recipe[rule.left]; !ok { + logrus.Errorf("%d: token id not described before", rule.left) + return model, errors.New("key not found in id2char") + } + if _, ok := model.recipe[rule.right]; !ok { + logrus.Errorf("%d: token id not described before", rule.right) + return model, errors.New("key not found in id2char") + } model.recipe[rule.result] = append(model.recipe[rule.left], model.recipe[rule.right]...) 
resultString, err := DecodeToken(model.recipe[rule.result], model.id2char) if err != nil { - logrus.Fatal("Unexpected token id inside the rules: ", err) + logrus.Error("Unexpected token id inside the rules: ", err) return model, err } model.revRecipe[resultString] = rule.result } specialTokensBuf := make([]byte, 16) - _, err = bytesReader.Read(specialTokensBuf) - if err != nil { - logrus.Fatal("Broken input: ", err) + if _, err := io.ReadFull(reader, specialTokensBuf); err != nil { + logrus.Error("Broken input: ", err) return &Model{}, err } - model.specialTokens = binToSpecialTokens(specialTokensBuf) - return model, nil + specials, err := binaryToSpecialTokens(specialTokensBuf) + model.specialTokens = specials + return model, err } diff --git a/bpe_test.go b/bpe_test.go index 1a441bc..e69398b 100644 --- a/bpe_test.go +++ b/bpe_test.go @@ -12,38 +12,54 @@ func TestNewModel(t *testing.T) { require.Equal(t, 10, len(model.rules)) } -func TestDecodedTokenToString(t *testing.T) { +func TestDecodeToken(t *testing.T) { id2char := map[TokenID]rune{1: []rune("a")[0], 2: []rune("b")[0], 3: []rune("c")[0]} word, err := DecodeToken(EncodedToken{1, 2, 1, 3, 3}, id2char) require.NoError(t, err) require.Equal(t, "abacc", word) } -func TestSpecialTokensToBin(t *testing.T) { +func TestSpecialTokensToBinary(t *testing.T) { specials := specialTokens{1, 259, 2*256*256 + 37*256 + 2, -256 * 256 * 256 * 127} bytesArray := []byte{0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 37, 2, 129, 0, 0, 0} - require.Equal(t, bytesArray, specialTokensToBin(specials)) + require.Equal(t, bytesArray, specials.toBinary()) } -func TestBinToSpecialTokens(t *testing.T) { +func TestBinaryToSpecialTokens(t *testing.T) { bytesArray := []byte{0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 37, 2, 129, 0, 0, 0} - specials := specialTokens{1, 259, 2*256*256 + 37*256 + 2, -256 * 256 * 256 * 127} - require.Equal(t, specials, binToSpecialTokens(bytesArray)) + expected := specialTokens{1, 259, 2*256*256 + 37*256 + 2, -256 * 256 * 256 * 127} + 
specials, err := binaryToSpecialTokens(bytesArray) + require.NoError(t, err) + require.Equal(t, expected, specials) + bytesArray = []byte{0, 0, 0, 1, 0, 0, 1, 3, 0, 2, 37, 2, 129, 0, 0} + specials, err = binaryToSpecialTokens(bytesArray) + require.Error(t, err) + bytesArray = []byte{} + specials, err = binaryToSpecialTokens(bytesArray) + require.Error(t, err) } -func TestRuleToBin(t *testing.T) { +func TestRuleToBinary(t *testing.T) { rule := rule{1, 2, 257} bytesArray := []byte{0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 1, 1} - require.Equal(t, bytesArray, ruleToBin(rule)) + require.Equal(t, bytesArray, rule.toBinary()) } -func TestBinToRule(t *testing.T) { - rule := rule{1, 2, 257} +func TestBinaryToRule(t *testing.T) { + expected := rule{1, 2, 257} bytesArray := []byte{0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 1, 1} - require.Equal(t, rule, binToRule(bytesArray)) + rule, err := binaryToRule(bytesArray) + require.NoError(t, err) + require.Equal(t, expected, rule) + bytesArray = []byte{0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1} + rule, err = binaryToRule(bytesArray) + require.Error(t, err) + bytesArray = []byte{} + rule, err = binaryToRule(bytesArray) + require.Error(t, err) } -func TestReadModelFromBinary(t *testing.T) { +func TestReadModel(t *testing.T) { reader := bytes.NewReader([]byte{0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 99, 0, 0, 0, 6, 0, 0, 0, 98, 0, 0, 0, 7, @@ -64,7 +80,52 @@ func TestReadModelFromBinary(t *testing.T) { "_a": 9, "_b": 12, "_c": 10, "_d": 11}, specialTokens{1, 0, 2, 3}, } - model, err := ReadModelFromBinary(reader) + model, err := ReadModel(reader) require.NoError(t, err) require.Equal(t, expected, *model) + + reader = bytes.NewReader([]byte{0, 0, 0, 5, 0, 0, 0, 4, + 0, 0, 0, 99, 0, 0, 0, 6, + 0, 0, 0, 98, 0, 0, 0, 7, + 0, 0, 0, 95, 0, 0, 0, 4, + 0, 0, 0, 100, 0, 0, 0, 5, + 0, 0, 0, 97, 0, 0, 0, 8, + 0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 9, + 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 10, + 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 11, + 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 12, + 0, 0, 0, 1, 0, 0, 0, 0, 
0, 0, 0, 2, 0, 0, 0, 3, + 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 11, + 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 12}) + model, err = ReadModel(reader) + require.NoError(t, err) + require.Equal(t, expected, *model) + + reader = bytes.NewReader([]byte{0, 0, 0, 5, 0, 0, 0, 4, + 0, 0, 0, 99, 0, 0, 0, 6, + 0, 0, 0, 98, 0, 0, 0, 7, + 0, 0, 0, 95, 0, 0, 0, 4, + 0, 0, 0, 100, 0, 0, 0, 5, + 0, 0, 0, 97, 0, 0, 0, 8, + 0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 9, + 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 10, + 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 11, + 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 12, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0}) + model, err = ReadModel(reader) + require.Error(t, err) + + reader = bytes.NewReader([]byte{0, 0, 0, 5, 0, 0, 0, 4, + 0, 0, 0, 99, 0, 0, 0, 6, + 0, 0, 0, 98, 0, 0, 0, 7, + 0, 0, 0, 95, 0, 0, 0, 4, + 0, 0, 0, 100, 0, 0, 0, 5, + 0, 0, 0, 97, 0, 0, 0, 8, + 0, 0, 0, 4, 0, 0, 0, 20, 0, 0, 0, 9, + 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 10, + 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 11, + 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 12, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3}) + model, err = ReadModel(reader) + require.Error(t, err) }