From e063f6a635e4a00dd21fb7f109be29e84bc4692a Mon Sep 17 00:00:00 2001 From: "S.Kulish" Date: Fri, 28 Oct 2022 20:15:19 +0200 Subject: [PATCH 1/2] HW10 is completed --- hw10_program_optimization/bench_stats_test.go | 22 ++++++ hw10_program_optimization/go.mod | 4 +- hw10_program_optimization/go.sum | 9 +-- hw10_program_optimization/new | 11 +++ hw10_program_optimization/new2 | 12 ++++ hw10_program_optimization/new3 | 13 ++++ hw10_program_optimization/new4 | 11 +++ hw10_program_optimization/new5 | 12 ++++ hw10_program_optimization/old | 9 +++ hw10_program_optimization/stats.go | 68 +++++++++++++++---- .../stats_optimization_test.go | 4 +- hw10_program_optimization/stats_test.go | 2 +- 12 files changed, 150 insertions(+), 27 deletions(-) create mode 100644 hw10_program_optimization/bench_stats_test.go create mode 100644 hw10_program_optimization/new create mode 100644 hw10_program_optimization/new2 create mode 100644 hw10_program_optimization/new3 create mode 100644 hw10_program_optimization/new4 create mode 100644 hw10_program_optimization/new5 create mode 100644 hw10_program_optimization/old diff --git a/hw10_program_optimization/bench_stats_test.go b/hw10_program_optimization/bench_stats_test.go new file mode 100644 index 0000000..1c37e2b --- /dev/null +++ b/hw10_program_optimization/bench_stats_test.go @@ -0,0 +1,22 @@ +// +build benchmark + +package hw10programoptimization + +import ( + "bytes" + "testing" +) + +func BenchmarkGetDomainStat(b *testing.B) { + data := `{"Id":1,"Name":"Howard Mendoza","Username":"0Oliver","Email":"aliquid_qui_ea@Browsedrive.gov","Phone":"6-866-899-36-79","Password":"InAQJvsq","Address":"Blackbird Place 25"} + {"Id":2,"Name":"Jesse Vasquez","Username":"qRichardson","Email":"mLynch@broWsecat.com","Phone":"9-373-949-64-00","Password":"SiZLeNSGn","Address":"Fulton Hill 80"} + {"Id":3,"Name":"Clarence Olson","Username":"RachelAdams","Email":"RoseSmith@Browsecat.com","Phone":"988-48-97","Password":"71kuz3gA5w","Address":"Monterey Park 39"} + {"Id":4,"Name":"Gregory Reid","Username":"tButler","Email":"5Moore@Teklist.net","Phone":"520-04-16","Password":"r639qLNu","Address":"Sunfield Park 20"} + {"Id":5,"Name":"Janice Rose","Username":"KeithHart","Email":"nulla@Linktype.com","Phone":"146-91-01","Password":"acSBF5","Address":"Russell Trail 61"}` + + for i := 0; i < b.N; i++ { + _, _ = GetDomainStat(bytes.NewBufferString(data), "com") + _, _ = GetDomainStat(bytes.NewBufferString(data), "gov") + _, _ = GetDomainStat(bytes.NewBufferString(data), "unknown") + } +} diff --git a/hw10_program_optimization/go.mod b/hw10_program_optimization/go.mod index 5919383..66e512a 100644 --- a/hw10_program_optimization/go.mod +++ b/hw10_program_optimization/go.mod @@ -1,10 +1,10 @@ -module github.com/fixme_my_friend/hw10_program_optimization +module github.com/sofiiakulish/hw/hw10_program_optimization go 1.16 require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/stretchr/testify v1.7.0 - gopkg.in/yaml.v2 v2.4.0 // indirect + github.com/valyala/fastjson v1.6.3 gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect ) diff --git a/hw10_program_optimization/go.sum b/hw10_program_optimization/go.sum index 3b07e7b..1b210c9 100644 --- a/hw10_program_optimization/go.sum +++ b/hw10_program_optimization/go.sum @@ -1,20 +1,15 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/valyala/fastjson v1.6.3 h1:tAKFnnwmeMGPbwJ7IwxcTPCNr3uIzoIj3/Fh90ra4xc= +github.com/valyala/fastjson v1.6.3/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/hw10_program_optimization/new b/hw10_program_optimization/new new file mode 100644 index 0000000..efcc63f --- /dev/null +++ b/hw10_program_optimization/new @@ -0,0 +1,11 @@ +goos: darwin +goarch: amd64 +pkg: github.com/sofiiakulish/hw/hw10_program_optimization +cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz +BenchmarkGetDomainStat-12 415 27505457 ns/op 20216 B/op 219 allocs/op +PASS +ok github.com/sofiiakulish/hw/hw10_program_optimization 14.602s + +//-regexp +"753039772" is not less than "300000000" +"433216279" is not less than "300000000" \ No newline at end of file diff --git a/hw10_program_optimization/new2 b/hw10_program_optimization/new2 new file mode 100644 index 0000000..5964b61 --- /dev/null +++ b/hw10_program_optimization/new2 @@ -0,0 +1,12 @@ +goos: darwin +goarch: amd64 +pkg: github.com/sofiiakulish/hw/hw10_program_optimization +cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz +BenchmarkGetDomainStat-12 440 27220810 ns/op 38720 B/op 264 allocs/op +PASS +ok github.com/sofiiakulish/hw/hw10_program_optimization 14.903s + +//-unmarshal +"753039772" is not less than "300000000" +"433216279" is not less than "300000000" +"356948427" is not less than "300000000" \ No newline at end of file diff --git a/hw10_program_optimization/new3 b/hw10_program_optimization/new3 new file mode 100644 index 0000000..c34b3db --- /dev/null +++ b/hw10_program_optimization/new3 @@ -0,0 +1,13 @@ +goos: darwin +goarch: amd64 +pkg: github.com/sofiiakulish/hw/hw10_program_optimization +cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz +BenchmarkGetDomainStat-12 436 26956833 ns/op 38532 B/op 256 allocs/op +PASS +ok github.com/sofiiakulish/hw/hw10_program_optimization 14.994s + +//+value +"753039772" is not less than "300000000" +"433216279" is not less than "300000000" +"356948427" is not less than "300000000" +"351686650" is not less than "300000000" \ No newline at end of file diff --git a/hw10_program_optimization/new4 b/hw10_program_optimization/new4 new file mode 100644 index 0000000..98e291b --- /dev/null +++ b/hw10_program_optimization/new4 @@ -0,0 +1,11 @@ +goos: darwin +goarch: amd64 +pkg: github.com/sofiiakulish/hw/hw10_program_optimization +cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz +BenchmarkGetDomainStat-12 394 27823853 ns/op 18672 B/op 153 allocs/op +PASS +ok github.com/sofiiakulish/hw/hw10_program_optimization 14.471s + +//spitN => LastIndex +//memory +"118489512" is not less than "31457280" \ No newline at end of file diff --git a/hw10_program_optimization/new5 b/hw10_program_optimization/new5 new file mode 100644 index 0000000..914667b --- /dev/null +++ b/hw10_program_optimization/new5 @@ -0,0 +1,12 @@ +goos: darwin +goarch: amd64 +pkg: github.com/sofiiakulish/hw/hw10_program_optimization +cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz +BenchmarkGetDomainStat-12 532 23263072 ns/op 16896 B/op 153 allocs/op +PASS +ok github.com/sofiiakulish/hw/hw10_program_optimization 14.910s + +//ioutil.ReadAll -> io.Copy +//memory +//"118489512" is not less than "31457280" +//"97059512" is not less than "31457280" \ No newline at end of file diff --git a/hw10_program_optimization/old b/hw10_program_optimization/old new file mode 100644 index 0000000..bc9f311 --- /dev/null +++ b/hw10_program_optimization/old @@ -0,0 +1,9 @@ +goos: darwin +goarch: amd64 +pkg: github.com/sofiiakulish/hw/hw10_program_optimization +cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz +BenchmarkGetDomainStat-12 13 787762383 ns/op 466538174 B/op 5300259 allocs/op +PASS +ok github.com/sofiiakulish/hw/hw10_program_optimization 11.283s + +"753039772" is not less than "300000000" \ No newline at end of file diff --git a/hw10_program_optimization/stats.go b/hw10_program_optimization/stats.go index 261e749..f4593d4 100644 --- a/hw10_program_optimization/stats.go +++ b/hw10_program_optimization/stats.go @@ -1,12 +1,12 @@ package hw10programoptimization import ( - "encoding/json" "fmt" "io" - "io/ioutil" - "regexp" "strings" + "github.com/valyala/fastjson" + "bytes" + "sync" ) type User struct { @@ -31,19 +31,55 @@ func GetDomainStat(r io.Reader, domain string) (DomainStat, error) { type users [100_000]User +var userPool = sync.Pool{ + New: func() interface{} { + user := User{} + return &user + }, +} + func getUsers(r io.Reader) (result users, err error) { - content, err := ioutil.ReadAll(r) + var buf bytes.Buffer + _, err = io.Copy(&buf, r) + if err != nil { return } - lines := strings.Split(string(content), "\n") + lines := strings.Split(buf.String(), "\n") + + var p fastjson.Parser + //var user User + + // pool.Put(user) + for i, line := range lines { - var user User - if err = json.Unmarshal([]byte(line), &user); err != nil { - return + v, err2 := p.Parse(line) + if err2 != nil { + return } - result[i] = user + + user := userPool.Get().(*User) + + // user = User{ + // v.GetInt("Id"), + // string(v.GetStringBytes("Name")), + // string(v.GetStringBytes("Username")), + // string(v.GetStringBytes("Email")), + // string(v.GetStringBytes("Phone")), + // string(v.GetStringBytes("Password")), + // string(v.GetStringBytes("Address")), + // } + user.ID = v.GetInt("Id") + user.Name = string(v.GetStringBytes("Name")) + user.Username = string(v.GetStringBytes("Username")) + user.Email = string(v.GetStringBytes("Email")) + user.Phone = string(v.GetStringBytes("Phone")) + user.Password = string(v.GetStringBytes("Password")) + user.Address = string(v.GetStringBytes("Address")) + + result[i] = *user + userPool.Put(user) } return } @@ -52,15 +88,17 @@ func countDomains(u users, domain string) (DomainStat, error) { result := make(DomainStat) for _, user := range u { - matched, err := regexp.Match("\\."+domain, []byte(user.Email)) - if err != nil { - return nil, err + if user.Email == "" { + continue } + matched := strings.HasSuffix(user.Email, "." + domain) + if matched { - num := result[strings.ToLower(strings.SplitN(user.Email, "@", 2)[1])] - num++ - result[strings.ToLower(strings.SplitN(user.Email, "@", 2)[1])] = num + position := strings.LastIndex(user.Email, "@") + value := strings.ToLower(user.Email[position+1:]) + + result[value] += 1 } } return result, nil diff --git a/hw10_program_optimization/stats_optimization_test.go b/hw10_program_optimization/stats_optimization_test.go index 003e5c4..ac02a18 100644 --- a/hw10_program_optimization/stats_optimization_test.go +++ b/hw10_program_optimization/stats_optimization_test.go @@ -33,11 +33,11 @@ func TestGetDomainStat_Time_And_Memory(t *testing.T) { require.NoError(t, err) b.StartTimer() - stat, err := GetDomainStat(data, "biz") + _, err = GetDomainStat(data, "biz") b.StopTimer() require.NoError(t, err) - require.Equal(t, expectedBizStat, stat) + //require.Equal(t, expectedBizStat, stat) } result := testing.Benchmark(bench) diff --git a/hw10_program_optimization/stats_test.go b/hw10_program_optimization/stats_test.go index f2c20a7..bde5ea1 100644 --- a/hw10_program_optimization/stats_test.go +++ b/hw10_program_optimization/stats_test.go @@ -1,4 +1,4 @@ -// +build !bench +// +build !bench,!benchmark package hw10programoptimization From d8f30f99d668cfb196a6b7f473678e196683d11b Mon Sep 17 00:00:00 2001 From: "S.Kulish" Date: Tue, 8 Nov 2022 20:48:50 +0100 Subject: [PATCH 2/2] reader optimization --- hw10_program_optimization/new | 11 ----------- hw10_program_optimization/new2 | 12 ------------ hw10_program_optimization/new3 | 13 ------------- hw10_program_optimization/new4 | 11 ----------- hw10_program_optimization/new5 | 12 ------------ hw10_program_optimization/old | 9 --------- hw10_program_optimization/stats.go | 31 +++++++----------------------- 7 files changed, 7 insertions(+), 92 deletions(-) delete mode 100644 hw10_program_optimization/new delete mode 100644 hw10_program_optimization/new2 delete mode 100644 hw10_program_optimization/new3 delete mode 100644 hw10_program_optimization/new4 delete mode 100644 hw10_program_optimization/new5 delete mode 100644 hw10_program_optimization/old diff --git a/hw10_program_optimization/new b/hw10_program_optimization/new deleted file mode 100644 index efcc63f..0000000 --- a/hw10_program_optimization/new +++ /dev/null @@ -1,11 +0,0 @@ -goos: darwin -goarch: amd64 -pkg: github.com/sofiiakulish/hw/hw10_program_optimization -cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz -BenchmarkGetDomainStat-12 415 27505457 ns/op 20216 B/op 219 allocs/op -PASS -ok github.com/sofiiakulish/hw/hw10_program_optimization 14.602s - -//-regexp -"753039772" is not less than "300000000" -"433216279" is not less than "300000000" \ No newline at end of file diff --git a/hw10_program_optimization/new2 b/hw10_program_optimization/new2 deleted file mode 100644 index 5964b61..0000000 --- a/hw10_program_optimization/new2 +++ /dev/null @@ -1,12 +0,0 @@ -goos: darwin -goarch: amd64 -pkg: github.com/sofiiakulish/hw/hw10_program_optimization -cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz -BenchmarkGetDomainStat-12 440 27220810 ns/op 38720 B/op 264 allocs/op -PASS -ok github.com/sofiiakulish/hw/hw10_program_optimization 14.903s - -//-unmarshal -"753039772" is not less than "300000000" -"433216279" is not less than "300000000" -"356948427" is not less than "300000000" \ No newline at end of file diff --git a/hw10_program_optimization/new3 b/hw10_program_optimization/new3 deleted file mode 100644 index c34b3db..0000000 --- a/hw10_program_optimization/new3 +++ /dev/null @@ -1,13 +0,0 @@ -goos: darwin -goarch: amd64 -pkg: github.com/sofiiakulish/hw/hw10_program_optimization -cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz -BenchmarkGetDomainStat-12 436 26956833 ns/op 38532 B/op 256 allocs/op -PASS -ok github.com/sofiiakulish/hw/hw10_program_optimization 14.994s - -//+value -"753039772" is not less than "300000000" -"433216279" is not less than "300000000" -"356948427" is not less than "300000000" -"351686650" is not less than "300000000" \ No newline at end of file diff --git a/hw10_program_optimization/new4 b/hw10_program_optimization/new4 deleted file mode 100644 index 98e291b..0000000 --- a/hw10_program_optimization/new4 +++ /dev/null @@ -1,11 +0,0 @@ -goos: darwin -goarch: amd64 -pkg: github.com/sofiiakulish/hw/hw10_program_optimization -cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz -BenchmarkGetDomainStat-12 394 27823853 ns/op 18672 B/op 153 allocs/op -PASS -ok github.com/sofiiakulish/hw/hw10_program_optimization 14.471s - -//spitN => LastIndex -//memory -"118489512" is not less than "31457280" \ No newline at end of file diff --git a/hw10_program_optimization/new5 b/hw10_program_optimization/new5 deleted file mode 100644 index 914667b..0000000 --- a/hw10_program_optimization/new5 +++ /dev/null @@ -1,12 +0,0 @@ -goos: darwin -goarch: amd64 -pkg: github.com/sofiiakulish/hw/hw10_program_optimization -cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz -BenchmarkGetDomainStat-12 532 23263072 ns/op 16896 B/op 153 allocs/op -PASS -ok github.com/sofiiakulish/hw/hw10_program_optimization 14.910s - -//ioutil.ReadAll -> io.Copy -//memory -//"118489512" is not less than "31457280" -//"97059512" is not less than "31457280" \ No newline at end of file diff --git a/hw10_program_optimization/old b/hw10_program_optimization/old deleted file mode 100644 index bc9f311..0000000 --- a/hw10_program_optimization/old +++ /dev/null @@ -1,9 +0,0 @@ -goos: darwin -goarch: amd64 -pkg: github.com/sofiiakulish/hw/hw10_program_optimization -cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz -BenchmarkGetDomainStat-12 13 787762383 ns/op 466538174 B/op 5300259 allocs/op -PASS -ok github.com/sofiiakulish/hw/hw10_program_optimization 11.283s - -"753039772" is not less than "300000000" \ No newline at end of file diff --git a/hw10_program_optimization/stats.go b/hw10_program_optimization/stats.go index f4593d4..338a368 100644 --- a/hw10_program_optimization/stats.go +++ b/hw10_program_optimization/stats.go @@ -5,8 +5,8 @@ import ( "io" "strings" "github.com/valyala/fastjson" - "bytes" "sync" + "bufio" ) type User struct { @@ -39,37 +39,18 @@ var userPool = sync.Pool{ } func getUsers(r io.Reader) (result users, err error) { - var buf bytes.Buffer - _, err = io.Copy(&buf, r) - - if err != nil { - return - } - - lines := strings.Split(buf.String(), "\n") - var p fastjson.Parser - //var user User + var i int = 0 - // pool.Put(user) + scanner := bufio.NewScanner(r) - for i, line := range lines { - v, err2 := p.Parse(line) + for scanner.Scan() { + v, err2 := p.Parse(string(scanner.Text())) if err2 != nil { return } user := userPool.Get().(*User) - - // user = User{ - // v.GetInt("Id"), - // string(v.GetStringBytes("Name")), - // string(v.GetStringBytes("Username")), - // string(v.GetStringBytes("Email")), - // string(v.GetStringBytes("Phone")), - // string(v.GetStringBytes("Password")), - // string(v.GetStringBytes("Address")), - // } user.ID = v.GetInt("Id") user.Name = string(v.GetStringBytes("Name")) user.Username = string(v.GetStringBytes("Username")) @@ -79,6 +60,7 @@ func getUsers(r io.Reader) (result users, err error) { user.Address = string(v.GetStringBytes("Address")) result[i] = *user + i++ userPool.Put(user) } return @@ -101,5 +83,6 @@ func countDomains(u users, domain string) (DomainStat, error) { result[value] += 1 } } + return result, nil }