From 26e3f4759aa832fd9f0ff6e956ee36284c38a4f1 Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Sat, 10 Jun 2017 21:53:58 +1000 Subject: [PATCH 01/13] Introduce Keva store for nodes. --- diskindexstore.go | 15 +++++++++++++-- index.go | 2 +- vendor/vendor.json | 12 ++++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/diskindexstore.go b/diskindexstore.go index 6101a4a..2d011a4 100644 --- a/diskindexstore.go +++ b/diskindexstore.go @@ -5,15 +5,19 @@ import ( "encoding/hex" "fmt" "os" + "path" "path/filepath" "strings" + + "github.com/mandykoh/keva" ) const nodeFingerprintFile = "fingerprint" const nodeEntriesDir = "entries" type DiskIndexStore struct { - RootPath string + rootPath string + nodes *keva.Store } func (s *DiskIndexStore) AddEntry(entry *IndexEntry, node *IndexNode) error { @@ -63,7 +67,7 @@ func (s *DiskIndexStore) GetOrCreateChild(f Fingerprint, parent *IndexNode) (*In } func (s *DiskIndexStore) GetRoot() (*IndexNode, error) { - return s.getNodeByPath(s.RootPath) + return s.getNodeByPath(path.Join(s.rootPath, "legacy")) } func (s *DiskIndexStore) RemoveEntries(node *IndexNode) error { @@ -215,3 +219,10 @@ func (s *DiskIndexStore) saveNode(n *IndexNode, f Fingerprint) error { _, err = file.Write(f.Bytes()) return err } + +func NewDiskIndexStore(rootPath string) *DiskIndexStore { + return &DiskIndexStore{ + rootPath: rootPath, + nodes: keva.NewStore(path.Join(rootPath, "nodes")), + } +} diff --git a/index.go b/index.go index 15eeb1c..798cc35 100644 --- a/index.go +++ b/index.go @@ -58,7 +58,7 @@ func NewIndex(path string, maxFingerprintSize int, maxEntryDifference float64) * os.MkdirAll(path, 0700) return &Index{ - Store: &DiskIndexStore{RootPath: path}, + Store: NewDiskIndexStore(path), maxFingerprintSize: maxFingerprintSize, maxEntryDifference: maxEntryDifference, } diff --git a/vendor/vendor.json b/vendor/vendor.json index 3e037de..de1a935 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -2,6 +2,18 @@ "comment": "", "ignore": "test", "package": [ + { + "checksumSHA1": "hkHT2pYsqvJVbWqGR3Vt6K0KIUo=", + "path": "github.com/mandykoh/keva", + "revision": "958f7b64e49598bd87a38c19296c39d56de9d00d", + "revisionTime": "2017-08-02T13:19:59Z" + }, + { + "checksumSHA1": "fk05LCN5pjUKlw10ErXFIYoFxZk=", + "path": "github.com/mandykoh/symlock", + "revision": "0362cd091b6b627bf9552d87ba15956d5e8bde32", + "revisionTime": "2017-06-17T12:17:10Z" + }, { "checksumSHA1": "7E3Y1HU/UbsQF/dxMRdjFmx9QDQ=", "path": "golang.org/x/image/draw", From e2bb6198045b789f26a4585c25f8310a03a79787 Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Sun, 11 Jun 2017 10:32:16 +1000 Subject: [PATCH 02/13] Scope fingerprint tests using sub-tests. --- fingerprint_test.go | 291 ++++++++++++++++++++++---------------------- 1 file changed, 147 insertions(+), 144 deletions(-) diff --git a/fingerprint_test.go b/fingerprint_test.go index e2646a5..6ff7211 100644 --- a/fingerprint_test.go +++ b/fingerprint_test.go @@ -9,202 +9,205 @@ import ( "testing" ) -func TestBytesSerialisesToPackedBytes(t *testing.T) { - f := Fingerprint{samples: []byte{0x00, 0x00, 0xF0, 0xF0}} +func TestFingerprint(t *testing.T) { - actualString := fmt.Sprintf("%x", f.Bytes()) + testImage := func() image.Image { + img := image.NewNRGBA(image.Rectangle{Max: image.Point{X: 256, Y: 256}}) - if actualString != "00ff" { - t.Errorf("Fingerprint '%s' doesn't match expected", actualString) + for i := img.Bounds().Min.Y; i < img.Bounds().Max.Y; i++ { + for j := img.Bounds().Min.X; j < img.Bounds().Max.X; j++ { + img.Set(j, i, color.RGBA{uint8(i), uint8(j), uint8(i), 255}) + } + } + + return img } -} -func TestDifferenceReturnsZeroForSameFingerprint(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} - f2 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} + t.Run("Bytes() serialises to packed bytes", func(t *testing.T) { + f := Fingerprint{samples: []byte{0x00, 0x00, 0xF0, 0xF0}} - diff := f1.Difference(f2) + actualString := fmt.Sprintf("%x", f.Bytes()) - if diff != 0.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } + if actualString != "00ff" { + t.Errorf("Fingerprint '%s' doesn't match expected", actualString) + } + }) - diff = f2.Difference(f1) + t.Run("Difference() returns zero for same fingerprint", func(t *testing.T) { + f1 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} + f2 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} - if diff != 0.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } -} + diff := f1.Difference(f2) -func TestDifferenceReturnsOneForCompletelyDifferentFingerprint(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 0, 0, 255, 255, 255}} - f2 := Fingerprint{samples: []byte{255, 255, 255, 0, 0, 0}} + if diff != 0.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } - diff := f1.Difference(f2) + diff = f2.Difference(f1) - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } + if diff != 0.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } + }) - diff = f2.Difference(f1) + t.Run("Difference() returns one for completely different fingerprint", func(t *testing.T) { + f1 := Fingerprint{samples: []byte{0, 0, 0, 255, 255, 255}} + f2 := Fingerprint{samples: []byte{255, 255, 255, 0, 0, 0}} - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } -} + diff := f1.Difference(f2) -func TestDifferenceReturnsOneForDifferentlySizedFingerprint(t *testing.T) { - f1 := Fingerprint{samples: []byte{255, 255, 255}} - f2 := Fingerprint{samples: []byte{255, 255, 255, 255}} + if diff != 1.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } - diff := f1.Difference(f2) + diff = f2.Difference(f1) - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } + if diff != 1.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } + }) - diff = f2.Difference(f1) + t.Run("Difference() returns one for differently sized fingerprint", func(t *testing.T) { + f1 := Fingerprint{samples: []byte{255, 255, 255}} + f2 := Fingerprint{samples: []byte{255, 255, 255, 255}} - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } -} + diff := f1.Difference(f2) -func TestDistanceReturnsComponentwiseAbsoluteDifference(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} - f2 := Fingerprint{samples: []byte{1, 3, 6, 11, 146, 0}} + if diff != 1.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } - dist := f1.Distance(f2) + diff = f2.Difference(f1) - if dist != 286 { - t.Errorf("Distance %d doesn't match expected", dist) - } + if diff != 1.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } + }) - dist = f2.Distance(f1) + t.Run("Distance() returns componentwise absolute difference", func(t *testing.T) { + f1 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} + f2 := Fingerprint{samples: []byte{1, 3, 6, 11, 146, 0}} - if dist != 286 { - t.Errorf("Distance %d doesn't match expected", dist) - } -} + dist := f1.Distance(f2) -func TestDistanceReturnsMaxValueForMismatchedLength(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 0, 0}} - f2 := Fingerprint{samples: []byte{0, 0, 0, 0}} + if dist != 286 { + t.Errorf("Distance %d doesn't match expected", dist) + } - dist := f1.Distance(f2) + dist = f2.Distance(f1) - if dist != math.MaxUint64 { - t.Errorf("Distance %d wasn't max uint64", dist) - } -} + if dist != 286 { + t.Errorf("Distance %d doesn't match expected", dist) + } + }) -func TestMarshalTextSerialisesToPackedHexStringBytes(t *testing.T) { - f := Fingerprint{samples: []byte{0x00, 0x00, 0xFF, 0xFF}} + t.Run("Distance() returns max value for mismatched length", func(t *testing.T) { + f1 := Fingerprint{samples: []byte{0, 0, 0}} + f2 := Fingerprint{samples: []byte{0, 0, 0, 0}} - actual, err := f.MarshalText() + dist := f1.Distance(f2) - if err != nil { - t.Errorf("Error while marshalling: %s", err) - } - if string(actual) != "00ff" { - t.Errorf("Fingerprint '%s' doesn't match expected", actual) - } -} + if dist != math.MaxUint64 { + t.Errorf("Distance %d wasn't max uint64", dist) + } + }) -func TestSizeReturnsCorrectSideLength(t *testing.T) { - img := testImage() + t.Run("MarshalText() serialises to packed hex string bytes", func(t *testing.T) { + f := Fingerprint{samples: []byte{0x00, 0x00, 0xFF, 0xFF}} - f := NewFingerprint(img, 3) - size := f.Size() + actual, err := f.MarshalText() - if size != 3 { - t.Errorf("Size %d doesn't match expected", size) - } + if err != nil { + t.Errorf("Error while marshalling: %s", err) + } + if string(actual) != "00ff" { + t.Errorf("Fingerprint '%s' doesn't match expected", actual) + } + }) - f = NewFingerprint(img, 7) - size = f.Size() + t.Run("Size() returns correct side length", func(t *testing.T) { + img := testImage() - if size != 7 { - t.Errorf("Size %d doesn't match expected", size) - } + f := NewFingerprint(img, 3) + size := f.Size() - f = Fingerprint{samples: make([]byte, 5*5)} - size = f.Size() + if size != 3 { + t.Errorf("Size %d doesn't match expected", size) + } - if size != 5 { - t.Errorf("Size %d doesn't match expected", size) - } -} + f = NewFingerprint(img, 7) + size = f.Size() -func TestStringSerialisesToPackedHexString(t *testing.T) { - f := Fingerprint{samples: []byte{ - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - }} + if size != 7 { + t.Errorf("Size %d doesn't match expected", size) + } - actualString := fmt.Sprintf("%s", f) + f = Fingerprint{samples: make([]byte, 5*5)} + size = f.Size() - if actualString != "fffffffffffffffffffffffff0" { - t.Errorf("Fingerprint '%s' doesn't match expected", actualString) - } -} + if size != 5 { + t.Errorf("Size %d doesn't match expected", size) + } + }) -func TestUnmarshalBytesDeserialisesFromPackedBytes(t *testing.T) { - b := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF0} + t.Run("String() serialises to packed hex string", func(t *testing.T) { + f := Fingerprint{samples: []byte{ + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + }} - f := Fingerprint{} - f.UnmarshalBytes(b) + actualString := fmt.Sprintf("%s", f) - if len(f.samples) != 25 { - t.Fatalf("Fingerprint length %d doesn't match expected", len(f.samples)) - } - for i := 0; i < 25; i++ { - if f.samples[i] != 0xF0 { - t.Errorf("Fingerprint byte '%d' doesn't match expected", f.samples[i]) + if actualString != "fffffffffffffffffffffffff0" { + t.Errorf("Fingerprint '%s' doesn't match expected", actualString) } - } -} + }) -func TestUnmarshalTextDeserialisesFromPackedHexStringBytes(t *testing.T) { - text := []byte("fffffffffffffffffffffffff0") + t.Run("UnmarshalBytes() deserialises from packed bytes", func(t *testing.T) { + b := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF0} - f := Fingerprint{} - f.UnmarshalText(text) + f := Fingerprint{} + f.UnmarshalBytes(b) - if len(f.samples) != 25 { - t.Fatalf("Fingerprint length %d doesn't match expected", len(f.samples)) - } - for i := 0; i < 25; i++ { - if f.samples[i] != 0xF0 { - t.Errorf("Fingerprint byte '%d' doesn't match expected", f.samples[i]) + if len(f.samples) != 25 { + t.Fatalf("Fingerprint length %d doesn't match expected", len(f.samples)) } - } -} + for i := 0; i < 25; i++ { + if f.samples[i] != 0xF0 { + t.Errorf("Fingerprint byte '%d' doesn't match expected", f.samples[i]) + } + } + }) -func TestNewFingerprintGeneratesBinaryRepresentation(t *testing.T) { - f := NewFingerprint(testImage(), 3) + t.Run("UnmarshalText() deserialises from packed hex string bytes", func(t *testing.T) { + text := []byte("fffffffffffffffffffffffff0") - expected, _ := hex.DecodeString("3060805080a070a0c0") + f := Fingerprint{} + f.UnmarshalText(text) - expectedString := hex.EncodeToString(expected) - actualString := hex.EncodeToString(f.samples) + if len(f.samples) != 25 { + t.Fatalf("Fingerprint length %d doesn't match expected", len(f.samples)) + } + for i := 0; i < 25; i++ { + if f.samples[i] != 0xF0 { + t.Errorf("Fingerprint byte '%d' doesn't match expected", f.samples[i]) + } + } + }) - if expectedString != actualString { - t.Fatalf("Fingerprint '%s' doesn't match expected '%s'", actualString, expectedString) - } -} + t.Run("NewFingerprint() generates binary representation", func(t *testing.T) { + f := NewFingerprint(testImage(), 3) -func testImage() image.Image { - img := image.NewNRGBA(image.Rectangle{Max: image.Point{X: 256, Y: 256}}) + expected, _ := hex.DecodeString("3060805080a070a0c0") - for i := img.Bounds().Min.Y; i < img.Bounds().Max.Y; i++ { - for j := img.Bounds().Min.X; j < img.Bounds().Max.X; j++ { - img.Set(j, i, color.RGBA{uint8(i), uint8(j), uint8(i), 255}) - } - } + expectedString := hex.EncodeToString(expected) + actualString := hex.EncodeToString(f.samples) - return img + if expectedString != actualString { + t.Fatalf("Fingerprint '%s' doesn't match expected '%s'", actualString, expectedString) + } + }) } From 488383c8504f5c8a7f5e6d2facaeeb59e4b34a7d Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Sun, 11 Jun 2017 11:36:11 +1000 Subject: [PATCH 03/13] Save thumbnails to a separate location MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a temporary step; in future we don’t want to store thumbnails in the index at all, but that will require a new fingerprint format. --- diskindexstore.go | 22 +++++++++++++++++++++- indexentry.go | 45 ++++++++++++++++++++++++++------------------- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/diskindexstore.go b/diskindexstore.go index 2d011a4..096ba65 100644 --- a/diskindexstore.go +++ b/diskindexstore.go @@ -14,6 +14,7 @@ import ( const nodeFingerprintFile = "fingerprint" const nodeEntriesDir = "entries" +const thumbnailsDir = "thumbnails" type DiskIndexStore struct { rootPath string @@ -24,7 +25,7 @@ func (s *DiskIndexStore) AddEntry(entry *IndexEntry, node *IndexNode) error { entriesDir := filepath.Join(node.path, nodeEntriesDir) os.Mkdir(entriesDir, os.ModePerm) - err := entry.saveToDir(entriesDir) + err := entry.save(entriesDir, s.pathForThumbnail(entry)) if err != nil { return err } @@ -183,6 +184,10 @@ func (s *DiskIndexStore) loadAllEntries(n *IndexNode) error { if err != nil { return err } + err = entry.loadThumbnail(s.pathForThumbnail(entry)) + if err != nil { + return err + } n.registerEntry(entry) } @@ -202,6 +207,12 @@ func (s *DiskIndexStore) loadChild(n *IndexNode, childDirName string) (*IndexNod return &IndexNodeHandle{Path: childPath, Fingerprint: childFingerprint}, nil } +func (s *DiskIndexStore) pathForThumbnail(entry *IndexEntry) string { + thumbnailHash := sha256.Sum256(entry.MaxFingerprint.Bytes()) + thumbnailHex := hex.EncodeToString(thumbnailHash[:]) + return path.Join(s.rootPath, thumbnailsDir, thumbnailHex[0:2], thumbnailHex[2:4], thumbnailHex[4:]) +} + func (s *DiskIndexStore) saveNode(n *IndexNode, f Fingerprint) error { fmt.Printf("Saving node %s\n", n.path) @@ -221,6 +232,15 @@ func (s *DiskIndexStore) saveNode(n *IndexNode, f Fingerprint) error { } func NewDiskIndexStore(rootPath string) *DiskIndexStore { + legacyNodesDir := path.Join(rootPath, "legacy") + err := os.MkdirAll(legacyNodesDir, os.FileMode(0700)) + if err != nil { + fmt.Printf("Error: %v\n", err) + } + + thumbnailsDir := path.Join(rootPath, thumbnailsDir) + os.MkdirAll(thumbnailsDir, os.FileMode(0700)) + return &DiskIndexStore{ rootPath: rootPath, nodes: keva.NewStore(path.Join(rootPath, "nodes")), diff --git a/indexentry.go b/indexentry.go index 50043ad..c38bfab 100644 --- a/indexentry.go +++ b/indexentry.go @@ -25,31 +25,46 @@ func (entry *IndexEntry) FingerprintForSize(size int) Fingerprint { return NewFingerprint(entry.Thumbnail, size) } -func (entry *IndexEntry) saveToDir(path string) error { - jsonFile := filepath.Join(path, entry.key+".entry") - jsonOut, err := os.Create(jsonFile) +func (entry *IndexEntry) loadThumbnail(path string) error { + thumbnailFile, err := os.Open(path) if err != nil { return err } - defer jsonOut.Close() + defer thumbnailFile.Close() - jsonEncoder := json.NewEncoder(jsonOut) - jsonEncoder.Encode(entry) + entry.Thumbnail, err = png.Decode(thumbnailFile) + return err +} - thumbnailFile := jsonFile + ".thumb" - thumbnailOut, err := os.Create(thumbnailFile) +func (entry *IndexEntry) saveThumbnail(path string) error { + thumbnailDir := filepath.Dir(path) + os.MkdirAll(thumbnailDir, os.FileMode(0700)) + + thumbnailOut, err := os.Create(path) if err != nil { return err } defer thumbnailOut.Close() pngEncoder := png.Encoder{} - err = pngEncoder.Encode(thumbnailOut, entry.Thumbnail) + return pngEncoder.Encode(thumbnailOut, entry.Thumbnail) +} + +func (entry *IndexEntry) save(path string, thumbnailPath string) error { + jsonFile := filepath.Join(path, entry.key+".entry") + jsonOut, err := os.Create(jsonFile) if err != nil { return err } + defer jsonOut.Close() - return nil + jsonEncoder := json.NewEncoder(jsonOut) + err = jsonEncoder.Encode(entry) + if err != nil { + return err + } + + return entry.saveThumbnail(thumbnailPath) } func NewIndexEntry(image image.Image, maxFingerprintSize int) (*IndexEntry, error) { @@ -84,15 +99,7 @@ func NewIndexEntryFromFile(file string) (*IndexEntry, error) { } jsonDecoder := json.NewDecoder(jsonFile) - jsonDecoder.Decode(entry) - - thumbnailFile, err := os.Open(file + ".thumb") - if err != nil { - return nil, err - } - defer thumbnailFile.Close() - - entry.Thumbnail, err = png.Decode(thumbnailFile) + err = jsonDecoder.Decode(entry) if err != nil { return nil, err } From 73e454a27876cccf6b4d6377fa9a682e8ee7c4fd Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Mon, 12 Jun 2017 11:40:56 +1000 Subject: [PATCH 04/13] Get rid of IndexNodeHandles We can do everything by fingerprints now and no longer need paths. --- diskindexstore.go | 17 +++++-------- indexnode.go | 62 ++++++++++++++++++++++------------------------ indexnodehandle.go | 6 ----- 3 files changed, 36 insertions(+), 49 deletions(-) delete mode 100644 indexnodehandle.go diff --git a/diskindexstore.go b/diskindexstore.go index 096ba65..5cee7dd 100644 --- a/diskindexstore.go +++ b/diskindexstore.go @@ -53,7 +53,7 @@ func (s *DiskIndexStore) GetOrCreateChild(f Fingerprint, parent *IndexNode) (*In node = &IndexNode{ path: childPath, - childrenByFingerprint: make(map[string]*IndexNodeHandle), + childFingerprintsByString: make(map[string]Fingerprint), } err := s.saveNode(node, f) @@ -61,7 +61,7 @@ func (s *DiskIndexStore) GetOrCreateChild(f Fingerprint, parent *IndexNode) (*In return nil, err } - parent.registerChild(node, f) + parent.registerChild(f) } return node, nil @@ -126,7 +126,7 @@ func (s *DiskIndexStore) getNodeByPath(path string) (*IndexNode, error) { node := &IndexNode{ path: path, - childrenByFingerprint: make(map[string]*IndexNodeHandle), + childFingerprintsByString: make(map[string]Fingerprint), } err = s.loadAllChildren(node) @@ -157,7 +157,7 @@ func (s *DiskIndexStore) loadAllChildren(n *IndexNode) error { return err } - n.registerChildByHandle(child) + n.registerChild(child) } } } @@ -197,14 +197,9 @@ func (s *DiskIndexStore) loadAllEntries(n *IndexNode) error { return nil } -func (s *DiskIndexStore) loadChild(n *IndexNode, childDirName string) (*IndexNodeHandle, error) { +func (s *DiskIndexStore) loadChild(n *IndexNode, childDirName string) (Fingerprint, error) { childPath := filepath.Join(n.path, childDirName) - childFingerprint, err := s.fingerprintForChild(childPath) - if err != nil { - return nil, err - } - - return &IndexNodeHandle{Path: childPath, Fingerprint: childFingerprint}, nil + return s.fingerprintForChild(childPath) } func (s *DiskIndexStore) pathForThumbnail(entry *IndexEntry) string { diff --git a/indexnode.go b/indexnode.go index bb01294..29c5d4c 100644 --- a/indexnode.go +++ b/indexnode.go @@ -10,10 +10,10 @@ import ( var errResultLimitReached = errors.New("result limit reached") type IndexNode struct { - path string - children []*IndexNodeHandle - childrenByFingerprint map[string]*IndexNodeHandle - entries []*IndexEntry + path string + childFingerprints []Fingerprint + childFingerprintsByString map[string]Fingerprint + entries []*IndexEntry } func (node *IndexNode) Add(entry *IndexEntry, childFingerprintSize int, index *Index) (*IndexNode, error) { @@ -22,7 +22,7 @@ func (node *IndexNode) Add(entry *IndexEntry, childFingerprintSize int, index *I entryFingerprint := entry.FingerprintForSize(childFingerprintSize) - if len(node.children) == 0 { + if len(node.childFingerprints) == 0 { // We can go deeper and this new entry is sufficiently different to // the rest, so split this leaf node by turning entries into children. @@ -89,10 +89,13 @@ func (node *IndexNode) gatherNearest(entry *IndexEntry, childFingerprintSize int // Check for an exact matching child entryFingerprint := entry.FingerprintForSize(childFingerprintSize) - exactChildHandle := node.childrenByFingerprint[entryFingerprint.String()] + exactChildFingerprint, exactChildFingerprintExists := node.childFingerprintsByString[entryFingerprint.String()] + var exactChildFingerprintString string var exactChild *IndexNode - if exactChildHandle != nil { + if exactChildFingerprintExists { + exactChildFingerprintString = exactChildFingerprint.String() + var err error exactChild, err = index.Store.GetChild(entryFingerprint, node) if err != nil { @@ -113,11 +116,11 @@ func (node *IndexNode) gatherNearest(entry *IndexEntry, childFingerprintSize int } } - children := make([]*IndexNodeHandle, len(node.children)) - copy(children, node.children) + childFingerprints := make([]Fingerprint, len(node.childFingerprints)) + copy(childFingerprints, node.childFingerprints) // Need more results - find and sort all children by nearness - sort.Sort(nodesByDifferenceToFingerprintWith(children, entryFingerprint)) + sort.Sort(nodesByDifferenceToFingerprintWith(childFingerprints, entryFingerprint)) // fmt.Printf("Sorting %d children...\n", len(children)) // for i, child := range children { @@ -126,13 +129,13 @@ func (node *IndexNode) gatherNearest(entry *IndexEntry, childFingerprintSize int // } // Recursively gather from nearest children - for i, child := range children { + for i, cf := range childFingerprints { fmt.Printf("Visiting child %d\n", i) - if exactChild != nil && child.Path == exactChild.path { + if exactChildFingerprintExists && cf.String() == exactChildFingerprintString { continue } - childNode, err := index.Store.GetChild(child.Fingerprint, node) + childNode, err := index.Store.GetChild(cf, node) if err != nil { return err } @@ -177,14 +180,9 @@ func (node *IndexNode) pushEntriesToChildren(childFingerprintSize int, store Ind return store.RemoveEntries(node) } -func (node *IndexNode) registerChild(child *IndexNode, f Fingerprint) { - childHandle := &IndexNodeHandle{Path: child.path, Fingerprint: f} - node.registerChildByHandle(childHandle) -} - -func (node *IndexNode) registerChildByHandle(childHandle *IndexNodeHandle) { - node.children = append(node.children, childHandle) - node.childrenByFingerprint[childHandle.Fingerprint.String()] = childHandle +func (node *IndexNode) registerChild(childFingerprint Fingerprint) { + node.childFingerprints = append(node.childFingerprints, childFingerprint) + node.childFingerprintsByString[childFingerprint.String()] = childFingerprint } func (node *IndexNode) registerEntry(entry *IndexEntry) { @@ -207,12 +205,12 @@ func (node *IndexNode) withEachEntry(action func(*IndexEntry) error) error { } type nodesByDifferenceToFingerprint struct { - nodes []*IndexNodeHandle - differences []float64 + nodeFingerprints []Fingerprint + differences []float64 } func (sorter *nodesByDifferenceToFingerprint) Len() int { - return len(sorter.nodes) + return len(sorter.nodeFingerprints) } func (sorter *nodesByDifferenceToFingerprint) Less(i, j int) bool { @@ -220,20 +218,20 @@ func (sorter *nodesByDifferenceToFingerprint) Less(i, j int) bool { } func (sorter *nodesByDifferenceToFingerprint) Swap(i, j int) { - tmpNode := sorter.nodes[i] - sorter.nodes[i] = sorter.nodes[j] - sorter.nodes[j] = tmpNode + tmp := sorter.nodeFingerprints[i] + sorter.nodeFingerprints[i] = sorter.nodeFingerprints[j] + sorter.nodeFingerprints[j] = tmp tmpDiff := sorter.differences[i] sorter.differences[i] = sorter.differences[j] sorter.differences[j] = tmpDiff } -func nodesByDifferenceToFingerprintWith(nodes []*IndexNodeHandle, f Fingerprint) *nodesByDifferenceToFingerprint { - differences := make([]float64, len(nodes), len(nodes)) - for i, n := range nodes { - differences[i] = n.Fingerprint.Difference(f) +func nodesByDifferenceToFingerprintWith(nodeFingerprints []Fingerprint, f Fingerprint) *nodesByDifferenceToFingerprint { + differences := make([]float64, len(nodeFingerprints), len(nodeFingerprints)) + for i, nf := range nodeFingerprints { + differences[i] = nf.Difference(f) } - return &nodesByDifferenceToFingerprint{nodes: nodes, differences: differences} + return &nodesByDifferenceToFingerprint{nodeFingerprints: nodeFingerprints, differences: differences} } diff --git a/indexnodehandle.go b/indexnodehandle.go deleted file mode 100644 index 3314028..0000000 --- a/indexnodehandle.go +++ /dev/null @@ -1,6 +0,0 @@ -package simian - -type IndexNodeHandle struct { - Path string - Fingerprint Fingerprint -} From 78da9fd133cbcf57f6cc12e935c44dde1095eb89 Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Tue, 27 Jun 2017 23:28:39 +1000 Subject: [PATCH 05/13] Make IndexNodes JSON-serialisable. --- diskindexstore.go | 4 +- indexnode.go | 40 +++++++++++++++++++- indexnode_test.go | 94 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+), 4 deletions(-) create mode 100644 indexnode_test.go diff --git a/diskindexstore.go b/diskindexstore.go index 5cee7dd..ec1a5d5 100644 --- a/diskindexstore.go +++ b/diskindexstore.go @@ -53,7 +53,7 @@ func (s *DiskIndexStore) GetOrCreateChild(f Fingerprint, parent *IndexNode) (*In node = &IndexNode{ path: childPath, - childFingerprintsByString: make(map[string]Fingerprint), + childFingerprintsByString: make(map[string]*Fingerprint), } err := s.saveNode(node, f) @@ -126,7 +126,7 @@ func (s *DiskIndexStore) getNodeByPath(path string) (*IndexNode, error) { node := &IndexNode{ path: path, - childFingerprintsByString: make(map[string]Fingerprint), + childFingerprintsByString: make(map[string]*Fingerprint), } err = s.loadAllChildren(node) diff --git a/indexnode.go b/indexnode.go index 29c5d4c..82878b1 100644 --- a/indexnode.go +++ b/indexnode.go @@ -1,6 +1,7 @@ package simian import ( + "encoding/json" "errors" "fmt" "math" @@ -12,7 +13,7 @@ var errResultLimitReached = errors.New("result limit reached") type IndexNode struct { path string childFingerprints []Fingerprint - childFingerprintsByString map[string]Fingerprint + childFingerprintsByString map[string]*Fingerprint entries []*IndexEntry } @@ -61,6 +62,35 @@ func (node *IndexNode) FindNearest(entry *IndexEntry, childFingerprintSize int, return results, nil } +func (node *IndexNode) MarshalJSON() ([]byte, error) { + return json.Marshal(&indexNodeJSON{ + Path: node.path, + ChildFingerprints: node.childFingerprints, + Entries: node.entries, + }) +} + +func (node *IndexNode) UnmarshalJSON(b []byte) error { + var value indexNodeJSON + err := json.Unmarshal(b, &value) + if err != nil { + return err + } + + node.path = value.Path + node.childFingerprints = value.ChildFingerprints + + node.childFingerprintsByString = make(map[string]*Fingerprint) + for i := 0; i < len(node.childFingerprints); i++ { + f := &node.childFingerprints[i] + node.childFingerprintsByString[f.String()] = f + } + + node.entries = value.Entries + + return nil +} + func (node *IndexNode) addSimilarEntriesTo(entries *[]*IndexEntry, fingerprint Fingerprint, maxDifference float64) error { fmt.Printf("addSimilarEntriesTo\n") @@ -182,7 +212,7 @@ func (node *IndexNode) pushEntriesToChildren(childFingerprintSize int, store Ind func (node *IndexNode) registerChild(childFingerprint Fingerprint) { node.childFingerprints = append(node.childFingerprints, childFingerprint) - node.childFingerprintsByString[childFingerprint.String()] = childFingerprint + node.childFingerprintsByString[childFingerprint.String()] = &node.childFingerprints[len(node.childFingerprints)-1] } func (node *IndexNode) registerEntry(entry *IndexEntry) { @@ -204,6 +234,12 @@ func (node *IndexNode) withEachEntry(action func(*IndexEntry) error) error { return nil } +type indexNodeJSON struct { + Path string `json:"path"` + ChildFingerprints []Fingerprint `json:"childFingerprints"` + Entries []*IndexEntry `json:"entries"` +} + type nodesByDifferenceToFingerprint struct { nodeFingerprints []Fingerprint differences []float64 diff --git a/indexnode_test.go b/indexnode_test.go new file mode 100644 index 0000000..0a284a8 --- /dev/null +++ b/indexnode_test.go @@ -0,0 +1,94 @@ +package simian + +import ( + "encoding/json" + "testing" +) + +func TestIndexNode(t *testing.T) { + + t.Run("JSON serialisation", func(t *testing.T) { + + t.Run("should roundtrip all fields", func(t *testing.T) { + n := &IndexNode{ + path: "some-path", + childFingerprintsByString: make(map[string]*Fingerprint), + } + + n.registerChild(Fingerprint{samples: []uint8{0x10, 0x20, 0x30, 0x40}}) + n.registerChild(Fingerprint{samples: []uint8{0x50, 0x60, 0x70, 0x80}}) + + entry1 := &IndexEntry{ + MaxFingerprint: Fingerprint{samples: []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9}}, + Attributes: make(map[string]interface{}), + } + n.registerEntry(entry1) + + entry2 := &IndexEntry{ + MaxFingerprint: Fingerprint{samples: []uint8{10, 11, 12, 13, 14, 15, 16, 17, 18}}, + Attributes: make(map[string]interface{}), + } + n.registerEntry(entry2) + + jsonBytes, err := json.Marshal(n) + if err != nil { + t.Fatalf("Error marshalling JSON: %v", err) + } + + var result *IndexNode + err = json.Unmarshal(jsonBytes, &result) + if err != nil { + t.Fatalf("Error unmarshalling JSON: %v", err) + } + + if result.path != n.path { + t.Errorf("Expected path '%s' but got '%s'", n.path, result.path) + } + + if actual, expected := len(result.childFingerprints), len(n.childFingerprints); actual != expected { + t.Fatalf("Expected %d child fingerprints but got %d", expected, actual) + } + for i := 0; i < len(result.childFingerprints); i++ { + actual := result.childFingerprints[i].String() + expected := n.childFingerprints[i].String() + + if actual != expected { + t.Errorf("Expected fingerprint '%s' but got '%s'", expected, actual) + } + } + + if actual, expected := len(result.childFingerprintsByString), len(n.childFingerprintsByString); actual != expected { + t.Fatalf("Expected %d child fingerprints mapped by string but got %d", expected, actual) + } + for k, v := range n.childFingerprintsByString { + actual := result.childFingerprintsByString[k].String() + expected := v.String() + + if actual != expected { + t.Errorf("Expected fingerprint '%s' but got '%s'", expected, actual) + } + } + + if actual, expected := len(result.entries), len(n.entries); actual != expected { + t.Fatalf("Expected %d entries but got %d", expected, actual) + } + for i := 0; i < len(result.entries); i++ { + actualBytes, err := json.Marshal(result.entries[i]) + if err != nil { + t.Fatalf("Error marshalling entry: %v", err) + } + actual := string(actualBytes) + + expectedBytes, err := json.Marshal(n.entries[i]) + if err != nil { + t.Fatalf("Error marshalling entry: %v", err) + } + expected := string(expectedBytes) + + if actual != expected { + t.Errorf("Expected entry '%s' but got '%s'", expected, actual) + } + } + }) + }) +} From 2ea9ef3edf1849dde8b5089e4b6a4b3b2645876a Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Thu, 10 Aug 2017 22:02:43 +1000 Subject: [PATCH 06/13] Expose store creation errors. --- diskindexstore.go | 13 +++++++++---- index.go | 16 ++++++++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/diskindexstore.go b/diskindexstore.go index ec1a5d5..2920154 100644 --- a/diskindexstore.go +++ b/diskindexstore.go @@ -226,18 +226,23 @@ func (s *DiskIndexStore) saveNode(n *IndexNode, f Fingerprint) error { return err } -func NewDiskIndexStore(rootPath string) *DiskIndexStore { +func NewDiskIndexStore(rootPath string) (*DiskIndexStore, error) { legacyNodesDir := path.Join(rootPath, "legacy") err := os.MkdirAll(legacyNodesDir, os.FileMode(0700)) if err != nil { - fmt.Printf("Error: %v\n", err) + return nil, err } thumbnailsDir := path.Join(rootPath, thumbnailsDir) os.MkdirAll(thumbnailsDir, os.FileMode(0700)) + nodeStore, err := keva.NewStore(path.Join(rootPath, "nodes")) + if err != nil { + return nil, err + } + return &DiskIndexStore{ rootPath: rootPath, - nodes: keva.NewStore(path.Join(rootPath, "nodes")), - } + nodes: nodeStore, + }, nil } diff --git a/index.go b/index.go index 798cc35..0866107 100644 --- a/index.go +++ b/index.go @@ -54,14 +54,22 @@ func (i *Index) FindNearest(image image.Image, maxResults int, maxDifference flo return results, err } -func NewIndex(path string, maxFingerprintSize int, maxEntryDifference float64) *Index { - os.MkdirAll(path, 0700) +func NewIndex(path string, maxFingerprintSize int, maxEntryDifference float64) (*Index, error) { + err := os.MkdirAll(path, 0700) + if err != nil { + return nil, err + } + + indexStore, err := NewDiskIndexStore(path) + if err != nil { + return nil, err + } return &Index{ - Store: NewDiskIndexStore(path), + Store: indexStore, maxFingerprintSize: maxFingerprintSize, maxEntryDifference: maxEntryDifference, - } + }, err } type entriesByDifferenceToEntry struct { From cf333d522c9eaf644c60ce3117afe3c00b586682 Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Thu, 10 Aug 2017 22:29:15 +1000 Subject: [PATCH 07/13] Make IndexEntry JSON serialisable. --- indexentry.go | 37 ++++++++++++++++++++++++++++++++++--- indexentry_test.go | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 indexentry_test.go diff --git a/indexentry.go b/indexentry.go index c38bfab..017eb65 100644 --- a/indexentry.go +++ b/indexentry.go @@ -16,15 +16,41 @@ const keyBitLength = 256 type IndexEntry struct { key string - Thumbnail image.Image `json:"-"` - MaxFingerprint Fingerprint `json:"maxFingerprint"` - Attributes map[string]interface{} `json:"attributes"` + Thumbnail image.Image + MaxFingerprint Fingerprint + Attributes map[string]interface{} } func (entry *IndexEntry) FingerprintForSize(size int) Fingerprint { return NewFingerprint(entry.Thumbnail, size) } +func (entry *IndexEntry) MarshalJSON() ([]byte, error) { + return json.Marshal(&indexEntryJSON{ + MaxFingerprint: entry.MaxFingerprint.Bytes(), + Attributes: entry.Attributes, + }) +} + +func (entry *IndexEntry) UnmarshalJSON(b []byte) error { + var value indexEntryJSON + err := json.Unmarshal(b, &value) + if err != nil { + return err + } + + var fingerprint Fingerprint + err = fingerprint.UnmarshalBytes(value.MaxFingerprint) + if err != nil { + return err + } + + entry.MaxFingerprint = fingerprint + entry.Attributes = value.Attributes + + return nil +} + func (entry *IndexEntry) loadThumbnail(path string) error { thumbnailFile, err := os.Open(path) if err != nil { @@ -135,3 +161,8 @@ func makeThumbnail(src image.Image, size int) image.Image { return thumbnail } + +type indexEntryJSON struct { + MaxFingerprint []byte `json:"maxFingerprint"` + Attributes map[string]interface{} +} diff --git a/indexentry_test.go b/indexentry_test.go new file mode 100644 index 0000000..aae46e3 --- /dev/null +++ b/indexentry_test.go @@ -0,0 +1,41 @@ +package simian + +import ( + "encoding/json" + "reflect" + "testing" +) + +func TestIndexEntry(t *testing.T) { + + t.Run("JSON serialisation", func(t *testing.T) { + + t.Run("should roundtrip all fields", func(t *testing.T) { + + entry := &IndexEntry{ + MaxFingerprint: Fingerprint{samples: []uint8{0xF0, 0xF0, 0xF0, 0xF0}}, + Attributes: make(map[string]interface{}), + } + entry.Attributes["some key"] = "some value" + entry.Attributes["some other key"] = "some other value" + + jsonBytes, err := json.Marshal(entry) + if err != nil { + t.Fatalf("Error marshalling JSON: %v", err) + } + + var result *IndexEntry + err = json.Unmarshal(jsonBytes, &result) + if err != nil { + t.Fatalf("Error unmarshalling JSON: %v", err) + } + + if distance := result.MaxFingerprint.Distance(entry.MaxFingerprint); distance != 0 { + t.Errorf("Expected no difference in fingerprints but got %d", distance) + } + if !reflect.DeepEqual(entry.Attributes, result.Attributes) { + t.Errorf("Expected attributes to match but got %v", result.Attributes) + } + }) + }) +} From da4b4612ca4b881e5dd7623eb643474cf2fb9fa2 Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Fri, 11 Aug 2017 17:18:12 +1000 Subject: [PATCH 08/13] Use Keva for storing the index. --- diskindexstore.go | 229 +++++++++++++++------------------------------- index.go | 13 ++- indexentry.go | 63 +------------ indexnode.go | 44 ++++----- indexnode_test.go | 5 - indexstore.go | 7 +- 6 files changed, 109 insertions(+), 252 deletions(-) diff --git a/diskindexstore.go b/diskindexstore.go index 2920154..04e73df 100644 --- a/diskindexstore.go +++ b/diskindexstore.go @@ -6,8 +6,6 @@ import ( "fmt" "os" "path" - "path/filepath" - "strings" "github.com/mandykoh/keva" ) @@ -21,185 +19,120 @@ type DiskIndexStore struct { nodes *keva.Store } -func (s *DiskIndexStore) AddEntry(entry *IndexEntry, node *IndexNode) error { - entriesDir := filepath.Join(node.path, nodeEntriesDir) - os.Mkdir(entriesDir, os.ModePerm) - - err := entry.save(entriesDir, s.pathForThumbnail(entry)) +func (s *DiskIndexStore) AddEntry(entry *IndexEntry, node *IndexNode, nodeFingerprint Fingerprint) error { + err := entry.saveThumbnail(s.pathForThumbnail(entry)) if err != nil { return err } node.registerEntry(entry) - return nil -} -func (s *DiskIndexStore) GetChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) { - path := s.childPathForFingerprint(f, parent.path) - return s.getNodeByPath(path) + fmt.Printf("AddEntry - Saving [%s] %d %d\n", nodeFingerprint.String(), len(node.childFingerprints), len(node.entries)) + return s.nodes.Put(nodeFingerprint.String(), node) } -func (s *DiskIndexStore) GetOrCreateChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) { - fmt.Printf("GetOrCreateChild()\n") - childPath := s.childPathForFingerprint(f, parent.path) +func (s *DiskIndexStore) Close() error { + return s.nodes.Close() +} - node, err := s.getNodeByPath(childPath) - if err != nil { - return nil, err - } +func (s *DiskIndexStore) GetChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) { + var node IndexNode - if node == nil { - fmt.Printf("Creating child\n") + err := s.nodes.Get(f.String(), &node) + if err == keva.ErrValueNotFound { + return nil, nil - node = &IndexNode{ - path: childPath, - childFingerprintsByString: make(map[string]*Fingerprint), - } - - err := s.saveNode(node, f) + } else if err == nil { + err = s.loadThumbnails(&node) if err != nil { return nil, err } - parent.registerChild(f) + } else { + return nil, err } - return node, nil + return &node, nil } -func (s *DiskIndexStore) GetRoot() (*IndexNode, error) { - return s.getNodeByPath(path.Join(s.rootPath, "legacy")) -} +func (s *DiskIndexStore) GetOrCreateChild(f Fingerprint, parent *IndexNode, parentFingerprint Fingerprint) (*IndexNode, error) { + fmt.Printf("GetOrCreateChild() %s\n", f.String()) -func (s *DiskIndexStore) RemoveEntries(node *IndexNode) error { - entriesDir := filepath.Join(node.path, nodeEntriesDir) - err := os.RemoveAll(entriesDir) - if err != nil { - return err - } + nodeKey := f.String() - node.removeEntries() - return nil -} + var node IndexNode + err := s.nodes.Get(nodeKey, &node) -func (s *DiskIndexStore) childPathForFingerprint(f Fingerprint, parentPath string) string { - fingerprintHash := sha256.Sum256(f.Bytes()) - childDirName := hex.EncodeToString(fingerprintHash[:8]) - return filepath.Join(parentPath, childDirName) -} + if err == keva.ErrValueNotFound { + fmt.Printf("Creating child\n") -func (s *DiskIndexStore) fingerprintForChild(childPath string) (Fingerprint, error) { - childFingerprint := Fingerprint{} - childFingerprintFile := filepath.Join(childPath, nodeFingerprintFile) + node = IndexNode{ + childFingerprintsByString: make(map[string]*Fingerprint), + } - file, err := os.Open(childFingerprintFile) - if err != nil { - return childFingerprint, err - } - defer file.Close() + fmt.Printf("GetOrCreateChild - Saving [%s] %d %d\n", nodeKey, len(node.childFingerprints), len(node.entries)) + err = s.nodes.Put(nodeKey, &node) + if err != nil { + return nil, err + } - fileInfo, err := file.Stat() - if err != nil { - return childFingerprint, err - } + parent.registerChild(f) + fmt.Printf("GetOrCreateChild - Parent - Saving [%s] %d %d\n", parentFingerprint.String(), len(parent.childFingerprints), len(parent.entries)) + err = s.nodes.Put(parentFingerprint.String(), parent) + if err != nil { + return nil, err + } - fingerprintBytes := make([]byte, fileInfo.Size(), fileInfo.Size()) - _, err = file.Read(fingerprintBytes) - if err != nil { - return childFingerprint, err - } + } else if err == nil { + err = s.loadThumbnails(&node) + if err != nil { + return nil, err + } - childFingerprint.UnmarshalBytes(fingerprintBytes) + } else { + return nil, err + } - return childFingerprint, nil + return &node, nil } -func (s *DiskIndexStore) getNodeByPath(path string) (*IndexNode, error) { +func (s *DiskIndexStore) GetRoot() (*IndexNode, error) { + var rootKey = Fingerprint{}.String() - _, err := os.Stat(path) - if err != nil { - if os.IsNotExist(err) { - return nil, nil + var root IndexNode + err := s.nodes.Get(rootKey, &root) + + if err == keva.ErrValueNotFound { + fmt.Printf("Root node not found - creating it\n") + root = IndexNode{ + childFingerprintsByString: make(map[string]*Fingerprint), } - return nil, err - } - node := &IndexNode{ - path: path, - childFingerprintsByString: make(map[string]*Fingerprint), - } + } else if err == nil { + fmt.Printf("Found root node with %d children and %d entries\n", len(root.childFingerprints), len(root.entries)) - err = s.loadAllChildren(node) - if err != nil { - return nil, err - } + err = s.loadThumbnails(&root) + if err != nil { + return nil, err + } - err = s.loadAllEntries(node) - if err != nil { + } else { return nil, err } - return node, nil + return &root, nil } -func (s *DiskIndexStore) loadAllChildren(n *IndexNode) error { - dir, err := os.Open(n.path) - if err != nil { - return err - } - defer dir.Close() - - for fileInfos, err := dir.Readdir(1); err == nil && len(fileInfos) > 0; fileInfos, err = dir.Readdir(1) { - for _, info := range fileInfos { - if info.IsDir() && info.Name() != nodeEntriesDir { - child, err := s.loadChild(n, info.Name()) - if err != nil { - return err - } - - n.registerChild(child) - } - } - } - - return nil -} - -func (s *DiskIndexStore) loadAllEntries(n *IndexNode) error { - entriesDir := filepath.Join(n.path, nodeEntriesDir) - - dir, err := os.Open(entriesDir) - if err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - defer dir.Close() - - for fileInfos, err := dir.Readdir(1); err == nil && len(fileInfos) > 0; fileInfos, err = dir.Readdir(1) { - for _, fileInfo := range fileInfos { - if strings.HasSuffix(fileInfo.Name(), ".entry") { - entry, err := NewIndexEntryFromFile(filepath.Join(entriesDir, fileInfo.Name())) - if err != nil { - return err - } - err = entry.loadThumbnail(s.pathForThumbnail(entry)) - if err != nil { - return err - } - - n.registerEntry(entry) - } - } - } - - return nil +func (s *DiskIndexStore) RemoveEntries(node *IndexNode, nodeFingerprint Fingerprint) error { + node.removeEntries() + fmt.Printf("RemoveEntries - Saving [%s] %d %d\n", nodeFingerprint.String(), len(node.childFingerprints), len(node.entries)) + return s.nodes.Put(nodeFingerprint.String(), node) } -func (s *DiskIndexStore) loadChild(n *IndexNode, childDirName string) (Fingerprint, error) { - childPath := filepath.Join(n.path, childDirName) - return s.fingerprintForChild(childPath) +func (s *DiskIndexStore) loadThumbnails(n *IndexNode) error { + return n.withEachEntry(func(entry *IndexEntry) error { + return entry.loadThumbnail(s.pathForThumbnail(entry)) + }) } func (s *DiskIndexStore) pathForThumbnail(entry *IndexEntry) string { @@ -208,24 +141,6 @@ func (s *DiskIndexStore) pathForThumbnail(entry *IndexEntry) string { return path.Join(s.rootPath, thumbnailsDir, thumbnailHex[0:2], thumbnailHex[2:4], thumbnailHex[4:]) } -func (s *DiskIndexStore) saveNode(n *IndexNode, f Fingerprint) error { - fmt.Printf("Saving node %s\n", n.path) - - os.Mkdir(n.path, os.FileMode(0700)) - - // Save the actual (non-truncated) fingerprint - fingerprintFile := filepath.Join(n.path, nodeFingerprintFile) - file, err := os.Create(fingerprintFile) - if err != nil { - return err - } - - defer file.Close() - - _, err = file.Write(f.Bytes()) - return err -} - func NewDiskIndexStore(rootPath string) (*DiskIndexStore, error) { legacyNodesDir := path.Join(rootPath, "legacy") err := os.MkdirAll(legacyNodesDir, os.FileMode(0700)) diff --git a/index.go b/index.go index 0866107..69afb9f 100644 --- a/index.go +++ b/index.go @@ -1,6 +1,7 @@ package simian import ( + "fmt" "image" "math" "os" @@ -26,12 +27,20 @@ func (i *Index) Add(image image.Image, metadata interface{}) (key string, err er return "", err } - node, err := root.Add(entry, rootFingerprintSize+1, i) + var rootFingerprint Fingerprint + + _, err = root.Add(entry, rootFingerprint, rootFingerprintSize+1, i) if err != nil { return "", err } - return node.path, nil + fmt.Printf("Root node has %d children and %d entries\n", len(root.childFingerprints), len(root.entries)) + + return "", nil +} + +func (i *Index) Close() error { + return i.Store.Close() } func (i *Index) FindNearest(image image.Image, maxResults int, maxDifference float64) ([]*IndexEntry, error) { diff --git a/indexentry.go b/indexentry.go index 017eb65..6f28510 100644 --- a/indexentry.go +++ b/indexentry.go @@ -1,8 +1,6 @@ package simian import ( - "crypto/rand" - "encoding/hex" "encoding/json" "image" "image/png" @@ -15,7 +13,6 @@ import ( const keyBitLength = 256 type IndexEntry struct { - key string Thumbnail image.Image MaxFingerprint Fingerprint Attributes map[string]interface{} @@ -76,31 +73,8 @@ func (entry *IndexEntry) saveThumbnail(path string) error { return pngEncoder.Encode(thumbnailOut, entry.Thumbnail) } -func (entry *IndexEntry) save(path string, thumbnailPath string) error { - jsonFile := filepath.Join(path, entry.key+".entry") - jsonOut, err := os.Create(jsonFile) - if err != nil { - return err - } - defer jsonOut.Close() - - jsonEncoder := json.NewEncoder(jsonOut) - err = jsonEncoder.Encode(entry) - if err != nil { - return err - } - - return entry.saveThumbnail(thumbnailPath) -} - func NewIndexEntry(image image.Image, maxFingerprintSize int) (*IndexEntry, error) { - key, err := makeKey() - if err != nil { - return nil, err - } - entry := &IndexEntry{ - key: key, Thumbnail: makeThumbnail(image, maxFingerprintSize*2), Attributes: make(map[string]interface{}), } @@ -110,39 +84,6 @@ func NewIndexEntry(image image.Image, maxFingerprintSize int) (*IndexEntry, erro return entry, nil } -func NewIndexEntryFromFile(file string) (*IndexEntry, error) { - jsonFile, err := os.Open(file) - if err != nil { - return nil, err - } - defer jsonFile.Close() - - key := filepath.Base(file) - key = key[:(len(key) - len(filepath.Ext(key)))] - - entry := &IndexEntry{ - key: key, - } - - jsonDecoder := json.NewDecoder(jsonFile) - err = jsonDecoder.Decode(entry) - if err != nil { - return nil, err - } - - return entry, nil -} - -func makeKey() (string, error) { - b := make([]byte, keyBitLength/8) - _, err := rand.Read(b) - if err != nil { - return "", err - } - - return hex.EncodeToString(b), nil -} - func makeThumbnail(src image.Image, size int) image.Image { width := float64(src.Bounds().Max.X - src.Bounds().Min.X) height := float64(src.Bounds().Max.Y - src.Bounds().Min.Y) @@ -163,6 +104,6 @@ func makeThumbnail(src image.Image, size int) image.Image { } type indexEntryJSON struct { - MaxFingerprint []byte `json:"maxFingerprint"` - Attributes map[string]interface{} + MaxFingerprint []byte `json:"maxFingerprint"` + Attributes map[string]interface{} `json:"attributes"` } diff --git a/indexnode.go b/indexnode.go index 82878b1..d73a199 100644 --- a/indexnode.go +++ b/indexnode.go @@ -11,17 +11,16 @@ import ( var errResultLimitReached = errors.New("result limit reached") type IndexNode struct { - path string childFingerprints []Fingerprint childFingerprintsByString map[string]*Fingerprint entries []*IndexEntry } -func (node *IndexNode) Add(entry *IndexEntry, childFingerprintSize int, index *Index) (*IndexNode, error) { +func (node *IndexNode) Add(entry *IndexEntry, nodeFingerprint Fingerprint, childFingerprintSize int, index *Index) (*IndexNode, error) { - fmt.Printf("Node[%s] Add %d\n", node.path, childFingerprintSize) + fmt.Printf("Node Add %d\n", childFingerprintSize) - entryFingerprint := entry.FingerprintForSize(childFingerprintSize) + childFingerprint := entry.FingerprintForSize(childFingerprintSize) if len(node.childFingerprints) == 0 { @@ -30,11 +29,12 @@ func (node *IndexNode) Add(entry *IndexEntry, childFingerprintSize int, index *I fmt.Printf("Max Diff: %f\n", node.maxChildDifferenceTo(entry.MaxFingerprint)) if childFingerprintSize < index.maxFingerprintSize && node.maxChildDifferenceTo(entry.MaxFingerprint) > index.maxEntryDifference { fmt.Printf("Pushing entries to children\n") - node.pushEntriesToChildren(childFingerprintSize, index.Store) + node.pushEntriesToChildren(nodeFingerprint, childFingerprintSize, index.Store) + fmt.Printf("Done pushing entries to children\n") } else { - fmt.Printf("Adding entry %s\n", node.path) - err := index.Store.AddEntry(entry, node) + fmt.Printf("Adding entry %s\n", nodeFingerprint.String()) + err := index.Store.AddEntry(entry, node, nodeFingerprint) if err != nil { return nil, err } @@ -43,19 +43,19 @@ func (node *IndexNode) Add(entry *IndexEntry, childFingerprintSize int, index *I } } - child, err := index.Store.GetOrCreateChild(entryFingerprint, node) + child, err := index.Store.GetOrCreateChild(childFingerprint, node, nodeFingerprint) if err != nil { return nil, err } - return child.Add(entry, childFingerprintSize+1, index) + return child.Add(entry, childFingerprint, childFingerprintSize+1, index) } func (node *IndexNode) FindNearest(entry *IndexEntry, childFingerprintSize int, index *Index, maxResults int, maxDifference float64) ([]*IndexEntry, error) { results := make([]*IndexEntry, 0, maxResults) err := node.gatherNearest(entry, childFingerprintSize, index, maxDifference, &results) - if err != errResultLimitReached { + if err != nil && err != errResultLimitReached { return nil, err } @@ -64,7 +64,6 @@ func (node *IndexNode) FindNearest(entry *IndexEntry, childFingerprintSize int, func (node *IndexNode) MarshalJSON() ([]byte, error) { return json.Marshal(&indexNodeJSON{ - Path: node.path, ChildFingerprints: node.childFingerprints, Entries: node.entries, }) @@ -77,7 +76,6 @@ func (node *IndexNode) UnmarshalJSON(b []byte) error { return err } - node.path = value.Path node.childFingerprints = value.ChildFingerprints node.childFingerprintsByString = make(map[string]*Fingerprint) @@ -115,11 +113,11 @@ func (node *IndexNode) addSimilarEntriesTo(entries *[]*IndexEntry, fingerprint F func (node *IndexNode) gatherNearest(entry *IndexEntry, childFingerprintSize int, index *Index, maxDifference float64, results *[]*IndexEntry) error { - fmt.Printf("%d gatherNearest\n", childFingerprintSize) + fmt.Printf("%d gatherNearest %d\n", childFingerprintSize, len(node.entries)) // Check for an exact matching child - entryFingerprint := entry.FingerprintForSize(childFingerprintSize) - exactChildFingerprint, exactChildFingerprintExists := node.childFingerprintsByString[entryFingerprint.String()] + childFingerprint := entry.FingerprintForSize(childFingerprintSize) + exactChildFingerprint, exactChildFingerprintExists := node.childFingerprintsByString[childFingerprint.String()] var exactChildFingerprintString string var exactChild *IndexNode @@ -127,7 +125,7 @@ func (node *IndexNode) gatherNearest(entry *IndexEntry, childFingerprintSize int exactChildFingerprintString = exactChildFingerprint.String() var err error - exactChild, err = index.Store.GetChild(entryFingerprint, node) + exactChild, err = index.Store.GetChild(childFingerprint, node) if err != nil { return err } @@ -150,7 +148,7 @@ func (node *IndexNode) gatherNearest(entry *IndexEntry, childFingerprintSize int copy(childFingerprints, node.childFingerprints) // Need more results - find and sort all children by nearness - sort.Sort(nodesByDifferenceToFingerprintWith(childFingerprints, entryFingerprint)) + sort.Sort(nodesByDifferenceToFingerprintWith(childFingerprints, childFingerprint)) // fmt.Printf("Sorting %d children...\n", len(children)) // for i, child := range children { @@ -196,18 +194,17 @@ func (node *IndexNode) maxChildDifferenceTo(f Fingerprint) float64 { return maxDifference } -func (node *IndexNode) pushEntriesToChildren(childFingerprintSize int, store IndexStore) error { +func (node *IndexNode) pushEntriesToChildren(nodeFingerprint Fingerprint, childFingerprintSize int, store IndexStore) error { node.withEachEntry(func(entry *IndexEntry) error { - entryFingerprint := entry.FingerprintForSize(childFingerprintSize) - child, err := store.GetOrCreateChild(entryFingerprint, node) + childFingerprint := entry.FingerprintForSize(childFingerprintSize) + child, err := store.GetOrCreateChild(childFingerprint, node, nodeFingerprint) if err != nil { return err } - store.AddEntry(entry, child) - return nil + return store.AddEntry(entry, child, childFingerprint) }) - return store.RemoveEntries(node) + return store.RemoveEntries(node, nodeFingerprint) } func (node *IndexNode) registerChild(childFingerprint Fingerprint) { @@ -235,7 +232,6 @@ func (node *IndexNode) withEachEntry(action func(*IndexEntry) error) error { } type indexNodeJSON struct { - Path string `json:"path"` ChildFingerprints []Fingerprint `json:"childFingerprints"` Entries []*IndexEntry `json:"entries"` } diff --git a/indexnode_test.go b/indexnode_test.go index 0a284a8..75ee02b 100644 --- a/indexnode_test.go +++ b/indexnode_test.go @@ -11,7 +11,6 @@ func TestIndexNode(t *testing.T) { t.Run("should roundtrip all fields", func(t *testing.T) { n := &IndexNode{ - path: "some-path", childFingerprintsByString: make(map[string]*Fingerprint), } @@ -41,10 +40,6 @@ func TestIndexNode(t *testing.T) { t.Fatalf("Error unmarshalling JSON: %v", err) } - if result.path != n.path { - t.Errorf("Expected path '%s' but got '%s'", n.path, result.path) - } - if actual, expected := len(result.childFingerprints), len(n.childFingerprints); actual != expected { t.Fatalf("Expected %d child fingerprints but got %d", expected, actual) } diff --git a/indexstore.go b/indexstore.go index b7649a4..7df2d02 100644 --- a/indexstore.go +++ b/indexstore.go @@ -1,9 +1,10 @@ package simian type IndexStore interface { - AddEntry(entry *IndexEntry, node *IndexNode) error + AddEntry(entry *IndexEntry, node *IndexNode, nodeFingerprint Fingerprint) error + Close() error GetChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) - GetOrCreateChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) + GetOrCreateChild(f Fingerprint, parent *IndexNode, parentFingerprint Fingerprint) (*IndexNode, error) GetRoot() (*IndexNode, error) - RemoveEntries(node *IndexNode) error + RemoveEntries(node *IndexNode, nodeFingerprint Fingerprint) error } From b7077785d465e058e321b9ff9d0aee583adcdd6e Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Sat, 12 Aug 2017 13:47:59 +1000 Subject: [PATCH 09/13] Update Keva. --- vendor/vendor.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vendor/vendor.json b/vendor/vendor.json index de1a935..e426c6a 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -3,10 +3,10 @@ "ignore": "test", "package": [ { - "checksumSHA1": "hkHT2pYsqvJVbWqGR3Vt6K0KIUo=", + "checksumSHA1": "3rZWEvKplW9AuWwjuPyCJNyBfhA=", "path": "github.com/mandykoh/keva", - "revision": "958f7b64e49598bd87a38c19296c39d56de9d00d", - "revisionTime": "2017-08-02T13:19:59Z" + "revision": "14866bae2ca726b120aa63fce47e5ac93efb30d0", + "revisionTime": "2017-08-12T03:44:41Z" }, { "checksumSHA1": "fk05LCN5pjUKlw10ErXFIYoFxZk=", From cbe4ee9dc533bbce729dc51e289762b6702faba4 Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Sat, 12 Aug 2017 13:49:02 +1000 Subject: [PATCH 10/13] Stop creating legacy nodes directory. --- diskindexstore.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/diskindexstore.go b/diskindexstore.go index 04e73df..133c473 100644 --- a/diskindexstore.go +++ b/diskindexstore.go @@ -142,12 +142,6 @@ func (s *DiskIndexStore) pathForThumbnail(entry *IndexEntry) string { } func NewDiskIndexStore(rootPath string) (*DiskIndexStore, error) { - legacyNodesDir := path.Join(rootPath, "legacy") - err := os.MkdirAll(legacyNodesDir, os.FileMode(0700)) - if err != nil { - return nil, err - } - thumbnailsDir := path.Join(rootPath, thumbnailsDir) os.MkdirAll(thumbnailsDir, os.FileMode(0700)) From 04117dc4ff60467698829c3d2231db8035fe94b8 Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Sun, 13 Aug 2017 21:41:52 +1000 Subject: [PATCH 11/13] Allow attributes to be stored with images. --- index.go | 8 +++++--- indexentry.go | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/index.go b/index.go index 69afb9f..83513aa 100644 --- a/index.go +++ b/index.go @@ -16,8 +16,8 @@ type Index struct { maxEntryDifference float64 } -func (i *Index) Add(image image.Image, metadata interface{}) (key string, err error) { - entry, err := NewIndexEntry(image, i.maxFingerprintSize) +func (i *Index) Add(image image.Image, metadata map[string]interface{}) (key string, err error) { + entry, err := NewIndexEntry(image, i.maxFingerprintSize, metadata) if err != nil { return "", nil } @@ -44,7 +44,9 @@ func (i *Index) Close() error { } func (i *Index) FindNearest(image image.Image, maxResults int, maxDifference float64) ([]*IndexEntry, error) { - entry, err := NewIndexEntry(image, i.maxFingerprintSize) + var dummy map[string]interface{} + + entry, err := NewIndexEntry(image, i.maxFingerprintSize, dummy) if err != nil { return nil, nil } diff --git a/indexentry.go b/indexentry.go index 6f28510..57fe408 100644 --- a/indexentry.go +++ b/indexentry.go @@ -73,10 +73,10 @@ func (entry *IndexEntry) saveThumbnail(path string) error { return pngEncoder.Encode(thumbnailOut, entry.Thumbnail) } -func NewIndexEntry(image image.Image, maxFingerprintSize int) (*IndexEntry, error) { +func NewIndexEntry(image image.Image, maxFingerprintSize int, attributes map[string]interface{}) (*IndexEntry, error) { entry := &IndexEntry{ Thumbnail: makeThumbnail(image, maxFingerprintSize*2), - Attributes: make(map[string]interface{}), + Attributes: attributes, } entry.MaxFingerprint = entry.FingerprintForSize(maxFingerprintSize) From 7f249667e8bfd9ff517090ef49ed8f26a9b51661 Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Sun, 13 Aug 2017 21:42:01 +1000 Subject: [PATCH 12/13] Add more diagnostic logging. --- indexnode.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/indexnode.go b/indexnode.go index d73a199..7b572e7 100644 --- a/indexnode.go +++ b/indexnode.go @@ -28,7 +28,7 @@ func (node *IndexNode) Add(entry *IndexEntry, nodeFingerprint Fingerprint, child // the rest, so split this leaf node by turning entries into children. fmt.Printf("Max Diff: %f\n", node.maxChildDifferenceTo(entry.MaxFingerprint)) if childFingerprintSize < index.maxFingerprintSize && node.maxChildDifferenceTo(entry.MaxFingerprint) > index.maxEntryDifference { - fmt.Printf("Pushing entries to children\n") + fmt.Printf("Pushing %d entries to children\n", len(node.entries)) node.pushEntriesToChildren(nodeFingerprint, childFingerprintSize, index.Store) fmt.Printf("Done pushing entries to children\n") @@ -201,6 +201,7 @@ func (node *IndexNode) pushEntriesToChildren(nodeFingerprint Fingerprint, childF if err != nil { return err } + fmt.Printf("Pushing entry to child\n") return store.AddEntry(entry, child, childFingerprint) }) From 24ddce1e703d04063cfdd6bdb8452b32d2ef92cf Mon Sep 17 00:00:00 2001 From: Amanda Koh Date: Tue, 15 Aug 2017 19:39:39 +1000 Subject: [PATCH 13/13] Update Keva. --- vendor/vendor.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vendor/vendor.json b/vendor/vendor.json index e426c6a..ba70d97 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -3,10 +3,10 @@ "ignore": "test", "package": [ { - "checksumSHA1": "3rZWEvKplW9AuWwjuPyCJNyBfhA=", + "checksumSHA1": "sVE1dlxGDmuS/C6XfTL/2/UG6q4=", "path": "github.com/mandykoh/keva", - "revision": "14866bae2ca726b120aa63fce47e5ac93efb30d0", - "revisionTime": "2017-08-12T03:44:41Z" + "revision": "443c1447fc51d6502cc5cb82d810014baba00b5b", + "revisionTime": "2017-08-15T09:38:58Z" }, { "checksumSHA1": "fk05LCN5pjUKlw10ErXFIYoFxZk=",