diff --git a/diskindexstore.go b/diskindexstore.go index 6101a4a..133c473 100644 --- a/diskindexstore.go +++ b/diskindexstore.go @@ -5,213 +5,153 @@ import ( "encoding/hex" "fmt" "os" - "path/filepath" - "strings" + "path" + + "github.com/mandykoh/keva" ) const nodeFingerprintFile = "fingerprint" const nodeEntriesDir = "entries" +const thumbnailsDir = "thumbnails" type DiskIndexStore struct { - RootPath string + rootPath string + nodes *keva.Store } -func (s *DiskIndexStore) AddEntry(entry *IndexEntry, node *IndexNode) error { - entriesDir := filepath.Join(node.path, nodeEntriesDir) - os.Mkdir(entriesDir, os.ModePerm) - - err := entry.saveToDir(entriesDir) +func (s *DiskIndexStore) AddEntry(entry *IndexEntry, node *IndexNode, nodeFingerprint Fingerprint) error { + err := entry.saveThumbnail(s.pathForThumbnail(entry)) if err != nil { return err } node.registerEntry(entry) - return nil -} -func (s *DiskIndexStore) GetChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) { - path := s.childPathForFingerprint(f, parent.path) - return s.getNodeByPath(path) + fmt.Printf("AddEntry - Saving [%s] %d %d\n", nodeFingerprint.String(), len(node.childFingerprints), len(node.entries)) + return s.nodes.Put(nodeFingerprint.String(), node) } -func (s *DiskIndexStore) GetOrCreateChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) { - fmt.Printf("GetOrCreateChild()\n") - childPath := s.childPathForFingerprint(f, parent.path) - - node, err := s.getNodeByPath(childPath) - if err != nil { - return nil, err - } +func (s *DiskIndexStore) Close() error { + return s.nodes.Close() +} - if node == nil { - fmt.Printf("Creating child\n") +func (s *DiskIndexStore) GetChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) { + var node IndexNode - node = &IndexNode{ - path: childPath, - childrenByFingerprint: make(map[string]*IndexNodeHandle), - } + err := s.nodes.Get(f.String(), &node) + if err == keva.ErrValueNotFound { + return nil, nil - err := s.saveNode(node, f) + } else if err == nil { + err = s.loadThumbnails(&node) if err != nil { return nil, err } - parent.registerChild(node, f) + } else { + return nil, err } - return node, nil + return &node, nil } -func (s *DiskIndexStore) GetRoot() (*IndexNode, error) { - return s.getNodeByPath(s.RootPath) -} +func (s *DiskIndexStore) GetOrCreateChild(f Fingerprint, parent *IndexNode, parentFingerprint Fingerprint) (*IndexNode, error) { + fmt.Printf("GetOrCreateChild() %s\n", f.String()) -func (s *DiskIndexStore) RemoveEntries(node *IndexNode) error { - entriesDir := filepath.Join(node.path, nodeEntriesDir) - err := os.RemoveAll(entriesDir) - if err != nil { - return err - } + nodeKey := f.String() - node.removeEntries() - return nil -} + var node IndexNode + err := s.nodes.Get(nodeKey, &node) -func (s *DiskIndexStore) childPathForFingerprint(f Fingerprint, parentPath string) string { - fingerprintHash := sha256.Sum256(f.Bytes()) - childDirName := hex.EncodeToString(fingerprintHash[:8]) - return filepath.Join(parentPath, childDirName) -} + if err == keva.ErrValueNotFound { + fmt.Printf("Creating child\n") -func (s *DiskIndexStore) fingerprintForChild(childPath string) (Fingerprint, error) { - childFingerprint := Fingerprint{} - childFingerprintFile := filepath.Join(childPath, nodeFingerprintFile) + node = IndexNode{ + childFingerprintsByString: make(map[string]*Fingerprint), + } - file, err := os.Open(childFingerprintFile) - if err != nil { - return childFingerprint, err - } - defer file.Close() + fmt.Printf("GetOrCreateChild - Saving [%s] %d %d\n", nodeKey, len(node.childFingerprints), len(node.entries)) + err = s.nodes.Put(nodeKey, &node) + if err != nil { + return nil, err + } - fileInfo, err := file.Stat() - if err != nil { - return childFingerprint, err - } + parent.registerChild(f) + fmt.Printf("GetOrCreateChild - Parent - Saving [%s] %d %d\n", parentFingerprint.String(), len(parent.childFingerprints), len(parent.entries)) + err = s.nodes.Put(parentFingerprint.String(), parent) + if err != nil { + return nil, err + } - fingerprintBytes := make([]byte, fileInfo.Size(), fileInfo.Size()) - _, err = file.Read(fingerprintBytes) - if err != nil { - return childFingerprint, err - } + } else if err == nil { + err = s.loadThumbnails(&node) + if err != nil { + return nil, err + } - childFingerprint.UnmarshalBytes(fingerprintBytes) + } else { + return nil, err + } - return childFingerprint, nil + return &node, nil } -func (s *DiskIndexStore) getNodeByPath(path string) (*IndexNode, error) { +func (s *DiskIndexStore) GetRoot() (*IndexNode, error) { + var rootKey = Fingerprint{}.String() - _, err := os.Stat(path) - if err != nil { - if os.IsNotExist(err) { - return nil, nil + var root IndexNode + err := s.nodes.Get(rootKey, &root) + + if err == keva.ErrValueNotFound { + fmt.Printf("Root node not found - creating it\n") + root = IndexNode{ + childFingerprintsByString: make(map[string]*Fingerprint), } - return nil, err - } - node := &IndexNode{ - path: path, - childrenByFingerprint: make(map[string]*IndexNodeHandle), - } + } else if err == nil { + fmt.Printf("Found root node with %d children and %d entries\n", len(root.childFingerprints), len(root.entries)) - err = s.loadAllChildren(node) - if err != nil { - return nil, err - } + err = s.loadThumbnails(&root) + if err != nil { + return nil, err + } - err = s.loadAllEntries(node) - if err != nil { + } else { return nil, err } - return node, nil + return &root, nil } -func (s *DiskIndexStore) loadAllChildren(n *IndexNode) error { - dir, err := os.Open(n.path) - if err != nil { - return err - } - defer dir.Close() - - for fileInfos, err := dir.Readdir(1); err == nil && len(fileInfos) > 0; fileInfos, err = dir.Readdir(1) { - for _, info := range fileInfos { - if info.IsDir() && info.Name() != nodeEntriesDir { - child, err := s.loadChild(n, info.Name()) - if err != nil { - return err - } - - n.registerChildByHandle(child) - } - } - } - - return nil +func (s *DiskIndexStore) RemoveEntries(node *IndexNode, nodeFingerprint Fingerprint) error { + node.removeEntries() + fmt.Printf("RemoveEntries - Saving [%s] %d %d\n", nodeFingerprint.String(), len(node.childFingerprints), len(node.entries)) + return s.nodes.Put(nodeFingerprint.String(), node) } -func (s *DiskIndexStore) loadAllEntries(n *IndexNode) error { - entriesDir := filepath.Join(n.path, nodeEntriesDir) - - dir, err := os.Open(entriesDir) - if err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - defer dir.Close() - - for fileInfos, err := dir.Readdir(1); err == nil && len(fileInfos) > 0; fileInfos, err = dir.Readdir(1) { - for _, fileInfo := range fileInfos { - if strings.HasSuffix(fileInfo.Name(), ".entry") { - entry, err := NewIndexEntryFromFile(filepath.Join(entriesDir, fileInfo.Name())) - if err != nil { - return err - } - - n.registerEntry(entry) - } - } - } - - return nil +func (s *DiskIndexStore) loadThumbnails(n *IndexNode) error { + return n.withEachEntry(func(entry *IndexEntry) error { + return entry.loadThumbnail(s.pathForThumbnail(entry)) + }) } -func (s *DiskIndexStore) loadChild(n *IndexNode, childDirName string) (*IndexNodeHandle, error) { - childPath := filepath.Join(n.path, childDirName) - childFingerprint, err := s.fingerprintForChild(childPath) - if err != nil { - return nil, err - } - - return &IndexNodeHandle{Path: childPath, Fingerprint: childFingerprint}, nil +func (s *DiskIndexStore) pathForThumbnail(entry *IndexEntry) string { + thumbnailHash := sha256.Sum256(entry.MaxFingerprint.Bytes()) + thumbnailHex := hex.EncodeToString(thumbnailHash[:]) + return path.Join(s.rootPath, thumbnailsDir, thumbnailHex[0:2], thumbnailHex[2:4], thumbnailHex[4:]) } -func (s *DiskIndexStore) saveNode(n *IndexNode, f Fingerprint) error { - fmt.Printf("Saving node %s\n", n.path) +func NewDiskIndexStore(rootPath string) (*DiskIndexStore, error) { + thumbnailsDir := path.Join(rootPath, thumbnailsDir) + os.MkdirAll(thumbnailsDir, os.FileMode(0700)) - os.Mkdir(n.path, os.FileMode(0700)) - - // Save the actual (non-truncated) fingerprint - fingerprintFile := filepath.Join(n.path, nodeFingerprintFile) - file, err := os.Create(fingerprintFile) + nodeStore, err := keva.NewStore(path.Join(rootPath, "nodes")) if err != nil { - return err + return nil, err } - defer file.Close() - - _, err = file.Write(f.Bytes()) - return err + return &DiskIndexStore{ + rootPath: rootPath, + nodes: nodeStore, + }, nil } diff --git a/fingerprint_test.go b/fingerprint_test.go index e2646a5..6ff7211 100644 --- a/fingerprint_test.go +++ b/fingerprint_test.go @@ -9,202 +9,205 @@ import ( "testing" ) -func TestBytesSerialisesToPackedBytes(t *testing.T) { - f := Fingerprint{samples: []byte{0x00, 0x00, 0xF0, 0xF0}} +func TestFingerprint(t *testing.T) { - actualString := fmt.Sprintf("%x", f.Bytes()) + testImage := func() image.Image { + img := image.NewNRGBA(image.Rectangle{Max: image.Point{X: 256, Y: 256}}) - if actualString != "00ff" { - t.Errorf("Fingerprint '%s' doesn't match expected", actualString) + for i := img.Bounds().Min.Y; i < img.Bounds().Max.Y; i++ { + for j := img.Bounds().Min.X; j < img.Bounds().Max.X; j++ { + img.Set(j, i, color.RGBA{uint8(i), uint8(j), uint8(i), 255}) + } + } + + return img } -} -func TestDifferenceReturnsZeroForSameFingerprint(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} - f2 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} + t.Run("Bytes() serialises to packed bytes", func(t *testing.T) { + f := Fingerprint{samples: []byte{0x00, 0x00, 0xF0, 0xF0}} - diff := f1.Difference(f2) + actualString := fmt.Sprintf("%x", f.Bytes()) - if diff != 0.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } + if actualString != "00ff" { + t.Errorf("Fingerprint '%s' doesn't match expected", actualString) + } + }) - diff = f2.Difference(f1) + t.Run("Difference() returns zero for same fingerprint", func(t *testing.T) { + f1 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} + f2 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} - if diff != 0.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } -} + diff := f1.Difference(f2) -func TestDifferenceReturnsOneForCompletelyDifferentFingerprint(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 0, 0, 255, 255, 255}} - f2 := Fingerprint{samples: []byte{255, 255, 255, 0, 0, 0}} + if diff != 0.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } - diff := f1.Difference(f2) + diff = f2.Difference(f1) - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } + if diff != 0.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } + }) - diff = f2.Difference(f1) + t.Run("Difference() returns one for completely different fingerprint", func(t *testing.T) { + f1 := Fingerprint{samples: []byte{0, 0, 0, 255, 255, 255}} + f2 := Fingerprint{samples: []byte{255, 255, 255, 0, 0, 0}} - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } -} + diff := f1.Difference(f2) -func TestDifferenceReturnsOneForDifferentlySizedFingerprint(t *testing.T) { - f1 := Fingerprint{samples: []byte{255, 255, 255}} - f2 := Fingerprint{samples: []byte{255, 255, 255, 255}} + if diff != 1.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } - diff := f1.Difference(f2) + diff = f2.Difference(f1) - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } + if diff != 1.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } + }) - diff = f2.Difference(f1) + t.Run("Difference() returns one for differently sized fingerprint", func(t *testing.T) { + f1 := Fingerprint{samples: []byte{255, 255, 255}} + f2 := Fingerprint{samples: []byte{255, 255, 255, 255}} - if diff != 1.0 { - t.Errorf("Difference %f doesn't match expected", diff) - } -} + diff := f1.Difference(f2) -func TestDistanceReturnsComponentwiseAbsoluteDifference(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} - f2 := Fingerprint{samples: []byte{1, 3, 6, 11, 146, 0}} + if diff != 1.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } - dist := f1.Distance(f2) + diff = f2.Difference(f1) - if dist != 286 { - t.Errorf("Distance %d doesn't match expected", dist) - } + if diff != 1.0 { + t.Errorf("Difference %f doesn't match expected", diff) + } + }) - dist = f2.Distance(f1) + t.Run("Distance() returns componentwise absolute difference", func(t *testing.T) { + f1 := Fingerprint{samples: []byte{0, 1, 2, 3, 130, 255}} + f2 := Fingerprint{samples: []byte{1, 3, 6, 11, 146, 0}} - if dist != 286 { - t.Errorf("Distance %d doesn't match expected", dist) - } -} + dist := f1.Distance(f2) -func TestDistanceReturnsMaxValueForMismatchedLength(t *testing.T) { - f1 := Fingerprint{samples: []byte{0, 0, 0}} - f2 := Fingerprint{samples: []byte{0, 0, 0, 0}} + if dist != 286 { + t.Errorf("Distance %d doesn't match expected", dist) + } - dist := f1.Distance(f2) + dist = f2.Distance(f1) - if dist != math.MaxUint64 { - t.Errorf("Distance %d wasn't max uint64", dist) - } -} + if dist != 286 { + t.Errorf("Distance %d doesn't match expected", dist) + } + }) -func TestMarshalTextSerialisesToPackedHexStringBytes(t *testing.T) { - f := Fingerprint{samples: []byte{0x00, 0x00, 0xFF, 0xFF}} + t.Run("Distance() returns max value for mismatched length", func(t *testing.T) { + f1 := Fingerprint{samples: []byte{0, 0, 0}} + f2 := Fingerprint{samples: []byte{0, 0, 0, 0}} - actual, err := f.MarshalText() + dist := f1.Distance(f2) - if err != nil { - t.Errorf("Error while marshalling: %s", err) - } - if string(actual) != "00ff" { - t.Errorf("Fingerprint '%s' doesn't match expected", actual) - } -} + if dist != math.MaxUint64 { + t.Errorf("Distance %d wasn't max uint64", dist) + } + }) -func TestSizeReturnsCorrectSideLength(t *testing.T) { - img := testImage() + t.Run("MarshalText() serialises to packed hex string bytes", func(t *testing.T) { + f := Fingerprint{samples: []byte{0x00, 0x00, 0xFF, 0xFF}} - f := NewFingerprint(img, 3) - size := f.Size() + actual, err := f.MarshalText() - if size != 3 { - t.Errorf("Size %d doesn't match expected", size) - } + if err != nil { + t.Errorf("Error while marshalling: %s", err) + } + if string(actual) != "00ff" { + t.Errorf("Fingerprint '%s' doesn't match expected", actual) + } + }) - f = NewFingerprint(img, 7) - size = f.Size() + t.Run("Size() returns correct side length", func(t *testing.T) { + img := testImage() - if size != 7 { - t.Errorf("Size %d doesn't match expected", size) - } + f := NewFingerprint(img, 3) + size := f.Size() - f = Fingerprint{samples: make([]byte, 5*5)} - size = f.Size() + if size != 3 { + t.Errorf("Size %d doesn't match expected", size) + } - if size != 5 { - t.Errorf("Size %d doesn't match expected", size) - } -} + f = NewFingerprint(img, 7) + size = f.Size() -func TestStringSerialisesToPackedHexString(t *testing.T) { - f := Fingerprint{samples: []byte{ - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, - }} + if size != 7 { + t.Errorf("Size %d doesn't match expected", size) + } - actualString := fmt.Sprintf("%s", f) + f = Fingerprint{samples: make([]byte, 5*5)} + size = f.Size() - if actualString != "fffffffffffffffffffffffff0" { - t.Errorf("Fingerprint '%s' doesn't match expected", actualString) - } -} + if size != 5 { + t.Errorf("Size %d doesn't match expected", size) + } + }) -func TestUnmarshalBytesDeserialisesFromPackedBytes(t *testing.T) { - b := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF0} + t.Run("String() serialises to packed hex string", func(t *testing.T) { + f := Fingerprint{samples: []byte{ + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, + }} - f := Fingerprint{} - f.UnmarshalBytes(b) + actualString := fmt.Sprintf("%s", f) - if len(f.samples) != 25 { - t.Fatalf("Fingerprint length %d doesn't match expected", len(f.samples)) - } - for i := 0; i < 25; i++ { - if f.samples[i] != 0xF0 { - t.Errorf("Fingerprint byte '%d' doesn't match expected", f.samples[i]) + if actualString != "fffffffffffffffffffffffff0" { + t.Errorf("Fingerprint '%s' doesn't match expected", actualString) } - } -} + }) -func TestUnmarshalTextDeserialisesFromPackedHexStringBytes(t *testing.T) { - text := []byte("fffffffffffffffffffffffff0") + t.Run("UnmarshalBytes() deserialises from packed bytes", func(t *testing.T) { + b := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF0} - f := Fingerprint{} - f.UnmarshalText(text) + f := Fingerprint{} + f.UnmarshalBytes(b) - if len(f.samples) != 25 { - t.Fatalf("Fingerprint length %d doesn't match expected", len(f.samples)) - } - for i := 0; i < 25; i++ { - if f.samples[i] != 0xF0 { - t.Errorf("Fingerprint byte '%d' doesn't match expected", f.samples[i]) + if len(f.samples) != 25 { + t.Fatalf("Fingerprint length %d doesn't match expected", len(f.samples)) } - } -} + for i := 0; i < 25; i++ { + if f.samples[i] != 0xF0 { + t.Errorf("Fingerprint byte '%d' doesn't match expected", f.samples[i]) + } + } + }) -func TestNewFingerprintGeneratesBinaryRepresentation(t *testing.T) { - f := NewFingerprint(testImage(), 3) + t.Run("UnmarshalText() deserialises from packed hex string bytes", func(t *testing.T) { + text := []byte("fffffffffffffffffffffffff0") - expected, _ := hex.DecodeString("3060805080a070a0c0") + f := Fingerprint{} + f.UnmarshalText(text) - expectedString := hex.EncodeToString(expected) - actualString := hex.EncodeToString(f.samples) + if len(f.samples) != 25 { + t.Fatalf("Fingerprint length %d doesn't match expected", len(f.samples)) + } + for i := 0; i < 25; i++ { + if f.samples[i] != 0xF0 { + t.Errorf("Fingerprint byte '%d' doesn't match expected", f.samples[i]) + } + } + }) - if expectedString != actualString { - t.Fatalf("Fingerprint '%s' doesn't match expected '%s'", actualString, expectedString) - } -} + t.Run("NewFingerprint() generates binary representation", func(t *testing.T) { + f := NewFingerprint(testImage(), 3) -func testImage() image.Image { - img := image.NewNRGBA(image.Rectangle{Max: image.Point{X: 256, Y: 256}}) + expected, _ := hex.DecodeString("3060805080a070a0c0") - for i := img.Bounds().Min.Y; i < img.Bounds().Max.Y; i++ { - for j := img.Bounds().Min.X; j < img.Bounds().Max.X; j++ { - img.Set(j, i, color.RGBA{uint8(i), uint8(j), uint8(i), 255}) - } - } + expectedString := hex.EncodeToString(expected) + actualString := hex.EncodeToString(f.samples) - return img + if expectedString != actualString { + t.Fatalf("Fingerprint '%s' doesn't match expected '%s'", actualString, expectedString) + } + }) } diff --git a/index.go b/index.go index 15eeb1c..83513aa 100644 --- a/index.go +++ b/index.go @@ -1,6 +1,7 @@ package simian import ( + "fmt" "image" "math" "os" @@ -15,8 +16,8 @@ type Index struct { maxEntryDifference float64 } -func (i *Index) Add(image image.Image, metadata interface{}) (key string, err error) { - entry, err := NewIndexEntry(image, i.maxFingerprintSize) +func (i *Index) Add(image image.Image, metadata map[string]interface{}) (key string, err error) { + entry, err := NewIndexEntry(image, i.maxFingerprintSize, metadata) if err != nil { return "", nil } @@ -26,16 +27,26 @@ func (i *Index) Add(image image.Image, metadata interface{}) (key string, err er return "", err } - node, err := root.Add(entry, rootFingerprintSize+1, i) + var rootFingerprint Fingerprint + + _, err = root.Add(entry, rootFingerprint, rootFingerprintSize+1, i) if err != nil { return "", err } - return node.path, nil + fmt.Printf("Root node has %d children and %d entries\n", len(root.childFingerprints), len(root.entries)) + + return "", nil +} + +func (i *Index) Close() error { + return i.Store.Close() } func (i *Index) FindNearest(image image.Image, maxResults int, maxDifference float64) ([]*IndexEntry, error) { - entry, err := NewIndexEntry(image, i.maxFingerprintSize) + var dummy map[string]interface{} + + entry, err := NewIndexEntry(image, i.maxFingerprintSize, dummy) if err != nil { return nil, nil } @@ -54,14 +65,22 @@ func (i *Index) FindNearest(image image.Image, maxResults int, maxDifference flo return results, err } -func NewIndex(path string, maxFingerprintSize int, maxEntryDifference float64) *Index { - os.MkdirAll(path, 0700) +func NewIndex(path string, maxFingerprintSize int, maxEntryDifference float64) (*Index, error) { + err := os.MkdirAll(path, 0700) + if err != nil { + return nil, err + } + + indexStore, err := NewDiskIndexStore(path) + if err != nil { + return nil, err + } return &Index{ - Store: &DiskIndexStore{RootPath: path}, + Store: indexStore, maxFingerprintSize: maxFingerprintSize, maxEntryDifference: maxEntryDifference, - } + }, err } type entriesByDifferenceToEntry struct { diff --git a/indexentry.go b/indexentry.go index 50043ad..57fe408 100644 --- a/indexentry.go +++ b/indexentry.go @@ -1,8 +1,6 @@ package simian import ( - "crypto/rand" - "encoding/hex" "encoding/json" "image" "image/png" @@ -15,101 +13,77 @@ import ( const keyBitLength = 256 type IndexEntry struct { - key string - Thumbnail image.Image `json:"-"` - MaxFingerprint Fingerprint `json:"maxFingerprint"` - Attributes map[string]interface{} `json:"attributes"` + Thumbnail image.Image + MaxFingerprint Fingerprint + Attributes map[string]interface{} } func (entry *IndexEntry) FingerprintForSize(size int) Fingerprint { return NewFingerprint(entry.Thumbnail, size) } -func (entry *IndexEntry) saveToDir(path string) error { - jsonFile := filepath.Join(path, entry.key+".entry") - jsonOut, err := os.Create(jsonFile) - if err != nil { - return err - } - defer jsonOut.Close() - - jsonEncoder := json.NewEncoder(jsonOut) - jsonEncoder.Encode(entry) +func (entry *IndexEntry) MarshalJSON() ([]byte, error) { + return json.Marshal(&indexEntryJSON{ + MaxFingerprint: entry.MaxFingerprint.Bytes(), + Attributes: entry.Attributes, + }) +} - thumbnailFile := jsonFile + ".thumb" - thumbnailOut, err := os.Create(thumbnailFile) +func (entry *IndexEntry) UnmarshalJSON(b []byte) error { + var value indexEntryJSON + err := json.Unmarshal(b, &value) if err != nil { return err } - defer thumbnailOut.Close() - pngEncoder := png.Encoder{} - err = pngEncoder.Encode(thumbnailOut, entry.Thumbnail) + var fingerprint Fingerprint + err = fingerprint.UnmarshalBytes(value.MaxFingerprint) if err != nil { return err } + entry.MaxFingerprint = fingerprint + entry.Attributes = value.Attributes + return nil } -func NewIndexEntry(image image.Image, maxFingerprintSize int) (*IndexEntry, error) { - key, err := makeKey() +func (entry *IndexEntry) loadThumbnail(path string) error { + thumbnailFile, err := os.Open(path) if err != nil { - return nil, err - } - - entry := &IndexEntry{ - key: key, - Thumbnail: makeThumbnail(image, maxFingerprintSize*2), - Attributes: make(map[string]interface{}), + return err } + defer thumbnailFile.Close() - entry.MaxFingerprint = entry.FingerprintForSize(maxFingerprintSize) - - return entry, nil + entry.Thumbnail, err = png.Decode(thumbnailFile) + return err } -func NewIndexEntryFromFile(file string) (*IndexEntry, error) { - jsonFile, err := os.Open(file) +func (entry *IndexEntry) saveThumbnail(path string) error { + thumbnailDir := filepath.Dir(path) + os.MkdirAll(thumbnailDir, os.FileMode(0700)) + + thumbnailOut, err := os.Create(path) if err != nil { - return nil, err + return err } - defer jsonFile.Close() + defer thumbnailOut.Close() - key := filepath.Base(file) - key = key[:(len(key) - len(filepath.Ext(key)))] + pngEncoder := png.Encoder{} + return pngEncoder.Encode(thumbnailOut, entry.Thumbnail) +} +func NewIndexEntry(image image.Image, maxFingerprintSize int, attributes map[string]interface{}) (*IndexEntry, error) { entry := &IndexEntry{ - key: key, - } - - jsonDecoder := json.NewDecoder(jsonFile) - jsonDecoder.Decode(entry) - - thumbnailFile, err := os.Open(file + ".thumb") - if err != nil { - return nil, err + Thumbnail: makeThumbnail(image, maxFingerprintSize*2), + Attributes: attributes, } - defer thumbnailFile.Close() - entry.Thumbnail, err = png.Decode(thumbnailFile) - if err != nil { - return nil, err - } + entry.MaxFingerprint = entry.FingerprintForSize(maxFingerprintSize) return entry, nil } -func makeKey() (string, error) { - b := make([]byte, keyBitLength/8) - _, err := rand.Read(b) - if err != nil { - return "", err - } - - return hex.EncodeToString(b), nil -} - func makeThumbnail(src image.Image, size int) image.Image { width := float64(src.Bounds().Max.X - src.Bounds().Min.X) height := float64(src.Bounds().Max.Y - src.Bounds().Min.Y) @@ -128,3 +102,8 @@ func makeThumbnail(src image.Image, size int) image.Image { return thumbnail } + +type indexEntryJSON struct { + MaxFingerprint []byte `json:"maxFingerprint"` + Attributes map[string]interface{} `json:"attributes"` +} diff --git a/indexentry_test.go b/indexentry_test.go new file mode 100644 index 0000000..aae46e3 --- /dev/null +++ b/indexentry_test.go @@ -0,0 +1,41 @@ +package simian + +import ( + "encoding/json" + "reflect" + "testing" +) + +func TestIndexEntry(t *testing.T) { + + t.Run("JSON serialisation", func(t *testing.T) { + + t.Run("should roundtrip all fields", func(t *testing.T) { + + entry := &IndexEntry{ + MaxFingerprint: Fingerprint{samples: []uint8{0xF0, 0xF0, 0xF0, 0xF0}}, + Attributes: make(map[string]interface{}), + } + entry.Attributes["some key"] = "some value" + entry.Attributes["some other key"] = "some other value" + + jsonBytes, err := json.Marshal(entry) + if err != nil { + t.Fatalf("Error marshalling JSON: %v", err) + } + + var result *IndexEntry + err = json.Unmarshal(jsonBytes, &result) + if err != nil { + t.Fatalf("Error unmarshalling JSON: %v", err) + } + + if distance := result.MaxFingerprint.Distance(entry.MaxFingerprint); distance != 0 { + t.Errorf("Expected no difference in fingerprints but got %d", distance) + } + if !reflect.DeepEqual(entry.Attributes, result.Attributes) { + t.Errorf("Expected attributes to match but got %v", result.Attributes) + } + }) + }) +} diff --git a/indexnode.go b/indexnode.go index bb01294..7b572e7 100644 --- a/indexnode.go +++ b/indexnode.go @@ -1,6 +1,7 @@ package simian import ( + "encoding/json" "errors" "fmt" "math" @@ -10,30 +11,30 @@ import ( var errResultLimitReached = errors.New("result limit reached") type IndexNode struct { - path string - children []*IndexNodeHandle - childrenByFingerprint map[string]*IndexNodeHandle - entries []*IndexEntry + childFingerprints []Fingerprint + childFingerprintsByString map[string]*Fingerprint + entries []*IndexEntry } -func (node *IndexNode) Add(entry *IndexEntry, childFingerprintSize int, index *Index) (*IndexNode, error) { +func (node *IndexNode) Add(entry *IndexEntry, nodeFingerprint Fingerprint, childFingerprintSize int, index *Index) (*IndexNode, error) { - fmt.Printf("Node[%s] Add %d\n", node.path, childFingerprintSize) + fmt.Printf("Node Add %d\n", childFingerprintSize) - entryFingerprint := entry.FingerprintForSize(childFingerprintSize) + childFingerprint := entry.FingerprintForSize(childFingerprintSize) - if len(node.children) == 0 { + if len(node.childFingerprints) == 0 { // We can go deeper and this new entry is sufficiently different to // the rest, so split this leaf node by turning entries into children. fmt.Printf("Max Diff: %f\n", node.maxChildDifferenceTo(entry.MaxFingerprint)) if childFingerprintSize < index.maxFingerprintSize && node.maxChildDifferenceTo(entry.MaxFingerprint) > index.maxEntryDifference { - fmt.Printf("Pushing entries to children\n") - node.pushEntriesToChildren(childFingerprintSize, index.Store) + fmt.Printf("Pushing %d entries to children\n", len(node.entries)) + node.pushEntriesToChildren(nodeFingerprint, childFingerprintSize, index.Store) + fmt.Printf("Done pushing entries to children\n") } else { - fmt.Printf("Adding entry %s\n", node.path) - err := index.Store.AddEntry(entry, node) + fmt.Printf("Adding entry %s\n", nodeFingerprint.String()) + err := index.Store.AddEntry(entry, node, nodeFingerprint) if err != nil { return nil, err } @@ -42,25 +43,52 @@ func (node *IndexNode) Add(entry *IndexEntry, childFingerprintSize int, index *I } } - child, err := index.Store.GetOrCreateChild(entryFingerprint, node) + child, err := index.Store.GetOrCreateChild(childFingerprint, node, nodeFingerprint) if err != nil { return nil, err } - return child.Add(entry, childFingerprintSize+1, index) + return child.Add(entry, childFingerprint, childFingerprintSize+1, index) } func (node *IndexNode) FindNearest(entry *IndexEntry, childFingerprintSize int, index *Index, maxResults int, maxDifference float64) ([]*IndexEntry, error) { results := make([]*IndexEntry, 0, maxResults) err := node.gatherNearest(entry, childFingerprintSize, index, maxDifference, &results) - if err != errResultLimitReached { + if err != nil && err != errResultLimitReached { return nil, err } return results, nil } +func (node *IndexNode) MarshalJSON() ([]byte, error) { + return json.Marshal(&indexNodeJSON{ + ChildFingerprints: node.childFingerprints, + Entries: node.entries, + }) +} + +func (node *IndexNode) UnmarshalJSON(b []byte) error { + var value indexNodeJSON + err := json.Unmarshal(b, &value) + if err != nil { + return err + } + + node.childFingerprints = value.ChildFingerprints + + node.childFingerprintsByString = make(map[string]*Fingerprint) + for i := 0; i < len(node.childFingerprints); i++ { + f := &node.childFingerprints[i] + node.childFingerprintsByString[f.String()] = f + } + + node.entries = value.Entries + + return nil +} + func (node *IndexNode) addSimilarEntriesTo(entries *[]*IndexEntry, fingerprint Fingerprint, maxDifference float64) error { fmt.Printf("addSimilarEntriesTo\n") @@ -85,16 +113,19 @@ func (node *IndexNode) addSimilarEntriesTo(entries *[]*IndexEntry, fingerprint F func (node *IndexNode) gatherNearest(entry *IndexEntry, childFingerprintSize int, index *Index, maxDifference float64, results *[]*IndexEntry) error { - fmt.Printf("%d gatherNearest\n", childFingerprintSize) + fmt.Printf("%d gatherNearest %d\n", childFingerprintSize, len(node.entries)) // Check for an exact matching child - entryFingerprint := entry.FingerprintForSize(childFingerprintSize) - exactChildHandle := node.childrenByFingerprint[entryFingerprint.String()] + childFingerprint := entry.FingerprintForSize(childFingerprintSize) + exactChildFingerprint, exactChildFingerprintExists := node.childFingerprintsByString[childFingerprint.String()] + var exactChildFingerprintString string var exactChild *IndexNode - if exactChildHandle != nil { + if exactChildFingerprintExists { + exactChildFingerprintString = exactChildFingerprint.String() + var err error - exactChild, err = index.Store.GetChild(entryFingerprint, node) + exactChild, err = index.Store.GetChild(childFingerprint, node) if err != nil { return err } @@ -113,11 +144,11 @@ func (node *IndexNode) gatherNearest(entry *IndexEntry, childFingerprintSize int } } - children := make([]*IndexNodeHandle, len(node.children)) - copy(children, node.children) + childFingerprints := make([]Fingerprint, len(node.childFingerprints)) + copy(childFingerprints, node.childFingerprints) // Need more results - find and sort all children by nearness - sort.Sort(nodesByDifferenceToFingerprintWith(children, entryFingerprint)) + sort.Sort(nodesByDifferenceToFingerprintWith(childFingerprints, childFingerprint)) // fmt.Printf("Sorting %d children...\n", len(children)) // for i, child := range children { @@ -126,13 +157,13 @@ func (node *IndexNode) gatherNearest(entry *IndexEntry, childFingerprintSize int // } // Recursively gather from nearest children - for i, child := range children { + for i, cf := range childFingerprints { fmt.Printf("Visiting child %d\n", i) - if exactChild != nil && child.Path == exactChild.path { + if exactChildFingerprintExists && cf.String() == exactChildFingerprintString { continue } - childNode, err := index.Store.GetChild(child.Fingerprint, node) + childNode, err := index.Store.GetChild(cf, node) if err != nil { return err } @@ -163,28 +194,23 @@ func (node *IndexNode) maxChildDifferenceTo(f Fingerprint) float64 { return maxDifference } -func (node *IndexNode) pushEntriesToChildren(childFingerprintSize int, store IndexStore) error { +func (node *IndexNode) pushEntriesToChildren(nodeFingerprint Fingerprint, childFingerprintSize int, store IndexStore) error { node.withEachEntry(func(entry *IndexEntry) error { - entryFingerprint := entry.FingerprintForSize(childFingerprintSize) - child, err := store.GetOrCreateChild(entryFingerprint, node) + childFingerprint := entry.FingerprintForSize(childFingerprintSize) + child, err := store.GetOrCreateChild(childFingerprint, node, nodeFingerprint) if err != nil { return err } - store.AddEntry(entry, child) - return nil + fmt.Printf("Pushing entry to child\n") + return store.AddEntry(entry, child, childFingerprint) }) - return store.RemoveEntries(node) + return store.RemoveEntries(node, nodeFingerprint) } -func (node *IndexNode) registerChild(child *IndexNode, f Fingerprint) { - childHandle := &IndexNodeHandle{Path: child.path, Fingerprint: f} - node.registerChildByHandle(childHandle) -} - -func (node *IndexNode) registerChildByHandle(childHandle *IndexNodeHandle) { - node.children = append(node.children, childHandle) - node.childrenByFingerprint[childHandle.Fingerprint.String()] = childHandle +func (node *IndexNode) registerChild(childFingerprint Fingerprint) { + node.childFingerprints = append(node.childFingerprints, childFingerprint) + node.childFingerprintsByString[childFingerprint.String()] = &node.childFingerprints[len(node.childFingerprints)-1] } func (node *IndexNode) registerEntry(entry *IndexEntry) { @@ -206,13 +232,18 @@ func (node *IndexNode) withEachEntry(action func(*IndexEntry) error) error { return nil } +type indexNodeJSON struct { + ChildFingerprints []Fingerprint `json:"childFingerprints"` + Entries []*IndexEntry `json:"entries"` +} + type nodesByDifferenceToFingerprint struct { - nodes []*IndexNodeHandle - differences []float64 + nodeFingerprints []Fingerprint + differences []float64 } func (sorter *nodesByDifferenceToFingerprint) Len() int { - return len(sorter.nodes) + return len(sorter.nodeFingerprints) } func (sorter *nodesByDifferenceToFingerprint) Less(i, j int) bool { @@ -220,20 +251,20 @@ func (sorter *nodesByDifferenceToFingerprint) Less(i, j int) bool { } func (sorter *nodesByDifferenceToFingerprint) Swap(i, j int) { - tmpNode := sorter.nodes[i] - sorter.nodes[i] = sorter.nodes[j] - sorter.nodes[j] = tmpNode + tmp := sorter.nodeFingerprints[i] + sorter.nodeFingerprints[i] = sorter.nodeFingerprints[j] + sorter.nodeFingerprints[j] = tmp tmpDiff := sorter.differences[i] sorter.differences[i] = sorter.differences[j] sorter.differences[j] = tmpDiff } -func nodesByDifferenceToFingerprintWith(nodes []*IndexNodeHandle, f Fingerprint) *nodesByDifferenceToFingerprint { - differences := make([]float64, len(nodes), len(nodes)) - for i, n := range nodes { - differences[i] = n.Fingerprint.Difference(f) +func nodesByDifferenceToFingerprintWith(nodeFingerprints []Fingerprint, f Fingerprint) *nodesByDifferenceToFingerprint { + differences := make([]float64, len(nodeFingerprints), len(nodeFingerprints)) + for i, nf := range nodeFingerprints { + differences[i] = nf.Difference(f) } - return &nodesByDifferenceToFingerprint{nodes: nodes, differences: differences} + return &nodesByDifferenceToFingerprint{nodeFingerprints: nodeFingerprints, differences: differences} } diff --git a/indexnode_test.go b/indexnode_test.go new file mode 100644 index 0000000..75ee02b --- /dev/null +++ b/indexnode_test.go @@ -0,0 +1,89 @@ +package simian + +import ( + "encoding/json" + "testing" +) + +func TestIndexNode(t *testing.T) { + + t.Run("JSON serialisation", func(t *testing.T) { + + t.Run("should roundtrip all fields", func(t *testing.T) { + n := &IndexNode{ + childFingerprintsByString: make(map[string]*Fingerprint), + } + + n.registerChild(Fingerprint{samples: []uint8{0x10, 0x20, 0x30, 0x40}}) + n.registerChild(Fingerprint{samples: []uint8{0x50, 0x60, 0x70, 0x80}}) + + entry1 := &IndexEntry{ + MaxFingerprint: Fingerprint{samples: []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9}}, + Attributes: make(map[string]interface{}), + } + n.registerEntry(entry1) + + entry2 := &IndexEntry{ + MaxFingerprint: Fingerprint{samples: []uint8{10, 11, 12, 13, 14, 15, 16, 17, 18}}, + Attributes: make(map[string]interface{}), + } + n.registerEntry(entry2) + + jsonBytes, err := json.Marshal(n) + if err != nil { + t.Fatalf("Error marshalling JSON: %v", err) + } + + var result *IndexNode + err = json.Unmarshal(jsonBytes, &result) + if err != nil { + t.Fatalf("Error unmarshalling JSON: %v", err) + } + + if actual, expected := len(result.childFingerprints), len(n.childFingerprints); actual != expected { + t.Fatalf("Expected %d child fingerprints but got %d", expected, actual) + } + for i := 0; i < len(result.childFingerprints); i++ { + actual := result.childFingerprints[i].String() + expected := n.childFingerprints[i].String() + + if actual != expected { + t.Errorf("Expected fingerprint '%s' but got '%s'", expected, actual) + } + } + + if actual, expected := len(result.childFingerprintsByString), len(n.childFingerprintsByString); actual != expected { + t.Fatalf("Expected %d child fingerprints mapped by string but got %d", expected, actual) + } + for k, v := range n.childFingerprintsByString { + actual := result.childFingerprintsByString[k].String() + expected := v.String() + + if actual != expected { + t.Errorf("Expected fingerprint '%s' but got '%s'", expected, actual) + } + } + + if actual, expected := len(result.entries), len(n.entries); actual != expected { + t.Fatalf("Expected %d entries but got %d", expected, actual) + } + for i := 0; i < len(result.entries); i++ { + actualBytes, err := json.Marshal(result.entries[i]) + if err != nil { + t.Fatalf("Error marshalling entry: %v", err) + } + actual := string(actualBytes) + + expectedBytes, err := json.Marshal(n.entries[i]) + if err != nil { + t.Fatalf("Error marshalling entry: %v", err) + } + expected := string(expectedBytes) + + if actual != expected { + t.Errorf("Expected entry '%s' but got '%s'", expected, actual) + } + } + }) + }) +} diff --git a/indexnodehandle.go b/indexnodehandle.go deleted file mode 100644 index 3314028..0000000 --- a/indexnodehandle.go +++ /dev/null @@ -1,6 +0,0 @@ -package simian - -type IndexNodeHandle struct { - Path string - Fingerprint Fingerprint -} diff --git a/indexstore.go b/indexstore.go index b7649a4..7df2d02 100644 --- a/indexstore.go +++ b/indexstore.go @@ -1,9 +1,10 @@ package simian type IndexStore interface { - AddEntry(entry *IndexEntry, node *IndexNode) error + AddEntry(entry *IndexEntry, node *IndexNode, nodeFingerprint Fingerprint) error + Close() error GetChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) - GetOrCreateChild(f Fingerprint, parent *IndexNode) (*IndexNode, error) + GetOrCreateChild(f Fingerprint, parent *IndexNode, parentFingerprint Fingerprint) (*IndexNode, error) GetRoot() (*IndexNode, error) - RemoveEntries(node *IndexNode) error + RemoveEntries(node *IndexNode, nodeFingerprint Fingerprint) error } diff --git a/vendor/vendor.json b/vendor/vendor.json index 3e037de..ba70d97 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -2,6 +2,18 @@ "comment": "", "ignore": "test", "package": [ + { + "checksumSHA1": "sVE1dlxGDmuS/C6XfTL/2/UG6q4=", + "path": "github.com/mandykoh/keva", + "revision": "443c1447fc51d6502cc5cb82d810014baba00b5b", + "revisionTime": "2017-08-15T09:38:58Z" + }, + { + "checksumSHA1": "fk05LCN5pjUKlw10ErXFIYoFxZk=", + "path": "github.com/mandykoh/symlock", + "revision": "0362cd091b6b627bf9552d87ba15956d5e8bde32", + "revisionTime": "2017-06-17T12:17:10Z" + }, { "checksumSHA1": "7E3Y1HU/UbsQF/dxMRdjFmx9QDQ=", "path": "golang.org/x/image/draw",