diff --git a/Makefile b/Makefile index 5a3cb57..861e883 100644 --- a/Makefile +++ b/Makefile @@ -49,3 +49,11 @@ fixture: aws s3 --endpoint-url http://localhost:4572 mb s3://example-bucket-directory aws s3 --endpoint-url http://localhost:4572 mb s3://example-bucket-mime aws s3api --endpoint-url http://localhost:4572 put-object --bucket example-bucket-directory --key test/ + aws s3 --endpoint-url http://localhost:4572 mb s3://example-bucket-check-file-difference + aws s3api --endpoint-url http://localhost:4572 put-object --bucket example-bucket-check-file-difference --key empty/ + aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/equal/ + aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/equal/foo/ + aws s3 --endpoint-url http://localhost:4572 cp README.md s3://example-bucket-check-file-difference/equal/foo/bar/ + aws s3 --endpoint-url http://localhost:4572 cp Makefile s3://example-bucket-check-file-difference/difference/ + aws s3 --endpoint-url http://localhost:4572 cp Makefile s3://example-bucket-check-file-difference/difference/foo/ + aws s3 --endpoint-url http://localhost:4572 cp Makefile s3://example-bucket-check-file-difference/difference/foo/bar/ diff --git a/examples/simple/example.go b/examples/simple/example.go index 8622758..5d4d593 100644 --- a/examples/simple/example.go +++ b/examples/simple/example.go @@ -33,8 +33,19 @@ func main() { fmt.Printf("from=%s\n", os.Args[1]) fmt.Printf("to=%s\n", os.Args[2]) - err = s3sync.New(sess).Sync(os.Args[1], os.Args[2]) + syncManager := s3sync.New(sess) + + hasDiff, err := syncManager.HasDifference(os.Args[1], os.Args[2]) if err != nil { panic(err) } + + if hasDiff { + err = syncManager.Sync(os.Args[1], os.Args[2]) + if err != nil { + panic(err) + } + } else { + fmt.Println("There are no differences in the files.") + } } diff --git a/s3sync.go b/s3sync.go index e4f7e31..ce93dc0 100644 --- a/s3sync.go +++ b/s3sync.go @@ 
-15,6 +15,7 @@ package s3sync
 import (
 	"context"
 	"errors"
+	"fmt"
 	"net/url"
 	"os"
 	"path/filepath"
@@ -74,6 +75,12 @@ type fileOp struct {
 	op operation
 }
 
+// checkFiles pairs a source file with the destination file of the same name.
+type checkFiles struct {
+	sourceFile, destFile *fileInfo // destFile is nil when the file is absent at the destination
+	err                  error     // set, without any file, when the destination listing failed
+}
+
 // New returns a new Manager.
 func New(sess *session.Session, options ...Option) *Manager {
 	m := &Manager{
@@ -144,6 +151,63 @@ func (m *Manager) Sync(source, dest string) error {
 	return errors.New("local to local sync is not supported")
 }
 
+// HasDifference reports whether any file found in source is missing from dest or differs
+// from its counterpart there (different size, or the source modified more recently).
+// Exactly one of source and dest must be an S3 URL; other combinations return an error.
+func (m *Manager) HasDifference(source, dest string) (bool, error) {
+	sourceURL, err := url.Parse(source)
+	if err != nil {
+		return false, err
+	}
+	destURL, err := url.Parse(dest)
+	if err != nil {
+		return false, err
+	}
+	// Reject the unsupported combination before any worker goroutine is started.
+	if !isS3URL(sourceURL) && !isS3URL(destURL) {
+		return false, errors.New("local to local file differences check is not implemented")
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	chJob := make(chan func())
+	var wg sync.WaitGroup
+	for i := 0; i < m.nJobs; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for job := range chJob {
+				job()
+			}
+		}()
+	}
+	defer func() { // stop the workers and wait for them before returning
+		close(chJob)
+		wg.Wait()
+	}()
+
+	if isS3URL(sourceURL) {
+		sourceS3Path, err := urlToS3Path(sourceURL)
+		if err != nil {
+			return false, err
+		}
+		if isS3URL(destURL) {
+			destS3Path, err := urlToS3Path(destURL)
+			if err != nil {
+				return false, err
+			}
+			return m.hasDifferenceS3ToS3(ctx, chJob, sourceS3Path, destS3Path)
+		}
+		return m.hasDifferenceS3ToLocal(ctx, chJob, sourceS3Path, dest)
+	}
+
+	destS3Path, err := urlToS3Path(destURL) // dest is necessarily an S3 URL here
+	if err != nil {
+		return false, err
+	}
+	return m.hasDifferenceLocalToS3(ctx, chJob, source, destS3Path)
+}
+
 // GetStatistics returns the structure that contains the sync statistics
 func (m *Manager) GetStatistics() SyncStatistics {
 	m.statistics.mutex.Lock()
@@ -222,6 +286,70 @@ func (m *Manager) syncS3ToLocal(ctx context.Context, chJob
chan func(), sourcePa
 	return errs.ErrOrNil()
 }
 
+// hasDifferenceS3ToS3 is a placeholder: it always returns an error because comparing
+// two S3 locations is not implemented.
+func (m *Manager) hasDifferenceS3ToS3(ctx context.Context, chJob chan func(), sourcePath, destPath *s3Path) (bool, error) {
+	return false, errors.New("S3 to S3 file differences check feature is not implemented")
+}
+
+// hasDifferenceLocalToS3 reports whether any local file listed under sourcePath is
+// missing from the S3 location destPath or differs from its counterpart there.
+func (m *Manager) hasDifferenceLocalToS3(ctx context.Context, chJob chan func(), sourcePath string, destPath *s3Path) (bool, error) {
+	return hasDifferenceInFiles(chJob, filterFilesForHasDifference(
+		listLocalFiles(ctx, sourcePath), m.listS3Files(ctx, destPath),
+	))
+}
+
+// hasDifferenceS3ToLocal reports whether any file listed under the S3 location
+// sourcePath is missing from the local destPath or differs from its counterpart there.
+func (m *Manager) hasDifferenceS3ToLocal(ctx context.Context, chJob chan func(), sourcePath *s3Path, destPath string) (bool, error) {
+	return hasDifferenceInFiles(chJob, filterFilesForHasDifference(
+		m.listS3Files(ctx, sourcePath), listLocalFiles(ctx, destPath),
+	))
+}
+
+// hasDifferenceInFiles runs hasDifferenceFile, as one job sent on chJob, for every pair
+// received from pairs and waits until all of the jobs have finished. It returns true
+// when at least one pair differs, together with the errors carried by the pairs or
+// returned by the comparisons.
+// The jobs may run concurrently, so the shared results are updated under a mutex.
+func hasDifferenceInFiles(chJob chan func(), pairs chan *checkFiles) (bool, error) {
+	var (
+		wg            sync.WaitGroup
+		mu            sync.Mutex // guards hasDifference and errs, which the jobs share
+		hasDifference bool
+	)
+	errs := &multiErr{}
+
+	// Dispatch one comparison job per pair.
+	for files := range pairs {
+		wg.Add(1)
+		files := files // per-iteration copy for the closure below
+		chJob <- func() {
+			defer wg.Done()
+			// A pair that already carries an error is reported without being compared.
+			notEqual, err := false, files.err
+			if err == nil {
+				notEqual, err = hasDifferenceFile(files)
+			}
+
+			mu.Lock()
+			defer mu.Unlock()
+			if err != nil {
+				// Append err itself: after a failed comparison files.err is still nil.
+				errs.Append(err)
+				return
+			}
+			if notEqual {
+				hasDifference = true
+			}
+		}
+	}
+	wg.Wait() // hasDifference and errs are only read once every job has finished
+
+	return hasDifference, errs.ErrOrNil()
+}
+
 func (m *Manager) download(file *fileInfo, sourcePath *s3Path, destPath string) error {
 	var targetFilename string
 	if !strings.HasSuffix(destPath, "/") && file.singleFile {
@@ -547,7 +675,7 @@ func filterFilesForSync(sourceFileChan, destFileChan chan *fileInfo, del bool) c
 			// 1. The dest doesn't exist
 			// 2. The dest doesn't have the same size as the source
 			// 3. 
The dest is older than the source
-			if !ok || sourceInfo.size != destInfo.size || sourceInfo.lastModified.After(destInfo.lastModified) {
+			if !ok || isSyncTargetFile(sourceInfo, destInfo) {
 				c <- &fileOp{fileInfo: sourceInfo}
 			}
 			if ok {
@@ -567,6 +695,37 @@ func filterFilesForSync(sourceFileChan, destFileChan chan *fileInfo, del bool) c
 	return c
 }
 
+// filterFilesForHasDifference pairs every file received from sourceFileChan with the
+// destination file of the same name and returns a channel of those pairs. A pair whose
+// destFile is nil denotes a file that is absent at the destination. The destination
+// listing is collected into a map before this function returns; if that fails, the
+// only value sent is a checkFiles carrying the error. The returned channel is closed
+// once sourceFileChan has been consumed.
+func filterFilesForHasDifference(sourceFileChan, destFileChan chan *fileInfo) chan *checkFiles {
+	c := make(chan *checkFiles)
+
+	destFiles, err := fileInfoChanToMap(destFileChan)
+
+	go func() {
+		defer close(c)
+		if err != nil {
+			c <- &checkFiles{err: err}
+			return
+		}
+		// NOTE(review): errors in the destination listing surface through fileInfoChanToMap,
+		// but source entries are forwarded as-is; confirm source listing errors are handled.
+		for sourceInfo := range sourceFileChan {
+			c <- &checkFiles{
+				sourceFile: sourceInfo,
+				// A name missing from the map yields nil: absent at the destination.
+				destFile: destFiles[sourceInfo.name],
+			}
+		}
+	}()
+
+	return c
+}
+
 // fileInfoChanToMap accumulates the fileInfos from the given channel and returns a map.
 // It retruns an error if the channel contains an error.
 func fileInfoChanToMap(files chan *fileInfo) (map[string]*fileInfo, error) {
@@ -580,3 +739,29 @@ func fileInfoChanToMap(files chan *fileInfo) (map[string]*fileInfo, error) {
 	}
 	return result, nil
 }
+
+// hasDifferenceFile reports whether the source file of the given pair is absent at the
+// destination or has to be updated there; each detected difference is written to
+// stderr. It returns an error when the pair or its source file is nil.
+func hasDifferenceFile(files *checkFiles) (bool, error) {
+	switch {
+	case files == nil || files.sourceFile == nil:
+		return false, errors.New("missing information needed to compare files")
+	case files.destFile == nil:
+		fmt.Fprintf(os.Stderr, "file %s does not exist at the comparison destination\n", files.sourceFile.name)
+		return true, nil
+	case isSyncTargetFile(files.sourceFile, files.destFile):
+		fmt.Fprintf(os.Stderr, "the %s file statuses of the source and destination files do not match\n", files.sourceFile.name)
+		return true, nil
+	}
+	return false, nil
+}
+
+// isSyncTargetFile reports whether destFile must be updated from sourceFile: the sizes
+// differ or sourceFile was modified later. It returns false when either file is nil.
+func isSyncTargetFile(sourceFile, destFile *fileInfo) bool {
+	if sourceFile == nil || destFile == nil {
+		return false
+	}
+	return sourceFile.size != destFile.size || sourceFile.lastModified.After(destFile.lastModified)
+}
diff --git a/s3sync_test.go b/s3sync_test.go
index 74a814d..95a1f6c 100644
--- a/s3sync_test.go
+++ b/s3sync_test.go
@@ -38,6 +38,14 @@ func TestS3syncNotImplemented(t *testing.T) {
 	if err := m.Sync("s3://foo", "s3://bar"); err == nil {
 		t.Fatal("s3 to s3 sync is not implemented yet")
 	}
+
+	if _, err := m.HasDifference("s3://foo", "s3://bar"); err == nil {
+		t.Fatal("s3 to s3 file differences check is not implemented yet")
+	}
+
+	if _, err := m.HasDifference("foo", "bar"); err == nil {
+		t.Fatal("local to local file differences check is not supported")
+	}
 }
 
 func TestS3sync(t *testing.T) {
@@ -239,6 +247,129 @@ func TestS3sync(t *testing.T) {
 	})
 }
 
+func TestHasDifference(t *testing.T) {
+	data, err := ioutil.ReadFile(dummyFilename)
+	if err != nil {
+		t.Fatal("Failed to read", dummyFilename)
+	}
+	dummyFileSize := len(data)
+
+	t.Run("Empty Dir", func(t *testing.T) {
+		s3URL := "s3://example-bucket-check-file-difference/empty"
+
+		localTempDir, err := ioutil.TempDir("", "s3synctest")
+		if err != nil {
+			t.Fatal("Failed to create temp dir", err)
+		}
+		// Register the cleanup only once the directory is known to exist.
+		defer os.RemoveAll(localTempDir)
+
+		hasDiff, err := New(getSession()).HasDifference(localTempDir, s3URL)
+		if err != nil {
+			t.Fatal("HasDifference should be successful", err)
+		} else if hasDiff {
+			t.Fatal("There should be no difference in the files")
+		}
+
+		hasDiff, err = New(getSession()).HasDifference(s3URL, localTempDir)
+		if err != nil {
+			t.Fatal("HasDifference should be successful", err)
+		} else if hasDiff {
+			t.Fatal("There should be no difference in the files")
+		}
+	})
+
+	t.Run("Equal Files", func(t *testing.T) {
+		s3URL := "s3://example-bucket-check-file-difference/equal"
+
+		localTempDir, err := ioutil.TempDir("", "s3synctest")
+		if err != nil {
+			t.Fatal("Failed to create temp dir", err)
+		}
+		// Register the cleanup only once the directory is known to exist.
+		defer os.RemoveAll(localTempDir)
+
+		// Set the file structure to the same state as the comparison target.
+		if err := os.MkdirAll(filepath.Join(localTempDir, "foo", "bar"), 0755); err != nil {
+			t.Fatal("Failed to mkdir", err)
+		}
+		for _, file := range []string{
+			filepath.Join(localTempDir, dummyFilename),
+			filepath.Join(localTempDir, "foo", dummyFilename),
+			filepath.Join(localTempDir, "foo", "bar", dummyFilename),
+		} {
+			if err := ioutil.WriteFile(file, make([]byte, dummyFileSize), 0644); err != nil {
+				t.Fatal("Failed to write", err)
+			}
+		}
+
+		hasDiff, err := New(getSession()).HasDifference(s3URL, localTempDir)
+		if err != nil {
+			t.Fatal("HasDifference should be successful", err)
+		} else if hasDiff {
+			t.Fatal("There should be no difference in the files")
+		}
+
+		// A source newer than its destination counts as a difference, so make the local
+		// files older than the S3 objects before comparing in the local -> S3 direction.
+		oldTime := time.Date(1980, time.January, 1, 0, 0, 0, 0, time.UTC)
+		for _, file := range []string{
+			filepath.Join(localTempDir, dummyFilename),
+			filepath.Join(localTempDir, "foo", dummyFilename),
+			filepath.Join(localTempDir, "foo", "bar", dummyFilename),
+		} {
+			if err := os.Chtimes(file, oldTime, oldTime); err != nil {
+				t.Fatal("Failed to changes the access and modification times", err)
+			}
+		}
+
+		hasDiff, err = New(getSession()).HasDifference(localTempDir, s3URL)
+		if err != nil {
+			t.Fatal("HasDifference should be successful", err)
+		} else if hasDiff {
+			t.Fatal("There should be no difference in the files")
+		}
+	})
+
+	t.Run("Different Files", func(t *testing.T) {
+		s3URL := "s3://example-bucket-check-file-difference/difference"
+
+		localTempDir, err := ioutil.TempDir("", "s3synctest")
+		if err != nil {
+			t.Fatal("Failed to create temp dir", err)
+		}
+		// Register the cleanup only once the directory is known to exist.
+		defer os.RemoveAll(localTempDir)
+
+		if err := os.MkdirAll(filepath.Join(localTempDir, "foo", "bar"), 0755); err != nil {
+			t.Fatal("Failed to mkdir", err)
+		}
+		for _, file := range []string{
+			filepath.Join(localTempDir, dummyFilename),
+			filepath.Join(localTempDir, "foo", dummyFilename),
+			filepath.Join(localTempDir, "foo", "bar", dummyFilename),
+		} {
+			if err := ioutil.WriteFile(file, make([]byte, dummyFileSize), 0644); err != nil {
+				t.Fatal("Failed to write", err)
+			}
+		}
+
+		hasDiff, err := New(getSession()).HasDifference(localTempDir, s3URL)
+		if err != nil {
+			t.Fatal("HasDifference should be successful", err)
+		} else if !hasDiff {
+			t.Fatal("There should be difference in the files")
+		}
+
+		hasDiff, err = New(getSession()).HasDifference(s3URL, localTempDir)
+		if err != nil {
+			t.Fatal("HasDifference should be successful", err)
+		} else if !hasDiff {
+			t.Fatal("There should be difference in the files")
+		}
+	})
+}
+
 func TestDelete(t *testing.T) {
 	data, err := ioutil.ReadFile(dummyFilename)
 	if err != nil {
@@ -772,6 +903,192 @@ func TestS3sync_GuessMime(t *testing.T) {
 	}
 }
 
+func TestHasDifferenceFile(t *testing.T) {
+	nowTime := time.Now()
+
+	testCases := map[string]struct {
+		files    *checkFiles
+		wantErr  bool
+		expected bool
+	}{
+		"Equal File": {
+			files: &checkFiles{
+				sourceFile: &fileInfo{
+					size:         10,
+					lastModified: nowTime,
+				},
+				destFile: &fileInfo{
+					size:         10,
+					lastModified: nowTime,
+				},
+			},
+			expected: false,
+		},
+		"Not Equal File Size": {
+			files: &checkFiles{
+				sourceFile: &fileInfo{
+					size:         20,
+					lastModified: nowTime,
+				},
+				destFile: &fileInfo{
+					size:         10,
+					lastModified: nowTime,
+				},
+			},
+			expected: true,
+		},
+		"Old Last Modified Dest File": {
+			files: &checkFiles{
+				sourceFile: &fileInfo{
+					size:         10,
+					lastModified: nowTime.Add(1 * time.Hour),
+				},
+				destFile: &fileInfo{
+					size:         10,
+					lastModified: nowTime,
+				},
+			},
+			expected: true,
+		},
+		"Old Last Modified Source File": {
+			files: &checkFiles{
+				sourceFile: &fileInfo{
+					size:         10,
+					lastModified: nowTime,
+				},
+				destFile: &fileInfo{
+					size:         10,
+					lastModified: nowTime.Add(1 * time.Hour),
+				},
+			},
+			expected: false,
+		},
+		"No comparison source file information": {
+			files: &checkFiles{
+				sourceFile: nil,
+				destFile: &fileInfo{
+					size:         10,
+					lastModified: nowTime,
+				},
+			},
+			expected: false,
+			wantErr:  true,
+		},
+		"No comparison dest file information": {
+			files: &checkFiles{
+				sourceFile: &fileInfo{
+					size:         10,
+					lastModified: nowTime,
+				},
+				destFile: nil,
+			},
+			expected: true,
+		},
+		"No comparison file information": {
+			files: &checkFiles{
+				sourceFile: nil,
+				destFile:   nil,
+			},
+			expected: false,
+			wantErr:  true,
+		},
+	}
+	for name, tt := range testCases {
+		t.Run(name, func(t *testing.T) {
+			got, err := hasDifferenceFile(tt.files)
+			if (err != nil) != tt.wantErr {
+				t.Fatalf("wantErr is %t, but hasDifferenceFile returned error: %v", tt.wantErr, err)
+			}
+			if got != tt.expected {
+				t.Fatalf("expected to be %t, actual %t", tt.expected, got)
+			}
+		})
+	}
+}
+
+func TestIsSyncTargetFile(t *testing.T) {
+	nowTime := time.Now()
+
+	testCases := map[string]struct {
+		sourceFile *fileInfo
+		destFile   *fileInfo
+		expected   bool
+	}{
+		"Equal File": {
+			sourceFile: &fileInfo{
+				size:         10,
+				lastModified: nowTime,
+			},
+			destFile: &fileInfo{
+				size:         10,
+				lastModified: nowTime,
+			},
+			expected: false,
+		},
+		"Not Equal File Size": {
+			sourceFile: &fileInfo{
+				size:         20,
+				lastModified: nowTime,
+			},
+			destFile: &fileInfo{
+				size:         10,
+				lastModified: nowTime,
+			},
+			expected: true,
+		},
+		"Old Last Modified Dest File": {
+			sourceFile: &fileInfo{
+				size:         10,
+				lastModified: nowTime.Add(1 * time.Hour),
+			},
+			destFile: &fileInfo{
+				size:         10,
+				lastModified: nowTime,
+			},
+			expected: true,
+		},
+		"Old Last Modified Source File": {
+			sourceFile: &fileInfo{
+				size:         10,
+				lastModified: nowTime,
+			},
+			destFile: &fileInfo{
+				size:         10,
+				lastModified: nowTime.Add(1 * time.Hour),
+			},
+			expected: false,
+		},
+		"No comparison source file information": {
+			sourceFile: nil,
+			destFile: &fileInfo{
+				size:         10,
+				lastModified: nowTime,
+			},
+			expected: false,
+		},
+		"No comparison dest file information": {
+			sourceFile: &fileInfo{
+				size:         10,
+				lastModified: nowTime,
+			},
+			destFile: nil,
+			expected: false,
+		},
+		"No comparison file information": {
+			sourceFile: nil,
+			destFile:   nil,
+			expected:   false,
+		},
+	}
+	for name, tt := range testCases {
+		t.Run(name, func(t *testing.T) {
+			if got := isSyncTargetFile(tt.sourceFile, tt.destFile); got != tt.expected {
+				t.Fatalf("expected to be %t, actual %t", tt.expected, got)
+			}
+		})
+	}
+}
+
 type dummyLogger struct {
 	log func(...interface{})
 }