From 3462a9ca90388dc5d8b4fa4218a32769676b3623 Mon Sep 17 00:00:00 2001 From: Yuri D'Elia Date: Sun, 7 Dec 2014 19:15:17 +0100 Subject: [PATCH 1/2] Implement --exclude-if-present Add a new --exclude-if-present command-line flag to ``attic create``. If specified, directories containing the specified tag file will be excluded from the backup. The flag can be repeated to specify more than one tag file; a tag file excludes its directory regardless of its contents. --- attic/archiver.py | 19 ++++++++++++------- attic/helpers.py | 16 +++++++++++++++- attic/testsuite/archiver.py | 11 +++++++++++ 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d..20b59983 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -17,8 +17,8 @@ from attic.helpers import Error, location_validator, format_time, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ - Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - is_cachedir, bigint_to_int + Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, \ + Statistics, dir_is_tagged, bigint_to_int from attic.remote import RepositoryServer, RemoteRepository @@ -125,7 +125,8 @@ def do_create(self, args): continue else: restrict_dev = None - self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev) + self._process(archive, cache, args.excludes, args.exclude_caches, + args.exclude_if_present, skip_inodes, path, restrict_dev) archive.save() if args.stats: t = datetime.now() @@ -141,7 +142,8 @@ def do_create(self, args): print('-' * 78) return self.exit_code - def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev): + def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present, + skip_inodes, path, restrict_dev): if exclude_path(path, 
excludes): return try: @@ -164,7 +166,7 @@ def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, except IOError as e: self.print_error('%s: %s', path, e) elif stat.S_ISDIR(st.st_mode): - if exclude_caches and is_cachedir(path): + if dir_is_tagged(path, exclude_caches, exclude_if_present): return archive.process_item(path, st) try: @@ -173,8 +175,8 @@ def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, self.print_error('%s: %s', path, e) else: for filename in sorted(entries): - self._process(archive, cache, excludes, exclude_caches, skip_inodes, - os.path.join(path, filename), restrict_dev) + self._process(archive, cache, excludes, exclude_caches, exclude_if_present, + skip_inodes, os.path.join(path, filename), restrict_dev) elif stat.S_ISLNK(st.st_mode): archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): @@ -541,6 +543,9 @@ def run(self, args=None): subparser.add_argument('--exclude-caches', dest='exclude_caches', action='store_true', default=False, help='exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html)') + subparser.add_argument('--exclude-if-present', dest='exclude_if_present', + metavar='FILENAME', action='append', type=str, + help='exclude directories that contain the specified file') subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval', type=int, default=300, metavar='SECONDS', help='write checkpoint every SECONDS seconds (Default: 300)') diff --git a/attic/helpers.py b/attic/helpers.py index ac526698..cc14a60d 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -257,7 +257,7 @@ def __repr__(self): return '%s(%s)' % (type(self), self.pattern) -def is_cachedir(path): +def dir_is_cachedir(path): """Determines whether the specified path is a cache directory (and therefore should potentially be excluded from the backup) according to the CACHEDIR.TAG protocol @@ -277,6 +277,20 @@ def is_cachedir(path): return False 
+def dir_is_tagged(path, exclude_caches, exclude_if_present): + """Determines whether the specified path is excluded by being a cache + directory or containing the user-specified tag file. + """ + if exclude_caches and dir_is_cachedir(path): + return True + if exclude_if_present is not None: + for tag in exclude_if_present: + tag_path = os.path.join(path, tag) + if os.path.isfile(tag_path): + return True + return False + + def format_time(t): """Format datetime suitable for fixed length list output """ diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 382fcc85..370513e3 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -208,6 +208,17 @@ def test_exclude_caches(self): self.assert_equal(sorted(os.listdir('output/input')), ['cache2', 'file1']) self.assert_equal(sorted(os.listdir('output/input/cache2')), ['CACHEDIR.TAG']) + def test_exclude_tagged(self): + self.attic('init', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('tagged1/.NOBACKUP') + self.create_regular_file('tagged2/00-NOBACKUP') + self.create_regular_file('tagged3/.NOBACKUP/file2') + self.attic('create', '--exclude-if-present', '.NOBACKUP', '--exclude-if-present', '00-NOBACKUP', self.repository_location + '::test', 'input') + with changedir('output'): + self.attic('extract', self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'tagged3']) + def test_path_normalization(self): self.attic('init', self.repository_location) self.create_regular_file('dir1/dir2/file', size=1024 * 80) From f61e22cacc90e76e6c8f4b23677eee62c09e97ac Mon Sep 17 00:00:00 2001 From: Yuri D'Elia Date: Mon, 15 Dec 2014 12:27:43 +0100 Subject: [PATCH 2/2] Implement --keep-tag-files to preserve directory roots/tag-files Add --keep-tag-files to keep the root of each excluded directory and its tag/exclusion file in the archive. 
--- attic/archiver.py | 17 ++++++++++++----- attic/helpers.py | 10 ++++++---- attic/testsuite/archiver.py | 14 ++++++++++++++ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 20b59983..91557008 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -125,8 +125,8 @@ def do_create(self, args): continue else: restrict_dev = None - self._process(archive, cache, args.excludes, args.exclude_caches, - args.exclude_if_present, skip_inodes, path, restrict_dev) + self._process(archive, cache, args.excludes, args.exclude_caches, args.exclude_if_present, + args.keep_tag_files, skip_inodes, path, restrict_dev) archive.save() if args.stats: t = datetime.now() @@ -143,7 +143,7 @@ def do_create(self, args): return self.exit_code def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present, - skip_inodes, path, restrict_dev): + keep_tag_files, skip_inodes, path, restrict_dev): if exclude_path(path, excludes): return try: @@ -166,7 +166,11 @@ def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present, except IOError as e: self.print_error('%s: %s', path, e) elif stat.S_ISDIR(st.st_mode): - if dir_is_tagged(path, exclude_caches, exclude_if_present): + tag_path = dir_is_tagged(path, exclude_caches, exclude_if_present) + if tag_path: + if keep_tag_files: + archive.process_item(path, st) + archive.process_item(tag_path, st) return archive.process_item(path, st) try: @@ -176,7 +180,7 @@ def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present, else: for filename in sorted(entries): self._process(archive, cache, excludes, exclude_caches, exclude_if_present, - skip_inodes, os.path.join(path, filename), restrict_dev) + keep_tag_files, skip_inodes, os.path.join(path, filename), restrict_dev) elif stat.S_ISLNK(st.st_mode): archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): @@ -546,6 +550,9 @@ def run(self, args=None): 
subparser.add_argument('--exclude-if-present', dest='exclude_if_present', metavar='FILENAME', action='append', type=str, help='exclude directories that contain the specified file') + subparser.add_argument('--keep-tag-files', dest='keep_tag_files', + action='store_true', default=False, + help='keep tag files of excluded caches/directories') subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval', type=int, default=300, metavar='SECONDS', help='write checkpoint every SECONDS seconds (Default: 300)') diff --git a/attic/helpers.py b/attic/helpers.py index cc14a60d..9f224101 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -279,16 +279,18 @@ def dir_is_cachedir(path): def dir_is_tagged(path, exclude_caches, exclude_if_present): """Determines whether the specified path is excluded by being a cache - directory or containing the user-specified tag file. + directory or containing the user-specified tag file. Returns the + path of the tag file (either CACHEDIR.TAG or the matching + user-specified file) """ if exclude_caches and dir_is_cachedir(path): - return True + return os.path.join(path, 'CACHEDIR.TAG') if exclude_if_present is not None: for tag in exclude_if_present: tag_path = os.path.join(path, tag) if os.path.isfile(tag_path): - return True - return False + return tag_path + return None def format_time(t): diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 370513e3..c0fc74f7 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -219,6 +219,20 @@ def test_exclude_tagged(self): self.attic('extract', self.repository_location + '::test') self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'tagged3']) + def test_exclude_keep_tagged(self): + self.attic('init', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('tagged1/.NOBACKUP') + self.create_regular_file('tagged1/file2', size=1024 * 80) + 
self.create_regular_file('tagged2/CACHEDIR.TAG', contents = b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff') + self.create_regular_file('tagged2/file3', size=1024 * 80) + self.attic('create', '--exclude-if-present', '.NOBACKUP', '--exclude-caches', '--keep-tag-files', self.repository_location + '::test', 'input') + with changedir('output'): + self.attic('extract', self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'tagged1', 'tagged2']) + self.assert_equal(sorted(os.listdir('output/input/tagged1')), ['.NOBACKUP']) + self.assert_equal(sorted(os.listdir('output/input/tagged2')), ['CACHEDIR.TAG']) + def test_path_normalization(self): self.attic('init', self.repository_location) self.create_regular_file('dir1/dir2/file', size=1024 * 80)