Add the --exclude-if-present and --keep-tag-files options to "attic create".

A directory is treated as tagged when --exclude-caches is given and it is a
CACHEDIR.TAG cache directory, or when it contains a regular file named by one
of the --exclude-if-present arguments. Tagged directories are not descended
into. With --keep-tag-files the directory entry and its tag file are still
archived; the tag file is archived by recursing through _process() so that it
is recorded with its own stat data instead of the directory's.

Note: the "index" blob hashes below come from the original patch and were not
recomputed after the changes to attic/archiver.py and
attic/testsuite/archiver.py.

diff --git a/attic/archiver.py b/attic/archiver.py
index 47650c2d..91557008 100644
--- a/attic/archiver.py
+++ b/attic/archiver.py
@@ -17,8 +17,8 @@
 from attic.helpers import Error, location_validator, format_time, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
-    Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
-    is_cachedir, bigint_to_int
+    Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, \
+    Statistics, dir_is_tagged, bigint_to_int
 from attic.remote import RepositoryServer, RemoteRepository
 
 
@@ -125,7 +125,8 @@ def do_create(self, args):
                     continue
             else:
                 restrict_dev = None
-            self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev)
+            self._process(archive, cache, args.excludes, args.exclude_caches, args.exclude_if_present,
+                          args.keep_tag_files, skip_inodes, path, restrict_dev)
         archive.save()
         if args.stats:
             t = datetime.now()
@@ -141,7 +142,8 @@ def do_create(self, args):
             print('-' * 78)
         return self.exit_code
 
-    def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev):
+    def _process(self, archive, cache, excludes, exclude_caches, exclude_if_present,
+                 keep_tag_files, skip_inodes, path, restrict_dev):
         if exclude_path(path, excludes):
             return
         try:
@@ -164,7 +166,14 @@ def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path,
             except IOError as e:
                 self.print_error('%s: %s', path, e)
         elif stat.S_ISDIR(st.st_mode):
-            if exclude_caches and is_cachedir(path):
+            tag_path = dir_is_tagged(path, exclude_caches, exclude_if_present)
+            if tag_path:
+                if keep_tag_files:
+                    archive.process_item(path, st)
+                    # Recurse so the tag file is archived with its own stat
+                    # data; `st` here describes the directory, not the file.
+                    self._process(archive, cache, excludes, exclude_caches, exclude_if_present,
+                                  keep_tag_files, skip_inodes, tag_path, restrict_dev)
                 return
             archive.process_item(path, st)
             try:
@@ -173,8 +182,8 @@ def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path,
                 self.print_error('%s: %s', path, e)
             else:
                 for filename in sorted(entries):
-                    self._process(archive, cache, excludes, exclude_caches, skip_inodes,
-                                  os.path.join(path, filename), restrict_dev)
+                    self._process(archive, cache, excludes, exclude_caches, exclude_if_present,
+                                  keep_tag_files, skip_inodes, os.path.join(path, filename), restrict_dev)
         elif stat.S_ISLNK(st.st_mode):
             archive.process_symlink(path, st)
         elif stat.S_ISFIFO(st.st_mode):
@@ -541,6 +550,12 @@ def run(self, args=None):
         subparser.add_argument('--exclude-caches', dest='exclude_caches',
                                action='store_true', default=False,
                                help='exclude directories that contain a CACHEDIR.TAG file (http://www.brynosaurus.com/cachedir/spec.html)')
+        subparser.add_argument('--exclude-if-present', dest='exclude_if_present',
+                               metavar='FILENAME', action='append', type=str,
+                               help='exclude directories that contain the specified file')
+        subparser.add_argument('--keep-tag-files', dest='keep_tag_files',
+                               action='store_true', default=False,
+                               help='keep tag files of excluded caches/directories')
         subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
                                type=int, default=300, metavar='SECONDS',
                                help='write checkpoint every SECONDS seconds (Default: 300)')
diff --git a/attic/helpers.py b/attic/helpers.py
index ac526698..9f224101 100644
--- a/attic/helpers.py
+++ b/attic/helpers.py
@@ -257,7 +257,7 @@ def __repr__(self):
         return '%s(%s)' % (type(self), self.pattern)
 
 
-def is_cachedir(path):
+def dir_is_cachedir(path):
     """Determines whether the specified path is a cache directory (and
     therefore should potentially be excluded from the backup) according to
     the CACHEDIR.TAG protocol
@@ -277,6 +277,22 @@ def is_cachedir(path):
     return False
 
 
+def dir_is_tagged(path, exclude_caches, exclude_if_present):
+    """Determines whether the specified path is excluded by being a cache
+    directory or containing the user-specified tag file. Returns the
+    path of the tag file (either CACHEDIR.TAG or the matching
+    user-specified file)
+    """
+    if exclude_caches and dir_is_cachedir(path):
+        return os.path.join(path, 'CACHEDIR.TAG')
+    if exclude_if_present is not None:
+        for tag in exclude_if_present:
+            tag_path = os.path.join(path, tag)
+            if os.path.isfile(tag_path):
+                return tag_path
+    return None
+
+
 def format_time(t):
     """Format datetime suitable for fixed length list output
     """
diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py
index 382fcc85..c0fc74f7 100644
--- a/attic/testsuite/archiver.py
+++ b/attic/testsuite/archiver.py
@@ -208,6 +208,34 @@ def test_exclude_caches(self):
         self.assert_equal(sorted(os.listdir('output/input')), ['cache2', 'file1'])
         self.assert_equal(sorted(os.listdir('output/input/cache2')), ['CACHEDIR.TAG'])
 
+    def test_exclude_tagged(self):
+        self.attic('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('tagged1/.NOBACKUP')
+        self.create_regular_file('tagged2/00-NOBACKUP')
+        self.create_regular_file('tagged3/.NOBACKUP/file2')
+        self.attic('create', '--exclude-if-present', '.NOBACKUP', '--exclude-if-present', '00-NOBACKUP', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.attic('extract', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'tagged3'])
+
+    def test_exclude_keep_tagged(self):
+        self.attic('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('tagged1/.NOBACKUP')
+        self.create_regular_file('tagged1/file2', size=1024 * 80)
+        self.create_regular_file('tagged2/CACHEDIR.TAG', contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff')
+        self.create_regular_file('tagged2/file3', size=1024 * 80)
+        self.attic('create', '--exclude-if-present', '.NOBACKUP', '--exclude-caches', '--keep-tag-files', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.attic('extract', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'tagged1', 'tagged2'])
+        self.assert_equal(sorted(os.listdir('output/input/tagged1')), ['.NOBACKUP'])
+        self.assert_equal(sorted(os.listdir('output/input/tagged2')), ['CACHEDIR.TAG'])
+        # Kept tag files must be restored as regular files, not directories.
+        self.assert_equal(os.path.isfile('output/input/tagged1/.NOBACKUP'), True)
+        self.assert_equal(os.path.isfile('output/input/tagged2/CACHEDIR.TAG'), True)
+
     def test_path_normalization(self):
         self.attic('init', self.repository_location)
         self.create_regular_file('dir1/dir2/file', size=1024 * 80)