Feature keep file 1135 (#1139)

* Added gitignorefile

* Fixed gitignorefile for os.PathLike paths

* --keep now follows .gitignore rules

* Fixed ruff QA error

* Added support for .osxphotos_keep file

* Added reference to .osxphotos_keep

* Added tests for .osxphotos_keep

* Updated help text for --cleanup, --keep
This commit is contained in:
Rhet Turnbull 2023-08-02 06:37:29 -07:00 committed by GitHub
parent 284c272183
commit e937285a72
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 2442 additions and 34 deletions

View File

@ -2393,6 +2393,7 @@ OSXPhotos adheres to the [XDG](https://specifications.freedesktop.org/basedir-sp
* `$XDG_DATA_HOME` or `$HOME/.local/share`: `osxphotos` directory containing local data files, for example, the help files displayed with `osxphotos docs`.
* Current working dir: `osxphotos_crash.log` file containing the stack trace of the last crash if OSXPhotos encounters a fatal error during execution.
* export directory (when running `osxphotos export` command): `.osxphotos_export.db` [SQLite](https://www.sqlite.org/index.html) database containing information needed to update an export and track metadata changes in exported photos. *Note*: This file may contain sensitive information such as locations and the names of persons in photos so if you are using `osxphotos export` to share with others, you may want to delete this file. You can also specify an alternate location for the export database using the `--exportdb` flag during export. See also `osxphotos help exportdb` for more information about built in utilities for working with the export database.
* export directory (optional, user-created): `.osxphotos_keep`. osxphotos never creates this file, but if it is present in the root of the export directory, osxphotos will read it to load a list of file/directory patterns which should be excluded from `--cleanup` during export. This file uses the same rule format as [.gitignore](https://git-scm.com/docs/gitignore). See `osxphotos help export cleanup` for more information.
## Python API

View File

@ -14,6 +14,7 @@ from typing import Iterable, List, Optional, Tuple
import click
import osxphotos
import osxphotos.gitignorefile
from osxphotos._constants import (
_EXIF_TOOL_URL,
_OSXPHOTOS_NONE_SENTINEL,
@ -607,27 +608,43 @@ from .verbose import get_verbose_console, verbose_print
"For example, photos which had previously been exported and were subsequently deleted in Photos. "
"WARNING: --cleanup will delete *any* files in the export directory that were not exported by osxphotos, "
"for example, your own scripts or other files. Be sure this is what you intend before using "
"--cleanup. Use --dry-run with --cleanup first if you're not certain.",
"--cleanup. Use --dry-run with --cleanup first if you're not certain. "
"To prevent files not generated by osxphotos from being deleted, you may specify one or more rules"
"in a file named `.osxphotos_keep` in the export directory. "
"This file uses the same format as a .gitignore file and should contain one rule per line; "
"lines starting with a `#` will be ignored. "
"Reference https://git-scm.com/docs/gitignore#_pattern_format for details. "
"In addition to the standard .gitignore rules, the rules may also be the absolute path to a file or directory. "
"For example if export destination is `/Volumes/Photos` and you want to keep all `.txt` files, "
'in the top level of the export directory, you can specify `/*.txt"` in the .osxphotos_keep file. '
"If you want to keep all `.txt` files in the export directory and all subdirectories, "
"you can specify `**/*.txt`. "
"If present, the .osxphotos_keep file will be read after the export is completed and any rules found in the file "
"will be added to the list of rules to keep. "
"See also --keep.",
)
@click.option(
"--keep",
metavar="KEEP_PATH",
metavar="KEEP_RULE",
nargs=1,
multiple=True,
help="When used with --cleanup, prevents file or directory KEEP_PATH from being deleted "
help="When used with --cleanup, prevents file or directory matching KEEP_RULE from being deleted "
"when cleanup is run. Use this if there are files in the export directory that you don't "
"want to be deleted when --cleanup is run. "
"KEEP_PATH may be a file path, e.g. '/Volumes/Photos/keep.jpg', "
"or a file path and wild card, e.g. '/Volumes/Photos/*.txt', "
"or a directory, e.g. '/Volumes/Photos/KeepMe'. "
"KEEP_PATH may be an absolute path or a relative path. "
"If it is relative, it must be relative to the export destination. "
"KEEP_RULE follows the same format rules a .gitignore file. "
"Reference https://git-scm.com/docs/gitignore#_pattern_format for details. "
"In addition to the standard .gitignore rules, KEEP_RULE may also be the absolute path to a file or directory. "
"For example if export destination is `/Volumes/Photos` and you want to keep all `.txt` files, "
'you can specify `--keep "/Volumes/Photos/*.txt"` or `--keep "*.txt"`. '
"If wild card is used, KEEP_PATH must be enclosed in quotes to prevent the shell from expanding the wildcard, "
'e.g. `--keep "/Volumes/Photos/*.txt"`. '
"If KEEP_PATH is a directory, all files and directories contained in KEEP_PATH will be kept. "
"--keep may be repeated to keep additional files/directories.",
'in the top level of the export directory, you can specify `--keep "/*.txt"`. '
"If you want to keep all `.txt` files in the export directory and all subdirectories, "
'you can specify `--keep "**/*.txt"`. '
"If wild card is used, KEEP_RULE must be enclosed in quotes to prevent the shell from expanding the wildcard. "
"--keep may be repeated to keep additional files/directories. "
"Rules may also be included in a file named `.osxphotos_keep` in the export directory. "
"If present, this file will be read after the export is completed and any rules found in the file "
"will be added to the list of rules to keep. "
"This file uses the same format as a .gitignore file and should contain one rule per line; "
"lines starting with a `#` will be ignored. "
)
@click.option(
"--add-exported-to-album",
@ -1717,6 +1734,7 @@ def export(
if cleanup:
db_file = str(pathlib.Path(export_db_path).resolve())
db_files = [db_file, db_file + "-wal", db_file + "-shm"]
keep_file = str(pathlib.Path(dest) / ".osxphotos_keep")
all_files = (
results.exported
+ results.skipped
@ -1738,29 +1756,36 @@ def export(
+ results.missing
# include files that have error in case they exist from previous export
+ [r[0] for r in results.error]
# don't delete export database files
+ db_files
# include the .osxphotos_keep file
+ [keep_file]
)
# if --report, add report file to keep list to prevent it from being deleted
if report:
all_files.append(report)
# gather any files that should be kept from both .osxphotos_keep and --keep
dirs_to_keep = []
if keep:
files_to_keep, dirs_to_keep = collect_files_to_keep(keep, dest)
all_files += files_to_keep
files_to_keep, dirs_to_keep = collect_files_to_keep(keep, dest)
all_files += files_to_keep
rich_echo(f"Cleaning up [filepath]{dest}")
cleaned_files, cleaned_dirs = cleanup_files(
dest, all_files, dirs_to_keep, fileutil, verbose=verbose
)
file_str = "files" if len(cleaned_files) != 1 else "file"
dir_str = "directories" if len(cleaned_dirs) != 1 else "directory"
rich_echo(
f"Deleted: [num]{len(cleaned_files)}[/num] {file_str}, [num]{len(cleaned_dirs)}[/num] {dir_str}"
)
report_writer.write(
ExportResults(deleted_files=cleaned_files, deleted_directories=cleaned_dirs)
)
results.deleted_files = cleaned_files
results.deleted_directories = cleaned_dirs
@ -2550,22 +2575,40 @@ def collect_files_to_keep(
"""Collect all files to keep for --keep/--cleanup.
Args:
keep: Iterable of filepaths to keep; each path may be a filepath, a filepath/wildcard, or a directory path.
keep: Iterable of patterns to keep; each pattern is a pattern that follows gitignore syntax
export_dir: the export directory which will be used to resolve paths when paths in keep are relative instead of absolute
Returns:
tuple of [files_to_keep], [dirs_to_keep]
"""
export_dir = pathlib.Path(export_dir)
keepers = []
export_dir = pathlib.Path(export_dir).expanduser()
export_dir_str = str(export_dir)
KEEP_RULEs = []
# parse .osxphotos_keep file if it exists
keep_file : pathlib.Path = export_dir / ".osxphotos_keep"
if keep_file.is_file():
for line in keep_file.read_text().splitlines():
line = line.rstrip("\r\n")
KEEP_RULEs.append(line)
# parse any patterns passed via --keep
# do this after the file so negations to the file could be applied via --keep
for k in keep:
keeper = pathlib.Path(k).expanduser()
if not keeper.is_absolute():
# relative path: relative to export_dir
keeper = export_dir / keeper
if keeper.is_dir():
keepers.extend(keeper.glob("**/*"))
keepers.extend(keeper.parent.glob(keeper.name))
if k.startswith(export_dir_str):
# allow full path to be specified for keep (e.g. --keep /path/to/file)
KEEP_RULEs.append(k.replace(export_dir_str, ""))
else:
KEEP_RULEs.append(k)
if not KEEP_RULEs:
return [], []
# have some rules to apply
matcher = osxphotos.gitignorefile.parse_pattern_list(KEEP_RULEs, export_dir)
keepers = []
keepers = [path for path in export_dir.rglob("*") if matcher(path)]
files_to_keep = [str(k) for k in keepers if k.is_file()]
dirs_to_keep = [str(k) for k in keepers if k.is_dir()]
return files_to_keep, dirs_to_keep

431
osxphotos/gitignorefile.py Normal file
View File

@ -0,0 +1,431 @@
"""A spec-compliant `.gitignore` parser for Python.
Versioned from: https://github.com/excitoon/gitignorefile to add parse_pattern_list() function
to apply .gitignore rules to a list of patterns that aren't actually a .gitignore file.
The original code was licensed under the MIT license, Copyright (c) 2022 Vladimir Chebotarev
"""
from __future__ import annotations
import os
import re
from typing import Callable
DEFAULT_IGNORE_NAMES = [".gitignore", ".git/info/exclude"]
def parse_pattern_list(
    patterns: list[str], base_path: str | os.PathLike | None = None
) -> Callable[[str], bool]:
    """Parse a list of patterns and return a callable to match against a path.

    Args:
        patterns (list[str]): Patterns in `.gitignore` syntax. Trailing line terminators
            are stripped; entries that yield no rule (blank lines, comments) are skipped.
        base_path (str | os.PathLike, optional): Base path for applying ignore rules.
            Defaults to the current working directory.

    Returns:
        Callable[[str], bool]: Callable which returns `True` if specified path is ignored.
        You can also pass `is_dir: bool` optional parameter if you know whether the specified path is a directory.
    """
    if base_path is None:
        # Without a base path the matcher would hold `None` as its base parts
        # and raise `TypeError` on the first match, so fall back to the cwd.
        base_path = os.getcwd()

    candidates = (_rule_from_pattern(pattern.rstrip("\r\n")) for pattern in patterns)
    rules = [rule for rule in candidates if rule]
    return _IgnoreRules(rules, base_path).match
def parse(path, base_path=None):
    """Read one `.gitignore`-style file and build a matcher from its lines.

    Args:
        path (str): Path to `.gitignore` file.
        base_path (str): Base path for applying ignore rules. When omitted, the
            directory containing `path` is used.

    Returns:
        Callable[[str], bool]: Callable which returns `True` if specified path is ignored.
        You can also pass `is_dir: bool` optional parameter if you know whether the specified path is a directory.
    """
    if base_path is None:
        # a bare file name has no dirname, so resolve it against the cwd
        base_path = os.path.dirname(path) or os.path.dirname(os.path.abspath(path))

    with open(path) as handle:
        parsed = (_rule_from_pattern(raw.rstrip("\r\n")) for raw in handle)
        # lines that produce no rule (blank, comment, invalid) are dropped
        rules = [entry for entry in parsed if entry]

    return _IgnoreRules(rules, base_path).match
def ignore(ignore_names=DEFAULT_IGNORE_NAMES):
    """Build a `shutil.copytree()`-compatible ignore function.

    The returned function reports every name that is ignored by any of the
    ignore files found in the directory tree.

    Args:
        ignore_names (list[str], optional): List of names of ignore files.

    Returns:
        Callable[[str, list[str]], set[str]]: Callable compatible with `shutil.copytree()`;
        given a directory and the names inside it, it returns the set of ignored names.
    """
    is_ignored = Cache(ignore_names=ignore_names)

    def _select_ignored(root, names):
        return {entry for entry in names if is_ignored(os.path.join(root, entry))}

    return _select_ignored
def ignored(path, is_dir=None, ignore_names=DEFAULT_IGNORE_NAMES):
    """Tell whether a path is ignored by any ignore file in the directory tree.

    A fresh `Cache` is created for every call, so nothing is reused between calls.

    Args:
        path (str): Path to check against ignore rules.
        is_dir (bool, optional): Set if you know whether the specified path is a directory.
        ignore_names (list[str], optional): List of names of ignore files.

    Returns:
        bool: `True` if the path is ignored.
    """
    checker = Cache(ignore_names=ignore_names)
    return checker(path, is_dir=is_dir)
class Cache:
    """Caches information about different `.gitignore` files in the directory tree.

    Allows to reduce number of queries to filesystem to minimum.
    """

    def __init__(self, ignore_names=DEFAULT_IGNORE_NAMES):
        """Constructs `Cache` objects.

        Args:
            ignore_names (list[str], optional): List of names of ignore files.
        """
        self.__ignore_names = ignore_names
        # Maps a directory's path parts (tuple) to the list of matchers that apply in it.
        self.__gitignores = {}

    def __call__(self, path, is_dir=None):
        """Checks whether the specified path is ignored.

        Args:
            path (str): Path to check against ignore rules.
            is_dir (bool, optional): Set if you know whether the specified path is a directory.

        Returns:
            bool: `True` if any matcher that applies to the path's directory matches it.
        """
        path = _Path(path)
        # Directories that have ignore files, mapped to (their matchers, the
        # directories without ignore files that were visited just before them).
        add_to_children = {}
        # Visited directories without ignore files that are not yet attached to an entry above.
        plain_paths = []
        # Walk from the nearest parent directory upwards.
        for parent in path.parents():
            if parent.parts in self.__gitignores:
                # This directory is already cached; no need to look further up.
                break
            ignore_paths = []
            for ignore_name in self.__ignore_names:
                ignore_path = parent.join(ignore_name)
                if ignore_path.isfile():
                    ignore_paths.append(str(ignore_path))
            if ignore_paths:
                matches = [
                    parse(ignore_path, base_path=parent) for ignore_path in ignore_paths
                ]
                add_to_children[parent] = (matches, plain_paths)
                plain_paths = []
            else:
                plain_paths.append(parent)
        else:
            # No cached directory was reached: anchor everything at an empty entry.
            parent = _Path(tuple())  # Null path.
            self.__gitignores[parent.parts] = []
        # Remaining plain directories share the matcher list of the directory the walk stopped at.
        for plain_path in plain_paths:
            # assert plain_path.parts not in self.__gitignores
            self.__gitignores[plain_path.parts] = self.__gitignores[parent.parts]
        # Process directories with ignore files in reverse order of discovery,
        # i.e. the one farthest from `path` first.
        for parent, (_, parent_plain_paths) in reversed(list(add_to_children.items())):
            # assert parent.parts not in self.__gitignores
            self.__gitignores[parent.parts] = self.__gitignores[
                parent.parts[:-1]
            ].copy()
            for parent_to_add, (gitignores_to_add, _) in reversed(
                list(add_to_children.items())
            ):
                self.__gitignores[parent.parts].extend(gitignores_to_add)
                if parent_to_add == parent:
                    break
            self.__gitignores[parent.parts].reverse()
            # Plain directories recorded with this entry reuse its matcher list.
            for plain_path in parent_plain_paths:
                # assert plain_path.parts not in self.__gitignores
                self.__gitignores[plain_path.parts] = self.__gitignores[parent.parts]
        # This parent comes either from first or second loop.
        return any((m(path, is_dir=is_dir) for m in self.__gitignores[parent.parts]))
class _Path:
    """Filesystem path stored as a tuple of its components.

    Built either from a path-like value (made absolute and split on the
    platform separators) or directly from an existing tuple of components.
    """

    def __init__(self, path):
        self.__is_dir = None  # result of `isdir()` is cached here once known
        if not isinstance(path, (str, bytes, os.PathLike)):
            # Already a tuple of components; the string form is built lazily.
            self.__parts = path
            self.__joined = None
            return
        absolute = os.path.abspath(path)
        self.__parts = tuple(_path_split(absolute))
        self.__joined = absolute

    @property
    def parts(self):
        """Tuple of path components."""
        return self.__parts

    def join(self, name):
        """Return a new path with `name` appended as the last component."""
        return _Path(self.__parts + (name,))

    def relpath(self, base_path):
        """Return the `/`-joined remainder below `base_path`, or `None` if not below it."""
        base_parts = base_path.__parts
        depth = len(base_parts)
        if self.__parts[:depth] != base_parts:
            return None
        return "/".join(self.__parts[depth:])

    def parents(self):
        """Yield every ancestor, from the immediate parent up to the first component."""
        for end in reversed(range(1, len(self.__parts))):
            yield _Path(self.__parts[:end])

    def isfile(self):
        """Return whether the path is an existing regular file."""
        return os.path.isfile(str(self))

    def isdir(self):
        """Return whether the path is an existing directory (checked once, then cached)."""
        if self.__is_dir is None:
            self.__is_dir = os.path.isdir(str(self))
        return self.__is_dir

    def __str__(self):
        if self.__joined is None:
            if self.__parts == ("",):
                # a single empty component stands for the root
                self.__joined = os.sep
            else:
                self.__joined = os.sep.join(self.__parts)
        return self.__joined
def _rule_from_pattern(pattern):
    """Convert one `.gitignore`-style pattern into an `_IgnoreRule`.

    Takes a match pattern such as "*.py[cod]" or "**/*.bak" and returns a rule
    suitable for matching against files and directories.

    Args:
        pattern (str): A single pattern line, without its line terminator.

    Returns:
        _IgnoreRule | None: The compiled rule, or `None` for lines that do not
        describe a rule (blank lines, comments, a lone "/", or patterns with
        invalid asterisk sequences).
    """
    # Early returns follow
    # Discard comments and separators
    if not pattern.lstrip() or pattern.lstrip().startswith("#"):
        return None

    # Discard anything with more than two consecutive asterisks
    if "***" in pattern:
        return None

    # Unescaped trailing spaces are not part of the pattern. Drop them before
    # looking at its structure, otherwise "dir/ " is not seen as directory-only
    # and "foo/** " is rejected as an invalid double asterisk. A space escaped
    # with a backslash is kept here and unescaped further below.
    trimmed = pattern.rstrip(" ")
    if len(trimmed) < len(pattern) and trimmed.endswith("\\"):
        trimmed += " "
    pattern = trimmed

    # Strip leading bang before examining double asterisks
    if pattern.startswith("!"):
        negation = True
        pattern = pattern[1:]
    else:
        negation = False

    # Discard anything with invalid double-asterisks -- they can appear
    # at the start or the end, or be surrounded by slashes
    for m in re.finditer("\\*\\*", pattern):
        start_index = m.start()
        if (
            start_index != 0
            and start_index != len(pattern) - 2
            and (pattern[start_index - 1] != "/" or pattern[start_index + 2] != "/")
        ):
            return None

    # Special-casing '/', which doesn't match any files or directories
    if pattern.rstrip() == "/":
        return None

    directory_only = pattern.endswith("/")

    # A slash is a sign that we're tied to the `base_path` of our rule
    # set.
    anchored = "/" in pattern[:-1]
    if pattern.startswith("/"):
        pattern = pattern[1:]
    if pattern.startswith("**"):
        pattern = pattern[2:]
        anchored = False
    if pattern.startswith("/"):
        pattern = pattern[1:]
    if pattern.endswith("/"):
        pattern = pattern[:-1]

    # Patterns that begin with a literal hash or bang are escaped with a
    # backslash in front; unescape it.
    if pattern.startswith(("\\#", "\\!")):
        pattern = pattern[1:]

    # trailing spaces are ignored unless they are escaped with a backslash
    i = len(pattern) - 1
    striptrailingspaces = True
    while i > 1 and pattern[i] == " ":
        if pattern[i - 1] == "\\":
            # escaped space: drop the backslash, keep the space, and stop
            # stripping any spaces found further to the left
            pattern = pattern[: i - 1] + pattern[i:]
            i -= 1
            striptrailingspaces = False
        else:
            if striptrailingspaces:
                pattern = pattern[:i]
        i -= 1

    regexp = _fnmatch_pathname_to_regexp(pattern, anchored, directory_only)
    return _IgnoreRule(regexp, negation, directory_only)
class _IgnoreRules:
    """An ordered set of rules evaluated relative to one base path."""

    def __init__(self, rules, base_path):
        self.__rules = rules
        # Without negations the first matching rule settles the answer.
        self.__can_return_immediately = all(not rule.negation for rule in rules)
        if isinstance(base_path, _Path):
            self.__base_path = base_path
        else:
            self.__base_path = _Path(base_path)

    def match(self, path, is_dir=None):
        """Return whether `path` is matched by the rules.

        Args:
            path: Path to test; converted to `_Path` when it is not one already.
            is_dir (bool, optional): Whether the path is a directory; looked up
                on the filesystem when omitted.

        Returns:
            bool: `False` for paths outside the base path, otherwise the outcome
            of the rules, where a later matching rule overrides an earlier one.
        """
        candidate = path if isinstance(path, _Path) else _Path(path)
        rel_path = candidate.relpath(self.__base_path)
        if rel_path is None:
            return False

        if is_dir is None:
            is_dir = candidate.isdir()  # TODO Pass callable here.

        if self.__can_return_immediately:
            return any(rule.match(rel_path, is_dir) for rule in self.__rules)

        verdict = False
        for rule in self.__rules:
            if rule.match(rel_path, is_dir):
                verdict = not rule.negation
        return verdict
class _IgnoreRule:
    """A single compiled pattern with its negation and directory-only flags."""

    def __init__(self, regexp, negation, directory_only):
        compiled = re.compile(regexp)
        self.__regexp = compiled
        self.__match = compiled.match  # bound once, used on every `match()` call
        self.__negation = negation
        self.__directory_only = directory_only

    @property
    def regexp(self):
        """The compiled regular expression."""
        return self.__regexp

    @property
    def negation(self):
        """Whether the rule was written with a leading `!`."""
        return self.__negation

    def match(self, rel_path, is_dir):
        """Test a path, given relative to the rule set's base, against this rule.

        Returns `None` when the regular expression does not match at all.
        """
        found = self.__match(rel_path)
        if not found:
            return found
        if not self.__directory_only:
            return True
        # For a directory-only rule group 1 holds whatever follows the matched
        # name. If something follows, the name is a directory regardless of the
        # target's type; if nothing follows, the target itself must be a directory.
        # N.B. Question mark inside a group without a name can shift indices. :(
        return found.group(1) is not None or is_dir
# Choose how paths are split into components: platforms with an alternative
# separator need to split on both separators.
if os.altsep is None:

    def _path_split(path):
        """Split `path` on the platform separator."""
        return path.split(os.sep)

else:
    _all_seps_expr = f"[{re.escape(os.sep)}{re.escape(os.altsep)}]"

    def _path_split(path):
        """Split `path` on either of the platform separators."""
        return re.split(_all_seps_expr, path)
def _fnmatch_pathname_to_regexp(pattern, anchored, directory_only):
    """Translate a prepared `.gitignore` pattern into a regular expression string.

    Implements `fnmatch` style-behavior, as though with `FNM_PATHNAME` flagged;
    the path separator will not match shell-style `*` and `?` wildcards.
    Frustratingly, python's fnmatch doesn't provide the FNM_PATHNAME
    option that `.gitignore`'s behavior depends on.

    Args:
        pattern (str): Pattern with leading/trailing slashes already removed.
        anchored (bool): When `False`, the pattern may start at any directory level.
        directory_only (bool): When `True`, the expression ends with a capturing
            group for the remainder of the path; it is the only capturing group,
            so callers can read it as group 1.

    Returns:
        str: Regular expression to be matched against a `/`-separated relative path.
    """
    if not pattern:
        if directory_only:
            return "[^/]+(/.+)?$"  # Empty name means no path fragment.
        return ".*"

    i, n = 0, len(pattern)
    res = ["(?:^|.+/)" if not anchored else ""]
    while i < n:
        c = pattern[i]
        i += 1
        if c == "*":
            if i < n and pattern[i] == "*":
                i += 1
                if i < n and pattern[i] == "/":
                    i += 1
                    # `/**/` matches `/`. The group must not capture, otherwise
                    # it would take the place of the trailing group as group 1.
                    res.append("(?:.+/)?")
                else:
                    res.append(".*")
            else:
                res.append("[^/]*")
        elif c == "?":
            res.append("[^/]")
        elif c == "[":
            # Find the closing bracket; a `]` right after `[` or `[!` is literal.
            j = i
            if j < n and pattern[j] == "!":
                j += 1
            if j < n and pattern[j] == "]":
                j += 1
            while j < n and pattern[j] != "]":
                j += 1
            if j >= n:
                # No closing bracket: treat `[` as a literal character.
                res.append("\\[")
            else:
                stuff = pattern[i:j].replace("\\", "\\\\")
                i = j + 1
                if stuff[0] == "!":
                    stuff = f"^{stuff[1:]}"
                elif stuff[0] == "^":
                    stuff = f"\\{stuff}"
                res.append(f"[{stuff}]")
        else:
            res.append(re.escape(c))

    if directory_only:
        # In this case we are interested if there is something after slash.
        res.append("(/.+)?$")
    else:
        res.append("(?:/.+)?$")
    return "".join(res)

View File

@ -3387,6 +3387,7 @@ def test_export_aae():
files = glob.glob("*.*")
assert sorted(files) == sorted(CLI_EXPORT_AAE_FILENAMES)
def test_export_aae_as_hardlink():
"""Test export with --export-aae and --export-as-hardlink"""
@ -3411,6 +3412,7 @@ def test_export_aae_as_hardlink():
files = glob.glob("*.*")
assert sorted(files) == sorted(CLI_EXPORT_AAE_FILENAMES)
def test_export_sidecar():
"""test --sidecar"""
@ -6564,13 +6566,14 @@ def test_export_cleanup_keep():
assert pathlib.Path("./report.db").is_file()
def test_export_cleanup_keep_relative_path():
"""test export with --cleanup --keep options with relative paths"""
def test_export_cleanup_keep_leading_slash():
"""test export with --cleanup --keep options when pattern has leading slash"""
runner = CliRunner()
cwd = os.getcwd()
# pylint: disable=not-context-manager
with runner.isolated_filesystem():
tmpdir = os.getcwd()
result = runner.invoke(export, [os.path.join(cwd, CLI_PHOTOS_DB), ".", "-V"])
assert result.exit_code == 0
@ -6602,11 +6605,11 @@ def test_export_cleanup_keep_relative_path():
"--update",
"--cleanup",
"--keep",
"keep_me",
f"/keep_me/",
"--keep",
"keep_me.txt",
f"/keep_me.txt",
"--keep",
"*.db",
f"/*.db",
"--dry-run",
],
)
@ -6625,11 +6628,11 @@ def test_export_cleanup_keep_relative_path():
"--update",
"--cleanup",
"--keep",
"keep_me",
f"/keep_me/",
"--keep",
"keep_me.txt",
f"/keep_me.txt",
"--keep",
"*.db",
f"/*.db",
],
)
assert "Deleted: 2 files, 2 directories" in result.output
@ -6643,6 +6646,94 @@ def test_export_cleanup_keep_relative_path():
assert pathlib.Path("./report.db").is_file()
def test_export_cleanup_keep_relative_path():
    """test export with --cleanup --keep options with relative paths

    Also covers a negation rule (`!keep_me/keep_me.db`) passed via --keep.
    """
    runner = CliRunner()
    cwd = os.getcwd()
    # pylint: disable=not-context-manager
    with runner.isolated_filesystem():
        result = runner.invoke(export, [os.path.join(cwd, CLI_PHOTOS_DB), ".", "-V"])
        assert result.exit_code == 0

        # create file and a directory that should be deleted
        os.mkdir("./empty_dir")
        os.mkdir("./delete_me_dir")
        with open("./delete_me.txt", "w") as fd:
            fd.write("delete me!")
        with open("./delete_me_dir/delete_me.txt", "w") as fd:
            fd.write("delete me!")

        # create files and directories that should be kept
        os.mkdir("./keep_me")
        os.mkdir("./keep_me/keep_me_2")
        with open("./keep_me.txt", "w") as fd:
            fd.write("keep me!")
        with open("./report.db", "w") as fd:
            fd.write("keep me!")
        with open("./keep_me/keep_me.txt", "w") as fd:
            fd.write("keep me")

        # for negation rule
        with open("./keep_me/keep_me.db", "w") as fd:
            fd.write("keep me")

        # run cleanup with dry-run
        result = runner.invoke(
            export,
            [
                os.path.join(cwd, CLI_PHOTOS_DB),
                ".",
                "-V",
                "--update",
                "--cleanup",
                "--keep",
                "keep_me/",
                "--keep",
                "keep_me.txt",
                "--keep",
                "*.db",
                "--dry-run",
                "--keep",
                "!keep_me/keep_me.db",
            ],
        )
        assert "Deleted: 3 files, 1 directory" in result.output
        assert pathlib.Path("./delete_me.txt").is_file()
        assert pathlib.Path("./delete_me_dir/delete_me.txt").is_file()
        assert pathlib.Path("./empty_dir").is_dir()

        # run cleanup without dry-run
        result = runner.invoke(
            export,
            [
                os.path.join(cwd, CLI_PHOTOS_DB),
                ".",
                "-V",
                "--update",
                "--cleanup",
                "--keep",
                "keep_me/",
                "--keep",
                "keep_me.txt",
                "--keep",
                "*.db",
                "--keep",
                "!keep_me/keep_me.db",
            ],
        )
        assert "Deleted: 3 files, 2 directories" in result.output
        assert not pathlib.Path("./delete_me.txt").is_file()
        # check the file that was actually created above, not a name that never existed
        assert not pathlib.Path("./delete_me_dir/delete_me.txt").is_file()
        assert not pathlib.Path("./empty_dir").is_dir()
        assert not pathlib.Path("./keep_me/keep_me.db").is_file()
        assert pathlib.Path("./keep_me.txt").is_file()
        assert pathlib.Path("./keep_me").is_dir()
        assert pathlib.Path("./keep_me/keep_me.txt").is_file()
        assert pathlib.Path("./keep_me/keep_me_2").is_dir()
        assert pathlib.Path("./report.db").is_file()
def test_export_cleanup_exportdb_report():
"""test export with --cleanup flag results show in exportdb --report"""
@ -6682,6 +6773,159 @@ def test_export_cleanup_exportdb_report():
assert len(deleted_files) == 2
def test_export_cleanup_osxphotos_keep():
    """test export with --cleanup with a .osxphotos_keep file"""
    runner = CliRunner()
    cwd = os.getcwd()
    # pylint: disable=not-context-manager
    with runner.isolated_filesystem():
        result = runner.invoke(export, [os.path.join(cwd, CLI_PHOTOS_DB), ".", "-V"])
        assert result.exit_code == 0

        # create file and a directory that should be deleted
        os.mkdir("./empty_dir")
        os.mkdir("./delete_me_dir")
        with open("./delete_me.txt", "w") as fd:
            fd.write("delete me!")
        with open("./delete_me_dir/delete_me.txt", "w") as fd:
            fd.write("delete me!")

        # create files and directories that should be kept
        os.mkdir("./keep_me")
        os.mkdir("./keep_me/keep_me_2")
        with open("./keep_me.txt", "w") as fd:
            fd.write("keep me!")
        with open("./report.db", "w") as fd:
            fd.write("keep me!")
        with open("./keep_me/keep_me.txt", "w") as fd:
            fd.write("keep me")

        # the keep rules come only from the .osxphotos_keep file in the export directory
        with open(".osxphotos_keep", "w") as fd:
            fd.write("/keep_me/\n")
            fd.write("/keep_me.txt\n")
            fd.write("/*.db\n")

        # run cleanup with dry-run
        result = runner.invoke(
            export,
            [
                ".",
                "--library",
                os.path.join(cwd, CLI_PHOTOS_DB),
                "-V",
                "--update",
                "--cleanup",
                "--dry-run",
            ],
        )
        assert "Deleted: 2 files, 1 directory" in result.output
        assert pathlib.Path("./delete_me.txt").is_file()
        assert pathlib.Path("./delete_me_dir/delete_me.txt").is_file()
        assert pathlib.Path("./empty_dir").is_dir()

        # run cleanup without dry-run
        result = runner.invoke(
            export,
            [
                ".",
                "--library",
                os.path.join(cwd, CLI_PHOTOS_DB),
                "-V",
                "--update",
                "--cleanup",
            ],
        )
        assert "Deleted: 2 files, 2 directories" in result.output
        assert not pathlib.Path("./delete_me.txt").is_file()
        # check the file that was actually created above, not a name that never existed
        assert not pathlib.Path("./delete_me_dir/delete_me.txt").is_file()
        assert not pathlib.Path("./empty_dir").is_dir()
        assert pathlib.Path("./keep_me.txt").is_file()
        assert pathlib.Path("./keep_me").is_dir()
        assert pathlib.Path("./keep_me/keep_me.txt").is_file()
        assert pathlib.Path("./keep_me/keep_me_2").is_dir()
        assert pathlib.Path("./report.db").is_file()
def test_export_cleanup_osxphotos_keep_keep():
    """test export with --cleanup with a .osxphotos_keep file and --keep"""
    runner = CliRunner()
    cwd = os.getcwd()
    # pylint: disable=not-context-manager
    with runner.isolated_filesystem():
        result = runner.invoke(export, [os.path.join(cwd, CLI_PHOTOS_DB), ".", "-V"])
        assert result.exit_code == 0

        # create file and a directory that should be deleted
        os.mkdir("./empty_dir")
        os.mkdir("./delete_me_dir")
        with open("./delete_me.txt", "w") as fd:
            fd.write("delete me!")
        with open("./delete_me_dir/delete_me.txt", "w") as fd:
            fd.write("delete me!")

        # create files and directories that should be kept
        os.mkdir("./keep_me")
        os.mkdir("./keep_me/keep_me_2")
        with open("./keep_me.txt", "w") as fd:
            fd.write("keep me!")
        with open("./report.db", "w") as fd:
            fd.write("keep me!")
        with open("./keep_me/keep_me.txt", "w") as fd:
            fd.write("keep me")

        # two rules come from the file; the rule for *.db is passed via --keep below
        with open(".osxphotos_keep", "w") as fd:
            fd.write("/keep_me/\n")
            fd.write("/keep_me.txt\n")

        # run cleanup with dry-run
        result = runner.invoke(
            export,
            [
                ".",
                "--library",
                os.path.join(cwd, CLI_PHOTOS_DB),
                "-V",
                "--update",
                "--cleanup",
                "--dry-run",
                "--keep",
                "/*.db",
            ],
        )
        assert "Deleted: 2 files, 1 directory" in result.output
        assert pathlib.Path("./delete_me.txt").is_file()
        assert pathlib.Path("./delete_me_dir/delete_me.txt").is_file()
        assert pathlib.Path("./empty_dir").is_dir()

        # run cleanup without dry-run
        result = runner.invoke(
            export,
            [
                ".",
                "--library",
                os.path.join(cwd, CLI_PHOTOS_DB),
                "-V",
                "--update",
                "--cleanup",
                "--keep",
                "/*.db",
            ],
        )
        assert "Deleted: 2 files, 2 directories" in result.output
        assert not pathlib.Path("./delete_me.txt").is_file()
        # check the file that was actually created above, not a name that never existed
        assert not pathlib.Path("./delete_me_dir/delete_me.txt").is_file()
        assert not pathlib.Path("./empty_dir").is_dir()
        assert pathlib.Path("./keep_me.txt").is_file()
        assert pathlib.Path("./keep_me").is_dir()
        assert pathlib.Path("./keep_me/keep_me.txt").is_file()
        assert pathlib.Path("./keep_me/keep_me_2").is_dir()
        assert pathlib.Path("./report.db").is_file()
def test_save_load_config():
"""test --save-config, --load-config"""

View File

@ -0,0 +1,137 @@
import io
import itertools
import os
import stat
import tempfile
import unittest
import unittest.mock
import osxphotos.gitignorefile
class TestCache(unittest.TestCase):
def test_simple(self):
def normalize_path(path):
return os.path.abspath(path).replace(os.sep, "/")
class StatResult:
def __init__(self, is_file=False):
self.st_ino = id(self)
self.st_dev = 0
self.st_mode = stat.S_IFREG if is_file else stat.S_IFDIR
def isdir(self):
return self.st_mode == stat.S_IFDIR
def isfile(self):
return self.st_mode == stat.S_IFREG
class Stat:
def __init__(self, directories, files):
self.__filesystem = {}
for path in directories:
self.__filesystem[normalize_path(path)] = StatResult()
for path in files:
self.__filesystem[normalize_path(path)] = StatResult(True)
def __call__(self, path):
try:
return self.__filesystem[normalize_path(path)]
except KeyError:
raise FileNotFoundError()
for ignore_file_name in (".gitignore", ".mylovelytoolignore"):
with self.subTest(ignore_file_name=ignore_file_name):
my_stat = Stat(
[
"/home/vladimir/project/directory/subdirectory",
"/home/vladimir/project/directory",
"/home/vladimir/project",
"/home/vladimir",
"/home",
"/",
],
[
"/home/vladimir/project/directory/subdirectory/subdirectory/file.txt",
"/home/vladimir/project/directory/subdirectory/subdirectory/file2.txt",
"/home/vladimir/project/directory/subdirectory/subdirectory/file3.txt",
"/home/vladimir/project/directory/subdirectory/file.txt",
"/home/vladimir/project/directory/subdirectory/file2.txt",
"/home/vladimir/project/directory/%s" % ignore_file_name,
"/home/vladimir/project/directory/file.txt",
"/home/vladimir/project/directory/file2.txt",
"/home/vladimir/project/file.txt",
"/home/vladimir/project/%s" % ignore_file_name,
"/home/vladimir/file.txt",
],
)
def mock_open(path):
data = {
normalize_path(
"/home/vladimir/project/directory/%s" % ignore_file_name
): ["file.txt"],
normalize_path(
"/home/vladimir/project/%s" % ignore_file_name
): ["file2.txt"],
}
statistics["open"] += 1
try:
return io.StringIO("\n".join(data[normalize_path(path)]))
except KeyError:
raise FileNotFoundError()
def mock_isdir(path):
statistics["isdir"] += 1
try:
return my_stat(path).isdir()
except FileNotFoundError:
return False
def mock_isfile(path):
statistics["isfile"] += 1
try:
return my_stat(path).isfile()
except FileNotFoundError:
return False
data = {
"/home/vladimir/project/directory/subdirectory/file.txt": True,
"/home/vladimir/project/directory/subdirectory/file2.txt": True,
"/home/vladimir/project/directory/subdirectory/subdirectory/file.txt": True,
"/home/vladimir/project/directory/subdirectory/subdirectory/file2.txt": True,
"/home/vladimir/project/directory/subdirectory/subdirectory/file3.txt": False,
"/home/vladimir/project/directory/file.txt": True,
"/home/vladimir/project/directory/file2.txt": True,
"/home/vladimir/project/file.txt": False,
"/home/vladimir/file.txt": False, # No rules and no `isdir` calls for this file.
}
# 9! == 362880 combinations.
for permutation in itertools.islice(
itertools.permutations(data.items()), 0, None, 6 * 8
):
statistics = {"open": 0, "isdir": 0, "isfile": 0}
with unittest.mock.patch("builtins.open", mock_open):
with unittest.mock.patch("os.path.isdir", mock_isdir):
with unittest.mock.patch("os.path.isfile", mock_isfile):
matches = osxphotos.gitignorefile.Cache(
ignore_names=[ignore_file_name]
)
for path, expected in permutation:
self.assertEqual(matches(path), expected)
self.assertEqual(statistics["open"], 2)
self.assertEqual(statistics["isdir"], len(data) - 1)
self.assertEqual(statistics["isfile"], 7) # Unique path fragments.
def test_wrong_symlink(self):
    """A symlink pointing at a ``/nonexistent-path-*`` target is not matched."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        bin_dir = f"{tmp_dir}/.venv/bin"
        link_path = f"{bin_dir}/python"
        matches = osxphotos.gitignorefile.Cache()
        os.makedirs(bin_dir)
        # The id() suffix keeps the link target name unique to this test instance.
        os.symlink(f"/nonexistent-path-{id(self)}", link_path)
        self.assertFalse(matches(link_path))

View File

@ -0,0 +1,83 @@
import os
import shutil
import tempfile
import unittest
import unittest.mock
import osxphotos.gitignorefile
class TestIgnore(unittest.TestCase):
    """Use ``osxphotos.gitignorefile.ignore()`` as a ``shutil.copytree`` filter."""

    def test_robert_shutil_ignore_function(self):
        """Copy a sample tree through the ignore filter and check what was copied."""
        # Directories created under <tmp>/example before copying.
        sample_directories = (
            "test__pycache__/excluded/excluded",
            ".test_venv",
            "not_excluded/test__pycache__",
            "not_excluded/excluded_not",
            "not_excluded/excluded",
            "not_excluded/not_excluded2",
        )
        # Empty files created under <tmp>/example before copying.
        sample_files = (
            "test__pycache__/.test_gitignore",
            "test__pycache__/excluded/excluded/excluded.txt",
            "test__pycache__/excluded/excluded/test_inverse",
            "test__pycache__/some_file.txt",
            "test__pycache__/test",
            ".test_gitignore",
            ".test_venv/some_file.txt",
            "not_excluded.txt",
            "not_excluded/.test_gitignore",
            "not_excluded/excluded_not/sub_excluded.txt",
            "not_excluded/excluded/excluded.txt",
            "not_excluded/not_excluded2.txt",
            "not_excluded/not_excluded2/sub_excluded.txt",
            "not_excluded/excluded_not.txt",
            ".test_gitignore_empty",
        )
        # Rules written, one per line, to <tmp>/example/.gitignore.
        gitignore_rules = (
            "test__pycache__",
            "*.py[cod]",
            ".test_venv/",
            ".test_venv/**",
            ".test_venv/*",
            "!test_inverse",
        )
        with tempfile.TemporaryDirectory() as d:
            source_root = f"{d}/example"
            target_root = f"{d}/target"

            for directory in sample_directories:
                os.makedirs(f"{source_root}/{directory}")
            for name in sample_files:
                with open(f"{source_root}/{name}", "w"):
                    pass
            with open(f"{source_root}/.gitignore", "w") as f:
                for rule in gitignore_rules:
                    print(rule, file=f)

            copied = []
            shutil.copytree(
                source_root, target_root, ignore=osxphotos.gitignorefile.ignore()
            )
            for root, directories, files in os.walk(target_root):
                copied.extend(os.path.join(root, entry) for entry in directories)
                copied.extend(os.path.join(root, entry) for entry in files)

            # Compare as sorted, "/"-separated paths relative to the copy root.
            result = sorted(
                os.path.relpath(entry, target_root).replace(os.sep, "/")
                for entry in copied
            )
            self.assertEqual(
                result,
                [
                    ".gitignore",
                    ".test_gitignore",
                    ".test_gitignore_empty",
                    "not_excluded",
                    "not_excluded.txt",
                    "not_excluded/.test_gitignore",
                    "not_excluded/excluded",
                    "not_excluded/excluded/excluded.txt",
                    "not_excluded/excluded_not",
                    "not_excluded/excluded_not.txt",
                    "not_excluded/excluded_not/sub_excluded.txt",
                    "not_excluded/not_excluded2",
                    "not_excluded/not_excluded2.txt",
                    "not_excluded/not_excluded2/sub_excluded.txt",
                ],
            )

View File

@ -0,0 +1,38 @@
import os
import unittest
import osxphotos.gitignorefile
class TestIgnored(unittest.TestCase):
    """Check ``osxphotos.gitignorefile.ignored`` on paths next to this test file."""

    def test_simple(self):
        """Probe this file, its directory and a sibling ``__pycache__`` for each ``is_dir``."""
        ignored = osxphotos.gitignorefile.ignored
        here = os.path.dirname(__file__)
        pycache_dir = f"{here}/__pycache__"

        for is_dir in (None, False, True):
            with self.subTest(i=is_dir):
                self.assertFalse(ignored(__file__, is_dir=is_dir))

                if is_dir is not True:
                    self.assertTrue(ignored(f"{pycache_dir}/some.pyc", is_dir=is_dir))

                self.assertFalse(ignored(here, is_dir=is_dir))

                if is_dir is False:
                    # Note: this test will fail if your .gitignore file does not contain __pycache__/
                    self.assertFalse(ignored(pycache_dir, is_dir=is_dir))
                else:
                    self.assertTrue(ignored(pycache_dir, is_dir=is_dir))

View File

@ -0,0 +1,54 @@
""" Test match with non-string arguments. """
import io
import pathlib
import unittest
import unittest.mock
import osxphotos.gitignorefile
class TestMatchNonStr(unittest.TestCase):
    """Test match with non-string arguments."""

    def test_simple_base_path(self):
        """Test non-str pathlike arguments for base_path"""
        matches = self.__parse_gitignore_string(
            ["__pycache__/", "*.py[cod]"], mock_base_path=pathlib.Path("/home/michael")
        )
        # (path, assertion) pairs checked for both values of is_dir.
        expectations = (
            ("/home/michael/main.py", self.assertFalse),
            ("/home/michael/main.pyc", self.assertTrue),
            ("/home/michael/dir/main.pyc", self.assertTrue),
        )
        for is_dir in (False, True):
            with self.subTest(i=is_dir):
                for path, check in expectations:
                    check(matches(path, is_dir=is_dir))

        # The "__pycache__/" rule is expected to match only when is_dir is True.
        pycache = "/home/michael/__pycache__"
        self.assertFalse(matches(pycache, is_dir=False))
        self.assertTrue(matches(pycache, is_dir=True))

    def test_simple_matches(self):
        """Test non-str pathlike arguments for match"""
        matches = self.__parse_gitignore_string(
            ["__pycache__/", "*.py[cod]"], mock_base_path=pathlib.Path("/home/michael")
        )
        # (path, assertion) pairs; each path is wrapped in pathlib.Path per call.
        expectations = (
            ("/home/michael/main.py", self.assertFalse),
            ("/home/michael/main.pyc", self.assertTrue),
            ("/home/michael/dir/main.pyc", self.assertTrue),
        )
        for is_dir in (False, True):
            with self.subTest(i=is_dir):
                for path, check in expectations:
                    check(matches(pathlib.Path(path), is_dir=is_dir))

        # The "__pycache__/" rule is expected to match only when is_dir is True.
        self.assertFalse(
            matches(pathlib.Path("/home/michael/__pycache__"), is_dir=False)
        )
        self.assertTrue(matches(pathlib.Path("/home/michael/__pycache__"), is_dir=True))

    def __parse_gitignore_string(self, data, mock_base_path):
        """Parse the rule lines in *data* as if read from ``<mock_base_path>/.gitignore``.

        ``builtins.open`` is patched for the duration of the parse so that it
        returns the joined rule lines instead of touching the filesystem.
        """

        def fake_open(_):
            # Ignores the requested path and always serves the supplied rules.
            return io.StringIO("\n".join(data))

        with unittest.mock.patch("builtins.open", fake_open):
            return osxphotos.gitignorefile.parse(
                f"{mock_base_path}/.gitignore", base_path=mock_base_path
            )

File diff suppressed because it is too large Load Diff