Added lock files to export to minimize name collisions (#981)

This commit is contained in:
Rhet Turnbull 2023-02-11 18:21:15 -08:00 committed by GitHub
parent c4788c0fd2
commit ce297ced0a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 148 additions and 28 deletions

View File

@ -180,6 +180,9 @@ _MAX_IPTC_KEYWORD_LEN = 64
# If anyone has a keyword matching this, then too bad...
_OSXPHOTOS_NONE_SENTINEL = "OSXPhotosXYZZY42_Sentinel$"
# Lock file extension for reserving filenames when exporting
_OSXPHOTOS_LOCK_EXTENSION = ".osxphotos.lock"
class SearchCategory:
"""SearchInfo categories for Photos 5+; corresponds to categories in database/search/psi.sqlite:groups.category
@ -351,7 +354,7 @@ def search_category_factory(version: int) -> SearchCategory:
# Max filename length on MacOS
MAX_FILENAME_LEN = 255
MAX_FILENAME_LEN = 255 - len(_OSXPHOTOS_LOCK_EXTENSION)
# Max directory name length on MacOS
MAX_DIRNAME_LEN = 255

View File

@ -1,5 +1,6 @@
"""export command for osxphotos CLI"""
import atexit
import inspect
import os
import pathlib
@ -63,8 +64,10 @@ from osxphotos.utils import (
get_macos_version,
normalize_fs_path,
pluralize,
under_test,
)
from .cli_commands import logger
from .cli_params import (
DB_ARGUMENT,
DB_OPTION,
@ -1406,6 +1409,20 @@ def export(
else None
)
def cleanup_lock_files():
"""Cleanup lock files"""
if not under_test():
verbose("Cleaning up lock files")
if dry_run:
return
for lock_file in pathlib.Path(dest).rglob("*.osxphotos.lock"):
try:
lock_file.unlink()
except Exception as e:
logger.debug(f"Error removing lock file {lock_file}: {e}")
atexit.register(cleanup_lock_files)
photo_num = 0
num_exported = 0
limit_str = f" (limit = [num]{limit}[/num])" if limit else ""

View File

@ -50,6 +50,8 @@ from .utils import (
increment_filename_with_count,
lineno,
list_directory,
lock_filename,
unlock_filename,
)
__all__ = [
@ -490,6 +492,7 @@ class PhotoExporter:
f"Skipping missing {'edited' if options.edited else 'original'} photo {self._filename(self.photo.original_filename)} ({self._uuid(self.photo.uuid)})"
)
all_results.missing.append(dest)
unlock_filename(dest)
# copy live photo associated .mov if requested
if export_original and options.live_photo and self.photo.live_photo:
@ -565,7 +568,9 @@ class PhotoExporter:
preview_name = (
preview_name
if any([options.overwrite, options.update, options.force_update])
else pathlib.Path(increment_filename(preview_name))
else pathlib.Path(
increment_filename(preview_name, lock=not options.dry_run)
)
)
all_results += self._export_photo(
preview_path,
@ -625,11 +630,10 @@ class PhotoExporter:
if self.photo.path_edited:
ext = pathlib.Path(self.photo.path_edited).suffix
else:
uti = self.photo.uti_edited if self.photo.uti_edited else self.photo.uti
uti = self.photo.uti_edited or self.photo.uti
ext = get_preferred_uti_extension(uti)
ext = "." + ext
edited_filename = original_filename.stem + "_edited" + ext
return edited_filename
ext = f".{ext}"
return f"{original_filename.stem}_edited{ext}"
def _get_dest_path(
self, dest: pathlib.Path, options: ExportOptions
@ -644,6 +648,14 @@ class PhotoExporter:
new dest path (pathlib.Path)
"""
# lock files are used to minimize chance of name collision when in parallel mode
# don't create lock files if in dry_run mode
lock = not options.dry_run
def _lock_filename(filename):
"""Lock filename if not in dry_run mode"""
return lock_filename(filename) if lock else filename
# if overwrite==False and #increment==False, export should fail if file exists
if dest.exists() and not any(
[options.increment, options.update, options.force_update, options.overwrite]
@ -653,7 +665,7 @@ class PhotoExporter:
)
# if overwrite, we don't care if the file exists or not
if options.overwrite:
if options.overwrite and _lock_filename(dest):
return dest
# if not update or overwrite, check to see if file exists and if so, add (1), (2), etc
@ -665,19 +677,21 @@ class PhotoExporter:
if options.increment and not any(
[options.update, options.force_update, options.overwrite]
):
return pathlib.Path(increment_filename(dest))
return pathlib.Path(increment_filename(dest, lock=lock))
# if update and file exists, need to check to see if it's the right file by checking export db
if options.update or options.force_update:
export_db = options.export_db
dest_uuid = export_db.get_uuid_for_file(dest)
if dest_uuid is None and not dest.exists():
if dest_uuid is None and not dest.exists() and _lock_filename(dest):
# destination doesn't exist in export db and doesn't exist on disk
# so we can just use it
return dest
if dest_uuid == self.photo.uuid:
# destination is the right file
# will use it even if locked so don't check return value of _lock_filename
_lock_filename(dest)
return dest
# either dest_uuid is wrong or file exists and there's no associated UUID, so find a name that matches
@ -686,18 +700,20 @@ class PhotoExporter:
# first, find all matching files in export db and see if there's a match
if dest_target := export_db.get_target_for_file(self.photo.uuid, dest):
# there's a match so use that
_lock_filename(dest_target)
return pathlib.Path(dest_target)
# no match so need to create a new name
# increment the destination file until we find one that doesn't exist and doesn't match another uuid in the database
count = 0
dest, count = increment_filename_with_count(dest, count)
dest, count = increment_filename_with_count(dest, count, lock=lock)
count += 1
while export_db.get_uuid_for_file(dest) is not None:
dest, count = increment_filename_with_count(dest, count)
dest, count = increment_filename_with_count(dest, count, lock=lock)
return pathlib.Path(dest)
# fail safe...I can't think of a case that gets here
_lock_filename(dest)
return dest
def _should_update_photo(
@ -1216,6 +1232,9 @@ class PhotoExporter:
"exiftool_warning": exif_results.exiftool_warning,
}
# clean up lock file
unlock_filename(dest_str)
return results
def _export_photo_uuid_applescript(

View File

@ -39,6 +39,7 @@ __all__ = [
"list_directory",
"list_photo_libraries",
"load_function",
"lock_filename",
"noop",
"normalize_fs_path",
"normalize_unicode",
@ -367,7 +368,7 @@ def normalize_unicode(value):
def increment_filename_with_count(
filepath: Union[str, pathlib.Path], count: int = 0
filepath: Union[str, pathlib.Path], count: int = 0, lock: bool = False
) -> Tuple[str, int]:
"""Return filename (1).ext, etc if filename.ext exists
@ -377,6 +378,7 @@ def increment_filename_with_count(
Args:
filepath: str or pathlib.Path; full path, including file name
count: int; starting increment value
lock: bool, if True, creates lock file to reserve filename
Returns:
tuple of new filepath (or same if not incremented), count
@ -393,10 +395,13 @@ def increment_filename_with_count(
count += 1
dest_new = normalize_fs_path(f"{dest.stem} ({count})")
dest = dest.parent / f"{dest_new}{dest.suffix}"
if lock and not lock_filename(dest):
# if lock fails, increment count and try again
return increment_filename_with_count(filepath, count + 1, lock=lock)
return normalize_fs_path(str(dest)), count
def increment_filename(filepath: Union[str, pathlib.Path]) -> str:
def increment_filename(filepath: Union[str, pathlib.Path], lock: bool = False) -> str:
"""Return filename (1).ext, etc if filename.ext exists
If file exists in filename's parent folder with same stem as filename,
@ -405,16 +410,47 @@ def increment_filename(filepath: Union[str, pathlib.Path]) -> str:
Args:
filepath: str or pathlib.Path; full path, including file name
force: force the file count to increment by at least 1 even if filepath doesn't exist
lock: bool, if True, creates lock file to reserve filename
Returns:
new filepath (or same if not incremented)
Note: This obviously is subject to race condition so using with caution.
"""
new_filepath, _ = increment_filename_with_count(filepath)
new_filepath, _ = increment_filename_with_count(filepath, lock=lock)
return new_filepath
def lock_filename(filepath: Union[str, pathlib.Path]) -> bool:
"""Create empty lock file to reserve file.
Lock file will have name of filepath with .osxphotos.lock extension.
Args:
filepath: str or pathlib.Path; full path, including file name
Returns:
filepath if lock file created, False if lock file already exists
"""
lockfile = pathlib.Path(f"{filepath}.osxphotos.lock")
if lockfile.exists():
return False
lockfile.touch()
return filepath
def unlock_filename(filepath: Union[str, pathlib.Path]):
"""Remove lock file created by lock_filename()
Args:
filepath: str or pathlib.Path; full path, including file name
"""
lockfile = pathlib.Path(f"{filepath}.osxphotos.lock")
if lockfile.exists():
lockfile.unlink()
def extract_increment_count_from_filename(filepath: Union[str, pathlib.Path]) -> int:
"""Extract a count from end of file name if it exists or 0 if not; count takes forms file (1).ext, file (2).ext, etc."""
filepath = str(filepath)
@ -501,3 +537,7 @@ def uuid_to_shortuuid(uuid: str) -> str:
def shortuuid_to_uuid(short_uuid: str) -> str:
"""Convert shortuuid to uuid"""
return str(shortuuid.decode(short_uuid)).upper()
def under_test() -> bool:
"""Return True if running under pytest"""
return "pytest" in sys.modules

View File

@ -9,8 +9,8 @@ import os.path
import pathlib
import re
import shutil
import subprocess
import sqlite3
import subprocess
import tempfile
import time
from tempfile import TemporaryDirectory
@ -498,10 +498,9 @@ CLI_EXPORTED_FILENAME_TEMPLATE_FILENAMES_KEYWORD_PATHSEP = [
]
CLI_EXPORTED_FILENAME_TEMPLATE_LONG_DESCRIPTION = [
"Lorem ipsum dolor sit amet, consectetuer adipiscing elit. "
"Aenean commodo ligula eget dolor. Aenean massa. "
"Cum sociis natoque penatibus et magnis dis parturient montes, "
"nascetur ridiculus mus. Donec quam felis, ultricies nec, "
"Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo"
" ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis "
"dis parturient montes, nascetu. Donec quam felis, ultricies nec, "
"pellentesque eu, pretium q.tif"
]
@ -4008,6 +4007,7 @@ def test_export_filename_template_long_description():
],
)
assert result.exit_code == 0
files = glob.glob("*.*")
for fname in CLI_EXPORTED_FILENAME_TEMPLATE_LONG_DESCRIPTION:
assert pathlib.Path(fname).is_file()

View File

@ -1,8 +1,10 @@
""" Test path_utils.py """
from osxphotos._constants import _OSXPHOTOS_LOCK_EXTENSION, MAX_FILENAME_LEN
from osxphotos.path_utils import sanitize_filename
def test_sanitize_filename():
from osxphotos.path_utils import sanitize_filename
from osxphotos._constants import MAX_FILENAME_LEN
# basic sanitize
filenames = {
@ -30,30 +32,33 @@ def test_sanitize_filename():
filename = "foo" + "x" * 512
new_filename = sanitize_filename(filename)
assert len(new_filename) == MAX_FILENAME_LEN
assert new_filename == "foo" + "x" * 252
assert new_filename == "foo" + "x" * (252 - len(_OSXPHOTOS_LOCK_EXTENSION))
# filename too long with extension
filename = "x" * 512 + ".jpeg"
new_filename = sanitize_filename(filename)
assert len(new_filename) == MAX_FILENAME_LEN
assert new_filename == "x" * 250 + ".jpeg"
assert new_filename == "x" * (250 - len(_OSXPHOTOS_LOCK_EXTENSION)) + ".jpeg"
# more than one extension
filename = "foo.bar" + "x" * 255 + ".foo.bar.jpeg"
new_filename = sanitize_filename(filename)
assert len(new_filename) == MAX_FILENAME_LEN
assert new_filename == "foo.bar" + "x" * 243 + ".jpeg"
assert (
new_filename
== "foo.bar" + "x" * (243 - len(_OSXPHOTOS_LOCK_EXTENSION)) + ".jpeg"
)
# shorter than drop count
filename = "foo." + "x" * 256
new_filename = sanitize_filename(filename)
assert len(new_filename) == MAX_FILENAME_LEN
assert new_filename == "foo." + "x" * 251
assert new_filename == "foo." + "x" * (251 - len(_OSXPHOTOS_LOCK_EXTENSION))
def test_sanitize_dirname():
from osxphotos.path_utils import sanitize_dirname
from osxphotos._constants import MAX_DIRNAME_LEN
from osxphotos.path_utils import sanitize_dirname
# basic sanitize
dirnames = {
@ -83,9 +88,10 @@ def test_sanitize_dirname():
assert len(new_dirname) == MAX_DIRNAME_LEN
assert new_dirname == "foo" + "x" * 252
def test_sanitize_pathpart():
from osxphotos.path_utils import sanitize_pathpart
from osxphotos._constants import MAX_DIRNAME_LEN
from osxphotos.path_utils import sanitize_pathpart
# basic sanitize
dirnames = {
@ -114,4 +120,3 @@ def test_sanitize_pathpart():
new_dirname = sanitize_pathpart(dirname)
assert len(new_dirname) == MAX_DIRNAME_LEN
assert new_dirname == "foo" + "x" * 252

View File

@ -125,6 +125,42 @@ def test_increment_filename():
)
def test_increment_filename_with_lock():
# test that increment_filename works with lock=True
with tempfile.TemporaryDirectory(prefix="osxphotos_") as temp_dir:
temp_dir = pathlib.Path(temp_dir)
filename = str(temp_dir / "file.jpg")
assert increment_filename(filename, lock=True) == str(temp_dir / "file.jpg")
new_file = temp_dir / "file.jpg"
new_file.touch()
assert increment_filename(filename, lock=True) == str(temp_dir / "file (1).jpg")
# test increment_filename_with_count
filename = str(temp_dir / "file2.jpg")
assert increment_filename_with_count(filename, count=2, lock=True) == (
str(temp_dir / "file2 (2).jpg"),
2,
)
new_file = temp_dir / "file2 (2).jpg"
new_file.touch()
assert increment_filename_with_count(filename, count=2, lock=True) == (
str(temp_dir / "file2 (3).jpg"),
3,
)
def test_increment_filename_with_lock_exists():
# test that increment_filename works with lock=True when lock file already exists
with tempfile.TemporaryDirectory(prefix="osxphotos_") as temp_dir:
temp_dir = pathlib.Path(temp_dir)
filename = str(temp_dir / "file.jpg")
pathlib.Path(f"{filename}.osxphotos.lock").touch()
assert increment_filename(filename, lock=True) == str(temp_dir / "file (1).jpg")
def test_shortuuid_uuid():
"""Test shortuuid_to_uuid and uuid_to_shortuuid"""
uuid = "5CF8D91E-DCEB-4CC3-BFF7-920B05564EB0"