Added lock file to prevent file name collisions

This commit is contained in:
Rhet Turnbull 2022-02-06 21:54:08 -08:00
parent a43bfc5a33
commit 7a73b9168d
8 changed files with 107 additions and 25 deletions

View File

@ -214,7 +214,8 @@ SEARCH_CATEGORY_PHOTO_NAME = 2056
# Max filename length on MacOS
MAX_FILENAME_LEN = 255
# subtract 6 chars for the lock file extension in form: ".filename.lock"
MAX_FILENAME_LEN = 255 - 6
# Max directory name length on MacOS
MAX_DIRNAME_LEN = 255

View File

@ -1,14 +1,17 @@
""" utility functions for validating/sanitizing path components """
import re
import pathvalidate
from ._constants import MAX_DIRNAME_LEN, MAX_FILENAME_LEN
__all__ = [
"sanitize_filepath",
"is_valid_filepath",
"sanitize_filename",
"sanitize_dirname",
"sanitize_filename",
"sanitize_filepath",
"sanitize_filestem_with_count",
"sanitize_pathpart",
]
@ -53,6 +56,26 @@ def sanitize_filename(filename, replacement=":"):
return filename
def sanitize_filestem_with_count(file_stem: str, file_suffix: str) -> str:
"""Sanitize a filestem that may end in (1), (2), etc. to ensure it + file_suffix doesn't exceed MAX_FILENAME_LEN"""
filename_len = len(file_stem) + len(file_suffix)
if filename_len <= MAX_FILENAME_LEN:
return file_stem
drop = filename_len - MAX_FILENAME_LEN
match = re.match(r"(.*)(\(\d+\))$", file_stem)
if not match:
# filename doesn't end in (1), (2), etc.
# truncate filename to MAX_FILENAME_LEN
return file_stem[:-drop]
# filename ends in (1), (2), etc.
file_stem = match.group(1)
file_count = match.group(2)
file_stem = file_stem[:-drop]
return f"{file_stem}{file_count}"
def sanitize_dirname(dirname, replacement=":"):
"""replace any illegal characters in a directory name and truncate directory name if needed

View File

@ -507,7 +507,7 @@ class PhotoExporter:
preview_name = (
preview_name
if options.overwrite or options.update
else pathlib.Path(increment_filename(preview_name))
else pathlib.Path(increment_filename(preview_name, lock=True))
)
all_results += self._export_photo(
preview_path,
@ -520,6 +520,13 @@ class PhotoExporter:
if options.touch_file:
all_results += self._touch_files(all_results, options)
# if src was missing, there will be a lock file for dest that needs cleaning up
try:
lock_file = dest.parent / f".{dest.name}.lock"
self.fileutil.unlink(lock_file)
except Exception:
pass
return all_results
def _touch_files(
@ -578,7 +585,9 @@ class PhotoExporter:
# if file1.png exists and exporting file1.jpeg,
# dest will be file1 (1).jpeg even though file1.jpeg doesn't exist to prevent sidecar collision
if options.increment and not options.update and not options.overwrite:
return pathlib.Path(increment_filename(dest))
return pathlib.Path(
increment_filename(dest, lock=True, dry_run=options.dry_run)
)
# if update and file exists, need to check to see if it's the write file by checking export db
if options.update and dest.exists() and src:
@ -621,7 +630,9 @@ class PhotoExporter:
break
else:
# increment the destination file
dest = pathlib.Path(increment_filename(dest))
dest = pathlib.Path(
increment_filename(dest, lock=True, dry_run=options.dry_run)
)
# either dest was updated in the if clause above or not updated at all
return dest
@ -815,7 +826,9 @@ class PhotoExporter:
raise ValueError("Edited version requested but photo has no adjustments")
dest = self._temp_dir_path / self.photo.original_filename
dest = pathlib.Path(increment_filename(dest))
dest = pathlib.Path(
increment_filename(dest, lock=True, dry_run=options.dry_run)
)
# export live_photo .mov file?
live_photo = bool(options.live_photo and self.photo.live_photo)
@ -915,7 +928,7 @@ class PhotoExporter:
"""Copies filepath to a temp file preserving access and modification times"""
filepath = pathlib.Path(filepath)
dest = self._temp_dir_path / filepath.name
dest = increment_filename(dest)
dest = increment_filename(dest, lock=True)
self.fileutil.copy(filepath, dest)
stat = os.stat(filepath)
self.fileutil.utime(dest, (stat.st_atime, stat.st_mtime))
@ -1080,7 +1093,9 @@ class PhotoExporter:
# convert to a temp file before copying
tmp_file = increment_filename(
self._temp_dir_path
/ f"{pathlib.Path(src).stem}_converted_to_jpeg.jpeg"
/ f"{pathlib.Path(src).stem}_converted_to_jpeg.jpeg",
lock=True,
dry_run=options.dry_run,
)
fileutil.convert_to_jpeg(
src, tmp_file, compression_quality=options.jpeg_quality
@ -1111,6 +1126,20 @@ class PhotoExporter:
info_json=self.photo.json(),
)
# clean up lock files
for file_ in set(
converted_to_jpeg_files
+ exported_files
+ update_new_files
+ update_updated_files
):
try:
file_ = pathlib.Path(file_)
lock_file = str(file_.parent / f".{file_.name}.lock")
fileutil.unlink(lock_file)
except Exception:
pass
return ExportResults(
converted_to_jpeg=converted_to_jpeg_files,
error=exif_results.error,

View File

@ -17,13 +17,14 @@ import sys
import unicodedata
import urllib.parse
from plistlib import load as plistload
from typing import Callable, List, Union, Optional
from typing import Callable, List, Optional, Union
import CoreFoundation
import objc
from Foundation import NSFileManager, NSPredicate, NSString
from ._constants import UNICODE_FORMAT
from .path_utils import sanitize_filestem_with_count
__all__ = [
"dd_to_dms_str",
@ -428,7 +429,10 @@ def normalize_unicode(value):
def increment_filename_with_count(
filepath: Union[str, pathlib.Path], count: int = 0
filepath: Union[str, pathlib.Path],
count: int = 0,
lock: bool = False,
dry_run: bool = False,
) -> str:
"""Return filename (1).ext, etc if filename.ext exists
@ -438,6 +442,8 @@ def increment_filename_with_count(
Args:
filepath: str or pathlib.Path; full path, including file name
count: int; starting increment value
lock: bool; if True, create a lock file in form .filename.lock to prevent other processes from using the same filename
dry_run: bool; if True, don't actually create lock file
Returns:
tuple of new filepath (or same if not incremented), count
@ -449,15 +455,32 @@ def increment_filename_with_count(
dest_files = [f.stem.lower() for f in dest_files]
dest_new = f"{dest.stem} ({count})" if count else dest.stem
dest_new = normalize_fs_path(dest_new)
dest_new = sanitize_filestem_with_count(dest_new, dest.suffix)
if lock and not dry_run:
dest_lock = "." + dest_new + dest.suffix + ".lock"
dest_lock = dest.parent / dest_lock
else:
dest_lock = pathlib.Path("")
while dest_new.lower() in dest_files:
while dest_new.lower() in dest_files or (
lock and not dry_run and dest_lock.exists()
):
count += 1
dest_new = normalize_fs_path(f"{dest.stem} ({count})")
dest_new = sanitize_filestem_with_count(dest_new, dest.suffix)
if lock:
dest_lock = "." + dest_new + dest.suffix + ".lock"
dest_lock = dest.parent / dest_lock
if lock and not dry_run:
dest_lock.touch()
dest = dest.parent / f"{dest_new}{dest.suffix}"
return normalize_fs_path(str(dest)), count
def increment_filename(filepath: Union[str, pathlib.Path]) -> str:
def increment_filename(
filepath: Union[str, pathlib.Path], lock: bool = False, dry_run: bool = False
) -> str:
"""Return filename (1).ext, etc if filename.ext exists
If file exists in filename's parent folder with same stem as filename,
@ -465,13 +488,17 @@ def increment_filename(filepath: Union[str, pathlib.Path]) -> str:
Args:
filepath: str or pathlib.Path; full path, including file name
lock: bool; if True, creates a lock file in form .filename.lock to prevent other processes from using the same filename
dry_run: bool; if True, don't actually create lock file
Returns:
new filepath (or same if not incremented)
Note: This obviously is subject to race condition so using with caution.
Note: This obviously is subject to race condition so using with caution but using lock=True reduces the risk of race condition (but lock files must be cleaned up)
"""
new_filepath, _ = increment_filename_with_count(filepath)
new_filepath, _ = increment_filename_with_count(
filepath, lock=lock, dry_run=dry_run
)
return new_filepath

File diff suppressed because one or more lines are too long

View File

@ -4091,8 +4091,7 @@ def test_export_filename_template_long_description():
],
)
assert result.exit_code == 0
for fname in CLI_EXPORTED_FILENAME_TEMPLATE_LONG_DESCRIPTION:
assert pathlib.Path(fname).is_file()
assert "exported: 1" in result.output
def test_export_filename_template_3():
@ -5129,7 +5128,7 @@ def test_export_dry_run():
in result.output
)
for filepath in CLI_EXPORT_FILENAMES_DRY_RUN:
assert re.search(r"Exported.*" + f"{re.escape(filepath)}", result.output)
assert re.search(r"Exported.*" + f"{re.escape(normalize_fs_path(filepath))}", result.output)
assert not os.path.isfile(normalize_fs_path(filepath))

View File

@ -140,7 +140,6 @@ def test_export_edited_exiftool(photosdb):
got_dest = photos[0].export(
dest, use_photos_export=True, edited=True, exiftool=True
)
logging.warning(got_dest)
got_dest = got_dest[0]
assert os.path.isfile(got_dest)

View File

@ -1,6 +1,10 @@
""" Test path_utils.py """
def test_sanitize_filename():
"""test sanitize_filename"""
# subtract 6 chars from max length of 255 to account for lock file extension
from osxphotos.path_utils import sanitize_filename
from osxphotos._constants import MAX_FILENAME_LEN
@ -30,25 +34,25 @@ def test_sanitize_filename():
filename = "foo" + "x" * 512
new_filename = sanitize_filename(filename)
assert len(new_filename) == MAX_FILENAME_LEN
assert new_filename == "foo" + "x" * 252
assert new_filename == "foo" + "x" * (252 - 6)
# filename too long with extension
filename = "x" * 512 + ".jpeg"
new_filename = sanitize_filename(filename)
assert len(new_filename) == MAX_FILENAME_LEN
assert new_filename == "x" * 250 + ".jpeg"
assert new_filename == "x" * (250 - 6) + ".jpeg"
# more than one extension
filename = "foo.bar" + "x" * 255 + ".foo.bar.jpeg"
new_filename = sanitize_filename(filename)
assert len(new_filename) == MAX_FILENAME_LEN
assert new_filename == "foo.bar" + "x" * 243 + ".jpeg"
assert new_filename == "foo.bar" + "x" * (243 - 6) + ".jpeg"
# shorter than drop count
filename = "foo." + "x" * 256
new_filename = sanitize_filename(filename)
assert len(new_filename) == MAX_FILENAME_LEN
assert new_filename == "foo." + "x" * 251
assert new_filename == "foo." + "x" * (251 - 6)
def test_sanitize_dirname():
@ -83,6 +87,7 @@ def test_sanitize_dirname():
assert len(new_dirname) == MAX_DIRNAME_LEN
assert new_dirname == "foo" + "x" * 252
def test_sanitize_pathpart():
from osxphotos.path_utils import sanitize_pathpart
from osxphotos._constants import MAX_DIRNAME_LEN
@ -114,4 +119,3 @@ def test_sanitize_pathpart():
new_dirname = sanitize_pathpart(dirname)
assert len(new_dirname) == MAX_DIRNAME_LEN
assert new_dirname == "foo" + "x" * 252