Fix for filenames with special characters, #561, #618

This commit is contained in:
Rhet Turnbull 2022-02-03 22:46:11 -08:00
parent e32090bf39
commit f3063d35be
6 changed files with 165 additions and 66 deletions

View File

@ -1,3 +1,3 @@
""" version info """
__version__ = "0.45.3"
__version__ = "0.45.4"

View File

@ -45,7 +45,7 @@ from .photokit import (
)
from .phototemplate import RenderOptions
from .uti import get_preferred_uti_extension
from .utils import increment_filename, increment_filename_with_count, lineno
from .utils import increment_filename, lineno, list_directory
__all__ = [
"ExportError",
@ -598,9 +598,13 @@ class PhotoExporter:
)
if dest_uuid != self.photo.uuid:
# not the right file, find the right one
glob_str = str(dest.parent / f"{dest.stem} (*{dest.suffix}")
# TODO: use the normalized code in utils
dest_files = glob.glob(glob_str)
# find files that match "dest_name (*.ext" (e.g. "dest_name (1).jpg", "dest_name (2).jpg", ...)
dest_files = list_directory(
dest.parent,
startswith=f"{dest.stem} (",
endswith=dest.suffix,
include_path=True,
)
for file_ in dest_files:
dest_uuid = export_db.get_uuid_for_file(file_)
if dest_uuid == self.photo.uuid:
@ -1828,7 +1832,7 @@ def _export_photo_uuid_applescript(
raise ValueError(f"dest {dest} must be a directory")
if not original ^ edited:
raise ValueError(f"edited or original must be True but not both")
raise ValueError("edited or original must be True but not both")
tmpdir = tempfile.TemporaryDirectory(prefix="osxphotos_")
@ -1851,7 +1855,6 @@ def _export_photo_uuid_applescript(
if not exported_files or not filename:
# nothing got exported
raise ExportError(f"Could not export photo {uuid} ({lineno(__file__)})")
# need to find actual filename as sometimes Photos renames JPG to jpeg on export
# may be more than one file exported (e.g. if Live Photo, Photos exports both .jpeg and .mov)
# TemporaryDirectory will cleanup on return

View File

@ -54,7 +54,7 @@ from .scoreinfo import ScoreInfo
from .searchinfo import SearchInfo
from .text_detection import detect_text
from .uti import get_preferred_uti_extension, get_uti_for_extension
from .utils import _debug, _get_resource_loc, findfiles
from .utils import _debug, _get_resource_loc, list_directory
__all__ = ["PhotoInfo", "PhotoInfoNone"]
@ -369,7 +369,7 @@ class PhotoInfo:
# In Photos 5, raw is in same folder as original but with _4.ext
# Unless "Copy Items to the Photos Library" is not checked
# then RAW image is not renamed but has the same name as the jpeg but with the raw extension
# Current implementation uses findfiles to find images with the correct raw UTI extension
# Current implementation finds images with the correct raw UTI extension
# in same folder as the original and with same stem as original in form: original_stem*.raw_ext
# TODO: I don't like this -- would prefer a more deterministic approach but until I have more
# data on how Photos stores and retrieves RAW images, this seems to be working
@ -405,8 +405,7 @@ class PhotoInfo:
# raw files have same name as original but with _4.raw_ext appended
# I believe the _4 maps to PHAssetResourceTypeAlternatePhoto = 4
# see: https://developer.apple.com/documentation/photokit/phassetresourcetype/phassetresourcetypealternatephoto?language=objc
glob_str = f"{filestem}_4*"
raw_file = findfiles(glob_str, filepath)
raw_file = list_directory(filepath, startswith=f"{filestem}_4")
if not raw_file:
photopath = None
else:

View File

@ -17,7 +17,7 @@ import sys
import unicodedata
import urllib.parse
from plistlib import load as plistload
from typing import Callable, List, Union
from typing import Callable, List, Union, Optional
import CoreFoundation
import objc
@ -28,7 +28,6 @@ from ._constants import UNICODE_FORMAT
__all__ = [
"dd_to_dms_str",
"expand_and_validate_filepath",
"findfiles",
"get_last_library_path",
"get_system_library_path",
"increment_filename_with_count",
@ -266,7 +265,9 @@ def list_photo_libraries():
# On older MacOS versions, mdfind appears to ignore some libraries
# glob to find libraries in ~/Pictures then mdfind to find all the others
# TODO: make this more robust
lib_list = glob.glob(f"{pathlib.Path.home()}/Pictures/*.photoslibrary")
lib_list = list_directory(
f"{pathlib.Path.home()}/Pictures/", glob="*.photoslibrary"
)
# On older OS, may not get all libraries so make sure we get the last one
last_lib = get_last_library_path()
@ -290,27 +291,90 @@ def normalize_fs_path(path: str) -> str:
return unicodedata.normalize("NFD", path)
def findfiles(pattern, path):
"""Returns list of filenames from path matched by pattern
shell pattern. Matching is case-insensitive.
If 'path_' is invalid/doesn't exist, returns []."""
if not os.path.isdir(path):
# def findfiles(pattern, path):
# """Returns list of filenames from path matched by pattern
# shell pattern. Matching is case-insensitive.
# If 'path_' is invalid/doesn't exist, returns []."""
# if not os.path.isdir(path):
# return []
# # paths need to be normalized for unicode as filesystem returns unicode in NFD form
# pattern = normalize_fs_path(pattern)
# rule = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
# files = os.listdir(path)
# return [name for name in files if rule.match(name)]
def list_directory(
directory: Union[str, pathlib.Path],
startswith: Optional[str] = None,
endswith: Optional[str] = None,
contains: Optional[str] = None,
glob: Optional[str] = None,
include_path: bool = False,
case_sensitive: bool = False,
) -> List[Union[str, pathlib.Path]]:
"""List directory contents and return list of files or directories matching search criteria.
Accounts for case-insensitive filesystems, unicode filenames. directory can be a str or a pathlib.Path object.
Args:
directory: directory to search
startswith: string to match at start of filename
endswith: string to match at end of filename
contains: string to match anywhere in filename
glob: shell-style glob pattern to match filename
include_path: if True, return full path to file
case_sensitive: if True, match case-sensitively
Returns: List of files or directories matching search criteria as either str or pathlib.Path objects depending on the input type;
returns empty list if directory is invalid or doesn't exist.
"""
is_pathlib = isinstance(directory, pathlib.Path)
if is_pathlib:
directory = str(directory)
if not os.path.isdir(directory):
return []
# paths need to be normalized for unicode as filesystem returns unicode in NFD form
pattern = normalize_fs_path(pattern)
rule = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
files = os.listdir(path)
return [name for name in files if rule.match(name)]
startswith = normalize_fs_path(startswith) if startswith else None
endswith = normalize_fs_path(endswith) if endswith else None
contains = normalize_fs_path(contains) if contains else None
glob = normalize_fs_path(glob) if glob else None
files = [normalize_fs_path(f) for f in os.listdir(directory)]
if not case_sensitive:
files_normalized = {f.lower(): f for f in files}
files = [f.lower() for f in files]
startswith = startswith.lower() if startswith else None
endswith = endswith.lower() if endswith else None
contains = contains.lower() if contains else None
glob = glob.lower() if glob else None
else:
files_normalized = {f: f for f in files}
def list_directory_startswith(directory_path: str, startswith: str) -> List[str]:
"""List directory contents and return list of files starting with startswith; returns [] if directory doesn't exist"""
if not os.path.isdir(directory_path):
return []
startswith = normalize_fs_path(startswith)
files = [normalize_fs_path(f) for f in os.listdir(directory_path)]
return [f for f in files if f.startswith(startswith)]
if startswith:
files = [f for f in files if f.startswith(startswith)]
if endswith:
endswith = normalize_fs_path(endswith)
files = [f for f in files if f.endswith(endswith)]
if contains:
contains = normalize_fs_path(contains)
files = [f for f in files if contains in f]
if glob:
glob = normalize_fs_path(glob)
flags = re.IGNORECASE if not case_sensitive else 0
rule = re.compile(fnmatch.translate(glob), flags)
files = [f for f in files if rule.match(f)]
files = [files_normalized[f] for f in files]
if include_path:
files = [os.path.join(directory, f) for f in files]
if is_pathlib:
files = [pathlib.Path(f) for f in files]
return files
def _open_sql_file(dbname):
@ -381,8 +445,8 @@ def increment_filename_with_count(
Note: This is obviously subject to a race condition, so use with caution.
"""
dest = filepath if isinstance(filepath, pathlib.Path) else pathlib.Path(filepath)
dest_files = list_directory_startswith(str(dest.parent), dest.stem)
dest_files = [pathlib.Path(f).stem.lower() for f in dest_files]
dest_files = list_directory(dest.parent, startswith=dest.stem)
dest_files = [f.stem.lower() for f in dest_files]
dest_new = f"{dest.stem} ({count})" if count else dest.stem
dest_new = normalize_fs_path(dest_new)

View File

@ -4700,7 +4700,14 @@ def test_export_live_edited():
# basic export
result = runner.invoke(
export,
[os.path.join(cwd, PHOTOS_DB_RHET), ".", "-V", "--uuid", UUID_LIVE_EDITED],
[
os.path.join(cwd, PHOTOS_DB_RHET),
".",
"-V",
"--uuid",
UUID_LIVE_EDITED,
"--download-missing",
],
)
assert result.exit_code == 0
files = glob.glob("*")

View File

@ -1,27 +1,33 @@
import logging
import os.path
import pathlib
import tempfile
import pytest
import osxphotos
DB_LOCKED_10_12 = "./tests/Test-Lock-10_12.photoslibrary/database/photos.db"
DB_LOCKED_10_15 = "./tests/Test-Lock-10_15_1.photoslibrary/database/Photos.sqlite"
DB_UNLOCKED_10_15 = "./tests/Test-10.15.1.photoslibrary/database/photos.db"
UTI_DICT = {"public.jpeg": "jpeg", "com.canon.cr2-raw-image": "cr2"}
from osxphotos.utils import (
_dd_to_dms,
increment_filename,
increment_filename_with_count,
list_directory,
)
def test_debug_enable():
import logging
import osxphotos
osxphotos._set_debug(True)
logger = osxphotos._get_logger()
assert logger.isEnabledFor(logging.DEBUG)
def test_debug_disable():
import logging
import osxphotos
osxphotos._set_debug(False)
logger = osxphotos._get_logger()
assert not logger.isEnabledFor(logging.DEBUG)
@ -29,14 +35,12 @@ def test_debug_disable():
def test_dd_to_dms():
# expands coverage for edge case in _dd_to_dms
from osxphotos.utils import _dd_to_dms
assert _dd_to_dms(-0.001) == (0, 0, -3.6)
@pytest.mark.skip(reason="Fails on some machines")
def test_get_system_library_path():
import osxphotos
_, major, _ = osxphotos.utils._get_os_version()
if int(major) < 15:
@ -46,51 +50,73 @@ def test_get_system_library_path():
def test_db_is_locked_locked():
import osxphotos
assert osxphotos.utils._db_is_locked(DB_LOCKED_10_12)
assert osxphotos.utils._db_is_locked(DB_LOCKED_10_15)
def test_db_is_locked_unlocked():
import osxphotos
assert not osxphotos.utils._db_is_locked(DB_UNLOCKED_10_15)
def test_findfiles():
import os.path
import tempfile
from osxphotos.utils import findfiles
def test_list_directory():
"""test list_directory"""
temp_dir = tempfile.TemporaryDirectory(prefix="osxphotos_")
fd = open(os.path.join(temp_dir.name, "file1.jpg"), "w+")
fd.close
fd = open(os.path.join(temp_dir.name, "file2.JPG"), "w+")
fd.close
files = findfiles("*.jpg", temp_dir.name)
temp_dir_name = pathlib.Path(temp_dir.name)
file1 = (temp_dir_name / "file1.jpg").touch()
file2 = (temp_dir_name / "File2.JPG").touch()
file3 = (temp_dir_name / "File.png").touch()
file4 = (temp_dir_name / "document.pdf").touch()
files = list_directory(temp_dir.name, glob="*.jpg")
assert len(files) == 2
assert "file1.jpg" in files
assert "file2.JPG" in files
assert "File2.JPG" in files
assert isinstance(files[0], str)
files = list_directory(temp_dir.name, glob="*.jpg", case_sensitive=True)
assert len(files) == 1
assert "file1.jpg" in files
files = list_directory(temp_dir.name, startswith="file")
assert len(files) == 3
files = list_directory(temp_dir.name, endswith="jpg")
assert len(files) == 2
files = list_directory(temp_dir.name, contains="doc")
assert len(files) == 1
assert "document.pdf" in files
files = list_directory(temp_dir.name, startswith="File", case_sensitive=True)
assert len(files) == 2
files = list_directory(temp_dir.name, startswith="File", case_sensitive=False)
assert len(files) == 3
files = list_directory(temp_dir.name, startswith="document", include_path=True)
assert len(files) == 1
assert files[0] == str(pathlib.Path(temp_dir.name) / "document.pdf")
# test pathlib.Path
files = list_directory(temp_dir_name, glob="*.jpg")
assert isinstance(files[0], pathlib.Path)
files = list_directory(temp_dir.name, glob="FooBar*.jpg")
assert not files
def test_findfiles_invalid_dir():
import tempfile
from osxphotos.utils import findfiles
def test_list_directory_invalid():
temp_dir = tempfile.TemporaryDirectory(prefix="osxphotos_")
files = findfiles("*.jpg", f"{temp_dir.name}/no_such_dir")
files = list_directory(f"{temp_dir.name}/no_such_dir", glob="*.jpg")
assert len(files) == 0
def test_increment_filename():
# test that increment_filename works
import pathlib
import tempfile
from osxphotos.utils import increment_filename, increment_filename_with_count
with tempfile.TemporaryDirectory(prefix="osxphotos_") as temp_dir:
temp_dir = pathlib.Path(temp_dir)