Fix for filenames with special characters, #561, #618

2022-02-03 22:46:11 -08:00 · 2022-02-03 22:46:11 -08:00 · f3063d35be
commit f3063d35be
parent e32090bf39
6 changed files with 165 additions and 66 deletions
--- a/osxphotos/_version.py
+++ b/osxphotos/_version.py
@ -1,3 +1,3 @@
 """ version info """

-__version__ = "0.45.3"
+__version__ = "0.45.4"
--- a/osxphotos/photoexporter.py
+++ b/osxphotos/photoexporter.py
@ -45,7 +45,7 @@ from .photokit import (
 )
 from .phototemplate import RenderOptions
 from .uti import get_preferred_uti_extension
-from .utils import increment_filename, increment_filename_with_count, lineno
+from .utils import increment_filename, lineno, list_directory

 __all__ = [
    "ExportError",
@ -598,9 +598,13 @@ class PhotoExporter:
                )
            if dest_uuid != self.photo.uuid:
                # not the right file, find the right one
-                glob_str = str(dest.parent / f"{dest.stem} (*{dest.suffix}")
-                # TODO: use the normalized code in utils
-                dest_files = glob.glob(glob_str)
+                # find files that match "dest_name (*.ext" (e.g. "dest_name (1).jpg", "dest_name (2).jpg", ...)
+                dest_files = list_directory(
+                    dest.parent,
+                    startswith=f"{dest.stem} (",
+                    endswith=dest.suffix,
+                    include_path=True,
+                )
                for file_ in dest_files:
                    dest_uuid = export_db.get_uuid_for_file(file_)
                    if dest_uuid == self.photo.uuid:
@ -1828,7 +1832,7 @@ def _export_photo_uuid_applescript(
        raise ValueError(f"dest {dest} must be a directory")

    if not original ^ edited:
-        raise ValueError(f"edited or original must be True but not both")
+        raise ValueError("edited or original must be True but not both")

    tmpdir = tempfile.TemporaryDirectory(prefix="osxphotos_")

@ -1851,7 +1855,6 @@ def _export_photo_uuid_applescript(
    if not exported_files or not filename:
        # nothing got exported
        raise ExportError(f"Could not export photo {uuid} ({lineno(__file__)})")
-
    # need to find actual filename as sometimes Photos renames JPG to jpeg on export
    # may be more than one file exported (e.g. if Live Photo, Photos exports both .jpeg and .mov)
    # TemporaryDirectory will cleanup on return
--- a/osxphotos/photoinfo.py
+++ b/osxphotos/photoinfo.py
@ -54,7 +54,7 @@ from .scoreinfo import ScoreInfo
 from .searchinfo import SearchInfo
 from .text_detection import detect_text
 from .uti import get_preferred_uti_extension, get_uti_for_extension
-from .utils import _debug, _get_resource_loc, findfiles
+from .utils import _debug, _get_resource_loc, list_directory

 __all__ = ["PhotoInfo", "PhotoInfoNone"]

@ -369,7 +369,7 @@ class PhotoInfo:
        # In Photos 5, raw is in same folder as original but with _4.ext
        # Unless "Copy Items to the Photos Library" is not checked
        # then RAW image is not renamed but has same name is jpeg buth with raw extension
-        # Current implementation uses findfiles to find images with the correct raw UTI extension
+        # Current implementation finds images with the correct raw UTI extension
        # in same folder as the original and with same stem as original in form: original_stem*.raw_ext
        # TODO: I don't like this -- would prefer a more deterministic approach but until I have more
        # data on how Photos stores and retrieves RAW images, this seems to be working
@ -405,8 +405,7 @@ class PhotoInfo:
            # raw files have same name as original but with _4.raw_ext appended
            # I believe the _4 maps to PHAssetResourceTypeAlternatePhoto = 4
            # see: https://developer.apple.com/documentation/photokit/phassetresourcetype/phassetresourcetypealternatephoto?language=objc
-            glob_str = f"{filestem}_4*"
-            raw_file = findfiles(glob_str, filepath)
+            raw_file = list_directory(filepath, startswith=f"{filestem}_4")
            if not raw_file:
                photopath = None
            else:
--- a/osxphotos/utils.py
+++ b/osxphotos/utils.py
@ -17,7 +17,7 @@ import sys
 import unicodedata
 import urllib.parse
 from plistlib import load as plistload
-from typing import Callable, List, Union
+from typing import Callable, List, Union, Optional

 import CoreFoundation
 import objc
@ -28,7 +28,6 @@ from ._constants import UNICODE_FORMAT
 __all__ = [
    "dd_to_dms_str",
    "expand_and_validate_filepath",
-    "findfiles",
    "get_last_library_path",
    "get_system_library_path",
    "increment_filename_with_count",
@ -266,7 +265,9 @@ def list_photo_libraries():
    # On older MacOS versions, mdfind appears to ignore some libraries
    # glob to find libraries in ~/Pictures then mdfind to find all the others
    # TODO: make this more robust
-    lib_list = glob.glob(f"{pathlib.Path.home()}/Pictures/*.photoslibrary")
+    lib_list = list_directory(
+        f"{pathlib.Path.home()}/Pictures/", glob="*.photoslibrary"
+    )

    # On older OS, may not get all libraries so make sure we get the last one
    last_lib = get_last_library_path()
@ -290,27 +291,90 @@ def normalize_fs_path(path: str) -> str:
    return unicodedata.normalize("NFD", path)


-def findfiles(pattern, path):
-    """Returns list of filenames from path matched by pattern
-    shell pattern. Matching is case-insensitive.
-    If 'path_' is invalid/doesn't exist, returns []."""
-    if not os.path.isdir(path):
+# def findfiles(pattern, path):
+#     """Returns list of filenames from path matched by pattern
+#     shell pattern. Matching is case-insensitive.
+#     If 'path_' is invalid/doesn't exist, returns []."""
+#     if not os.path.isdir(path):
+#         return []
+
+#     # paths need to be normalized for unicode as filesystem returns unicode in NFD form
+#     pattern = normalize_fs_path(pattern)
+#     rule = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
+#     files = os.listdir(path)
+#     return [name for name in files if rule.match(name)]
+
+
+def list_directory(
+    directory: Union[str, pathlib.Path],
+    startswith: Optional[str] = None,
+    endswith: Optional[str] = None,
+    contains: Optional[str] = None,
+    glob: Optional[str] = None,
+    include_path: bool = False,
+    case_sensitive: bool = False,
+) -> List[Union[str, pathlib.Path]]:
+    """List directory contents and return list of files or directories matching search criteria.
+    Accounts for case-insensitive filesystems and unicode filenames; directory may be a str or a pathlib.Path object.
+
+    Args:
+        directory: directory to search
+        startswith: string to match at start of filename
+        endswith: string to match at end of filename
+        contains: string to match anywhere in filename
+        glob: shell-style glob pattern to match filename
+        include_path: if True, return each match joined to directory instead of the bare filename
+        case_sensitive: if True, match case-sensitively
+
+    Returns: List of files or directories matching search criteria as either str or pathlib.Path objects depending on the input type;
+    returns empty list if directory is invalid or doesn't exist.
+
+    """
+    is_pathlib = isinstance(directory, pathlib.Path)
+    if is_pathlib:
+        directory = str(directory)
+
+    if not os.path.isdir(directory):
        return []

-    # paths need to be normalized for unicode as filesystem returns unicode in NFD form
-    pattern = normalize_fs_path(pattern)
-    rule = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
-    files = os.listdir(path)
-    return [name for name in files if rule.match(name)]
+    startswith = normalize_fs_path(startswith) if startswith else None
+    endswith = normalize_fs_path(endswith) if endswith else None
+    contains = normalize_fs_path(contains) if contains else None
+    glob = normalize_fs_path(glob) if glob else None

+    files = [normalize_fs_path(f) for f in os.listdir(directory)]
+    if not case_sensitive:
+        files_normalized = {f.lower(): f for f in files}
+        files = [f.lower() for f in files]
+        startswith = startswith.lower() if startswith else None
+        endswith = endswith.lower() if endswith else None
+        contains = contains.lower() if contains else None
+        glob = glob.lower() if glob else None
+    else:
+        files_normalized = {f: f for f in files}

-def list_directory_startswith(directory_path: str, startswith: str) -> List[str]:
-    """List directory contents and return list of files starting with startswith; returns [] if directory doesn't exist"""
-    if not os.path.isdir(directory_path):
-        return []
-    startswith = normalize_fs_path(startswith)
-    files = [normalize_fs_path(f) for f in os.listdir(directory_path)]
-    return [f for f in files if f.startswith(startswith)]
+    if startswith:
+        files = [f for f in files if f.startswith(startswith)]
+    if endswith:
+        endswith = normalize_fs_path(endswith)
+        files = [f for f in files if f.endswith(endswith)]
+    if contains:
+        contains = normalize_fs_path(contains)
+        files = [f for f in files if contains in f]
+    if glob:
+        glob = normalize_fs_path(glob)
+        flags = re.IGNORECASE if not case_sensitive else 0
+        rule = re.compile(fnmatch.translate(glob), flags)
+        files = [f for f in files if rule.match(f)]
+
+    files = [files_normalized[f] for f in files]
+
+    if include_path:
+        files = [os.path.join(directory, f) for f in files]
+    if is_pathlib:
+        files = [pathlib.Path(f) for f in files]
+
+    return files


 def _open_sql_file(dbname):
@ -381,8 +445,8 @@ def increment_filename_with_count(
    Note: This obviously is subject to race condition so using with caution.
    """
    dest = filepath if isinstance(filepath, pathlib.Path) else pathlib.Path(filepath)
-    dest_files = list_directory_startswith(str(dest.parent), dest.stem)
-    dest_files = [pathlib.Path(f).stem.lower() for f in dest_files]
+    dest_files = list_directory(dest.parent, startswith=dest.stem)
+    dest_files = [f.stem.lower() for f in dest_files]
    dest_new = f"{dest.stem} ({count})" if count else dest.stem
    dest_new = normalize_fs_path(dest_new)

--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@ -4700,7 +4700,14 @@ def test_export_live_edited():
        # basic export
        result = runner.invoke(
            export,
-            [os.path.join(cwd, PHOTOS_DB_RHET), ".", "-V", "--uuid", UUID_LIVE_EDITED],
+            [
+                os.path.join(cwd, PHOTOS_DB_RHET),
+                ".",
+                "-V",
+                "--uuid",
+                UUID_LIVE_EDITED,
+                "--download-missing",
+            ],
        )
        assert result.exit_code == 0
        files = glob.glob("*")
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@ -1,27 +1,33 @@
+import logging
+import os.path
+import pathlib
+import tempfile
+
 import pytest

+import osxphotos
+
 DB_LOCKED_10_12 = "./tests/Test-Lock-10_12.photoslibrary/database/photos.db"
 DB_LOCKED_10_15 = "./tests/Test-Lock-10_15_1.photoslibrary/database/Photos.sqlite"
 DB_UNLOCKED_10_15 = "./tests/Test-10.15.1.photoslibrary/database/photos.db"

 UTI_DICT = {"public.jpeg": "jpeg", "com.canon.cr2-raw-image": "cr2"}

+from osxphotos.utils import (
+    _dd_to_dms,
+    increment_filename,
+    increment_filename_with_count,
+    list_directory,
+)
+

 def test_debug_enable():
-    import logging
-
-    import osxphotos
-
    osxphotos._set_debug(True)
    logger = osxphotos._get_logger()
    assert logger.isEnabledFor(logging.DEBUG)


 def test_debug_disable():
-    import logging
-
-    import osxphotos
-
    osxphotos._set_debug(False)
    logger = osxphotos._get_logger()
    assert not logger.isEnabledFor(logging.DEBUG)
@ -29,14 +35,12 @@ def test_debug_disable():

 def test_dd_to_dms():
    # expands coverage for edge case in _dd_to_dms
-    from osxphotos.utils import _dd_to_dms

    assert _dd_to_dms(-0.001) == (0, 0, -3.6)


@pytest.mark.skip(reason="Fails on some machines")
 def test_get_system_library_path():
-    import osxphotos

    _, major, _ = osxphotos.utils._get_os_version()
    if int(major) < 15:
@ -46,51 +50,73 @@ def test_get_system_library_path():


 def test_db_is_locked_locked():
-    import osxphotos

    assert osxphotos.utils._db_is_locked(DB_LOCKED_10_12)
    assert osxphotos.utils._db_is_locked(DB_LOCKED_10_15)


 def test_db_is_locked_unlocked():
-    import osxphotos

    assert not osxphotos.utils._db_is_locked(DB_UNLOCKED_10_15)


-def test_findfiles():
-    import os.path
-    import tempfile
-
-    from osxphotos.utils import findfiles
+def test_list_directory():
+    """test list_directory"""

    temp_dir = tempfile.TemporaryDirectory(prefix="osxphotos_")
-    fd = open(os.path.join(temp_dir.name, "file1.jpg"), "w+")
-    fd.close
-    fd = open(os.path.join(temp_dir.name, "file2.JPG"), "w+")
-    fd.close
-    files = findfiles("*.jpg", temp_dir.name)
+    temp_dir_name = pathlib.Path(temp_dir.name)
+    file1 = (temp_dir_name / "file1.jpg").touch()
+    file2 = (temp_dir_name / "File2.JPG").touch()
+    file3 = (temp_dir_name / "File.png").touch()
+    file4 = (temp_dir_name / "document.pdf").touch()
+
+    files = list_directory(temp_dir.name, glob="*.jpg")
    assert len(files) == 2
    assert "file1.jpg" in files
-    assert "file2.JPG" in files
+    assert "File2.JPG" in files
+    assert isinstance(files[0], str)
+
+    files = list_directory(temp_dir.name, glob="*.jpg", case_sensitive=True)
+    assert len(files) == 1
+    assert "file1.jpg" in files
+
+    files = list_directory(temp_dir.name, startswith="file")
+    assert len(files) == 3
+
+    files = list_directory(temp_dir.name, endswith="jpg")
+    assert len(files) == 2
+
+    files = list_directory(temp_dir.name, contains="doc")
+    assert len(files) == 1
+    assert "document.pdf" in files
+
+    files = list_directory(temp_dir.name, startswith="File", case_sensitive=True)
+    assert len(files) == 2
+
+    files = list_directory(temp_dir.name, startswith="File", case_sensitive=False)
+    assert len(files) == 3
+
+    files = list_directory(temp_dir.name, startswith="document", include_path=True)
+    assert len(files) == 1
+    assert files[0] == str(pathlib.Path(temp_dir.name) / "document.pdf")
+
+    # test pathlib.Path
+    files = list_directory(temp_dir_name, glob="*.jpg")
+    assert isinstance(files[0], pathlib.Path)
+
+    files = list_directory(temp_dir.name, glob="FooBar*.jpg")
+    assert not files


-def test_findfiles_invalid_dir():
-    import tempfile
-
-    from osxphotos.utils import findfiles
+def test_list_directory_invalid():

    temp_dir = tempfile.TemporaryDirectory(prefix="osxphotos_")
-    files = findfiles("*.jpg", f"{temp_dir.name}/no_such_dir")
+    files = list_directory(f"{temp_dir.name}/no_such_dir", glob="*.jpg")
    assert len(files) == 0


 def test_increment_filename():
    # test that increment_filename works
-    import pathlib
-    import tempfile
-
-    from osxphotos.utils import increment_filename, increment_filename_with_count

    with tempfile.TemporaryDirectory(prefix="osxphotos_") as temp_dir:
        temp_dir = pathlib.Path(temp_dir)