diff --git a/osxphotos/__main__.py b/osxphotos/__main__.py index 8e4749f4..023c401a 100644 --- a/osxphotos/__main__.py +++ b/osxphotos/__main__.py @@ -10,6 +10,7 @@ import pathlib import pprint import sys import time +import unicodedata import click import yaml @@ -22,7 +23,7 @@ from pathvalidate import ( import osxphotos -from ._constants import _EXIF_TOOL_URL, _PHOTOS_4_VERSION, _UNKNOWN_PLACE +from ._constants import _EXIF_TOOL_URL, _PHOTOS_4_VERSION, _UNKNOWN_PLACE, UNICODE_FORMAT from ._export_db import ExportDB, ExportDBInMemory from ._version import __version__ from .datetime_formatter import DateTimeFormatter @@ -40,9 +41,21 @@ OSXPHOTOS_EXPORT_DB = ".osxphotos_export.db" def verbose(*args, **kwargs): + """ print output if verbose flag set """ if VERBOSE: click.echo(*args, **kwargs) +def normalize_unicode(value): + """ normalize unicode data """ + if value is not None: + if isinstance(value, tuple): + return tuple(unicodedata.normalize(UNICODE_FORMAT, v) for v in value) + elif isinstance(value, str): + return unicodedata.normalize(UNICODE_FORMAT, value) + else: + return value + else: + return None def get_photos_db(*db_options): """ Return path to photos db, select first non-None db_options @@ -1863,6 +1876,15 @@ def _query( to_date=to_date, ) + person = normalize_unicode(person) + keyword = normalize_unicode(keyword) + album = normalize_unicode(album) + folder = normalize_unicode(folder) + title = normalize_unicode(title) + description = normalize_unicode(description) + place = normalize_unicode(place) + label = normalize_unicode(label) + if album: photos = get_photos_by_attribute(photos, "albums", album, ignore_case) diff --git a/osxphotos/_constants.py b/osxphotos/_constants.py index 82b0c578..fde74c14 100644 --- a/osxphotos/_constants.py +++ b/osxphotos/_constants.py @@ -9,6 +9,9 @@ from datetime import datetime # Apple Epoch is Jan 1, 2001 TIME_DELTA = (datetime(2001, 1, 1, 0, 0) - datetime(1970, 1, 1, 0, 0)).total_seconds() +# Unicode format to use for comparing strings +UNICODE_FORMAT = "NFC" + # which Photos library database versions have been tested # Photos 2.0 (10.12.6) == 2622 # Photos 3.0 (10.13.6) == 3301 diff --git a/osxphotos/_version.py b/osxphotos/_version.py index 80d20f8d..22cb5725 100644 --- a/osxphotos/_version.py +++ b/osxphotos/_version.py @@ -1,3 +1,3 @@ """ version info """ -__version__ = "0.33.6" +__version__ = "0.33.7" diff --git a/osxphotos/photosdb/_photosdb_process_faceinfo.py b/osxphotos/photosdb/_photosdb_process_faceinfo.py index 53212ce5..7f386f64 100644 --- a/osxphotos/photosdb/_photosdb_process_faceinfo.py +++ b/osxphotos/photosdb/_photosdb_process_faceinfo.py @@ -4,7 +4,7 @@ import logging from .._constants import _DB_TABLE_NAMES, _PHOTOS_4_VERSION -from ..utils import _open_sql_file +from ..utils import _open_sql_file, normalize_unicode from .photosdb_utils import get_db_version @@ -121,7 +121,7 @@ def _process_faceinfo_4(photosdb): face["asset_uuid"] = asset_uuid face["uuid"] = row[2] face["person"] = person_id - face["fullname"] = row[3] + face["fullname"] = normalize_unicode(row[3]) face["sourcewidth"] = row[7] face["sourceheight"] = row[8] face["centerx"] = row[9] @@ -282,7 +282,7 @@ def _process_faceinfo_5(photosdb): face["asset_uuid"] = asset_uuid face["uuid"] = row[2] face["person"] = person_pk - face["fullname"] = row[4] + face["fullname"] = normalize_unicode(row[4]) face["agetype"] = row[5] face["baldtype"] = row[6] face["eyemakeuptype"] = row[7] diff --git a/osxphotos/photosdb/_photosdb_process_searchinfo.py b/osxphotos/photosdb/_photosdb_process_searchinfo.py index fd948c87..6f5aec08 100644 --- a/osxphotos/photosdb/_photosdb_process_searchinfo.py +++ b/osxphotos/photosdb/_photosdb_process_searchinfo.py @@ -10,7 +10,7 @@ import uuid as uuidlib from pprint import pformat from .._constants import _PHOTOS_4_VERSION, SEARCH_CATEGORY_LABEL -from ..utils import _db_is_locked, _debug, _open_sql_file +from ..utils import _db_is_locked, _debug, _open_sql_file, normalize_unicode """ This module should be imported in the class defintion of PhotosDB in photosdb.py @@ -112,8 +112,8 @@ def _process_searchinfo(self): record["groupid"] = row[3] record["category"] = row[4] record["owning_groupid"] = row[5] - record["content_string"] = row[6].replace("\x00", "") - record["normalized_string"] = row[7].replace("\x00", "") + record["content_string"] = normalize_unicode(row[6].replace("\x00", "")) + record["normalized_string"] = normalize_unicode(row[7].replace("\x00", "")) record["lookup_identifier"] = row[8] try: @@ -147,9 +147,10 @@ def _process_searchinfo(self): "_db_searchinfo_labels_normalized: \n" + pformat(self._db_searchinfo_labels_normalized) ) - + conn.close() + @property def labels(self): """ return list of all search info labels found in the library """ diff --git a/osxphotos/photosdb/photosdb.py b/osxphotos/photosdb/photosdb.py index ed178b50..144898ae 100644 --- a/osxphotos/photosdb/photosdb.py +++ b/osxphotos/photosdb/photosdb.py @@ -44,6 +44,7 @@ from ..utils import ( _get_os_version, _open_sql_file, get_last_library_path, + normalize_unicode, ) from .photosdb_utils import get_db_model_version, get_db_version @@ -713,7 +714,7 @@ class PhotosDB: for album in c: self._dbalbum_details[album[0]] = { "_uuid": album[0], - "title": album[1], + "title": normalize_unicode(album[1]), "cloudlibrarystate": album[2], "cloudidentifier": album[3], "intrash": False if album[4] == 0 else True, @@ -760,7 +761,7 @@ class PhotosDB: self._dbfolder_details[uuid] = { "_uuid": row[0], "modelId": row[1], - "name": row[2], + "name": normalize_unicode(row[2]), "isMagic": row[3], "intrash": row[4], "folderType": row[5], @@ -963,7 +964,7 @@ class PhotosDB: self._dbphotos[uuid]["volumeId"] = row[10] self._dbphotos[uuid]["imagePath"] = row[11] self._dbphotos[uuid]["extendedDescription"] = row[12] - self._dbphotos[uuid]["name"] = row[13] + self._dbphotos[uuid]["name"] = normalize_unicode(row[13]) self._dbphotos[uuid]["isMissing"] = row[14] self._dbphotos[uuid]["originalFilename"] = row[15] self._dbphotos[uuid]["favorite"] = row[16] @@ -1608,7 +1609,7 @@ class PhotosDB: for album in c: self._dbalbum_details[album[0]] = { "_uuid": album[0], - "title": album[1], + "title": normalize_unicode(album[1]), "cloudlocalstate": album[2], "cloudownerfirstname": album[3], "cloudownderlastname": album[4], @@ -1683,12 +1684,13 @@ class PhotosDB: JOIN ZKEYWORD ON ZKEYWORD.Z_PK = {keyword_join} """ ) for keyword in c: + keyword_title = normalize_unicode(keyword[0]) if not keyword[1] in self._dbkeywords_uuid: self._dbkeywords_uuid[keyword[1]] = [] - if not keyword[0] in self._dbkeywords_keyword: - self._dbkeywords_keyword[keyword[0]] = [] + if not keyword_title in self._dbkeywords_keyword: + self._dbkeywords_keyword[keyword_title] = [] self._dbkeywords_uuid[keyword[1]].append(keyword[0]) - self._dbkeywords_keyword[keyword[0]].append(keyword[1]) + self._dbkeywords_keyword[keyword_title].append(keyword[1]) if _debug(): logging.debug(f"Finished walking through keywords") @@ -1795,7 +1797,7 @@ class PhotosDB: info["modelID"] = None info["masterUuid"] = None info["masterFingerprint"] = row[1] - info["name"] = row[2] + info["name"] = normalize_unicode(row[2]) # There are sometimes negative values for lastmodifieddate in the database # I don't know what these mean but they will raise exception in datetime if @@ -2027,7 +2029,7 @@ class PhotosDB: for row in c: uuid = row[0] if uuid in self._dbphotos: - self._dbphotos[uuid]["extendedDescription"] = row[1] + self._dbphotos[uuid]["extendedDescription"] = normalize_unicode(row[1]) else: if _debug(): logging.debug( diff --git a/osxphotos/placeinfo.py b/osxphotos/placeinfo.py index 7c52d987..18de2a0c 100644 --- a/osxphotos/placeinfo.py +++ b/osxphotos/placeinfo.py @@ -11,6 +11,9 @@ from collections import namedtuple # pylint: disable=syntax-error import yaml from bpylist import archiver +from ._constants import UNICODE_FORMAT +from .utils import normalize_unicode + # postal address information, returned by PlaceInfo.address PostalAddress = namedtuple( "PostalAddress", @@ -76,12 +79,12 @@ class PLRevGeoLocationInfo: geoServiceProvider, postalAddress, ): - self.addressString = addressString + self.addressString = normalize_unicode(addressString) self.countryCode = countryCode self.mapItem = mapItem self.isHome = isHome - self.compoundNames = compoundNames - self.compoundSecondaryNames = compoundSecondaryNames + self.compoundNames = normalize_unicode(compoundNames) + self.compoundSecondaryNames = normalize_unicode(compoundSecondaryNames) self.version = version self.geoServiceProvider = geoServiceProvider self.postalAddress = postalAddress @@ -183,7 +186,7 @@ class PLRevGeoMapItemAdditionalPlaceInfo: def __init__(self, area, name, placeType, dominantOrderType): self.area = area - self.name = name + self.name = normalize_unicode(name) self.placeType = placeType self.dominantOrderType = dominantOrderType @@ -232,13 +235,13 @@ class CNPostalAddress: _subLocality, ): self._ISOCountryCode = _ISOCountryCode - self._city = _city - self._country = _country - self._postalCode = _postalCode - self._state = _state - self._street = _street - self._subAdministrativeArea = _subAdministrativeArea - self._subLocality = _subLocality + self._city = normalize_unicode(_city) + self._country = normalize_unicode(_country) + self._postalCode = normalize_unicode(_postalCode) + self._state = normalize_unicode(_state) + self._street = normalize_unicode(_street) + self._subAdministrativeArea = normalize_unicode(_subAdministrativeArea) + self._subLocality = normalize_unicode(_subLocality) def __eq__(self, other): return all( @@ -414,9 +417,9 @@ class PlaceInfo4(PlaceInfo): # 2: type # 3: area try: - places_dict[p[2]].append((p[1], p[3])) + places_dict[p[2]].append((normalize_unicode(p[1]), p[3])) except KeyError: - places_dict[p[2]] = [(p[1], p[3])] + places_dict[p[2]] = [(normalize_unicode(p[1]), p[3])] # build list to populate PlaceNames tuple # initialize with empty lists for each field in PlaceNames diff --git a/osxphotos/utils.py b/osxphotos/utils.py index 28f7bb83..2ec3f4bb 100644 --- a/osxphotos/utils.py +++ b/osxphotos/utils.py @@ -10,6 +10,7 @@ import sqlite3 import subprocess import sys import tempfile +import unicodedata import urllib.parse from plistlib import load as plistload @@ -18,6 +19,7 @@ import CoreServices import objc from Foundation import * +from ._constants import UNICODE_FORMAT from .fileutil import FileUtil _DEBUG = False @@ -352,3 +354,13 @@ def _db_is_locked(dbname): # attr = xattr.xattr(filepath) # uuid_bytes = bytes(uuid, 'utf-8') # attr.set(OSXPHOTOS_XATTR_UUID, uuid_bytes) + + +def normalize_unicode(value): + """ normalize unicode data """ + if value is not None: + if not isinstance(value, str): + raise ValueError("value must be str") + return unicodedata.normalize(UNICODE_FORMAT, value) + else: + return None diff --git a/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite-shm b/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite-shm index 49f2dac9..a5e29bd3 100644 Binary files a/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite-shm and b/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite-shm differ diff --git a/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite-wal b/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite-wal index 0bf6febb..a7b92702 100644 Binary files a/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite-wal and b/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite-wal differ diff --git a/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite.lock b/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite.lock index 5b4d48b7..5b9b78f5 100644 --- a/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite.lock +++ b/tests/Test-10.15.6.photoslibrary/database/Photos.sqlite.lock @@ -7,7 +7,7 @@ hostuuid 9575E48B-8D5F-5654-ABAC-4431B1167324 pid - 1847 + 1942 processname photolibraryd uid diff --git a/tests/Test-10.15.6.photoslibrary/private/com.apple.photoanalysisd/caches/graph/CLSLocationCache.sqlite-shm b/tests/Test-10.15.6.photoslibrary/private/com.apple.photoanalysisd/caches/graph/CLSLocationCache.sqlite-shm index 8f27ed5e..eb0386c2 100644 Binary files a/tests/Test-10.15.6.photoslibrary/private/com.apple.photoanalysisd/caches/graph/CLSLocationCache.sqlite-shm and b/tests/Test-10.15.6.photoslibrary/private/com.apple.photoanalysisd/caches/graph/CLSLocationCache.sqlite-shm differ diff --git a/tests/Test-10.15.6.photoslibrary/private/com.apple.photoanalysisd/caches/graph/changetoken.plist b/tests/Test-10.15.6.photoslibrary/private/com.apple.photoanalysisd/caches/graph/changetoken.plist index 8eb87588..3af2e280 100644 Binary files a/tests/Test-10.15.6.photoslibrary/private/com.apple.photoanalysisd/caches/graph/changetoken.plist and b/tests/Test-10.15.6.photoslibrary/private/com.apple.photoanalysisd/caches/graph/changetoken.plist differ diff --git a/tests/Test-10.15.6.photoslibrary/private/com.apple.photolibraryd/caches/clientservertransaction/87554DE2-780A-4ED2-83B2-324CCE3DEE45 b/tests/Test-10.15.6.photoslibrary/private/com.apple.photolibraryd/caches/clientservertransaction/87554DE2-780A-4ED2-83B2-324CCE3DEE45 new file mode 100644 index 00000000..5142c798 Binary files /dev/null and b/tests/Test-10.15.6.photoslibrary/private/com.apple.photolibraryd/caches/clientservertransaction/87554DE2-780A-4ED2-83B2-324CCE3DEE45 differ diff --git a/tests/Test-10.15.6.photoslibrary/resources/journals/Album-change.plj b/tests/Test-10.15.6.photoslibrary/resources/journals/Album-change.plj index b20737cd..5ef0bce4 100644 Binary files a/tests/Test-10.15.6.photoslibrary/resources/journals/Album-change.plj and b/tests/Test-10.15.6.photoslibrary/resources/journals/Album-change.plj differ diff --git a/tests/Test-10.15.6.photoslibrary/resources/journals/Folder-change.plj b/tests/Test-10.15.6.photoslibrary/resources/journals/Folder-change.plj index bfc0debb..f70ba701 100644 Binary files a/tests/Test-10.15.6.photoslibrary/resources/journals/Folder-change.plj and b/tests/Test-10.15.6.photoslibrary/resources/journals/Folder-change.plj differ diff --git a/tests/Test-10.15.6.photoslibrary/resources/journals/HistoryToken.plist b/tests/Test-10.15.6.photoslibrary/resources/journals/HistoryToken.plist index 8b132c68..55617142 100644 Binary files a/tests/Test-10.15.6.photoslibrary/resources/journals/HistoryToken.plist and b/tests/Test-10.15.6.photoslibrary/resources/journals/HistoryToken.plist differ diff --git a/tests/test_catalina_10_15_6.py b/tests/test_catalina_10_15_6.py index 0ae421d4..ca942446 100644 --- a/tests/test_catalina_10_15_6.py +++ b/tests/test_catalina_10_15_6.py @@ -33,6 +33,7 @@ ALBUMS = [ "Raw", "I have a deleted twin", # there's an empty album with same name that has been deleted "EmptyAlbum", + "2018-10 - Sponsion, Museum, Frühstück, Römermuseum", ] KEYWORDS_DICT = { "Kids": 4, @@ -53,6 +54,7 @@ ALBUM_DICT = { "Raw": 4, "I have a deleted twin": 1, "EmptyAlbum": 0, + "2018-10 - Sponsion, Museum, Frühstück, Römermuseum": 1, } # Note: there are 2 albums named "Test Album" for testing duplicate album names UUID_DICT = { diff --git a/tests/test_cli.py b/tests/test_cli.py index 065f932d..b9d672b9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -58,6 +58,8 @@ CLI_EXPORT_FILENAMES = [ CLI_EXPORT_FILENAMES_ALBUM = ["Pumkins1.jpg", "Pumkins2.jpg", "Pumpkins3.jpg"] +CLI_EXPORT_FILENAMES_ALBUM_UNICODE = ["IMG_4547.jpg"] + CLI_EXPORT_FILENAMES_DELETED_TWIN = ["wedding.jpg", "wedding_edited.jpeg"] CLI_EXPORT_EDITED_SUFFIX = "_bearbeiten" @@ -451,7 +453,6 @@ def test_query_uuid(): "--json", "--db", os.path.join(cwd, CLI_PHOTOS_DB), - # "./tests/Test-10.15.1.photoslibrary", "--uuid", "D79B8D77-BFFC-460B-9312-034F2877D35B", ], @@ -1816,6 +1817,26 @@ def test_export_album(): files = glob.glob("*") assert sorted(files) == sorted(CLI_EXPORT_FILENAMES_ALBUM) +def test_export_album_unicode_name(): + """Test export of an album with non-English characters in name """ + import glob + import os + import os.path + from osxphotos.__main__ import export + + runner = CliRunner() + cwd = os.getcwd() + # pylint: disable=not-context-manager + with runner.isolated_filesystem(): + result = runner.invoke( + export, + [os.path.join(cwd, PHOTOS_DB_15_6), ".", "--album", "2018-10 - Sponsion, Museum, Frühstück, Römermuseum", "-V"], + ) + assert result.exit_code == 0 + files = glob.glob("*") + assert sorted(files) == sorted(CLI_EXPORT_FILENAMES_ALBUM_UNICODE) + + def test_export_album_deleted_twin(): """Test export of an album where album of same name has been deleted """