Normalize unicode for issue #208

This commit is contained in:
Rhet Turnbull
2020-08-31 05:24:54 -07:00
parent c9b15186a0
commit a36eb416b1
19 changed files with 99 additions and 33 deletions

View File

@@ -10,6 +10,7 @@ import pathlib
import pprint import pprint
import sys import sys
import time import time
import unicodedata
import click import click
import yaml import yaml
@@ -22,7 +23,7 @@ from pathvalidate import (
import osxphotos import osxphotos
from ._constants import _EXIF_TOOL_URL, _PHOTOS_4_VERSION, _UNKNOWN_PLACE from ._constants import _EXIF_TOOL_URL, _PHOTOS_4_VERSION, _UNKNOWN_PLACE, UNICODE_FORMAT
from ._export_db import ExportDB, ExportDBInMemory from ._export_db import ExportDB, ExportDBInMemory
from ._version import __version__ from ._version import __version__
from .datetime_formatter import DateTimeFormatter from .datetime_formatter import DateTimeFormatter
@@ -40,9 +41,21 @@ OSXPHOTOS_EXPORT_DB = ".osxphotos_export.db"
def verbose(*args, **kwargs): def verbose(*args, **kwargs):
""" print output if verbose flag set """
if VERBOSE: if VERBOSE:
click.echo(*args, **kwargs) click.echo(*args, **kwargs)
def normalize_unicode(value):
    """ Normalize unicode data to the form named by UNICODE_FORMAT

    Args:
        value: a str, a tuple, None, or any other object

    Returns:
        str: the value normalized with unicodedata.normalize()
        tuple: a new tuple with every member normalized by these same rules
        anything else (including None): returned unchanged
    """
    if isinstance(value, str):
        return unicodedata.normalize(UNICODE_FORMAT, value)
    if isinstance(value, tuple):
        # Recurse so that non-str members (e.g. None) pass through untouched,
        # exactly like a non-str scalar does, instead of raising TypeError
        return tuple(normalize_unicode(member) for member in value)
    return value
def get_photos_db(*db_options): def get_photos_db(*db_options):
""" Return path to photos db, select first non-None db_options """ Return path to photos db, select first non-None db_options
@@ -1863,6 +1876,15 @@ def _query(
to_date=to_date, to_date=to_date,
) )
person = normalize_unicode(person)
keyword = normalize_unicode(keyword)
album = normalize_unicode(album)
folder = normalize_unicode(folder)
title = normalize_unicode(title)
description = normalize_unicode(description)
place = normalize_unicode(place)
label = normalize_unicode(label)
if album: if album:
photos = get_photos_by_attribute(photos, "albums", album, ignore_case) photos = get_photos_by_attribute(photos, "albums", album, ignore_case)

View File

@@ -9,6 +9,9 @@ from datetime import datetime
# Apple Epoch is Jan 1, 2001 # Apple Epoch is Jan 1, 2001
TIME_DELTA = (datetime(2001, 1, 1, 0, 0) - datetime(1970, 1, 1, 0, 0)).total_seconds() TIME_DELTA = (datetime(2001, 1, 1, 0, 0) - datetime(1970, 1, 1, 0, 0)).total_seconds()
# Unicode format to use for comparing strings
UNICODE_FORMAT = "NFC"
# which Photos library database versions have been tested # which Photos library database versions have been tested
# Photos 2.0 (10.12.6) == 2622 # Photos 2.0 (10.12.6) == 2622
# Photos 3.0 (10.13.6) == 3301 # Photos 3.0 (10.13.6) == 3301

View File

@@ -1,3 +1,3 @@
""" version info """ """ version info """
__version__ = "0.33.6" __version__ = "0.33.7"

View File

@@ -4,7 +4,7 @@
import logging import logging
from .._constants import _DB_TABLE_NAMES, _PHOTOS_4_VERSION from .._constants import _DB_TABLE_NAMES, _PHOTOS_4_VERSION
from ..utils import _open_sql_file from ..utils import _open_sql_file, normalize_unicode
from .photosdb_utils import get_db_version from .photosdb_utils import get_db_version
@@ -121,7 +121,7 @@ def _process_faceinfo_4(photosdb):
face["asset_uuid"] = asset_uuid face["asset_uuid"] = asset_uuid
face["uuid"] = row[2] face["uuid"] = row[2]
face["person"] = person_id face["person"] = person_id
face["fullname"] = row[3] face["fullname"] = normalize_unicode(row[3])
face["sourcewidth"] = row[7] face["sourcewidth"] = row[7]
face["sourceheight"] = row[8] face["sourceheight"] = row[8]
face["centerx"] = row[9] face["centerx"] = row[9]
@@ -282,7 +282,7 @@ def _process_faceinfo_5(photosdb):
face["asset_uuid"] = asset_uuid face["asset_uuid"] = asset_uuid
face["uuid"] = row[2] face["uuid"] = row[2]
face["person"] = person_pk face["person"] = person_pk
face["fullname"] = row[4] face["fullname"] = normalize_unicode(row[4])
face["agetype"] = row[5] face["agetype"] = row[5]
face["baldtype"] = row[6] face["baldtype"] = row[6]
face["eyemakeuptype"] = row[7] face["eyemakeuptype"] = row[7]

View File

@@ -10,7 +10,7 @@ import uuid as uuidlib
from pprint import pformat from pprint import pformat
from .._constants import _PHOTOS_4_VERSION, SEARCH_CATEGORY_LABEL from .._constants import _PHOTOS_4_VERSION, SEARCH_CATEGORY_LABEL
from ..utils import _db_is_locked, _debug, _open_sql_file from ..utils import _db_is_locked, _debug, _open_sql_file, normalize_unicode
""" """
This module should be imported in the class definition of PhotosDB in photosdb.py This module should be imported in the class definition of PhotosDB in photosdb.py
@@ -112,8 +112,8 @@ def _process_searchinfo(self):
record["groupid"] = row[3] record["groupid"] = row[3]
record["category"] = row[4] record["category"] = row[4]
record["owning_groupid"] = row[5] record["owning_groupid"] = row[5]
record["content_string"] = row[6].replace("\x00", "") record["content_string"] = normalize_unicode(row[6].replace("\x00", ""))
record["normalized_string"] = row[7].replace("\x00", "") record["normalized_string"] = normalize_unicode(row[7].replace("\x00", ""))
record["lookup_identifier"] = row[8] record["lookup_identifier"] = row[8]
try: try:
@@ -147,9 +147,10 @@ def _process_searchinfo(self):
"_db_searchinfo_labels_normalized: \n" "_db_searchinfo_labels_normalized: \n"
+ pformat(self._db_searchinfo_labels_normalized) + pformat(self._db_searchinfo_labels_normalized)
) )
conn.close() conn.close()
@property @property
def labels(self): def labels(self):
""" return list of all search info labels found in the library """ """ return list of all search info labels found in the library """

View File

@@ -44,6 +44,7 @@ from ..utils import (
_get_os_version, _get_os_version,
_open_sql_file, _open_sql_file,
get_last_library_path, get_last_library_path,
normalize_unicode,
) )
from .photosdb_utils import get_db_model_version, get_db_version from .photosdb_utils import get_db_model_version, get_db_version
@@ -713,7 +714,7 @@ class PhotosDB:
for album in c: for album in c:
self._dbalbum_details[album[0]] = { self._dbalbum_details[album[0]] = {
"_uuid": album[0], "_uuid": album[0],
"title": album[1], "title": normalize_unicode(album[1]),
"cloudlibrarystate": album[2], "cloudlibrarystate": album[2],
"cloudidentifier": album[3], "cloudidentifier": album[3],
"intrash": False if album[4] == 0 else True, "intrash": False if album[4] == 0 else True,
@@ -760,7 +761,7 @@ class PhotosDB:
self._dbfolder_details[uuid] = { self._dbfolder_details[uuid] = {
"_uuid": row[0], "_uuid": row[0],
"modelId": row[1], "modelId": row[1],
"name": row[2], "name": normalize_unicode(row[2]),
"isMagic": row[3], "isMagic": row[3],
"intrash": row[4], "intrash": row[4],
"folderType": row[5], "folderType": row[5],
@@ -963,7 +964,7 @@ class PhotosDB:
self._dbphotos[uuid]["volumeId"] = row[10] self._dbphotos[uuid]["volumeId"] = row[10]
self._dbphotos[uuid]["imagePath"] = row[11] self._dbphotos[uuid]["imagePath"] = row[11]
self._dbphotos[uuid]["extendedDescription"] = row[12] self._dbphotos[uuid]["extendedDescription"] = row[12]
self._dbphotos[uuid]["name"] = row[13] self._dbphotos[uuid]["name"] = normalize_unicode(row[13])
self._dbphotos[uuid]["isMissing"] = row[14] self._dbphotos[uuid]["isMissing"] = row[14]
self._dbphotos[uuid]["originalFilename"] = row[15] self._dbphotos[uuid]["originalFilename"] = row[15]
self._dbphotos[uuid]["favorite"] = row[16] self._dbphotos[uuid]["favorite"] = row[16]
@@ -1608,7 +1609,7 @@ class PhotosDB:
for album in c: for album in c:
self._dbalbum_details[album[0]] = { self._dbalbum_details[album[0]] = {
"_uuid": album[0], "_uuid": album[0],
"title": album[1], "title": normalize_unicode(album[1]),
"cloudlocalstate": album[2], "cloudlocalstate": album[2],
"cloudownerfirstname": album[3], "cloudownerfirstname": album[3],
"cloudownderlastname": album[4], "cloudownderlastname": album[4],
@@ -1683,12 +1684,13 @@ class PhotosDB:
JOIN ZKEYWORD ON ZKEYWORD.Z_PK = {keyword_join} """ JOIN ZKEYWORD ON ZKEYWORD.Z_PK = {keyword_join} """
) )
for keyword in c: for keyword in c:
keyword_title = normalize_unicode(keyword[0])
if not keyword[1] in self._dbkeywords_uuid: if not keyword[1] in self._dbkeywords_uuid:
self._dbkeywords_uuid[keyword[1]] = [] self._dbkeywords_uuid[keyword[1]] = []
if not keyword[0] in self._dbkeywords_keyword: if not keyword_title in self._dbkeywords_keyword:
self._dbkeywords_keyword[keyword[0]] = [] self._dbkeywords_keyword[keyword_title] = []
self._dbkeywords_uuid[keyword[1]].append(keyword[0]) self._dbkeywords_uuid[keyword[1]].append(keyword[0])
self._dbkeywords_keyword[keyword[0]].append(keyword[1]) self._dbkeywords_keyword[keyword_title].append(keyword[1])
if _debug(): if _debug():
logging.debug(f"Finished walking through keywords") logging.debug(f"Finished walking through keywords")
@@ -1795,7 +1797,7 @@ class PhotosDB:
info["modelID"] = None info["modelID"] = None
info["masterUuid"] = None info["masterUuid"] = None
info["masterFingerprint"] = row[1] info["masterFingerprint"] = row[1]
info["name"] = row[2] info["name"] = normalize_unicode(row[2])
# There are sometimes negative values for lastmodifieddate in the database # There are sometimes negative values for lastmodifieddate in the database
# I don't know what these mean but they will raise exception in datetime if # I don't know what these mean but they will raise exception in datetime if
@@ -2027,7 +2029,7 @@ class PhotosDB:
for row in c: for row in c:
uuid = row[0] uuid = row[0]
if uuid in self._dbphotos: if uuid in self._dbphotos:
self._dbphotos[uuid]["extendedDescription"] = row[1] self._dbphotos[uuid]["extendedDescription"] = normalize_unicode(row[1])
else: else:
if _debug(): if _debug():
logging.debug( logging.debug(

View File

@@ -11,6 +11,9 @@ from collections import namedtuple # pylint: disable=syntax-error
import yaml import yaml
from bpylist import archiver from bpylist import archiver
from ._constants import UNICODE_FORMAT
from .utils import normalize_unicode
# postal address information, returned by PlaceInfo.address # postal address information, returned by PlaceInfo.address
PostalAddress = namedtuple( PostalAddress = namedtuple(
"PostalAddress", "PostalAddress",
@@ -76,12 +79,12 @@ class PLRevGeoLocationInfo:
geoServiceProvider, geoServiceProvider,
postalAddress, postalAddress,
): ):
self.addressString = addressString self.addressString = normalize_unicode(addressString)
self.countryCode = countryCode self.countryCode = countryCode
self.mapItem = mapItem self.mapItem = mapItem
self.isHome = isHome self.isHome = isHome
self.compoundNames = compoundNames self.compoundNames = normalize_unicode(compoundNames)
self.compoundSecondaryNames = compoundSecondaryNames self.compoundSecondaryNames = normalize_unicode(compoundSecondaryNames)
self.version = version self.version = version
self.geoServiceProvider = geoServiceProvider self.geoServiceProvider = geoServiceProvider
self.postalAddress = postalAddress self.postalAddress = postalAddress
@@ -183,7 +186,7 @@ class PLRevGeoMapItemAdditionalPlaceInfo:
def __init__(self, area, name, placeType, dominantOrderType): def __init__(self, area, name, placeType, dominantOrderType):
self.area = area self.area = area
self.name = name self.name = normalize_unicode(name)
self.placeType = placeType self.placeType = placeType
self.dominantOrderType = dominantOrderType self.dominantOrderType = dominantOrderType
@@ -232,13 +235,13 @@ class CNPostalAddress:
_subLocality, _subLocality,
): ):
self._ISOCountryCode = _ISOCountryCode self._ISOCountryCode = _ISOCountryCode
self._city = _city self._city = normalize_unicode(_city)
self._country = _country self._country = normalize_unicode(_country)
self._postalCode = _postalCode self._postalCode = normalize_unicode(_postalCode)
self._state = _state self._state = normalize_unicode(_state)
self._street = _street self._street = normalize_unicode(_street)
self._subAdministrativeArea = _subAdministrativeArea self._subAdministrativeArea = normalize_unicode(_subAdministrativeArea)
self._subLocality = _subLocality self._subLocality = normalize_unicode(_subLocality)
def __eq__(self, other): def __eq__(self, other):
return all( return all(
@@ -414,9 +417,9 @@ class PlaceInfo4(PlaceInfo):
# 2: type # 2: type
# 3: area # 3: area
try: try:
places_dict[p[2]].append((p[1], p[3])) places_dict[p[2]].append((normalize_unicode(p[1]), p[3]))
except KeyError: except KeyError:
places_dict[p[2]] = [(p[1], p[3])] places_dict[p[2]] = [(normalize_unicode(p[1]), p[3])]
# build list to populate PlaceNames tuple # build list to populate PlaceNames tuple
# initialize with empty lists for each field in PlaceNames # initialize with empty lists for each field in PlaceNames

View File

@@ -10,6 +10,7 @@ import sqlite3
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
import unicodedata
import urllib.parse import urllib.parse
from plistlib import load as plistload from plistlib import load as plistload
@@ -18,6 +19,7 @@ import CoreServices
import objc import objc
from Foundation import * from Foundation import *
from ._constants import UNICODE_FORMAT
from .fileutil import FileUtil from .fileutil import FileUtil
_DEBUG = False _DEBUG = False
@@ -352,3 +354,13 @@ def _db_is_locked(dbname):
# attr = xattr.xattr(filepath) # attr = xattr.xattr(filepath)
# uuid_bytes = bytes(uuid, 'utf-8') # uuid_bytes = bytes(uuid, 'utf-8')
# attr.set(OSXPHOTOS_XATTR_UUID, uuid_bytes) # attr.set(OSXPHOTOS_XATTR_UUID, uuid_bytes)
def normalize_unicode(value):
    """ Normalize unicode data to the form named by UNICODE_FORMAT

    Args:
        value: a str, None, or a tuple/list whose members are themselves
            str, None, or nested tuples/lists

    Returns:
        None if value is None
        str normalized with unicodedata.normalize() if value is a str
        a new tuple (for a tuple) or list (for a list) with every member
        normalized by these same rules

    Raises:
        ValueError: if value, or any member of a tuple/list, is of any other type
    """
    if value is None:
        return None
    if isinstance(value, str):
        return unicodedata.normalize(UNICODE_FORMAT, value)
    # Containers are accepted so multi-valued fields can be normalized with
    # the same helper used for single strings
    if isinstance(value, tuple):
        return tuple(normalize_unicode(member) for member in value)
    if isinstance(value, list):
        return [normalize_unicode(member) for member in value]
    raise ValueError("value must be str, or a tuple/list of str")

View File

@@ -7,7 +7,7 @@
<key>hostuuid</key> <key>hostuuid</key>
<string>9575E48B-8D5F-5654-ABAC-4431B1167324</string> <string>9575E48B-8D5F-5654-ABAC-4431B1167324</string>
<key>pid</key> <key>pid</key>
<integer>1847</integer> <integer>1942</integer>
<key>processname</key> <key>processname</key>
<string>photolibraryd</string> <string>photolibraryd</string>
<key>uid</key> <key>uid</key>

View File

@@ -33,6 +33,7 @@ ALBUMS = [
"Raw", "Raw",
"I have a deleted twin", # there's an empty album with same name that has been deleted "I have a deleted twin", # there's an empty album with same name that has been deleted
"EmptyAlbum", "EmptyAlbum",
"2018-10 - Sponsion, Museum, Frühstück, Römermuseum",
] ]
KEYWORDS_DICT = { KEYWORDS_DICT = {
"Kids": 4, "Kids": 4,
@@ -53,6 +54,7 @@ ALBUM_DICT = {
"Raw": 4, "Raw": 4,
"I have a deleted twin": 1, "I have a deleted twin": 1,
"EmptyAlbum": 0, "EmptyAlbum": 0,
"2018-10 - Sponsion, Museum, Frühstück, Römermuseum": 1,
} # Note: there are 2 albums named "Test Album" for testing duplicate album names } # Note: there are 2 albums named "Test Album" for testing duplicate album names
UUID_DICT = { UUID_DICT = {

View File

@@ -58,6 +58,8 @@ CLI_EXPORT_FILENAMES = [
CLI_EXPORT_FILENAMES_ALBUM = ["Pumkins1.jpg", "Pumkins2.jpg", "Pumpkins3.jpg"] CLI_EXPORT_FILENAMES_ALBUM = ["Pumkins1.jpg", "Pumkins2.jpg", "Pumpkins3.jpg"]
CLI_EXPORT_FILENAMES_ALBUM_UNICODE = ["IMG_4547.jpg"]
CLI_EXPORT_FILENAMES_DELETED_TWIN = ["wedding.jpg", "wedding_edited.jpeg"] CLI_EXPORT_FILENAMES_DELETED_TWIN = ["wedding.jpg", "wedding_edited.jpeg"]
CLI_EXPORT_EDITED_SUFFIX = "_bearbeiten" CLI_EXPORT_EDITED_SUFFIX = "_bearbeiten"
@@ -451,7 +453,6 @@ def test_query_uuid():
"--json", "--json",
"--db", "--db",
os.path.join(cwd, CLI_PHOTOS_DB), os.path.join(cwd, CLI_PHOTOS_DB),
# "./tests/Test-10.15.1.photoslibrary",
"--uuid", "--uuid",
"D79B8D77-BFFC-460B-9312-034F2877D35B", "D79B8D77-BFFC-460B-9312-034F2877D35B",
], ],
@@ -1816,6 +1817,26 @@ def test_export_album():
files = glob.glob("*") files = glob.glob("*")
assert sorted(files) == sorted(CLI_EXPORT_FILENAMES_ALBUM) assert sorted(files) == sorted(CLI_EXPORT_FILENAMES_ALBUM)
def test_export_album_unicode_name():
    """Test export of an album with non-English characters in name

    The album name is passed on the command line in both NFC (precomposed)
    and NFD (decomposed) form so the test does not depend on how this source
    file happens to encode the literal; both spellings must select the album.
    """
    import glob
    import os
    import os.path
    import unicodedata

    from osxphotos.__main__ import export

    album_name = "2018-10 - Sponsion, Museum, Frühstück, Römermuseum"
    runner = CliRunner()
    cwd = os.getcwd()
    for form in ("NFC", "NFD"):
        # each form gets its own empty directory so exported files don't mix
        # pylint: disable=not-context-manager
        with runner.isolated_filesystem():
            result = runner.invoke(
                export,
                [
                    os.path.join(cwd, PHOTOS_DB_15_6),
                    ".",
                    "--album",
                    unicodedata.normalize(form, album_name),
                    "-V",
                ],
            )
            assert result.exit_code == 0
            files = glob.glob("*")
            assert sorted(files) == sorted(CLI_EXPORT_FILENAMES_ALBUM_UNICODE)
def test_export_album_deleted_twin(): def test_export_album_deleted_twin():
"""Test export of an album where album of same name has been deleted """ """Test export of an album where album of same name has been deleted """