Normalize unicode for issue #208
This commit is contained in:
@@ -10,6 +10,7 @@ import pathlib
|
||||
import pprint
|
||||
import sys
|
||||
import time
|
||||
import unicodedata
|
||||
|
||||
import click
|
||||
import yaml
|
||||
@@ -22,7 +23,7 @@ from pathvalidate import (
|
||||
|
||||
import osxphotos
|
||||
|
||||
from ._constants import _EXIF_TOOL_URL, _PHOTOS_4_VERSION, _UNKNOWN_PLACE
|
||||
from ._constants import _EXIF_TOOL_URL, _PHOTOS_4_VERSION, _UNKNOWN_PLACE, UNICODE_FORMAT
|
||||
from ._export_db import ExportDB, ExportDBInMemory
|
||||
from ._version import __version__
|
||||
from .datetime_formatter import DateTimeFormatter
|
||||
@@ -40,9 +41,21 @@ OSXPHOTOS_EXPORT_DB = ".osxphotos_export.db"
|
||||
|
||||
|
||||
def verbose(*args, **kwargs):
    """ Pass all arguments through to click.echo, but only if VERBOSE is set """
    if not VERBOSE:
        return
    click.echo(*args, **kwargs)
|
||||
|
||||
def normalize_unicode(value):
    """ Normalize unicode text to the form named by UNICODE_FORMAT

    A str is normalized directly; a tuple is normalized element by element
    and returned as a new tuple.  None yields None and a value of any other
    type is handed back unchanged.
    """
    if value is None:
        return None
    if isinstance(value, str):
        return unicodedata.normalize(UNICODE_FORMAT, value)
    if isinstance(value, tuple):
        return tuple(unicodedata.normalize(UNICODE_FORMAT, item) for item in value)
    # anything else is not text: pass it through untouched
    return value
|
||||
|
||||
def get_photos_db(*db_options):
|
||||
""" Return path to photos db, select first non-None db_options
|
||||
@@ -1863,6 +1876,15 @@ def _query(
|
||||
to_date=to_date,
|
||||
)
|
||||
|
||||
person = normalize_unicode(person)
|
||||
keyword = normalize_unicode(keyword)
|
||||
album = normalize_unicode(album)
|
||||
folder = normalize_unicode(folder)
|
||||
title = normalize_unicode(title)
|
||||
description = normalize_unicode(description)
|
||||
place = normalize_unicode(place)
|
||||
label = normalize_unicode(label)
|
||||
|
||||
if album:
|
||||
photos = get_photos_by_attribute(photos, "albums", album, ignore_case)
|
||||
|
||||
|
||||
@@ -9,6 +9,9 @@ from datetime import datetime
|
||||
# Apple Epoch is Jan 1, 2001
|
||||
TIME_DELTA = (datetime(2001, 1, 1, 0, 0) - datetime(1970, 1, 1, 0, 0)).total_seconds()
|
||||
|
||||
# Unicode normalization form to use when comparing strings
|
||||
UNICODE_FORMAT = "NFC"
|
||||
|
||||
# which Photos library database versions have been tested
|
||||
# Photos 2.0 (10.12.6) == 2622
|
||||
# Photos 3.0 (10.13.6) == 3301
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
""" version info """
|
||||
|
||||
__version__ = "0.33.6"
|
||||
__version__ = "0.33.7"
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
import logging
|
||||
|
||||
from .._constants import _DB_TABLE_NAMES, _PHOTOS_4_VERSION
|
||||
from ..utils import _open_sql_file
|
||||
from ..utils import _open_sql_file, normalize_unicode
|
||||
from .photosdb_utils import get_db_version
|
||||
|
||||
|
||||
@@ -121,7 +121,7 @@ def _process_faceinfo_4(photosdb):
|
||||
face["asset_uuid"] = asset_uuid
|
||||
face["uuid"] = row[2]
|
||||
face["person"] = person_id
|
||||
face["fullname"] = row[3]
|
||||
face["fullname"] = normalize_unicode(row[3])
|
||||
face["sourcewidth"] = row[7]
|
||||
face["sourceheight"] = row[8]
|
||||
face["centerx"] = row[9]
|
||||
@@ -282,7 +282,7 @@ def _process_faceinfo_5(photosdb):
|
||||
face["asset_uuid"] = asset_uuid
|
||||
face["uuid"] = row[2]
|
||||
face["person"] = person_pk
|
||||
face["fullname"] = row[4]
|
||||
face["fullname"] = normalize_unicode(row[4])
|
||||
face["agetype"] = row[5]
|
||||
face["baldtype"] = row[6]
|
||||
face["eyemakeuptype"] = row[7]
|
||||
|
||||
@@ -10,7 +10,7 @@ import uuid as uuidlib
|
||||
from pprint import pformat
|
||||
|
||||
from .._constants import _PHOTOS_4_VERSION, SEARCH_CATEGORY_LABEL
|
||||
from ..utils import _db_is_locked, _debug, _open_sql_file
|
||||
from ..utils import _db_is_locked, _debug, _open_sql_file, normalize_unicode
|
||||
|
||||
"""
|
||||
This module should be imported in the class definition of PhotosDB in photosdb.py
|
||||
@@ -112,8 +112,8 @@ def _process_searchinfo(self):
|
||||
record["groupid"] = row[3]
|
||||
record["category"] = row[4]
|
||||
record["owning_groupid"] = row[5]
|
||||
record["content_string"] = row[6].replace("\x00", "")
|
||||
record["normalized_string"] = row[7].replace("\x00", "")
|
||||
record["content_string"] = normalize_unicode(row[6].replace("\x00", ""))
|
||||
record["normalized_string"] = normalize_unicode(row[7].replace("\x00", ""))
|
||||
record["lookup_identifier"] = row[8]
|
||||
|
||||
try:
|
||||
@@ -147,9 +147,10 @@ def _process_searchinfo(self):
|
||||
"_db_searchinfo_labels_normalized: \n"
|
||||
+ pformat(self._db_searchinfo_labels_normalized)
|
||||
)
|
||||
|
||||
|
||||
conn.close()
|
||||
|
||||
|
||||
@property
|
||||
def labels(self):
|
||||
""" return list of all search info labels found in the library """
|
||||
|
||||
@@ -44,6 +44,7 @@ from ..utils import (
|
||||
_get_os_version,
|
||||
_open_sql_file,
|
||||
get_last_library_path,
|
||||
normalize_unicode,
|
||||
)
|
||||
from .photosdb_utils import get_db_model_version, get_db_version
|
||||
|
||||
@@ -713,7 +714,7 @@ class PhotosDB:
|
||||
for album in c:
|
||||
self._dbalbum_details[album[0]] = {
|
||||
"_uuid": album[0],
|
||||
"title": album[1],
|
||||
"title": normalize_unicode(album[1]),
|
||||
"cloudlibrarystate": album[2],
|
||||
"cloudidentifier": album[3],
|
||||
"intrash": False if album[4] == 0 else True,
|
||||
@@ -760,7 +761,7 @@ class PhotosDB:
|
||||
self._dbfolder_details[uuid] = {
|
||||
"_uuid": row[0],
|
||||
"modelId": row[1],
|
||||
"name": row[2],
|
||||
"name": normalize_unicode(row[2]),
|
||||
"isMagic": row[3],
|
||||
"intrash": row[4],
|
||||
"folderType": row[5],
|
||||
@@ -963,7 +964,7 @@ class PhotosDB:
|
||||
self._dbphotos[uuid]["volumeId"] = row[10]
|
||||
self._dbphotos[uuid]["imagePath"] = row[11]
|
||||
self._dbphotos[uuid]["extendedDescription"] = row[12]
|
||||
self._dbphotos[uuid]["name"] = row[13]
|
||||
self._dbphotos[uuid]["name"] = normalize_unicode(row[13])
|
||||
self._dbphotos[uuid]["isMissing"] = row[14]
|
||||
self._dbphotos[uuid]["originalFilename"] = row[15]
|
||||
self._dbphotos[uuid]["favorite"] = row[16]
|
||||
@@ -1608,7 +1609,7 @@ class PhotosDB:
|
||||
for album in c:
|
||||
self._dbalbum_details[album[0]] = {
|
||||
"_uuid": album[0],
|
||||
"title": album[1],
|
||||
"title": normalize_unicode(album[1]),
|
||||
"cloudlocalstate": album[2],
|
||||
"cloudownerfirstname": album[3],
|
||||
"cloudownderlastname": album[4],
|
||||
@@ -1683,12 +1684,13 @@ class PhotosDB:
|
||||
JOIN ZKEYWORD ON ZKEYWORD.Z_PK = {keyword_join} """
|
||||
)
|
||||
for keyword in c:
|
||||
keyword_title = normalize_unicode(keyword[0])
|
||||
if not keyword[1] in self._dbkeywords_uuid:
|
||||
self._dbkeywords_uuid[keyword[1]] = []
|
||||
if not keyword[0] in self._dbkeywords_keyword:
|
||||
self._dbkeywords_keyword[keyword[0]] = []
|
||||
if not keyword_title in self._dbkeywords_keyword:
|
||||
self._dbkeywords_keyword[keyword_title] = []
|
||||
self._dbkeywords_uuid[keyword[1]].append(keyword[0])
|
||||
self._dbkeywords_keyword[keyword[0]].append(keyword[1])
|
||||
self._dbkeywords_keyword[keyword_title].append(keyword[1])
|
||||
|
||||
if _debug():
|
||||
logging.debug(f"Finished walking through keywords")
|
||||
@@ -1795,7 +1797,7 @@ class PhotosDB:
|
||||
info["modelID"] = None
|
||||
info["masterUuid"] = None
|
||||
info["masterFingerprint"] = row[1]
|
||||
info["name"] = row[2]
|
||||
info["name"] = normalize_unicode(row[2])
|
||||
|
||||
# There are sometimes negative values for lastmodifieddate in the database
|
||||
# I don't know what these mean but they will raise exception in datetime if
|
||||
@@ -2027,7 +2029,7 @@ class PhotosDB:
|
||||
for row in c:
|
||||
uuid = row[0]
|
||||
if uuid in self._dbphotos:
|
||||
self._dbphotos[uuid]["extendedDescription"] = row[1]
|
||||
self._dbphotos[uuid]["extendedDescription"] = normalize_unicode(row[1])
|
||||
else:
|
||||
if _debug():
|
||||
logging.debug(
|
||||
|
||||
@@ -11,6 +11,9 @@ from collections import namedtuple # pylint: disable=syntax-error
|
||||
import yaml
|
||||
from bpylist import archiver
|
||||
|
||||
from ._constants import UNICODE_FORMAT
|
||||
from .utils import normalize_unicode
|
||||
|
||||
# postal address information, returned by PlaceInfo.address
|
||||
PostalAddress = namedtuple(
|
||||
"PostalAddress",
|
||||
@@ -76,12 +79,12 @@ class PLRevGeoLocationInfo:
|
||||
geoServiceProvider,
|
||||
postalAddress,
|
||||
):
|
||||
self.addressString = addressString
|
||||
self.addressString = normalize_unicode(addressString)
|
||||
self.countryCode = countryCode
|
||||
self.mapItem = mapItem
|
||||
self.isHome = isHome
|
||||
self.compoundNames = compoundNames
|
||||
self.compoundSecondaryNames = compoundSecondaryNames
|
||||
self.compoundNames = normalize_unicode(compoundNames)
|
||||
self.compoundSecondaryNames = normalize_unicode(compoundSecondaryNames)
|
||||
self.version = version
|
||||
self.geoServiceProvider = geoServiceProvider
|
||||
self.postalAddress = postalAddress
|
||||
@@ -183,7 +186,7 @@ class PLRevGeoMapItemAdditionalPlaceInfo:
|
||||
|
||||
def __init__(self, area, name, placeType, dominantOrderType):
|
||||
self.area = area
|
||||
self.name = name
|
||||
self.name = normalize_unicode(name)
|
||||
self.placeType = placeType
|
||||
self.dominantOrderType = dominantOrderType
|
||||
|
||||
@@ -232,13 +235,13 @@ class CNPostalAddress:
|
||||
_subLocality,
|
||||
):
|
||||
self._ISOCountryCode = _ISOCountryCode
|
||||
self._city = _city
|
||||
self._country = _country
|
||||
self._postalCode = _postalCode
|
||||
self._state = _state
|
||||
self._street = _street
|
||||
self._subAdministrativeArea = _subAdministrativeArea
|
||||
self._subLocality = _subLocality
|
||||
self._city = normalize_unicode(_city)
|
||||
self._country = normalize_unicode(_country)
|
||||
self._postalCode = normalize_unicode(_postalCode)
|
||||
self._state = normalize_unicode(_state)
|
||||
self._street = normalize_unicode(_street)
|
||||
self._subAdministrativeArea = normalize_unicode(_subAdministrativeArea)
|
||||
self._subLocality = normalize_unicode(_subLocality)
|
||||
|
||||
def __eq__(self, other):
|
||||
return all(
|
||||
@@ -414,9 +417,9 @@ class PlaceInfo4(PlaceInfo):
|
||||
# 2: type
|
||||
# 3: area
|
||||
try:
|
||||
places_dict[p[2]].append((p[1], p[3]))
|
||||
places_dict[p[2]].append((normalize_unicode(p[1]), p[3]))
|
||||
except KeyError:
|
||||
places_dict[p[2]] = [(p[1], p[3])]
|
||||
places_dict[p[2]] = [(normalize_unicode(p[1]), p[3])]
|
||||
|
||||
# build list to populate PlaceNames tuple
|
||||
# initialize with empty lists for each field in PlaceNames
|
||||
|
||||
@@ -10,6 +10,7 @@ import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import unicodedata
|
||||
import urllib.parse
|
||||
from plistlib import load as plistload
|
||||
|
||||
@@ -18,6 +19,7 @@ import CoreServices
|
||||
import objc
|
||||
from Foundation import *
|
||||
|
||||
from ._constants import UNICODE_FORMAT
|
||||
from .fileutil import FileUtil
|
||||
|
||||
_DEBUG = False
|
||||
@@ -352,3 +354,13 @@ def _db_is_locked(dbname):
|
||||
# attr = xattr.xattr(filepath)
|
||||
# uuid_bytes = bytes(uuid, 'utf-8')
|
||||
# attr.set(OSXPHOTOS_XATTR_UUID, uuid_bytes)
|
||||
|
||||
|
||||
def normalize_unicode(value):
    """ normalize unicode data

    Args:
        value: a str, a tuple or list whose elements are themselves
            acceptable values, or None

    Returns:
        None when value is None; the str normalized to the form named by
        UNICODE_FORMAT; or a new tuple / list (same kind as the input) with
        every element normalized

    Raises:
        ValueError: if value, or any element of a tuple / list, is not
            None, str, tuple or list
    """
    if value is None:
        return None
    if isinstance(value, str):
        return unicodedata.normalize(UNICODE_FORMAT, value)
    # Normalize containers element by element instead of rejecting them,
    # so multi-valued fields do not raise.
    if isinstance(value, tuple):
        return tuple(normalize_unicode(item) for item in value)
    if isinstance(value, list):
        return [normalize_unicode(item) for item in value]
    raise ValueError("value must be str")
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -7,7 +7,7 @@
|
||||
<key>hostuuid</key>
|
||||
<string>9575E48B-8D5F-5654-ABAC-4431B1167324</string>
|
||||
<key>pid</key>
|
||||
<integer>1847</integer>
|
||||
<integer>1942</integer>
|
||||
<key>processname</key>
|
||||
<string>photolibraryd</string>
|
||||
<key>uid</key>
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -33,6 +33,7 @@ ALBUMS = [
|
||||
"Raw",
|
||||
"I have a deleted twin", # there's an empty album with same name that has been deleted
|
||||
"EmptyAlbum",
|
||||
"2018-10 - Sponsion, Museum, Frühstück, Römermuseum",
|
||||
]
|
||||
KEYWORDS_DICT = {
|
||||
"Kids": 4,
|
||||
@@ -53,6 +54,7 @@ ALBUM_DICT = {
|
||||
"Raw": 4,
|
||||
"I have a deleted twin": 1,
|
||||
"EmptyAlbum": 0,
|
||||
"2018-10 - Sponsion, Museum, Frühstück, Römermuseum": 1,
|
||||
} # Note: there are 2 albums named "Test Album" for testing duplicate album names
|
||||
|
||||
UUID_DICT = {
|
||||
|
||||
@@ -58,6 +58,8 @@ CLI_EXPORT_FILENAMES = [
|
||||
|
||||
CLI_EXPORT_FILENAMES_ALBUM = ["Pumkins1.jpg", "Pumkins2.jpg", "Pumpkins3.jpg"]
|
||||
|
||||
CLI_EXPORT_FILENAMES_ALBUM_UNICODE = ["IMG_4547.jpg"]
|
||||
|
||||
CLI_EXPORT_FILENAMES_DELETED_TWIN = ["wedding.jpg", "wedding_edited.jpeg"]
|
||||
|
||||
CLI_EXPORT_EDITED_SUFFIX = "_bearbeiten"
|
||||
@@ -451,7 +453,6 @@ def test_query_uuid():
|
||||
"--json",
|
||||
"--db",
|
||||
os.path.join(cwd, CLI_PHOTOS_DB),
|
||||
# "./tests/Test-10.15.1.photoslibrary",
|
||||
"--uuid",
|
||||
"D79B8D77-BFFC-460B-9312-034F2877D35B",
|
||||
],
|
||||
@@ -1816,6 +1817,26 @@ def test_export_album():
|
||||
files = glob.glob("*")
|
||||
assert sorted(files) == sorted(CLI_EXPORT_FILENAMES_ALBUM)
|
||||
|
||||
def test_export_album_unicode_name():
    """Export a single album, selected by a name containing non-ASCII characters """
    import glob
    import os
    import os.path

    from osxphotos.__main__ import export

    album_name = "2018-10 - Sponsion, Museum, Frühstück, Römermuseum"

    runner = CliRunner()
    cwd = os.getcwd()
    # pylint: disable=not-context-manager
    with runner.isolated_filesystem():
        # the library path is resolved against the original cwd because the
        # isolated filesystem changes the working directory
        cli_args = [
            os.path.join(cwd, PHOTOS_DB_15_6),
            ".",
            "--album",
            album_name,
            "-V",
        ]
        result = runner.invoke(export, cli_args)
        assert result.exit_code == 0

        exported = glob.glob("*")
        assert sorted(exported) == sorted(CLI_EXPORT_FILENAMES_ALBUM_UNICODE)
|
||||
|
||||
|
||||
|
||||
def test_export_album_deleted_twin():
|
||||
"""Test export of an album where album of same name has been deleted """
|
||||
|
||||
Reference in New Issue
Block a user