From f0d7496bc66aae291337efc570a2e2c4b9b5529c Mon Sep 17 00:00:00 2001 From: Rhet Turnbull Date: Sun, 29 Aug 2021 12:18:20 -0700 Subject: [PATCH] Fix for newlines in exif tags, #513 --- README.md | 4 +- osxphotos/_version.py | 2 +- osxphotos/exiftool.py | 64 ++++++++++++------ osxphotos/photoinfo/_photoinfo_export.py | 6 -- tests/test_catalina_10_15_7.py | 4 +- tests/test_exiftool.py | 85 ++++++++++++++++++++++-- tests/test_monterey_dev_beta_12_0_0.py | 4 +- 7 files changed, 131 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index dad8d338..5e4a755a 100644 --- a/README.md +++ b/README.md @@ -1702,7 +1702,7 @@ Substitution Description {lf} A line feed: '\n', alias for {newline} {cr} A carriage return: '\r' {crlf} a carriage return + line feed: '\r\n' -{osxphotos_version} The osxphotos version, e.g. '0.42.77' +{osxphotos_version} The osxphotos version, e.g. '0.42.78' {osxphotos_cmd_line} The full command line used to run osxphotos The following substitutions may result in multiple values. Thus if specified for @@ -3561,7 +3561,7 @@ The following template field substitutions are availabe for use the templating s |{lf}|A line feed: '\n', alias for {newline}| |{cr}|A carriage return: '\r'| |{crlf}|a carriage return + line feed: '\r\n'| -|{osxphotos_version}|The osxphotos version, e.g. '0.42.77'| +|{osxphotos_version}|The osxphotos version, e.g. '0.42.78'| |{osxphotos_cmd_line}|The full command line used to run osxphotos| |{album}|Album(s) photo is contained in| |{folder_album}|Folder path + album photo is contained in. e.g. 'Folder/Subfolder/Album' or just 'Album' if no enclosing folder| diff --git a/osxphotos/_version.py b/osxphotos/_version.py index c2978847..ecb85206 100644 --- a/osxphotos/_version.py +++ b/osxphotos/_version.py @@ -1,3 +1,3 @@ """ version info """ -__version__ = "0.42.77" +__version__ = "0.42.78" diff --git a/osxphotos/exiftool.py b/osxphotos/exiftool.py index bc43fedc..ae3675a7 100644 --- a/osxphotos/exiftool.py +++ b/osxphotos/exiftool.py @@ -7,6 +7,7 @@ pyexiftool: https://github.com/smarnach/pyexiftool which provides more functionality """ import atexit +import html import json import logging import os @@ -24,16 +25,34 @@ EXIFTOOL_STAYOPEN_EOF_LEN = len(EXIFTOOL_STAYOPEN_EOF) EXIFTOOL_PROCESSES = [] +def escape_str(s): + """escape string for use with exiftool -E""" + if type(s) != str: + return s + s = html.escape(s) + s = s.replace("\n", " ") + s = s.replace("\t", " ") + s = s.replace("\r", " ") + return s + + +def unescape_str(s): + """unescape an HTML string returned by exiftool -E""" + if type(s) != str: + return s + return html.unescape(s) + + @atexit.register def terminate_exiftool(): - """Terminate any running ExifTool subprocesses; call this to cleanup when done using ExifTool """ + """Terminate any running ExifTool subprocesses; call this to cleanup when done using ExifTool""" for proc in EXIFTOOL_PROCESSES: proc._stop_proc() @lru_cache(maxsize=1) def get_exiftool_path(): - """ return path of exiftool, cache result """ + """return path of exiftool, cache result""" exiftool_path = shutil.which("exiftool") if exiftool_path: return exiftool_path.rstrip() @@ -49,7 +68,7 @@ class _ExifToolProc: Creates a singleton object""" def __new__(cls, *args, **kwargs): - """ create new object or return instance of already created singleton """ + """create new object or return instance of already created singleton""" if not hasattr(cls, "instance") or not cls.instance: cls.instance = super().__new__(cls) @@ -74,7 +93,7 @@ class _ExifToolProc: @property def process(self): - """ return the exiftool subprocess """ + """return the exiftool subprocess""" if self._process_running: return self._process else: @@ -83,16 +102,16 @@ class _ExifToolProc: @property def pid(self): - """ return process id (PID) of the exiftool process """ + """return process id (PID) of the exiftool process""" return self._process.pid @property def exiftool(self): - """ return path to exiftool process """ + """return path to exiftool process""" return self._exiftool def _start_proc(self): - """ start exiftool in batch mode """ + """start exiftool in batch mode""" if self._process_running: logging.warning("exiftool already running: {self._process}") @@ -110,6 +129,7 @@ class _ExifToolProc: "-n", # no print conversion (e.g. print tag values in machine readable format) "-P", # Preserve file modification date/time "-G", # print group name for each tag + "-E", # escape tag values for HTML (allows use of HTML for newlines) ], stdin=subprocess.PIPE, stdout=subprocess.PIPE, @@ -120,7 +140,7 @@ class _ExifToolProc: EXIFTOOL_PROCESSES.append(self) def _stop_proc(self): - """ stop the exiftool process if it's running, otherwise, do nothing """ + """stop the exiftool process if it's running, otherwise, do nothing""" if not self._process_running: return @@ -143,7 +163,7 @@ class _ExifToolProc: class ExifTool: - """ Basic exiftool interface for reading and writing EXIF tags """ + """Basic exiftool interface for reading and writing EXIF tags""" def __init__(self, filepath, exiftool=None, overwrite=True, flags=None): """Create ExifTool object @@ -189,6 +209,7 @@ class ExifTool: if value is None: value = "" + value = escape_str(value) command = [f"-{tag}={value}"] if self.overwrite and not self._context_mgr: command.append("-overwrite_original") @@ -233,6 +254,7 @@ class ExifTool: for value in values: if value is None: raise ValueError("Can't add None value to tag") + value = escape_str(value) command.append(f"-{tag}+={value}") if self.overwrite and not self._context_mgr: @@ -315,12 +337,12 @@ class ExifTool: @property def pid(self): - """ return process id (PID) of the exiftool process """ + """return process id (PID) of the exiftool process""" return self._process.pid @property def version(self): - """ returns exiftool version """ + """returns exiftool version""" ver, _, _ = self.run_commands("-ver", no_file=True) return ver.decode("utf-8") @@ -335,6 +357,7 @@ class ExifTool: json_str, _, _ = self.run_commands("-json") if not json_str: return dict() + json_str = unescape_str(json_str.decode("utf-8")) try: exifdict = json.loads(json_str) @@ -342,7 +365,6 @@ class ExifTool: # will fail with some commands, e.g --ext AVI which produces # 'No file with specified extension' instead of json return dict() - exifdict = exifdict[0] if not tag_groups: # strip tag groups @@ -358,12 +380,13 @@ class ExifTool: return exifdict def json(self): - """ returns JSON string containing all EXIF tags and values from exiftool """ + """returns JSON string containing all EXIF tags and values from exiftool""" json, _, _ = self.run_commands("-json") + json = unescape_str(json.decode("utf-8")) return json def _read_exif(self): - """ read exif data from file """ + """read exif data from file""" data = self.asdict() self.data = {k: v for k, v in data.items()} @@ -384,15 +407,15 @@ class ExifTool: class ExifToolCaching(ExifTool): - """ Basic exiftool interface for reading and writing EXIF tags, with caching. - Use this only when you know the file's EXIF data will not be changed by any external process. - - Creates a singleton cached ExifTool instance """ + """Basic exiftool interface for reading and writing EXIF tags, with caching. + Use this only when you know the file's EXIF data will not be changed by any external process. + + Creates a singleton cached ExifTool instance""" _singletons = {} def __new__(cls, filepath, exiftool=None): - """ create new object or return instance of already created singleton """ + """create new object or return instance of already created singleton""" if filepath not in cls._singletons: cls._singletons[filepath] = _ExifToolCaching(filepath, exiftool=exiftool) return cls._singletons[filepath] @@ -448,7 +471,6 @@ class _ExifToolCaching(ExifTool): return self._asdict_cache[tag_groups][normalized] def flush_cache(self): - """ Clear cached data so that calls to json or asdict return fresh data """ + """Clear cached data so that calls to json or asdict return fresh data""" self._json_cache = None self._asdict_cache = {} - diff --git a/osxphotos/photoinfo/_photoinfo_export.py b/osxphotos/photoinfo/_photoinfo_export.py index f546b3b1..7cad5f11 100644 --- a/osxphotos/photoinfo/_photoinfo_export.py +++ b/osxphotos/photoinfo/_photoinfo_export.py @@ -1871,12 +1871,6 @@ def _exiftool_dict( self.date_modified ).strftime("%Y:%m:%d %H:%M:%S") - # remove any new lines in any fields - for field, val in exif.items(): - if type(val) == str: - exif[field] = val.replace("\n", " ") - elif type(val) == list: - exif[field] = [str(v).replace("\n", " ") for v in val if v is not None] return exif diff --git a/tests/test_catalina_10_15_7.py b/tests/test_catalina_10_15_7.py index 22dd2e11..d5232d1c 100644 --- a/tests/test_catalina_10_15_7.py +++ b/tests/test_catalina_10_15_7.py @@ -1376,12 +1376,12 @@ def test_no_adjustments(photosdb): def test_exiftool_newlines_in_description(photosdb): - """Test that exiftool code removes newlines embedded in description, issue #393""" + """Test that exiftool handles newlines embedded in description, issue #393""" photo = photosdb.get_photo(UUID_DICT["description_newlines"]) exif = photo._exiftool_dict() assert photo.description.find("\n") > 0 - assert exif["EXIF:ImageDescription"].find("\n") == -1 + assert exif["EXIF:ImageDescription"].find("\n") > 0 @pytest.mark.skip(SKIP_TEST, reason="Not yet implemented") diff --git a/tests/test_exiftool.py b/tests/test_exiftool.py index d2835571..1ec3c362 100644 --- a/tests/test_exiftool.py +++ b/tests/test_exiftool.py @@ -141,6 +141,44 @@ def test_setvalue_1(): assert exif.data["IPTC:Keywords"] == "test" +def test_setvalue_multiline(): + # test setting a tag value with embedded newline + import os.path + import tempfile + import osxphotos.exiftool + from osxphotos.fileutil import FileUtil + + tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_") + tempfile = os.path.join(tempdir.name, os.path.basename(TEST_FILE_ONE_KEYWORD)) + FileUtil.copy(TEST_FILE_ONE_KEYWORD, tempfile) + + exif = osxphotos.exiftool.ExifTool(tempfile) + exif.setvalue("EXIF:ImageDescription", "multi\nline") + assert not exif.error + + exif._read_exif() + assert exif.data["EXIF:ImageDescription"] == "multi\nline" + + +def test_setvalue_non_alphanumeric_chars(): + # test setting a tag value non-alphanumeric characters + import os.path + import tempfile + import osxphotos.exiftool + from osxphotos.fileutil import FileUtil + + tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_") + tempfile = os.path.join(tempdir.name, os.path.basename(TEST_FILE_ONE_KEYWORD)) + FileUtil.copy(TEST_FILE_ONE_KEYWORD, tempfile) + + exif = osxphotos.exiftool.ExifTool(tempfile) + exif.setvalue("EXIF:ImageDescription", "{world}$bye#foo%bar") + assert not exif.error + + exif._read_exif() + assert exif.data["EXIF:ImageDescription"] == "{world}$bye#foo%bar" + + def test_setvalue_warning(): # test setting illegal tag value generates warning import os.path @@ -311,6 +349,45 @@ def test_addvalues_2(): assert sorted(exif.data["IPTC:Keywords"]) == sorted(test_multi) +def test_addvalues_non_alphanumeric_multiline(): + # test setting a tag value + import os.path + import tempfile + import osxphotos.exiftool + from osxphotos.fileutil import FileUtil + + tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_") + tempfile = os.path.join(tempdir.name, os.path.basename(TEST_FILE_ONE_KEYWORD)) + FileUtil.copy(TEST_FILE_ONE_KEYWORD, tempfile) + + exif = osxphotos.exiftool.ExifTool(tempfile) + exif.addvalues("IPTC:Keywords", "multi\nline", "\t{bar}") + assert not exif.error + exif._read_exif() + assert sorted(exif.data["IPTC:Keywords"]) == sorted( + ["wedding", "multi\nline", "\t{bar}"] + ) + + +def test_addvalues_unicode(): + # test setting a tag value with unicode + import os.path + import tempfile + import osxphotos.exiftool + from osxphotos.fileutil import FileUtil + + tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_") + tempfile = os.path.join(tempdir.name, os.path.basename(TEST_FILE_ONE_KEYWORD)) + FileUtil.copy(TEST_FILE_ONE_KEYWORD, tempfile) + + exif = osxphotos.exiftool.ExifTool(tempfile) + exif.setvalue("IPTC:Keywords", None) + exif.addvalues("IPTC:Keywords", "ǂ", "Ƕ") + assert not exif.error + exif._read_exif() + assert sorted(exif.data["IPTC:Keywords"]) == sorted(["ǂ", "Ƕ"]) + + def test_singleton(): import osxphotos.exiftool @@ -384,7 +461,7 @@ def test_str(): def test_photoinfo_exiftool(): - """ test PhotoInfo.exiftool which returns ExifTool object for photo """ + """test PhotoInfo.exiftool which returns ExifTool object for photo""" import osxphotos photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB) @@ -397,7 +474,7 @@ def test_photoinfo_exiftool(): def test_photoinfo_exiftool_no_groups(): - """ test PhotoInfo.exiftool which returns ExifTool object for photo without tag group names""" + """test PhotoInfo.exiftool which returns ExifTool object for photo without tag group names""" import osxphotos photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB) @@ -420,7 +497,7 @@ def test_photoinfo_exiftool_none(): def test_exiftool_terminate(): - """ Test that exiftool process is terminated when exiftool.terminate() is called """ + """Test that exiftool process is terminated when exiftool.terminate() is called""" import osxphotos.exiftool import subprocess @@ -435,7 +512,7 @@ def test_exiftool_terminate(): ps = subprocess.run(["ps"], capture_output=True) stdout = ps.stdout.decode("utf-8") assert "exiftool -stay_open" not in stdout - + # verify we can create a new instance after termination exif2 = osxphotos.exiftool.ExifTool(TEST_FILE_ONE_KEYWORD) assert exif2.asdict()["IPTC:Keywords"] == "wedding" diff --git a/tests/test_monterey_dev_beta_12_0_0.py b/tests/test_monterey_dev_beta_12_0_0.py index 73b9e5ee..0f2e7154 100644 --- a/tests/test_monterey_dev_beta_12_0_0.py +++ b/tests/test_monterey_dev_beta_12_0_0.py @@ -1346,12 +1346,12 @@ def test_no_adjustments(photosdb): def test_exiftool_newlines_in_description(photosdb): - """Test that exiftool code removes newlines embedded in description, issue #393""" + """Test that exiftool handles newlines embedded in description, issue #393""" photo = photosdb.get_photo(UUID_DICT["description_newlines"]) exif = photo._exiftool_dict() assert photo.description.find("\n") > 0 - assert exif["EXIF:ImageDescription"].find("\n") == -1 + assert exif["EXIF:ImageDescription"].find("\n") > 0 @pytest.mark.skip(SKIP_TEST, reason="Not yet implemented")