Fix for newlines in exif tags, #513

This commit is contained in:
Rhet Turnbull
2021-08-29 12:18:20 -07:00
parent 8e2b768236
commit f0d7496bc6
7 changed files with 131 additions and 38 deletions

View File

@@ -1702,7 +1702,7 @@ Substitution Description
{lf} A line feed: '\n', alias for {newline}
{cr} A carriage return: '\r'
{crlf} a carriage return + line feed: '\r\n'
{osxphotos_version} The osxphotos version, e.g. '0.42.77'
{osxphotos_version} The osxphotos version, e.g. '0.42.78'
{osxphotos_cmd_line} The full command line used to run osxphotos
The following substitutions may result in multiple values. Thus if specified for
@@ -3561,7 +3561,7 @@ The following template field substitutions are availabe for use the templating s
|{lf}|A line feed: '\n', alias for {newline}|
|{cr}|A carriage return: '\r'|
|{crlf}|a carriage return + line feed: '\r\n'|
|{osxphotos_version}|The osxphotos version, e.g. '0.42.77'|
|{osxphotos_version}|The osxphotos version, e.g. '0.42.78'|
|{osxphotos_cmd_line}|The full command line used to run osxphotos|
|{album}|Album(s) photo is contained in|
|{folder_album}|Folder path + album photo is contained in. e.g. 'Folder/Subfolder/Album' or just 'Album' if no enclosing folder|

View File

@@ -1,3 +1,3 @@
""" version info """
__version__ = "0.42.77"
__version__ = "0.42.78"

View File

@@ -7,6 +7,7 @@
pyexiftool: https://github.com/smarnach/pyexiftool which provides more functionality """
import atexit
import html
import json
import logging
import os
@@ -24,16 +25,34 @@ EXIFTOOL_STAYOPEN_EOF_LEN = len(EXIFTOOL_STAYOPEN_EOF)
EXIFTOOL_PROCESSES = []
def escape_str(s):
"""escape string for use with exiftool -E"""
if type(s) != str:
return s
s = html.escape(s)
s = s.replace("\n", "
")
s = s.replace("\t", "	")
s = s.replace("\r", "
")
return s
def unescape_str(s):
"""unescape an HTML string returned by exiftool -E"""
if type(s) != str:
return s
return html.unescape(s)
@atexit.register
def terminate_exiftool():
"""Terminate any running ExifTool subprocesses; call this to cleanup when done using ExifTool """
"""Terminate any running ExifTool subprocesses; call this to cleanup when done using ExifTool"""
for proc in EXIFTOOL_PROCESSES:
proc._stop_proc()
@lru_cache(maxsize=1)
def get_exiftool_path():
""" return path of exiftool, cache result """
"""return path of exiftool, cache result"""
exiftool_path = shutil.which("exiftool")
if exiftool_path:
return exiftool_path.rstrip()
@@ -49,7 +68,7 @@ class _ExifToolProc:
Creates a singleton object"""
def __new__(cls, *args, **kwargs):
""" create new object or return instance of already created singleton """
"""create new object or return instance of already created singleton"""
if not hasattr(cls, "instance") or not cls.instance:
cls.instance = super().__new__(cls)
@@ -74,7 +93,7 @@ class _ExifToolProc:
@property
def process(self):
""" return the exiftool subprocess """
"""return the exiftool subprocess"""
if self._process_running:
return self._process
else:
@@ -83,16 +102,16 @@ class _ExifToolProc:
@property
def pid(self):
""" return process id (PID) of the exiftool process """
"""return process id (PID) of the exiftool process"""
return self._process.pid
@property
def exiftool(self):
""" return path to exiftool process """
"""return path to exiftool process"""
return self._exiftool
def _start_proc(self):
""" start exiftool in batch mode """
"""start exiftool in batch mode"""
if self._process_running:
logging.warning("exiftool already running: {self._process}")
@@ -110,6 +129,7 @@ class _ExifToolProc:
"-n", # no print conversion (e.g. print tag values in machine readable format)
"-P", # Preserve file modification date/time
"-G", # print group name for each tag
"-E", # escape tag values for HTML (allows use of HTML 
 for newlines)
],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
@@ -120,7 +140,7 @@ class _ExifToolProc:
EXIFTOOL_PROCESSES.append(self)
def _stop_proc(self):
""" stop the exiftool process if it's running, otherwise, do nothing """
"""stop the exiftool process if it's running, otherwise, do nothing"""
if not self._process_running:
return
@@ -143,7 +163,7 @@ class _ExifToolProc:
class ExifTool:
""" Basic exiftool interface for reading and writing EXIF tags """
"""Basic exiftool interface for reading and writing EXIF tags"""
def __init__(self, filepath, exiftool=None, overwrite=True, flags=None):
"""Create ExifTool object
@@ -189,6 +209,7 @@ class ExifTool:
if value is None:
value = ""
value = escape_str(value)
command = [f"-{tag}={value}"]
if self.overwrite and not self._context_mgr:
command.append("-overwrite_original")
@@ -233,6 +254,7 @@ class ExifTool:
for value in values:
if value is None:
raise ValueError("Can't add None value to tag")
value = escape_str(value)
command.append(f"-{tag}+={value}")
if self.overwrite and not self._context_mgr:
@@ -315,12 +337,12 @@ class ExifTool:
@property
def pid(self):
""" return process id (PID) of the exiftool process """
"""return process id (PID) of the exiftool process"""
return self._process.pid
@property
def version(self):
""" returns exiftool version """
"""returns exiftool version"""
ver, _, _ = self.run_commands("-ver", no_file=True)
return ver.decode("utf-8")
@@ -335,6 +357,7 @@ class ExifTool:
json_str, _, _ = self.run_commands("-json")
if not json_str:
return dict()
json_str = unescape_str(json_str.decode("utf-8"))
try:
exifdict = json.loads(json_str)
@@ -342,7 +365,6 @@ class ExifTool:
# will fail with some commands, e.g --ext AVI which produces
# 'No file with specified extension' instead of json
return dict()
exifdict = exifdict[0]
if not tag_groups:
# strip tag groups
@@ -358,12 +380,13 @@ class ExifTool:
return exifdict
def json(self):
""" returns JSON string containing all EXIF tags and values from exiftool """
"""returns JSON string containing all EXIF tags and values from exiftool"""
json, _, _ = self.run_commands("-json")
json = unescape_str(json.decode("utf-8"))
return json
def _read_exif(self):
""" read exif data from file """
"""read exif data from file"""
data = self.asdict()
self.data = {k: v for k, v in data.items()}
@@ -384,15 +407,15 @@ class ExifTool:
class ExifToolCaching(ExifTool):
""" Basic exiftool interface for reading and writing EXIF tags, with caching.
Use this only when you know the file's EXIF data will not be changed by any external process.
Creates a singleton cached ExifTool instance """
"""Basic exiftool interface for reading and writing EXIF tags, with caching.
Use this only when you know the file's EXIF data will not be changed by any external process.
Creates a singleton cached ExifTool instance"""
_singletons = {}
def __new__(cls, filepath, exiftool=None):
""" create new object or return instance of already created singleton """
"""create new object or return instance of already created singleton"""
if filepath not in cls._singletons:
cls._singletons[filepath] = _ExifToolCaching(filepath, exiftool=exiftool)
return cls._singletons[filepath]
@@ -448,7 +471,6 @@ class _ExifToolCaching(ExifTool):
return self._asdict_cache[tag_groups][normalized]
def flush_cache(self):
""" Clear cached data so that calls to json or asdict return fresh data """
"""Clear cached data so that calls to json or asdict return fresh data"""
self._json_cache = None
self._asdict_cache = {}

View File

@@ -1871,12 +1871,6 @@ def _exiftool_dict(
self.date_modified
).strftime("%Y:%m:%d %H:%M:%S")
# remove any new lines in any fields
for field, val in exif.items():
if type(val) == str:
exif[field] = val.replace("\n", " ")
elif type(val) == list:
exif[field] = [str(v).replace("\n", " ") for v in val if v is not None]
return exif

View File

@@ -1376,12 +1376,12 @@ def test_no_adjustments(photosdb):
def test_exiftool_newlines_in_description(photosdb):
"""Test that exiftool code removes newlines embedded in description, issue #393"""
"""Test that exiftool handles newlines embedded in description, issue #393"""
photo = photosdb.get_photo(UUID_DICT["description_newlines"])
exif = photo._exiftool_dict()
assert photo.description.find("\n") > 0
assert exif["EXIF:ImageDescription"].find("\n") == -1
assert exif["EXIF:ImageDescription"].find("\n") > 0
@pytest.mark.skip(SKIP_TEST, reason="Not yet implemented")

View File

@@ -141,6 +141,44 @@ def test_setvalue_1():
assert exif.data["IPTC:Keywords"] == "test"
def test_setvalue_multiline():
# test setting a tag value with embedded newline
import os.path
import tempfile
import osxphotos.exiftool
from osxphotos.fileutil import FileUtil
tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_")
tempfile = os.path.join(tempdir.name, os.path.basename(TEST_FILE_ONE_KEYWORD))
FileUtil.copy(TEST_FILE_ONE_KEYWORD, tempfile)
exif = osxphotos.exiftool.ExifTool(tempfile)
exif.setvalue("EXIF:ImageDescription", "multi\nline")
assert not exif.error
exif._read_exif()
assert exif.data["EXIF:ImageDescription"] == "multi\nline"
def test_setvalue_non_alphanumeric_chars():
# test setting a tag value non-alphanumeric characters
import os.path
import tempfile
import osxphotos.exiftool
from osxphotos.fileutil import FileUtil
tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_")
tempfile = os.path.join(tempdir.name, os.path.basename(TEST_FILE_ONE_KEYWORD))
FileUtil.copy(TEST_FILE_ONE_KEYWORD, tempfile)
exif = osxphotos.exiftool.ExifTool(tempfile)
exif.setvalue("EXIF:ImageDescription", "<hello>{world}$bye#foo%bar")
assert not exif.error
exif._read_exif()
assert exif.data["EXIF:ImageDescription"] == "<hello>{world}$bye#foo%bar"
def test_setvalue_warning():
# test setting illegal tag value generates warning
import os.path
@@ -311,6 +349,45 @@ def test_addvalues_2():
assert sorted(exif.data["IPTC:Keywords"]) == sorted(test_multi)
def test_addvalues_non_alphanumeric_multiline():
# test setting a tag value
import os.path
import tempfile
import osxphotos.exiftool
from osxphotos.fileutil import FileUtil
tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_")
tempfile = os.path.join(tempdir.name, os.path.basename(TEST_FILE_ONE_KEYWORD))
FileUtil.copy(TEST_FILE_ONE_KEYWORD, tempfile)
exif = osxphotos.exiftool.ExifTool(tempfile)
exif.addvalues("IPTC:Keywords", "multi\nline", "<Foo>\t{bar}")
assert not exif.error
exif._read_exif()
assert sorted(exif.data["IPTC:Keywords"]) == sorted(
["wedding", "multi\nline", "<Foo>\t{bar}"]
)
def test_addvalues_unicode():
# test setting a tag value with unicode
import os.path
import tempfile
import osxphotos.exiftool
from osxphotos.fileutil import FileUtil
tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_")
tempfile = os.path.join(tempdir.name, os.path.basename(TEST_FILE_ONE_KEYWORD))
FileUtil.copy(TEST_FILE_ONE_KEYWORD, tempfile)
exif = osxphotos.exiftool.ExifTool(tempfile)
exif.setvalue("IPTC:Keywords", None)
exif.addvalues("IPTC:Keywords", "ǂ", "Ƕ")
assert not exif.error
exif._read_exif()
assert sorted(exif.data["IPTC:Keywords"]) == sorted(["ǂ", "Ƕ"])
def test_singleton():
import osxphotos.exiftool
@@ -384,7 +461,7 @@ def test_str():
def test_photoinfo_exiftool():
""" test PhotoInfo.exiftool which returns ExifTool object for photo """
"""test PhotoInfo.exiftool which returns ExifTool object for photo"""
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
@@ -397,7 +474,7 @@ def test_photoinfo_exiftool():
def test_photoinfo_exiftool_no_groups():
""" test PhotoInfo.exiftool which returns ExifTool object for photo without tag group names"""
"""test PhotoInfo.exiftool which returns ExifTool object for photo without tag group names"""
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
@@ -420,7 +497,7 @@ def test_photoinfo_exiftool_none():
def test_exiftool_terminate():
""" Test that exiftool process is terminated when exiftool.terminate() is called """
"""Test that exiftool process is terminated when exiftool.terminate() is called"""
import osxphotos.exiftool
import subprocess
@@ -435,7 +512,7 @@ def test_exiftool_terminate():
ps = subprocess.run(["ps"], capture_output=True)
stdout = ps.stdout.decode("utf-8")
assert "exiftool -stay_open" not in stdout
# verify we can create a new instance after termination
exif2 = osxphotos.exiftool.ExifTool(TEST_FILE_ONE_KEYWORD)
assert exif2.asdict()["IPTC:Keywords"] == "wedding"

View File

@@ -1346,12 +1346,12 @@ def test_no_adjustments(photosdb):
def test_exiftool_newlines_in_description(photosdb):
"""Test that exiftool code removes newlines embedded in description, issue #393"""
"""Test that exiftool handles newlines embedded in description, issue #393"""
photo = photosdb.get_photo(UUID_DICT["description_newlines"])
exif = photo._exiftool_dict()
assert photo.description.find("\n") > 0
assert exif["EXIF:ImageDescription"].find("\n") == -1
assert exif["EXIF:ImageDescription"].find("\n") > 0
@pytest.mark.skip(SKIP_TEST, reason="Not yet implemented")