Fix for newlines in exif tags, #513

This commit is contained in:
Rhet Turnbull
2021-08-29 12:18:20 -07:00
parent 8e2b768236
commit f0d7496bc6
7 changed files with 131 additions and 38 deletions

View File

@@ -1,3 +1,3 @@
""" version info """
__version__ = "0.42.77"
__version__ = "0.42.78"

View File

@@ -7,6 +7,7 @@
pyexiftool: https://github.com/smarnach/pyexiftool which provides more functionality """
import atexit
import html
import json
import logging
import os
@@ -24,16 +25,34 @@ EXIFTOOL_STAYOPEN_EOF_LEN = len(EXIFTOOL_STAYOPEN_EOF)
EXIFTOOL_PROCESSES = []
def escape_str(s):
"""escape string for use with exiftool -E"""
if type(s) != str:
return s
s = html.escape(s)
s = s.replace("\n", "
")
s = s.replace("\t", "	")
s = s.replace("\r", "
")
return s
def unescape_str(s):
"""unescape an HTML string returned by exiftool -E"""
if type(s) != str:
return s
return html.unescape(s)
@atexit.register
def terminate_exiftool():
"""Terminate any running ExifTool subprocesses; call this to cleanup when done using ExifTool """
"""Terminate any running ExifTool subprocesses; call this to cleanup when done using ExifTool"""
for proc in EXIFTOOL_PROCESSES:
proc._stop_proc()
@lru_cache(maxsize=1)
def get_exiftool_path():
""" return path of exiftool, cache result """
"""return path of exiftool, cache result"""
exiftool_path = shutil.which("exiftool")
if exiftool_path:
return exiftool_path.rstrip()
@@ -49,7 +68,7 @@ class _ExifToolProc:
Creates a singleton object"""
def __new__(cls, *args, **kwargs):
""" create new object or return instance of already created singleton """
"""create new object or return instance of already created singleton"""
if not hasattr(cls, "instance") or not cls.instance:
cls.instance = super().__new__(cls)
@@ -74,7 +93,7 @@ class _ExifToolProc:
@property
def process(self):
""" return the exiftool subprocess """
"""return the exiftool subprocess"""
if self._process_running:
return self._process
else:
@@ -83,16 +102,16 @@ class _ExifToolProc:
@property
def pid(self):
""" return process id (PID) of the exiftool process """
"""return process id (PID) of the exiftool process"""
return self._process.pid
@property
def exiftool(self):
""" return path to exiftool process """
"""return path to exiftool process"""
return self._exiftool
def _start_proc(self):
""" start exiftool in batch mode """
"""start exiftool in batch mode"""
if self._process_running:
logging.warning("exiftool already running: {self._process}")
@@ -110,6 +129,7 @@ class _ExifToolProc:
"-n", # no print conversion (e.g. print tag values in machine readable format)
"-P", # Preserve file modification date/time
"-G", # print group name for each tag
"-E", # escape tag values for HTML (allows use of HTML 
 for newlines)
],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
@@ -120,7 +140,7 @@ class _ExifToolProc:
EXIFTOOL_PROCESSES.append(self)
def _stop_proc(self):
""" stop the exiftool process if it's running, otherwise, do nothing """
"""stop the exiftool process if it's running, otherwise, do nothing"""
if not self._process_running:
return
@@ -143,7 +163,7 @@ class _ExifToolProc:
class ExifTool:
""" Basic exiftool interface for reading and writing EXIF tags """
"""Basic exiftool interface for reading and writing EXIF tags"""
def __init__(self, filepath, exiftool=None, overwrite=True, flags=None):
"""Create ExifTool object
@@ -189,6 +209,7 @@ class ExifTool:
if value is None:
value = ""
value = escape_str(value)
command = [f"-{tag}={value}"]
if self.overwrite and not self._context_mgr:
command.append("-overwrite_original")
@@ -233,6 +254,7 @@ class ExifTool:
for value in values:
if value is None:
raise ValueError("Can't add None value to tag")
value = escape_str(value)
command.append(f"-{tag}+={value}")
if self.overwrite and not self._context_mgr:
@@ -315,12 +337,12 @@ class ExifTool:
@property
def pid(self):
""" return process id (PID) of the exiftool process """
"""return process id (PID) of the exiftool process"""
return self._process.pid
@property
def version(self):
""" returns exiftool version """
"""returns exiftool version"""
ver, _, _ = self.run_commands("-ver", no_file=True)
return ver.decode("utf-8")
@@ -335,6 +357,7 @@ class ExifTool:
json_str, _, _ = self.run_commands("-json")
if not json_str:
return dict()
json_str = unescape_str(json_str.decode("utf-8"))
try:
exifdict = json.loads(json_str)
@@ -342,7 +365,6 @@ class ExifTool:
# will fail with some commands, e.g --ext AVI which produces
# 'No file with specified extension' instead of json
return dict()
exifdict = exifdict[0]
if not tag_groups:
# strip tag groups
@@ -358,12 +380,13 @@ class ExifTool:
return exifdict
def json(self):
""" returns JSON string containing all EXIF tags and values from exiftool """
"""returns JSON string containing all EXIF tags and values from exiftool"""
json, _, _ = self.run_commands("-json")
json = unescape_str(json.decode("utf-8"))
return json
def _read_exif(self):
""" read exif data from file """
"""read exif data from file"""
data = self.asdict()
self.data = {k: v for k, v in data.items()}
@@ -384,15 +407,15 @@ class ExifTool:
class ExifToolCaching(ExifTool):
""" Basic exiftool interface for reading and writing EXIF tags, with caching.
Use this only when you know the file's EXIF data will not be changed by any external process.
Creates a singleton cached ExifTool instance """
"""Basic exiftool interface for reading and writing EXIF tags, with caching.
Use this only when you know the file's EXIF data will not be changed by any external process.
Creates a singleton cached ExifTool instance"""
_singletons = {}
def __new__(cls, filepath, exiftool=None):
""" create new object or return instance of already created singleton """
"""create new object or return instance of already created singleton"""
if filepath not in cls._singletons:
cls._singletons[filepath] = _ExifToolCaching(filepath, exiftool=exiftool)
return cls._singletons[filepath]
@@ -448,7 +471,6 @@ class _ExifToolCaching(ExifTool):
return self._asdict_cache[tag_groups][normalized]
def flush_cache(self):
""" Clear cached data so that calls to json or asdict return fresh data """
"""Clear cached data so that calls to json or asdict return fresh data"""
self._json_cache = None
self._asdict_cache = {}

View File

@@ -1871,12 +1871,6 @@ def _exiftool_dict(
self.date_modified
).strftime("%Y:%m:%d %H:%M:%S")
# remove any new lines in any fields
for field, val in exif.items():
if type(val) == str:
exif[field] = val.replace("\n", " ")
elif type(val) == list:
exif[field] = [str(v).replace("\n", " ") for v in val if v is not None]
return exif