Updated ExifTool to use multiple processes

This commit is contained in:
Rhet Turnbull
2023-04-01 19:00:01 -07:00
parent 93d22c646f
commit d8c7d45056
2 changed files with 73 additions and 72 deletions

View File

@@ -1,11 +1,11 @@
""" Yet another simple exiftool wrapper """ Yet another simple exiftool wrapper
I rolled my own for following reasons: I rolled my own for following reasons:
1. I wanted something under MIT license (best alternative was licensed under GPL/BSD) 1. I wanted something under MIT license (best alternative was licensed under GPL/BSD)
2. I wanted singleton behavior so only a single exiftool process was ever running 2. I wanted exiftool processes to stay resident between calls (improved performance)
3. When used as a context manager, I wanted the operations to batch until exiting the context (improved performance) 3. When used as a context manager, I wanted the operations to batch until exiting the context (improved performance)
If these aren't important to you, I highly recommend you use Sven Marnach's excellent """
pyexiftool: https://github.com/smarnach/pyexiftool which provides more functionality """
from __future__ import annotations
import atexit import atexit
import contextlib import contextlib
@@ -30,6 +30,8 @@ __all__ = [
"unescape_str", "unescape_str",
] ]
logger = logging.getLogger("osxphotos")
# exiftool -stay_open commands outputs this EOF marker after command is run # exiftool -stay_open commands outputs this EOF marker after command is run
EXIFTOOL_STAYOPEN_EOF = "{ready}" EXIFTOOL_STAYOPEN_EOF = "{ready}"
EXIFTOOL_STAYOPEN_EOF_LEN = len(EXIFTOOL_STAYOPEN_EOF) EXIFTOOL_STAYOPEN_EOF_LEN = len(EXIFTOOL_STAYOPEN_EOF)
@@ -42,6 +44,8 @@ EXIFTOOL_FILETYPES_JSON = "exiftool_filetypes.json"
with (pathlib.Path(__file__).parent / EXIFTOOL_FILETYPES_JSON).open("r") as f: with (pathlib.Path(__file__).parent / EXIFTOOL_FILETYPES_JSON).open("r") as f:
EXIFTOOL_SUPPORTED_FILETYPES = json.load(f) EXIFTOOL_SUPPORTED_FILETYPES = json.load(f)
NUM_PROCESSES = os.cpu_count() or 1
def exiftool_can_write(suffix: str) -> bool: def exiftool_can_write(suffix: str) -> bool:
"""Return True if exiftool supports writing to a file with the given suffix, otherwise False""" """Return True if exiftool supports writing to a file with the given suffix, otherwise False"""
@@ -96,8 +100,11 @@ def get_exiftool_path():
class _ExifToolProc: class _ExifToolProc:
"""Runs exiftool in a subprocess via Popen """
Creates a singleton object""" Runs exiftool in a subprocess via Popen
Creates a singleton object that dispatches commands to one or
more exiftool subprocesses.
"""
def __new__(cls, *args, **kwargs): def __new__(cls, *args, **kwargs):
"""create new object or return instance of already created singleton""" """create new object or return instance of already created singleton"""
@@ -106,7 +113,11 @@ class _ExifToolProc:
return cls.instance return cls.instance
def __init__(self, exiftool=None, large_file_support=True): def __init__(
self,
exiftool: str | None = None,
large_file_support: bool = True,
):
"""construct _ExifToolProc singleton object or return instance of already created object """construct _ExifToolProc singleton object or return instance of already created object
Args: Args:
@@ -117,7 +128,7 @@ class _ExifToolProc:
if hasattr(self, "_process_running") and self._process_running: if hasattr(self, "_process_running") and self._process_running:
# already running # already running
if exiftool is not None and exiftool != self._exiftool: if exiftool is not None and exiftool != self._exiftool:
logging.warning( logger.warning(
f"exiftool subprocess already running, " f"exiftool subprocess already running, "
f"ignoring exiftool={exiftool}" f"ignoring exiftool={exiftool}"
) )
@@ -125,6 +136,9 @@ class _ExifToolProc:
self._process_running = False self._process_running = False
self._large_file_support = large_file_support self._large_file_support = large_file_support
self._exiftool = exiftool or get_exiftool_path() self._exiftool = exiftool or get_exiftool_path()
self._num_processes = NUM_PROCESSES
self._process = []
self._process_counter = 0
self._start_proc(large_file_support=large_file_support) self._start_proc(large_file_support=large_file_support)
@property @property
@@ -132,23 +146,20 @@ class _ExifToolProc:
"""return the exiftool subprocess""" """return the exiftool subprocess"""
if not self._process_running: if not self._process_running:
self._start_proc(large_file_support=self._large_file_support) self._start_proc(large_file_support=self._large_file_support)
return self._process process_idx = self._process_counter % self._num_processes
self._process_counter += 1
@property return self._process[process_idx]
def pid(self):
"""return process id (PID) of the exiftool process"""
return self._process.pid
@property @property
def exiftool(self): def exiftool(self):
"""return path to exiftool process""" """return path to exiftool process"""
return self._exiftool return self._exiftool
def _start_proc(self, large_file_support): def _start_proc(self, large_file_support: bool):
"""start exiftool in batch mode""" """start exiftool in batch mode"""
if self._process_running: if self._process_running:
logging.warning("exiftool already running: {self._process}") logger.debug(f"exiftool already running: {self._process}")
return return
# open exiftool process # open exiftool process
@@ -156,25 +167,28 @@ class _ExifToolProc:
env = os.environ.copy() env = os.environ.copy()
env["PATH"] = f'/usr/bin/:{env["PATH"]}' env["PATH"] = f'/usr/bin/:{env["PATH"]}'
large_file_args = ["-api", "largefilesupport=1"] if large_file_support else [] large_file_args = ["-api", "largefilesupport=1"] if large_file_support else []
self._process = subprocess.Popen( for _ in range(self._num_processes):
[ self._process.append(
self._exiftool, subprocess.Popen(
"-stay_open", # keep process open in batch mode [
"True", # -stay_open=True, keep process open in batch mode self._exiftool,
*large_file_args, "-stay_open", # keep process open in batch mode
"-@", # read command-line arguments from file "True", # -stay_open=True, keep process open in batch mode
"-", # read from stdin *large_file_args,
"-common_args", # specifies args common to all commands subsequently run "-@", # read command-line arguments from file
"-n", # no print conversion (e.g. print tag values in machine readable format) "-", # read from stdin
"-P", # Preserve file modification date/time "-common_args", # specifies args common to all commands subsequently run
"-G", # print group name for each tag "-n", # no print conversion (e.g. print tag values in machine readable format)
"-E", # escape tag values for HTML (allows use of HTML &#xa; for newlines) "-P", # Preserve file modification date/time
], "-G", # print group name for each tag
stdin=subprocess.PIPE, "-E", # escape tag values for HTML (allows use of HTML &#xa; for newlines)
stdout=subprocess.PIPE, ],
stderr=subprocess.STDOUT, stdin=subprocess.PIPE,
env=env, stdout=subprocess.PIPE,
) stderr=subprocess.STDOUT,
env=env,
)
)
self._process_running = True self._process_running = True
EXIFTOOL_PROCESSES.append(self) EXIFTOOL_PROCESSES.append(self)
@@ -185,17 +199,19 @@ class _ExifToolProc:
if not self._process_running: if not self._process_running:
return return
with contextlib.suppress(Exception): for i in range(self._num_processes):
self._process.stdin.write(b"-stay_open\n") process = self._process[i]
self._process.stdin.write(b"False\n") with contextlib.suppress(Exception):
self._process.stdin.flush() process.stdin.write(b"-stay_open\n")
try: process.stdin.write(b"False\n")
self._process.communicate(timeout=5) process.stdin.flush()
except subprocess.TimeoutExpired: try:
self._process.kill() process.communicate(timeout=5)
self._process.communicate() except subprocess.TimeoutExpired:
process.kill()
process.communicate()
del self._process self._process = []
self._process_running = False self._process_running = False
@@ -361,15 +377,16 @@ class ExifTool:
) )
# send the command # send the command
self._process.stdin.write(command_str) process = self._process
self._process.stdin.flush() process.stdin.write(command_str)
process.stdin.flush()
# read the output # read the output
output = b"" output = b""
warning = b"" warning = b""
error = b"" error = b""
while EXIFTOOL_STAYOPEN_EOF not in str(output): while EXIFTOOL_STAYOPEN_EOF not in str(output):
line = self._process.stdout.readline() line = process.stdout.readline()
if line.startswith(b"Warning"): if line.startswith(b"Warning"):
warning += line.strip() warning += line.strip()
elif line.startswith(b"Error"): elif line.startswith(b"Error"):
@@ -383,11 +400,6 @@ class ExifTool:
return output[:-EXIFTOOL_STAYOPEN_EOF_LEN], warning, error return output[:-EXIFTOOL_STAYOPEN_EOF_LEN], warning, error
@property
def pid(self):
"""return process id (PID) of the exiftool process"""
return self._process.pid
@property @property
def version(self): def version(self):
"""returns exiftool version""" """returns exiftool version"""
@@ -404,7 +416,7 @@ class ExifTool:
""" """
json_str, _, _ = self.run_commands("-json") json_str, _, _ = self.run_commands("-json")
if not json_str: if not json_str:
return dict() return {}
json_str = unescape_str(json_str.decode("utf-8")) json_str = unescape_str(json_str.decode("utf-8"))
try: try:
@@ -412,8 +424,8 @@ class ExifTool:
except Exception as e: except Exception as e:
# will fail with some commands, e.g --ext AVI which produces # will fail with some commands, e.g --ext AVI which produces
# 'No file with specified extension' instead of json # 'No file with specified extension' instead of json
logging.warning(f"error loading json returned by exiftool: {e} {json_str}") logger.warning(f"error loading json returned by exiftool: {e} {json_str}")
return dict() return {}
exifdict = exifdict[0] exifdict = exifdict[0]
if not tag_groups: if not tag_groups:
# strip tag groups # strip tag groups
@@ -482,7 +494,12 @@ class _ExifToolCaching(ExifTool):
""" """
self._json_cache = None self._json_cache = None
self._asdict_cache = {} self._asdict_cache = {}
super().__init__(filepath, exiftool=exiftool, overwrite=False, flags=None) super().__init__(
filepath,
exiftool=exiftool,
overwrite=False,
flags=None,
)
def run_commands(self, *commands, no_file=False): def run_commands(self, *commands, no_file=False):
if commands[0] not in ["-json", "-ver"]: if commands[0] not in ["-json", "-ver"]:

View File

@@ -419,22 +419,6 @@ def test_addvalues_unicode():
assert sorted(exif.data["IPTC:Keywords"]) == sorted(["ǂ", "Ƕ"]) assert sorted(exif.data["IPTC:Keywords"]) == sorted(["ǂ", "Ƕ"])
def test_singleton():
import osxphotos.exiftool
exif1 = osxphotos.exiftool.ExifTool(TEST_FILE_ONE_KEYWORD)
exif2 = osxphotos.exiftool.ExifTool(TEST_FILE_MULTI_KEYWORD)
assert exif1._process.pid == exif2._process.pid
def test_pid():
import osxphotos.exiftool
exif1 = osxphotos.exiftool.ExifTool(TEST_FILE_ONE_KEYWORD)
assert exif1.pid == exif1._process.pid
def test_exiftoolproc_process(): def test_exiftoolproc_process():
import osxphotos.exiftool import osxphotos.exiftool