Updated ExifTool to use multiple processes
This commit is contained in:
@@ -1,11 +1,11 @@
|
|||||||
""" Yet another simple exiftool wrapper
|
""" Yet another simple exiftool wrapper
|
||||||
I rolled my own for the following reasons:
|
I rolled my own for the following reasons:
|
||||||
1. I wanted something under MIT license (best alternative was licensed under GPL/BSD)
|
1. I wanted something under MIT license (best alternative was licensed under GPL/BSD)
|
||||||
2. I wanted singleton behavior so only a single exiftool process was ever running
|
2. I wanted exiftool processes to stay resident between calls (improved performance)
|
||||||
3. When used as a context manager, I wanted the operations to batch until exiting the context (improved performance)
|
3. When used as a context manager, I wanted the operations to batch until exiting the context (improved performance)
|
||||||
If these aren't important to you, I highly recommend you use Sven Marnach's excellent
|
"""
|
||||||
pyexiftool: https://github.com/smarnach/pyexiftool which provides more functionality """
|
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import atexit
|
import atexit
|
||||||
import contextlib
|
import contextlib
|
||||||
@@ -30,6 +30,8 @@ __all__ = [
|
|||||||
"unescape_str",
|
"unescape_str",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
logger = logging.getLogger("osxphotos")
|
||||||
|
|
||||||
# exiftool in -stay_open mode outputs this EOF marker after each command is run
|
# exiftool in -stay_open mode outputs this EOF marker after each command is run
|
||||||
EXIFTOOL_STAYOPEN_EOF = "{ready}"
|
EXIFTOOL_STAYOPEN_EOF = "{ready}"
|
||||||
EXIFTOOL_STAYOPEN_EOF_LEN = len(EXIFTOOL_STAYOPEN_EOF)
|
EXIFTOOL_STAYOPEN_EOF_LEN = len(EXIFTOOL_STAYOPEN_EOF)
|
||||||
@@ -42,6 +44,8 @@ EXIFTOOL_FILETYPES_JSON = "exiftool_filetypes.json"
|
|||||||
with (pathlib.Path(__file__).parent / EXIFTOOL_FILETYPES_JSON).open("r") as f:
|
with (pathlib.Path(__file__).parent / EXIFTOOL_FILETYPES_JSON).open("r") as f:
|
||||||
EXIFTOOL_SUPPORTED_FILETYPES = json.load(f)
|
EXIFTOOL_SUPPORTED_FILETYPES = json.load(f)
|
||||||
|
|
||||||
|
NUM_PROCESSES = os.cpu_count() or 1
|
||||||
|
|
||||||
|
|
||||||
def exiftool_can_write(suffix: str) -> bool:
|
def exiftool_can_write(suffix: str) -> bool:
|
||||||
"""Return True if exiftool supports writing to a file with the given suffix, otherwise False"""
|
"""Return True if exiftool supports writing to a file with the given suffix, otherwise False"""
|
||||||
@@ -96,8 +100,11 @@ def get_exiftool_path():
|
|||||||
|
|
||||||
|
|
||||||
class _ExifToolProc:
|
class _ExifToolProc:
|
||||||
"""Runs exiftool in a subprocess via Popen
|
"""
|
||||||
Creates a singleton object"""
|
Runs exiftool in a subprocess via Popen
|
||||||
|
Creates a singleton object that dispatches commands to one or
|
||||||
|
more exiftool subprocesses.
|
||||||
|
"""
|
||||||
|
|
||||||
def __new__(cls, *args, **kwargs):
|
def __new__(cls, *args, **kwargs):
|
||||||
"""create new object or return instance of already created singleton"""
|
"""create new object or return instance of already created singleton"""
|
||||||
@@ -106,7 +113,11 @@ class _ExifToolProc:
|
|||||||
|
|
||||||
return cls.instance
|
return cls.instance
|
||||||
|
|
||||||
def __init__(self, exiftool=None, large_file_support=True):
|
def __init__(
|
||||||
|
self,
|
||||||
|
exiftool: str | None = None,
|
||||||
|
large_file_support: bool = True,
|
||||||
|
):
|
||||||
"""construct _ExifToolProc singleton object or return instance of already created object
|
"""construct _ExifToolProc singleton object or return instance of already created object
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -117,7 +128,7 @@ class _ExifToolProc:
|
|||||||
if hasattr(self, "_process_running") and self._process_running:
|
if hasattr(self, "_process_running") and self._process_running:
|
||||||
# already running
|
# already running
|
||||||
if exiftool is not None and exiftool != self._exiftool:
|
if exiftool is not None and exiftool != self._exiftool:
|
||||||
logging.warning(
|
logger.warning(
|
||||||
f"exiftool subprocess already running, "
|
f"exiftool subprocess already running, "
|
||||||
f"ignoring exiftool={exiftool}"
|
f"ignoring exiftool={exiftool}"
|
||||||
)
|
)
|
||||||
@@ -125,6 +136,9 @@ class _ExifToolProc:
|
|||||||
self._process_running = False
|
self._process_running = False
|
||||||
self._large_file_support = large_file_support
|
self._large_file_support = large_file_support
|
||||||
self._exiftool = exiftool or get_exiftool_path()
|
self._exiftool = exiftool or get_exiftool_path()
|
||||||
|
self._num_processes = NUM_PROCESSES
|
||||||
|
self._process = []
|
||||||
|
self._process_counter = 0
|
||||||
self._start_proc(large_file_support=large_file_support)
|
self._start_proc(large_file_support=large_file_support)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -132,23 +146,20 @@ class _ExifToolProc:
|
|||||||
"""return the exiftool subprocess"""
|
"""return the exiftool subprocess"""
|
||||||
if not self._process_running:
|
if not self._process_running:
|
||||||
self._start_proc(large_file_support=self._large_file_support)
|
self._start_proc(large_file_support=self._large_file_support)
|
||||||
return self._process
|
process_idx = self._process_counter % self._num_processes
|
||||||
|
self._process_counter += 1
|
||||||
@property
|
return self._process[process_idx]
|
||||||
def pid(self):
|
|
||||||
"""return process id (PID) of the exiftool process"""
|
|
||||||
return self._process.pid
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def exiftool(self):
|
def exiftool(self):
|
||||||
"""return path to exiftool process"""
|
"""return path to exiftool process"""
|
||||||
return self._exiftool
|
return self._exiftool
|
||||||
|
|
||||||
def _start_proc(self, large_file_support):
|
def _start_proc(self, large_file_support: bool):
|
||||||
"""start exiftool in batch mode"""
|
"""start exiftool in batch mode"""
|
||||||
|
|
||||||
if self._process_running:
|
if self._process_running:
|
||||||
logging.warning("exiftool already running: {self._process}")
|
logger.debug(f"exiftool already running: {self._process}")
|
||||||
return
|
return
|
||||||
|
|
||||||
# open exiftool process
|
# open exiftool process
|
||||||
@@ -156,25 +167,28 @@ class _ExifToolProc:
|
|||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
env["PATH"] = f'/usr/bin/:{env["PATH"]}'
|
env["PATH"] = f'/usr/bin/:{env["PATH"]}'
|
||||||
large_file_args = ["-api", "largefilesupport=1"] if large_file_support else []
|
large_file_args = ["-api", "largefilesupport=1"] if large_file_support else []
|
||||||
self._process = subprocess.Popen(
|
for _ in range(self._num_processes):
|
||||||
[
|
self._process.append(
|
||||||
self._exiftool,
|
subprocess.Popen(
|
||||||
"-stay_open", # keep process open in batch mode
|
[
|
||||||
"True", # -stay_open=True, keep process open in batch mode
|
self._exiftool,
|
||||||
*large_file_args,
|
"-stay_open", # keep process open in batch mode
|
||||||
"-@", # read command-line arguments from file
|
"True", # -stay_open=True, keep process open in batch mode
|
||||||
"-", # read from stdin
|
*large_file_args,
|
||||||
"-common_args", # specifies args common to all commands subsequently run
|
"-@", # read command-line arguments from file
|
||||||
"-n", # no print conversion (e.g. print tag values in machine readable format)
|
"-", # read from stdin
|
||||||
"-P", # Preserve file modification date/time
|
"-common_args", # specifies args common to all commands subsequently run
|
||||||
"-G", # print group name for each tag
|
"-n", # no print conversion (e.g. print tag values in machine readable format)
|
||||||
"-E", # escape tag values for HTML (allows use of HTML 
 for newlines)
|
"-P", # Preserve file modification date/time
|
||||||
],
|
"-G", # print group name for each tag
|
||||||
stdin=subprocess.PIPE,
|
"-E", # escape tag values for HTML (allows use of HTML 
 for newlines)
|
||||||
stdout=subprocess.PIPE,
|
],
|
||||||
stderr=subprocess.STDOUT,
|
stdin=subprocess.PIPE,
|
||||||
env=env,
|
stdout=subprocess.PIPE,
|
||||||
)
|
stderr=subprocess.STDOUT,
|
||||||
|
env=env,
|
||||||
|
)
|
||||||
|
)
|
||||||
self._process_running = True
|
self._process_running = True
|
||||||
|
|
||||||
EXIFTOOL_PROCESSES.append(self)
|
EXIFTOOL_PROCESSES.append(self)
|
||||||
@@ -185,17 +199,19 @@ class _ExifToolProc:
|
|||||||
if not self._process_running:
|
if not self._process_running:
|
||||||
return
|
return
|
||||||
|
|
||||||
with contextlib.suppress(Exception):
|
for i in range(self._num_processes):
|
||||||
self._process.stdin.write(b"-stay_open\n")
|
process = self._process[i]
|
||||||
self._process.stdin.write(b"False\n")
|
with contextlib.suppress(Exception):
|
||||||
self._process.stdin.flush()
|
process.stdin.write(b"-stay_open\n")
|
||||||
try:
|
process.stdin.write(b"False\n")
|
||||||
self._process.communicate(timeout=5)
|
process.stdin.flush()
|
||||||
except subprocess.TimeoutExpired:
|
try:
|
||||||
self._process.kill()
|
process.communicate(timeout=5)
|
||||||
self._process.communicate()
|
except subprocess.TimeoutExpired:
|
||||||
|
process.kill()
|
||||||
|
process.communicate()
|
||||||
|
|
||||||
del self._process
|
self._process = []
|
||||||
self._process_running = False
|
self._process_running = False
|
||||||
|
|
||||||
|
|
||||||
@@ -361,15 +377,16 @@ class ExifTool:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# send the command
|
# send the command
|
||||||
self._process.stdin.write(command_str)
|
process = self._process
|
||||||
self._process.stdin.flush()
|
process.stdin.write(command_str)
|
||||||
|
process.stdin.flush()
|
||||||
|
|
||||||
# read the output
|
# read the output
|
||||||
output = b""
|
output = b""
|
||||||
warning = b""
|
warning = b""
|
||||||
error = b""
|
error = b""
|
||||||
while EXIFTOOL_STAYOPEN_EOF not in str(output):
|
while EXIFTOOL_STAYOPEN_EOF not in str(output):
|
||||||
line = self._process.stdout.readline()
|
line = process.stdout.readline()
|
||||||
if line.startswith(b"Warning"):
|
if line.startswith(b"Warning"):
|
||||||
warning += line.strip()
|
warning += line.strip()
|
||||||
elif line.startswith(b"Error"):
|
elif line.startswith(b"Error"):
|
||||||
@@ -383,11 +400,6 @@ class ExifTool:
|
|||||||
|
|
||||||
return output[:-EXIFTOOL_STAYOPEN_EOF_LEN], warning, error
|
return output[:-EXIFTOOL_STAYOPEN_EOF_LEN], warning, error
|
||||||
|
|
||||||
@property
|
|
||||||
def pid(self):
|
|
||||||
"""return process id (PID) of the exiftool process"""
|
|
||||||
return self._process.pid
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def version(self):
|
def version(self):
|
||||||
"""returns exiftool version"""
|
"""returns exiftool version"""
|
||||||
@@ -404,7 +416,7 @@ class ExifTool:
|
|||||||
"""
|
"""
|
||||||
json_str, _, _ = self.run_commands("-json")
|
json_str, _, _ = self.run_commands("-json")
|
||||||
if not json_str:
|
if not json_str:
|
||||||
return dict()
|
return {}
|
||||||
json_str = unescape_str(json_str.decode("utf-8"))
|
json_str = unescape_str(json_str.decode("utf-8"))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -412,8 +424,8 @@ class ExifTool:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
# will fail with some commands, e.g. --ext AVI which produces
|
# will fail with some commands, e.g. --ext AVI which produces
|
||||||
# 'No file with specified extension' instead of json
|
# 'No file with specified extension' instead of json
|
||||||
logging.warning(f"error loading json returned by exiftool: {e} {json_str}")
|
logger.warning(f"error loading json returned by exiftool: {e} {json_str}")
|
||||||
return dict()
|
return {}
|
||||||
exifdict = exifdict[0]
|
exifdict = exifdict[0]
|
||||||
if not tag_groups:
|
if not tag_groups:
|
||||||
# strip tag groups
|
# strip tag groups
|
||||||
@@ -482,7 +494,12 @@ class _ExifToolCaching(ExifTool):
|
|||||||
"""
|
"""
|
||||||
self._json_cache = None
|
self._json_cache = None
|
||||||
self._asdict_cache = {}
|
self._asdict_cache = {}
|
||||||
super().__init__(filepath, exiftool=exiftool, overwrite=False, flags=None)
|
super().__init__(
|
||||||
|
filepath,
|
||||||
|
exiftool=exiftool,
|
||||||
|
overwrite=False,
|
||||||
|
flags=None,
|
||||||
|
)
|
||||||
|
|
||||||
def run_commands(self, *commands, no_file=False):
|
def run_commands(self, *commands, no_file=False):
|
||||||
if commands[0] not in ["-json", "-ver"]:
|
if commands[0] not in ["-json", "-ver"]:
|
||||||
|
|||||||
@@ -419,22 +419,6 @@ def test_addvalues_unicode():
|
|||||||
assert sorted(exif.data["IPTC:Keywords"]) == sorted(["ǂ", "Ƕ"])
|
assert sorted(exif.data["IPTC:Keywords"]) == sorted(["ǂ", "Ƕ"])
|
||||||
|
|
||||||
|
|
||||||
def test_singleton():
|
|
||||||
import osxphotos.exiftool
|
|
||||||
|
|
||||||
exif1 = osxphotos.exiftool.ExifTool(TEST_FILE_ONE_KEYWORD)
|
|
||||||
exif2 = osxphotos.exiftool.ExifTool(TEST_FILE_MULTI_KEYWORD)
|
|
||||||
|
|
||||||
assert exif1._process.pid == exif2._process.pid
|
|
||||||
|
|
||||||
|
|
||||||
def test_pid():
|
|
||||||
import osxphotos.exiftool
|
|
||||||
|
|
||||||
exif1 = osxphotos.exiftool.ExifTool(TEST_FILE_ONE_KEYWORD)
|
|
||||||
assert exif1.pid == exif1._process.pid
|
|
||||||
|
|
||||||
|
|
||||||
def test_exiftoolproc_process():
|
def test_exiftoolproc_process():
|
||||||
import osxphotos.exiftool
|
import osxphotos.exiftool
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user