Refactored photoexporter for performance, #591

This commit is contained in:
Rhet Turnbull 2022-01-28 23:15:02 -08:00
parent 9da747ea9d
commit 6843b8661d
5 changed files with 212 additions and 90 deletions

View File

@ -1,3 +1,3 @@
""" version info """
__version__ = "0.44.13"
__version__ = "0.45.0"

View File

@ -16,7 +16,7 @@ from ._version import __version__
__all__ = ["ExportDB_ABC", "ExportDBNoOp", "ExportDB", "ExportDBInMemory"]
OSXPHOTOS_EXPORTDB_VERSION = "4.0"
OSXPHOTOS_EXPORTDB_VERSION = "4.2"
OSXPHOTOS_ABOUT_STRING = f"Created by osxphotos version {__version__} (https://github.com/RhetTbull/osxphotos) on {datetime.datetime.now()}"
@ -104,12 +104,12 @@ class ExportDB_ABC(ABC):
self,
filename,
uuid,
orig_stat,
exif_stat,
converted_stat,
edited_stat,
info_json,
exif_json,
orig_stat=None,
exif_stat=None,
converted_stat=None,
edited_stat=None,
info_json=None,
exif_json=None,
):
pass
@ -183,12 +183,12 @@ class ExportDBNoOp(ExportDB_ABC):
self,
filename,
uuid,
orig_stat,
exif_stat,
converted_stat,
edited_stat,
info_json,
exif_json,
orig_stat=None,
exif_stat=None,
converted_stat=None,
edited_stat=None,
info_json=None,
exif_json=None,
):
pass
@ -506,52 +506,65 @@ class ExportDB(ExportDB_ABC):
self,
filename,
uuid,
orig_stat,
exif_stat,
converted_stat,
edited_stat,
info_json,
exif_json,
orig_stat=None,
exif_stat=None,
converted_stat=None,
edited_stat=None,
info_json=None,
exif_json=None,
):
"""sets all the data for file and uuid at once"""
"""sets all the data for file and uuid at once; if any value is None, does not set it"""
filename = str(pathlib.Path(filename).relative_to(self._path))
filename_normalized = filename.lower()
conn = self._conn
try:
c = conn.cursor()
# update files table (if needed);
# this statement works around fact that there was no unique constraint on files.filepath_normalized
c.execute(
"INSERT OR REPLACE INTO files(filepath, filepath_normalized, uuid) VALUES (?, ?, ?);",
"""INSERT OR IGNORE INTO files(filepath, filepath_normalized, uuid) VALUES (?, ?, ?);""",
(filename, filename_normalized, uuid),
)
c.execute(
"UPDATE files "
+ "SET orig_mode = ?, orig_size = ?, orig_mtime = ? "
+ "WHERE filepath_normalized = ?;",
(*orig_stat, filename_normalized),
)
c.execute(
"UPDATE files "
+ "SET exif_mode = ?, exif_size = ?, exif_mtime = ? "
+ "WHERE filepath_normalized = ?;",
(*exif_stat, filename_normalized),
)
c.execute(
"INSERT OR REPLACE INTO converted(filepath_normalized, mode, size, mtime) VALUES (?, ?, ?, ?);",
(filename_normalized, *converted_stat),
)
c.execute(
"INSERT OR REPLACE INTO edited(filepath_normalized, mode, size, mtime) VALUES (?, ?, ?, ?);",
(filename_normalized, *edited_stat),
)
c.execute(
"INSERT OR REPLACE INTO info(uuid, json_info) VALUES (?, ?);",
(uuid, info_json),
)
c.execute(
"INSERT OR REPLACE INTO exifdata(filepath_normalized, json_exifdata) VALUES (?, ?);",
(filename_normalized, exif_json),
)
if orig_stat is not None:
c.execute(
"UPDATE files "
+ "SET orig_mode = ?, orig_size = ?, orig_mtime = ? "
+ "WHERE filepath_normalized = ?;",
(*orig_stat, filename_normalized),
)
if exif_stat is not None:
c.execute(
"UPDATE files "
+ "SET exif_mode = ?, exif_size = ?, exif_mtime = ? "
+ "WHERE filepath_normalized = ?;",
(*exif_stat, filename_normalized),
)
if converted_stat is not None:
c.execute(
"INSERT OR REPLACE INTO converted(filepath_normalized, mode, size, mtime) VALUES (?, ?, ?, ?);",
(filename_normalized, *converted_stat),
)
if edited_stat is not None:
c.execute(
"INSERT OR REPLACE INTO edited(filepath_normalized, mode, size, mtime) VALUES (?, ?, ?, ?);",
(filename_normalized, *edited_stat),
)
if info_json is not None:
c.execute(
"INSERT OR REPLACE INTO info(uuid, json_info) VALUES (?, ?);",
(uuid, info_json),
)
if exif_json is not None:
c.execute(
"INSERT OR REPLACE INTO exifdata(filepath_normalized, json_exifdata) VALUES (?, ?);",
(filename_normalized, exif_json),
)
conn.commit()
except Error as e:
logging.warning(e)
@ -662,6 +675,22 @@ class ExportDB(ExportDB_ABC):
exif_size INTEGER,
exif_mtime REAL
); """,
"sql_files_table_migrate": """ CREATE TABLE IF NOT EXISTS files_migrate (
id INTEGER PRIMARY KEY,
filepath TEXT NOT NULL,
filepath_normalized TEXT NOT NULL,
uuid TEXT,
orig_mode INTEGER,
orig_size INTEGER,
orig_mtime REAL,
exif_mode INTEGER,
exif_size INTEGER,
exif_mtime REAL,
UNIQUE(filepath_normalized)
); """,
"sql_files_migrate": """ INSERT INTO files_migrate SELECT * FROM files;""",
"sql_files_drop_tables": """ DROP TABLE files;""",
"sql_files_alter": """ ALTER TABLE files_migrate RENAME TO files;""",
"sql_runs_table": """ CREATE TABLE IF NOT EXISTS runs (
id INTEGER PRIMARY KEY,
datetime TEXT,

View File

@ -181,7 +181,6 @@ class FileUtilMacOS(FileUtilABC):
return False
s1 = cls._sig(os.stat(f1))
if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG:
return False
return s1 == s2

View File

@ -640,12 +640,6 @@ class PhotoExporter:
all_results += self._write_sidecar_files(dest=dest, options=options)
# if exiftool, write the metadata
if options.exiftool:
all_results += self._write_exif_metadata_to_files(
all_results, options=options
)
if options.touch_file:
all_results += self._touch_files(all_results, options)
@ -722,11 +716,7 @@ class PhotoExporter:
filename=dest,
uuid=self.photo.uuid,
orig_stat=fileutil.file_sig(dest),
exif_stat=(None, None, None),
converted_stat=(None, None, None),
edited_stat=(None, None, None),
info_json=self.photo.json(),
exif_json=None,
)
if dest_uuid != self.photo.uuid:
# not the right file, find the right one
@ -745,11 +735,7 @@ class PhotoExporter:
filename=dest,
uuid=self.photo.uuid,
orig_stat=fileutil.file_sig(dest),
exif_stat=(None, None, None),
converted_stat=(None, None, None),
edited_stat=(None, None, None),
info_json=self.photo.json(),
exif_json=None,
)
break
else:
@ -795,7 +781,7 @@ class PhotoExporter:
if options.live_photo and self.photo.live_photo:
staged.edited_live = self.photo.path_edited_live_photo
if options.exiftool and not options.dry_run:
if options.exiftool and not options.dry_run and not options.export_as_hardlink:
# copy files to temp dir for exiftool to process before export
# not needed for download_missing or use_photokit as those files already staged to temp dir
for file_type in [
@ -1040,11 +1026,13 @@ class PhotoExporter:
return filepath.parent == self._temp_dir_path
def _copy_to_temp_file(self, filepath: str) -> str:
"""Copies filepath to a temp file"""
"""Copies filepath to a temp file preserving access and modification times"""
filepath = pathlib.Path(filepath)
dest = self._temp_dir_path / filepath.name
dest = increment_filename(dest)
self.fileutil.copy(filepath, dest)
stat = os.stat(filepath)
self.fileutil.utime(dest, (stat.st_atime, stat.st_mtime))
return str(dest)
def _export_photo(
@ -1057,7 +1045,9 @@ class PhotoExporter:
Does the actual copy or hardlink taking the appropriate
action depending on update, overwrite, export_as_hardlink
Assumes destination is the right destination (e.g. UUID matches)
sets UUID and JSON info for exported file using set_uuid_for_file, set_info_for_uuid
Sets UUID and JSON info for exported file using set_uuid_for_file, set_info_for_uuid
Expects that src is a temporary file (as set by _stage_photos_for_export) and
may modify the src (e.g. for convert_to_jpeg or exiftool)
Args:
src (str): src path
@ -1082,9 +1072,12 @@ class PhotoExporter:
exported_files = []
update_updated_files = []
update_new_files = []
update_skipped_files = []
update_skipped_files = [] # skip files that are already up to date
touched_files = []
converted_to_jpeg_files = []
exif_results = ExportResults()
converted_stat = None
edited_stat = None
dest_str = str(dest)
dest_exists = dest.exists()
@ -1174,8 +1167,9 @@ class PhotoExporter:
sig = (sig[0], sig[1], int(self.photo.date.timestamp()))
if not fileutil.cmp_file_sig(src, sig):
touched_files.append(dest_str)
if not update_skipped_files:
converted_stat = (None, None, None)
# have file to export
edited_stat = (
fileutil.file_sig(src) if options.edited else (None, None, None)
)
@ -1194,14 +1188,27 @@ class PhotoExporter:
raise ExportError(
f"Error hardlinking {src} to {dest}: {e} ({lineno(__file__)})"
) from e
elif options.convert_to_jpeg:
# use convert_to_jpeg to export the file
fileutil.convert_to_jpeg(
src, dest_str, compression_quality=options.jpeg_quality
)
converted_stat = fileutil.file_sig(dest_str)
converted_to_jpeg_files.append(dest_str)
else:
if options.convert_to_jpeg:
# use convert_to_jpeg to export the file
# convert to a temp file before copying
tmp_file = increment_filename(
self._temp_dir_path
/ f"{pathlib.Path(src).stem}_converted_to_jpeg.jpeg"
)
fileutil.convert_to_jpeg(
src, tmp_file, compression_quality=options.jpeg_quality
)
src = tmp_file
converted_stat = fileutil.file_sig(tmp_file)
converted_to_jpeg_files.append(dest_str)
if options.exiftool:
# if exiftool, write the metadata
exif_results = self._write_exif_metadata_to_file(
src, dest, options=options
)
try:
fileutil.copy(src, dest_str)
except Exception as e:
@ -1209,24 +1216,26 @@ class PhotoExporter:
f"Error copying file {src} to {dest_str}: {e} ({lineno(__file__)})"
) from e
export_db.set_data(
filename=dest_str,
uuid=self.photo.uuid,
orig_stat=fileutil.file_sig(dest_str),
exif_stat=(None, None, None),
converted_stat=converted_stat,
edited_stat=edited_stat,
info_json=self.photo.json(),
exif_json=None,
)
export_db.set_data(
filename=dest_str,
uuid=self.photo.uuid,
orig_stat=fileutil.file_sig(dest_str),
converted_stat=converted_stat,
edited_stat=edited_stat,
info_json=self.photo.json(),
)
return ExportResults(
converted_to_jpeg=converted_to_jpeg_files,
error=exif_results.error,
exif_updated=exif_results.exif_updated,
exiftool_error=exif_results.exiftool_error,
exiftool_warning=exif_results.exiftool_warning,
exported=exported_files + update_new_files + update_updated_files,
new=update_new_files,
updated=update_updated_files,
skipped=update_skipped_files,
to_touch=touched_files,
converted_to_jpeg=converted_to_jpeg_files,
updated=update_updated_files,
)
def _write_sidecar_files(
@ -1344,7 +1353,72 @@ class PhotoExporter:
sidecar_xmp_skipped=sidecar_xmp_files_skipped,
)
def _write_exif_metadata_to_files(
def _write_exif_metadata_to_file(
self,
src,
dest,
options: ExportOptions,
) -> ExportResults:
"""Write exif metadata to src using exiftool and record the result for dest in the export database

Note: this method modifies src, so src must be a copy of the original file.
It does not write to dest; dest is the intended destination and is used for
export database lookups and updates, for log messages, and for the paths reported
in the returned results. This allows the exiftool update to be done on the
local machine prior to the file being copied to the export destination, which may be on a
network drive or other slower external storage.

Args:
src: path of the file that exiftool modifies
dest: intended export path of the file (not written by this method)
options: ExportOptions; this method reads export_db, fileutil, verbose, update, and dry_run

Returns:
ExportResults; exiftool_warning, exiftool_error, and error receive (dest, message) tuples
reported by self._write_exif_data, and exif_updated and to_touch receive dest
"""
# NOTE(review): this listing carries no indentation, so the nesting of the statements
# below is not visible here; confirm block structure against the real file before editing
export_db = options.export_db
fileutil = options.fileutil
# fall back to the exporter's own verbose callable when options.verbose is falsy
verbose = options.verbose or self._verbose
exiftool_results = ExportResults()
# determine if we need to write the exif metadata
# if we are not updating, we always write
# else, need to check the database to determine if we need to write
run_exiftool = not options.update
# NOTE(review): "foo" looks like a placeholder; current_data is reassigned from the
# sidecar JSON before the comparison below reads it -- confirm it can be removed
current_data = "foo"
if options.update:
files_are_different = False
# exif JSON previously stored in the export database for dest
old_data = export_db.get_exifdata_for_file(dest)
if old_data is not None:
# both values are JSON documents; only element [0] of each is compared
old_data = json.loads(old_data)[0]
current_data = json.loads(self._exiftool_json_sidecar(options=options))[
0
]
if old_data != current_data:
files_are_different = True
if old_data is None or files_are_different:
# didn't have old data, assume we need to write it
# or files were different
run_exiftool = True
else:
verbose(
f"Skipped up to date exiftool metadata for {pathlib.Path(dest).name}"
)
if run_exiftool:
verbose(f"Writing metadata with exiftool for {pathlib.Path(dest).name}")
if not options.dry_run:
# exiftool operates on src; warnings and errors are reported against dest
warning_, error_ = self._write_exif_data(src, options=options)
if warning_:
exiftool_results.exiftool_warning.append((dest, warning_))
if error_:
# an exiftool error is recorded in both exiftool_error and the general error list
exiftool_results.exiftool_error.append((dest, error_))
exiftool_results.error.append((dest, error_))
# record the signature of src (the file exiftool was run on) and the sidecar JSON under dest
export_db.set_data(
dest,
uuid=self.photo.uuid,
exif_stat=fileutil.file_sig(src),
exif_json=self._exiftool_json_sidecar(options=options),
)
exiftool_results.exif_updated.append(dest)
exiftool_results.to_touch.append(dest)
return exiftool_results
def _write_exif_metadata_to_files_zzz(
self,
results: ExportResults,
options: ExportOptions,

View File

@ -74,6 +74,26 @@ def test_export_db():
assert db.get_stat_edited_for_file(filepath2) == (10, 11, 12)
assert sorted(db.get_previous_uuids()) == (["BAR-FOO", "FOO-BAR"])
# test set_data value=None doesn't overwrite existing data
db.set_data(
filepath2,
"BAR-FOO",
None,
None,
None,
None,
None,
None,
)
assert db.get_uuid_for_file(filepath2) == "BAR-FOO"
assert db.get_info_for_uuid("BAR-FOO") == INFO_DATA
assert db.get_exifdata_for_file(filepath2) == EXIF_DATA
assert db.get_stat_orig_for_file(filepath2) == (1, 2, 3)
assert db.get_stat_exif_for_file(filepath2) == (4, 5, 6)
assert db.get_stat_converted_for_file(filepath2) == (7, 8, 9)
assert db.get_stat_edited_for_file(filepath2) == (10, 11, 12)
assert sorted(db.get_previous_uuids()) == (["BAR-FOO", "FOO-BAR"])
# close and re-open
db.close()
db = ExportDB(dbname, tempdir.name)