performance improvements for update and export_db

This commit is contained in:
Rhet Turnbull
2020-05-27 20:52:05 -07:00
parent 9b11cbf32b
commit 42b89d34f3
4 changed files with 117 additions and 70 deletions

View File

@@ -19,6 +19,7 @@ OSXPHOTOS_EXPORTDB_VERSION = "1.0"
class ExportDB_ABC(ABC):
""" abstract base class for ExportDB """
@abstractmethod
def get_uuid_for_file(self, filename):
pass
@@ -306,17 +307,39 @@ class ExportDB(ExportDB_ABC):
def set_data(self, filename, uuid, orig_stat, exif_stat, info_json, exif_json):
    """ sets all the data for file and uuid at once

    Writes the files row (path, uuid, orig_* and exif_* stat columns), the
    info row for uuid and the exifdata row for the file through a single
    cursor and commits once at the end, instead of calling the individual
    set_*_for_file / set_info_for_uuid methods one after another.

    Args:
        filename: path of the exported file; it is made relative to
            self._path, so a path outside of it raises ValueError before
            anything is written
        uuid: uuid stored for the file and used as key of the info row
        orig_stat: 3-item sequence stored as orig_mode, orig_size, orig_mtime
        exif_stat: 3-item sequence stored as exif_mode, exif_size, exif_mtime
        info_json: value stored in info.json_info for uuid
        exif_json: value stored in exifdata.json_exifdata for the file

    Database errors (the Error class imported by this file, presumably
    sqlite3.Error -- confirm) are logged as warnings and not re-raised.
    Pending statements are rolled back so that a later commit on the same
    connection cannot persist a half-written record.
    """
    filename = str(pathlib.Path(filename).relative_to(self._path))
    # rows are matched on the lower-cased relative path so that look-ups
    # by file name are case-insensitive
    filename_normalized = filename.lower()
    conn = self._conn
    try:
        c = conn.cursor()
        c.execute(
            "INSERT OR REPLACE INTO files(filepath, filepath_normalized, uuid) VALUES (?, ?, ?);",
            (filename, filename_normalized, uuid),
        )
        c.execute(
            "UPDATE files "
            + "SET orig_mode = ?, orig_size = ?, orig_mtime = ? "
            + "WHERE filepath_normalized = ?;",
            (*orig_stat, filename_normalized),
        )
        c.execute(
            "UPDATE files "
            + "SET exif_mode = ?, exif_size = ?, exif_mtime = ? "
            + "WHERE filepath_normalized = ?;",
            (*exif_stat, filename_normalized),
        )
        c.execute(
            "INSERT OR REPLACE INTO info(uuid, json_info) VALUES (?, ?);",
            (uuid, info_json),
        )
        c.execute(
            "INSERT OR REPLACE INTO exifdata(filepath_normalized, json_exifdata) VALUES (?, ?);",
            (filename_normalized, exif_json),
        )
        # one commit for all five statements
        conn.commit()
    except Error as e:
        logging.warning(e)
        # discard whatever part of the record was already executed
        try:
            conn.rollback()
        except Error as rollback_error:
            logging.warning(rollback_error)
def close(self):
""" close the database connection """
@@ -475,7 +498,7 @@ class ExportDBInMemory(ExportDB):
except Error as e:
logging.warning(e)
raise e
tempfile = StringIO()
for line in conn.iterdump():
tempfile.write("%s\n" % line)

View File

@@ -452,11 +452,14 @@ def export2(
f"Found matching file with blank uuid: {self.uuid}, {dest}"
)
dest_uuid = self.uuid
export_db.set_uuid_for_file(dest, self.uuid)
export_db.set_info_for_uuid(self.uuid, self.json())
export_db.set_stat_orig_for_file(dest, fileutil.file_sig(dest))
export_db.set_stat_exif_for_file(dest, (None, None, None))
export_db.set_exifdata_for_file(dest, None)
export_db.set_data(
dest,
self.uuid,
fileutil.file_sig(dest),
(None, None, None),
self.json(),
None,
)
if dest_uuid != self.uuid:
# not the right file, find the right one
logging.debug(
@@ -482,11 +485,14 @@ def export2(
)
dest = pathlib.Path(file_)
found_match = True
export_db.set_uuid_for_file(file_, self.uuid)
export_db.set_info_for_uuid(self.uuid, self.json())
export_db.set_stat_orig_for_file(dest, fileutil.file_sig(dest))
export_db.set_stat_exif_for_file(dest, (None, None, None))
export_db.set_exifdata_for_file(dest, None)
export_db.set_data(
dest,
self.uuid,
fileutil.file_sig(dest),
(None, None, None),
self.json(),
None,
)
break
if not found_match:
@@ -780,11 +786,14 @@ def _export_photo(
fileutil.unlink(dest)
logging.debug(f"Not update: export_as_hardlink linking file {src} {dest}")
fileutil.hardlink(src, dest)
export_db.set_uuid_for_file(dest_str, self.uuid)
export_db.set_info_for_uuid(self.uuid, self.json())
export_db.set_stat_orig_for_file(dest_str, fileutil.file_sig(dest_str))
export_db.set_stat_exif_for_file(dest_str, (None, None, None))
export_db.set_exifdata_for_file(dest_str, None)
export_db.set_data(
dest_str,
self.uuid,
fileutil.file_sig(dest_str),
(None, None, None),
self.json(),
None,
)
exported_files.append(dest_str)
elif dest_exists and dest.samefile(src):
# update, hardlink and it already points to the right file, do nothing
@@ -800,11 +809,14 @@ def _export_photo(
# dest.unlink()
fileutil.unlink(dest)
fileutil.hardlink(src, dest)
export_db.set_uuid_for_file(dest_str, self.uuid)
export_db.set_info_for_uuid(self.uuid, self.json())
export_db.set_stat_orig_for_file(dest_str, fileutil.file_sig(dest_str))
export_db.set_stat_exif_for_file(dest_str, (None, None, None))
export_db.set_exifdata_for_file(dest_str, None)
export_db.set_data(
dest_str,
self.uuid,
fileutil.file_sig(dest_str),
(None, None, None),
self.json(),
None,
)
update_updated_files.append(dest_str)
exported_files.append(dest_str)
else:
@@ -813,11 +825,14 @@ def _export_photo(
f"Update: exporting new file with export_as_hardlink {src} {dest}"
)
fileutil.hardlink(src, dest)
export_db.set_uuid_for_file(dest_str, self.uuid)
export_db.set_info_for_uuid(self.uuid, self.json())
export_db.set_stat_orig_for_file(dest_str, fileutil.file_sig(dest_str))
export_db.set_stat_exif_for_file(dest_str, (None, None, None))
export_db.set_exifdata_for_file(dest_str, None)
export_db.set_data(
dest_str,
self.uuid,
fileutil.file_sig(dest_str),
(None, None, None),
self.json(),
None,
)
exported_files.append(dest_str)
update_new_files.append(dest_str)
else:
@@ -829,12 +844,15 @@ def _export_photo(
fileutil.unlink(dest)
logging.debug(f"Not update: copying file {src} {dest}")
fileutil.copy(src, dest_str, norsrc=no_xattr)
export_db.set_uuid_for_file(dest_str, self.uuid)
export_db.set_info_for_uuid(self.uuid, self.json())
export_db.set_stat_orig_for_file(dest_str, fileutil.file_sig(dest_str))
export_db.set_stat_exif_for_file(dest_str, (None, None, None))
export_db.set_exifdata_for_file(dest_str, None)
exported_files.append(dest_str)
export_db.set_data(
dest_str,
self.uuid,
fileutil.file_sig(dest_str),
(None, None, None),
self.json(),
None,
)
# elif dest_exists and not exiftool and cmp_file(dest_str, export_db.get_stat_orig_for_file(dest_str)):
elif (
dest_exists
@@ -865,22 +883,28 @@ def _export_photo(
# dest.unlink()
fileutil.unlink(dest)
fileutil.copy(src, dest_str, norsrc=no_xattr)
export_db.set_uuid_for_file(dest_str, self.uuid)
export_db.set_info_for_uuid(self.uuid, self.json())
export_db.set_stat_orig_for_file(dest_str, fileutil.file_sig(dest_str))
export_db.set_stat_exif_for_file(dest_str, (None, None, None))
export_db.set_exifdata_for_file(dest_str, None)
export_db.set_data(
dest_str,
self.uuid,
fileutil.file_sig(dest_str),
(None, None, None),
self.json(),
None,
)
exported_files.append(dest_str)
update_updated_files.append(dest_str)
else:
# destination doesn't exist, copy the file
logging.debug(f"Update: copying new file {src} {dest}")
fileutil.copy(src, dest_str, norsrc=no_xattr)
export_db.set_uuid_for_file(dest_str, self.uuid)
export_db.set_info_for_uuid(self.uuid, self.json())
export_db.set_stat_orig_for_file(dest_str, fileutil.file_sig(dest_str))
export_db.set_stat_exif_for_file(dest_str, (None, None, None))
export_db.set_exifdata_for_file(dest_str, None)
export_db.set_data(
dest_str,
self.uuid,
fileutil.file_sig(dest_str),
(None, None, None),
self.json(),
None,
)
exported_files.append(dest_str)
update_new_files.append(dest_str)

View File

@@ -122,10 +122,7 @@ CLI_EXPORT_UUID = "D79B8D77-BFFC-460B-9312-034F2877D35B"
CLI_EXPORT_UUID_FILENAME = "Pumkins2.jpg"
CLI_EXPORT_BY_DATE = [
"2018/09/28/Pumpkins3.jpg",
"2018/09/28/Pumkins1.jpg",
]
CLI_EXPORT_BY_DATE = ["2018/09/28/Pumpkins3.jpg", "2018/09/28/Pumkins1.jpg"]
CLI_EXPORT_SIDECAR_FILENAMES = ["Pumkins2.jpg", "Pumkins2.json", "Pumkins2.xmp"]
@@ -1292,7 +1289,7 @@ def test_export_then_hardlink():
# pylint: disable=not-context-manager
with runner.isolated_filesystem():
# basic export
result = runner.invoke(export, [os.path.join(cwd, CLI_PHOTOS_DB), ".", "-V",],)
result = runner.invoke(export, [os.path.join(cwd, CLI_PHOTOS_DB), ".", "-V"])
assert result.exit_code == 0
files = glob.glob("*")
assert sorted(files) == sorted(CLI_EXPORT_FILENAMES)

View File

@@ -7,6 +7,7 @@ INFO_DATA = """{"uuid": "3DD2C897-F19E-4CA6-8C22-B027D5A71907", "filename": "3DD
EXIF_DATA2 = """[{"_CreatedBy": "osxphotos, https://github.com/RhetTbull/osxphotos", "XMP:Title": "St. James's Park", "XMP:TagsList": ["London 2018", "St. James's Park", "England", "United Kingdom", "UK", "London"], "IPTC:Keywords": ["London 2018", "St. James's Park", "England", "United Kingdom", "UK", "London"], "XMP:Subject": ["London 2018", "St. James's Park", "England", "United Kingdom", "UK", "London"], "EXIF:GPSLatitude": "51 deg 30' 12.86\" N", "EXIF:GPSLongitude": "0 deg 7' 54.50\" W", "Composite:GPSPosition": "51 deg 30' 12.86\" N, 0 deg 7' 54.50\" W", "EXIF:GPSLatitudeRef": "North", "EXIF:GPSLongitudeRef": "West", "EXIF:DateTimeOriginal": "2018:10:13 09:18:12", "EXIF:OffsetTimeOriginal": "-04:00", "EXIF:ModifyDate": "2019:12:08 14:06:44"}]"""
INFO_DATA2 = """{"uuid": "F2BB3F98-90F0-4E4C-A09B-25C6822A4529", "filename": "F2BB3F98-90F0-4E4C-A09B-25C6822A4529.jpeg", "original_filename": "IMG_8440.JPG", "date": "2019-06-11T11:42:06.711805-07:00", "description": null, "title": null, "keywords": [], "labels": ["Sky", "Cloudy", "Fence", "Land", "Outdoor", "Park", "Amusement Park", "Roller Coaster"], "albums": [], "folders": {}, "persons": [], "path": "/Volumes/MacBook Catalina - Data/Users/rhet/Pictures/Photos Library.photoslibrary/originals/F/F2BB3F98-90F0-4E4C-A09B-25C6822A4529.jpeg", "ismissing": false, "hasadjustments": false, "external_edit": false, "favorite": false, "hidden": false, "latitude": 33.81558666666667, "longitude": -117.99298, "path_edited": null, "shared": false, "isphoto": true, "ismovie": false, "uti": "public.jpeg", "burst": false, "live_photo": false, "path_live_photo": null, "iscloudasset": true, "incloud": true, "date_modified": "2019-10-14T00:51:47.141950-07:00", "portrait": false, "screenshot": false, "slow_mo": false, "time_lapse": false, "hdr": false, "selfie": false, "panorama": false, "has_raw": false, "uti_raw": null, "path_raw": null, "place": {"name": "Adventure City, Stanton, California, United States", "names": {"field0": [], "country": ["United States"], "state_province": ["California"], "sub_administrative_area": ["Orange"], "city": ["Stanton", "Anaheim", "Anaheim"], "field5": [], "additional_city_info": ["West Anaheim"], "ocean": [], "area_of_interest": ["Adventure City", "Adventure City"], "inland_water": [], "field10": [], "region": [], "sub_throughfare": [], "field13": [], "postal_code": [], "field15": [], "field16": [], "street_address": [], "body_of_water": []}, "country_code": "US", "ishome": false, "address_str": "Adventure City, 1240 S Beach Blvd, Anaheim, CA 92804, United States", "address": {"street": "1240 S Beach Blvd", "sub_locality": "West Anaheim", "city": "Stanton", "sub_administrative_area": "Orange", "state_province": "CA", "postal_code": "92804", 
"country": "United States", "iso_country_code": "US"}}, "exif": {"flash_fired": false, "iso": 25, "metering_mode": 5, "sample_rate": null, "track_format": null, "white_balance": 0, "aperture": 2.2, "bit_rate": null, "duration": null, "exposure_bias": 0.0, "focal_length": 4.15, "fps": null, "latitude": null, "longitude": null, "shutter_speed": 0.0004940711462450593, "camera_make": "Apple", "camera_model": "iPhone 6s", "codec": null, "lens_model": "iPhone 6s back camera 4.15mm f/2.2"}}"""
def test_export_db():
""" test ExportDB """
import os
@@ -18,8 +19,8 @@ def test_export_db():
db = ExportDB(dbname)
assert os.path.isfile(dbname)
filepath = os.path.join(tempdir.name,"test.JPG")
filepath_lower = os.path.join(tempdir.name,"test.jpg")
filepath = os.path.join(tempdir.name, "test.JPG")
filepath_lower = os.path.join(tempdir.name, "test.jpg")
db.set_uuid_for_file(filepath, "FOO-BAR")
# filename should be case-insensitive
@@ -34,7 +35,7 @@ def test_export_db():
assert db.get_stat_exif_for_file(filepath) == (4, 5, 6)
# test set_data which sets all at the same time
filepath2 = os.path.join(tempdir.name,"test2.jpg")
filepath2 = os.path.join(tempdir.name, "test2.jpg")
db.set_data(filepath2, "BAR-FOO", (1, 2, 3), (4, 5, 6), INFO_DATA, EXIF_DATA)
assert db.get_uuid_for_file(filepath2) == "BAR-FOO"
assert db.get_info_for_uuid("BAR-FOO") == INFO_DATA
@@ -49,12 +50,13 @@ def test_export_db():
assert db.get_info_for_uuid("BAR-FOO") == INFO_DATA
assert db.get_exifdata_for_file(filepath2) == EXIF_DATA
assert db.get_stat_orig_for_file(filepath2) == (1, 2, 3)
assert db.get_stat_exif_for_file(filepath2) == (4, 5, 6)
assert db.get_stat_exif_for_file(filepath2) == (4, 5, 6)
# update data
db.set_uuid_for_file(filepath, "FUBAR")
assert db.get_uuid_for_file(filepath) == "FUBAR"
def test_export_db_no_op():
""" test ExportDBNoOp """
import os
@@ -64,8 +66,8 @@ def test_export_db_no_op():
tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_")
db = ExportDBNoOp()
filepath = os.path.join(tempdir.name,"test.JPG")
filepath_lower = os.path.join(tempdir.name,"test.jpg")
filepath = os.path.join(tempdir.name, "test.JPG")
filepath_lower = os.path.join(tempdir.name, "test.jpg")
db.set_uuid_for_file(filepath, "FOO-BAR")
# filename should be case-insensitive
@@ -80,7 +82,7 @@ def test_export_db_no_op():
assert db.get_stat_exif_for_file(filepath) is None
# test set_data which sets all at the same time
filepath2 = os.path.join(tempdir.name,"test2.jpg")
filepath2 = os.path.join(tempdir.name, "test2.jpg")
db.set_data(filepath2, "BAR-FOO", (1, 2, 3), (4, 5, 6), INFO_DATA, EXIF_DATA)
assert db.get_uuid_for_file(filepath2) is None
assert db.get_info_for_uuid("BAR-FOO") is None
@@ -92,6 +94,7 @@ def test_export_db_no_op():
db.set_uuid_for_file(filepath, "FUBAR")
assert db.get_uuid_for_file(filepath) is None
def test_export_db_in_memory():
""" test ExportDBInMemory """
import os
@@ -103,8 +106,8 @@ def test_export_db_in_memory():
db = ExportDB(dbname)
assert os.path.isfile(dbname)
filepath = os.path.join(tempdir.name,"test.JPG")
filepath_lower = os.path.join(tempdir.name,"test.jpg")
filepath = os.path.join(tempdir.name, "test.JPG")
filepath_lower = os.path.join(tempdir.name, "test.jpg")
db.set_uuid_for_file(filepath, "FOO-BAR")
db.set_info_for_uuid("FOO-BAR", INFO_DATA)
@@ -127,14 +130,14 @@ def test_export_db_in_memory():
dbram.set_uuid_for_file(filepath, "FUBAR")
dbram.set_info_for_uuid("FUBAR", INFO_DATA2)
dbram.set_exifdata_for_file(filepath, EXIF_DATA2)
dbram.set_stat_orig_for_file(filepath, (7,8,9))
dbram.set_stat_exif_for_file(filepath, (10,11,12))
dbram.set_stat_orig_for_file(filepath, (7, 8, 9))
dbram.set_stat_exif_for_file(filepath, (10, 11, 12))
assert dbram.get_uuid_for_file(filepath_lower) == "FUBAR"
assert dbram.get_info_for_uuid("FUBAR") == INFO_DATA2
assert dbram.get_exifdata_for_file(filepath) == EXIF_DATA2
assert dbram.get_stat_orig_for_file(filepath) == (7,8,9)
assert dbram.get_stat_exif_for_file(filepath) == (10,11,12)
assert dbram.get_stat_orig_for_file(filepath) == (7, 8, 9)
assert dbram.get_stat_exif_for_file(filepath) == (10, 11, 12)
dbram.close()
@@ -146,4 +149,4 @@ def test_export_db_in_memory():
assert db.get_stat_orig_for_file(filepath) == (1, 2, 3)
assert db.get_stat_exif_for_file(filepath) == (4, 5, 6)
assert db.get_info_for_uuid("FUBAR") is None
assert db.get_info_for_uuid("FUBAR") is None