Avoid copying db files if not necessary

This commit is contained in:
Rhet Turnbull 2020-11-11 07:03:57 -08:00
parent 38397b507b
commit ea9b41bae4
14 changed files with 183 additions and 12 deletions

View File

@ -1,4 +1,4 @@
""" version info """ """ version info """
__version__ = "0.36.14" __version__ = "0.36.15"

View File

@ -1,6 +1,5 @@
""" FileUtil class with methods for copy, hardlink, unlink, etc. """ """ FileUtil class with methods for copy, hardlink, unlink, etc. """
import logging
import os import os
import pathlib import pathlib
import stat import stat
@ -74,7 +73,6 @@ class FileUtilMacOS(FileUtilABC):
try: try:
os.link(src, dest) os.link(src, dest)
except Exception as e: except Exception as e:
logging.critical(f"os.link returned error: {e}")
raise e raise e
@classmethod @classmethod
@ -92,7 +90,7 @@ class FileUtilMacOS(FileUtilABC):
if src is None or dest is None: if src is None or dest is None:
raise ValueError("src and dest must not be None", src, dest) raise ValueError("src and dest must not be None", src, dest)
if not os.path.isfile(src): if not os.path.exists(src):
raise FileNotFoundError("src file does not appear to exist", src) raise FileNotFoundError("src file does not appear to exist", src)
if norsrc: if norsrc:
@ -104,9 +102,6 @@ class FileUtilMacOS(FileUtilABC):
try: try:
result = subprocess.run(command, check=True, stderr=subprocess.PIPE) result = subprocess.run(command, check=True, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
logging.critical(
f"ditto returned error: {e.returncode} {e.stderr.decode(sys.getfilesystemencoding()).rstrip()}"
)
raise e raise e
return result.returncode return result.returncode

View File

@ -35,6 +35,7 @@ from .._constants import (
from .._version import __version__ from .._version import __version__
from ..albuminfo import AlbumInfo, FolderInfo, ImportInfo from ..albuminfo import AlbumInfo, FolderInfo, ImportInfo
from ..datetime_utils import datetime_has_tz, datetime_naive_to_local from ..datetime_utils import datetime_has_tz, datetime_naive_to_local
from ..fileutil import FileUtil
from ..personinfo import PersonInfo from ..personinfo import PersonInfo
from ..photoinfo import PhotoInfo from ..photoinfo import PhotoInfo
from ..utils import ( from ..utils import (
@ -102,7 +103,7 @@ class PhotosDB:
# tempfile.TemporaryDirectory gets cleaned up when the object does # tempfile.TemporaryDirectory gets cleaned up when the object does
self._tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_") self._tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_")
self._tempdir_name = self._tempdir.name self._tempdir_name = self._tempdir.name
# set up the data structures used to store all the Photo database info # set up the data structures used to store all the Photo database info
# TODO: I don't think these keywords flags are actually used # TODO: I don't think these keywords flags are actually used
@ -265,8 +266,11 @@ class PhotosDB:
# photoanalysisd sometimes maintains this lock even after Photos is closed # photoanalysisd sometimes maintains this lock even after Photos is closed
# In those cases, make a temp copy of the file for sqlite3 to read # In those cases, make a temp copy of the file for sqlite3 to read
if _db_is_locked(self._dbfile): if _db_is_locked(self._dbfile):
verbose(f"Database locked, creating temporary copy.") try:
self._tmp_db = self._copy_db_file(self._dbfile) self._tmp_db = self._link_db_file(self._dbfile)
except:
verbose(f"Database locked, creating temporary copy.")
self._tmp_db = self._copy_db_file(self._dbfile)
self._db_version = get_db_version(self._tmp_db) self._db_version = get_db_version(self._tmp_db)
@ -281,8 +285,11 @@ class PhotosDB:
verbose(f"Processing database {self._dbfile_actual}") verbose(f"Processing database {self._dbfile_actual}")
# if database is exclusively locked, make a copy of it and use the copy # if database is exclusively locked, make a copy of it and use the copy
if _db_is_locked(self._dbfile_actual): if _db_is_locked(self._dbfile_actual):
verbose(f"Database locked, creating temporary copy.") try:
self._tmp_db = self._copy_db_file(self._dbfile_actual) self._tmp_db = self._link_db_file(self._dbfile_actual)
except:
verbose(f"Database locked, creating temporary copy.")
self._tmp_db = self._copy_db_file(self._dbfile_actual)
if _debug(): if _debug():
logging.debug( logging.debug(
@ -546,6 +553,32 @@ class PhotosDB:
return dest_path return dest_path
def _link_db_file(self, fname):
    """Link the sqlite database file into this object's temp directory.

    Used instead of a full copy when the database is locked: the original
    comment notes this is required because python's sqlite3 implementation
    can't read a locked file. If the sqlite write-ahead log and shared memory
    files (``<fname>-wal`` and ``<fname>-shm``) exist, they are linked too.

    Args:
        fname: path of the sqlite database file to link

    Returns:
        path of the link created inside the temp directory

    Raises:
        Exception: if any of the links could not be created. The underlying
            error is chained as ``__cause__`` so that a caller which falls
            back to copying can still report why linking failed.
    """
    dest_path = ""
    try:
        dest_path = os.path.join(self._tempdir_name, pathlib.Path(fname).name)
        FileUtil.hardlink(fname, dest_path)
        # link write-ahead log and shared memory files (-wal and -shm) if they exist
        for suffix in ("-wal", "-shm"):
            if os.path.exists(f"{fname}{suffix}"):
                FileUtil.hardlink(f"{fname}{suffix}", f"{dest_path}{suffix}")
    except Exception as e:
        # f-string (not "+") so a path-like fname cannot raise TypeError here
        # and hide the real linking error
        message = f"Error linking {fname} to {dest_path}"
        print(message, file=sys.stderr)
        raise Exception(message) from e

    if _debug():
        logging.debug(dest_path)

    return dest_path
def _process_database4(self): def _process_database4(self):
""" process the Photos database to extract info """ process the Photos database to extract info
works on Photos version <= 4.0 """ works on Photos version <= 4.0 """

114
tests/tempdiskimage.py Normal file
View File

@ -0,0 +1,114 @@
""" Create a temporary disk image on MacOS """
import pathlib
import platform
import subprocess
import tempfile
import time
class TempDiskImage:
    """Create and mount a temporary disk image (macOS only).

    The .dmg file is created inside a private ``tempfile.TemporaryDirectory``
    and attached with ``/usr/bin/hdiutil``. Instances can be used as context
    managers; the image is detached when the ``with`` block exits.

    Attributes:
        path: ``pathlib.Path`` of the .dmg file
        name: third field of the "Apple_HFS" line printed by ``hdiutil attach``
            (callers use it as the directory where the volume is mounted);
            None if hdiutil printed no such line
    """

    def __init__(self, size=100, prefix=None):
        """Create and mount a temporary disk image.

        Args:
            size: int; size in MB of disk image, default = 100
            prefix: str; optional prefix to prepend to the name of the
                temporary disk image and its volume,
                default = "TemporaryDiskImage"

        Raises:
            TypeError: if size is not int
            RuntimeError: if not on MacOS
            OSError: if hdiutil does not report that the image was created
            subprocess.CalledProcessError: if hdiutil exits with an error
        """
        # bool is a subclass of int but is not a meaningful size; keep rejecting it
        if isinstance(size, bool) or not isinstance(size, int):
            raise TypeError("size must be int")

        if platform.system() != "Darwin":
            raise RuntimeError("TempDiskImage only runs on MacOS")

        self._tempdir = tempfile.TemporaryDirectory()

        # hacky mktemp: this could create a race condition but unlikely given
        # it's created in a TemporaryDirectory
        prefix = "TemporaryDiskImage" if prefix is None else prefix
        wall_clock = str(time.time()).replace(".", "_")
        perf_clock = str(time.perf_counter()).replace(".", "_")
        volume_name = f"{prefix}_{wall_clock}_{perf_clock}"
        image_path = pathlib.Path(self._tempdir.name) / f"{volume_name}.dmg"

        hdiutil = subprocess.run(
            [
                "/usr/bin/hdiutil",
                "create",
                "-size",
                f"{size}m",
                "-fs",
                "HFS+",
                "-volname",
                volume_name,
                image_path,
            ],
            check=True,
            text=True,
            capture_output=True,
        )
        if "created" not in hdiutil.stdout:
            raise OSError(f"Could not create DMG {image_path}")

        self.path = image_path
        self._mount_point, self.name = self._mount_image(self.path)

    @staticmethod
    def _parse_attach_output(stdout, image_path):
        """Extract (first field, third field) from the "Apple_HFS" line of
        ``hdiutil attach`` output; (None, None) if there is no such line.

        Raises:
            ValueError: if the "Apple_HFS" line has fewer than three fields
        """
        for line in stdout.split("\n"):
            line = line.strip()
            if "Apple_HFS" not in line:
                continue
            # maxsplit=2 keeps the third field intact when the volume name
            # (and therefore the mount path) contains whitespace
            fields = line.split(None, 2)
            if len(fields) < 3:
                raise ValueError(f"Error mounting disk image {image_path}")
            return (fields[0], fields[2])
        return (None, None)

    def _mount_image(self, image_path):
        """Attach a DMG file with hdiutil.

        Returns:
            tuple (mount_point, path) taken from the first and third fields of
            the "Apple_HFS" line of hdiutil's output. The first value is what
            unmount() later passes to ``hdiutil detach``; the second is stored
            as ``self.name``. Both are None if no "Apple_HFS" line is printed.

        Raises:
            ValueError: if the "Apple_HFS" line cannot be parsed
            subprocess.CalledProcessError: if hdiutil exits with an error
        """
        hdiutil = subprocess.run(
            ["/usr/bin/hdiutil", "attach", image_path],
            check=True,
            text=True,
            capture_output=True,
        )
        return self._parse_attach_output(hdiutil.stdout, image_path)

    def unmount(self):
        """Detach the disk image if it is currently attached.

        Safe to call more than once, and on an instance whose __init__ failed
        before the image was attached.

        Raises:
            subprocess.CalledProcessError: if hdiutil exits with an error
        """
        # _mount_point does not exist if __init__ raised before attaching
        attached = getattr(self, "_mount_point", None)
        if attached:
            subprocess.run(
                ["/usr/bin/hdiutil", "detach", attached],
                check=True,
                text=True,
                capture_output=True,
            )
            self._mount_point = None

    def __enter__(self):
        """Return the already-mounted image for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Unmount the image; never suppresses an exception from the block."""
        self.unmount()
        return False
if __name__ == "__main__":
    # Demo 1 -- manual lifecycle: build a 50mb image with a custom prefix,
    # then detach it explicitly. The image file itself is cleaned up
    # automatically.
    manual_image = TempDiskImage(size=50, prefix="MyDiskImage")
    manual_image.unmount()

    # Demo 2 -- context-manager lifecycle using the defaults
    # (100mb, prefix = "TemporaryDiskImage").
    with TempDiskImage() as managed_image:
        print(f"image: {managed_image.path}")
        print(f"mounted at: {managed_image.name}")

29
tests/test_link_db.py Normal file
View File

@ -0,0 +1,29 @@
""" Test PhotosDB._link_db_file """
import pytest
from tempdiskimage import TempDiskImage
PHOTOS_DB = "tests/Test-Movie-5_0.photoslibrary"
def test_link_db(capsys):
    """ Opening the test library must not report that a temporary copy was made """
    import osxphotos

    # verbose=print routes PhotosDB's progress messages to stdout for capsys
    db = osxphotos.PhotosDB(dbfile=PHOTOS_DB, verbose=print)

    stdout_text = capsys.readouterr().out
    assert "creating temporary copy" not in stdout_text
def test_copy_db(capsys):
    """ A library on a different filesystem must report that a temporary copy was made """
    import pathlib
    import tempfile

    import osxphotos
    from osxphotos.fileutil import FileUtil

    # Place a copy of the test library on a freshly mounted disk image so it
    # does not share a filesystem with PhotosDB's temp directory.
    with TempDiskImage(prefix="osxphotos") as disk_image:
        library_name = pathlib.Path(PHOTOS_DB).name
        newdb = pathlib.Path(disk_image.name) / library_name
        FileUtil.copy(PHOTOS_DB, newdb)

        # verbose=print routes PhotosDB's progress messages to stdout for capsys
        db = osxphotos.PhotosDB(dbfile=newdb, verbose=print)

        stdout_text = capsys.readouterr().out
        assert "creating temporary copy" in stdout_text