Avoid copying db files if not necessary

This commit is contained in:
Rhet Turnbull 2020-11-11 07:03:57 -08:00
parent 38397b507b
commit ea9b41bae4
14 changed files with 183 additions and 12 deletions

View File

@ -1,4 +1,4 @@
""" version info """
__version__ = "0.36.14"
__version__ = "0.36.15"

View File

@ -1,6 +1,5 @@
""" FileUtil class with methods for copy, hardlink, unlink, etc. """
import logging
import os
import pathlib
import stat
@ -74,7 +73,6 @@ class FileUtilMacOS(FileUtilABC):
try:
os.link(src, dest)
except Exception as e:
logging.critical(f"os.link returned error: {e}")
raise e
@classmethod
@ -92,7 +90,7 @@ class FileUtilMacOS(FileUtilABC):
if src is None or dest is None:
raise ValueError("src and dest must not be None", src, dest)
if not os.path.isfile(src):
if not os.path.exists(src):
raise FileNotFoundError("src file does not appear to exist", src)
if norsrc:
@ -104,9 +102,6 @@ class FileUtilMacOS(FileUtilABC):
try:
result = subprocess.run(command, check=True, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as e:
logging.critical(
f"ditto returned error: {e.returncode} {e.stderr.decode(sys.getfilesystemencoding()).rstrip()}"
)
raise e
return result.returncode

View File

@ -35,6 +35,7 @@ from .._constants import (
from .._version import __version__
from ..albuminfo import AlbumInfo, FolderInfo, ImportInfo
from ..datetime_utils import datetime_has_tz, datetime_naive_to_local
from ..fileutil import FileUtil
from ..personinfo import PersonInfo
from ..photoinfo import PhotoInfo
from ..utils import (
@ -102,7 +103,7 @@ class PhotosDB:
# tempfile.TemporaryDirectory gets cleaned up when the object does
self._tempdir = tempfile.TemporaryDirectory(prefix="osxphotos_")
self._tempdir_name = self._tempdir.name
# set up the data structures used to store all the Photo database info
# TODO: I don't think these keywords flags are actually used
@ -265,8 +266,11 @@ class PhotosDB:
# photoanalysisd sometimes maintains this lock even after Photos is closed
# In those cases, make a temp copy of the file for sqlite3 to read
if _db_is_locked(self._dbfile):
verbose(f"Database locked, creating temporary copy.")
self._tmp_db = self._copy_db_file(self._dbfile)
try:
self._tmp_db = self._link_db_file(self._dbfile)
except:
verbose(f"Database locked, creating temporary copy.")
self._tmp_db = self._copy_db_file(self._dbfile)
self._db_version = get_db_version(self._tmp_db)
@ -281,8 +285,11 @@ class PhotosDB:
verbose(f"Processing database {self._dbfile_actual}")
# if database is exclusively locked, make a copy of it and use the copy
if _db_is_locked(self._dbfile_actual):
verbose(f"Database locked, creating temporary copy.")
self._tmp_db = self._copy_db_file(self._dbfile_actual)
try:
self._tmp_db = self._link_db_file(self._dbfile_actual)
except:
verbose(f"Database locked, creating temporary copy.")
self._tmp_db = self._copy_db_file(self._dbfile_actual)
if _debug():
logging.debug(
@ -546,6 +553,32 @@ class PhotosDB:
return dest_path
def _link_db_file(self, fname):
    """ Hard-link the sqlite database file into the temporary directory.

    A separate path to the database is needed because python's sqlite3
    implementation can't read a locked file.  Linking avoids copying the
    database.  If the sqlite write-ahead log and shared memory files
    (``-wal`` and ``-shm``) exist next to the database, they are linked too.

    Args:
        fname: path to the sqlite database file

    Returns:
        path of the link created inside ``self._tempdir_name``

    Raises:
        Exception: if any link could not be created; the underlying error is
            chained as ``__cause__``.  Links already created by this call
            are removed before raising.
    """
    dest_path = ""
    created = []  # links made by this call, so a partial failure can be undone
    try:
        dest_name = pathlib.Path(fname).name
        dest_path = os.path.join(self._tempdir_name, dest_name)
        FileUtil.hardlink(fname, dest_path)
        created.append(dest_path)

        # link write-ahead log and shared memory files if they exist
        for suffix in ("-wal", "-shm"):
            companion = f"{fname}{suffix}"
            if os.path.exists(companion):
                companion_dest = f"{dest_path}{suffix}"
                FileUtil.hardlink(companion, companion_dest)
                created.append(companion_dest)
    except Exception as e:
        # Do not leave hard links to the live database behind: callers fall
        # back to copying into the same temporary directory.
        for path in created:
            try:
                os.unlink(path)
            except OSError:
                pass
        # f-string (not "+") so a path-like fname cannot raise TypeError here
        print(f"Error linking {fname} to {dest_path}", file=sys.stderr)
        raise Exception(f"Error linking {fname} to {dest_path}") from e

    if _debug():
        logging.debug(dest_path)

    return dest_path
def _process_database4(self):
""" process the Photos database to extract info
works on Photos version <= 4.0 """

114
tests/tempdiskimage.py Normal file
View File

@ -0,0 +1,114 @@
""" Create a temporary disk image on MacOS """
import pathlib
import platform
import subprocess
import tempfile
import time
class TempDiskImage:
    """ Create and mount a temporary disk image on MacOS.

    Can be used directly (call ``unmount()`` when done) or as a context
    manager, which unmounts on exit.  The image file lives in a
    ``tempfile.TemporaryDirectory`` owned by the instance.
    """

    def __init__(self, size=100, prefix=None):
        """ Create and mount a temporary disk image.

        Args:
            size: int; size in MB of disk image, default = 100
            prefix: str; optional prefix to prepend to name of the temporary
                disk image, default = "TemporaryDiskImage"

        Raises:
            TypeError if size is not int
            RuntimeError if not on MacOS
            OSError if hdiutil does not report the image as created
            subprocess.CalledProcessError if hdiutil exits with an error
        """
        # bool is a subclass of int; keep rejecting it as the exact-type check did
        if isinstance(size, bool) or not isinstance(size, int):
            raise TypeError("size must be int")

        if platform.system() != "Darwin":
            raise RuntimeError("TempDiskImage only runs on MacOS")

        # keep a reference so the directory (and the image in it) lives as long as self
        self._tempdir = tempfile.TemporaryDirectory()

        # hacky mktemp: this could create a race condition but unlikely given
        # it's created in a TemporaryDirectory
        prefix = "TemporaryDiskImage" if prefix is None else prefix
        wall_clock = str(time.time()).replace(".", "_")
        perf_clock = str(time.perf_counter()).replace(".", "_")
        volume_name = f"{prefix}_{wall_clock}_{perf_clock}"
        image_path = pathlib.Path(self._tempdir.name) / f"{volume_name}.dmg"

        hdiutil = subprocess.run(
            [
                "/usr/bin/hdiutil",
                "create",
                "-size",
                f"{size}m",
                "-fs",
                "HFS+",
                "-volname",
                volume_name,
                image_path,
            ],
            check=True,
            text=True,
            capture_output=True,
        )
        if "created" not in hdiutil.stdout:
            raise OSError(f"Could not create DMG {image_path}")

        # path: pathlib.Path of the .dmg file
        self.path = image_path
        # _mount_point: value later passed to "hdiutil detach"
        # name: where the volume is mounted, as reported by "hdiutil attach"
        self._mount_point, self.name = self._mount_image(self.path)

    @staticmethod
    def _parse_attach_output(output, image_path=None):
        """ Extract (mount_point, path) from the text printed by hdiutil attach.

        Uses the first line containing "Apple_HFS": the first field becomes
        mount_point and everything after the second field becomes path, so
        a path containing whitespace is kept intact.

        Returns:
            (mount_point, path), or (None, None) if no "Apple_HFS" line is found

        Raises:
            ValueError if the "Apple_HFS" line has fewer than 3 fields
        """
        for line in output.split("\n"):
            line = line.strip()
            if "Apple_HFS" not in line:
                continue
            # maxsplit=2 keeps a path with spaces together in the last field
            fields = line.split(None, 2)
            if len(fields) < 3:
                raise ValueError(f"Error mounting disk image {image_path}")
            return (fields[0], fields[2])
        return (None, None)

    def _mount_image(self, image_path):
        """ mount a DMG file, returns (mount_point, path) """
        hdiutil = subprocess.run(
            ["/usr/bin/hdiutil", "attach", image_path],
            check=True,
            text=True,
            capture_output=True,
        )
        return self._parse_attach_output(hdiutil.stdout, image_path)

    def unmount(self):
        """ Detach the disk image if it is currently mounted; safe to call repeatedly """
        # getattr: __init__ may have failed before _mount_point was assigned
        if getattr(self, "_mount_point", None):
            subprocess.run(
                ["/usr/bin/hdiutil", "detach", self._mount_point],
                check=True,
                text=True,
                capture_output=True,
            )
            self._mount_point = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ Unmount the image; never suppresses an exception from the with-body """
        self.unmount()
        return False
if __name__ == "__main__":
    # Explicit lifecycle: build a 50mb image, then unmount it by hand.
    # The image itself will be cleaned up automatically.
    explicit_image = TempDiskImage(size=50, prefix="MyDiskImage")
    explicit_image.unmount()

    # Context-manager lifecycle, using the defaults
    # (100mb and prefix = "TemporaryDiskImage").
    with TempDiskImage() as managed_image:
        print(f"image: {managed_image.path}")
        print(f"mounted at: {managed_image.name}")

29
tests/test_link_db.py Normal file
View File

@ -0,0 +1,29 @@
""" Test PhotosDB._link_db_file """
import pytest
from tempdiskimage import TempDiskImage
PHOTOS_DB = "tests/Test-Movie-5_0.photoslibrary"
def test_link_db(capsys):
    """ Opening the test library must not report a temporary copy being made """
    from osxphotos import PhotosDB

    # verbose=print routes PhotosDB's progress messages to stdout for capsys
    _db = PhotosDB(dbfile=PHOTOS_DB, verbose=print)
    stdout_text = capsys.readouterr().out
    assert "creating temporary copy" not in stdout_text
def test_copy_db(capsys):
    """ A database on a different filesystem must be copied, not linked """
    import pathlib
    import tempfile

    import osxphotos
    from osxphotos.fileutil import FileUtil

    library_name = pathlib.Path(PHOTOS_DB).name
    with TempDiskImage(prefix="osxphotos") as disk_image:
        # place the library on the mounted image, then open it from there
        library_on_image = pathlib.Path(disk_image.name) / library_name
        FileUtil.copy(PHOTOS_DB, library_on_image)
        _db = osxphotos.PhotosDB(dbfile=library_on_image, verbose=print)
        output = capsys.readouterr()
        assert "creating temporary copy" in output.out