osxphotos/examples/find_bad_extensions.py
2022-12-28 22:58:16 -07:00

152 lines
5.0 KiB
Python

"""Scan Photos library to find photos with bad (incorrect) file extensions.
This can be run with osxphotos via: osxphotos run find_bad_extensions.py
For help, run: osxphotos run find_bad_extensions.py --help
"""
from __future__ import annotations
import csv
import json
import os
import pathlib
import sys
import click
from rich import print
from osxphotos import PhotoInfo, PhotosDB
from osxphotos.cli.common import get_data_dir
from osxphotos.exiftool import ExifTool, get_exiftool_path
from osxphotos.sqlitekvstore import SQLiteKVStore
def check_extension(filepath: str) -> tuple[bool, str, str]:
"""Check if file extension is correct for image file using exiftool
Args:
filepath: path to file to check
Returns: tuple of (bool, str, str) where bool is True if extension is correct, False if not
and str, str is the current extension, correct extension or current extension if correct
"""
filepath = pathlib.Path(filepath)
current_ext = filepath.suffix.lower()
current_ext = current_ext[1:] if current_ext else "" # remove leading dot
exiftool = ExifTool(filepath)
correct_ext = exiftool.asdict().get("File:FileTypeExtension").lower()
if current_ext != correct_ext:
# there are some extensions that have more than one valid extension
# there are likely more but these are the ones I've seen so far
is_correct = (
current_ext in ("jpg", "jpeg") and correct_ext in ("jpg", "jpeg")
) or (current_ext in ("tif", "tiff") and correct_ext in ("tif", "tiff"))
else:
is_correct = True
return is_correct, current_ext, correct_ext
def check_photo(
photo: PhotoInfo, recheck: bool, version: str, kvstore: SQLiteKVStore
) -> None:
"""Check PhotoInfo for correct extension
Args:
photo: PhotoInfo instance
recheck: if True, recheck even if previously checked
version: "original" or "edited"
kvstore: SQLiteKVStore instance to store results
"""
photo_path = photo.path if version == "original" else photo.path_edited
if photo_path is None:
print(
f":warning-emoji: [yellow]No {version} path for photo: {photo.original_filename} ({photo.uuid})",
file=sys.stderr,
)
return
if recheck or f"{photo.uuid}:{version}" not in kvstore:
is_correct, current_ext, correct_ext = check_extension(photo_path)
if not is_correct:
print(
f"{photo.original_filename} ({version}) has incorrect extension: [red]{current_ext}[/] should be [green]{correct_ext}[/]",
file=sys.stderr,
)
# output results as CSV to stdout
csv.writer(sys.stdout).writerow(
[
photo.uuid,
photo.original_filename,
version,
current_ext,
correct_ext,
photo_path,
]
)
kvstore[f"{photo.uuid}:{version}"] = (is_correct, current_ext, correct_ext)
@click.command()
@click.option(
"--library",
default=None,
type=click.Path(exists=True, file_okay=True, dir_okay=True),
help="Path to Photos library to use. Default is to use default Photos library.",
)
@click.option(
"--recheck",
is_flag=True,
help="Recheck all files even if previously checked and cached.",
)
@click.option(
"--edited",
is_flag=True,
help="Check edited versions of photos in addition to originals.",
)
def main(library: str, recheck: bool, edited: bool):
"""Scan Photos library to find photos with bad (incorrect) file extensions.
This can be run with osxphotos via: `osxphotos run find_bad_extensions.py`
Both STDOUT and STDERR are used to output results.
STDOUT is used to output a CSV file with the following columns:
uuid, original_filename, version, current_extension, correct_extension, path
Thus, to save the results to a file, run:
osxphotos run find_bad_extensions.py > results.csv
"""
# exiftool required to run
try:
get_exiftool_path()
except FileNotFoundError as e:
print(
":cross_mark-emoji: [red]Could not find exiftool. Please download and install"
" from https://exiftool.org/",
file=sys.stderr,
)
raise click.Abort() from e
# path to the cache database to store results of extension check
cache_db_path = os.path.join(get_data_dir(), "bad_extensions.db")
kvstore = SQLiteKVStore(
cache_db_path, wal=True, serialize=json.dumps, deserialize=json.loads
)
kvstore.about = "osxphotos bad extensions cache"
print(f"Using cache database: [blue]{cache_db_path}", file=sys.stderr)
# load the Photos database and check each photo
photosdb = PhotosDB(dbfile=library)
for photo in photosdb.photos():
check_photo(photo, recheck, "original", kvstore)
if edited and photo.hasadjustments:
check_photo(photo, recheck, "edited", kvstore)
if __name__ == "__main__":
main()