Added --duplicate flag to find possible duplicates

This commit is contained in:
Rhet Turnbull
2021-06-12 18:31:53 -07:00
parent 6a0b8b4a3f
commit 83892e096a
37 changed files with 192 additions and 50 deletions

View File

@@ -9,12 +9,10 @@ import pathlib
import pprint
import sys
import time
import unicodedata
import bitmath
import click
import osxmetadata
import photoscript
import yaml
import osxphotos
@@ -458,6 +456,14 @@ def QUERY_OPTIONS(f):
is_flag=True,
help="Search for photos that are not in any albums.",
),
o(
"--duplicate",
is_flag=True,
help="Search for photos with possible duplicates. osxphotos will compare signatures of photos, "
"evaluating date created, size, height, width, and edited status to find *possible* duplicates. "
"This does not compare images byte-for-byte nor compare hashes but should find photos imported multiple "
"times or duplicated within Photos."
),
o(
"--min-size",
metavar="SIZE",
@@ -1067,6 +1073,7 @@ def export(
max_size,
regex,
query_eval,
duplicate,
):
"""Export photos from the Photos database.
Export path DEST is required.
@@ -1221,6 +1228,7 @@ def export(
max_size = cfg.max_size
regex = cfg.regex
query_eval = cfg.query_eval
duplicate = cfg.duplicate
# config file might have changed verbose
VERBOSE = bool(verbose)
@@ -1526,6 +1534,7 @@ def export(
max_size=max_size,
regex=regex,
query_eval=query_eval,
duplicate=duplicate,
)
try:
@@ -1891,6 +1900,7 @@ def query(
is_reference,
in_album,
not_in_album,
duplicate,
min_size,
max_size,
regex,
@@ -1926,6 +1936,7 @@ def query(
min_size,
max_size,
regex,
duplicate,
]
exclusive = [
(favorite, not_favorite),
@@ -2051,6 +2062,7 @@ def query(
max_size=max_size,
query_eval=query_eval,
regex=regex,
duplicate=duplicate
)
try:

View File

@@ -1,7 +1,10 @@
""" PhotosAlbum class to create an album in default Photos library and add photos to it """
from typing import Optional, List
from typing import List, Optional
import photoscript
from more_itertools import chunked
from .photoinfo import PhotoInfo
from .utils import noop
@@ -27,7 +30,8 @@ class PhotosAlbum:
def add_list(self, photo_list: List[PhotoInfo]):
    """Add every photo in photo_list to this album and report the count.

    Args:
        photo_list: PhotoInfo objects to add; each one is wrapped in a
            photoscript.Photo built from its uuid.
    """
    photos = [photoscript.Photo(p.uuid) for p in photo_list]
    # Add in batches of 10 instead of a single call with the whole list.
    # Each photo is passed to album.add exactly once; the unbatched
    # self.album.add(photos) call is intentionally not kept alongside the loop.
    # NOTE(review): batch size presumably keeps each request to Photos small -- confirm.
    for photolist in chunked(photos, 10):
        self.album.add(photolist)
    photo_len = len(photos)
    # singular only for exactly one photo, so an empty list reads "0 photos"
    photo_word = "photo" if photo_len == 1 else "photos"
    self.verbose(f"Added {photo_len} {photo_word} to album {self.name}")

View File

@@ -11,6 +11,7 @@ import platform
import re
import sys
import tempfile
from collections import OrderedDict
from datetime import datetime, timedelta, timezone
from pprint import pformat
from typing import List
@@ -65,17 +66,17 @@ class PhotosDB:
"""Processes a Photos.app library database to extract information about photos"""
# import additional methods
from ._photosdb_process_comments import _process_comments
from ._photosdb_process_exif import _process_exifinfo
from ._photosdb_process_faceinfo import _process_faceinfo
from ._photosdb_process_scoreinfo import _process_scoreinfo
from ._photosdb_process_searchinfo import (
_process_searchinfo,
labels,
labels_normalized,
labels_as_dict,
labels_normalized,
labels_normalized_as_dict,
)
from ._photosdb_process_scoreinfo import _process_scoreinfo
from ._photosdb_process_comments import _process_comments
def __init__(self, dbfile=None, verbose=None, exiftool=None):
"""Create a new PhotosDB object.
@@ -3225,6 +3226,32 @@ class PhotosDB:
except Exception as e:
raise ValueError(f"Invalid query_eval CRITERIA: {e}")
if options.duplicate:
no_date = datetime(1970, 1, 1)
tz = timezone(timedelta(0))
no_date = no_date.astimezone(tz=tz)
photos = sorted(
[p for p in photos if p.duplicates],
key=lambda x: x.date_added or no_date,
)
# gather all duplicates but ensure each uuid is only represented once
photodict = OrderedDict()
for p in photos:
if p.uuid not in photodict:
photodict[p.uuid] = p
for d in sorted(
p.duplicates, key=lambda x: x.date_added or no_date
):
if d.uuid not in photodict:
photodict[d.uuid] = d
photos = list(photodict.values())
# filter for deleted as photo.duplicates will include photos in the trash
if not (options.deleted or options.deleted_only):
photos = [p for p in photos if not p.intrash]
if options.deleted_only:
photos = [p for p in photos if p.intrash]
return photos
def _duplicate_signature(self, uuid):

View File

@@ -1,6 +1,6 @@
""" QueryOptions class for PhotosDB.query """
from dataclasses import dataclass
from dataclasses import dataclass, asdict
from typing import Optional, Iterable, Tuple
import datetime
import bitmath
@@ -30,7 +30,7 @@ class QueryOptions:
shared: Optional[bool] = None
not_shared: Optional[bool] = None
photos: Optional[bool] = True
movies: Optional[bool] = True
movies: Optional[bool] = True
uti: Optional[Iterable[str]] = None
burst: Optional[bool] = None
not_burst: Optional[bool] = None
@@ -78,6 +78,7 @@ class QueryOptions:
max_size: Optional[bitmath.Byte] = None
regex: Optional[Iterable[Tuple[str, str]]] = None
query_eval: Optional[Iterable[str]] = None
duplicate: Optional[bool] = None
def asdict(self):
    """Return this object's dataclass fields as a dict."""
    # The call below is not recursive when this is a method: a method body does
    # not see class-level names, so `asdict` resolves to the module-level
    # function imported from dataclasses.
    # NOTE(review): assumes this def lives inside the QueryOptions dataclass body -- confirm.
    return asdict(self)