Added PhotoInfo.duplicates

This commit is contained in:
Rhet Turnbull 2021-06-01 17:32:43 -07:00
parent 99f4394f8e
commit 7accfdb066
22 changed files with 122 additions and 73 deletions

View File

@ -1497,7 +1497,7 @@ Substitution Description
{lf} A line feed: '\n', alias for {newline}
{cr} A carriage return: '\r'
{crlf} a carriage return + line feed: '\r\n'
{osxphotos_version} The osxphotos version, e.g. '0.42.27'
{osxphotos_version} The osxphotos version, e.g. '0.42.28'
{osxphotos_cmd_line} The full command line used to run osxphotos
The following substitutions may result in multiple values. Thus if specified for
@ -2388,6 +2388,9 @@ Returns a [ScoreInfo](#scoreinfo) data class object which provides access to the
**Note**: Valid only for Photos 5; returns None for earlier Photos versions.
#### `duplicates`
Returns list of PhotoInfo objects for *possible* duplicates or empty list if no matching duplicates. Photos are considered possible duplicates if the photo's original file size, date created, height, and width match those of another photo. This does not do a byte-for-byte comparison or compute a hash, which makes it fast and allows for identification of possible duplicates even if originals are not downloaded from iCloud. The signature-based approach should be robust enough to match duplicates created either through the "duplicate photo" menu item or imported twice into the library, but you should not rely on this 100% for identification of all duplicates.
#### `json()`
Returns a JSON representation of all photo info.
@ -3191,7 +3194,7 @@ The following template field substitutions are availabe for use the templating s
|{lf}|A line feed: '\n', alias for {newline}|
|{cr}|A carriage return: '\r'|
|{crlf}|a carriage return + line feed: '\r\n'|
|{osxphotos_version}|The osxphotos version, e.g. '0.42.27'|
|{osxphotos_version}|The osxphotos version, e.g. '0.42.28'|
|{osxphotos_cmd_line}|The full command line used to run osxphotos|
|{album}|Album(s) photo is contained in|
|{folder_album}|Folder path + album photo is contained in. e.g. 'Folder/Subfolder/Album' or just 'Album' if no enclosing folder|

View File

@ -1,3 +1,3 @@
""" version info """
__version__ = "0.42.27"
__version__ = "0.42.28"

View File

@ -998,6 +998,21 @@ class PhotoInfo:
""" returns filesize of original photo in bytes as int """
return self._info["original_filesize"]
@property
def duplicates(self):
    """Return list of PhotoInfo objects for possible duplicates of this photo

    Photos are possible duplicates when they share the signature computed by
    self._db._duplicate_signature (original_filesize, imageDate, height,
    width, UTI, hasAdjustments). This photo itself is never included.

    Returns:
        list of photos returned by self._db.get_photo for every other uuid with
        the same signature, or empty list if no matching duplicates
    """
    signature = self._db._duplicate_signature(self.uuid)
    try:
        # guard only the signature lookup so that a KeyError raised while
        # loading a matching photo is not misreported as a missing signature
        matching_uuids = self._db._db_signatures[signature]
    except KeyError:
        # don't expect this to happen as the signature should be in db
        logging.warning(f"Did not find signature for {self.uuid} in _db_signatures")
        return []
    return [self._db.get_photo(uuid) for uuid in matching_uuids if uuid != self.uuid]
def render_template(
self,
template_str,

View File

@ -240,6 +240,10 @@ class PhotosDB:
# Will hold the primary key of root folder
self._folder_root_pk = None
# Dict to hold signatures for finding possible duplicates
# key is the tuple returned by _duplicate_signature (original_filesize, imageDate, height, width, UTI, hasAdjustments) and value is list of uuids that match that signature
self._db_signatures = {}
if _debug():
logging.debug(f"dbfile = {dbfile}")
@ -1180,6 +1184,13 @@ class PhotosDB:
self._dbphotos[uuid]["import_uuid"] = row[44]
self._dbphotos[uuid]["fok_import_session"] = None
# compute signatures for finding possible duplicates
signature = self._duplicate_signature(uuid)
try:
self._db_signatures[signature].append(uuid)
except KeyError:
self._db_signatures[signature] = [uuid]
# get additional details from RKMaster, needed for RAW processing
verbose("Processing additional photo details.")
c.execute(
@ -2145,6 +2156,13 @@ class PhotosDB:
self._dbphotos[uuid] = info
# compute signatures for finding possible duplicates
signature = self._duplicate_signature(uuid)
try:
self._db_signatures[signature].append(uuid)
except KeyError:
self._db_signatures[signature] = [uuid]
# # if row[19] is not None and ((row[20] == 2) or (row[20] == 4)):
# # burst photo
# if row[19] is not None:
@ -3209,6 +3227,17 @@ class PhotosDB:
return photos
def _duplicate_signature(self, uuid):
    """ Build the tuple used as a key for grouping possible duplicates

    The tuple holds, in order, the original_filesize, imageDate, height,
    width, UTI, and hasAdjustments values stored in self._dbphotos[uuid]
    """
    info = self._dbphotos[uuid]
    fields = (
        "original_filesize",
        "imageDate",
        "height",
        "width",
        "UTI",
        "hasAdjustments",
    )
    return tuple(info[field] for field in fields)
def __repr__(self):
return f"osxphotos.{self.__class__.__name__}(dbfile='{self.db_path}')"

View File

@ -5,7 +5,7 @@
<key>LithiumMessageTracer</key>
<dict>
<key>LastReportedDate</key>
<date>2020-04-17T18:39:50Z</date>
<date>2021-06-01T17:42:08Z</date>
</dict>
<key>PXPeopleScreenUnlocked</key>
<true/>

View File

@ -11,6 +11,6 @@
<key>PLLastRevGeoForcedProviderOutOfDateCheckVersionKey</key>
<integer>1</integer>
<key>PLLastRevGeoVerFileFetchDateKey</key>
<date>2020-04-17T18:39:52Z</date>
<date>2021-06-01T17:42:08Z</date>
</dict>
</plist>

View File

@ -3,7 +3,7 @@
<plist version="1.0">
<dict>
<key>LastHistoryRowId</key>
<integer>502</integer>
<integer>517</integer>
<key>LibraryBuildTag</key>
<string>E3E46F2A-7168-4973-AB3E-5848F80BFC7D</string>
<key>LibrarySchemaVersion</key>

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 157 KiB

View File

@ -1,5 +1,9 @@
import collections
import datetime
import pytest
import osxphotos
from osxphotos._constants import _UNKNOWN_PERSON
PHOTOS_DB = "./tests/Test-10.12.6.photoslibrary/database/photos.db"
@ -18,8 +22,8 @@ PERSONS = ["Katie", "Suzy", "Maria", _UNKNOWN_PERSON]
ALBUMS = ["Pumpkin Farm", "AlbumInFolder"]
KEYWORDS_DICT = {
"Kids": 4,
"wedding": 2,
"flowers": 1,
"wedding": 3,
"flowers": 2,
"England": 1,
"London": 1,
"London 2018": 1,
@ -30,83 +34,64 @@ KEYWORDS_DICT = {
PERSONS_DICT = {"Katie": 3, "Suzy": 2, "Maria": 1, _UNKNOWN_PERSON: 1}
ALBUM_DICT = {"Pumpkin Farm": 3, "AlbumInFolder": 1}
UUID_DICT = {"derivatives": "FPm+ICxpQV+LPBKR22UepA"}
UUID_DICT = {
"derivatives": "FPm+ICxpQV+LPBKR22UepA",
"no_duplicates": "FPm+ICxpQV+LPBKR22UepA",
"duplicates": "HWsxlzxlQ++1TUPg2XNUgg",
}
UUID_DUPLICATE = "VwOUaFMlSry5+51f6q8uyw"
def test_init():
import osxphotos
@pytest.fixture(scope="module")
def photosdb():
return osxphotos.PhotosDB(dbfile=PHOTOS_DB)
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_init(photosdb):
assert isinstance(photosdb, osxphotos.PhotosDB)
def test_db_version():
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_db_version(photosdb):
# assert photosdb.db_version in osxphotos._TESTED_DB_VERSIONS
assert photosdb.db_version == "2622"
def test_persons():
import osxphotos
import collections
def test_persons(photosdb):
    # persons should contain exactly the expected names (order-insensitive)
    # uses the photosdb fixture directly instead of re-opening the library,
    # which would shadow the fixture and repeat the database load
    assert "Katie" in photosdb.persons
    assert collections.Counter(PERSONS) == collections.Counter(photosdb.persons)
def test_keywords():
import osxphotos
import collections
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_keywords(photosdb):
assert "wedding" in photosdb.keywords
assert collections.Counter(KEYWORDS) == collections.Counter(photosdb.keywords)
def test_album_names():
import osxphotos
import collections
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_album_names(photosdb):
assert "Pumpkin Farm" in photosdb.albums
assert collections.Counter(ALBUMS) == collections.Counter(photosdb.albums)
def test_keywords_dict():
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_keywords_dict(photosdb):
keywords = photosdb.keywords_as_dict
assert keywords["wedding"] == 2
assert keywords["wedding"] == 3
assert keywords == KEYWORDS_DICT
def test_persons_as_dict():
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_persons_as_dict(photosdb):
persons = photosdb.persons_as_dict
assert persons["Maria"] == 1
assert persons == PERSONS_DICT
def test_albums_as_dict():
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_albums_as_dict(photosdb):
albums = photosdb.albums_as_dict
assert albums["Pumpkin Farm"] == 3
assert albums == ALBUM_DICT
def test_attributes():
import datetime
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_attributes(photosdb):
photos = photosdb.photos(uuid=["sE5LlfekS8ykEE7o0cuMVA"])
assert len(photos) == 1
p = photos[0]
@ -126,38 +111,25 @@ def test_attributes():
assert p.ismissing == False
def test_missing():
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_missing(photosdb):
photos = photosdb.photos(uuid=["Pj99JmYjQkeezdY2OFuSaw"])
assert len(photos) == 1
p = photos[0]
assert p.path == None
assert p.path is None
assert p.ismissing == True
def test_count():
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_count(photosdb):
photos = photosdb.photos()
assert len(photos) == 9
assert len(photos) == 10
def test_keyword_2():
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_keyword_2(photosdb):
photos = photosdb.photos(keywords=["wedding"])
assert len(photos) == 2
assert len(photos) == 3
def test_keyword_not_in_album():
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_keyword_not_in_album(photosdb):
# find all photos with keyword "Kids" not in the album "Pumpkin Farm"
photos1 = photosdb.photos(albums=["Pumpkin Farm"])
photos2 = photosdb.photos(keywords=["Kids"])
@ -166,12 +138,8 @@ def test_keyword_not_in_album():
assert photos3[0].uuid == "Pj99JmYjQkeezdY2OFuSaw"
def test_path_derivatives():
def test_path_derivatives(photosdb):
# test path_derivatives
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
photos = photosdb.photos(uuid=[UUID_DICT["derivatives"]])
p = photos[0]
derivs = [
@ -180,3 +148,18 @@ def test_path_derivatives():
]
for i, p in enumerate(p.path_derivatives):
assert p.endswith(derivs[i])
def test_duplicates_1(photosdb):
    # a photo with a known duplicate should report exactly that one duplicate
    dupes = photosdb.get_photo(uuid=UUID_DICT["duplicates"]).duplicates
    assert [dupe.uuid for dupe in dupes] == [UUID_DUPLICATE]
def test_duplicates_2(photosdb):
    # a photo with no matching signature should report no duplicates
    uuid = UUID_DICT["no_duplicates"]
    dupes = photosdb.get_photo(uuid=uuid).duplicates
    assert not dupes

View File

@ -111,6 +111,7 @@ UUID_DICT = {
"import_session": "8846E3E6-8AC8-4857-8448-E3D025784410",
"movie": "D1359D09-1373-4F3B-B0E3-1A4DE573E4A3",
"description_newlines": "7F74DD34-5920-4DA3-B284-479887A34F66",
"no_duplicates": "E9BC5C36-7CD1-40A1-A72B-8B8FAC227D51",
}
UUID_DICT_LOCAL = {
@ -217,6 +218,8 @@ ORIGINAL_FILENAME_DICT = {
UUID_IS_REFERENCE = "A1DD1F98-2ECD-431F-9AC9-5AFEFE2D3A5C"
UUID_NOT_REFERENCE = "F12384F6-CD17-4151-ACBA-AE0E3688539E"
UUID_DUPLICATE = ""
@pytest.fixture(scope="module")
def photosdb():
@ -1347,3 +1350,19 @@ def test_exiftool_newlines_in_description(photosdb):
exif = photo._exiftool_dict()
assert photo.description.find("\n") > 0
assert exif["EXIF:ImageDescription"].find("\n") == -1
@pytest.mark.skip(reason="Not yet implemented")
def test_duplicates_1(photosdb):
    # test photo has duplicates
    # skipped unconditionally: pytest.mark.skip accepts only a reason (a
    # condition belongs to skipif), and UUID_DUPLICATE is still an empty
    # placeholder in this module
    photo = photosdb.get_photo(uuid=UUID_DICT["duplicates"])
    assert len(photo.duplicates) == 1
    assert photo.duplicates[0].uuid == UUID_DUPLICATE
def test_duplicates_2(photosdb):
    # a photo with no matching signature should report no duplicates
    uuid = UUID_DICT["no_duplicates"]
    dupes = photosdb.get_photo(uuid=uuid).duplicates
    assert not dupes