Added PhotoInfo.duplicates

This commit is contained in:
Rhet Turnbull 2021-06-01 17:32:43 -07:00
parent 99f4394f8e
commit 7accfdb066
22 changed files with 122 additions and 73 deletions

View File

@ -1497,7 +1497,7 @@ Substitution Description
{lf} A line feed: '\n', alias for {newline} {lf} A line feed: '\n', alias for {newline}
{cr} A carriage return: '\r' {cr} A carriage return: '\r'
{crlf} a carriage return + line feed: '\r\n' {crlf} a carriage return + line feed: '\r\n'
{osxphotos_version} The osxphotos version, e.g. '0.42.27' {osxphotos_version} The osxphotos version, e.g. '0.42.28'
{osxphotos_cmd_line} The full command line used to run osxphotos {osxphotos_cmd_line} The full command line used to run osxphotos
The following substitutions may result in multiple values. Thus if specified for The following substitutions may result in multiple values. Thus if specified for
@ -2388,6 +2388,9 @@ Returns a [ScoreInfo](#scoreinfo) data class object which provides access to the
**Note**: Valid only for Photos 5; returns None for earlier Photos versions. **Note**: Valid only for Photos 5; returns None for earlier Photos versions.
#### `duplicates`
Returns a list of PhotoInfo objects for *possible* duplicates, or an empty list if there are no matching duplicates. Photos are considered possible duplicates if the photo's original file size, date created, height, width, UTI, and edited (has adjustments) status match those of another photo. This does not do a byte-for-byte comparison or compute a hash, which makes it fast and allows for identification of possible duplicates even if the originals are not downloaded from iCloud. The signature-based approach should be robust enough to match duplicates created either through the "duplicate photo" menu item or by importing the same photo twice into the library, but you should not rely on it to identify every duplicate.
#### `json()` #### `json()`
Returns a JSON representation of all photo info. Returns a JSON representation of all photo info.
@ -3191,7 +3194,7 @@ The following template field substitutions are availabe for use the templating s
|{lf}|A line feed: '\n', alias for {newline}| |{lf}|A line feed: '\n', alias for {newline}|
|{cr}|A carriage return: '\r'| |{cr}|A carriage return: '\r'|
|{crlf}|a carriage return + line feed: '\r\n'| |{crlf}|a carriage return + line feed: '\r\n'|
|{osxphotos_version}|The osxphotos version, e.g. '0.42.27'| |{osxphotos_version}|The osxphotos version, e.g. '0.42.28'|
|{osxphotos_cmd_line}|The full command line used to run osxphotos| |{osxphotos_cmd_line}|The full command line used to run osxphotos|
|{album}|Album(s) photo is contained in| |{album}|Album(s) photo is contained in|
|{folder_album}|Folder path + album photo is contained in. e.g. 'Folder/Subfolder/Album' or just 'Album' if no enclosing folder| |{folder_album}|Folder path + album photo is contained in. e.g. 'Folder/Subfolder/Album' or just 'Album' if no enclosing folder|

View File

@ -1,3 +1,3 @@
""" version info """ """ version info """
__version__ = "0.42.27" __version__ = "0.42.28"

View File

@ -998,6 +998,21 @@ class PhotoInfo:
""" returns filesize of original photo in bytes as int """ """ returns filesize of original photo in bytes as int """
return self._info["original_filesize"] return self._info["original_filesize"]
@property
def duplicates(self):
    """Return list of PhotoInfo objects for possible duplicates of this photo.

    Photos are treated as possible duplicates when they share the signature
    computed by self._db._duplicate_signature() (original file size, date,
    height, width, UTI, and hasAdjustments). No byte-for-byte comparison or
    hash of the image data is performed.

    Returns:
        list with the result of self._db.get_photo() for every other uuid
        that has the same signature; empty list if there are no matches or
        if the signature is not found in self._db._db_signatures.
    """
    signature = self._db._duplicate_signature(self.uuid)
    try:
        # guard only the signature lookup so that a KeyError raised while
        # loading a photo is not misreported as a missing signature
        candidates = self._db._db_signatures[signature]
    except KeyError:
        # don't expect this to happen as the signature should be in db
        logging.warning(f"Did not find signature for {self.uuid} in _db_signatures")
        return []

    # every uuid sharing the signature, except this photo itself
    return [self._db.get_photo(uuid) for uuid in candidates if uuid != self.uuid]
def render_template( def render_template(
self, self,
template_str, template_str,

View File

@ -240,6 +240,10 @@ class PhotosDB:
# Will hold the primary key of root folder # Will hold the primary key of root folder
self._folder_root_pk = None self._folder_root_pk = None
# Dict to hold signatures for finding possible duplicates
# key is the tuple returned by _duplicate_signature(): (original_filesize, imageDate, height, width, UTI, hasAdjustments); value is list of uuids that match that signature
self._db_signatures = {}
if _debug(): if _debug():
logging.debug(f"dbfile = {dbfile}") logging.debug(f"dbfile = {dbfile}")
@ -1180,6 +1184,13 @@ class PhotosDB:
self._dbphotos[uuid]["import_uuid"] = row[44] self._dbphotos[uuid]["import_uuid"] = row[44]
self._dbphotos[uuid]["fok_import_session"] = None self._dbphotos[uuid]["fok_import_session"] = None
# compute signatures for finding possible duplicates
signature = self._duplicate_signature(uuid)
try:
self._db_signatures[signature].append(uuid)
except KeyError:
self._db_signatures[signature] = [uuid]
# get additional details from RKMaster, needed for RAW processing # get additional details from RKMaster, needed for RAW processing
verbose("Processing additional photo details.") verbose("Processing additional photo details.")
c.execute( c.execute(
@ -2145,6 +2156,13 @@ class PhotosDB:
self._dbphotos[uuid] = info self._dbphotos[uuid] = info
# compute signatures for finding possible duplicates
signature = self._duplicate_signature(uuid)
try:
self._db_signatures[signature].append(uuid)
except KeyError:
self._db_signatures[signature] = [uuid]
# # if row[19] is not None and ((row[20] == 2) or (row[20] == 4)): # # if row[19] is not None and ((row[20] == 2) or (row[20] == 4)):
# # burst photo # # burst photo
# if row[19] is not None: # if row[19] is not None:
@ -3209,6 +3227,17 @@ class PhotosDB:
return photos return photos
def _duplicate_signature(self, uuid):
    """Build the tuple used to group photos that may be duplicates.

    Returns a tuple of the original_filesize, imageDate, height, width,
    UTI, and hasAdjustments values stored in self._dbphotos[uuid].
    """
    record = self._dbphotos[uuid]
    signature_fields = (
        "original_filesize",
        "imageDate",
        "height",
        "width",
        "UTI",
        "hasAdjustments",
    )
    return tuple(record[field] for field in signature_fields)
def __repr__(self): def __repr__(self):
return f"osxphotos.{self.__class__.__name__}(dbfile='{self.db_path}')" return f"osxphotos.{self.__class__.__name__}(dbfile='{self.db_path}')"

View File

@ -5,7 +5,7 @@
<key>LithiumMessageTracer</key> <key>LithiumMessageTracer</key>
<dict> <dict>
<key>LastReportedDate</key> <key>LastReportedDate</key>
<date>2020-04-17T18:39:50Z</date> <date>2021-06-01T17:42:08Z</date>
</dict> </dict>
<key>PXPeopleScreenUnlocked</key> <key>PXPeopleScreenUnlocked</key>
<true/> <true/>

View File

@ -11,6 +11,6 @@
<key>PLLastRevGeoForcedProviderOutOfDateCheckVersionKey</key> <key>PLLastRevGeoForcedProviderOutOfDateCheckVersionKey</key>
<integer>1</integer> <integer>1</integer>
<key>PLLastRevGeoVerFileFetchDateKey</key> <key>PLLastRevGeoVerFileFetchDateKey</key>
<date>2020-04-17T18:39:52Z</date> <date>2021-06-01T17:42:08Z</date>
</dict> </dict>
</plist> </plist>

View File

@ -3,7 +3,7 @@
<plist version="1.0"> <plist version="1.0">
<dict> <dict>
<key>LastHistoryRowId</key> <key>LastHistoryRowId</key>
<integer>502</integer> <integer>517</integer>
<key>LibraryBuildTag</key> <key>LibraryBuildTag</key>
<string>E3E46F2A-7168-4973-AB3E-5848F80BFC7D</string> <string>E3E46F2A-7168-4973-AB3E-5848F80BFC7D</string>
<key>LibrarySchemaVersion</key> <key>LibrarySchemaVersion</key>

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 157 KiB

View File

@ -1,5 +1,9 @@
import collections
import datetime
import pytest import pytest
import osxphotos
from osxphotos._constants import _UNKNOWN_PERSON from osxphotos._constants import _UNKNOWN_PERSON
PHOTOS_DB = "./tests/Test-10.12.6.photoslibrary/database/photos.db" PHOTOS_DB = "./tests/Test-10.12.6.photoslibrary/database/photos.db"
@ -18,8 +22,8 @@ PERSONS = ["Katie", "Suzy", "Maria", _UNKNOWN_PERSON]
ALBUMS = ["Pumpkin Farm", "AlbumInFolder"] ALBUMS = ["Pumpkin Farm", "AlbumInFolder"]
KEYWORDS_DICT = { KEYWORDS_DICT = {
"Kids": 4, "Kids": 4,
"wedding": 2, "wedding": 3,
"flowers": 1, "flowers": 2,
"England": 1, "England": 1,
"London": 1, "London": 1,
"London 2018": 1, "London 2018": 1,
@ -30,83 +34,64 @@ KEYWORDS_DICT = {
PERSONS_DICT = {"Katie": 3, "Suzy": 2, "Maria": 1, _UNKNOWN_PERSON: 1} PERSONS_DICT = {"Katie": 3, "Suzy": 2, "Maria": 1, _UNKNOWN_PERSON: 1}
ALBUM_DICT = {"Pumpkin Farm": 3, "AlbumInFolder": 1} ALBUM_DICT = {"Pumpkin Farm": 3, "AlbumInFolder": 1}
UUID_DICT = {"derivatives": "FPm+ICxpQV+LPBKR22UepA"} UUID_DICT = {
"derivatives": "FPm+ICxpQV+LPBKR22UepA",
"no_duplicates": "FPm+ICxpQV+LPBKR22UepA",
"duplicates": "HWsxlzxlQ++1TUPg2XNUgg",
}
UUID_DUPLICATE = "VwOUaFMlSry5+51f6q8uyw"
def test_init(): @pytest.fixture(scope="module")
import osxphotos def photosdb():
return osxphotos.PhotosDB(dbfile=PHOTOS_DB)
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
def test_init(photosdb):
assert isinstance(photosdb, osxphotos.PhotosDB) assert isinstance(photosdb, osxphotos.PhotosDB)
def test_db_version(): def test_db_version(photosdb):
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
# assert photosdb.db_version in osxphotos._TESTED_DB_VERSIONS # assert photosdb.db_version in osxphotos._TESTED_DB_VERSIONS
assert photosdb.db_version == "2622" assert photosdb.db_version == "2622"
def test_persons(): def test_persons(photosdb):
import osxphotos
import collections
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
assert "Katie" in photosdb.persons assert "Katie" in photosdb.persons
assert collections.Counter(PERSONS) == collections.Counter(photosdb.persons) assert collections.Counter(PERSONS) == collections.Counter(photosdb.persons)
def test_keywords(): def test_keywords(photosdb):
import osxphotos
import collections
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
assert "wedding" in photosdb.keywords assert "wedding" in photosdb.keywords
assert collections.Counter(KEYWORDS) == collections.Counter(photosdb.keywords) assert collections.Counter(KEYWORDS) == collections.Counter(photosdb.keywords)
def test_album_names(): def test_album_names(photosdb):
import osxphotos
import collections
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
assert "Pumpkin Farm" in photosdb.albums assert "Pumpkin Farm" in photosdb.albums
assert collections.Counter(ALBUMS) == collections.Counter(photosdb.albums) assert collections.Counter(ALBUMS) == collections.Counter(photosdb.albums)
def test_keywords_dict(): def test_keywords_dict(photosdb):
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
keywords = photosdb.keywords_as_dict keywords = photosdb.keywords_as_dict
assert keywords["wedding"] == 2 assert keywords["wedding"] == 3
assert keywords == KEYWORDS_DICT assert keywords == KEYWORDS_DICT
def test_persons_as_dict(): def test_persons_as_dict(photosdb):
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
persons = photosdb.persons_as_dict persons = photosdb.persons_as_dict
assert persons["Maria"] == 1 assert persons["Maria"] == 1
assert persons == PERSONS_DICT assert persons == PERSONS_DICT
def test_albums_as_dict(): def test_albums_as_dict(photosdb):
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
albums = photosdb.albums_as_dict albums = photosdb.albums_as_dict
assert albums["Pumpkin Farm"] == 3 assert albums["Pumpkin Farm"] == 3
assert albums == ALBUM_DICT assert albums == ALBUM_DICT
def test_attributes(): def test_attributes(photosdb):
import datetime
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
photos = photosdb.photos(uuid=["sE5LlfekS8ykEE7o0cuMVA"]) photos = photosdb.photos(uuid=["sE5LlfekS8ykEE7o0cuMVA"])
assert len(photos) == 1 assert len(photos) == 1
p = photos[0] p = photos[0]
@ -126,38 +111,25 @@ def test_attributes():
assert p.ismissing == False assert p.ismissing == False
def test_missing(): def test_missing(photosdb):
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
photos = photosdb.photos(uuid=["Pj99JmYjQkeezdY2OFuSaw"]) photos = photosdb.photos(uuid=["Pj99JmYjQkeezdY2OFuSaw"])
assert len(photos) == 1 assert len(photos) == 1
p = photos[0] p = photos[0]
assert p.path == None assert p.path is None
assert p.ismissing == True assert p.ismissing == True
def test_count(): def test_count(photosdb):
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
photos = photosdb.photos() photos = photosdb.photos()
assert len(photos) == 9 assert len(photos) == 10
def test_keyword_2(): def test_keyword_2(photosdb):
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
photos = photosdb.photos(keywords=["wedding"]) photos = photosdb.photos(keywords=["wedding"])
assert len(photos) == 2 assert len(photos) == 3
def test_keyword_not_in_album(): def test_keyword_not_in_album(photosdb):
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
# find all photos with keyword "Kids" not in the album "Pumpkin Farm" # find all photos with keyword "Kids" not in the album "Pumpkin Farm"
photos1 = photosdb.photos(albums=["Pumpkin Farm"]) photos1 = photosdb.photos(albums=["Pumpkin Farm"])
photos2 = photosdb.photos(keywords=["Kids"]) photos2 = photosdb.photos(keywords=["Kids"])
@ -166,12 +138,8 @@ def test_keyword_not_in_album():
assert photos3[0].uuid == "Pj99JmYjQkeezdY2OFuSaw" assert photos3[0].uuid == "Pj99JmYjQkeezdY2OFuSaw"
def test_path_derivatives(): def test_path_derivatives(photosdb):
# test path_derivatives # test path_derivatives
import osxphotos
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
photos = photosdb.photos(uuid=[UUID_DICT["derivatives"]]) photos = photosdb.photos(uuid=[UUID_DICT["derivatives"]])
p = photos[0] p = photos[0]
derivs = [ derivs = [
@ -180,3 +148,18 @@ def test_path_derivatives():
] ]
for i, p in enumerate(p.path_derivatives): for i, p in enumerate(p.path_derivatives):
assert p.endswith(derivs[i]) assert p.endswith(derivs[i])
def test_duplicates_1(photosdb):
    """PhotoInfo.duplicates lists exactly one duplicate, UUID_DUPLICATE."""
    dupes = photosdb.get_photo(uuid=UUID_DICT["duplicates"]).duplicates
    assert len(dupes) == 1
    assert dupes[0].uuid == UUID_DUPLICATE
def test_duplicates_2(photosdb):
    """PhotoInfo.duplicates is empty for a photo that has no duplicates."""
    uuid = UUID_DICT["no_duplicates"]
    assert not photosdb.get_photo(uuid=uuid).duplicates

View File

@ -111,6 +111,7 @@ UUID_DICT = {
"import_session": "8846E3E6-8AC8-4857-8448-E3D025784410", "import_session": "8846E3E6-8AC8-4857-8448-E3D025784410",
"movie": "D1359D09-1373-4F3B-B0E3-1A4DE573E4A3", "movie": "D1359D09-1373-4F3B-B0E3-1A4DE573E4A3",
"description_newlines": "7F74DD34-5920-4DA3-B284-479887A34F66", "description_newlines": "7F74DD34-5920-4DA3-B284-479887A34F66",
"no_duplicates": "E9BC5C36-7CD1-40A1-A72B-8B8FAC227D51",
} }
UUID_DICT_LOCAL = { UUID_DICT_LOCAL = {
@ -217,6 +218,8 @@ ORIGINAL_FILENAME_DICT = {
UUID_IS_REFERENCE = "A1DD1F98-2ECD-431F-9AC9-5AFEFE2D3A5C" UUID_IS_REFERENCE = "A1DD1F98-2ECD-431F-9AC9-5AFEFE2D3A5C"
UUID_NOT_REFERENCE = "F12384F6-CD17-4151-ACBA-AE0E3688539E" UUID_NOT_REFERENCE = "F12384F6-CD17-4151-ACBA-AE0E3688539E"
UUID_DUPLICATE = ""
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def photosdb(): def photosdb():
@ -1347,3 +1350,19 @@ def test_exiftool_newlines_in_description(photosdb):
exif = photo._exiftool_dict() exif = photo._exiftool_dict()
assert photo.description.find("\n") > 0 assert photo.description.find("\n") > 0
assert exif["EXIF:ImageDescription"].find("\n") == -1 assert exif["EXIF:ImageDescription"].find("\n") == -1
# NOTE: skipped unconditionally; UUID_DUPLICATE is still an empty placeholder
# in this module, so there is nothing to compare against yet.
# pytest.mark.skip accepts only `reason` (a condition belongs to skipif).
@pytest.mark.skip(reason="Not yet implemented")
def test_duplicates_1(photosdb):
    """PhotoInfo.duplicates lists exactly one duplicate, UUID_DUPLICATE."""
    # test photo has duplicates
    photo = photosdb.get_photo(uuid=UUID_DICT["duplicates"])
    assert len(photo.duplicates) == 1
    assert photo.duplicates[0].uuid == UUID_DUPLICATE
def test_duplicates_2(photosdb):
    """PhotoInfo.duplicates is empty for a photo that has no duplicates."""
    uuid = UUID_DICT["no_duplicates"]
    assert not photosdb.get_photo(uuid=uuid).duplicates