Added PhotoInfo.duplicates
This commit is contained in:
parent
99f4394f8e
commit
7accfdb066
@ -1497,7 +1497,7 @@ Substitution Description
|
||||
{lf} A line feed: '\n', alias for {newline}
|
||||
{cr} A carriage return: '\r'
|
||||
{crlf} a carriage return + line feed: '\r\n'
|
||||
{osxphotos_version} The osxphotos version, e.g. '0.42.27'
|
||||
{osxphotos_version} The osxphotos version, e.g. '0.42.28'
|
||||
{osxphotos_cmd_line} The full command line used to run osxphotos
|
||||
|
||||
The following substitutions may result in multiple values. Thus if specified for
|
||||
@ -2388,6 +2388,9 @@ Returns a [ScoreInfo](#scoreinfo) data class object which provides access to the
|
||||
|
||||
**Note**: Valid only for Photos 5; returns None for earlier Photos versions.
|
||||
|
||||
#### `duplicates`
|
||||
Returns list of PhotoInfo objects for *possible* duplicates or empty list if no matching duplicates. Photos are considered possible duplicates if the photo's original file size, date created, height, width, type (UTI), and edited state match those of another photo. This does not do a byte-for-byte comparison or compute a hash, which makes it fast and allows for identification of possible duplicates even if originals are not downloaded from iCloud. The signature-based approach should be robust enough to match duplicates created either through the "duplicate photo" menu item or imported twice into the library, but you should not rely on this 100% for identification of all duplicates.
|
||||
|
||||
#### `json()`
|
||||
Returns a JSON representation of all photo info.
|
||||
|
||||
@ -3191,7 +3194,7 @@ The following template field substitutions are availabe for use the templating s
|
||||
|{lf}|A line feed: '\n', alias for {newline}|
|
||||
|{cr}|A carriage return: '\r'|
|
||||
|{crlf}|a carriage return + line feed: '\r\n'|
|
||||
|{osxphotos_version}|The osxphotos version, e.g. '0.42.27'|
|
||||
|{osxphotos_version}|The osxphotos version, e.g. '0.42.28'|
|
||||
|{osxphotos_cmd_line}|The full command line used to run osxphotos|
|
||||
|{album}|Album(s) photo is contained in|
|
||||
|{folder_album}|Folder path + album photo is contained in. e.g. 'Folder/Subfolder/Album' or just 'Album' if no enclosing folder|
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
""" version info """
|
||||
|
||||
__version__ = "0.42.27"
|
||||
__version__ = "0.42.28"
|
||||
|
||||
@ -998,6 +998,21 @@ class PhotoInfo:
|
||||
""" returns filesize of original photo in bytes as int """
|
||||
return self._info["original_filesize"]
|
||||
|
||||
@property
def duplicates(self):
    """Return a list of PhotoInfo objects that are possible duplicates of this photo.

    A photo is a possible duplicate when it shares this photo's signature as
    computed by ``self._db._duplicate_signature`` (original file size, image
    date, height, width, UTI and adjustment flag).  No file contents are
    compared, so matches are only *possible* duplicates.

    Returns:
        list of objects returned by ``self._db.get_photo`` for every other
        uuid with the same signature; an empty list if there are none or if
        the signature is unexpectedly missing from ``_db_signatures``.
    """
    signature = self._db._duplicate_signature(self.uuid)

    # Guard only the signature lookup so that a KeyError raised while
    # building the result is not misreported as a missing signature.
    try:
        matching_uuids = self._db._db_signatures[signature]
    except KeyError:
        # don't expect this to happen as the signature should be in db
        logging.warning(f"Did not find signature for {self.uuid} in _db_signatures")
        return []

    # every uuid sharing the signature, except this photo itself
    return [
        self._db.get_photo(uuid) for uuid in matching_uuids if uuid != self.uuid
    ]
|
||||
|
||||
def render_template(
|
||||
self,
|
||||
template_str,
|
||||
|
||||
@ -240,6 +240,10 @@ class PhotosDB:
|
||||
# Will hold the primary key of root folder
|
||||
self._folder_root_pk = None
|
||||
|
||||
# Dict to hold signatures for finding possible duplicates
|
||||
# key is tuple of (original_filesize, imageDate, height, width, UTI, hasAdjustments) and value is list of uuids that match that signature
|
||||
self._db_signatures = {}
|
||||
|
||||
if _debug():
|
||||
logging.debug(f"dbfile = {dbfile}")
|
||||
|
||||
@ -1180,6 +1184,13 @@ class PhotosDB:
|
||||
self._dbphotos[uuid]["import_uuid"] = row[44]
|
||||
self._dbphotos[uuid]["fok_import_session"] = None
|
||||
|
||||
# compute signatures for finding possible duplicates
|
||||
signature = self._duplicate_signature(uuid)
|
||||
try:
|
||||
self._db_signatures[signature].append(uuid)
|
||||
except KeyError:
|
||||
self._db_signatures[signature] = [uuid]
|
||||
|
||||
# get additional details from RKMaster, needed for RAW processing
|
||||
verbose("Processing additional photo details.")
|
||||
c.execute(
|
||||
@ -2145,6 +2156,13 @@ class PhotosDB:
|
||||
|
||||
self._dbphotos[uuid] = info
|
||||
|
||||
# compute signatures for finding possible duplicates
|
||||
signature = self._duplicate_signature(uuid)
|
||||
try:
|
||||
self._db_signatures[signature].append(uuid)
|
||||
except KeyError:
|
||||
self._db_signatures[signature] = [uuid]
|
||||
|
||||
# # if row[19] is not None and ((row[20] == 2) or (row[20] == 4)):
|
||||
# # burst photo
|
||||
# if row[19] is not None:
|
||||
@ -3209,6 +3227,17 @@ class PhotosDB:
|
||||
|
||||
return photos
|
||||
|
||||
def _duplicate_signature(self, uuid):
    """Build the signature tuple used to group possible duplicate photos.

    Args:
        uuid: key into ``self._dbphotos`` identifying the photo.

    Returns:
        tuple of the photo's ``original_filesize``, ``imageDate``, ``height``,
        ``width``, ``UTI`` and ``hasAdjustments`` values, in that order.
    """
    record = self._dbphotos[uuid]
    signature_fields = (
        "original_filesize",
        "imageDate",
        "height",
        "width",
        "UTI",
        "hasAdjustments",
    )
    return tuple(record[field] for field in signature_fields)
|
||||
|
||||
def __repr__(self):
|
||||
return f"osxphotos.{self.__class__.__name__}(dbfile='{self.db_path}')"
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@ -5,7 +5,7 @@
|
||||
<key>LithiumMessageTracer</key>
|
||||
<dict>
|
||||
<key>LastReportedDate</key>
|
||||
<date>2020-04-17T18:39:50Z</date>
|
||||
<date>2021-06-01T17:42:08Z</date>
|
||||
</dict>
|
||||
<key>PXPeopleScreenUnlocked</key>
|
||||
<true/>
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@ -11,6 +11,6 @@
|
||||
<key>PLLastRevGeoForcedProviderOutOfDateCheckVersionKey</key>
|
||||
<integer>1</integer>
|
||||
<key>PLLastRevGeoVerFileFetchDateKey</key>
|
||||
<date>2020-04-17T18:39:52Z</date>
|
||||
<date>2021-06-01T17:42:08Z</date>
|
||||
</dict>
|
||||
</plist>
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>LastHistoryRowId</key>
|
||||
<integer>502</integer>
|
||||
<integer>517</integer>
|
||||
<key>LibraryBuildTag</key>
|
||||
<string>E3E46F2A-7168-4973-AB3E-5848F80BFC7D</string>
|
||||
<key>LibrarySchemaVersion</key>
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 34 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 157 KiB |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,5 +1,9 @@
|
||||
import collections
|
||||
import datetime
|
||||
|
||||
import pytest
|
||||
|
||||
import osxphotos
|
||||
from osxphotos._constants import _UNKNOWN_PERSON
|
||||
|
||||
PHOTOS_DB = "./tests/Test-10.12.6.photoslibrary/database/photos.db"
|
||||
@ -18,8 +22,8 @@ PERSONS = ["Katie", "Suzy", "Maria", _UNKNOWN_PERSON]
|
||||
ALBUMS = ["Pumpkin Farm", "AlbumInFolder"]
|
||||
KEYWORDS_DICT = {
|
||||
"Kids": 4,
|
||||
"wedding": 2,
|
||||
"flowers": 1,
|
||||
"wedding": 3,
|
||||
"flowers": 2,
|
||||
"England": 1,
|
||||
"London": 1,
|
||||
"London 2018": 1,
|
||||
@ -30,83 +34,64 @@ KEYWORDS_DICT = {
|
||||
PERSONS_DICT = {"Katie": 3, "Suzy": 2, "Maria": 1, _UNKNOWN_PERSON: 1}
|
||||
ALBUM_DICT = {"Pumpkin Farm": 3, "AlbumInFolder": 1}
|
||||
|
||||
UUID_DICT = {"derivatives": "FPm+ICxpQV+LPBKR22UepA"}
|
||||
UUID_DICT = {
|
||||
"derivatives": "FPm+ICxpQV+LPBKR22UepA",
|
||||
"no_duplicates": "FPm+ICxpQV+LPBKR22UepA",
|
||||
"duplicates": "HWsxlzxlQ++1TUPg2XNUgg",
|
||||
}
|
||||
|
||||
UUID_DUPLICATE = "VwOUaFMlSry5+51f6q8uyw"
|
||||
|
||||
|
||||
def test_init():
|
||||
import osxphotos
|
||||
@pytest.fixture(scope="module")
def photosdb():
    """Module-scoped PhotosDB opened on the test library at PHOTOS_DB."""
    library = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
    return library
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
|
||||
def test_init(photosdb):
|
||||
assert isinstance(photosdb, osxphotos.PhotosDB)
|
||||
|
||||
|
||||
def test_db_version():
|
||||
import osxphotos
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
def test_db_version(photosdb):
|
||||
# assert photosdb.db_version in osxphotos._TESTED_DB_VERSIONS
|
||||
assert photosdb.db_version == "2622"
|
||||
|
||||
|
||||
def test_persons():
|
||||
import osxphotos
|
||||
import collections
|
||||
def test_persons(photosdb):
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
assert "Katie" in photosdb.persons
|
||||
assert collections.Counter(PERSONS) == collections.Counter(photosdb.persons)
|
||||
|
||||
|
||||
def test_keywords():
|
||||
import osxphotos
|
||||
import collections
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
def test_keywords(photosdb):
|
||||
assert "wedding" in photosdb.keywords
|
||||
assert collections.Counter(KEYWORDS) == collections.Counter(photosdb.keywords)
|
||||
|
||||
|
||||
def test_album_names():
|
||||
import osxphotos
|
||||
import collections
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
def test_album_names(photosdb):
|
||||
assert "Pumpkin Farm" in photosdb.albums
|
||||
assert collections.Counter(ALBUMS) == collections.Counter(photosdb.albums)
|
||||
|
||||
|
||||
def test_keywords_dict():
|
||||
import osxphotos
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
def test_keywords_dict(photosdb):
|
||||
keywords = photosdb.keywords_as_dict
|
||||
assert keywords["wedding"] == 2
|
||||
assert keywords["wedding"] == 3
|
||||
assert keywords == KEYWORDS_DICT
|
||||
|
||||
|
||||
def test_persons_as_dict():
|
||||
import osxphotos
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
def test_persons_as_dict(photosdb):
|
||||
persons = photosdb.persons_as_dict
|
||||
assert persons["Maria"] == 1
|
||||
assert persons == PERSONS_DICT
|
||||
|
||||
|
||||
def test_albums_as_dict():
|
||||
import osxphotos
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
def test_albums_as_dict(photosdb):
|
||||
albums = photosdb.albums_as_dict
|
||||
assert albums["Pumpkin Farm"] == 3
|
||||
assert albums == ALBUM_DICT
|
||||
|
||||
|
||||
def test_attributes():
|
||||
import datetime
|
||||
import osxphotos
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
def test_attributes(photosdb):
|
||||
photos = photosdb.photos(uuid=["sE5LlfekS8ykEE7o0cuMVA"])
|
||||
assert len(photos) == 1
|
||||
p = photos[0]
|
||||
@ -126,38 +111,25 @@ def test_attributes():
|
||||
assert p.ismissing == False
|
||||
|
||||
|
||||
def test_missing():
|
||||
import osxphotos
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
def test_missing(photosdb):
|
||||
photos = photosdb.photos(uuid=["Pj99JmYjQkeezdY2OFuSaw"])
|
||||
assert len(photos) == 1
|
||||
p = photos[0]
|
||||
assert p.path == None
|
||||
assert p.path is None
|
||||
assert p.ismissing == True
|
||||
|
||||
|
||||
def test_count():
|
||||
import osxphotos
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
def test_count(photosdb):
|
||||
photos = photosdb.photos()
|
||||
assert len(photos) == 9
|
||||
assert len(photos) == 10
|
||||
|
||||
|
||||
def test_keyword_2():
|
||||
import osxphotos
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
def test_keyword_2(photosdb):
|
||||
photos = photosdb.photos(keywords=["wedding"])
|
||||
assert len(photos) == 2
|
||||
assert len(photos) == 3
|
||||
|
||||
|
||||
def test_keyword_not_in_album():
|
||||
import osxphotos
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
|
||||
def test_keyword_not_in_album(photosdb):
|
||||
# find all photos with keyword "Kids" not in the album "Pumpkin Farm"
|
||||
photos1 = photosdb.photos(albums=["Pumpkin Farm"])
|
||||
photos2 = photosdb.photos(keywords=["Kids"])
|
||||
@ -166,12 +138,8 @@ def test_keyword_not_in_album():
|
||||
assert photos3[0].uuid == "Pj99JmYjQkeezdY2OFuSaw"
|
||||
|
||||
|
||||
def test_path_derivatives():
|
||||
def test_path_derivatives(photosdb):
|
||||
# test path_derivatives
|
||||
import osxphotos
|
||||
|
||||
photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
|
||||
|
||||
photos = photosdb.photos(uuid=[UUID_DICT["derivatives"]])
|
||||
p = photos[0]
|
||||
derivs = [
|
||||
@ -180,3 +148,18 @@ def test_path_derivatives():
|
||||
]
|
||||
for i, p in enumerate(p.path_derivatives):
|
||||
assert p.endswith(derivs[i])
|
||||
|
||||
|
||||
def test_duplicates_1(photosdb):
    """A photo with a known duplicate reports exactly that one duplicate."""
    target_uuid = UUID_DICT["duplicates"]
    photo = photosdb.get_photo(uuid=target_uuid)

    # exactly one possible duplicate is expected ...
    assert len(photo.duplicates) == 1
    # ... and it must be the known duplicate
    assert photo.duplicates[0].uuid == UUID_DUPLICATE
|
||||
|
||||
|
||||
def test_duplicates_2(photosdb):
    """A photo without duplicates reports an empty (falsy) duplicates list."""
    target_uuid = UUID_DICT["no_duplicates"]
    photo = photosdb.get_photo(uuid=target_uuid)
    assert not photo.duplicates
|
||||
|
||||
@ -111,6 +111,7 @@ UUID_DICT = {
|
||||
"import_session": "8846E3E6-8AC8-4857-8448-E3D025784410",
|
||||
"movie": "D1359D09-1373-4F3B-B0E3-1A4DE573E4A3",
|
||||
"description_newlines": "7F74DD34-5920-4DA3-B284-479887A34F66",
|
||||
"no_duplicates": "E9BC5C36-7CD1-40A1-A72B-8B8FAC227D51",
|
||||
}
|
||||
|
||||
UUID_DICT_LOCAL = {
|
||||
@ -217,6 +218,8 @@ ORIGINAL_FILENAME_DICT = {
|
||||
UUID_IS_REFERENCE = "A1DD1F98-2ECD-431F-9AC9-5AFEFE2D3A5C"
|
||||
UUID_NOT_REFERENCE = "F12384F6-CD17-4151-ACBA-AE0E3688539E"
|
||||
|
||||
UUID_DUPLICATE = ""
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def photosdb():
|
||||
@ -1347,3 +1350,19 @@ def test_exiftool_newlines_in_description(photosdb):
|
||||
exif = photo._exiftool_dict()
|
||||
assert photo.description.find("\n") > 0
|
||||
assert exif["EXIF:ImageDescription"].find("\n") == -1
|
||||
|
||||
|
||||
# ``skip`` is unconditional and accepts only ``reason``; passing a condition
# positionally makes newer pytest raise TypeError ("maybe you meant
# pytest.mark.skipif?").  The test is not implemented for this library yet,
# so it is skipped unconditionally.
@pytest.mark.skip(reason="Not yet implemented")
def test_duplicates_1(photosdb):
    """A photo with a known duplicate reports exactly that one duplicate.

    NOTE(review): the fixture data visible for this library sets
    UUID_DUPLICATE to "" and adds no "duplicates" key to UUID_DICT, so both
    need real values before the skip is removed -- confirm against the
    full file.
    """
    # test photo has duplicates
    photo = photosdb.get_photo(uuid=UUID_DICT["duplicates"])
    assert len(photo.duplicates) == 1
    assert photo.duplicates[0].uuid == UUID_DUPLICATE
|
||||
|
||||
|
||||
def test_duplicates_2(photosdb):
    """A photo without duplicates reports an empty (falsy) duplicates list."""
    target_uuid = UUID_DICT["no_duplicates"]
    photo = photosdb.get_photo(uuid=target_uuid)
    assert not photo.duplicates
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user