diff --git a/README.md b/README.md index 7c357fef..42005bc6 100644 --- a/README.md +++ b/README.md @@ -2964,6 +2964,18 @@ Some substitutions, notably `album`, `keyword`, and `person` could return multip See [Template System](#template-system) for additional details. +#### `detected_text(confidence_threshold=TEXT_DETECTION_CONFIDENCE_THRESHOLD)` + +Detects text in photo and returns a list of results as (detected text, confidence) tuples + +- `confidence_threshold`: float between 0.0 and 1.0. If text detection confidence is below this threshold, text will not be returned. Default is `osxphotos._constants.TEXT_DETECTION_CONFIDENCE_THRESHOLD` + +If photo is edited, uses the edited photo, otherwise the original; falls back to the preview image if neither edited nor original is available. + +Returns: list of (detected text, confidence) tuples. + +Note: This is *not* the same as Live Text in macOS Monterey. When using `detected_text()`, osxphotos will use Apple's [Vision framework](https://developer.apple.com/documentation/vision/recognizing_text_in_images?language=objc) to perform text detection on the image. On my circa 2013 MacBook Pro, this takes about 2 seconds per image. `detected_text()` does memoize the results for a given `confidence_threshold` so repeated calls will not re-process the photo. + ### ExifInfo [PhotosInfo.exif_info](#exif-info) returns an `ExifInfo` object with some EXIF data about the photo (Photos 5 only). 
def detected_text(self, confidence_threshold=TEXT_DETECTION_CONFIDENCE_THRESHOLD):
    """Detects text in photo and returns list of results as (detected text, confidence)

    confidence_threshold: float between 0.0 and 1.0. If text detection confidence is below this threshold,
    text will not be returned. Default is TEXT_DETECTION_CONFIDENCE_THRESHOLD

    If photo is edited, uses the edited photo, otherwise the original; falls back to the
    preview image (first entry of path_derivatives) if neither edited nor original is available

    Results are memoized on the instance per (path, confidence_threshold), so repeated
    calls with the same threshold do not re-process the photo.

    Returns: list of (detected text, confidence) tuples; empty list if no image file
    is available or text detection fails
    """
    if self.hasadjustments and self.path_edited:
        path = self.path_edited
    else:
        path = self.path

    # Only fall back to a derivative when there is no edited/original path.
    # The previous one-liner `path or derivs[0] if derivs else None` parsed as
    # `(path or derivs[0]) if derivs else None`, which threw away a valid path
    # whenever the photo had no derivatives.
    if not path:
        derivatives = self.path_derivatives
        path = derivatives[0] if derivatives else None
    if not path:
        return []

    cache_key = (path, confidence_threshold)
    try:
        return self._detected_text[cache_key]
    except AttributeError:
        # first call on this instance: create the memoization dict lazily
        self._detected_text = {}
    except KeyError:
        pass

    try:
        raw_results = detect_text(path)
    except Exception:
        # deliberate best-effort: a failure in text detection is reported
        # to the caller as "no text found" rather than raised
        raw_results = []

    self._detected_text[cache_key] = [
        (text, confidence)
        for text, confidence in raw_results
        if confidence >= confidence_threshold
    ]
    return self._detected_text[cache_key]
def detect_text(img_path: str) -> List:
    """process image at img_path with VNRecognizeTextRequest and return list of results

    img_path: path to the image file to process

    Returns: the list filled in by the completion handler built with
    make_request_handler(); each entry is [recognized text, confidence]
    """
    with objc.autorelease_pool():
        input_url = NSURL.fileURLWithPath_(img_path)

        with pipes() as (out, err):
            # capture stdout and stderr from system calls
            # otherwise, Quartz.CIImage.imageWithContentsOfURL_
            # prints to stderr something like:
            # 2020-09-20 20:55:25.538 python[73042:5650492] Creating client/daemon connection: B8FE995E-3F27-47F4-9FA8-559C615FD774
            # 2020-09-20 20:55:25.652 python[73042:5650492] Got the query meta data reply for: com.apple.MobileAsset.RawCamera.Camera, response: 0
            input_image = Quartz.CIImage.imageWithContentsOfURL_(input_url)

        vision_options = NSDictionary.dictionaryWithDictionary_({})
        vision_handler = Vision.VNImageRequestHandler.alloc().initWithCIImage_options_(
            input_image, vision_options
        )
        # the completion handler appends into this list while the request runs
        results = []
        handler = make_request_handler(results)
        vision_request = (
            Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(handler)
        )
        # NOTE(review): the return value is not inspected; failures are only
        # surfaced through the completion handler's error argument
        error = vision_handler.performRequests_error_([vision_request], None)
        vision_request.dealloc()
        vision_handler.dealloc()

        return results


def make_request_handler(results):
    """Build a completion handler that collects recognized text into results

    results: list to store results; the handler appends one
        [recognized text, confidence] entry per text observation

    Returns: handler(request, error) callable suitable for passing to
        VNRecognizeTextRequest.initWithCompletionHandler_

    Raises: ValueError if results is not a list
    """
    if not isinstance(results, list):
        raise ValueError("results must be a list")

    def handler(request, error):
        if error:
            print(f"Error! {error}")
            return

        # results() can be nil/empty when nothing was recognized; treat as no observations
        for text_observation in request.results() or []:
            candidates = text_observation.topCandidates_(1)
            if not candidates:
                # observation without any candidate string: nothing to record
                continue
            recognized_text = candidates[0]
            results.append([recognized_text.string(), recognized_text.confidence()])

    return handler
def test_detected_text(photosdb):
    """PhotoInfo.detected_text finds the expected text (or none) in known photos"""
    for photo_uuid in UUID_DETECTED_TEXT:
        wanted = UUID_DETECTED_TEXT[photo_uuid]
        photo = photosdb.get_photo(uuid=photo_uuid)
        # flatten every detected string into one blob for a substring check
        found = " ".join(text for text, _ in photo.detected_text())
        if wanted is None:
            assert not found
        else:
            assert wanted in found