Added PhotoInfo.detected_text()

2021-07-25 18:34:59 -07:00
parent 852a06f99b
commit 123340eada
8 changed files with 149 additions and 22 deletions
--- a/README.md
+++ b/README.md
@@ -2964,6 +2964,18 @@ Some substitutions, notably `album`, `keyword`, and `person` could return multip

 See [Template System](#template-system) for additional details.

+#### `detected_text(confidence_threshold=TEXT_DETECTION_CONFIDENCE_THRESHOLD)`
+
+Detects text in the photo and returns a list of results as (detected text, confidence) tuples.
+
+- `confidence_threshold`: float between 0.0 and 1.0. If text detection confidence is below this threshold, text will not be returned. Default is `osxphotos._constants.TEXT_DETECTION_CONFIDENCE_THRESHOLD`
+
+If the photo is edited, uses the edited photo, otherwise the original; falls back to the preview image if neither the edited nor the original image is available.
+
+Returns: list of (detected text, confidence) tuples.
+
+Note: This is *not* the same as Live Text in macOS Monterey.  When using `detected_text()`, osxphotos will use Apple's [Vision framework](https://developer.apple.com/documentation/vision/recognizing_text_in_images?language=objc) to perform text detection on the image.  On my circa 2013 MacBook Pro, this takes about 2 seconds per image.  `detected_text()` does memoize the results for a given `confidence_threshold` so repeated calls will not re-process the photo.
+
 ### ExifInfo
 [PhotosInfo.exif_info](#exif-info) returns an `ExifInfo` object with some EXIF data about the photo (Photos 5 only).  `ExifInfo` contains the following properties:

--- a/osxphotos/_constants.py
+++ b/osxphotos/_constants.py
@@ -282,3 +282,5 @@ class AlbumSortOrder(Enum):
    NEWEST_FIRST = 2
    OLDEST_FIRST = 3
    TITLE = 5
+
+TEXT_DETECTION_CONFIDENCE_THRESHOLD = 0.75  # default confidence_threshold for PhotoInfo.detected_text()
--- a/osxphotos/_version.py
+++ b/osxphotos/_version.py
@@ -1,3 +1,3 @@
 """ version info """

-__version__ = "0.42.67"
+__version__ = "0.42.68"
--- a/osxphotos/photoinfo/photoinfo.py
+++ b/osxphotos/photoinfo/photoinfo.py
@@ -30,12 +30,14 @@ from .._constants import (
    BURST_KEY,
    BURST_NOT_SELECTED,
    BURST_SELECTED,
+    TEXT_DETECTION_CONFIDENCE_THRESHOLD,
 )
 from ..adjustmentsinfo import AdjustmentsInfo
 from ..albuminfo import AlbumInfo, ImportInfo
 from ..personinfo import FaceInfo, PersonInfo
 from ..phototemplate import PhotoTemplate, RenderOptions
 from ..placeinfo import PlaceInfo4, PlaceInfo5
+from ..text_detection import detect_text
 from ..uti import get_preferred_uti_extension, get_uti_for_extension
 from ..utils import _debug, _get_resource_loc, findfiles

@@ -1106,6 +1108,41 @@ class PhotoInfo:
        template = PhotoTemplate(self, exiftool_path=self._db._exiftool_path)
        return template.render(template_str, options)

+    def detected_text(self, confidence_threshold=TEXT_DETECTION_CONFIDENCE_THRESHOLD):
+        """Detects text in photo and returns list of results as (detected text, confidence)
+
+        confidence_threshold: float between 0.0 and 1.0. If text detection confidence is below this threshold,
+        text will not be returned. Default is TEXT_DETECTION_CONFIDENCE_THRESHOLD
+
+        If photo is edited, uses the edited photo, otherwise the original; falls back to the preview image if neither edited nor original is available
+
+        Returns: list of (detected text, confidence) tuples; empty list if no image path is available or detection raises
+        """
+        path = (
+            self.path_edited if self.hasadjustments and self.path_edited else self.path
+        )
+        # parenthesized on purpose: "x or a if c else None" parses as "(x or a) if c else None"
+        path = path or (self.path_derivatives[0] if self.path_derivatives else None)
+        if not path:
+            return []
+
+        # results are memoized on the instance, keyed by (path, confidence_threshold)
+        try:
+            cache = self._detected_text
+        except AttributeError:
+            cache = self._detected_text = {}
+        key = (path, confidence_threshold)
+        if key not in cache:
+            try:
+                detected = detect_text(path)
+            except Exception:
+                # best effort: any detection failure is reported as "no text found"
+                detected = []
+            cache[key] = [
+                (text, conf) for text, conf in detected if conf >= confidence_threshold
+            ]
+        return cache[key]
+
    @property
    def _longitude(self):
        """Returns longitude, in degrees"""
--- a/osxphotos/text_detection.py
+++ b/osxphotos/text_detection.py
@@ -0,0 +1,58 @@
+""" Use Apple's Vision Framework via PyObjC to perform text detection on images """
+
+import objc
+import Quartz
+import Vision
+from Cocoa import NSURL
+from Foundation import NSDictionary
+
+from typing import List
+
+# needed to capture system-level stderr
+from wurlitzer import pipes
+
+
+def detect_text(img_path: str) -> List:
+    """process image at img_path with VNRecognizeTextRequest and return list of [text, confidence] results, one per text observation"""
+    with objc.autorelease_pool():
+        input_url = NSURL.fileURLWithPath_(img_path)
+
+        with pipes() as (out, err):
+            # capture stdout and stderr from system calls
+            # otherwise, Quartz.CIImage.imageWithContentsOfURL_
+            # prints to stderr something like:
+            # 2020-09-20 20:55:25.538 python[73042:5650492] Creating client/daemon connection: B8FE995E-3F27-47F4-9FA8-559C615FD774
+            # 2020-09-20 20:55:25.652 python[73042:5650492] Got the query meta data reply for: com.apple.MobileAsset.RawCamera.Camera, response: 0
+            input_image = Quartz.CIImage.imageWithContentsOfURL_(input_url)
+
+        vision_options = NSDictionary.dictionaryWithDictionary_({})
+        vision_handler = Vision.VNImageRequestHandler.alloc().initWithCIImage_options_(
+            input_image, vision_options
+        )
+        results = []  # filled in place by the completion handler built below
+        handler = make_request_handler(results)
+        vision_request = (
+            Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(handler)
+        )
+        error = vision_handler.performRequests_error_([vision_request], None)  # NOTE(review): return value is never checked
+        vision_request.dealloc()  # NOTE(review): explicit dealloc of PyObjC objects, presumably to free memory promptly -- confirm this is safe
+        vision_handler.dealloc()
+
+        return results
+
+
+def make_request_handler(results):
+    """return a completion handler that appends [text, confidence] of each observation's top candidate to results (a list)"""
+    if not isinstance(results, list):
+        raise ValueError("results must be a list")
+
+    def handler(request, error):
+        if error:
+            print(f"Error! {error}")
+            return
+        for text_observation in request.results() or []:  # guard: results() may be None (nullable in Vision)
+            candidates = text_observation.topCandidates_(1)
+            if candidates:  # guard: skip an observation that yields no candidate instead of raising IndexError
+                results.append([candidates[0].string(), candidates[0].confidence()])
+
+    return handler
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,12 @@
-pyobjc-core==7.2
-pyobjc-framework-AppleScriptKit==7.2
-pyobjc-framework-AppleScriptObjC==7.2
-pyobjc-framework-Photos==7.2
-pyobjc-framework-Quartz==7.2
-pyobjc-framework-AVFoundation==7.2
-pyobjc-framework-CoreServices==7.2
-pyobjc-framework-Metal==7.2
+pyobjc-core>=7.2
+pyobjc-framework-AppleScriptKit>=7.2
+pyobjc-framework-AppleScriptObjC>=7.2
+pyobjc-framework-Photos>=7.2
+pyobjc-framework-Quartz>=7.2
+pyobjc-framework-AVFoundation>=7.2
+pyobjc-framework-CoreServices>=7.2
+pyobjc-framework-Metal>=7.2
+pyobjc-framework-Vision>=7.2
 Click==8.0.1
 PyYAML==5.4.1
 Mako==1.1.4
@@ -13,10 +14,10 @@ bpylist2==3.0.2
 pathvalidate==2.4.1
 dataclasses==0.7;python_version<'3.7'
 wurlitzer==2.1.0
-photoscript==0.1.3
+photoscript==0.1.4
 toml==0.10.2
-osxmetadata==0.99.25
+osxmetadata==0.99.26
 textx==2.3.0
 rich==10.2.2
 bitmath==1.3.3.1
-more-itertools==8.8.0 
+more-itertools==8.8.0 
--- a/setup.py
+++ b/setup.py
@@ -73,14 +73,15 @@ setup(
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    install_requires=[
-        "pyobjc-core==7.2",
-        "pyobjc-framework-AppleScriptKit==7.2",
-        "pyobjc-framework-AppleScriptObjC==7.2",
-        "pyobjc-framework-Photos==7.2",
-        "pyobjc-framework-Quartz==7.2",
-        "pyobjc-framework-AVFoundation==7.2",
-        "pyobjc-framework-CoreServices==7.2",
-        "pyobjc-framework-Metal==7.2",
+        "pyobjc-core",
+        "pyobjc-framework-AppleScriptKit",
+        "pyobjc-framework-AppleScriptObjC",
+        "pyobjc-framework-Photos",
+        "pyobjc-framework-Quartz",
+        "pyobjc-framework-AVFoundation",
+        "pyobjc-framework-CoreServices",
+        "pyobjc-framework-Metal",
+        "pyobjc-framework-Vision",
        "Click==8.0.1",
        "PyYAML==5.4.1",
        "Mako==1.1.4",
@@ -88,9 +89,9 @@ setup(
        "pathvalidate==2.4.1",
        "dataclasses==0.7;python_version<'3.7'",
        "wurlitzer==2.1.0",
-        "photoscript==0.1.3",
+        "photoscript==0.1.4",
        "toml==0.10.2",
-        "osxmetadata==0.99.25",
+        "osxmetadata==0.99.26",
        "textx==2.3.0",
        "rich==10.2.2",
        "bitmath==1.3.3.1",
--- a/tests/test_catalina_10_15_7.py
+++ b/tests/test_catalina_10_15_7.py
@@ -234,6 +234,11 @@ UUID_NOT_REFERENCE = "F12384F6-CD17-4151-ACBA-AE0E3688539E"

 UUID_DUPLICATE = ""

+UUID_DETECTED_TEXT = {  # photo UUID -> text expected within detected_text() output; None means no text expected
+    "E2078879-A29C-4D6F-BACB-E3BBE6C3EB91": "osxphotos",
+    "A92D9C26-3A50-4197-9388-CB5F7DB9FA91": None,
+}
+

@pytest.fixture(scope="module")
 def photosdb():
@@ -1423,3 +1428,14 @@ def test_multi_uuid(photosdb):
    photos = photosdb.photos(uuid=[UUID_DICT["favorite"], UUID_DICT["not_favorite"]])

    assert len(photos) == 2
+
+
+def test_detected_text(photosdb):
+    """PhotoInfo.detected_text finds the expected text, or nothing when None is expected"""
+    for uuid, expected_text in UUID_DETECTED_TEXT.items():
+        photo = photosdb.get_photo(uuid=uuid)
+        found = " ".join(text for text, _ in photo.detected_text())
+        if expected_text is None:
+            assert not found
+        else:
+            assert expected_text in found