Added PhotoInfo.detected_text()

This commit is contained in:
Rhet Turnbull
2021-07-25 18:34:59 -07:00
parent 852a06f99b
commit 123340eada
8 changed files with 149 additions and 22 deletions

View File

@@ -2964,6 +2964,18 @@ Some substitutions, notably `album`, `keyword`, and `person` could return multip
See [Template System](#template-system) for additional details.
#### `detected_text(confidence_threshold=TEXT_DETECTION_CONFIDENCE_THRESHOLD)`
Detects text in the photo and returns a list of results as (detected text, confidence) tuples.
- `confidence_threshold`: float between 0.0 and 1.0. If text detection confidence is below this threshold, text will not be returned. Default is `osxphotos._constants.TEXT_DETECTION_CONFIDENCE_THRESHOLD`
If the photo is edited, uses the edited photo, otherwise the original; falls back to the preview image if neither the edited nor the original is available.
Returns: list of (detected text, confidence) tuples.
Note: This is *not* the same as Live Text in macOS Monterey. When using `detected_text()`, osxphotos will use Apple's [Vision framework](https://developer.apple.com/documentation/vision/recognizing_text_in_images?language=objc) to perform text detection on the image. On my circa 2013 MacBook Pro, this takes about 2 seconds per image. `detected_text()` does memoize the results for a given `confidence_threshold` so repeated calls will not re-process the photo.
### ExifInfo
[PhotoInfo.exif_info](#exif-info) returns an `ExifInfo` object with some EXIF data about the photo (Photos 5 only). `ExifInfo` contains the following properties:

View File

@@ -282,3 +282,5 @@ class AlbumSortOrder(Enum):
NEWEST_FIRST = 2
OLDEST_FIRST = 3
TITLE = 5
# Default minimum confidence (0.0 to 1.0) a detected text result must have to be
# returned by PhotoInfo.detected_text(); results below this value are dropped
TEXT_DETECTION_CONFIDENCE_THRESHOLD = 0.75

View File

@@ -1,3 +1,3 @@
""" version info """
__version__ = "0.42.67"
__version__ = "0.42.68"

View File

@@ -30,12 +30,14 @@ from .._constants import (
BURST_KEY,
BURST_NOT_SELECTED,
BURST_SELECTED,
TEXT_DETECTION_CONFIDENCE_THRESHOLD,
)
from ..adjustmentsinfo import AdjustmentsInfo
from ..albuminfo import AlbumInfo, ImportInfo
from ..personinfo import FaceInfo, PersonInfo
from ..phototemplate import PhotoTemplate, RenderOptions
from ..placeinfo import PlaceInfo4, PlaceInfo5
from ..text_detection import detect_text
from ..uti import get_preferred_uti_extension, get_uti_for_extension
from ..utils import _debug, _get_resource_loc, findfiles
@@ -1106,6 +1108,41 @@ class PhotoInfo:
template = PhotoTemplate(self, exiftool_path=self._db._exiftool_path)
return template.render(template_str, options)
def detected_text(self, confidence_threshold=TEXT_DETECTION_CONFIDENCE_THRESHOLD):
    """Detects text in photo and returns a list of results as (detected text, confidence) tuples

    Args:
        confidence_threshold: float between 0.0 and 1.0. If text detection confidence is below
            this threshold, text will not be returned. Default is TEXT_DETECTION_CONFIDENCE_THRESHOLD

    If photo is edited, uses the edited photo, otherwise the original; falls back to the
    preview image if neither the edited nor the original is available

    Results are memoized per (image path, confidence_threshold) so repeated calls
    do not re-process the image.

    Returns: list of (detected text, confidence) tuples; an empty list if no image
        is available or if text detection fails
    """
    import logging

    path = (
        self.path_edited if self.hasadjustments and self.path_edited else self.path
    )
    # The parentheses matter: a conditional expression binds looser than `or`, so
    # without them a photo with a valid path but no derivatives would end up with
    # path = None and no text would ever be detected for it.
    path = path or (self.path_derivatives[0] if self.path_derivatives else None)
    if not path:
        return []

    cache_key = (path, confidence_threshold)
    try:
        return self._detected_text[cache_key]
    except AttributeError:
        # first call on this instance: create the memoization cache
        self._detected_text = {}
    except KeyError:
        # cache exists but this (path, threshold) has not been processed yet
        pass

    try:
        detected = detect_text(path)
    except Exception as e:
        # deliberate best effort: a failure in text detection yields no text
        # rather than an exception, but leave a trace for debugging
        logging.getLogger(__name__).debug(
            "text detection failed for %s: %s", path, e
        )
        detected = []

    self._detected_text[cache_key] = [
        (text, confidence)
        for text, confidence in detected
        if confidence >= confidence_threshold
    ]
    return self._detected_text[cache_key]
@property
def _longitude(self):
"""Returns longitude, in degrees"""

View File

@@ -0,0 +1,58 @@
""" Use Apple's Vision Framework via PyObjC to perform text detection on images """
import objc
import Quartz
import Vision
from Cocoa import NSURL
from Foundation import NSDictionary
from typing import List
# needed to capture system-level stderr
from wurlitzer import pipes
def detect_text(img_path: str) -> List:
    """Run a Vision VNRecognizeTextRequest on the image at img_path and return the list of results

    Each result is appended by the completion handler built with make_request_handler().
    """
    with objc.autorelease_pool():
        image_url = NSURL.fileURLWithPath_(img_path)

        # Loading the image with Quartz.CIImage.imageWithContentsOfURL_ otherwise prints
        # system-level messages to stderr such as:
        # 2020-09-20 20:55:25.538 python[73042:5650492] Creating client/daemon connection: B8FE995E-3F27-47F4-9FA8-559C615FD774
        # 2020-09-20 20:55:25.652 python[73042:5650492] Got the query meta data reply for: com.apple.MobileAsset.RawCamera.Camera, response: 0
        # so capture stdout and stderr from system calls while the image loads
        with pipes() as (_out, _err):
            ci_image = Quartz.CIImage.imageWithContentsOfURL_(image_url)

        handler_options = NSDictionary.dictionaryWithDictionary_({})
        image_handler = Vision.VNImageRequestHandler.alloc().initWithCIImage_options_(
            ci_image, handler_options
        )

        recognized = []
        completion_handler = make_request_handler(recognized)
        text_request = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(
            completion_handler
        )
        _perform_result = image_handler.performRequests_error_([text_request], None)

        # release the Vision objects explicitly, request first, then the image handler
        text_request.dealloc()
        image_handler.dealloc()

        return recognized
def make_request_handler(results):
    """Create a completion handler that collects recognized text into results

    Args:
        results: list to store results; the handler appends one
            [text, confidence] list per text observation

    Returns: handler(request, error) callable; when error is truthy it is printed
        and nothing is appended

    Raises:
        ValueError: if results is not a list
    """
    if not isinstance(results, list):
        raise ValueError("results must be a list")

    def handler(request, error):
        if error:
            print(f"Error! {error}")
            return

        # guard against results() returning None so the callback never raises
        for text_observation in request.results() or []:
            candidates = text_observation.topCandidates_(1)
            if not candidates:
                # observation without any candidate text: nothing to record
                continue
            recognized_text = candidates[0]
            results.append([recognized_text.string(), recognized_text.confidence()])

    return handler

View File

@@ -1,11 +1,12 @@
pyobjc-core==7.2
pyobjc-framework-AppleScriptKit==7.2
pyobjc-framework-AppleScriptObjC==7.2
pyobjc-framework-Photos==7.2
pyobjc-framework-Quartz==7.2
pyobjc-framework-AVFoundation==7.2
pyobjc-framework-CoreServices==7.2
pyobjc-framework-Metal==7.2
pyobjc-core>=7.2
pyobjc-framework-AppleScriptKit>=7.2
pyobjc-framework-AppleScriptObjC>=7.2
pyobjc-framework-Photos>=7.2
pyobjc-framework-Quartz>=7.2
pyobjc-framework-AVFoundation>=7.2
pyobjc-framework-CoreServices>=7.2
pyobjc-framework-Metal>=7.2
pyobjc-framework-Vision>=7.2
Click==8.0.1
PyYAML==5.4.1
Mako==1.1.4
@@ -13,10 +14,10 @@ bpylist2==3.0.2
pathvalidate==2.4.1
dataclasses==0.7;python_version<'3.7'
wurlitzer==2.1.0
photoscript==0.1.3
photoscript==0.1.4
toml==0.10.2
osxmetadata==0.99.25
osxmetadata==0.99.26
textx==2.3.0
rich==10.2.2
bitmath==1.3.3.1
more-itertools==8.8.0
more-itertools==8.8.0

View File

@@ -73,14 +73,15 @@ setup(
"Topic :: Software Development :: Libraries :: Python Modules",
],
install_requires=[
"pyobjc-core==7.2",
"pyobjc-framework-AppleScriptKit==7.2",
"pyobjc-framework-AppleScriptObjC==7.2",
"pyobjc-framework-Photos==7.2",
"pyobjc-framework-Quartz==7.2",
"pyobjc-framework-AVFoundation==7.2",
"pyobjc-framework-CoreServices==7.2",
"pyobjc-framework-Metal==7.2",
"pyobjc-core",
"pyobjc-framework-AppleScriptKit",
"pyobjc-framework-AppleScriptObjC",
"pyobjc-framework-Photos",
"pyobjc-framework-Quartz",
"pyobjc-framework-AVFoundation",
"pyobjc-framework-CoreServices",
"pyobjc-framework-Metal",
"pyobjc-framework-Vision",
"Click==8.0.1",
"PyYAML==5.4.1",
"Mako==1.1.4",
@@ -88,9 +89,9 @@ setup(
"pathvalidate==2.4.1",
"dataclasses==0.7;python_version<'3.7'",
"wurlitzer==2.1.0",
"photoscript==0.1.3",
"photoscript==0.1.4",
"toml==0.10.2",
"osxmetadata==0.99.25",
"osxmetadata==0.99.26",
"textx==2.3.0",
"rich==10.2.2",
"bitmath==1.3.3.1",

View File

@@ -234,6 +234,11 @@ UUID_NOT_REFERENCE = "F12384F6-CD17-4151-ACBA-AE0E3688539E"
UUID_DUPLICATE = ""
UUID_DETECTED_TEXT = {
"E2078879-A29C-4D6F-BACB-E3BBE6C3EB91": "osxphotos",
"A92D9C26-3A50-4197-9388-CB5F7DB9FA91": None,
}
@pytest.fixture(scope="module")
def photosdb():
@@ -1423,3 +1428,14 @@ def test_multi_uuid(photosdb):
photos = photosdb.photos(uuid=[UUID_DICT["favorite"], UUID_DICT["not_favorite"]])
assert len(photos) == 2
def test_detected_text(photosdb):
    """PhotoInfo.detected_text finds the expected text, or nothing when none is expected"""
    for photo_uuid, expected in UUID_DETECTED_TEXT.items():
        results = photosdb.get_photo(uuid=photo_uuid).detected_text()
        joined = " ".join(found for found, _confidence in results)
        if expected is None:
            # photo is expected to contain no detectable text
            assert not joined
            continue
        assert expected in joined