Added PhotoInfo.detected_text()
This commit is contained in:
12
README.md
12
README.md
@@ -2964,6 +2964,18 @@ Some substitutions, notably `album`, `keyword`, and `person` could return multip
|
||||
|
||||
See [Template System](#template-system) for additional details.
|
||||
|
||||
#### `detected_text(confidence_threshold=TEXT_DETECTION_CONFIDENCE_THRESHOLD)`
|
||||
|
||||
Detects text in the photo and returns a list of results as (detected text, confidence) tuples.
|
||||
|
||||
- `confidence_threshold`: float between 0.0 and 1.0. If text detection confidence is below this threshold, text will not be returned. Default is `osxphotos._constants.TEXT_DETECTION_CONFIDENCE_THRESHOLD`
|
||||
|
||||
If the photo is edited, uses the edited photo, otherwise the original; falls back to the preview image if neither the edited nor the original image is available.
|
||||
|
||||
Returns: list of (detected text, confidence) tuples.
|
||||
|
||||
Note: This is *not* the same as Live Text in macOS Monterey. When using `detected_text()`, osxphotos will use Apple's [Vision framework](https://developer.apple.com/documentation/vision/recognizing_text_in_images?language=objc) to perform text detection on the image. On my circa 2013 MacBook Pro, this takes about 2 seconds per image. `detected_text()` does memoize the results for a given `confidence_threshold` so repeated calls will not re-process the photo.
|
||||
|
||||
### ExifInfo
|
||||
[PhotoInfo.exif_info](#exif-info) returns an `ExifInfo` object with some EXIF data about the photo (Photos 5 only). `ExifInfo` contains the following properties:
|
||||
|
||||
|
||||
@@ -282,3 +282,5 @@ class AlbumSortOrder(Enum):
|
||||
NEWEST_FIRST = 2
|
||||
OLDEST_FIRST = 3
|
||||
TITLE = 5
|
||||
|
||||
TEXT_DETECTION_CONFIDENCE_THRESHOLD = 0.75
|
||||
@@ -1,3 +1,3 @@
|
||||
""" version info """
|
||||
|
||||
__version__ = "0.42.67"
|
||||
__version__ = "0.42.68"
|
||||
|
||||
@@ -30,12 +30,14 @@ from .._constants import (
|
||||
BURST_KEY,
|
||||
BURST_NOT_SELECTED,
|
||||
BURST_SELECTED,
|
||||
TEXT_DETECTION_CONFIDENCE_THRESHOLD,
|
||||
)
|
||||
from ..adjustmentsinfo import AdjustmentsInfo
|
||||
from ..albuminfo import AlbumInfo, ImportInfo
|
||||
from ..personinfo import FaceInfo, PersonInfo
|
||||
from ..phototemplate import PhotoTemplate, RenderOptions
|
||||
from ..placeinfo import PlaceInfo4, PlaceInfo5
|
||||
from ..text_detection import detect_text
|
||||
from ..uti import get_preferred_uti_extension, get_uti_for_extension
|
||||
from ..utils import _debug, _get_resource_loc, findfiles
|
||||
|
||||
@@ -1106,6 +1108,41 @@ class PhotoInfo:
|
||||
template = PhotoTemplate(self, exiftool_path=self._db._exiftool_path)
|
||||
return template.render(template_str, options)
|
||||
|
||||
def detected_text(self, confidence_threshold=TEXT_DETECTION_CONFIDENCE_THRESHOLD):
    """Detects text in photo and returns a list of results as (detected text, confidence) tuples

    confidence_threshold: float between 0.0 and 1.0. If text detection confidence is below this threshold,
        text will not be returned. Default is TEXT_DETECTION_CONFIDENCE_THRESHOLD

    If photo is edited, uses the edited photo, otherwise the original; falls back to the first entry of
    path_derivatives (the preview image) if neither the edited nor the original image is available

    Results are memoized per (image path, confidence_threshold), so repeated calls with the same
    threshold do not re-process the image. If text detection raises, the error is logged and an
    empty result is memoized for that path and threshold.

    Returns: list of (detected text, confidence) tuples; empty list if no image file is available
    """
    path = self.path_edited if self.hasadjustments and self.path_edited else self.path
    if not path:
        # Only fall back to a derivative when there is no edited/original file.
        # A one-line `path or derivs[0] if derivs else None` parses as
        # `(path or derivs[0]) if derivs else None`, which throws away a valid
        # path whenever the photo has no derivatives.
        derivatives = self.path_derivatives
        path = derivatives[0] if derivatives else None
    if not path:
        return []

    cache_key = (path, confidence_threshold)
    try:
        return self._detected_text[cache_key]
    except AttributeError:
        # first call on this instance: create the memoization dict lazily
        self._detected_text = {}
    except KeyError:
        pass

    try:
        detected = detect_text(path)
    except Exception as e:
        # best-effort: text detection must never break the caller, but don't hide the failure
        import logging

        logging.warning("Error detecting text in %s: %s", path, e)
        detected = []

    self._detected_text[cache_key] = [
        (text, confidence)
        for text, confidence in detected
        if confidence >= confidence_threshold
    ]
    return self._detected_text[cache_key]
|
||||
|
||||
@property
|
||||
def _longitude(self):
|
||||
"""Returns longitude, in degrees"""
|
||||
|
||||
58
osxphotos/text_detection.py
Normal file
58
osxphotos/text_detection.py
Normal file
@@ -0,0 +1,58 @@
|
||||
""" Use Apple's Vision Framework via PyObjC to perform text detection on images """
|
||||
|
||||
import objc
|
||||
import Quartz
|
||||
import Vision
|
||||
from Cocoa import NSURL
|
||||
from Foundation import NSDictionary
|
||||
|
||||
from typing import List
|
||||
|
||||
# needed to capture system-level stderr
|
||||
from wurlitzer import pipes
|
||||
|
||||
|
||||
def detect_text(img_path: str) -> List:
    """process image at img_path with VNRecognizeTextRequest and return list of results

    Args:
        img_path: path to the image file; it is converted to a file URL and loaded as a CIImage

    Returns:
        the list populated by the completion handler created with make_request_handler();
        each entry is a [text, confidence] pair for one text observation
    """
    # all Objective-C objects created below live inside this autorelease pool
    with objc.autorelease_pool():
        input_url = NSURL.fileURLWithPath_(img_path)

        with pipes() as (out, err):
            # capture stdout and stderr from system calls
            # otherwise, Quartz.CIImage.imageWithContentsOfURL_
            # prints to stderr something like:
            # 2020-09-20 20:55:25.538 python[73042:5650492] Creating client/daemon connection: B8FE995E-3F27-47F4-9FA8-559C615FD774
            # 2020-09-20 20:55:25.652 python[73042:5650492] Got the query meta data reply for: com.apple.MobileAsset.RawCamera.Camera, response: 0
            input_image = Quartz.CIImage.imageWithContentsOfURL_(input_url)

        # no Vision options are set: an empty dictionary is passed to the handler
        vision_options = NSDictionary.dictionaryWithDictionary_({})
        vision_handler = Vision.VNImageRequestHandler.alloc().initWithCIImage_options_(
            input_image, vision_options
        )
        # the completion handler appends to this list; it is what gets returned
        results = []
        handler = make_request_handler(results)
        vision_request = (
            Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(handler)
        )
        # NOTE(review): the return value is stored but never inspected, so a failed
        # request is not reported from here -- confirm whether that is intended
        error = vision_handler.performRequests_error_([vision_request], None)
        # NOTE(review): explicit dealloc() calls, presumably to release the Vision
        # objects promptly -- confirm against PyObjC memory-management guidance
        vision_request.dealloc()
        vision_handler.dealloc()

        return results
|
||||
|
||||
|
||||
def make_request_handler(results):
    """Build a completion handler that collects recognized text into results

    results: list to store results; the returned handler appends one
        [text, confidence] entry per text observation that has a candidate

    Returns: handler(request, error) callable. If error is truthy it is printed
        and nothing is appended; otherwise the top candidate of each observation
        in request.results() is appended to results.

    Raises: ValueError if results is not a list
    """
    if not isinstance(results, list):
        raise ValueError("results must be a list")

    def handler(request, error):
        if error:
            print(f"Error! {error}")
            return

        # results() may be None when the request produced no observations
        for text_observation in request.results() or []:
            candidates = text_observation.topCandidates_(1)
            if not candidates:
                # observation without any candidate string: nothing to record
                continue
            recognized_text = candidates[0]
            results.append([recognized_text.string(), recognized_text.confidence()])

    return handler
|
||||
@@ -1,11 +1,12 @@
|
||||
pyobjc-core==7.2
|
||||
pyobjc-framework-AppleScriptKit==7.2
|
||||
pyobjc-framework-AppleScriptObjC==7.2
|
||||
pyobjc-framework-Photos==7.2
|
||||
pyobjc-framework-Quartz==7.2
|
||||
pyobjc-framework-AVFoundation==7.2
|
||||
pyobjc-framework-CoreServices==7.2
|
||||
pyobjc-framework-Metal==7.2
|
||||
pyobjc-core>=7.2
|
||||
pyobjc-framework-AppleScriptKit>=7.2
|
||||
pyobjc-framework-AppleScriptObjC>=7.2
|
||||
pyobjc-framework-Photos>=7.2
|
||||
pyobjc-framework-Quartz>=7.2
|
||||
pyobjc-framework-AVFoundation>=7.2
|
||||
pyobjc-framework-CoreServices>=7.2
|
||||
pyobjc-framework-Metal>=7.2
|
||||
pyobjc-framework-Vision>=7.2
|
||||
Click==8.0.1
|
||||
PyYAML==5.4.1
|
||||
Mako==1.1.4
|
||||
@@ -13,10 +14,10 @@ bpylist2==3.0.2
|
||||
pathvalidate==2.4.1
|
||||
dataclasses==0.7;python_version<'3.7'
|
||||
wurlitzer==2.1.0
|
||||
photoscript==0.1.3
|
||||
photoscript==0.1.4
|
||||
toml==0.10.2
|
||||
osxmetadata==0.99.25
|
||||
osxmetadata==0.99.26
|
||||
textx==2.3.0
|
||||
rich==10.2.2
|
||||
bitmath==1.3.3.1
|
||||
more-itertools==8.8.0
|
||||
more-itertools==8.8.0
|
||||
|
||||
21
setup.py
21
setup.py
@@ -73,14 +73,15 @@ setup(
|
||||
"Topic :: Software Development :: Libraries :: Python Modules",
|
||||
],
|
||||
install_requires=[
|
||||
"pyobjc-core==7.2",
|
||||
"pyobjc-framework-AppleScriptKit==7.2",
|
||||
"pyobjc-framework-AppleScriptObjC==7.2",
|
||||
"pyobjc-framework-Photos==7.2",
|
||||
"pyobjc-framework-Quartz==7.2",
|
||||
"pyobjc-framework-AVFoundation==7.2",
|
||||
"pyobjc-framework-CoreServices==7.2",
|
||||
"pyobjc-framework-Metal==7.2",
|
||||
"pyobjc-core",
|
||||
"pyobjc-framework-AppleScriptKit",
|
||||
"pyobjc-framework-AppleScriptObjC",
|
||||
"pyobjc-framework-Photos",
|
||||
"pyobjc-framework-Quartz",
|
||||
"pyobjc-framework-AVFoundation",
|
||||
"pyobjc-framework-CoreServices",
|
||||
"pyobjc-framework-Metal",
|
||||
"pyobjc-framework-Vision",
|
||||
"Click==8.0.1",
|
||||
"PyYAML==5.4.1",
|
||||
"Mako==1.1.4",
|
||||
@@ -88,9 +89,9 @@ setup(
|
||||
"pathvalidate==2.4.1",
|
||||
"dataclasses==0.7;python_version<'3.7'",
|
||||
"wurlitzer==2.1.0",
|
||||
"photoscript==0.1.3",
|
||||
"photoscript==0.1.4",
|
||||
"toml==0.10.2",
|
||||
"osxmetadata==0.99.25",
|
||||
"osxmetadata==0.99.26",
|
||||
"textx==2.3.0",
|
||||
"rich==10.2.2",
|
||||
"bitmath==1.3.3.1",
|
||||
|
||||
@@ -234,6 +234,11 @@ UUID_NOT_REFERENCE = "F12384F6-CD17-4151-ACBA-AE0E3688539E"
|
||||
|
||||
UUID_DUPLICATE = ""
|
||||
|
||||
UUID_DETECTED_TEXT = {
|
||||
"E2078879-A29C-4D6F-BACB-E3BBE6C3EB91": "osxphotos",
|
||||
"A92D9C26-3A50-4197-9388-CB5F7DB9FA91": None,
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def photosdb():
|
||||
@@ -1423,3 +1428,14 @@ def test_multi_uuid(photosdb):
|
||||
photos = photosdb.photos(uuid=[UUID_DICT["favorite"], UUID_DICT["not_favorite"]])
|
||||
|
||||
assert len(photos) == 2
|
||||
|
||||
|
||||
def test_detected_text(photosdb):
    """PhotoInfo.detected_text finds the expected text, or nothing when none is expected"""
    for photo_uuid, wanted in UUID_DETECTED_TEXT.items():
        results = photosdb.get_photo(uuid=photo_uuid).detected_text()
        joined = " ".join(text for text, _ in results)
        if wanted is None:
            # photo without text: nothing should be detected
            assert not joined
            continue
        assert wanted in joined
|
||||
|
||||
Reference in New Issue
Block a user