Improved caching of detected_text results

This commit is contained in:
Rhet Turnbull
2021-08-02 05:10:26 -07:00
parent 9d980e4917
commit fa2027d453
7 changed files with 39 additions and 52 deletions

View File

@@ -1,3 +1,3 @@
""" version info """ """ version info """
__version__ = "0.42.71" __version__ = "0.42.72"

View File

@@ -14,6 +14,7 @@ from datetime import timedelta, timezone
from typing import Optional from typing import Optional
import yaml import yaml
from osxmetadata import OSXMetaData
from .._constants import ( from .._constants import (
_MOVIE_TYPE, _MOVIE_TYPE,
@@ -1118,6 +1119,28 @@ class PhotoInfo:
Returns: list of (detected text, confidence) tuples Returns: list of (detected text, confidence) tuples
""" """
try:
return self._detected_text_cache[confidence_threshold]
except (AttributeError, KeyError) as e:
if isinstance(e, AttributeError):
self._detected_text_cache = {}
try:
detected_text = self._detected_text()
except Exception as e:
logging.warning(f"Error detecting text in photo {self.uuid}: {e}")
detected_text = []
self._detected_text_cache[confidence_threshold] = [
(text, confidence)
for text, confidence in detected_text
if confidence >= confidence_threshold
]
return self._detected_text_cache[confidence_threshold]
def _detected_text(self):
"""detect text in photo, either from cached extended attribute or by attempting text detection"""
path = ( path = (
self.path_edited if self.hasadjustments and self.path_edited else self.path self.path_edited if self.hasadjustments and self.path_edited else self.path
) )
@@ -1125,24 +1148,12 @@ class PhotoInfo:
if not path: if not path:
return [] return []
try: md = OSXMetaData(path)
return self._detected_text[(path, confidence_threshold)] detected_text = md.get_attribute("osxphotos_detected_text")
except (AttributeError, KeyError) as e: if detected_text is None:
if isinstance(e, AttributeError):
self._detected_text = {}
try:
detected_text = detect_text(path) detected_text = detect_text(path)
except Exception as e: md.set_attribute("osxphotos_detected_text", detected_text)
logging.warning(f"Error detecting text in photo {self.uuid} at {path}: {e}") return detected_text
detected_text = []
self._detected_text[(path, confidence_threshold)] = [
(text, confidence)
for text, confidence in detected_text
if confidence >= confidence_threshold
]
return self._detected_text[(path, confidence_threshold)]
@property @property
def _longitude(self): def _longitude(self):

View File

@@ -1445,25 +1445,8 @@ def _get_detected_text(photo, exportdb, confidence=TEXT_DETECTION_CONFIDENCE_THR
else TEXT_DETECTION_CONFIDENCE_THRESHOLD else TEXT_DETECTION_CONFIDENCE_THRESHOLD
) )
detected_text = exportdb.get_detected_text_for_uuid(photo.uuid) # _detected_text caches the text detection results in an extended attribute
if detected_text is not None: # so the first time this gets called is slow but repeated accesses are fast
detected_text = json.loads(detected_text) detected_text = photo._detected_text()
else:
path = (
photo.path_edited
if photo.hasadjustments and photo.path_edited
else photo.path
)
path = path or photo.path_derivatives[0] if photo.path_derivatives else None
if not path:
detected_text = []
else:
try:
detected_text = detect_text(path)
except Exception as e:
logging.warning(
f"Error detecting text in image {photo.uuid} at {path}: {e}"
)
return []
exportdb.set_detected_text_for_uuid(photo.uuid, json.dumps(detected_text)) exportdb.set_detected_text_for_uuid(photo.uuid, json.dumps(detected_text))
return [text for text, conf in detected_text if conf >= confidence] return [text for text, conf in detected_text if conf >= confidence]

View File

@@ -52,6 +52,9 @@ def detect_text(img_path: str) -> List:
vision_request.dealloc() vision_request.dealloc()
vision_handler.dealloc() vision_handler.dealloc()
for result in results:
result[0] = str(result[0])
return results return results

View File

@@ -16,7 +16,7 @@ dataclasses==0.7;python_version<'3.7'
wurlitzer==2.1.0 wurlitzer==2.1.0
photoscript==0.1.4 photoscript==0.1.4
toml==0.10.2 toml==0.10.2
osxmetadata==0.99.26 osxmetadata==0.99.31
textx==2.3.0 textx==2.3.0
rich==10.6.0 rich==10.6.0
bitmath==1.3.3.1 bitmath==1.3.3.1

View File

@@ -91,7 +91,7 @@ setup(
"wurlitzer==2.1.0", "wurlitzer==2.1.0",
"photoscript==0.1.4", "photoscript==0.1.4",
"toml==0.10.2", "toml==0.10.2",
"osxmetadata==0.99.26", "osxmetadata==0.99.31",
"textx==2.3.0", "textx==2.3.0",
"rich==10.6.0", "rich==10.6.0",
"bitmath==1.3.3.1", "bitmath==1.3.3.1",

View File

@@ -1179,13 +1179,3 @@ def test_detected_text(photosdb):
for template, value in TEMPLATE_VALUES_DETECTED_TEXT.items(): for template, value in TEMPLATE_VALUES_DETECTED_TEXT.items():
rendered, _ = photo.render_template(template) rendered, _ = photo.render_template(template)
assert value in "".join(rendered) assert value in "".join(rendered)
def test_detected_text_caching(photosdb):
"""Test {detected_text} template caches values"""
exportdb = ExportDBInMemory(None)
exportdb.set_detected_text_for_uuid(UUID_DETECTED_TEXT, json.dumps([["foo", 0.9]]))
photo = photosdb.get_photo(UUID_DETECTED_TEXT)
options = RenderOptions(exportdb=exportdb)
rendered, _ = photo.render_template("{detected_text}", options=options)
assert rendered[0] == "foo"