Improved caching of detected_text results

2021-08-02 05:10:26 -07:00 · 2021-08-02 05:10:26 -07:00 · fa2027d453
commit fa2027d453
parent 9d980e4917
7 changed files with 39 additions and 52 deletions
--- a/osxphotos/_version.py
+++ b/osxphotos/_version.py
@@ -1,3 +1,3 @@
 """ version info """

-__version__ = "0.42.71"
+__version__ = "0.42.72"
--- a/osxphotos/photoinfo/photoinfo.py
+++ b/osxphotos/photoinfo/photoinfo.py
@@ -14,6 +14,7 @@ from datetime import timedelta, timezone
 from typing import Optional

 import yaml
+from osxmetadata import OSXMetaData

 from .._constants import (
    _MOVIE_TYPE,
@@ -1118,6 +1119,28 @@ class PhotoInfo:

        Returns: list of (detected text, confidence) tuples
        """
+
+        try:
+            return self._detected_text_cache[confidence_threshold]
+        except (AttributeError, KeyError) as e:
+            if isinstance(e, AttributeError):
+                self._detected_text_cache = {}
+
+            try:
+                detected_text = self._detected_text()
+            except Exception as e:
+                logging.warning(f"Error detecting text in photo {self.uuid}: {e}")
+                detected_text = []
+
+            self._detected_text_cache[confidence_threshold] = [
+                (text, confidence)
+                for text, confidence in detected_text
+                if confidence >= confidence_threshold
+            ]
+            return self._detected_text_cache[confidence_threshold]
+
+    def _detected_text(self):
+        """detect text in photo, either from cached extended attribute or by attempting text detection"""
        path = (
            self.path_edited if self.hasadjustments and self.path_edited else self.path
        )
@@ -1125,24 +1148,12 @@ class PhotoInfo:
        if not path:
            return []

-        try:
-            return self._detected_text[(path, confidence_threshold)]
-        except (AttributeError, KeyError) as e:
-            if isinstance(e, AttributeError):
-                self._detected_text = {}
-
-            try:
-                detected_text = detect_text(path)
-            except Exception as e:
-                logging.warning(f"Error detecting text in photo {self.uuid} at {path}: {e}")
-                detected_text = []
-
-            self._detected_text[(path, confidence_threshold)] = [
-                (text, confidence)
-                for text, confidence in detected_text
-                if confidence >= confidence_threshold
-            ]
-            return self._detected_text[(path, confidence_threshold)]
+        md = OSXMetaData(path)
+        detected_text = md.get_attribute("osxphotos_detected_text")
+        if detected_text is None:
+            detected_text = detect_text(path)
+            md.set_attribute("osxphotos_detected_text", detected_text)
+        return detected_text

    @property
    def _longitude(self):
--- a/osxphotos/phototemplate.py
+++ b/osxphotos/phototemplate.py
@@ -1445,25 +1445,8 @@ def _get_detected_text(photo, exportdb, confidence=TEXT_DETECTION_CONFIDENCE_THR
        else TEXT_DETECTION_CONFIDENCE_THRESHOLD
    )

-    detected_text = exportdb.get_detected_text_for_uuid(photo.uuid)
-    if detected_text is not None:
-        detected_text = json.loads(detected_text)
-    else:
-        path = (
-            photo.path_edited
-            if photo.hasadjustments and photo.path_edited
-            else photo.path
-        )
-        path = path or photo.path_derivatives[0] if photo.path_derivatives else None
-        if not path:
-            detected_text = []
-        else:
-            try:
-                detected_text = detect_text(path)
-            except Exception as e:
-                logging.warning(
-                    f"Error detecting text in image {photo.uuid} at {path}: {e}"
-                )
-                return []
-        exportdb.set_detected_text_for_uuid(photo.uuid, json.dumps(detected_text))
+    # _detected_text caches the text detection results in an extended attribute
+    # so the first time this gets called is slow but repeated accesses are fast
+    detected_text = photo._detected_text()
+    exportdb.set_detected_text_for_uuid(photo.uuid, json.dumps(detected_text))
    return [text for text, conf in detected_text if conf >= confidence]
--- a/osxphotos/text_detection.py
+++ b/osxphotos/text_detection.py
@@ -52,6 +52,9 @@ def detect_text(img_path: str) -> List:
        vision_request.dealloc()
        vision_handler.dealloc()

+        for result in results:
+            result[0] = str(result[0])
+
        return results


--- a/requirements.txt
+++ b/requirements.txt
@@ -16,7 +16,7 @@ dataclasses==0.7;python_version<'3.7'
 wurlitzer==2.1.0
 photoscript==0.1.4
 toml==0.10.2
-osxmetadata==0.99.26
+osxmetadata==0.99.31
 textx==2.3.0
 rich==10.6.0
 bitmath==1.3.3.1
--- a/setup.py
+++ b/setup.py
@@ -91,7 +91,7 @@ setup(
        "wurlitzer==2.1.0",
        "photoscript==0.1.4",
        "toml==0.10.2",
-        "osxmetadata==0.99.26",
+        "osxmetadata==0.99.31",
        "textx==2.3.0",
        "rich==10.6.0",
        "bitmath==1.3.3.1",
--- a/tests/test_template.py
+++ b/tests/test_template.py
@@ -1179,13 +1179,3 @@ def test_detected_text(photosdb):
    for template, value in TEMPLATE_VALUES_DETECTED_TEXT.items():
        rendered, _ = photo.render_template(template)
        assert value in "".join(rendered)
-
-
-def test_detected_text_caching(photosdb):
-    """Test {detected_text} template caches values"""
-    exportdb = ExportDBInMemory(None)
-    exportdb.set_detected_text_for_uuid(UUID_DETECTED_TEXT, json.dumps([["foo", 0.9]]))
-    photo = photosdb.get_photo(UUID_DETECTED_TEXT)
-    options = RenderOptions(exportdb=exportdb)
-    rendered, _ = photo.render_template("{detected_text}", options=options)
-    assert rendered[0] == "foo"