Improved caching of detected_text results

2021-08-02 05:10:26 -07:00
parent 9d980e4917
commit fa2027d453
7 changed files with 39 additions and 52 deletions
--- a/osxphotos/_version.py
+++ b/osxphotos/_version.py
@@ -1,3 +1,3 @@
 """ version info """

-__version__ = "0.42.71"
+__version__ = "0.42.72"
--- a/osxphotos/photoinfo/photoinfo.py
+++ b/osxphotos/photoinfo/photoinfo.py
@@ -14,6 +14,7 @@ from datetime import timedelta, timezone
 from typing import Optional

 import yaml
+from osxmetadata import OSXMetaData

 from .._constants import (
    _MOVIE_TYPE,
@@ -1118,6 +1119,28 @@ class PhotoInfo:

        Returns: list of (detected text, confidence) tuples
        """
+
+        try:
+            return self._detected_text_cache[confidence_threshold]
+        except (AttributeError, KeyError) as e:
+            if isinstance(e, AttributeError):
+                self._detected_text_cache = {}
+
+            try:
+                detected_text = self._detected_text()
+            except Exception as e:
+                logging.warning(f"Error detecting text in photo {self.uuid}: {e}")
+                detected_text = []
+
+            self._detected_text_cache[confidence_threshold] = [
+                (text, confidence)
+                for text, confidence in detected_text
+                if confidence >= confidence_threshold
+            ]
+            return self._detected_text_cache[confidence_threshold]
+
+    def _detected_text(self):
+        """detect text in photo, either from cached extended attribute or by attempting text detection"""
        path = (
            self.path_edited if self.hasadjustments and self.path_edited else self.path
        )
@@ -1125,24 +1148,12 @@ class PhotoInfo:
        if not path:
            return []

-        try:
-            return self._detected_text[(path, confidence_threshold)]
-        except (AttributeError, KeyError) as e:
-            if isinstance(e, AttributeError):
-                self._detected_text = {}
-
-            try:
-                detected_text = detect_text(path)
-            except Exception as e:
-                logging.warning(f"Error detecting text in photo {self.uuid} at {path}: {e}")
-                detected_text = []
-
-            self._detected_text[(path, confidence_threshold)] = [
-                (text, confidence)
-                for text, confidence in detected_text
-                if confidence >= confidence_threshold
-            ]
-            return self._detected_text[(path, confidence_threshold)]
+        md = OSXMetaData(path)
+        detected_text = md.get_attribute("osxphotos_detected_text")
+        if detected_text is None:
+            detected_text = detect_text(path)
+            md.set_attribute("osxphotos_detected_text", detected_text)
+        return detected_text

    @property
    def _longitude(self):
--- a/osxphotos/phototemplate.py
+++ b/osxphotos/phototemplate.py
@@ -1445,25 +1445,8 @@ def _get_detected_text(photo, exportdb, confidence=TEXT_DETECTION_CONFIDENCE_THR
        else TEXT_DETECTION_CONFIDENCE_THRESHOLD
    )

-    detected_text = exportdb.get_detected_text_for_uuid(photo.uuid)
-    if detected_text is not None:
-        detected_text = json.loads(detected_text)
-    else:
-        path = (
-            photo.path_edited
-            if photo.hasadjustments and photo.path_edited
-            else photo.path
-        )
-        path = path or photo.path_derivatives[0] if photo.path_derivatives else None
-        if not path:
-            detected_text = []
-        else:
-            try:
-                detected_text = detect_text(path)
-            except Exception as e:
-                logging.warning(
-                    f"Error detecting text in image {photo.uuid} at {path}: {e}"
-                )
-                return []
-        exportdb.set_detected_text_for_uuid(photo.uuid, json.dumps(detected_text))
+    # _detected_text caches the text detection results in an extended attribute
+    # so the first time this gets called is slow but repeated accesses are fast
+    detected_text = photo._detected_text()
+    exportdb.set_detected_text_for_uuid(photo.uuid, json.dumps(detected_text))
    return [text for text, conf in detected_text if conf >= confidence]
--- a/osxphotos/text_detection.py
+++ b/osxphotos/text_detection.py
@@ -52,6 +52,9 @@ def detect_text(img_path: str) -> List:
        vision_request.dealloc()
        vision_handler.dealloc()

+        for result in results:
+            result[0] = str(result[0])
+
        return results