From e7099d250b1ab8b6134784ca7d20af07599574cf Mon Sep 17 00:00:00 2001
From: Rhet Turnbull
Date: Sat, 1 Apr 2023 09:39:08 -0700
Subject: [PATCH] Concurrency refactor 999 (#1029)

* Working on making export threadsafe, #999

* Working on making export threadsafe, #999

* refactor for concurrent export, #999

* Fixed race condition in ExportRecord context manager

---
 API_README.md                   |  156 +++-
 osxphotos/_constants.py         |   13 +
 osxphotos/albuminfo.py          |  117 +++-
 osxphotos/cli/import_cli.py     |    7 +-
 osxphotos/cli/report_writer.py  |    5 +-
 osxphotos/export_db.py          | 1170 ++++++++++++++++++-------------
 osxphotos/export_db_utils.py    |   24 +-
 osxphotos/frozen_photoinfo.py   |  169 +++++
 osxphotos/photodates.py         |    6 +-
 osxphotos/photoexporter.py      |   46 +-
 osxphotos/photoinfo.py          |  313 +++++++--
 osxphotos/photosdb/photosdb.py  |    3 +
 osxphotos/phototemplate.py      |    2 +-
 osxphotos/phototz.py            |   14 +-
 osxphotos/sqlite_utils.py       |   10 +-
 osxphotos/sqlitekvstore.py      |    6 +-
 tests/test_cli.py               |    2 +-
 tests/test_concurrent_export.py |   66 ++
 18 files changed, 1453 insertions(+), 676 deletions(-)
 create mode 100644 osxphotos/frozen_photoinfo.py
 create mode 100644 tests/test_concurrent_export.py

diff --git a/API_README.md b/API_README.md
index 9112f454..5f306c26 100644
--- a/API_README.md
+++ b/API_README.md
@@ -1523,12 +1523,14 @@ Returns full name of the album owner (person who shared the album) for shared al
 
 **Note**: Only valid on Photos 5 / MacOS 10.15+; on Photos <= 4, returns None.
 
+#### `asdict()`
+
+Returns a dictionary representation of the AlbumInfo object.
+
 ### ImportInfo
 
 PhotosDB.import_info returns a list of ImportInfo objects. Each ImportInfo object represents an import session in the library. PhotoInfo.import_info returns a single ImportInfo object representing the import session for the photo (or `None` if no associated import session).
 
-**Note**: Photos 5+ only. Not implemented for Photos version <= 4.
-
 #### `uuid`
 
 Returns the universally unique identifier (uuid) of the import session. This is how Photos keeps track of individual objects within the database.
@@ -1543,12 +1545,18 @@ Returns the creation date as a timezone aware datetime.datetime object of the im
 
 #### `start_date`
 
-Returns the start date as a timezone aware datetime.datetime object for when the import session bega.
+Returns the start date as a timezone aware datetime.datetime object for when the import session began.
 
 #### `end_date`
 
 Returns the end date as a timezone aware datetime.datetime object for when the import session completed.
 
+**Note**: On Photos <=4, `start_date` and `end_date` will be the same as `creation_date`.
+
+#### `asdict()`
+
+Returns a dictionary representation of the import session.
+
 ### ProjectInfo
 
 PhotosDB.project_info returns a list of ProjectInfo objects. Each ProjectInfo object represents a project in the library. PhotoInfo.project_info returns a list of ProjectInfo objects for each project the photo is contained in.
 
@@ -1571,6 +1579,10 @@ Returns a list of [PhotoInfo](#photoinfo) objects representing each photo contai
 
 Returns the creation date as a timezone aware datetime.datetime object of the project.
 
+#### `asdict()`
+
+Returns a dictionary representation of the ProjectInfo object.
+
 ### MomentInfo
 
 PhotoInfo.moment_info returns the MomentInfo object for the photo. The MomentInfo object contains information about the photo's moment as assigned by Photos and has the following properties:
 
@@ -1661,6 +1673,10 @@ Returns album sort order (as `AlbumSortOrder` enum).
On Photos <=4, always retu Returns index of photo in album (based on album sort order). +#### `asdict()` + +Returns a dictionary representation of the FolderInfo object. + **Note**: FolderInfo and AlbumInfo objects effectively work as a linked list. The children of a folder are contained in `subfolders` and `album_info` and the parent object of both `AlbumInfo` and `FolderInfo` is represented by `parent`. For example: ```pycon @@ -2134,10 +2150,10 @@ Template statements are white-space sensitive meaning that white space (spaces, e.g. if Photo keywords are `["foo","bar"]`: -- `"{keyword}"` renders to `"foo", "bar"` -- `"{,+keyword}"` renders to: `"foo,bar"` -- `"{; +keyword}"` renders to: `"foo; bar"` -- `"{+keyword}"` renders to `"foobar"` +* `"{keyword}"` renders to `"foo", "bar"` +* `"{,+keyword}"` renders to: `"foo,bar"` +* `"{; +keyword}"` renders to: `"foo; bar"` +* `"{+keyword}"` renders to `"foobar"` `template_field`: The template field to resolve. See [Template Substitutions](#template-substitutions) for full list of template fields. @@ -2149,77 +2165,77 @@ e.g. if Photo keywords are `["foo","bar"]`: Valid filters are: -- `lower`: Convert value to lower case, e.g. 'Value' => 'value'. -- `upper`: Convert value to upper case, e.g. 'Value' => 'VALUE'. -- `strip`: Strip whitespace from beginning/end of value, e.g. ' Value ' => 'Value'. -- `titlecase`: Convert value to title case, e.g. 'my value' => 'My Value'. -- `capitalize`: Capitalize first word of value and convert other words to lower case, e.g. 'MY VALUE' => 'My value'. -- `braces`: Enclose value in curly braces, e.g. 'value => '{value}'. -- `parens`: Enclose value in parentheses, e.g. 'value' => '(value') -- `brackets`: Enclose value in brackets, e.g. 'value' => '[value]' -- `shell_quote`: Quotes the value for safe usage in the shell, e.g. My file.jpeg => 'My file.jpeg'; only adds quotes if needed. -- `function`: Run custom python function to filter value; use in format 'function:/path/to/file.py::function_name'. See example at https://github.com/RhetTbull/osxphotos/blob/master/examples/template_filter.py -- `split(x)`: Split value into a list of values using x as delimiter, e.g. 'value1;value2' => ['value1', 'value2'] if used with split(;). -- `autosplit`: Automatically split delimited string into separate values; will split strings delimited by comma, semicolon, or space, e.g. 'value1,value2' => ['value1', 'value2']. -- `chop(x)`: Remove x characters off the end of value, e.g. chop(1): 'Value' => 'Valu'; when applied to a list, chops characters from each list value, e.g. chop(1): ['travel', 'beach']=> ['trave', 'beac']. -- `chomp(x)`: Remove x characters from the beginning of value, e.g. chomp(1): ['Value'] => ['alue']; when applied to a list, removes characters from each list value, e.g. chomp(1): ['travel', 'beach']=> ['ravel', 'each']. -- `sort`: Sort list of values, e.g. ['c', 'b', 'a'] => ['a', 'b', 'c']. -- `rsort`: Sort list of values in reverse order, e.g. ['a', 'b', 'c'] => ['c', 'b', 'a']. -- `reverse`: Reverse order of values, e.g. ['a', 'b', 'c'] => ['c', 'b', 'a']. -- `uniq`: Remove duplicate values, e.g. ['a', 'b', 'c', 'b', 'a'] => ['a', 'b', 'c']. -- `join(x)`: Join list of values with delimiter x, e.g. join(,): ['a', 'b', 'c'] => 'a,b,c'; the DELIM option functions similar to join(x) but with DELIM, the join happens before being passed to any filters.May optionally be used without an argument, that is 'join()' which joins values together with no delimiter. e.g. join(): ['a', 'b', 'c'] => 'abc'. 
-- `append(x)`: Append x to list of values, e.g. append(d): ['a', 'b', 'c'] => ['a', 'b', 'c', 'd'].
-- `prepend(x)`: Prepend x to list of values, e.g. prepend(d): ['a', 'b', 'c'] => ['d', 'a', 'b', 'c'].
-- `appends(x)`: Append s[tring] Append x to each value of list of values, e.g. appends(d): ['a', 'b', 'c'] => ['ad', 'bd', 'cd'].
-- `prepends(x)`: Prepend s[tring] x to each value of list of values, e.g. prepends(d): ['a', 'b', 'c'] => ['da', 'db', 'dc'].
-- `remove(x)`: Remove x from list of values, e.g. remove(b): ['a', 'b', 'c'] => ['a', 'c'].
-- `slice(start:stop:step)`: Slice list using same semantics as Python's list slicing, e.g. slice(1:3): ['a', 'b', 'c', 'd'] => ['b', 'c']; slice(1:4:2): ['a', 'b', 'c', 'd'] => ['b', 'd']; slice(1:): ['a', 'b', 'c', 'd'] => ['b', 'c', 'd']; slice(:-1): ['a', 'b', 'c', 'd'] => ['a', 'b', 'c']; slice(::-1): ['a', 'b', 'c', 'd'] => ['d', 'c', 'b', 'a']. See also sslice().
-- `sslice(start:stop:step)`: [s(tring) slice] Slice values in a list using same semantics as Python's string slicing, e.g. sslice(1:3):'abcd => 'bc'; sslice(1:4:2): 'abcd' => 'bd', etc. See also slice().
-- `filter(x)`: Filter list of values using predicate x; for example, `{folder_album|filter(contains Events)}` returns only folders/albums containing the word 'Events' in their path.
-- `int`: Convert values in list to integer, e.g. 1.0 => 1. If value cannot be converted to integer, remove value from list. ['1.1', 'x'] => ['1']. See also float.
-- `float`: Convert values in list to floating point number, e.g. 1 => 1.0. If value cannot be converted to float, remove value from list. ['1', 'x'] => ['1.0']. See also int.
+* `lower`: Convert value to lower case, e.g. 'Value' => 'value'.
+* `upper`: Convert value to upper case, e.g. 'Value' => 'VALUE'.
+* `strip`: Strip whitespace from beginning/end of value, e.g. ' Value ' => 'Value'.
+* `titlecase`: Convert value to title case, e.g. 'my value' => 'My Value'.
+* `capitalize`: Capitalize first word of value and convert other words to lower case, e.g. 'MY VALUE' => 'My value'.
+* `braces`: Enclose value in curly braces, e.g. 'value' => '{value}'.
+* `parens`: Enclose value in parentheses, e.g. 'value' => '(value)'.
+* `brackets`: Enclose value in brackets, e.g. 'value' => '[value]'.
+* `shell_quote`: Quotes the value for safe usage in the shell, e.g. My file.jpeg => 'My file.jpeg'; only adds quotes if needed.
+* `function`: Run custom python function to filter value; use in format 'function:/path/to/file.py::function_name'. See example at <https://github.com/RhetTbull/osxphotos/blob/master/examples/template_filter.py>
+* `split(x)`: Split value into a list of values using x as delimiter, e.g. 'value1;value2' => ['value1', 'value2'] if used with split(;).
+* `autosplit`: Automatically split delimited string into separate values; will split strings delimited by comma, semicolon, or space, e.g. 'value1,value2' => ['value1', 'value2'].
+* `chop(x)`: Remove x characters off the end of value, e.g. chop(1): 'Value' => 'Valu'; when applied to a list, chops characters from each list value, e.g. chop(1): ['travel', 'beach'] => ['trave', 'beac'].
+* `chomp(x)`: Remove x characters from the beginning of value, e.g. chomp(1): ['Value'] => ['alue']; when applied to a list, removes characters from each list value, e.g. chomp(1): ['travel', 'beach'] => ['ravel', 'each'].
+* `sort`: Sort list of values, e.g. ['c', 'b', 'a'] => ['a', 'b', 'c'].
+* `rsort`: Sort list of values in reverse order, e.g. ['a', 'b', 'c'] => ['c', 'b', 'a'].
+* `reverse`: Reverse order of values, e.g. ['a', 'b', 'c'] => ['c', 'b', 'a'].
+* `uniq`: Remove duplicate values, e.g. ['a', 'b', 'c', 'b', 'a'] => ['a', 'b', 'c'].
+* `join(x)`: Join list of values with delimiter x, e.g. join(,): ['a', 'b', 'c'] => 'a,b,c'; the DELIM option functions similarly to join(x) but with DELIM, the join happens before being passed to any filters. May optionally be used without an argument, that is 'join()', which joins values together with no delimiter, e.g. join(): ['a', 'b', 'c'] => 'abc'.
+* `append(x)`: Append x to list of values, e.g. append(d): ['a', 'b', 'c'] => ['a', 'b', 'c', 'd'].
+* `prepend(x)`: Prepend x to list of values, e.g. prepend(d): ['a', 'b', 'c'] => ['d', 'a', 'b', 'c'].
+* `appends(x)`: Append s[tring] x to each value of list of values, e.g. appends(d): ['a', 'b', 'c'] => ['ad', 'bd', 'cd'].
+* `prepends(x)`: Prepend s[tring] x to each value of list of values, e.g. prepends(d): ['a', 'b', 'c'] => ['da', 'db', 'dc'].
+* `remove(x)`: Remove x from list of values, e.g. remove(b): ['a', 'b', 'c'] => ['a', 'c'].
+* `slice(start:stop:step)`: Slice list using same semantics as Python's list slicing, e.g. slice(1:3): ['a', 'b', 'c', 'd'] => ['b', 'c']; slice(1:4:2): ['a', 'b', 'c', 'd'] => ['b', 'd']; slice(1:): ['a', 'b', 'c', 'd'] => ['b', 'c', 'd']; slice(:-1): ['a', 'b', 'c', 'd'] => ['a', 'b', 'c']; slice(::-1): ['a', 'b', 'c', 'd'] => ['d', 'c', 'b', 'a']. See also sslice().
+* `sslice(start:stop:step)`: [s(tring) slice] Slice values in a list using same semantics as Python's string slicing, e.g. sslice(1:3): 'abcd' => 'bc'; sslice(1:4:2): 'abcd' => 'bd', etc. See also slice().
+* `filter(x)`: Filter list of values using predicate x; for example, `{folder_album|filter(contains Events)}` returns only folders/albums containing the word 'Events' in their path.
+* `int`: Convert values in list to integer, e.g. 1.0 => 1. If value cannot be converted to integer, remove value from list, e.g. ['1.1', 'x'] => ['1']. See also float.
+* `float`: Convert values in list to floating point number, e.g. 1 => 1.0. If value cannot be converted to float, remove value from list, e.g. ['1', 'x'] => ['1.0']. See also int.
 
 e.g. if Photo keywords are `["FOO","bar"]`:
 
-- `"{keyword|lower}"` renders to `"foo", "bar"`
-- `"{keyword|upper}"` renders to: `"FOO", "BAR"`
-- `"{keyword|capitalize}"` renders to: `"Foo", "Bar"`
-- `"{keyword|lower|parens}"` renders to: `"(foo)", "(bar)"`
+* `"{keyword|lower}"` renders to `"foo", "bar"`
+* `"{keyword|upper}"` renders to: `"FOO", "BAR"`
+* `"{keyword|capitalize}"` renders to: `"Foo", "Bar"`
+* `"{keyword|lower|parens}"` renders to: `"(foo)", "(bar)"`
 
 e.g. if Photo description is "my description":
 
-- `"{descr|titlecase}"` renders to: `"My Description"`
+* `"{descr|titlecase}"` renders to: `"My Description"`
 
 e.g. If Photo is in `Album1` in `Folder1`:
 
-- `"{folder_album}"` renders to `["Folder1/Album1"]`
-- `"{folder_album(>)}"` renders to `["Folder1>Album1"]`
-- `"{folder_album()}"` renders to `["Folder1Album1"]`
+* `"{folder_album}"` renders to `["Folder1/Album1"]`
+* `"{folder_album(>)}"` renders to `["Folder1>Album1"]`
+* `"{folder_album()}"` renders to `["Folder1Album1"]`
 
 `[find,replace]`: optional text replacement to perform on rendered template value. For example, to replace "/" in an album name, you could use the template `"{album[/,-]}"`. Multiple replacements can be made by appending "|" and adding another find|replace pair. e.g. to replace both "/" and ":" in album name: `"{album[/,-|:,-]}"`. find/replace pairs are not limited to single characters. The "|" character cannot be used in a find/replace pair.
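The filters and find/replace substitutions above can also be exercised outside the CLI. A minimal sketch using the `PhotoInfo.render_template()` method described later in this document (assumes an open default Photos library; the keyword and album values shown are placeholders):

```python
import osxphotos

# any photo from the default library
photo = osxphotos.PhotosDB().photos()[0]

# filters chain left to right: lower-case each keyword, then parenthesize it
rendered, unmatched = photo.render_template("{keyword|lower|parens}")
print(rendered)   # e.g. ['(foo)', '(bar)'] if keywords are FOO and bar
print(unmatched)  # empty list if every template field resolved

# find/replace runs on the rendered value: '/' in album names becomes '-'
print(photo.render_template("{album[/,-]}")[0])
```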
`conditional`: optional conditional expression that is evaluated as boolean (True/False) for use with the `?bool_value` modifier. Conditional expressions take the form '`not operator value`' where `not` is an optional modifier that negates the `operator`. Note: the space before the conditional expression is required if you use a conditional expression. Valid comparison operators are:
 
-- `contains`: template field contains value, similar to python's `in`
-- `matches`: template field contains exactly value, unlike `contains`: does not match partial matches
-- `startswith`: template field starts with value
-- `endswith`: template field ends with value
-- `<=`: template field is less than or equal to value
-- `>=`: template field is greater than or equal to value
-- `<`: template field is less than value
-- `>`: template field is greater than value
-- `==`: template field equals value
-- `!=`: template field does not equal value
+* `contains`: template field contains value, similar to python's `in`
+* `matches`: template field exactly matches value; unlike `contains`, partial matches do not count
+* `startswith`: template field starts with value
+* `endswith`: template field ends with value
+* `<=`: template field is less than or equal to value
+* `>=`: template field is greater than or equal to value
+* `<`: template field is less than value
+* `>`: template field is greater than value
+* `==`: template field equals value
+* `!=`: template field does not equal value
 
 The `value` part of the conditional expression is treated as a bare (unquoted) word/phrase. Multiple values may be separated by '|' (the pipe symbol). `value` is itself a template statement so you can use one or more template fields in `value` which will be resolved before the comparison occurs. For example:
 
-- `{keyword matches Beach}` resolves to True if 'Beach' is a keyword. It would not match keyword 'BeachDay'.
-- `{keyword contains Beach}` resolves to True if any keyword contains the word 'Beach' so it would match both 'Beach' and 'BeachDay'.
-- `{photo.score.overall > 0.7}` resolves to True if the photo's overall aesthetic score is greater than 0.7.
-- `{keyword|lower contains beach}` uses the lower case filter to do case-insensitive matching to match any keyword that contains the word 'beach'.
-- `{keyword|lower not contains beach}` uses the `not` modifier to negate the comparison so this resolves to True if there is no keyword that matches 'beach'.
+* `{keyword matches Beach}` resolves to True if 'Beach' is a keyword. It would not match keyword 'BeachDay'.
+* `{keyword contains Beach}` resolves to True if any keyword contains the word 'Beach' so it would match both 'Beach' and 'BeachDay'.
+* `{photo.score.overall > 0.7}` resolves to True if the photo's overall aesthetic score is greater than 0.7.
+* `{keyword|lower contains beach}` uses the lower case filter to do case-insensitive matching to match any keyword that contains the word 'beach'.
+* `{keyword|lower not contains beach}` uses the `not` modifier to negate the comparison so this resolves to True if there is no keyword that matches 'beach'.
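The same conditionals can be evaluated from Python via `render_template()`; a minimal sketch (the keyword value is illustrative and assumes an open default library):

```python
import osxphotos

photo = osxphotos.PhotosDB().photos()[0]

# resolves to 'Beach-Photo' if any keyword contains 'Beach', else 'Not-Beach'
rendered, _ = photo.render_template(
    "{keyword contains Beach?Beach-Photo,Not-Beach}"
)
print(rendered)
```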
 Examples: to export photos that contain certain keywords with the `osxphotos export` command's `--directory` option:
@@ -2236,24 +2252,24 @@ This renames any photo that is a favorite as 'Favorite-ImageName.jpg' (where 'Im
 e.g. if photo is an HDR image,
 
-- `"{hdr?ISHDR,NOTHDR}"` renders to `"ISHDR"`
+* `"{hdr?ISHDR,NOTHDR}"` renders to `"ISHDR"`
 
 and if it is not an HDR image,
 
-- `"{hdr?ISHDR,NOTHDR}"` renders to `"NOTHDR"`
+* `"{hdr?ISHDR,NOTHDR}"` renders to `"NOTHDR"`
 
 `,default`: optional default value to use if the template name has no value. This modifier is also used for the value if False for boolean-type fields (see above) as well as to hold a sub-template for values like `{created.strftime}`. If no default value provided, "_" is used.
 
 e.g., if photo has no title set,
 
-- `"{title}"` renders to "_"
-- `"{title,I have no title}"` renders to `"I have no title"`
+* `"{title}"` renders to "_"
+* `"{title,I have no title}"` renders to `"I have no title"`
 
 Template fields such as `created.strftime` use the default value to pass the template to use for `strftime`.
 
 e.g., if photo date is 4 February 2020, 19:07:38,
 
-- `"{created.strftime,%Y-%m-%d-%H%M%S}"` renders to `"2020-02-04-190738"`
+* `"{created.strftime,%Y-%m-%d-%H%M%S}"` renders to `"2020-02-04-190738"`
 
 Some template fields such as `"{media_type}"` use the default value to allow customization of the output. For example, `"{media_type}"` resolves to the special media type of the photo such as `panorama` or `selfie`. You may use the default value to override these in form: `"{media_type,video=vidéo;time_lapse=vidéo_accélérée}"`. In this example, if photo was a time_lapse photo, `media_type` would resolve to `vidéo_accélérée` instead of `time_lapse`.
@@ -2305,7 +2321,7 @@ cog.out(get_template_field_table())
 |{created.hour}|2-digit hour of the photo creation time|
 |{created.min}|2-digit minute of the photo creation time|
 |{created.sec}|2-digit second of the photo creation time|
-|{created.strftime}|Apply strftime template to file creation date/time. Should be used in form {created.strftime,TEMPLATE} where TEMPLATE is a valid strftime template, e.g. {created.strftime,%Y-%U} would result in year-week number of year: '2020-23'. If used with no template will return null value. See https://strftime.org/ for help on strftime templates.|
+|{created.strftime}|Apply strftime template to file creation date/time. Should be used in form {created.strftime,TEMPLATE} where TEMPLATE is a valid strftime template, e.g. {created.strftime,%Y-%U} would result in year-week number of year: '2020-23'. If used with no template will return null value. See <https://strftime.org/> for help on strftime templates.|
 |{modified}|Photo's modification date in ISO format, e.g. '2020-03-22'; uses creation date if photo is not modified|
 |{modified.date}|Photo's modification date in ISO format, e.g. '2020-03-22'; uses creation date if photo is not modified|
 |{modified.year}|4-digit year of photo modification time; uses creation date if photo is not modified|
@@ -2319,7 +2335,7 @@ cog.out(get_template_field_table())
 |{modified.hour}|2-digit hour of the photo modification time; uses creation date if photo is not modified|
 |{modified.min}|2-digit minute of the photo modification time; uses creation date if photo is not modified|
 |{modified.sec}|2-digit second of the photo modification time; uses creation date if photo is not modified|
-|{modified.strftime}|Apply strftime template to file modification date/time. Should be used in form {modified.strftime,TEMPLATE} where TEMPLATE is a valid strftime template, e.g. {modified.strftime,%Y-%U} would result in year-week number of year: '2020-23'. If used with no template will return null value. Uses creation date if photo is not modified. See https://strftime.org/ for help on strftime templates.|
+|{modified.strftime}|Apply strftime template to file modification date/time. Should be used in form {modified.strftime,TEMPLATE} where TEMPLATE is a valid strftime template, e.g. {modified.strftime,%Y-%U} would result in year-week number of year: '2020-23'. If used with no template will return null value. Uses creation date if photo is not modified. See <https://strftime.org/> for help on strftime templates.|
 |{today}|Current date in iso format, e.g. '2020-03-22'|
 |{today.date}|Current date in iso format, e.g. '2020-03-22'|
 |{today.year}|4-digit year of current date|
@@ -2333,7 +2349,7 @@ cog.out(get_template_field_table())
 |{today.hour}|2-digit hour of the current date|
 |{today.min}|2-digit minute of the current date|
 |{today.sec}|2-digit second of the current date|
-|{today.strftime}|Apply strftime template to current date/time. Should be used in form {today.strftime,TEMPLATE} where TEMPLATE is a valid strftime template, e.g. {today.strftime,%Y-%U} would result in year-week number of year: '2020-23'. If used with no template will return null value. See https://strftime.org/ for help on strftime templates.|
+|{today.strftime}|Apply strftime template to current date/time. Should be used in form {today.strftime,TEMPLATE} where TEMPLATE is a valid strftime template, e.g. {today.strftime,%Y-%U} would result in year-week number of year: '2020-23'. If used with no template will return null value. See <https://strftime.org/> for help on strftime templates.|
 |{place.name}|Place name from the photo's reverse geolocation data, as displayed in Photos|
 |{place.country_code}|The ISO country code from the photo's reverse geolocation data|
 |{place.name.country}|Country name from the photo's reverse geolocation data|
@@ -2385,17 +2401,17 @@ cog.out(get_template_field_table())
 |{label}|Image categorization label associated with a photo (Photos 5+ only). Labels are added automatically by Photos using machine learning algorithms to categorize images. These are not the same as {keyword} which refers to the user-defined keywords/tags applied in Photos.|
 |{label_normalized}|All lower case version of 'label' (Photos 5+ only)|
 |{comment}|Comment(s) on shared Photos; format is 'Person name: comment text' (Photos 5+ only)|
-|{exiftool}|Format: '{exiftool:GROUP:TAGNAME}'; use exiftool (https://exiftool.org) to extract metadata, in form GROUP:TAGNAME, from image. E.g. '{exiftool:EXIF:Make}' to get camera make, or {exiftool:IPTC:Keywords} to extract keywords. See https://exiftool.org/TagNames/ for list of valid tag names. You must specify group (e.g. EXIF, IPTC, etc) as used in `exiftool -G`. exiftool must be installed in the path to use this template.|
+|{exiftool}|Format: '{exiftool:GROUP:TAGNAME}'; use exiftool (<https://exiftool.org>) to extract metadata, in form GROUP:TAGNAME, from image. E.g. '{exiftool:EXIF:Make}' to get camera make, or {exiftool:IPTC:Keywords} to extract keywords. See <https://exiftool.org/TagNames/> for list of valid tag names. You must specify group (e.g. EXIF, IPTC, etc) as used in `exiftool -G`. exiftool must be installed in the path to use this template.|
 |{searchinfo.holiday}|Holiday names associated with a photo, e.g. 'Christmas Day'; (Photos 5+ only, applied automatically by Photos' image categorization algorithms).|
 |{searchinfo.activity}|Activities associated with a photo, e.g. 'Sporting Event'; (Photos 5+ only, applied automatically by Photos' image categorization algorithms).|
 |{searchinfo.venue}|Venues associated with a photo, e.g. name of restaurant; (Photos 5+ only, applied automatically by Photos' image categorization algorithms).|
 |{searchinfo.venue_type}|Venue types associated with a photo, e.g. 'Restaurant'; (Photos 5+ only, applied automatically by Photos' image categorization algorithms).|
 |{photo}|Provides direct access to the PhotoInfo object for the photo. Must be used in format '{photo.property}' where 'property' represents a PhotoInfo property. For example: '{photo.favorite}' is the same as '{favorite}' and '{photo.place.name}' is the same as '{place.name}'. '{photo}' provides access to properties that are not available as separate template fields but it assumes some knowledge of the underlying PhotoInfo class. See <https://rhettbull.github.io/osxphotos/> for additional documentation on the PhotoInfo class.|
 |{detected_text}|List of text strings found in the image after performing text detection. Using '{detected_text}' will cause osxphotos to perform text detection on your photos using the built-in macOS text detection algorithms which will slow down your export. The results for each photo will be cached in the export database so that future exports with '--update' do not need to reprocess each photo. You may pass a confidence threshold value between 0.0 and 1.0 after a colon as in '{detected_text:0.5}'; The default confidence threshold is 0.75. '{detected_text}' works only on macOS Catalina (10.15) or later. Note: this feature is not the same thing as Live Text in macOS Monterey, which osxphotos does not yet support.|
 |{shell_quote}|Use in form '{shell_quote,TEMPLATE}'; quotes the rendered TEMPLATE value(s) for safe usage in the shell, e.g. My file.jpeg => 'My file.jpeg'; only adds quotes if needed.|
 |{strip}|Use in form '{strip,TEMPLATE}'; strips whitespace from beginning and end of rendered TEMPLATE value(s).|
 |{format}|Use in form, '{format:TYPE:FORMAT,TEMPLATE}'; converts TEMPLATE value to TYPE then formats the value using Python string formatting codes specified by FORMAT; TYPE is one of: 'int', 'float', or 'str'. For example, '{format:float:.1f,{exiftool:EXIF:FocalLength}}' will format focal length to 1 decimal place (e.g. '100.0'). |
 |{function}|Execute a python function from an external file and use return value as template substitution. Use in format: {function:file.py::function_name} where 'file.py' is the name of the python file and 'function_name' is the name of the function to call. The function will be passed the PhotoInfo object for the photo. See <https://github.com/RhetTbull/osxphotos/blob/master/examples/template_function.py> for an example of how to implement a template function.|
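To illustrate the `{function}` field above, a hypothetical template function might look like the following. The file name `my_template.py` and function name are placeholders, and the exact signature osxphotos expects is shown authoritatively in the linked example file; this is only a sketch:

```python
"""my_template.py -- a hypothetical template function; reference it as
'{function:my_template.py::first_keyword}'."""
from typing import List, Union

import osxphotos


def first_keyword(photo: osxphotos.PhotoInfo, **kwargs) -> Union[str, List[str]]:
    """Return the photo's first keyword, or 'no_keyword' if it has none."""
    return photo.keywords[0] if photo.keywords else "no_keyword"
```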
### ExifTool

diff --git a/osxphotos/_constants.py b/osxphotos/_constants.py
index 541a12cf..d96f8eda 100644
--- a/osxphotos/_constants.py
+++ b/osxphotos/_constants.py
@@ -2,10 +2,14 @@
 
 from __future__ import annotations
 
+import logging
 import os.path
+import sqlite3
 from datetime import datetime
 from enum import Enum
 
+logger: logging.Logger = logging.getLogger("osxphotos")
+
 APP_NAME = "osxphotos"
 OSXPHOTOS_URL = "https://github.com/RhetTbull/osxphotos"
@@ -464,3 +468,12 @@ PROFILE_SORT_KEYS = [
 UUID_PATTERN = (
     r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
 )
+# Reference: https://docs.python.org/3/library/sqlite3.html?highlight=sqlite3%20threadsafety#sqlite3.threadsafety
+# and https://docs.python.org/3/library/sqlite3.html?highlight=sqlite3%20threadsafety#sqlite3.connect
+# 3: serialized mode; Threads may share the module, connections and cursors
+# 3 is the default in the python.org python 3.11 distribution
+# earlier versions of python.org python 3.x default to 1 which means threads may not share
+# sqlite3 connections and thus PhotoInfo.export() cannot be used in a multithreaded environment
+# pass SQLITE_CHECK_SAME_THREAD to sqlite3.connect() to enable multithreaded access on systems that support it
+SQLITE_CHECK_SAME_THREAD = not sqlite3.threadsafety == 3
+logger.debug(f"{SQLITE_CHECK_SAME_THREAD=}, {sqlite3.threadsafety=}")
diff --git a/osxphotos/albuminfo.py b/osxphotos/albuminfo.py
index 0666ecc9..db4f457e 100644
--- a/osxphotos/albuminfo.py
+++ b/osxphotos/albuminfo.py
@@ -18,6 +18,7 @@ from ._constants import (
     _PHOTOS_4_VERSION,
     _PHOTOS_5_ALBUM_KIND,
     _PHOTOS_5_FOLDER_KIND,
+    _PHOTOS_5_VERSION,
     TIME_DELTA,
     AlbumSortOrder,
 )
@@ -61,7 +62,7 @@ class AlbumInfoBaseClass:
     including folders, photos, etc.
     """
 
-    def __init__(self, db=None, uuid=None):
+    def __init__(self, db, uuid):
         self._uuid = uuid
         self._db = db
         self._title = self._db._dbalbum_details[uuid]["title"]
@@ -121,7 +122,8 @@ class AlbumInfoBaseClass:
     @property
     def end_date(self):
         """For Albums, return end date (most recent image) of album or None for albums with no images
-        For Import Sessions, return end date of import sessions (when import was completed)"""
+        For Import Sessions, return end date of import sessions (when import was completed)
+        """
         try:
             return self._end_date
         except AttributeError:
@@ -163,6 +165,17 @@ class AlbumInfoBaseClass:
             self._owner = None
         return self._owner
 
+    def asdict(self):
+        """Return album info as a dict"""
+        return {
+            "uuid": self.uuid,
+            "creation_date": self.creation_date,
+            "start_date": self.start_date,
+            "end_date": self.end_date,
+            "owner": self.owner,
+            "photos": [p.uuid for p in self.photos],
+        }
+
     def __len__(self):
         """return number of photos contained in album"""
         return len(self.photos)
@@ -174,6 +187,10 @@ class AlbumInfo(AlbumInfoBaseClass):
     including folders, photos, etc.
     """
 
+    def __init__(self, db, uuid):
+        super().__init__(db=db, uuid=uuid)
+        self._title = self._db._dbalbum_details[uuid]["title"]
+
     @property
     def title(self):
         """return title / name of album"""
@@ -205,10 +222,11 @@ class AlbumInfo(AlbumInfoBaseClass):
 
     @property
     def folder_names(self):
-        """return hierarchical list of folders the album is contained in
+        """Return hierarchical list of folders the album is contained in
         the folder list is in form:
         ["Top level folder", "sub folder 1", "sub folder 2", ...]
- returns empty list if album is not in any folders""" + or empty list if album is not in any folders + """ try: return self._folder_names @@ -218,10 +236,9 @@ class AlbumInfo(AlbumInfoBaseClass): @property def folder_list(self): - """return hierarchical list of folders the album is contained in - as list of FolderInfo objects in form - ["Top level folder", "sub folder 1", "sub folder 2", ...] - returns empty list if album is not in any folders""" + """Returns list of FolderInfo objects for each folder the album is contained in + or empty list if album is not in any folders + """ try: return self._folders @@ -246,7 +263,7 @@ class AlbumInfo(AlbumInfoBaseClass): parent_pk = self._db._dbalbum_details[self._uuid]["parentfolder"] self._parent = ( FolderInfo(db=self._db, uuid=self._db._dbalbums_pk[parent_pk]) - if parent_pk != self._db._folder_root_pk + if parent_pk is not None and parent_pk != self._db._folder_root_pk else None ) return self._parent @@ -281,27 +298,80 @@ class AlbumInfo(AlbumInfoBaseClass): f"Photo with uuid {photo.uuid} does not appear to be in this album" ) + def asdict(self): + """Return album info as a dict""" + dict_data = super().asdict() + dict_data["title"] = self.title + dict_data["folder_names"] = self.folder_names + dict_data["folder_list"] = [f.uuid for f in self.folder_list] + dict_data["sort_order"] = self.sort_order + dict_data["parent"] = self.parent.uuid if self.parent else None + return dict_data + class ImportInfo(AlbumInfoBaseClass): """Information about import sessions""" + def __init__(self, db, uuid): + self._uuid = uuid + self._db = db + + if self._db._db_version >= _PHOTOS_5_VERSION: + return super().__init__(db=db, uuid=uuid) + + import_session = self._db._db_import_group[self._uuid] + try: + self._creation_date_timestamp = import_session[3] + except (ValueError, TypeError, KeyError): + self._creation_date_timestamp = datetime(1970, 1, 1) + self._start_date_timestamp = self._creation_date_timestamp + self._end_date_timestamp = self._creation_date_timestamp + self._title = import_session[2] + self._local_tz = get_local_tz( + datetime.fromtimestamp(self._creation_date_timestamp + TIME_DELTA) + ) + + @property + def title(self): + """return title / name of import session""" + return self._title + @property def photos(self): """return list of photos contained in import session""" try: return self._photos except AttributeError: - uuid_list, sort_order = zip( - *[ - (uuid, self._db._dbphotos[uuid]["fok_import_session"]) - for uuid in self._db._dbphotos - if self._db._dbphotos[uuid]["import_uuid"] == self.uuid + if self._db._db_version >= _PHOTOS_5_VERSION: + uuid_list, sort_order = zip( + *[ + (uuid, self._db._dbphotos[uuid]["fok_import_session"]) + for uuid in self._db._dbphotos + if self._db._dbphotos[uuid]["import_uuid"] == self.uuid + ] + ) + sorted_uuid = sort_list_by_keys(uuid_list, sort_order) + self._photos = self._db.photos_by_uuid(sorted_uuid) + else: + import_photo_uuids = [ + u + for u in self._db._dbphotos + if self._db._dbphotos[u]["import_uuid"] == self.uuid ] - ) - sorted_uuid = sort_list_by_keys(uuid_list, sort_order) - self._photos = self._db.photos_by_uuid(sorted_uuid) + self._photos = self._db.photos_by_uuid(import_photo_uuids) return self._photos + def asdict(self): + """Return import info as a dict""" + return { + "uuid": self.uuid, + "creation_date": self.creation_date, + "start_date": self.start_date, + "end_date": self.end_date, + "title": self.title, + "photos": [p.uuid for p in self.photos], + } + def __bool__(self): """Always 
returns True A photo without an import session will return None for import_info, @@ -309,6 +379,7 @@ class ImportInfo(AlbumInfoBaseClass): """ return True + class ProjectInfo(AlbumInfo): """ ProjectInfo with info about projects @@ -386,7 +457,7 @@ class FolderInfo: parent_pk = self._db._dbalbum_details[self._uuid]["parentfolder"] self._parent = ( FolderInfo(db=self._db, uuid=self._db._dbalbums_pk[parent_pk]) - if parent_pk != self._db._folder_root_pk + if parent_pk is not None and parent_pk != self._db._folder_root_pk else None ) return self._parent @@ -416,6 +487,16 @@ class FolderInfo: self._folders = folders return self._folders + def asdict(self): + """Return folder info as a dict""" + return { + "title": self.title, + "uuid": self.uuid, + "parent": self.parent.uuid if self.parent is not None else None, + "subfolders": [f.uuid for f in self.subfolders], + "albums": [a.uuid for a in self.album_info], + } + def __len__(self): """returns count of folders + albums contained in the folder""" return len(self.subfolders) + len(self.album_info) diff --git a/osxphotos/cli/import_cli.py b/osxphotos/cli/import_cli.py index 69c74a21..15029e3c 100644 --- a/osxphotos/cli/import_cli.py +++ b/osxphotos/cli/import_cli.py @@ -25,7 +25,7 @@ from rich.console import Console from rich.markdown import Markdown from strpdatetime import strpdatetime -from osxphotos._constants import _OSXPHOTOS_NONE_SENTINEL +from osxphotos._constants import _OSXPHOTOS_NONE_SENTINEL, SQLITE_CHECK_SAME_THREAD from osxphotos._version import __version__ from osxphotos.cli.cli_params import TIMESTAMP_OPTION, VERBOSE_OPTION from osxphotos.cli.common import get_data_dir @@ -77,7 +77,8 @@ def echo(message, emoji=True, **kwargs): class PhotoInfoFromFile: """Mock PhotoInfo class for a file to be imported - Returns None for most attributes but allows some templates like exiftool and created to work correctly""" + Returns None for most attributes but allows some templates like exiftool and created to work correctly + """ def __init__(self, filepath: Union[str, Path], exiftool: Optional[str] = None): self._path = str(filepath) @@ -745,7 +746,7 @@ def write_sqlite_report( file_exists = os.path.isfile(report_file) - conn = sqlite3.connect(report_file) + conn = sqlite3.connect(report_file, check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() if not append or not file_exists: diff --git a/osxphotos/cli/report_writer.py b/osxphotos/cli/report_writer.py index 966e773c..6d51d193 100644 --- a/osxphotos/cli/report_writer.py +++ b/osxphotos/cli/report_writer.py @@ -12,6 +12,7 @@ from abc import ABC, abstractmethod from contextlib import suppress from typing import Dict, Union +from osxphotos._constants import SQLITE_CHECK_SAME_THREAD from osxphotos.export_db import OSXPHOTOS_ABOUT_STRING from osxphotos.photoexporter import ExportResults from osxphotos.sqlite_utils import sqlite_columns @@ -181,7 +182,7 @@ class ExportReportWriterSQLite(ReportWriterABC): with suppress(FileNotFoundError): os.unlink(self.output_file) - self._conn = sqlite3.connect(self.output_file) + self._conn = sqlite3.connect(self.output_file, check_same_thread=SQLITE_CHECK_SAME_THREAD) self._create_tables() self.report_id = self._generate_report_id() @@ -533,7 +534,7 @@ class SyncReportWriterSQLite(ReportWriterABC): with suppress(FileNotFoundError): os.unlink(self.output_file) - self._conn = sqlite3.connect(self.output_file) + self._conn = sqlite3.connect(self.output_file, check_same_thread=SQLITE_CHECK_SAME_THREAD) self._create_tables() self.report_id = 
self._generate_report_id() diff --git a/osxphotos/export_db.py b/osxphotos/export_db.py index 023940b2..bf3a8cb2 100644 --- a/osxphotos/export_db.py +++ b/osxphotos/export_db.py @@ -5,6 +5,7 @@ from __future__ import annotations import datetime import gzip import json +import logging import os import os.path import pathlib @@ -12,17 +13,18 @@ import pickle import re import sqlite3 import sys +import threading import time from contextlib import suppress from io import StringIO from sqlite3 import Error -from tempfile import TemporaryDirectory -from typing import Any, List, Optional, Tuple, Union -import logging +from typing import Any from tenacity import retry, retry_if_not_exception_type, stop_after_attempt -from ._constants import OSXPHOTOS_EXPORT_DB +import osxphotos + +from ._constants import OSXPHOTOS_EXPORT_DB, SQLITE_CHECK_SAME_THREAD from ._version import __version__ from .fileutil import FileUtil from .utils import normalize_fs_path @@ -83,7 +85,7 @@ def unzip_and_unpickle(data: bytes) -> Any: class ExportDB: """Interface to sqlite3 database used to store state information for osxphotos export command""" - def __init__(self, dbfile, export_dir): + def __init__(self, dbfile: pathlib.Path | str, export_dir: pathlib.Path | str): """create a new ExportDB object Args: @@ -92,50 +94,60 @@ class ExportDB: memory: if True, use in-memory database """ - self._dbfile = dbfile + self._dbfile: str = str(dbfile) # export_dir is required as all files referenced by get_/set_uuid_for_file will be converted to # relative paths to this path # this allows the entire export tree to be moved to a new disk/location # whilst preserving the UUID to filename mapping - self._path = export_dir - self._conn = self._open_export_db(dbfile) - self._perform_db_maintenace(self._conn) + self._path: str = str(export_dir) + self.was_upgraded: tuple[str, str] | tuple = () + self.was_created = False + + self.lock = threading.Lock() + + self._conn = self._open_export_db(self._dbfile) + self._perform_db_maintenance(self._conn) self._insert_run_info() @property - def path(self): + def path(self) -> str: """returns path to export database""" return self._dbfile @property - def export_dir(self): + def export_dir(self) -> str: """returns path to export directory""" return self._path + @property + def connection(self) -> sqlite3.Connection: + """returns sqlite3 connection""" + return self._conn or self._get_db_connection(self._dbfile) + @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) - def get_file_record(self, filename: Union[pathlib.Path, str]) -> "ExportRecord": + def get_file_record(self, filename: pathlib.Path | str) -> "ExportRecord" | None: """get info for filename Returns: an ExportRecord object or None if filename not found """ filename = self._relative_filepath(filename) filename_normalized = self._normalize_filepath(filename) - conn = self._conn - c = conn.cursor() - if _ := c.execute( - "SELECT uuid FROM export_data WHERE filepath_normalized = ?;", - (filename_normalized,), - ).fetchone(): - return ExportRecord(conn, filename_normalized) - return None + with self.lock: + conn = self.connection + c = conn.cursor() + result = c.execute( + "SELECT uuid FROM export_data WHERE filepath_normalized = ?;", + (filename_normalized,), + ).fetchone() + return ExportRecord(conn, self.lock, filename_normalized) if result else None @retry( stop=stop_after_attempt(MAX_RETRY_ATTEMPTS), retry=retry_if_not_exception_type(sqlite3.IntegrityError), ) def create_file_record( - self, filename: Union[pathlib.Path, str], uuid: str + 
self, filename: pathlib.Path | str, uuid: str ) -> "ExportRecord": """create a new record for filename and uuid @@ -143,21 +155,23 @@ class ExportDB: """ filename = self._relative_filepath(filename) filename_normalized = self._normalize_filepath(filename) - conn = self._conn - c = conn.cursor() - c.execute( - "INSERT INTO export_data (filepath, filepath_normalized, uuid) VALUES (?, ?, ?);", - (filename, filename_normalized, uuid), - ) - conn.commit() - return ExportRecord(conn, filename_normalized) + + with self.lock: + conn = self.connection + c = conn.cursor() + c.execute( + "INSERT INTO export_data (filepath, filepath_normalized, uuid) VALUES (?, ?, ?);", + (filename, filename_normalized, uuid), + ) + conn.commit() + return ExportRecord(conn, self.lock, filename_normalized) @retry( stop=stop_after_attempt(MAX_RETRY_ATTEMPTS), retry=retry_if_not_exception_type(sqlite3.IntegrityError), ) def create_or_get_file_record( - self, filename: Union[pathlib.Path, str], uuid: str + self, filename: pathlib.Path | str, uuid: str ) -> "ExportRecord": """create a new record for filename and uuid or return existing record @@ -165,147 +179,159 @@ class ExportDB: """ filename = self._relative_filepath(filename) filename_normalized = self._normalize_filepath(filename) - conn = self._conn - c = conn.cursor() - c.execute( - "INSERT OR IGNORE INTO export_data (filepath, filepath_normalized, uuid) VALUES (?, ?, ?);", - (filename, filename_normalized, uuid), - ) - conn.commit() - return ExportRecord(conn, filename_normalized) + + with self.lock: + conn = self.connection + c = conn.cursor() + c.execute( + "INSERT OR IGNORE INTO export_data (filepath, filepath_normalized, uuid) VALUES (?, ?, ?);", + (filename, filename_normalized, uuid), + ) + conn.commit() + return ExportRecord(conn, self.lock, filename_normalized) @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) - def get_uuid_for_file(self, filename): + def get_uuid_for_file(self, filename: str) -> str | None: """query database for filename and return UUID returns None if filename not found in database """ filepath_normalized = self._normalize_filepath_relative(filename) - conn = self._conn - c = conn.cursor() - c.execute( - "SELECT uuid FROM export_data WHERE filepath_normalized = ?", - (filepath_normalized,), - ) - results = c.fetchone() - return results[0] if results else None + + with self.lock: + conn = self.connection + c = conn.cursor() + c.execute( + "SELECT uuid FROM export_data WHERE filepath_normalized = ?", + (filepath_normalized,), + ) + results = c.fetchone() + return results[0] if results else None @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) - def get_files_for_uuid(self, uuid: str) -> List: + def get_files_for_uuid(self, uuid: str) -> list[str]: """query database for UUID and return list of files associated with UUID or empty list""" - conn = self._conn - c = conn.cursor() - c.execute( - "SELECT filepath FROM export_data WHERE uuid = ?", - (uuid,), - ) - results = c.fetchall() - return [os.path.join(self.export_dir, r[0]) for r in results] + with self.lock: + conn = self.connection + c = conn.cursor() + c.execute( + "SELECT filepath FROM export_data WHERE uuid = ?", + (uuid,), + ) + results = c.fetchall() + return [os.path.join(self.export_dir, r[0]) for r in results] @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) - def get_photoinfo_for_uuid(self, uuid): - """returns the photoinfo JSON struct for a UUID""" - conn = self._conn - c = conn.cursor() - c.execute("SELECT photoinfo FROM photoinfo WHERE uuid = ?", (uuid,)) - results = 
c.fetchone() - return results[0] if results else None + def get_photoinfo_for_uuid(self, uuid: str) -> str | None: + """returns the photoinfo JSON string for a UUID or None if not found""" + with self.lock: + conn = self.connection + c = conn.cursor() + c.execute("SELECT photoinfo FROM photoinfo WHERE uuid = ?", (uuid,)) + results = c.fetchone() + return results[0] if results else None @retry( stop=stop_after_attempt(MAX_RETRY_ATTEMPTS), retry=retry_if_not_exception_type(sqlite3.IntegrityError), ) - def set_photoinfo_for_uuid(self, uuid, info): - """sets the photoinfo JSON struct for a UUID""" - conn = self._conn - c = conn.cursor() - c.execute( - "INSERT OR REPLACE INTO photoinfo(uuid, photoinfo) VALUES (?, ?);", - (uuid, info), - ) - conn.commit() + def set_photoinfo_for_uuid(self, uuid: str, info: str): + """sets the photoinfo JSON string for a UUID""" + with self.lock: + conn = self.connection + c = conn.cursor() + c.execute( + "INSERT OR REPLACE INTO photoinfo(uuid, photoinfo) VALUES (?, ?);", + (uuid, info), + ) + conn.commit() @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) def get_target_for_file( - self, uuid: str, filename: Union[str, pathlib.Path] - ) -> Optional[str]: + self, uuid: str, filename: pathlib.Path | str + ) -> str | None: """query database for file matching file name and return the matching filename if there is one; otherwise return None; looks for file.ext, file (1).ext, file (2).ext and so on to find the actual target name that was used to export filename Returns: the matching filename or None if no match found """ - conn = self._conn - c = conn.cursor() - filepath_normalized = self._normalize_filepath_relative(filename) - filepath_stem = os.path.splitext(filepath_normalized)[0] - c.execute( - "SELECT uuid, filepath, filepath_normalized FROM export_data WHERE uuid = ? AND filepath_normalized LIKE ?", - ( - uuid, - f"{filepath_stem}%", - ), - ) - results = c.fetchall() + with self.lock: + conn = self.connection + c = conn.cursor() + filepath_normalized = self._normalize_filepath_relative(filename) + filepath_stem = os.path.splitext(filepath_normalized)[0] + c.execute( + "SELECT uuid, filepath, filepath_normalized FROM export_data WHERE uuid = ? 
AND filepath_normalized LIKE ?", + ( + uuid, + f"{filepath_stem}%", + ), + ) + results = c.fetchall() - for result in results: - filepath_normalized = os.path.splitext(result[2])[0] - if re.match( - re.escape(filepath_stem) + r"(\s\(\d+\))?$", filepath_normalized - ): - return os.path.join(self.export_dir, result[1]) + for result in results: + filepath_normalized = os.path.splitext(result[2])[0] + if re.match( + re.escape(filepath_stem) + r"(\s\(\d+\))?$", filepath_normalized + ): + return os.path.join(self.export_dir, result[1]) - return None + return None @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) def get_previous_uuids(self): """returns list of UUIDs of previously exported photos found in export database""" - conn = self._conn - previous_uuids = [] - c = conn.cursor() - c.execute("SELECT DISTINCT uuid FROM export_data") - results = c.fetchall() - return [row[0] for row in results] + with self.lock: + conn = self.connection + c = conn.cursor() + c.execute("SELECT DISTINCT uuid FROM export_data") + results = c.fetchall() + return [row[0] for row in results] @retry( stop=stop_after_attempt(MAX_RETRY_ATTEMPTS), retry=retry_if_not_exception_type(sqlite3.IntegrityError), ) - def set_config(self, config_data): + def set_config(self, config_data: str): """set config in the database""" - conn = self._conn - dt = datetime.datetime.now().isoformat() - c = conn.cursor() - c.execute( - "INSERT OR REPLACE INTO config(datetime, config) VALUES (?, ?);", - (dt, config_data), - ) - conn.commit() + with self.lock: + conn = self.connection + dt = datetime.datetime.now().isoformat() + c = conn.cursor() + c.execute( + "INSERT OR REPLACE INTO config(datetime, config) VALUES (?, ?);", + (dt, config_data), + ) + conn.commit() @retry( stop=stop_after_attempt(MAX_RETRY_ATTEMPTS), retry=retry_if_not_exception_type(sqlite3.IntegrityError), ) - def set_export_results(self, results): + def set_export_results(self, results: "osxphotos.photoexporter.ExportResults"): """Store export results in database; data is pickled and gzipped for storage""" results_data = pickle_and_zip(results) - conn = self._conn - dt = datetime.datetime.now().isoformat() - c = conn.cursor() - c.execute( - """ - UPDATE export_results_data - SET datetime = ?, - export_results = ? - WHERE datetime = (SELECT MIN(datetime) FROM export_results_data); - """, - (dt, results_data), - ) - conn.commit() + with self.lock: + conn = self.connection + dt = datetime.datetime.now().isoformat() + c = conn.cursor() + c.execute( + """ + UPDATE export_results_data + SET datetime = ?, + export_results = ? 
+ WHERE datetime = (SELECT MIN(datetime) FROM export_results_data); + """, + (dt, results_data), + ) + conn.commit() @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) - def get_export_results(self, run: int = 0): + def get_export_results( + self, run: int = 0 + ) -> "osxphotos.photoexporter.ExportResults" | None: """Retrieve export results from database Args: @@ -319,69 +345,72 @@ class ExportDB: raise ValueError("run must be 0 or negative") run = -run - conn = self._conn - c = conn.cursor() - c.execute( - """ - SELECT export_results - FROM export_results_data - ORDER BY datetime DESC - """, - ) - rows = c.fetchall() - try: - data = rows[run][0] - results = unzip_and_unpickle(data) if data else None - except IndexError: - results = None - return results + with self.lock: + conn = self.connection + c = conn.cursor() + c.execute( + """ + SELECT export_results + FROM export_results_data + ORDER BY datetime DESC + """, + ) + rows = c.fetchall() + try: + data = rows[run][0] + results = unzip_and_unpickle(data) if data else None + except IndexError: + results = None + return results @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) def get_exported_files(self): """Returns tuple of (uuid, filepath) for all paths of all exported files tracked in the database""" - conn = self._conn - c = conn.cursor() - c.execute("SELECT uuid, filepath FROM export_data") + with self.lock: + conn = self.connection + c = conn.cursor() + c.execute("SELECT uuid, filepath FROM export_data") - while row := c.fetchone(): - yield row[0], os.path.join(self.export_dir, row[1]) - return + while row := c.fetchone(): + yield row[0], os.path.join(self.export_dir, row[1]) + return @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) - def delete_data_for_uuid(self, uuid): + def delete_data_for_uuid(self, uuid: str): """Delete all exportdb data for given UUID""" - conn = self._conn - c = conn.cursor() - count = 0 - c.execute("DELETE FROM export_data WHERE uuid = ?;", (uuid,)) - count += c.execute("SELECT CHANGES();").fetchone()[0] - c.execute("DELETE FROM photoinfo WHERE uuid = ?;", (uuid,)) - count += c.execute("SELECT CHANGES();").fetchone()[0] - conn.commit() - return count + with self.lock: + conn = self.connection + c = conn.cursor() + count = 0 + c.execute("DELETE FROM export_data WHERE uuid = ?;", (uuid,)) + count += c.execute("SELECT CHANGES();").fetchone()[0] + c.execute("DELETE FROM photoinfo WHERE uuid = ?;", (uuid,)) + count += c.execute("SELECT CHANGES();").fetchone()[0] + conn.commit() + return count @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) - def delete_data_for_filepath(self, filepath): + def delete_data_for_filepath(self, filepath: pathlib.Path | str): """Delete all exportdb data for given filepath""" - conn = self._conn - c = conn.cursor() - filepath_normalized = self._normalize_filepath_relative(filepath) - results = c.execute( - "SELECT uuid FROM export_data WHERE filepath_normalized = ?;", - (filepath_normalized,), - ).fetchall() - count = 0 - for row in results: - count += self.delete_data_for_uuid(row[0]) - return count + with self.lock: + conn = self.connection + c = conn.cursor() + filepath_normalized = self._normalize_filepath_relative(filepath) + results = c.execute( + "SELECT uuid FROM export_data WHERE filepath_normalized = ?;", + (filepath_normalized,), + ).fetchall() + return sum(self.delete_data_for_uuid(row[0]) for row in results) @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) def close(self): """close the database connection""" - self._conn.close() + if self._conn: + 
self._conn.close() + self._conn = None @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) - def _open_export_db(self, dbfile): + def _open_export_db(self, dbfile: str) -> sqlite3.Connection: """open export database and return a db connection if dbfile does not exist, will create and initialize the database if dbfile needs to be upgraded, will perform needed migrations @@ -405,28 +434,30 @@ class ExportDB: self.version = OSXPHOTOS_EXPORTDB_VERSION # turn on performance optimizations - c = conn.cursor() - c.execute("PRAGMA journal_mode=WAL;") - c.execute("PRAGMA synchronous=NORMAL;") - c.execute("PRAGMA cache_size=-100000;") - c.execute("PRAGMA temp_store=MEMORY;") + with self.lock: + c = conn.cursor() + c.execute("PRAGMA journal_mode=WAL;") + c.execute("PRAGMA synchronous=NORMAL;") + c.execute("PRAGMA cache_size=-100000;") + c.execute("PRAGMA temp_store=MEMORY;") return conn @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) - def _get_db_connection(self, dbfile): + def _get_db_connection(self, dbfile: str) -> sqlite3.Connection: """return db connection to dbname""" - return sqlite3.connect(dbfile) + return sqlite3.connect(dbfile, check_same_thread=SQLITE_CHECK_SAME_THREAD) @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) - def _get_database_version(self, conn): + def _get_database_version(self, conn: sqlite3.Connection) -> tuple[str, str]: """return tuple of (osxphotos, exportdb) versions for database connection conn""" - version_info = conn.execute( - "SELECT osxphotos, exportdb, max(id) FROM version" - ).fetchone() - return (version_info[0], version_info[1]) + with self.lock: + version_info = conn.execute( + "SELECT osxphotos, exportdb, max(id) FROM version" + ).fetchone() + return (version_info[0], version_info[1]) - def _create_or_migrate_db_tables(self, conn): + def _create_or_migrate_db_tables(self, conn: sqlite3.Connection): """create (if not already created) the necessary db tables for the export database and apply any needed migrations Args: @@ -514,15 +545,16 @@ class ExportDB: """ CREATE UNIQUE INDEX IF NOT EXISTS idx_detected_text on detected_text (uuid);""", ] # create the tables if needed - c = conn.cursor() - for cmd in sql_commands: - c.execute(cmd) - c.execute( - "INSERT INTO version(osxphotos, exportdb) VALUES (?, ?);", - (__version__, OSXPHOTOS_EXPORTDB_VERSION), - ) - c.execute("INSERT INTO about(about) VALUES (?);", (OSXPHOTOS_ABOUT_STRING,)) - conn.commit() + with self.lock: + c = conn.cursor() + for cmd in sql_commands: + c.execute(cmd) + c.execute( + "INSERT INTO version(osxphotos, exportdb) VALUES (?, ?);", + (__version__, OSXPHOTOS_EXPORTDB_VERSION), + ) + c.execute("INSERT INTO about(about) VALUES (?);", (OSXPHOTOS_ABOUT_STRING,)) + conn.commit() # perform needed migrations if version[1] < "4.3": @@ -547,8 +579,9 @@ class ExportDB: # add error to export_data self._migrate_7_1_to_8_0(conn) - conn.execute("VACUUM;") - conn.commit() + with self.lock: + conn.execute("VACUUM;") + conn.commit() def __del__(self): """ensure the database connection is closed""" @@ -562,32 +595,31 @@ class ExportDB: cmd = sys.argv[0] args = " ".join(sys.argv[1:]) if len(sys.argv) > 1 else "" cwd = os.getcwd() - conn = self._conn - c = conn.cursor() - c.execute( - "INSERT INTO runs (datetime, python_path, script_name, args, cwd) VALUES (?, ?, ?, ?, ?)", - (dt, python_path, cmd, args, cwd), - ) + with self.lock: + conn = self.connection + c = conn.cursor() + c.execute( + "INSERT INTO runs (datetime, python_path, script_name, args, cwd) VALUES (?, ?, ?, ?, ?)", + (dt, python_path, cmd, 
args, cwd), + ) + conn.commit() - conn.commit() - - def _relative_filepath(self, filepath: Union[str, pathlib.Path]) -> str: + def _relative_filepath(self, filepath: pathlib.Path | str) -> str: """return filepath relative to self._path""" return str(pathlib.Path(filepath).relative_to(self._path)) - def _normalize_filepath(self, filepath: Union[str, pathlib.Path]) -> str: + def _normalize_filepath(self, filepath: pathlib.Path | str) -> str: """normalize filepath for unicode, lower case""" return normalize_fs_path(str(filepath)).lower() - def _normalize_filepath_relative(self, filepath: Union[str, pathlib.Path]) -> str: + def _normalize_filepath_relative(self, filepath: pathlib.Path | str) -> str: """normalize filepath for unicode, relative path (to export dir), lower case""" filepath = self._relative_filepath(filepath) return normalize_fs_path(str(filepath)).lower() - def _migrate_normalized_filepath(self, conn): + def _migrate_normalized_filepath(self, conn: sqlite3.Connection): """Fix all filepath_normalized columns for unicode normalization""" # Prior to database version 4.3, filepath_normalized was not normalized for unicode - c = conn.cursor() migration_sql = [ """ CREATE TABLE IF NOT EXISTS files_migrate ( id INTEGER PRIMARY KEY, @@ -606,205 +638,213 @@ class ExportDB: """ DROP TABLE files;""", """ ALTER TABLE files_migrate RENAME TO files;""", ] - for sql in migration_sql: - c.execute(sql) - conn.commit() - for table in ["converted", "edited", "exifdata", "files", "sidecar"]: - old_values = c.execute( - f"SELECT filepath_normalized, id FROM {table}" - ).fetchall() - new_values = [ - (self._normalize_filepath(filepath_normalized), id_) - for filepath_normalized, id_ in old_values - ] - c.executemany( - f"UPDATE {table} SET filepath_normalized=? WHERE id=?", new_values - ) - conn.commit() + with self.lock: + c = conn.cursor() + for sql in migration_sql: + c.execute(sql) + conn.commit() - def _migrate_4_3_to_5_0(self, conn): + for table in ["converted", "edited", "exifdata", "files", "sidecar"]: + old_values = c.execute( + f"SELECT filepath_normalized, id FROM {table}" + ).fetchall() + new_values = [ + (self._normalize_filepath(filepath_normalized), id_) + for filepath_normalized, id_ in old_values + ] + c.executemany( + f"UPDATE {table} SET filepath_normalized=? 
WHERE id=?", new_values + ) + conn.commit() + + def _migrate_4_3_to_5_0(self, conn: sqlite3.Connection): """Migrate database from version 4.3 to 5.0""" - c = conn.cursor() - # add metadata column to files to support --force-update - c.execute("ALTER TABLE files ADD COLUMN metadata TEXT;") - conn.commit() + with self.lock: + c = conn.cursor() + # add metadata column to files to support --force-update + c.execute("ALTER TABLE files ADD COLUMN metadata TEXT;") + conn.commit() - def _migrate_5_0_to_6_0(self, conn): - c = conn.cursor() - - # add export_data table - c.execute( - """ CREATE TABLE IF NOT EXISTS export_data( - id INTEGER PRIMARY KEY, - filepath_normalized TEXT NOT NULL, - filepath TEXT NOT NULL, - uuid TEXT NOT NULL, - src_mode INTEGER, - src_size INTEGER, - src_mtime REAL, - dest_mode INTEGER, - dest_size INTEGER, - dest_mtime REAL, - digest TEXT, - exifdata JSON, - export_options INTEGER, - UNIQUE(filepath_normalized) - ); """, - ) - c.execute( - """ CREATE UNIQUE INDEX IF NOT EXISTS idx_export_data_filepath_normalized on export_data (filepath_normalized); """, - ) - - # migrate data - c.execute( - """ INSERT INTO export_data (filepath_normalized, filepath, uuid) SELECT filepath_normalized, filepath, uuid FROM files;""", - ) - c.execute( - """ UPDATE export_data - SET (src_mode, src_size, src_mtime) = - (SELECT mode, size, mtime - FROM edited - WHERE export_data.filepath_normalized = edited.filepath_normalized); - """, - ) - c.execute( - """ UPDATE export_data - SET (dest_mode, dest_size, dest_mtime) = - (SELECT orig_mode, orig_size, orig_mtime - FROM files - WHERE export_data.filepath_normalized = files.filepath_normalized); - """, - ) - c.execute( - """ UPDATE export_data SET digest = - (SELECT metadata FROM files - WHERE files.filepath_normalized = export_data.filepath_normalized - ); """ - ) - c.execute( - """ UPDATE export_data SET exifdata = - (SELECT json_exifdata FROM exifdata - WHERE exifdata.filepath_normalized = export_data.filepath_normalized - ); """ - ) - - # create config table - c.execute( - """ CREATE TABLE IF NOT EXISTS config ( - id INTEGER PRIMARY KEY, - datetime TEXT, - config TEXT - ); """ - ) - - # create photoinfo table - c.execute( - """ CREATE TABLE IF NOT EXISTS photoinfo ( - id INTEGER PRIMARY KEY, - uuid TEXT NOT NULL, - photoinfo JSON, - UNIQUE(uuid) - ); """ - ) - c.execute( - """CREATE UNIQUE INDEX IF NOT EXISTS idx_photoinfo_uuid on photoinfo (uuid);""" - ) - c.execute( - """ INSERT INTO photoinfo (uuid, photoinfo) SELECT uuid, json_info FROM info;""" - ) - - # drop indexes no longer needed - c.execute("DROP INDEX IF EXISTS idx_files_filepath_normalized;") - c.execute("DROP INDEX IF EXISTS idx_exifdata_filename;") - c.execute("DROP INDEX IF EXISTS idx_edited_filename;") - c.execute("DROP INDEX IF EXISTS idx_converted_filename;") - c.execute("DROP INDEX IF EXISTS idx_sidecar_filename;") - c.execute("DROP INDEX IF EXISTS idx_detected_text;") - - # drop tables no longer needed - c.execute("DROP TABLE IF EXISTS files;") - c.execute("DROP TABLE IF EXISTS info;") - c.execute("DROP TABLE IF EXISTS exifdata;") - c.execute("DROP TABLE IF EXISTS edited;") - c.execute("DROP TABLE IF EXISTS converted;") - c.execute("DROP TABLE IF EXISTS sidecar;") - c.execute("DROP TABLE IF EXISTS detected_text;") - - conn.commit() - - def _migrate_6_0_to_7_0(self, conn): - c = conn.cursor() - c.execute( - """CREATE TABLE IF NOT EXISTS export_results_data ( - id INTEGER PRIMARY KEY, - datetime TEXT, - export_results BLOB - );""" - ) - # pre-populate report_data table with 
blank fields - # ExportDB will use these as circular buffer always writing to the oldest record - for _ in range(MAX_EXPORT_RESULTS_DATA_ROWS): + def _migrate_5_0_to_6_0(self, conn: sqlite3.Connection): + with self.lock: + c = conn.cursor() + # add export_data table c.execute( - """INSERT INTO export_results_data (datetime, export_results) VALUES (?, ?);""", - (datetime.datetime.now().isoformat(), b""), + """ CREATE TABLE IF NOT EXISTS export_data( + id INTEGER PRIMARY KEY, + filepath_normalized TEXT NOT NULL, + filepath TEXT NOT NULL, + uuid TEXT NOT NULL, + src_mode INTEGER, + src_size INTEGER, + src_mtime REAL, + dest_mode INTEGER, + dest_size INTEGER, + dest_mtime REAL, + digest TEXT, + exifdata JSON, + export_options INTEGER, + UNIQUE(filepath_normalized) + ); """, + ) + c.execute( + """ CREATE UNIQUE INDEX IF NOT EXISTS idx_export_data_filepath_normalized on export_data (filepath_normalized); """, ) - # sleep a tiny bit just to ensure time stamps increment - time.sleep(0.001) - conn.commit() - def _migrate_7_0_to_7_1(self, conn): + # migrate data + c.execute( + """ INSERT INTO export_data (filepath_normalized, filepath, uuid) SELECT filepath_normalized, filepath, uuid FROM files;""", + ) + c.execute( + """ UPDATE export_data + SET (src_mode, src_size, src_mtime) = + (SELECT mode, size, mtime + FROM edited + WHERE export_data.filepath_normalized = edited.filepath_normalized); + """, + ) + c.execute( + """ UPDATE export_data + SET (dest_mode, dest_size, dest_mtime) = + (SELECT orig_mode, orig_size, orig_mtime + FROM files + WHERE export_data.filepath_normalized = files.filepath_normalized); + """, + ) + c.execute( + """ UPDATE export_data SET digest = + (SELECT metadata FROM files + WHERE files.filepath_normalized = export_data.filepath_normalized + ); """ + ) + c.execute( + """ UPDATE export_data SET exifdata = + (SELECT json_exifdata FROM exifdata + WHERE exifdata.filepath_normalized = export_data.filepath_normalized + ); """ + ) + + # create config table + c.execute( + """ CREATE TABLE IF NOT EXISTS config ( + id INTEGER PRIMARY KEY, + datetime TEXT, + config TEXT + ); """ + ) + + # create photoinfo table + c.execute( + """ CREATE TABLE IF NOT EXISTS photoinfo ( + id INTEGER PRIMARY KEY, + uuid TEXT NOT NULL, + photoinfo JSON, + UNIQUE(uuid) + ); """ + ) + c.execute( + """CREATE UNIQUE INDEX IF NOT EXISTS idx_photoinfo_uuid on photoinfo (uuid);""" + ) + c.execute( + """ INSERT INTO photoinfo (uuid, photoinfo) SELECT uuid, json_info FROM info;""" + ) + + # drop indexes no longer needed + c.execute("DROP INDEX IF EXISTS idx_files_filepath_normalized;") + c.execute("DROP INDEX IF EXISTS idx_exifdata_filename;") + c.execute("DROP INDEX IF EXISTS idx_edited_filename;") + c.execute("DROP INDEX IF EXISTS idx_converted_filename;") + c.execute("DROP INDEX IF EXISTS idx_sidecar_filename;") + c.execute("DROP INDEX IF EXISTS idx_detected_text;") + + # drop tables no longer needed + c.execute("DROP TABLE IF EXISTS files;") + c.execute("DROP TABLE IF EXISTS info;") + c.execute("DROP TABLE IF EXISTS exifdata;") + c.execute("DROP TABLE IF EXISTS edited;") + c.execute("DROP TABLE IF EXISTS converted;") + c.execute("DROP TABLE IF EXISTS sidecar;") + c.execute("DROP TABLE IF EXISTS detected_text;") + + conn.commit() + + def _migrate_6_0_to_7_0(self, conn: sqlite3.Connection): + with self.lock: + c = conn.cursor() + c.execute( + """CREATE TABLE IF NOT EXISTS export_results_data ( + id INTEGER PRIMARY KEY, + datetime TEXT, + export_results BLOB + );""" + ) + # pre-populate report_data table with blank 
fields + # ExportDB will use these as circular buffer always writing to the oldest record + for _ in range(MAX_EXPORT_RESULTS_DATA_ROWS): + c.execute( + """INSERT INTO export_results_data (datetime, export_results) VALUES (?, ?);""", + (datetime.datetime.now().isoformat(), b""), + ) + # sleep a tiny bit just to ensure time stamps increment + time.sleep(0.001) + conn.commit() + + def _migrate_7_0_to_7_1(self, conn: sqlite3.Connection): """Add timestamp column to export_data table and triggers to update it on insert and update.""" - c = conn.cursor() - # timestamp column should not exist but this prevents error if migration is run on an already migrated database - # reference #794 - results = c.execute( - "SELECT COUNT(*) FROM pragma_table_info('export_data') WHERE name='timestamp';" - ).fetchone() - if results[0] == 0: - c.execute("""ALTER TABLE export_data ADD COLUMN timestamp DATETIME;""") - c.execute( - """ - CREATE TRIGGER IF NOT EXISTS insert_timestamp_trigger - AFTER INSERT ON export_data - BEGIN - UPDATE export_data SET timestamp = STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW') WHERE id = NEW.id; - END; - """ - ) - c.execute( - """ - CREATE TRIGGER IF NOT EXISTS update_timestamp_trigger - AFTER UPDATE On export_data - BEGIN - UPDATE export_data SET timestamp = STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW') WHERE id = NEW.id; - END; - """ - ) - conn.commit() + with self.lock: + c = conn.cursor() + # timestamp column should not exist but this prevents error if migration is run on an already migrated database + # reference #794 + results = c.execute( + "SELECT COUNT(*) FROM pragma_table_info('export_data') WHERE name='timestamp';" + ).fetchone() + if results[0] == 0: + c.execute("""ALTER TABLE export_data ADD COLUMN timestamp DATETIME;""") + c.execute( + """ + CREATE TRIGGER IF NOT EXISTS insert_timestamp_trigger + AFTER INSERT ON export_data + BEGIN + UPDATE export_data SET timestamp = STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW') WHERE id = NEW.id; + END; + """ + ) + c.execute( + """ + CREATE TRIGGER IF NOT EXISTS update_timestamp_trigger + AFTER UPDATE On export_data + BEGIN + UPDATE export_data SET timestamp = STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW') WHERE id = NEW.id; + END; + """ + ) + conn.commit() - def _migrate_7_1_to_8_0(self, conn): + def _migrate_7_1_to_8_0(self, conn: sqlite3.Connection): """Add error column to export_data table""" - c = conn.cursor() - results = c.execute( - "SELECT COUNT(*) FROM pragma_table_info('export_data') WHERE name='error';" - ).fetchone() - if results[0] == 0: - c.execute("""ALTER TABLE export_data ADD COLUMN error JSON;""") - conn.commit() + with self.lock: + c = conn.cursor() + results = c.execute( + "SELECT COUNT(*) FROM pragma_table_info('export_data') WHERE name='error';" + ).fetchone() + if results[0] == 0: + c.execute("""ALTER TABLE export_data ADD COLUMN error JSON;""") + conn.commit() - def _perform_db_maintenace(self, conn): + def _perform_db_maintenance(self, conn: sqlite3.Connection): """Perform database maintenance""" - c = conn.cursor() - c.execute( - """DELETE FROM config - WHERE id < ( - SELECT MIN(id) - FROM (SELECT id FROM config ORDER BY id DESC LIMIT 9) - ); - """ - ) - conn.commit() + with self.lock: + c = conn.cursor() + c.execute( + """DELETE FROM config + WHERE id < ( + SELECT MIN(id) + FROM (SELECT id FROM config ORDER BY id DESC LIMIT 9) + ); + """ + ) + conn.commit() class ExportDBInMemory(ExportDB): @@ -813,7 +853,7 @@ class ExportDBInMemory(ExportDB): modifying the on-disk version """ - def __init__(self, dbfile: str, export_dir: str): + def 
__init__(self, dbfile: pathlib.Path | str, export_dir: pathlib.Path | str): """ "Initialize ExportDBInMemory Args: @@ -821,12 +861,18 @@ class ExportDBInMemory(ExportDB): export_dir (str): path to export directory write_back (bool): whether to write changes back to disk when closing; if False (default), changes are not written to disk """ - self._dbfile = dbfile or f"./{OSXPHOTOS_EXPORT_DB}" + self._dbfile = str(dbfile) or f"./{OSXPHOTOS_EXPORT_DB}" # export_dir is required as all files referenced by get_/set_uuid_for_file will be converted to # relative paths to this path # this allows the entire export tree to be moved to a new disk/location # whilst preserving the UUID to filename mapping - self._path = export_dir + self._path = str(export_dir) + + self.was_upgraded: tuple[str, str] | tuple = () + self.was_created = False + + self.lock = threading.Lock() + self._conn = self._open_export_db(self._dbfile) self._insert_run_info() @@ -835,38 +881,42 @@ class ExportDBInMemory(ExportDB): """Write changes from in-memory database back to disk""" # dump the database - conn = self._conn - conn.commit() - dbdump = self._dump_db(conn) + with self.lock: + conn = self.connection + conn.commit() + dbdump = self._dump_db(conn) - # cleanup the old on-disk database - # also unlink the wal and shm files if needed - dbfile = pathlib.Path(self._dbfile) - if dbfile.exists(): - dbfile.unlink() - wal = dbfile.with_suffix(".db-wal") - if wal.exists(): - wal.unlink() - shm = dbfile.with_suffix(".db-shm") - if shm.exists(): - shm.unlink() + # cleanup the old on-disk database + # also unlink the wal and shm files if needed + dbfile = pathlib.Path(self._dbfile) + if dbfile.exists(): + dbfile.unlink() + wal = dbfile.with_suffix(".db-wal") + if wal.exists(): + wal.unlink() + shm = dbfile.with_suffix(".db-shm") + if shm.exists(): + shm.unlink() - conn_on_disk = sqlite3.connect(str(dbfile)) - conn_on_disk.cursor().executescript(dbdump.read()) - conn_on_disk.commit() - conn_on_disk.close() + conn_on_disk = sqlite3.connect( + str(dbfile), check_same_thread=SQLITE_CHECK_SAME_THREAD + ) + conn_on_disk.cursor().executescript(dbdump.read()) + conn_on_disk.commit() + conn_on_disk.close() @retry( stop=stop_after_attempt(MAX_RETRY_ATTEMPTS), - retry_error_callback=retry_log_error_no_raise, # #999 + retry_error_callback=retry_log_error_no_raise, # #999 ) def close(self): """close the database connection""" if self._conn: self._conn.close() + self._conn = None @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS)) - def _open_export_db(self, dbfile): # sourcery skip: raise-specific-error + def _open_export_db(self, dbfile: str): # sourcery skip: raise-specific-error """open export database and return a db connection returns: connection to the database """ @@ -878,14 +928,15 @@ class ExportDBInMemory(ExportDB): self.was_created = True self.was_upgraded = () else: - conn = sqlite3.connect(dbfile) + conn = sqlite3.connect(dbfile, check_same_thread=SQLITE_CHECK_SAME_THREAD) dbdump = self._dump_db(conn) conn.close() # Create a database in memory and import from the dump - conn = sqlite3.connect(":memory:") + conn = sqlite3.connect( + ":memory:", check_same_thread=SQLITE_CHECK_SAME_THREAD + ) conn.cursor().executescript(dbdump.read()) - conn.commit() self.was_created = False version_info = self._get_database_version(conn) if version_info[1] < OSXPHOTOS_EXPORTDB_VERSION: @@ -899,10 +950,11 @@ class ExportDBInMemory(ExportDB): def _get_db_connection(self): """return db connection to in memory database""" - return sqlite3.connect(":memory:") 
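# --- illustrative aside: the in-memory write-back technique -----------------
# ExportDBInMemory's write_back() above relies on a standard sqlite3 round
# trip: dump the in-memory database to SQL text with Connection.iterdump()
# and replay it into a fresh on-disk file with executescript(). A minimal
# standalone sketch with assumed names; error handling and WAL/SHM cleanup
# (which the real method performs) are omitted here.
import sqlite3

def write_back(memory_conn: sqlite3.Connection, dbfile: str) -> None:
    # serialize every table, index, and row of the in-memory db to SQL
    script = "\n".join(memory_conn.iterdump())
    # replay the dump into a brand-new on-disk database
    disk_conn = sqlite3.connect(dbfile)
    disk_conn.executescript(script)
    disk_conn.commit()
    disk_conn.close()
# -----------------------------------------------------------------------------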
+ return sqlite3.connect(":memory:", check_same_thread=SQLITE_CHECK_SAME_THREAD) def _dump_db(self, conn: sqlite3.Connection) -> StringIO: """dump sqlite db to a string buffer""" + dbdump = StringIO() for line in conn.iterdump(): dbdump.write("%s\n" % line) @@ -919,38 +971,62 @@ class ExportDBTemp(ExportDBInMemory): """Temporary in-memory version of ExportDB""" def __init__(self): - self._temp_dir = TemporaryDirectory() - self._dbfile = f"{self._temp_dir.name}/{OSXPHOTOS_EXPORT_DB}" - self._path = self._temp_dir.name + self._dbfile = ":memory:" + self._path = "./" + + self.lock = threading.Lock() + + self.was_upgraded = () + self.was_created = False + self._conn = self._open_export_db(self._dbfile) self._insert_run_info() - def _relative_filepath(self, filepath: Union[str, pathlib.Path]) -> str: + def _relative_filepath(self, filepath: pathlib.Path | str) -> str: """Overrides _relative_filepath to return a path for use in the temp db""" filepath = str(filepath) - if filepath[0] == "/": - return filepath[1:] - return filepath + return filepath[1:] if filepath[0] == "/" else filepath class ExportRecord: """ExportRecord class""" + # Implementation note:all properties and setters must be aware of whether or not running + # as a context manager. If running as a context manager, the lock is not acquired by the + # getter/setter as the lock is acquired by the context manager. If not running as a + # context manager, the lock is acquired by the getter/setter. + __slots__ = [ "_conn", "_context_manager", "_filepath_normalized", + "lock", ] - def __init__(self, conn, filepath_normalized): + def __init__( + self, conn: sqlite3.Connection, lock: threading.Lock, filepath_normalized: str + ): self._conn = conn + self.lock = lock self._filepath_normalized = filepath_normalized self._context_manager = False @property - def filepath(self): + def connection(self) -> sqlite3.Connection: + """return connection""" + return self._conn + + @property + def filepath(self) -> str: """return filepath""" - conn = self._conn + if self._context_manager: + return self._filepath() + with self.lock: + return self._filepath() + + def _filepath(self) -> str: + """return filepath""" + conn = self.connection c = conn.cursor() if row := c.execute( "SELECT filepath FROM export_data WHERE filepath_normalized = ?;", @@ -963,14 +1039,21 @@ class ExportRecord: ) @property - def filepath_normalized(self): + def filepath_normalized(self) -> str: """return filepath_normalized""" return self._filepath_normalized @property - def uuid(self): + def uuid(self) -> str: """return uuid""" - conn = self._conn + if self._context_manager: + return self._uuid() + with self.lock: + return self._uuid() + + def _uuid(self) -> str: + """return uuid""" + conn = self.connection c = conn.cursor() if row := c.execute( "SELECT uuid FROM export_data WHERE filepath_normalized = ?;", @@ -981,9 +1064,25 @@ class ExportRecord: raise ValueError(f"No uuid found in database for {self._filepath_normalized}") @property - def digest(self): + def digest(self) -> str: """returns the digest value""" - conn = self._conn + if self._context_manager: + return self._digest() + with self.lock: + return self._digest() + + @digest.setter + def digest(self, value: str): + """set digest value""" + if self._context_manager: + self._digest_setter(value) + else: + with self.lock: + self._digest_setter(value) + + def _digest(self) -> str: + """returns the digest value""" + conn = self.connection c = conn.cursor() if row := c.execute( "SELECT digest FROM export_data WHERE 
filepath_normalized = ?;", @@ -993,10 +1092,9 @@ class ExportRecord: raise ValueError(f"No digest found in database for {self._filepath_normalized}") - @digest.setter - def digest(self, value): + def _digest_setter(self, value: str): """set digest value""" - conn = self._conn + conn = self.connection c = conn.cursor() c.execute( "UPDATE export_data SET digest = ? WHERE filepath_normalized = ?;", @@ -1006,9 +1104,25 @@ class ExportRecord: conn.commit() @property - def exifdata(self): + def exifdata(self) -> str: """returns exifdata value for record""" - conn = self._conn + if self._context_manager: + return self._exifdata() + with self.lock: + return self._exifdata() + + @exifdata.setter + def exifdata(self, value: str): + """set exifdata value""" + if self._context_manager: + self._exifdata_setter(value) + else: + with self.lock: + self._exifdata_setter(value) + + def _exifdata(self) -> str: + """returns exifdata value for record""" + conn = self.connection c = conn.cursor() if row := c.execute( "SELECT exifdata FROM export_data WHERE filepath_normalized = ?;", @@ -1020,10 +1134,9 @@ class ExportRecord: f"No exifdata found in database for {self._filepath_normalized}" ) - @exifdata.setter - def exifdata(self, value): + def _exifdata_setter(self, value: str): """set exifdata value""" - conn = self._conn + conn = self.connection c = conn.cursor() c.execute( "UPDATE export_data SET exifdata = ? WHERE filepath_normalized = ?;", @@ -1036,9 +1149,25 @@ class ExportRecord: conn.commit() @property - def src_sig(self): + def src_sig(self) -> tuple[int, int, int | None]: """return source file signature value""" - conn = self._conn + if self._context_manager: + return self._src_sig() + with self.lock: + return self._src_sig() + + @src_sig.setter + def src_sig(self, value: tuple[int, int, int | None]): + """set source file signature value""" + if self._context_manager: + self._src_sig_setter(value) + else: + with self.lock: + self._src_sig_setter(value) + + def _src_sig(self) -> tuple[int, int, int | None]: + """return source file signature value""" + conn = self.connection c = conn.cursor() if row := c.execute( "SELECT src_mode, src_size, src_mtime FROM export_data WHERE filepath_normalized = ?;", @@ -1051,10 +1180,9 @@ class ExportRecord: f"No src_sig found in database for {self._filepath_normalized}" ) - @src_sig.setter - def src_sig(self, value): + def _src_sig_setter(self, value: tuple[int, int, int | None]): """set source file signature value""" - conn = self._conn + conn = self.connection c = conn.cursor() c.execute( "UPDATE export_data SET src_mode = ?, src_size = ?, src_mtime = ? 
WHERE filepath_normalized = ?;", @@ -1069,9 +1197,25 @@ class ExportRecord: conn.commit() @property - def dest_sig(self): + def dest_sig(self) -> tuple[int, int, int | None]: """return destination file signature""" - conn = self._conn + if self._context_manager: + return self._dest_sig() + with self.lock: + return self._dest_sig() + + @dest_sig.setter + def dest_sig(self, value: tuple[int, int, int | None]): + """set destination file signature""" + if self._context_manager: + self._dest_sig_setter(value) + else: + with self.lock: + self._dest_sig_setter(value) + + def _dest_sig(self) -> tuple[int, int, int | None]: + """return destination file signature""" + conn = self.connection c = conn.cursor() if row := c.execute( "SELECT dest_mode, dest_size, dest_mtime FROM export_data WHERE filepath_normalized = ?;", @@ -1084,10 +1228,9 @@ class ExportRecord: f"No dest_sig found in database for {self._filepath_normalized}" ) - @dest_sig.setter - def dest_sig(self, value): + def _dest_sig_setter(self, value: tuple[int, int, int | None]): """set destination file signature""" - conn = self._conn + conn = self.connection c = conn.cursor() c.execute( "UPDATE export_data SET dest_mode = ?, dest_size = ?, dest_mtime = ? WHERE filepath_normalized = ?;", @@ -1102,32 +1245,81 @@ class ExportRecord: conn.commit() @property - def photoinfo(self): + def photoinfo(self) -> str: """Returns info value""" - conn = self._conn + if self._context_manager: + return self._photoinfo() + with self.lock: + return self._photoinfo() + + @photoinfo.setter + def photoinfo(self, value: str): + """Sets info value""" + if self._context_manager: + self._photoinfo_setter(value) + else: + with self.lock: + self._photoinfo_setter(value) + + def _photoinfo(self) -> str: + """Returns info value""" + conn = self.connection c = conn.cursor() + if row := c.execute( + "SELECT uuid FROM export_data WHERE filepath_normalized = ?;", + (self._filepath_normalized,), + ).fetchone(): + uuid = row[0] + else: + raise ValueError( + f"No uuid found in database for {self._filepath_normalized}" + ) row = c.execute( "SELECT photoinfo from photoinfo where uuid = ?;", - (self.uuid,), + (uuid,), ).fetchone() return row[0] if row else None - @photoinfo.setter - def photoinfo(self, value): + def _photoinfo_setter(self, value: str): """Sets info value""" - conn = self._conn + conn = self.connection c = conn.cursor() + if row := c.execute( + "SELECT uuid FROM export_data WHERE filepath_normalized = ?;", + (self._filepath_normalized,), + ).fetchone(): + uuid = row[0] + else: + raise ValueError( + f"No uuid found in database for {self._filepath_normalized}" + ) c.execute( "INSERT OR REPLACE INTO photoinfo (uuid, photoinfo) VALUES (?, ?);", - (self.uuid, value), + (uuid, value), ) if not self._context_manager: conn.commit() @property - def export_options(self): + def export_options(self) -> str: """Get export_options value""" - conn = self._conn + if self._context_manager: + return self._export_options() + with self.lock: + return self._export_options() + + @export_options.setter + def export_options(self, value: str): + """Set export_options value""" + if self._context_manager: + self._export_options_setter(value) + else: + with self.lock: + self._export_options_setter(value) + + def _export_options(self) -> str: + """Get export_options value""" + conn = self.connection c = conn.cursor() row = c.execute( "SELECT export_options from export_data where filepath_normalized = ?;", @@ -1135,10 +1327,9 @@ class ExportRecord: ).fetchone() return row[0] if row else None 
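# --- illustrative aside: context-manager-aware locking ----------------------
# The convention ExportRecord follows throughout these getters and setters,
# reduced to a standalone sketch: each property acquires the shared lock
# itself unless the record is being used as a context manager, in which case
# __enter__ has already acquired the lock for the duration of the with-block.
# Names below are illustrative only, not osxphotos APIs.
import threading

class LockedValue:
    def __init__(self, lock: threading.Lock):
        self.lock = lock
        self._context_manager = False
        self._value = None

    @property
    def value(self):
        if self._context_manager:
            return self._value  # lock already held by __enter__
        with self.lock:
            return self._value

    @value.setter
    def value(self, v):
        if self._context_manager:
            self._value = v  # __exit__ will release the lock
        else:
            with self.lock:
                self._value = v

    def __enter__(self):
        self._context_manager = True
        self.lock.acquire()  # held until __exit__ releases it
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._context_manager = False
        self.lock.release()

# usage: batch several accesses under one lock acquisition
# with LockedValue(threading.Lock()) as rec:
#     rec.value = "x"
#     current = rec.value
# -----------------------------------------------------------------------------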
- @export_options.setter - def export_options(self, value): + def _export_options_setter(self, value: str): """Set export_options value""" - conn = self._conn + conn = self.connection c = conn.cursor() c.execute( "UPDATE export_data SET export_options = ? WHERE filepath_normalized = ?;", @@ -1148,9 +1339,16 @@ class ExportRecord: conn.commit() @property - def timestamp(self): + def timestamp(self) -> str: """returns the timestamp value""" - conn = self._conn + if self._context_manager: + return self._timestamp() + with self.lock: + return self._timestamp() + + def _timestamp(self) -> str: + """returns the timestamp value""" + conn = self.connection c = conn.cursor() if row := c.execute( "SELECT timestamp FROM export_data WHERE filepath_normalized = ?;", @@ -1165,7 +1363,23 @@ class ExportRecord: @property def error(self) -> dict[str, Any] | None: """Return error value""" - conn = self._conn + if self._context_manager: + return self._error() + with self.lock: + return self._error() + + @error.setter + def error(self, value: dict[str, str] | None): + """Set error value""" + if self._context_manager: + self._error_setter(value) + else: + with self.lock: + self._error_setter(value) + + def _error(self) -> dict[str, Any] | None: + """Return error value""" + conn = self.connection c = conn.cursor() if row := c.execute( "SELECT error FROM export_data WHERE filepath_normalized = ?;", @@ -1175,13 +1389,11 @@ class ExportRecord: raise ValueError(f"No error found in database for {self._filepath_normalized}") - @error.setter - def error(self, value: dict[str, Any] | None): + def _error_setter(self, value: dict[str, str] | None): """Set error value""" - conn = self._conn + value = value or {} + conn = self.connection c = conn.cursor() - if value is None: - value = "" # use default=str because some of the values are Path objects error = json.dumps(value, default=str) c.execute( @@ -1191,7 +1403,7 @@ class ExportRecord: if not self._context_manager: conn.commit() - def asdict(self): + def asdict(self) -> dict[str, Any]: """Return dict of self""" exifdata = json.loads(self.exifdata) if self.exifdata else None photoinfo = json.loads(self.photoinfo) if self.photoinfo else None @@ -1209,17 +1421,19 @@ class ExportRecord: "photoinfo": photoinfo, } - def json(self, indent=None): - """Return json of self""" + def json(self, indent=None) -> str: + """Return json string of self""" return json.dumps(self.asdict(), indent=indent) def __enter__(self): self._context_manager = True + self.lock.acquire() return self def __exit__(self, exc_type, exc_value, traceback): - if exc_type: + if exc_type and self._conn.in_transaction: self._conn.rollback() - else: + elif self._conn.in_transaction: self._conn.commit() self._context_manager = False + self.lock.release() diff --git a/osxphotos/export_db_utils.py b/osxphotos/export_db_utils.py index 51f7da42..89078626 100644 --- a/osxphotos/export_db_utils.py +++ b/osxphotos/export_db_utils.py @@ -16,7 +16,7 @@ from rich import print from osxphotos.photoinfo import PhotoInfo -from ._constants import OSXPHOTOS_EXPORT_DB +from ._constants import OSXPHOTOS_EXPORT_DB, SQLITE_CHECK_SAME_THREAD from ._version import __version__ from .configoptions import ConfigOptions from .export_db import OSXPHOTOS_EXPORTDB_VERSION, ExportDB @@ -50,7 +50,7 @@ def export_db_get_version( dbfile: Union[str, pathlib.Path] ) -> Tuple[Optional[int], Optional[int]]: """returns version from export database as tuple of (osxphotos version, export_db version)""" - conn = sqlite3.connect(str(dbfile)) + conn = 
sqlite3.connect(str(dbfile), check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() if row := c.execute( "SELECT osxphotos, exportdb FROM version ORDER BY id DESC LIMIT 1;" @@ -61,7 +61,7 @@ def export_db_get_version( def export_db_vacuum(dbfile: Union[str, pathlib.Path]) -> None: """Vacuum export database""" - conn = sqlite3.connect(str(dbfile)) + conn = sqlite3.connect(str(dbfile), check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() c.execute("VACUUM;") conn.commit() @@ -79,7 +79,7 @@ def export_db_update_signatures( """ export_dir = pathlib.Path(export_dir) fileutil = FileUtil - conn = sqlite3.connect(str(dbfile)) + conn = sqlite3.connect(str(dbfile), check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() c.execute("SELECT filepath_normalized, filepath FROM export_data;") rows = c.fetchall() @@ -114,7 +114,7 @@ def export_db_get_last_run( export_db: Union[str, pathlib.Path] ) -> Tuple[Optional[str], Optional[str]]: """Get last run from export database""" - conn = sqlite3.connect(str(export_db)) + conn = sqlite3.connect(str(export_db), check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() if row := c.execute( "SELECT datetime, args FROM runs ORDER BY id DESC LIMIT 1;" @@ -127,7 +127,7 @@ def export_db_get_errors( export_db: Union[str, pathlib.Path] ) -> Tuple[Optional[str], Optional[str]]: """Get errors from export database""" - conn = sqlite3.connect(str(export_db)) + conn = sqlite3.connect(str(export_db), check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() results = c.execute( "SELECT filepath, uuid, timestamp, error FROM export_data WHERE error is not null ORDER BY timestamp DESC;" @@ -145,7 +145,7 @@ def export_db_save_config_to_file( """Save export_db last run config to file""" export_db = pathlib.Path(export_db) config_file = pathlib.Path(config_file) - conn = sqlite3.connect(str(export_db)) + conn = sqlite3.connect(str(export_db), check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() row = c.execute("SELECT config FROM config ORDER BY id DESC LIMIT 1;").fetchone() if not row: @@ -163,7 +163,7 @@ def export_db_get_config( export_db: path to export database override: if True, any loaded config values will overwrite existing values in config """ - conn = sqlite3.connect(str(export_db)) + conn = sqlite3.connect(str(export_db), check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() row = c.execute("SELECT config FROM config ORDER BY id DESC LIMIT 1;").fetchone() return ( @@ -184,7 +184,7 @@ def export_db_check_signatures( """ export_dir = pathlib.Path(export_dir) fileutil = FileUtil - conn = sqlite3.connect(str(dbfile)) + conn = sqlite3.connect(str(dbfile), check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() c.execute("SELECT filepath_normalized, filepath FROM export_data;") rows = c.fetchall() @@ -236,7 +236,7 @@ def export_db_touch_files( ) exportdb.close() - conn = sqlite3.connect(str(dbfile)) + conn = sqlite3.connect(str(dbfile), check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() if row := c.execute( "SELECT config FROM config ORDER BY id DESC LIMIT 1;" @@ -318,7 +318,7 @@ def export_db_migrate_photos_library( and update the UUIDs in the export database """ verbose(f"Loading data from export database {dbfile}") - conn = sqlite3.connect(str(dbfile)) + conn = sqlite3.connect(str(dbfile), check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() results = c.execute("SELECT uuid, photoinfo FROM photoinfo;").fetchall() exportdb_uuids = {} @@ -495,7 +495,7 @@ def export_db_get_last_library(dbpath: 
Union[str, pathlib.Path]) -> str: str: name of library used to export from or "" if not found """ dbpath = pathlib.Path(dbpath) - conn = sqlite3.connect(str(dbpath)) + conn = sqlite3.connect(str(dbpath), check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() if results := c.execute( """ diff --git a/osxphotos/frozen_photoinfo.py b/osxphotos/frozen_photoinfo.py new file mode 100644 index 00000000..bbfea423 --- /dev/null +++ b/osxphotos/frozen_photoinfo.py @@ -0,0 +1,169 @@ +"""Freeze a PhotoInfo object to allow it to be used in concurrent.futures.""" + +from __future__ import annotations + +import datetime +import json +import logging +import os +import re +from types import SimpleNamespace +from typing import Any + +from osxmetadata import OSXMetaData + +import osxphotos + +from ._constants import TEXT_DETECTION_CONFIDENCE_THRESHOLD +from .exiftool import ExifToolCaching, get_exiftool_path +from .phototemplate import PhotoTemplate, RenderOptions +from .text_detection import detect_text + + +def frozen_photoinfo_factory(photo: "osxphotos.photoinfo.PhotoInfo") -> SimpleNamespace: + """Return a frozen SimpleNamespace object for a PhotoInfo object""" + photo_json = photo.json() + + def _object_hook(d: dict[Any, Any]): + if not d: + return d + + # if d key matches a ISO 8601 datetime ('2023-03-24T06:46:57.690786', '2019-07-04T16:24:01-07:00', '2019-07-04T16:24:01+07:00'), convert to datetime + # fromisoformat will also handle dates with timezone offset in form +0700, etc. + for k, v in d.items(): + if isinstance(v, str) and re.match( + r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[.]?\d*[+-]?\d{2}[:]?\d{2}?", v + ): + d[k] = datetime.datetime.fromisoformat(v) + return SimpleNamespace(**d) + + frozen = json.loads(photo_json, object_hook=lambda d: _object_hook(d)) + + # add on json() method to frozen object + def _json(*args): + return photo_json + + frozen.json = _json + + # add hexdigest property to frozen object + frozen.hexdigest = photo.hexdigest + + # add on detected_text method to frozen object + frozen = _add_detected_text(frozen) + + # add on exiftool property to frozen object + frozen = _add_exiftool(frozen, photo) + + # add on render_template method to frozen object + frozen = _add_render_template(frozen) + + # add on the _db property to frozen object + # frozen objects don't really have a _db class but some things expect it (e.g. _db._beta) + frozen._db = SimpleNamespace(_beta=photo._db._beta) + + return frozen + + +def _add_detected_text(frozen: SimpleNamespace) -> SimpleNamespace: + """Add detected_text method to frozen PhotoInfo object""" + + def detected_text(confidence_threshold=TEXT_DETECTION_CONFIDENCE_THRESHOLD): + """Detects text in photo and returns lists of results as (detected text, confidence) + + confidence_threshold: float between 0.0 and 1.0. If text detection confidence is below this threshold, + text will not be returned. 
Default is TEXT_DETECTION_CONFIDENCE_THRESHOLD + + If photo is edited, uses the edited photo, otherwise the original; falls back to the preview image if neither edited or original is available + + Returns: list of (detected text, confidence) tuples + """ + + try: + return frozen._detected_text_cache[confidence_threshold] + except (AttributeError, KeyError) as e: + if isinstance(e, AttributeError): + frozen._detected_text_cache = {} + + try: + detected_text = frozen._detected_text() + except Exception as e: + logging.warning(f"Error detecting text in photo {frozen.uuid}: {e}") + detected_text = [] + + frozen._detected_text_cache[confidence_threshold] = [ + (text, confidence) + for text, confidence in detected_text + if confidence >= confidence_threshold + ] + return frozen._detected_text_cache[confidence_threshold] + + def _detected_text(): + """detect text in photo, either from cached extended attribute or by attempting text detection""" + path = ( + frozen.path_edited + if frozen.hasadjustments and frozen.path_edited + else frozen.path + ) + path = path or frozen.path_derivatives[0] if frozen.path_derivatives else None + if not path: + return [] + + md = OSXMetaData(path) + try: + + def decoder(val): + """Decode value from JSON""" + return json.loads(val.decode("utf-8")) + + detected_text = md.get_xattr( + "osxphotos.metadata:detected_text", decode=decoder + ) + except KeyError: + detected_text = None + if detected_text is None: + orientation = frozen.orientation or None + detected_text = detect_text(path, orientation) + + def encoder(obj): + """Encode value as JSON""" + val = json.dumps(obj) + return val.encode("utf-8") + + md.set_xattr( + "osxphotos.metadata:detected_text", detected_text, encode=encoder + ) + return detected_text + + frozen.detected_text = detected_text + frozen._detected_text = _detected_text + + return frozen + + +def _add_exiftool( + frozen: SimpleNamespace, photo: "osxphotos.photoinfo.PhotoInfo" +) -> SimpleNamespace: + """Add exiftool property to frozen PhotoInfo object""" + frozen._exiftool_path = photo._db._exiftool_path or None + return frozen + + +def _add_render_template(frozen: SimpleNamespace) -> SimpleNamespace: + """Add render_template method to frozen PhotoInfo object""" + + def render_template(template_str: str, options: RenderOptions | None = None): + """Renders a template string for PhotoInfo instance using PhotoTemplate + + Args: + template_str: a template string with fields to render + options: a RenderOptions instance + + Returns: + ([rendered_strings], [unmatched]): tuple of list of rendered strings and list of unmatched template values + """ + options = options or RenderOptions() + template = PhotoTemplate(frozen, exiftool_path=frozen._exiftool_path) + return template.render(template_str, options) + + frozen.render_template = render_template + return frozen diff --git a/osxphotos/photodates.py b/osxphotos/photodates.py index fc466c11..d2da6e5d 100644 --- a/osxphotos/photodates.py +++ b/osxphotos/photodates.py @@ -11,7 +11,7 @@ import photoscript from strpdatetime import strpdatetime from tenacity import retry, stop_after_attempt, wait_exponential -from ._constants import _DB_TABLE_NAMES +from ._constants import _DB_TABLE_NAMES, SQLITE_CHECK_SAME_THREAD from .datetime_utils import ( datetime_has_tz, datetime_remove_tz, @@ -219,7 +219,7 @@ def _set_date_added(library_path: str, uuid: str, date_added: datetime.datetime) asset_table = _DB_TABLE_NAMES[photos_version]["ASSET"] timestamp = datetime_to_photos_timestamp(date_added) - conn = 
sqlite3.connect(db_path) + conn = sqlite3.connect(db_path, check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() c.execute( f"UPDATE {asset_table} SET ZADDEDDATE=? WHERE ZUUID=?", @@ -268,7 +268,7 @@ def get_photo_date_added( photos_version = get_photos_library_version(library_path) db_path = str(pathlib.Path(library_path) / "database/Photos.sqlite") asset_table = _DB_TABLE_NAMES[photos_version]["ASSET"] - conn = sqlite3.connect(db_path) + conn = sqlite3.connect(db_path, check_same_thread=SQLITE_CHECK_SAME_THREAD) c = conn.cursor() c.execute( f"SELECT ZADDEDDATE FROM {asset_table} WHERE ZUUID=?", diff --git a/osxphotos/photoexporter.py b/osxphotos/photoexporter.py index fe34c469..470bf1f7 100644 --- a/osxphotos/photoexporter.py +++ b/osxphotos/photoexporter.py @@ -12,6 +12,7 @@ from collections import namedtuple # pylint: disable=syntax-error from dataclasses import asdict, dataclass from datetime import datetime from enum import Enum +from types import SimpleNamespace import photoscript from mako.template import Template @@ -31,7 +32,7 @@ from ._constants import ( ) from ._version import __version__ from .datetime_utils import datetime_tz_to_utc -from .exiftool import ExifTool, exiftool_can_write +from .exiftool import ExifTool, ExifToolCaching, exiftool_can_write, get_exiftool_path from .export_db import ExportDB, ExportDBTemp from .fileutil import FileUtil from .photokit import ( @@ -68,6 +69,7 @@ if t.TYPE_CHECKING: # retry if download_missing/use_photos_export fails the first time (which sometimes it does) MAX_PHOTOSCRIPT_RETRIES = 3 + # return values for _should_update_photo class ShouldUpdate(Enum): NOT_IN_DATABASE = 1 @@ -309,7 +311,6 @@ class ExportResults: xattr_skipped=None, xattr_written=None, ): - local_vars = locals() self._datetime = datetime.now().isoformat() for attr in self.attributes: @@ -374,7 +375,7 @@ class PhotoExporter: def __init__(self, photo: "PhotoInfo", tmpdir: t.Optional[str] = None): self.photo = photo self._render_options = RenderOptions() - self._verbose = self.photo._verbose + self._verbose = photo._verbose # define functions for adding markup self._filepath = add_rich_markup_tag("filepath", rich=False) @@ -950,7 +951,8 @@ class PhotoExporter: """Stage a photo for export with AppleScript to a temporary directory Note: If exporting an edited live photo, the associated live video will not be exported. - This is a limitation of the Photos AppleScript interface and Photos behaves the same way.""" + This is a limitation of the Photos AppleScript interface and Photos behaves the same way. 
+ """ if options.edited and not self.photo.hasadjustments: raise ValueError("Edited version requested but photo has no adjustments") @@ -1564,7 +1566,7 @@ class PhotoExporter: with ExifTool( filepath, flags=options.exiftool_flags, - exiftool=self.photo._db._exiftool_path, + exiftool=self.photo._exiftool_path, ) as exiftool: for exiftag, val in exif_info.items(): if type(val) == list: @@ -1744,7 +1746,6 @@ class PhotoExporter: elif self.photo.ismovie: exif["Keys:GPSCoordinates"] = f"{lat} {lon}" exif["UserData:GPSCoordinates"] = f"{lat} {lon}" - # process date/time and timezone offset # Photos exports the following fields and sets modify date to creation date # [EXIF] Modify Date : 2020:10:30 00:00:00 @@ -1854,7 +1855,7 @@ class PhotoExporter: def _get_exif_keywords(self): """returns list of keywords found in the file's exif metadata""" keywords = [] - exif = self.photo.exiftool + exif = exiftool_caching(self.photo) if exif: exifdict = exif.asdict() for field in ["IPTC:Keywords", "XMP:TagsList", "XMP:Subject"]: @@ -1871,7 +1872,7 @@ class PhotoExporter: def _get_exif_persons(self): """returns list of persons found in the file's exif metadata""" persons = [] - exif = self.photo.exiftool + exif = exiftool_caching(self.photo) if exif: exifdict = exif.asdict() try: @@ -2142,3 +2143,32 @@ def rename_jpeg_files(files, jpeg_ext, fileutil): else: new_files.append(file) return new_files + + +def exiftool_caching(photo: SimpleNamespace) -> ExifToolCaching: + """Return ExifToolCaching object for photo + + Args: + photo: SimpleNamespace object with photo info + + Returns: + ExifToolCaching object + """ + try: + return photo._exiftool_caching + except AttributeError: + try: + exiftool_path = photo._exiftool_path or get_exiftool_path() + if photo.path is not None and os.path.isfile(photo.path): + exiftool = ExifToolCaching(photo.path, exiftool=exiftool_path) + else: + exiftool = None + except FileNotFoundError: + # get_exiftool_path raises FileNotFoundError if exiftool not found + exiftool = None + logging.warning( + "exiftool not in path; download and install from https://exiftool.org/" + ) + + photo._exiftool_caching = exiftool + return photo._exiftool_caching diff --git a/osxphotos/photoinfo.py b/osxphotos/photoinfo.py index 91490ef3..54332356 100644 --- a/osxphotos/photoinfo.py +++ b/osxphotos/photoinfo.py @@ -8,14 +8,16 @@ import contextlib import dataclasses import datetime import json +import logging import os import os.path import pathlib import plistlib +import re from datetime import timedelta, timezone from functools import cached_property +from types import SimpleNamespace from typing import Any, Dict, Optional -import logging import yaml from osxmetadata import OSXMetaData @@ -66,7 +68,7 @@ from .text_detection import detect_text from .uti import get_preferred_uti_extension, get_uti_for_extension from .utils import _get_resource_loc, hexdigest, list_directory -__all__ = ["PhotoInfo", "PhotoInfoNone"] +__all__ = ["PhotoInfo", "PhotoInfoNone", "frozen_photoinfo_factory"] logger = logging.getLogger("osxphotos") @@ -81,6 +83,7 @@ class PhotoInfo: self._uuid: str = uuid self._info: dict[str, Any] = info self._db: "osxphotos.PhotosDB" = db + self._exiftool_path = self._db._exiftool_path self._verbose = self._db._verbose @property @@ -388,6 +391,8 @@ class PhotoInfo: """return path_edited_live_photo for Photos <= 4""" if self._db._db_version > _PHOTOS_4_VERSION: raise RuntimeError("Wrong database format!") + if not self.live_photo: + return None photopath = 
self._get_predicted_path_edited_live_photo_4() if photopath is not None and not os.path.isfile(photopath): # the heuristic failed, so try to find the file @@ -401,10 +406,6 @@ class PhotoInfo: ), None, ) - if photopath is None: - logger.debug( - f"MISSING PATH: edited live photo file for UUID {self._uuid} does not appear to exist" - ) return photopath def _path_edited_5_live_photo(self): @@ -1198,7 +1199,6 @@ class PhotoInfo: """ if self._db._db_version <= _PHOTOS_4_VERSION: - logger.debug(f"score not implemented for this database version") return None try: @@ -1344,7 +1344,6 @@ class PhotoInfo: """ if self._db._db_version <= _PHOTOS_4_VERSION: - logger.debug(f"exif_info not implemented for this database version") return None try: @@ -1427,11 +1426,14 @@ class PhotoInfo: def hexdigest(self): """Returns a unique digest of the photo's properties and metadata; useful for detecting changes in any property/metadata of the photo""" - return hexdigest(self.json()) + return hexdigest(self._json_hexdigest()) @cached_property - def cloud_metadata(self) -> Dict: - """Returns contents of ZCLOUDMASTERMEDIAMETADATA as dict""" + def cloud_metadata(self) -> dict[Any, Any]: + """Returns contents of ZCLOUDMASTERMEDIAMETADATA as dict; Photos 5+ only""" + if self._db._db_version <= _PHOTOS_4_VERSION: + return {} + # This is a large blob of data so don't load it unless requested asset_table = _DB_TABLE_NAMES[self._db._photos_ver]["ASSET"] sql_cloud_metadata = f""" @@ -1442,10 +1444,6 @@ class PhotoInfo: WHERE {asset_table}.ZUUID = ? """ - if self._db._db_version <= _PHOTOS_4_VERSION: - logger.debug(f"cloud_metadata not implemented for this database version") - return {} - _, cursor = self._db.get_db_connection() metadata = {} if results := cursor.execute(sql_cloud_metadata, (self.uuid,)).fetchone(): @@ -1782,80 +1780,112 @@ class PhotoInfo: def asdict(self): """return dict representation""" - folders = {album.title: album.folder_names for album in self.album_info} - exif = dataclasses.asdict(self.exif_info) if self.exif_info else {} - place = self.place.asdict() if self.place else {} - score = dataclasses.asdict(self.score) if self.score else {} + adjustments = self.adjustments.asdict() if self.adjustments else {} + album_info = [album.asdict() for album in self.album_info] + burst_album_info = [a.asdict() for a in self.burst_album_info] + burst_photos = [p.uuid for p in self.burst_photos] comments = [comment.asdict() for comment in self.comments] + exif_info = dataclasses.asdict(self.exif_info) if self.exif_info else {} + face_info = [face.asdict() for face in self.face_info] + folders = {album.title: album.folder_names for album in self.album_info} + import_info = self.import_info.asdict() if self.import_info else {} likes = [like.asdict() for like in self.likes] - faces = [face.asdict() for face in self.face_info] + person_info = [p.asdict() for p in self.person_info] + place = self.place.asdict() if self.place else {} + project_info = [p.asdict() for p in self.project_info] + score = dataclasses.asdict(self.score) if self.score else {} search_info = self.search_info.asdict() if self.search_info else {} + search_info_normalized = ( + self.search_info_normalized.asdict() if self.search_info_normalized else {} + ) return { - "library": self._db._library_path, - "uuid": self.uuid, - "filename": self.filename, - "original_filename": self.original_filename, + "adjustments": adjustments, + "album_info": album_info, + "albums": self.albums, + "burst_album_info": burst_album_info, + "burst_albums": 
self.burst_albums, + "burst_default_pick": self.burst_default_pick, + "burst_key": self.burst_key, + "burst_photos": burst_photos, + "burst_selected": self.burst_selected, + "burst": self.burst, + "cloud_guid": self.cloud_guid, + "cloud_metadata": self.cloud_metadata, + "cloud_owner_hashed_id": self.cloud_owner_hashed_id, + "comments": comments, + "date_added": self.date_added, + "date_modified": self.date_modified, + "date_trashed": self.date_trashed, "date": self.date, "description": self.description, - "title": self.title, - "keywords": self.keywords, - "labels": self.labels, - "keywords": self.keywords, - "albums": self.albums, - "folders": folders, - "persons": self.persons, - "faces": faces, - "path": self.path, - "ismissing": self.ismissing, - "hasadjustments": self.hasadjustments, + "exif_info": exif_info, "external_edit": self.external_edit, + "face_info": face_info, "favorite": self.favorite, + "filename": self.filename, + "fingerprint": self.fingerprint, + "folders": folders, + "has_raw": self.has_raw, + "hasadjustments": self.hasadjustments, + "hdr": self.hdr, + "height": self.height, "hidden": self.hidden, - "latitude": self._latitude, - "longitude": self._longitude, - "path_edited": self.path_edited, - "shared": self.shared, - "isphoto": self.isphoto, - "ismovie": self.ismovie, - "uti": self.uti, - "uti_original": self.uti_original, - "burst": self.burst, - "live_photo": self.live_photo, - "path_live_photo": self.path_live_photo, - "iscloudasset": self.iscloudasset, + "import_info": import_info, "incloud": self.incloud, + "intrash": self.intrash, + "iscloudasset": self.iscloudasset, + "ismissing": self.ismissing, + "ismovie": self.ismovie, + "isphoto": self.isphoto, + "israw": self.israw, "isreference": self.isreference, - "date_modified": self.date_modified, + "keywords": self.keywords, + "labels_normalized": self.labels_normalized, + "labels": self.labels, + "latitude": self._latitude, + "library": self._db._library_path, + "likes": likes, + "live_photo": self.live_photo, + "location": self.location, + "longitude": self._longitude, + "orientation": self.orientation, + "original_filename": self.original_filename, + "original_filesize": self.original_filesize, + "original_height": self.original_height, + "original_orientation": self.original_orientation, + "original_width": self.original_width, + "owner": self.owner, + "panorama": self.panorama, + "path_derivatives": self.path_derivatives, + "path_edited_live_photo": self.path_edited_live_photo, + "path_edited": self.path_edited, + "path_live_photo": self.path_live_photo, + "path_raw": self.path_raw, + "path": self.path, + "person_info": person_info, + "persons": self.persons, + "place": place, "portrait": self.portrait, + "project_info": project_info, + "raw_original": self.raw_original, + "score": score, "screenshot": self.screenshot, + "search_info_normalized": search_info_normalized, + "search_info": search_info, + "selfie": self.selfie, + "shared": self.shared, "slow_mo": self.slow_mo, "time_lapse": self.time_lapse, - "hdr": self.hdr, - "selfie": self.selfie, - "panorama": self.panorama, - "has_raw": self.has_raw, - "israw": self.israw, - "raw_original": self.raw_original, + "title": self.title, + "tzoffset": self.tzoffset, + "uti_edited": self.uti_edited, + "uti_original": self.uti_original, "uti_raw": self.uti_raw, - "path_raw": self.path_raw, - "place": place, - "exif": exif, - "score": score, - "intrash": self.intrash, - "height": self.height, + "uti": self.uti, + "uuid": self.uuid, + "visible": self.visible, 
"width": self.width, - "orientation": self.orientation, - "original_height": self.original_height, - "original_width": self.original_width, - "original_orientation": self.original_orientation, - "original_filesize": self.original_filesize, - "comments": comments, - "likes": likes, - "search_info": search_info, - "fingerprint": self.fingerprint, - "cloud_guid": self.cloud_guid, - "cloud_owner_hashed_id": self.cloud_owner_hashed_id, } def json(self): @@ -1867,8 +1897,44 @@ class PhotoInfo: dict_data = self.asdict() for k, v in dict_data.items(): + # sort lists such as keywords so JSON is consistent + # but do not sort certain items like location + if k in ["location"]: + continue if v and isinstance(v, (list, tuple)) and not isinstance(v[0], dict): - dict_data[k] = sorted(v) + dict_data[k] = sorted(v, key=lambda v: v if v is not None else "") + return json.dumps(dict_data, sort_keys=True, default=default) + + def _json_hexdigest(self): + """JSON for use by hexdigest()""" + + # This differs from json() because hexdigest must not change if metadata changed + # With json(), sort order of lists of dicts is not consistent but these aren't needed + # for computing hexdigest so we can ignore them + # also don't use visible because it changes based on Photos UI state + + def default(o): + if isinstance(o, (datetime.date, datetime.datetime)): + return o.isoformat() + + dict_data = self.asdict() + + for k in [ + "album_info", + "burst_album_info", + "face_info", + "person_info", + "visible", + ]: + del dict_data[k] + + for k, v in dict_data.items(): + # sort lists such as keywords so JSON is consistent + # but do not sort certain items like location + if k in ["location"]: + continue + if v and isinstance(v, (list, tuple)) and not isinstance(v[0], dict): + dict_data[k] = sorted(v, key=lambda v: v if v is not None else "") return json.dumps(dict_data, sort_keys=True, default=default) def __eq__(self, other): @@ -1893,10 +1959,111 @@ class PhotoInfo: class PhotoInfoNone: - """mock class that returns None for all attributes""" + """Mock class that returns None for all attributes""" def __init__(self): pass def __getattribute__(self, name): return None + + +def frozen_photoinfo_factory(photo: PhotoInfo) -> SimpleNamespace: + """Return a frozen SimpleNamespace object for a PhotoInfo object""" + photo_json = photo.json() + + def _object_hook(d: dict[Any, Any]): + if not d: + return d + + # if d key matches a ISO 8601 datetime ('2023-03-24T06:46:57.690786', '2019-07-04T16:24:01-07:00', '2019-07-04T16:24:01+07:00'), convert to datetime + # fromisoformat will also handle dates with timezone offset in form +0700, etc. + for k, v in d.items(): + if isinstance(v, str) and re.match( + r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[.]?\d*[+-]?\d{2}[:]?\d{2}?", v + ): + d[k] = datetime.datetime.fromisoformat(v) + return SimpleNamespace(**d) + + frozen = json.loads(photo_json, object_hook=lambda d: _object_hook(d)) + + # add on json() method to frozen object + def _json(*args): + return photo_json + + frozen.json = _json + + # add hexdigest property to frozen object + frozen.hexdigest = photo.hexdigest + + def detected_text(confidence_threshold=TEXT_DETECTION_CONFIDENCE_THRESHOLD): + """Detects text in photo and returns lists of results as (detected text, confidence) + + confidence_threshold: float between 0.0 and 1.0. If text detection confidence is below this threshold, + text will not be returned. 
Default is TEXT_DETECTION_CONFIDENCE_THRESHOLD + + If photo is edited, uses the edited photo, otherwise the original; falls back to the preview image if neither edited or original is available + + Returns: list of (detected text, confidence) tuples + """ + + try: + return frozen._detected_text_cache[confidence_threshold] + except (AttributeError, KeyError) as e: + if isinstance(e, AttributeError): + frozen._detected_text_cache = {} + + try: + detected_text = frozen._detected_text() + except Exception as e: + logging.warning(f"Error detecting text in photo {frozen.uuid}: {e}") + detected_text = [] + + frozen._detected_text_cache[confidence_threshold] = [ + (text, confidence) + for text, confidence in detected_text + if confidence >= confidence_threshold + ] + return frozen._detected_text_cache[confidence_threshold] + + def _detected_text(): + """detect text in photo, either from cached extended attribute or by attempting text detection""" + path = ( + frozen.path_edited + if frozen.hasadjustments and frozen.path_edited + else frozen.path + ) + path = path or frozen.path_derivatives[0] if frozen.path_derivatives else None + if not path: + return [] + + md = OSXMetaData(path) + try: + + def decoder(val): + """Decode value from JSON""" + return json.loads(val.decode("utf-8")) + + detected_text = md.get_xattr( + "osxphotos.metadata:detected_text", decode=decoder + ) + except KeyError: + detected_text = None + if detected_text is None: + orientation = frozen.orientation or None + detected_text = detect_text(path, orientation) + + def encoder(obj): + """Encode value as JSON""" + val = json.dumps(obj) + return val.encode("utf-8") + + md.set_xattr( + "osxphotos.metadata:detected_text", detected_text, encode=encoder + ) + return detected_text + + frozen.detected_text = detected_text + frozen._detected_text = _detected_text + + return frozen diff --git a/osxphotos/photosdb/photosdb.py b/osxphotos/photosdb/photosdb.py index 23dba62f..348027b7 100644 --- a/osxphotos/photosdb/photosdb.py +++ b/osxphotos/photosdb/photosdb.py @@ -284,6 +284,9 @@ class PhotosDB: # key is Z_PK of ZMOMENT table and values are the moment info self._db_moment_pk = {} + # Dict to hold data on imports for Photos <= 4 + self._db_import_group = {} + logger.debug(f"dbfile = {dbfile}") if dbfile is None: diff --git a/osxphotos/phototemplate.py b/osxphotos/phototemplate.py index 3a818d70..4c8de213 100644 --- a/osxphotos/phototemplate.py +++ b/osxphotos/phototemplate.py @@ -1352,7 +1352,7 @@ class PhotoTemplate: subfield = subfield.lower() if subfield in exifdict: values = exifdict[subfield] - values = [values] if not isinstance(values, list) else values + values = values if isinstance(values, list) else [values] values = [str(v) for v in values] # sanitize directory names if needed diff --git a/osxphotos/phototz.py b/osxphotos/phototz.py index 2c0a0d0c..91103a1f 100644 --- a/osxphotos/phototz.py +++ b/osxphotos/phototz.py @@ -11,7 +11,7 @@ from typing import Callable, Optional, Tuple from photoscript import Photo from tenacity import retry, stop_after_attempt, wait_exponential -from ._constants import _DB_TABLE_NAMES +from ._constants import _DB_TABLE_NAMES, SQLITE_CHECK_SAME_THREAD from .photosdb.photosdb_utils import get_photos_library_version from .timezones import Timezone from .utils import get_last_library_path, get_system_library_path, noop @@ -67,7 +67,9 @@ class PhotoTimeZone: ON ZADDITIONALASSETATTRIBUTES.ZASSET = {self.ASSET_TABLE}.Z_PK WHERE {self.ASSET_TABLE}.ZUUID = '{uuid}' """ - with sqlite3.connect(self.db_path) as 
diff --git a/osxphotos/photosdb/photosdb.py b/osxphotos/photosdb/photosdb.py
index 23dba62f..348027b7 100644
--- a/osxphotos/photosdb/photosdb.py
+++ b/osxphotos/photosdb/photosdb.py
@@ -284,6 +284,9 @@ class PhotosDB:
         # key is Z_PK of ZMOMENT table and values are the moment info
         self._db_moment_pk = {}
 
+        # Dict to hold data on imports for Photos <= 4
+        self._db_import_group = {}
+
         logger.debug(f"dbfile = {dbfile}")
 
         if dbfile is None:
diff --git a/osxphotos/phototemplate.py b/osxphotos/phototemplate.py
index 3a818d70..4c8de213 100644
--- a/osxphotos/phototemplate.py
+++ b/osxphotos/phototemplate.py
@@ -1352,7 +1352,7 @@ class PhotoTemplate:
             subfield = subfield.lower()
             if subfield in exifdict:
                 values = exifdict[subfield]
-                values = [values] if not isinstance(values, list) else values
+                values = values if isinstance(values, list) else [values]
                 values = [str(v) for v in values]
 
         # sanitize directory names if needed
diff --git a/osxphotos/phototz.py b/osxphotos/phototz.py
index 2c0a0d0c..91103a1f 100644
--- a/osxphotos/phototz.py
+++ b/osxphotos/phototz.py
@@ -11,7 +11,7 @@ from typing import Callable, Optional, Tuple
 from photoscript import Photo
 from tenacity import retry, stop_after_attempt, wait_exponential
 
-from ._constants import _DB_TABLE_NAMES
+from ._constants import _DB_TABLE_NAMES, SQLITE_CHECK_SAME_THREAD
 from .photosdb.photosdb_utils import get_photos_library_version
 from .timezones import Timezone
 from .utils import get_last_library_path, get_system_library_path, noop
@@ -67,7 +67,9 @@ class PhotoTimeZone:
             ON ZADDITIONALASSETATTRIBUTES.ZASSET = {self.ASSET_TABLE}.Z_PK
             WHERE {self.ASSET_TABLE}.ZUUID = '{uuid}'
         """
-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(
+            self.db_path, check_same_thread=SQLITE_CHECK_SAME_THREAD
+        ) as conn:
             c = conn.cursor()
             c.execute(sql)
             results = c.fetchone()
@@ -137,7 +139,9 @@ class PhotoTimeZoneUpdater:
             ON ZADDITIONALASSETATTRIBUTES.ZASSET = {self.ASSET_TABLE}.Z_PK
             WHERE {self.ASSET_TABLE}.ZUUID = '{uuid}'
         """
-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(
+            self.db_path, check_same_thread=SQLITE_CHECK_SAME_THREAD
+        ) as conn:
             c = conn.cursor()
             c.execute(sql)
             results = c.fetchone()
@@ -151,7 +155,9 @@ class PhotoTimeZoneUpdater:
             ZTIMEZONENAME='{self.tz_name}'
             WHERE Z_PK={z_pk};
             """
-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(
+            self.db_path, check_same_thread=SQLITE_CHECK_SAME_THREAD
+        ) as conn:
             c = conn.cursor()
             c.execute(sql_update)
             conn.commit()
diff --git a/osxphotos/sqlite_utils.py b/osxphotos/sqlite_utils.py
index 0cdd747d..3a287ad2 100644
--- a/osxphotos/sqlite_utils.py
+++ b/osxphotos/sqlite_utils.py
@@ -5,14 +5,22 @@ import pathlib
 import sqlite3
 from typing import List, Tuple
 
+from ._constants import SQLITE_CHECK_SAME_THREAD
+
 logger = logging.getLogger("osxphotos")
 
+
 def sqlite_open_ro(dbname: str) -> Tuple[sqlite3.Connection, sqlite3.Cursor]:
     """opens sqlite file dbname in read-only mode
     returns tuple of (connection, cursor)"""
     try:
         dbpath = pathlib.Path(dbname).resolve()
-        conn = sqlite3.connect(f"{dbpath.as_uri()}?mode=ro", timeout=1, uri=True)
+        conn = sqlite3.connect(
+            f"{dbpath.as_uri()}?mode=ro",
+            timeout=1,
+            uri=True,
+            check_same_thread=SQLITE_CHECK_SAME_THREAD,
+        )
         c = conn.cursor()
     except sqlite3.Error as e:
         raise sqlite3.Error(
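For context on the `check_same_thread=SQLITE_CHECK_SAME_THREAD` plumbing in the hunks above (an illustrative sketch, not part of the patch): by default `sqlite3` raises `ProgrammingError` when a connection is used from a thread other than the one that created it. Relaxing the check is safe when the module reports a serialized build, i.e. `sqlite3.threadsafety == 3`, which is the same guard the new tests use.

```python
# Sketch: what check_same_thread changes. Table and value names are illustrative.
import sqlite3
import threading

conn = sqlite3.connect(":memory:", check_same_thread=False)
conn.execute("CREATE TABLE photos (uuid TEXT)")

def insert_from_thread():
    # Allowed only because check_same_thread=False; with the default (True),
    # sqlite3 raises ProgrammingError when a connection crosses threads.
    conn.execute("INSERT INTO photos VALUES ('ABC123')")
    conn.commit()

t = threading.Thread(target=insert_from_thread)
t.start()
t.join()
print(conn.execute("SELECT COUNT(*) FROM photos").fetchone())  # (1,)
```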
diff --git a/osxphotos/sqlitekvstore.py b/osxphotos/sqlitekvstore.py
index f1053baf..4b4fa0df 100644
--- a/osxphotos/sqlitekvstore.py
+++ b/osxphotos/sqlitekvstore.py
@@ -9,6 +9,8 @@ from typing import Callable, Dict, Generator, Iterable, Optional, Tuple, TypeVar
 # keep mypy happy, keys/values can be any type supported by SQLite
 T = TypeVar("T")
 
+__version__ = "0.3.0"
+
 __all__ = ["SQLiteKVStore"]
 
 
@@ -41,7 +43,7 @@ class SQLiteKVStore:
         self._serialize_func = serialize
         self._deserialize_func = deserialize
         self._conn = (
-            sqlite3.Connection(dbpath)
+            sqlite3.connect(dbpath)
             if os.path.exists(dbpath)
             else self._create_database(dbpath)
         )
@@ -53,7 +55,7 @@ class SQLiteKVStore:
 
     def _create_database(self, dbpath: str):
         """Create the key-value database"""
-        conn = sqlite3.Connection(dbpath)
+        conn = sqlite3.connect(dbpath)
         cursor = conn.cursor()
         cursor.execute(
             """CREATE TABLE IF NOT EXISTS _about (
diff --git a/tests/test_cli.py b/tests/test_cli.py
index fa20dbb1..c82a3d14 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -4631,7 +4631,7 @@ def test_export_force_update():
         conn = sqlite3.connect(dbpath)
         c = conn.cursor()
     except sqlite3.Error as e:
-        pytest.exit(f"An error occurred opening sqlite file")
+        pytest.exit("An error occurred opening sqlite file")
 
     # photo is IMG_4547.jpg
     c.execute(
diff --git a/tests/test_concurrent_export.py b/tests/test_concurrent_export.py
new file mode 100644
index 00000000..8b2d458c
--- /dev/null
+++ b/tests/test_concurrent_export.py
@@ -0,0 +1,66 @@
+"""Test that PhotoInfo.export can export concurrently"""
+
+import concurrent.futures
+import pathlib
+import sqlite3
+import tempfile
+
+import pytest
+
+import osxphotos
+
+PHOTOS_DB = "tests/Test-10.15.7.photoslibrary"
+
+
+@pytest.mark.skipif(sqlite3.threadsafety != 3, reason="sqlite3 not threadsafe")
+@pytest.mark.parametrize(
+    "count", range(10)
+)  # repeat multiple times to try to catch any concurrency errors
+def test_concurrent_export(count):
+    """Test that PhotoInfo.export can export concurrently"""
+    photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
+    photos = [p for p in photosdb.photos() if not p.ismissing]
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
+            futures = [
+                executor.submit(p.export, tmpdir, f"{p.uuid}_{p.original_filename}")
+                for p in photos
+            ]
+            exported = []
+            for future in concurrent.futures.as_completed(futures):
+                exported.extend(future.result())
+    assert len(exported) == len(photos)
+
+
+@pytest.mark.skipif(sqlite3.threadsafety != 3, reason="sqlite3 not threadsafe")
+@pytest.mark.parametrize(
+    "count", range(10)
+)  # repeat multiple times to try to catch any concurrency errors
+def test_concurrent_export_with_exportdb(count):
+    """Test that PhotoExporter.export can export concurrently with a shared ExportDB"""
+    photosdb = osxphotos.PhotosDB(dbfile=PHOTOS_DB)
+    photos = [p for p in photosdb.photos() if not p.ismissing]
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        exportdb = osxphotos.ExportDB(pathlib.Path(tmpdir) / "export.db", tmpdir)
+        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
+            futures = []
+            for p in photos:
+                options = osxphotos.ExportOptions()
+                options.export_db = exportdb
+                exporter = osxphotos.PhotoExporter(p)
+                futures.append(
+                    executor.submit(
+                        exporter.export,
+                        tmpdir,
+                        f"{p.uuid}_{p.original_filename}",
+                        options=options,
+                    )
+                )
+            export_results = osxphotos.photoexporter.ExportResults()
+            for future in concurrent.futures.as_completed(futures):
+                export_results += future.result()
+
+    assert len(export_results.exported) == len(photos)
+    assert len(list(exportdb.get_exported_files())) == len(photos)
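Finally, a hedged sketch of how the pattern exercised by these tests might look in user code (the destination path and worker count are illustrative; the `ExportDB` constructor arguments and the `+=` aggregation of `ExportResults` mirror the second test above):

```python
# Sketch: concurrent export with a shared ExportDB, mirroring the tests.
# Destination path and worker count are illustrative, not prescriptive.
import concurrent.futures
import pathlib

import osxphotos
from osxphotos.photoexporter import ExportResults

photosdb = osxphotos.PhotosDB(dbfile="tests/Test-10.15.7.photoslibrary")
photos = [p for p in photosdb.photos() if not p.ismissing]

dest = pathlib.Path("/tmp/osxphotos_export")
dest.mkdir(exist_ok=True)
exportdb = osxphotos.ExportDB(dest / "export.db", dest)

results = ExportResults()
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
    futures = []
    for p in photos:
        options = osxphotos.ExportOptions()
        options.export_db = exportdb  # one export database shared by all workers
        futures.append(
            executor.submit(
                osxphotos.PhotoExporter(p).export,
                dest,
                f"{p.uuid}_{p.original_filename}",
                options=options,
            )
        )
    for future in concurrent.futures.as_completed(futures):
        results += future.result()  # ExportResults supports += aggregation

print(f"exported {len(results.exported)} files")
```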