Coverage for python/lsst/daf/butler/registry/dimensions/caching.py: 89%
61 statements
« prev ^ index » next    coverage.py v6.5.0, created at 2023-04-07 00:58 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["CachingDimensionRecordStorage"]

from collections.abc import Mapping
from typing import Any

import sqlalchemy
from lsst.daf.relation import Join, Relation
from lsst.utils import doImportType

from ...core import (
    DatabaseDimensionElement,
    DataCoordinate,
    DimensionRecord,
    GovernorDimension,
    NamedKeyMapping,
)
from .. import queries
from ..interfaces import (
    Database,
    DatabaseDimensionRecordStorage,
    GovernorDimensionRecordStorage,
    StaticTablesContext,
)
class CachingDimensionRecordStorage(DatabaseDimensionRecordStorage):
    """A record storage implementation that adds caching to some other nested
    storage implementation.

    All writes are delegated to the nested storage first; the in-memory cache
    is only updated afterwards (and only if it has already been populated), so
    the cache should never hold a record the database does not.

    Parameters
    ----------
    nested : `DatabaseDimensionRecordStorage`
        The other storage to cache fetches from and to delegate all other
        operations to.
    """

    def __init__(self, nested: DatabaseDimensionRecordStorage):
        self._nested = nested
        # Full cache of this element's records, keyed by data ID.  ``None``
        # means "not fetched yet"; it is populated lazily and all at once by
        # get_record_cache.
        self._cache: dict[DataCoordinate, DimensionRecord] | None = None

    @classmethod
    def initialize(
        cls,
        db: Database,
        element: DatabaseDimensionElement,
        *,
        context: StaticTablesContext | None = None,
        config: Mapping[str, Any],
        governors: NamedKeyMapping[GovernorDimension, GovernorDimensionRecordStorage],
        view_target: DatabaseDimensionRecordStorage | None = None,
    ) -> DatabaseDimensionRecordStorage:
        # Docstring inherited from DatabaseDimensionRecordStorage.
        # The "nested" sub-config describes the storage implementation we
        # wrap; its "cls" entry is a fully-qualified class name to import.
        config = config["nested"]
        NestedClass = doImportType(config["cls"])
        if not hasattr(NestedClass, "initialize"):
            raise TypeError(f"Nested class {config['cls']} does not have an initialize() method.")
        nested = NestedClass.initialize(
            db, element, context=context, config=config, governors=governors, view_target=view_target
        )
        if view_target is not None:
            # Caching records that are really a view into another element's
            # records is problematic, because the caching code has no way of
            # intercepting changes to its target's records.  Instead of
            # inventing a callback system to address that directly or dealing
            # with an untrustworthy combination, we just ban this combination.
            # But there's a problem: this is how we've configured the default
            # dimension universe from the beginning, with the 'band' dimension
            # being a cached view into physical_filter, and we don't want to
            # break all those configurations.
            if isinstance(view_target, CachingDimensionRecordStorage):
                # Happily, there's a way out: if the view target's record
                # storage is _also_ cached, then this outer caching is pretty
                # thoroughly unnecessary as well as problematic, and it's
                # reasonable to silently drop it, by returning the nested
                # storage object instead of a new caching wrapper.  And this
                # too is the case with the default dimension configuration.
                return nested
            raise RuntimeError(
                f"Invalid dimension storage configuration: cannot cache dimension element {element} "
                f"that is itself a view of {view_target.element}."
            )
        return cls(nested)

    @property
    def element(self) -> DatabaseDimensionElement:
        # Docstring inherited from DimensionRecordStorage.element.
        return self._nested.element

    def clearCaches(self) -> None:
        # Docstring inherited from DimensionRecordStorage.clearCaches.
        # Drop our cache entirely (it will be rebuilt on next use) and
        # propagate to the nested storage in case it caches anything itself.
        self._cache = None
        self._nested.clearCaches()

    def make_relation(self, context: queries.SqlQueryContext) -> Relation:
        # Docstring inherited.
        return self._nested.make_relation(context)

    def insert(self, *records: DimensionRecord, replace: bool = False, skip_existing: bool = False) -> None:
        # Docstring inherited from DimensionRecordStorage.insert.
        # Write to the database first; only touch the cache if the insert
        # succeeded and the cache has already been populated.
        self._nested.insert(*records, replace=replace, skip_existing=skip_existing)
        if self._cache is not None:
            for record in records:
                # We really shouldn't ever get into a situation where the
                # record here differs from the one in the DB, but the last
                # thing we want is to make it harder to debug by making the
                # cache different from the DB.
                if skip_existing:
                    # Mirror the DB semantics: an existing record wins.
                    self._cache.setdefault(record.dataId, record)
                else:
                    self._cache[record.dataId] = record

    def sync(self, record: DimensionRecord, update: bool = False) -> bool | dict[str, Any]:
        # Docstring inherited from DimensionRecordStorage.sync.
        inserted_or_updated = self._nested.sync(record, update=update)
        if self._cache is not None and inserted_or_updated:
            # The DB row was inserted or changed; keep the cache in step.
            self._cache[record.dataId] = record
        return inserted_or_updated

    def fetch_one(self, data_id: DataCoordinate, context: queries.SqlQueryContext) -> DimensionRecord | None:
        # Docstring inherited from DimensionRecordStorage.
        # Single-record fetches always go through the full cache; this trades
        # one up-front bulk query for all subsequent lookups being in-memory.
        cache = self.get_record_cache(context)
        return cache.get(data_id)

    def get_record_cache(self, context: queries.SqlQueryContext) -> Mapping[DataCoordinate, DimensionRecord]:
        # Docstring inherited.
        if self._cache is None:
            # Fetch *all* records for this element in one query by joining
            # the nested storage's relation to an initial (identity) relation.
            relation = self._nested.join(
                context.make_initial_relation(),
                Join(),
                context,
            )
            reader = queries.DimensionRecordReader(self.element)
            cache: dict[DataCoordinate, DimensionRecord] = {}
            for row in context.fetch_iterable(relation):
                record = reader.read(row)
                cache[record.dataId] = record
            # Assign only after the dict is fully built, so a failed fetch
            # leaves the cache unpopulated rather than partial.
            self._cache = cache
        return self._cache

    def digestTables(self) -> list[sqlalchemy.schema.Table]:
        # Docstring inherited from DimensionRecordStorage.digestTables.
        return self._nested.digestTables()