Coverage for python/lsst/daf/butler/persistence_context.py: 53%
54 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-16 02:57 -0700
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-16 02:57 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("PersistenceContextVars",)
33import uuid
34from collections.abc import Callable, Hashable
35from contextvars import Context, ContextVar, Token, copy_context
36from typing import TYPE_CHECKING, ParamSpec, TypeVar
38if TYPE_CHECKING:
39 from ._dataset_ref import DatasetRef
40 from ._dataset_type import DatasetType, SerializedDatasetType
41 from .datastore.record_data import DatastoreRecordData
42 from .dimensions._coordinate import DataCoordinate, SerializedDataCoordinate
43 from .dimensions._records import DimensionRecord, SerializedDimensionRecord
# Return-type variables for the wrapped callables: ``_T`` is used in the
# signature of ``PersistenceContextVars.run`` and ``_V`` in
# ``PersistenceContextVars._functionRunner``.
45_T = TypeVar("_T")
46_V = TypeVar("_V")
# Parameter specifications for the wrapped callables: ``_P`` is used in the
# signature of ``_functionRunner`` and ``_Q`` in ``run``, so that the extra
# ``*args``/``**kwargs`` are typed against the callable being forwarded to.
48_P = ParamSpec("_P")
49_Q = ParamSpec("_Q")
52class PersistenceContextVars:
53 r"""Helper class for deserializing butler data structures.
55 When serializing various butler data structures nested dataset types get
56 serialized independently. This means what were multiple references to the
57 same object in memory are all duplicated in the serialization process.
59 Upon deserialization multiple independent data structures are created to
60 represent the same logical bit of data.
62 This class can be used to remove this duplication by caching objects as
63 they are created and returning a reference to that object. This is done in
64 concert with ``direct`` and ``from_simple`` methods on the various butler
65 dataset structures.
67 This class utilizes class level variables as a form of global state. Each
68 of the various data structures can look to see if these global caches has
69 been initialized as a cache (a dictionary) or is in the default None state.
71 Users of this class are intended to create an instance, and then call the
72 `run` method, supplying a callable function, and passing any required
73 arguments. The `run` method then creates a specific execution context,
74 initializing the caches, and then runs the supplied function. Upon
75 completion of the function call, the caches are cleared and returned to the
76 default state.
78 This process is thread safe.
80 Notes
81 -----
82 Caches of `SerializedDatasetRef`\ s are intentionally left out. It was
83 discovered that these caused excessive python memory allocations which
84 though cleaned up upon completion, left the process using more memory than
85 it otherwise needed as python does not return allocated memory to the OS
86 until process completion. It was determined the runtime cost of recreating
87 the `SerializedDatasetRef`\ s was worth the memory savings.
88 """
90 serializedDatasetTypeMapping: ContextVar[dict[tuple[str, str], SerializedDatasetType] | None] = (
91 ContextVar("serializedDatasetTypeMapping", default=None)
92 )
93 r"""A cache of `SerializedDatasetType`\ s.
94 """
96 serializedDataCoordinateMapping: ContextVar[
97 dict[tuple[frozenset, bool], SerializedDataCoordinate] | None
98 ] = ContextVar("serializedDataCoordinateMapping", default=None)
99 r"""A cache of `SerializedDataCoordinate`\ s.
100 """
102 serializedDimensionRecordMapping: ContextVar[
103 dict[tuple[str, frozenset] | tuple[int, DataCoordinate], SerializedDimensionRecord] | None
104 ] = ContextVar("serializedDimensionRecordMapping", default=None)
105 r"""A cache of `SerializedDimensionRecord`\ s.
106 """
108 loadedTypes: ContextVar[dict[tuple[str, str], DatasetType] | None] = ContextVar(
109 "loadedTypes", default=None
110 )
111 r"""A cache of `DatasetType`\ s.
112 """
114 dataCoordinates: ContextVar[dict[tuple[frozenset, bool], DataCoordinate] | None] = ContextVar(
115 "dataCoordinates", default=None
116 )
117 r"""A cache of `DataCoordinate`\ s.
118 """
120 datasetRefs: ContextVar[dict[int, DatasetRef] | None] = ContextVar("datasetRefs", default=None)
121 r"""A cache of `DatasetRef`\ s.
123 Keys are UUID converted to int, but only refs of parent dataset types are
124 cached AND THE STORAGE CLASS IS UNSPECIFIED; consumers of this cache must
125 call overrideStorageClass on the result.
126 """
128 dimensionRecords: ContextVar[dict[Hashable, DimensionRecord] | None] = ContextVar(
129 "dimensionRecords", default=None
130 )
131 r"""A cache of `DimensionRecord`\ s.
132 """
134 dataStoreRecords: ContextVar[dict[frozenset[str | uuid.UUID], DatastoreRecordData] | None] = ContextVar(
135 "dataStoreRecords", default=None
136 )
137 r"""A cache of `DatastoreRecordData` objects.
138 """
140 @classmethod
141 def _getContextVars(cls) -> dict[str, ContextVar]:
142 """Build a dictionary of names to caches declared at class scope."""
143 classAttributes: dict[str, ContextVar] = {}
144 for k in vars(cls):
145 v = getattr(cls, k)
146 # filter out callables and private attributes
147 if not callable(v) and not k.startswith("__"):
148 classAttributes[k] = v
149 return classAttributes
151 def __init__(self) -> None:
152 self._ctx: Context | None = None
153 self._tokens: dict[str, Token] | None = None
155 def _functionRunner(self, function: Callable[_P, _V], *args: _P.args, **kwargs: _P.kwargs) -> _V:
156 # create a storage space for the tokens returned from setting the
157 # context variables
158 self._tokens = {}
160 # Set each cache to an empty dictionary and record the token returned
161 # by this operation.
162 for name, attribute in self._getContextVars().items():
163 self._tokens[name] = attribute.set({})
165 # Call the supplied function and record the result
166 result = function(*args, **kwargs)
168 # Reset all the context variables back to the state they were in before
169 # this function was run.
170 persistenceVars = self._getContextVars()
171 assert self._tokens is not None
172 for name, token in self._tokens.items():
173 attribute = persistenceVars[name]
174 attribute.reset(token)
175 self._tokens = None
176 return result
178 def run(self, function: Callable[_Q, _T], *args: _Q.args, **kwargs: _Q.kwargs) -> _T:
179 """Execute the supplied function inside context specific caches.
181 Parameters
182 ----------
183 function : `Callable`
184 A callable which is to be executed inside a specific context.
185 *args : tuple
186 Positional arguments which are to be passed to the `Callable`.
187 **kwargs : dict, optional
188 Extra key word arguments which are to be passed to the `Callable`.
190 Returns
191 -------
192 result : `Any`
193 The result returned by executing the supplied `Callable`.
194 """
195 self._ctx = copy_context()
196 # Type checkers seem to have trouble with a second layer nesting of
197 # parameter specs in callables, so ignore the call here and explicitly
198 # cast the result as we know this is exactly what the return type will
199 # be.
200 result = self._ctx.run(self._functionRunner, function, *args, **kwargs) # type: ignore
201 return result