Coverage for python/lsst/daf/butler/core/persistenceContext.py: 53% of 54 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("PersistenceContextVars",)

import uuid
from collections.abc import Callable, Hashable
from contextvars import Context, ContextVar, Token, copy_context
from typing import TYPE_CHECKING, ParamSpec, TypeVar, cast

if TYPE_CHECKING:
    from .datasets.ref import DatasetRef
    from .datasets.type import DatasetType, SerializedDatasetType
    from .datastoreRecordData import DatastoreRecordData
    from .dimensions._coordinate import DataCoordinate, SerializedDataCoordinate
    from .dimensions._records import DimensionRecord, SerializedDimensionRecord

_T = TypeVar("_T")
_V = TypeVar("_V")

_P = ParamSpec("_P")
_Q = ParamSpec("_Q")


class PersistenceContextVars:
    r"""Helper class for deserializing butler data structures.

    When serializing various butler data structures, nested dataset types get
    serialized independently. This means that what were multiple references to
    the same object in memory become duplicates in the serialized output.

    Upon deserialization, multiple independent data structures are created to
    represent the same logical piece of data.

    This class can be used to remove this duplication by caching objects as
    they are created and returning references to those objects. This is done
    in concert with the ``direct`` and ``from_simple`` methods on the various
    butler dataset structures.

    This class utilizes class level variables as a form of global state. Each
    of the various data structures can check whether these global caches have
    been initialized as a cache (a dictionary) or are still in their default
    `None` state.

    Users of this class are intended to create an instance and then call the
    `run` method, supplying a callable and any required arguments (see the
    Examples below). The `run` method then creates a specific execution
    context, initializes the caches, and runs the supplied function. Upon
    completion of the function call, the caches are cleared and returned to
    their default state.

    This process is thread safe.

    Notes
    -----
    Caches of `SerializedDatasetRef`\ s are intentionally left out. It was
    discovered that these caused excessive Python memory allocations which,
    though cleaned up upon completion, left the process using more memory than
    it otherwise needed, as Python does not return allocated memory to the OS
    until the process exits. It was determined that the runtime cost of
    recreating the `SerializedDatasetRef`\ s was worth the memory savings.
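
    Examples
    --------
    A minimal usage sketch; ``deserialize_payload`` and ``payload`` are
    hypothetical stand-ins for whatever callable performs the deserialization
    and its arguments::

        context = PersistenceContextVars()
        result = context.run(deserialize_payload, payload)

    A hypothetical ``from_simple`` implementation might consult one of these
    caches along these lines (the key and construction step are illustrative
    only)::

        cache = PersistenceContextVars.loadedTypes.get()
        if cache is not None and (cached := cache.get(key)) is not None:
            return cached
        new = build_dataset_type()  # hypothetical construction step
        if cache is not None:
            cache[key] = new
        return new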
    """

    serializedDatasetTypeMapping: ContextVar[
        dict[tuple[str, str], SerializedDatasetType] | None
    ] = ContextVar("serializedDatasetTypeMapping", default=None)
    r"""A cache of `SerializedDatasetType`\ s.
    """

    serializedDataCoordinateMapping: ContextVar[
        dict[tuple[frozenset, bool], SerializedDataCoordinate] | None
    ] = ContextVar("serializedDataCoordinateMapping", default=None)
    r"""A cache of `SerializedDataCoordinate`\ s.
    """

    serializedDimensionRecordMapping: ContextVar[
        dict[tuple[str, frozenset], SerializedDimensionRecord] | None
    ] = ContextVar("serializedDimensionRecordMapping", default=None)
    r"""A cache of `SerializedDimensionRecord`\ s.
    """

    loadedTypes: ContextVar[dict[tuple[str, str], DatasetType] | None] = ContextVar(
        "loadedTypes", default=None
    )
    r"""A cache of `DatasetType`\ s.
    """

    dataCoordinates: ContextVar[dict[tuple[frozenset, bool], DataCoordinate] | None] = ContextVar(
        "dataCoordinates", default=None
    )
    r"""A cache of `DataCoordinate`\ s.
    """

    datasetRefs: ContextVar[dict[tuple[int, str], DatasetRef] | None] = ContextVar(
        "datasetRefs", default=None
    )
    r"""A cache of `DatasetRef`\ s.
    """

    dimensionRecords: ContextVar[dict[Hashable, DimensionRecord] | None] = ContextVar(
        "dimensionRecords", default=None
    )
    r"""A cache of `DimensionRecord`\ s.
    """

    dataStoreRecords: ContextVar[dict[frozenset[str | uuid.UUID], DatastoreRecordData] | None] = ContextVar(
        "dataStoreRecords", default=None
    )
    r"""A cache of `DatastoreRecordData` objects.
    """

    @classmethod
    def _getContextVars(cls) -> dict[str, ContextVar]:
        """Build a dictionary of names to caches declared at class scope."""
        classAttributes: dict[str, ContextVar] = {}
        for k in vars(cls):
            v = getattr(cls, k)
            # Filter out callables and private attributes.
            if not callable(v) and not k.startswith("__"):
                classAttributes[k] = v
        return classAttributes

    def __init__(self) -> None:
        # Holds the copied execution context created by `run` and the reset
        # tokens produced when the caches are initialized.
        self._ctx: Context | None = None
        self._tokens: dict[str, Token] | None = None

    def _functionRunner(self, function: Callable[_P, _V], *args: _P.args, **kwargs: _P.kwargs) -> _V:
        # Create a storage space for the tokens returned from setting the
        # context variables.
        self._tokens = {}

        # Set each cache to an empty dictionary and record the token returned
        # by this operation.
        for name, attribute in self._getContextVars().items():
            self._tokens[name] = attribute.set({})

        # Call the supplied function and record the result.
        result = function(*args, **kwargs)

        # Reset all the context variables back to the state they were in
        # before this function was run.
        persistenceVars = self._getContextVars()
        assert self._tokens is not None
        for name, token in self._tokens.items():
            attribute = persistenceVars[name]
            attribute.reset(token)
        self._tokens = None
        return result

    def run(self, function: Callable[_Q, _T], *args: _Q.args, **kwargs: _Q.kwargs) -> _T:
        """Execute the supplied function inside context specific caches.

        Parameters
        ----------
        function : `Callable`
            A callable which is to be executed inside a specific context.
        *args : tuple
            Positional arguments which are to be passed to the `Callable`.
        **kwargs : dict, optional
            Extra keyword arguments which are to be passed to the `Callable`.

        Returns
        -------
        result : `Any`
            The result returned by executing the supplied `Callable`.
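
        Examples
        --------
        Positional and keyword arguments are forwarded unchanged to the
        supplied callable; ``rebuild`` and its arguments here are hypothetical
        placeholders::

            context = PersistenceContextVars()
            result = context.run(rebuild, payload, strict=True)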
        """
        self._ctx = copy_context()
        # Type checkers seem to have trouble with a second layer nesting of
        # parameter specs in callables, so ignore the call here and explicitly
        # cast the result as we know this is exactly what the return type will
        # be.
        result = self._ctx.run(self._functionRunner, function, *args, **kwargs)  # type: ignore
        return cast(_T, result)