Coverage for python/lsst/daf/butler/core/persistenceContext.py: 53%
54 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 07:59 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 07:59 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("PersistenceContextVars",)
33import uuid
34from collections.abc import Callable, Hashable
35from contextvars import Context, ContextVar, Token, copy_context
36from typing import TYPE_CHECKING, ParamSpec, TypeVar, cast
38if TYPE_CHECKING:
39 from .datasets.ref import DatasetRef
40 from .datasets.type import DatasetType, SerializedDatasetType
41 from .datastoreRecordData import DatastoreRecordData
42 from .dimensions._coordinate import DataCoordinate, SerializedDataCoordinate
43 from .dimensions._records import DimensionRecord, SerializedDimensionRecord
# Return-type variables: `_T` is the result type of the callable given to
# `PersistenceContextVars.run`; `_V` is the result type of the callable given
# to `PersistenceContextVars._functionRunner`.
45_T = TypeVar("_T")
46_V = TypeVar("_V")
# Parameter specifications paired with the type variables above: `_P` captures
# the arguments forwarded by `_functionRunner`, `_Q` those forwarded by `run`.
48_P = ParamSpec("_P")
49_Q = ParamSpec("_Q")
52class PersistenceContextVars:
53 r"""Helper class for deserializing butler data structures.
55 When serializing various butler data structures nested dataset types get
56 serialized independently. This means what were multiple references to the
57 same object in memory are all duplicated in the serialization process.
59 Upon deserialization multiple independent data structures are created to
60 represent the same logical bit of data.
62 This class can be used to remove this duplication by caching objects as
63 they are created and returning a reference to that object. This is done in
64 concert with ``direct`` and ``from_simple`` methods on the various butler
65 dataset structures.
67 This class utilizes class level variables as a form of global state. Each
68 of the various data structures can look to see if these global caches has
69 been initialized as a cache (a dictionary) or is in the default None state.
71 Users of this class are intended to create an instance, and then call the
72 `run` method, supplying a callable function, and passing any required
73 arguments. The `run` method then creates a specific execution context,
74 initializing the caches, and then runs the supplied function. Upon
75 completion of the function call, the caches are cleared and returned to the
76 default state.
78 This process is thread safe.
80 Notes
81 -----
82 Caches of `SerializedDatasetRef`\ s are intentionally left out. It was
83 discovered that these caused excessive python memory allocations which
84 though cleaned up upon completion, left the process using more memory than
85 it otherwise needed as python does not return allocated memory to the OS
86 until process completion. It was determined the runtime cost of recreating
87 the `SerializedDatasetRef`\ s was worth the memory savings.
88 """
90 serializedDatasetTypeMapping: ContextVar[
91 dict[tuple[str, str], SerializedDatasetType] | None
92 ] = ContextVar("serializedDatasetTypeMapping", default=None)
93 r"""A cache of `SerializedDatasetType`\ s.
94 """
96 serializedDataCoordinateMapping: ContextVar[
97 dict[tuple[frozenset, bool], SerializedDataCoordinate] | None
98 ] = ContextVar("serializedDataCoordinateMapping", default=None)
99 r"""A cache of `SerializedDataCoordinate`\ s.
100 """
102 serializedDimensionRecordMapping: ContextVar[
103 dict[tuple[str, frozenset] | tuple[int, DataCoordinate], SerializedDimensionRecord] | None
104 ] = ContextVar("serializedDimensionRecordMapping", default=None)
105 r"""A cache of `SerializedDimensionRecord`\ s.
106 """
108 loadedTypes: ContextVar[dict[tuple[str, str], DatasetType] | None] = ContextVar(
109 "loadedTypes", default=None
110 )
111 r"""A cache of `DatasetType`\ s.
112 """
114 dataCoordinates: ContextVar[dict[tuple[frozenset, bool], DataCoordinate] | None] = ContextVar(
115 "dataCoordinates", default=None
116 )
117 r"""A cache of `DataCoordinate`\ s.
118 """
120 datasetRefs: ContextVar[dict[tuple[int, str], DatasetRef] | None] = ContextVar(
121 "datasetRefs", default=None
122 )
123 r"""A cache of `DatasetRef`\ s.
124 """
126 dimensionRecords: ContextVar[dict[Hashable, DimensionRecord] | None] = ContextVar(
127 "dimensionRecords", default=None
128 )
129 r"""A cache of `DimensionRecord`\ s.
130 """
132 dataStoreRecords: ContextVar[dict[frozenset[str | uuid.UUID], DatastoreRecordData] | None] = ContextVar(
133 "dataStoreRecords", default=None
134 )
135 r"""A cache of `DatastoreRecordData` objects.
136 """
138 @classmethod
139 def _getContextVars(cls) -> dict[str, ContextVar]:
140 """Build a dictionary of names to caches declared at class scope."""
141 classAttributes: dict[str, ContextVar] = {}
142 for k in vars(cls):
143 v = getattr(cls, k)
144 # filter out callables and private attributes
145 if not callable(v) and not k.startswith("__"):
146 classAttributes[k] = v
147 return classAttributes
149 def __init__(self) -> None:
150 self._ctx: Context | None = None
151 self._tokens: dict[str, Token] | None = None
153 def _functionRunner(self, function: Callable[_P, _V], *args: _P.args, **kwargs: _P.kwargs) -> _V:
154 # create a storage space for the tokens returned from setting the
155 # context variables
156 self._tokens = {}
158 # Set each cache to an empty dictionary and record the token returned
159 # by this operation.
160 for name, attribute in self._getContextVars().items():
161 self._tokens[name] = attribute.set({})
163 # Call the supplied function and record the result
164 result = function(*args, **kwargs)
166 # Reset all the context variables back to the state they were in before
167 # this function was run.
168 persistenceVars = self._getContextVars()
169 assert self._tokens is not None
170 for name, token in self._tokens.items():
171 attribute = persistenceVars[name]
172 attribute.reset(token)
173 self._tokens = None
174 return result
176 def run(self, function: Callable[_Q, _T], *args: _Q.args, **kwargs: _Q.kwargs) -> _T:
177 """Execute the supplied function inside context specific caches.
179 Parameters
180 ----------
181 function : `Callable`
182 A callable which is to be executed inside a specific context.
183 *args : tuple
184 Positional arguments which are to be passed to the `Callable`
185 **kwargs: dict, optional
186 Extra key word arguments which are to be passed to the `Callable`
188 Returns
189 -------
190 result : `Any`
191 The result returned by executing the supplied `Callable`
192 """
193 self._ctx = copy_context()
194 # Type checkers seem to have trouble with a second layer nesting of
195 # parameter specs in callables, so ignore the call here and explicitly
196 # cast the result as we know this is exactly what the return type will
197 # be.
198 result = self._ctx.run(self._functionRunner, function, *args, **kwargs) # type: ignore
199 return cast(_T, result)