Coverage for python/lsst/pipe/base/_dataset_handle.py: 19%
89 statements
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["InMemoryDatasetHandle"]

import dataclasses
from typing import Any, cast

from frozendict import frozendict
from lsst.daf.butler import (
    DataCoordinate,
    DataId,
    DimensionUniverse,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)


# Use an empty dataID as a default.
def _default_dataId() -> DataCoordinate:
    return DataCoordinate.make_empty(DimensionUniverse())


@dataclasses.dataclass(frozen=True, init=False)
class InMemoryDatasetHandle:
    """An in-memory version of a `~lsst.daf.butler.DeferredDatasetHandle`.

    Parameters
    ----------
    inMemoryDataset : `~typing.Any`
        The dataset to be used by this handle.
    storageClass : `~lsst.daf.butler.StorageClass` or `str` or `None`, optional
        The storage class, or the name of one, associated with the in-memory
        dataset. If `None` and a storage class is needed, an attempt will be
        made to work one out from the underlying python type.
    parameters : `dict` [`str`, `~typing.Any`] or `None`, optional
        Parameters to be used with `get`.
    dataId : `~lsst.daf.butler.DataId` or `None`, optional
        The dataId associated with this dataset. Only used for compatibility
        with the Butler implementation. Can be used for logging messages
        by calling code. If ``dataId`` is not specified, a default empty
        dataId will be constructed.
    copy : `bool`, optional
        Whether to return a copy of the stored object on each call to `get`.
    **kwargs : `~typing.Any`
        If ``kwargs`` are provided without specifying a ``dataId``, those
        parameters will be converted into a dataId-like entity.
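
    Examples
    --------
    A minimal, illustrative sketch of typical use; ``"StructuredDataDict"``
    is assumed here to be a registered storage class name::

        handle = InMemoryDatasetHandle(
            {"a": 1, "b": 2}, storageClass="StructuredDataDict", visit=42
        )
        handle.dataId      # dataId-like mapping built from the ``visit`` kwarg
        data = handle.get()  # the stored dict itself, since ``copy`` is False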
    """

    _empty = DataCoordinate.make_empty(DimensionUniverse())

    def __init__(
        self,
        inMemoryDataset: Any,
        *,
        storageClass: StorageClass | str | None = None,
        parameters: dict[str, Any] | None = None,
        dataId: DataId | None = None,
        copy: bool = False,
        **kwargs: Any,
    ):
        object.__setattr__(self, "inMemoryDataset", inMemoryDataset)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "parameters", parameters)
        object.__setattr__(self, "copy", copy)
        # Need to be able to construct a dataId from kwargs for convenience.
        # This will not be a full DataCoordinate.
        if dataId is None:
            if kwargs:
                dataId = frozendict(kwargs)
            else:
                dataId = self._empty
        elif kwargs:
            if isinstance(dataId, DataCoordinate):
                dataId = DataCoordinate.standardize(kwargs, defaults=dataId, universe=dataId.universe)
            else:
                new = dict(dataId)
                new.update(kwargs)
                dataId = frozendict(new)
        object.__setattr__(self, "dataId", dataId)

    def get(
        self,
        *,
        component: str | None = None,
        parameters: dict | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve the dataset pointed to by this handle.

        This handle may be used multiple times, possibly with different
        parameters.

        Parameters
        ----------
        component : `str` or `None`, optional
            If the deferred object is a component dataset type, this parameter
            may specify the name of the component to use in the get operation.
        parameters : `dict` or `None`, optional
            The parameters argument will be passed to the butler get method.
            It defaults to `None`. If the value is not `None`, this `dict`
            will be merged with the parameters dict used to construct the
            `~lsst.daf.butler.DeferredDatasetHandle` class.
        storageClass : `~lsst.daf.butler.StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the type stored. Specifying a read `~lsst.daf.butler.StorageClass`
            can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        return : `object`
            The dataset pointed to by this handle. Whether this returns the
            original object or a copy is controlled by the ``copy`` property
            of the handle that is set at handle construction time.
            If the stored object is `None` this method always returns `None`
            regardless of any component request or parameters.

        Raises
        ------
        KeyError
            Raised if a component or parameters are used but no storage
            class can be found.
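
        Examples
        --------
        An illustrative sketch only; ``exposure``, ``bbox``, the ``wcs``
        component, and the ``bbox`` parameter are assumptions about what the
        storage class of the stored object defines::

            handle = InMemoryDatasetHandle(exposure, storageClass="ExposureF")

            # Subset on read, assuming the storage class defines a "bbox"
            # parameter.
            cutout = handle.get(parameters={"bbox": bbox})

            # Retrieve a single component, assuming the storage class
            # defines one named "wcs".
            wcs = handle.get(component="wcs")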
        """
        if self.inMemoryDataset is None:
            return None

        if self.parameters is not None:
            mergedParameters = self.parameters.copy()
            if parameters is not None:
                mergedParameters.update(parameters)
        elif parameters is not None:
            mergedParameters = parameters
        else:
            mergedParameters = {}

        returnStorageClass: StorageClass | None = None
        if storageClass:
            if isinstance(storageClass, str):
                factory = StorageClassFactory()
                returnStorageClass = factory.getStorageClass(storageClass)
            else:
                returnStorageClass = storageClass

        inMemoryDataset = self.inMemoryDataset

        if self.copy:
            # An optimization might be to defer copying until any components
            # and parameters have been applied. This can be a problem since
            # most component storage classes do not bother to define a
            # storage class delegate and the default delegate uses deepcopy()
            # which can fail if explicit support for deepcopy() is missing
            # or pickle does not work.
            # Copying will require a storage class be determined, which is
            # not normally required for the default case of no parameters and
            # no components.
            thisStorageClass = self._getStorageClass()
            try:
                delegate = thisStorageClass.delegate()
            except TypeError:
                # Try the default copy options if no delegate is available.
                delegate = StorageClassDelegate(thisStorageClass)

            inMemoryDataset = delegate.copy(inMemoryDataset)

        if component or mergedParameters:
            # This requires a storage class look up to locate the delegate
            # class.
            thisStorageClass = self._getStorageClass()

            # Parameters for derived components are applied against the
            # composite.
            if component in thisStorageClass.derivedComponents:
                # The membership test above does not narrow 'component' from
                # 'str | None' to 'str' for MyPy, so cast explicitly.
                component = cast(str, component)
                thisStorageClass.validateParameters(parameters)

                # Process the parameters (hoping this never modifies the
                # original object).
                inMemoryDataset = thisStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )
                mergedParameters = {}  # They have now been used

                readStorageClass = thisStorageClass.derivedComponents[component]
            else:
                if component:
                    readStorageClass = thisStorageClass.components[component]
                else:
                    readStorageClass = thisStorageClass
                readStorageClass.validateParameters(mergedParameters)

            if component:
                inMemoryDataset = thisStorageClass.delegate().getComponent(inMemoryDataset, component)

            if mergedParameters:
                inMemoryDataset = readStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )

            if returnStorageClass:
                return returnStorageClass.coerce_type(inMemoryDataset)
            return inMemoryDataset
        else:
            # If there are no parameters or component requests, the object
            # can be returned as is, but possibly with conversion.
            if returnStorageClass:
                return returnStorageClass.coerce_type(inMemoryDataset)
            return inMemoryDataset

    def _getStorageClass(self) -> StorageClass:
        """Return the relevant storage class.

        Returns
        -------
        storageClass : `~lsst.daf.butler.StorageClass`
            The storage class associated with this handle, or one derived
            from the python type of the stored object.

        Raises
        ------
        KeyError
            Raised if the storage class could not be found.
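
        Notes
        -----
        A rough sketch of the lookup order (``"StructuredDataDict"`` is
        assumed to be a registered storage class name)::

            # An explicitly supplied storage class (or name) wins.
            InMemoryDatasetHandle({}, storageClass="StructuredDataDict")._getStorageClass()

            # Otherwise the factory is asked to match the Python type of
            # the stored object.
            InMemoryDatasetHandle({})._getStorageClass()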
        """
        factory = StorageClassFactory()
        if self.storageClass:
            if isinstance(self.storageClass, StorageClass):
                return self.storageClass
            return factory.getStorageClass(self.storageClass)

        # Need to match python type.
        pytype = type(self.inMemoryDataset)
        return factory.findStorageClass(pytype)

    inMemoryDataset: Any
    """The object to store in this dataset handle for later retrieval.
    """

    dataId: DataCoordinate | frozendict
    """The `~lsst.daf.butler.DataCoordinate` associated with this dataset
    handle.
    """

    storageClass: StorageClass | str | None = None
    """The `~lsst.daf.butler.StorageClass`, or the name of one, associated
    with this dataset.

    If `None`, the storage class will be looked up from the factory.
    """

    parameters: dict | None = None
    """Optional parameters that may be used to specify a subset of the dataset
    to be loaded (`dict` or `None`).
    """

    copy: bool = False
    """Control whether a copy of the in-memory dataset is returned for every
    call to `get()`."""