Coverage for python/lsst/pipe/base/_dataset_handle.py: 19%
89 statements
coverage.py v7.2.7, created at 2023-06-25 09:14 +0000
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["InMemoryDatasetHandle"]

import dataclasses
from typing import Any, cast

from frozendict import frozendict
from lsst.daf.butler import (
    DataCoordinate,
    DataId,
    DimensionUniverse,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)


# Use an empty dataId as a default.
def _default_dataId() -> DataCoordinate:
    return DataCoordinate.makeEmpty(DimensionUniverse())


@dataclasses.dataclass(frozen=True, init=False)
class InMemoryDatasetHandle:
    """An in-memory version of a `~lsst.daf.butler.DeferredDatasetHandle`.

    If ``dataId`` is not specified, a default empty dataId will be constructed.
    If ``kwargs`` are provided without specifying a ``dataId``, those
    parameters will be converted into a dataId-like entity.
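
    Examples
    --------
    A minimal, illustrative sketch of construction and retrieval; the
    ``detector`` and ``visit`` keys below are assumed example dataId keys,
    not a requirement of this class:

    >>> handle = InMemoryDatasetHandle({"a": 1}, detector=42, visit=1234)
    >>> handle.get()
    {'a': 1}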
    """

    _empty = DataCoordinate.makeEmpty(DimensionUniverse())

    def __init__(
        self,
        inMemoryDataset: Any,
        *,
        storageClass: StorageClass | None = None,
        parameters: dict[str, Any] | None = None,
        dataId: DataId | None = None,
        copy: bool = False,
        **kwargs: Any,
    ):
        object.__setattr__(self, "inMemoryDataset", inMemoryDataset)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "parameters", parameters)
        object.__setattr__(self, "copy", copy)
        # Need to be able to construct a dataId from kwargs for convenience.
        # This will not be a full DataCoordinate.
        if dataId is None:
            if kwargs:
                dataId = frozendict(kwargs)
            else:
                dataId = self._empty
        elif kwargs:
            if isinstance(dataId, DataCoordinate):
                dataId = DataCoordinate.standardize(kwargs, defaults=dataId, universe=dataId.universe)
            else:
                new = dict(dataId)
                new.update(kwargs)
                dataId = frozendict(new)
        object.__setattr__(self, "dataId", dataId)

    def get(
        self,
        *,
        component: str | None = None,
        parameters: dict | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve the dataset pointed to by this handle.

        This handle may be used multiple times, possibly with different
        parameters.

        Parameters
        ----------
        component : `str` or `None`
            If the deferred object is a component dataset type, this parameter
            may specify the name of the component to use in the get operation.
        parameters : `dict` or `None`
            The parameters argument will be passed to the butler get method.
            It defaults to `None`. If the value is not `None`, this `dict` will
            be merged with the parameters dict used to construct this handle.
        storageClass : `~lsst.daf.butler.StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the type stored. Specifying a read `~lsst.daf.butler.StorageClass`
            can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        return : `object`
            The dataset pointed to by this handle. Whether this returns the
            original object or a copy is controlled by the ``copy`` property
            of the handle, which is set at handle construction time.
            If the stored object is `None`, this method always returns `None`
            regardless of any component request or parameters.

        Raises
        ------
        KeyError
            Raised if a component or parameters are used but no storage
            class can be found.
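
        Examples
        --------
        Illustrative usage only; the ``"ExposureF"`` storage class, its
        ``image`` component, and the ``bbox`` parameter are assumptions
        about the registered storage class definitions, not part of this
        method's API:

        >>> handle = InMemoryDatasetHandle(exposure, storageClass="ExposureF")  # doctest: +SKIP
        >>> image = handle.get(component="image")  # doctest: +SKIP
        >>> cutout = handle.get(parameters={"bbox": bbox})  # doctest: +SKIP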
        """
        if self.inMemoryDataset is None:
            return None

        if self.parameters is not None:
            mergedParameters = self.parameters.copy()
            if parameters is not None:
                mergedParameters.update(parameters)
        elif parameters is not None:
            mergedParameters = parameters
        else:
            mergedParameters = {}

        returnStorageClass: StorageClass | None = None
        if storageClass:
            if isinstance(storageClass, str):
                factory = StorageClassFactory()
                returnStorageClass = factory.getStorageClass(storageClass)
            else:
                returnStorageClass = storageClass

        inMemoryDataset = self.inMemoryDataset

        if self.copy:
            # An optimization might be to defer copying until any components
            # and parameters have been applied. This can be a problem since
            # most component storage classes do not bother to define a
            # storage class delegate and the default delegate uses deepcopy()
            # which can fail if explicit support for deepcopy() is missing
            # or pickle does not work.
            # Copying will require a storage class be determined, which is
            # not normally required for the default case of no parameters and
            # no components.
            thisStorageClass = self._getStorageClass()
            try:
                delegate = thisStorageClass.delegate()
            except TypeError:
                # Try the default copy options if no delegate is available.
                delegate = StorageClassDelegate(thisStorageClass)

            inMemoryDataset = delegate.copy(inMemoryDataset)

        if component or mergedParameters:
            # This requires a storage class lookup to locate the delegate
            # class.
            thisStorageClass = self._getStorageClass()

            # Parameters for derived components are applied against the
            # composite.
            if component in thisStorageClass.derivedComponents:
                # For some reason MyPy doesn't see the line above as narrowing
                # 'component' from 'str | None' to 'str'.
                component = cast(str, component)
                thisStorageClass.validateParameters(parameters)

                # Process the parameters (hoping this never modifies the
                # original object).
                inMemoryDataset = thisStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )
                mergedParameters = {}  # They have now been used

                readStorageClass = thisStorageClass.derivedComponents[component]
            else:
                if component:
                    readStorageClass = thisStorageClass.components[component]
                else:
                    readStorageClass = thisStorageClass
                readStorageClass.validateParameters(mergedParameters)

            if component:
                inMemoryDataset = thisStorageClass.delegate().getComponent(inMemoryDataset, component)

            if mergedParameters:
                inMemoryDataset = readStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )

            if returnStorageClass:
                return returnStorageClass.coerce_type(inMemoryDataset)
            return inMemoryDataset
        else:
            # If there are no parameters or component requests, the object
            # can be returned as is, but possibly with conversion.
            if returnStorageClass:
                return returnStorageClass.coerce_type(inMemoryDataset)
            return inMemoryDataset

    def _getStorageClass(self) -> StorageClass:
        """Return the relevant storage class.

        Returns
        -------
        storageClass : `~lsst.daf.butler.StorageClass`
            The storage class associated with this handle, or one derived
            from the python type of the stored object.

        Raises
        ------
        KeyError
            Raised if the storage class could not be found.
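
        Examples
        --------
        Illustrative sketch; assumes the active storage class configuration
        registers a storage class whose Python type matches the stored
        object (here a `dict`):

        >>> sc = InMemoryDatasetHandle({"a": 1})._getStorageClass()  # doctest: +SKIP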
        """
        factory = StorageClassFactory()
        if self.storageClass:
            return factory.getStorageClass(self.storageClass)

        # Need to match python type.
        pytype = type(self.inMemoryDataset)
        return factory.findStorageClass(pytype)

    inMemoryDataset: Any
    """The object to store in this dataset handle for later retrieval.
    """

    dataId: DataCoordinate | frozendict
    """The `~lsst.daf.butler.DataCoordinate` associated with this dataset
    handle.
    """

    storageClass: str | None = None
    """The name of the `~lsst.daf.butler.StorageClass` associated with this
    dataset.

    If `None`, the storage class will be looked up from the factory.
    """

    parameters: dict | None = None
    """Optional parameters that may be used to specify a subset of the dataset
    to be loaded (`dict` or `None`).
    """

    copy: bool = False
    """Control whether a copy of the in-memory dataset is returned for every
    call to `get()`."""