Coverage for python/lsst/pipe/base/_dataset_handle.py: 20%
88 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-18 02:12 -0700
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-18 02:12 -0700
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["InMemoryDatasetHandle"]
25import dataclasses
26from typing import Any, Optional
28from frozendict import frozendict
29from lsst.daf.butler import (
30 DataCoordinate,
31 DataId,
32 DimensionUniverse,
33 StorageClass,
34 StorageClassDelegate,
35 StorageClassFactory,
36)
# Fallback dataId for handles that are created without one.
def _default_dataId() -> DataCoordinate:
    """Build an empty data coordinate using a default dimension universe.

    Returns
    -------
    dataId : `~lsst.daf.butler.DataCoordinate`
        The result of `DataCoordinate.makeEmpty` called with a freshly
        constructed `DimensionUniverse`.
    """
    universe = DimensionUniverse()
    return DataCoordinate.makeEmpty(universe)
@dataclasses.dataclass(frozen=True, init=False)
class InMemoryDatasetHandle:
    """An in-memory version of a `~lsst.daf.butler.DeferredDatasetHandle`.

    If ``dataId`` is not specified, a default empty dataId will be constructed.
    If ``kwargs`` are provided without specifying a ``dataId``, those
    parameters will be converted into a dataId-like entity.

    Parameters
    ----------
    inMemoryDataset : `object`
        The object to be returned by `get`.
    storageClass : `~lsst.daf.butler.StorageClass` or `str`, optional
        Storage class (or storage class name) associated with the dataset.
        If `None` the storage class is looked up from the Python type of
        the dataset when it is needed.
    parameters : `dict` [`str`, `object`], optional
        Parameters to apply on every call to `get`.
    dataId : `~lsst.daf.butler.DataCoordinate` or mapping, optional
        Data ID to associate with the dataset.
    copy : `bool`, optional
        If `True`, `get` returns a copy of the dataset rather than the
        stored object itself.
    **kwargs : `object`
        Additional key/value pairs merged into the data ID.
    """

    _empty = DataCoordinate.makeEmpty(DimensionUniverse())

    def __init__(
        self,
        inMemoryDataset: Any,
        *,
        storageClass: StorageClass | None = None,
        parameters: dict[str, Any] | None = None,
        dataId: DataId | None = None,
        copy: bool = False,
        **kwargs: Any,
    ):
        # The dataclass is frozen so attributes have to be set through
        # object.__setattr__.
        object.__setattr__(self, "inMemoryDataset", inMemoryDataset)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "parameters", parameters)
        object.__setattr__(self, "copy", copy)
        # Need to be able to construct a dataId from kwargs for convenience.
        # This will not be a full DataCoordinate.
        if dataId is None:
            if kwargs:
                dataId = frozendict(kwargs)
            else:
                dataId = self._empty
        elif kwargs:
            if isinstance(dataId, DataCoordinate):
                dataId = DataCoordinate.standardize(kwargs, defaults=dataId, universe=dataId.universe)
            else:
                new = dict(dataId)
                new.update(kwargs)
                dataId = frozendict(new)
        object.__setattr__(self, "dataId", dataId)

    def get(
        self,
        *,
        component: Optional[str] = None,
        parameters: Optional[dict] = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve the dataset pointed to by this handle.

        This handle may be used multiple times, possibly with different
        parameters.

        Parameters
        ----------
        component : `str` or None
            If the stored object is a composite, this parameter may specify
            the name of the component to extract from it.
        parameters : `dict` or None
            Parameters to apply to the stored object. It defaults to None.
            If the value is not None, this dict will be merged with the
            parameters dict used to construct this handle, with values
            given here taking precedence.
        storageClass : `StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the type stored. Specifying a read `StorageClass` can force a
            different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        return : `object`
            The dataset pointed to by this handle. Whether this returns the
            original object or a copy is controlled by the ``copy`` property
            of the handle that is set at handle construction time.
            If the stored object is `None` this method always returns `None`
            regardless of any component request or parameters.

        Raises
        ------
        KeyError
            Raised if a component or parameters are used but no storage
            class can be found.
        """
        if self.inMemoryDataset is None:
            return None

        # Always build a new dict so that neither the handle's parameters nor
        # the caller's dict can be modified. Call-time values win.
        mergedParameters: dict[str, Any] = {**(self.parameters or {}), **(parameters or {})}

        returnStorageClass: StorageClass | None = None
        if storageClass:
            if isinstance(storageClass, str):
                returnStorageClass = StorageClassFactory().getStorageClass(storageClass)
            else:
                returnStorageClass = storageClass

        inMemoryDataset = self.inMemoryDataset

        if self.copy:
            # An optimization might be to defer copying until any components
            # and parameters have been applied. This can be a problem since
            # most component storage classes do not bother to define a
            # storage class delegate and the default delegate uses deepcopy()
            # which can fail if explicit support for deepcopy() is missing
            # or pickle does not work.
            # Copying will require a storage class be determined, which is
            # not normally required for the default case of no parameters and
            # no components.
            thisStorageClass = self._getStorageClass()
            try:
                delegate = thisStorageClass.delegate()
            except TypeError:
                # Try the default copy options if no delegate is available.
                delegate = StorageClassDelegate(thisStorageClass)

            inMemoryDataset = delegate.copy(inMemoryDataset)

        if component or mergedParameters:
            # This requires a storage class look up to locate the delegate
            # class.
            thisStorageClass = self._getStorageClass()

            # Parameters for derived components are applied against the
            # composite.
            if component in thisStorageClass.derivedComponents:
                # Validate everything that is about to be applied, including
                # parameters that were supplied when the handle was built.
                thisStorageClass.validateParameters(mergedParameters)

                # Process the parameters (hoping this never modified the
                # original object).
                inMemoryDataset = thisStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )
                mergedParameters = {}  # They have now been used

                readStorageClass = thisStorageClass.derivedComponents[component]
            else:
                if component:
                    readStorageClass = thisStorageClass.components[component]
                else:
                    readStorageClass = thisStorageClass
                readStorageClass.validateParameters(mergedParameters)

            if component:
                inMemoryDataset = thisStorageClass.delegate().getComponent(inMemoryDataset, component)

            if mergedParameters:
                inMemoryDataset = readStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )

        # With or without component/parameter handling, the final step is an
        # optional conversion to the requested return type.
        if returnStorageClass:
            return returnStorageClass.coerce_type(inMemoryDataset)
        return inMemoryDataset

    def _getStorageClass(self) -> StorageClass:
        """Return the relevant storage class.

        Returns
        -------
        storageClass : `StorageClass`
            The storage class associated with this handle, or one derived
            from the python type of the stored object.

        Raises
        ------
        KeyError
            Raised if the storage class could not be found.
        """
        storageClass = self.storageClass
        # The constructor accepts a StorageClass instance; the factory lookup
        # is by name so an instance must be returned directly.
        if isinstance(storageClass, StorageClass):
            return storageClass

        factory = StorageClassFactory()
        if storageClass:
            return factory.getStorageClass(storageClass)

        # Need to match python type.
        pytype = type(self.inMemoryDataset)
        return factory.findStorageClass(pytype)

    inMemoryDataset: Any
    """The object to store in this dataset handle for later retrieval.
    """

    dataId: DataCoordinate | frozendict
    """The `~lsst.daf.butler.DataCoordinate` associated with this dataset
    handle.
    """

    storageClass: StorageClass | str | None = None
    """The `~lsst.daf.butler.StorageClass`, or the name of one, associated
    with this dataset.

    If `None`, the storage class will be looked up from the factory.
    """

    parameters: Optional[dict] = None
    """Optional parameters that may be used to specify a subset of the dataset
    to be loaded (`dict` or `None`).
    """

    copy: bool = False
    """Control whether a copy of the in-memory dataset is returned for every
    call to get()."""