Coverage for python/lsst/pipe/base/_dataset_handle.py: 19% of 89 statements
coverage.py v7.3.2, created at 2023-11-18 10:50 +0000
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["InMemoryDatasetHandle"]

import dataclasses
from typing import Any, cast

from frozendict import frozendict
from lsst.daf.butler import (
    DataCoordinate,
    DataId,
    DimensionUniverse,
    StorageClass,
    StorageClassDelegate,
    StorageClassFactory,
)


# Use an empty dataId as a default.
def _default_dataId() -> DataCoordinate:
    return DataCoordinate.makeEmpty(DimensionUniverse())


@dataclasses.dataclass(frozen=True, init=False)
class InMemoryDatasetHandle:
    """An in-memory version of a `~lsst.daf.butler.DeferredDatasetHandle`.

    If ``dataId`` is not specified, a default empty dataId will be constructed.
    If ``kwargs`` are provided without specifying a ``dataId``, those
    parameters will be converted into a dataId-like entity.
    """

    _empty = DataCoordinate.makeEmpty(DimensionUniverse())

    def __init__(
        self,
        inMemoryDataset: Any,
        *,
        storageClass: str | None = None,
        parameters: dict[str, Any] | None = None,
        dataId: DataId | None = None,
        copy: bool = False,
        **kwargs: Any,
    ):
        object.__setattr__(self, "inMemoryDataset", inMemoryDataset)
        object.__setattr__(self, "storageClass", storageClass)
        object.__setattr__(self, "parameters", parameters)
        object.__setattr__(self, "copy", copy)
        # Need to be able to construct a dataId from kwargs for convenience.
        # This will not be a full DataCoordinate.
        if dataId is None:
            if kwargs:
                dataId = frozendict(kwargs)
            else:
                dataId = self._empty
        elif kwargs:
            if isinstance(dataId, DataCoordinate):
                dataId = DataCoordinate.standardize(kwargs, defaults=dataId, universe=dataId.universe)
            else:
                new = dict(dataId)
                new.update(kwargs)
                dataId = frozendict(new)
        object.__setattr__(self, "dataId", dataId)
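
    # Merge sketch (hypothetical keys): a plain-dict ``dataId`` is combined
    # with ``kwargs`` into a frozendict, while a `DataCoordinate` dataId is
    # standardized against them:
    #
    #     InMemoryDatasetHandle(obj, dataId={"visit": 42}, detector=9).dataId
    #     # -> frozendict({"visit": 42, "detector": 9})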

    def get(
        self,
        *,
        component: str | None = None,
        parameters: dict | None = None,
        storageClass: str | StorageClass | None = None,
    ) -> Any:
        """Retrieve the dataset pointed to by this handle.

        This handle may be used multiple times, possibly with different
        parameters.

        Parameters
        ----------
        component : `str` or None
            If the deferred object is a component dataset type, this parameter
            may specify the name of the component to use in the get operation.
        parameters : `dict` or None
            The parameters argument will be passed to the butler get method.
            It defaults to `None`. If the value is not `None`, this `dict` will
            be merged with the parameters dict used to construct this
            `InMemoryDatasetHandle`.
        storageClass : `~lsst.daf.butler.StorageClass` or `str`, optional
            The storage class to be used to override the Python type
            returned by this method. By default the returned type matches
            the type stored. Specifying a read `~lsst.daf.butler.StorageClass`
            can force a different type to be returned.
            This type must be compatible with the original type.

        Returns
        -------
        return : `object`
            The dataset pointed to by this handle. Whether this returns the
            original object or a copy is controlled by the ``copy`` property
            of the handle that is set at handle construction time.
            If the stored object is `None` this method always returns `None`
            regardless of any component request or parameters.

        Raises
        ------
        KeyError
            Raised if a component or parameters are used but no storage
            class can be found.
        """
        if self.inMemoryDataset is None:
            return None

        if self.parameters is not None:
            mergedParameters = self.parameters.copy()
            if parameters is not None:
                mergedParameters.update(parameters)
        elif parameters is not None:
            mergedParameters = parameters
        else:
            mergedParameters = {}

        returnStorageClass: StorageClass | None = None
        if storageClass:
            if isinstance(storageClass, str):
                factory = StorageClassFactory()
                returnStorageClass = factory.getStorageClass(storageClass)
            else:
                returnStorageClass = storageClass

        inMemoryDataset = self.inMemoryDataset

        if self.copy:
            # An optimization might be to defer copying until any components
            # and parameters have been applied. This can be a problem since
            # most component storage classes do not bother to define a
            # storage class delegate and the default delegate uses deepcopy()
            # which can fail if explicit support for deepcopy() is missing
            # or pickle does not work.
            # Copying will require a storage class be determined, which is
            # not normally required for the default case of no parameters and
            # no components.
            thisStorageClass = self._getStorageClass()
            try:
                delegate = thisStorageClass.delegate()
            except TypeError:
                # Try the default copy options if no delegate is available.
                delegate = StorageClassDelegate(thisStorageClass)

            inMemoryDataset = delegate.copy(inMemoryDataset)

        if component or mergedParameters:
            # This requires a storage class lookup to locate the delegate
            # class.
            thisStorageClass = self._getStorageClass()

            # Parameters for derived components are applied against the
            # composite.
            if component in thisStorageClass.derivedComponents:
                # For some reason MyPy doesn't see the line above as narrowing
                # 'component' from 'str | None' to 'str'.
                component = cast(str, component)
                thisStorageClass.validateParameters(parameters)

                # Process the parameters (hoping this never modifies the
                # original object).
                inMemoryDataset = thisStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )
                mergedParameters = {}  # They have now been used

                readStorageClass = thisStorageClass.derivedComponents[component]
            else:
                if component:
                    readStorageClass = thisStorageClass.components[component]
                else:
                    readStorageClass = thisStorageClass
                readStorageClass.validateParameters(mergedParameters)

            if component:
                inMemoryDataset = thisStorageClass.delegate().getComponent(inMemoryDataset, component)

            if mergedParameters:
                inMemoryDataset = readStorageClass.delegate().handleParameters(
                    inMemoryDataset, mergedParameters
                )

            if returnStorageClass:
                return returnStorageClass.coerce_type(inMemoryDataset)
            return inMemoryDataset
        else:
            # If there are no parameters or component requests the object
            # can be returned as is, but possibly with conversion.
            if returnStorageClass:
                return returnStorageClass.coerce_type(inMemoryDataset)
            return inMemoryDataset

    def _getStorageClass(self) -> StorageClass:
        """Return the relevant storage class.

        Returns
        -------
        storageClass : `~lsst.daf.butler.StorageClass`
            The storage class associated with this handle, or one derived
            from the python type of the stored object.

        Raises
        ------
        KeyError
            Raised if the storage class could not be found.
        """
        factory = StorageClassFactory()
        if self.storageClass:
            return factory.getStorageClass(self.storageClass)

        # Need to match python type.
        pytype = type(self.inMemoryDataset)
        return factory.findStorageClass(pytype)
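
    # Resolution sketch ("StructuredDataDict" is only an example storage class
    # name; what a given object resolves to depends on the factory
    # configuration):
    #
    #     InMemoryDatasetHandle({}, storageClass="StructuredDataDict")._getStorageClass()
    #     # -> StorageClassFactory().getStorageClass("StructuredDataDict")
    #     InMemoryDatasetHandle({})._getStorageClass()
    #     # -> StorageClassFactory().findStorageClass(dict)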

    inMemoryDataset: Any
    """The object to store in this dataset handle for later retrieval.
    """

    dataId: DataCoordinate | frozendict
    """The dataId (a `~lsst.daf.butler.DataCoordinate` or dataId-like
    `frozendict`) associated with this dataset handle.
    """

    storageClass: str | None = None
    """The name of the `~lsst.daf.butler.StorageClass` associated with this
    dataset.

    If `None`, the storage class will be looked up from the factory.
    """

    parameters: dict | None = None
    """Optional parameters that may be used to specify a subset of the dataset
    to be loaded (`dict` or `None`).
    """

    copy: bool = False
    """Control whether a copy of the in-memory dataset is returned for every
    call to `get()`."""
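
# Usage sketch for the ``copy`` flag (not part of the original module; copying
# relies on the resolved storage class providing a delegate or working
# deepcopy support for the stored object):
#
#     handle = InMemoryDatasetHandle({"a": 1}, copy=True)
#     handle.get() is handle.inMemoryDataset   # False: get() returned a copy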