Coverage for python/lsst/daf/butler/datastores/genericDatastore.py : 83%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Generic datastore code useful for most datastores."""

__all__ = ("GenericBaseDatastore", )

import logging
from abc import abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Iterable,
    List,
    Mapping,
    Optional,
    Sequence,
    Tuple,
)

from lsst.daf.butler import Datastore, DatasetTypeNotSupportedError
from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge

if TYPE_CHECKING:
    from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo

log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

    @abstractmethod
    def addStoredItemInfo(self, refs: Iterable[DatasetRef],
                          infos: Iterable[Any]) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemInfo(self, ref: DatasetRef) -> Any:
        """Retrieve information associated with a file stored in this
        `Datastore`.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        info : `StoredDatastoreItemInfo`
            Stored information about this file and its formatter.

        Raises
        ------
        KeyError
            Raised if the dataset with that ID cannot be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information associated with the files stored in this
        `Datastore` for the given dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and their associated
            formatters for this dataset. Only one file will be returned
            if the dataset has not been disassembled. Can return an empty
            list if no matching datasets can be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()
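
    # A minimal sketch (an assumption, not this package's actual implementation)
    # of how a concrete subclass could back the four abstract record-keeping
    # methods above with a simple in-memory mapping keyed by dataset ID:
    #
    #     def __init__(self, *args, **kwargs):
    #         super().__init__(*args, **kwargs)
    #         self._records = {}
    #
    #     def addStoredItemInfo(self, refs, infos):
    #         for ref, info in zip(refs, infos):
    #             self._records.setdefault(ref.id, []).append(info)
    #
    #     def getStoredItemsInfo(self, ref):
    #         return list(self._records.get(ref.id, []))
    #
    #     def getStoredItemInfo(self, ref):
    #         items = self.getStoredItemsInfo(ref)
    #         if not items:
    #             raise KeyError(f"No stored item info for dataset {ref.id}")
    #         return items[0]
    #
    #     def removeStoredItemInfo(self, ref):
    #         self._records.pop(ref.id, None)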

    def _register_datasets(self, refsAndInfos: Iterable[Tuple[DatasetRef, StoredDatastoreItemInfo]]) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : sequence of `tuple` [`DatasetRef`, `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        """
        expandedRefs: List[DatasetRef] = []
        expandedItemInfos = []

        for ref, itemInfo in refsAndInfos:
            # Need the main dataset and the components
            expandedRefs.extend(ref.flatten([ref]))

            if ref.components is None:
                raise RuntimeError("Unable to register an unresolved DatasetRef")

            # Need one for the main ref and then one for each registered
            # component
            expandedItemInfos.extend([itemInfo] * (len(ref.components) + 1))

        # Dataset location only cares about registry ID so if we have
        # disassembled in datastore we have to deduplicate. Since they
        # will have different datasetTypes we can't use a set
        registryRefs = {r.id: r for r in expandedRefs}
        self.bridge.insert(registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos)
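
    # Typical use from a concrete subclass's put(), once the artifact has been
    # written and its internal metadata captured (the write step shown here is
    # a hypothetical helper, not part of this API):
    #
    #     storedInfo = self._write_artifact(inMemoryDataset, ref)
    #     self._register_datasets([(ref, storedInfo)])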

    def _move_to_trash_in_registry(self, ref: DatasetRef) -> None:
        """Tell registry that this dataset and associated components
        are to be trashed.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset to mark for removal from registry.

        Notes
        -----
        Dataset is not removed from internal stored item info table.
        """
        # Note that a ref can point to component dataset refs that
        # have been deleted already from registry but are still in
        # the python object. moveToTrash will deal with that.
        self.bridge.moveToTrash(ref.flatten([ref]))
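
    # A subclass's trash() implementation might call this helper so that the
    # registry bookkeeping stays in step with artifact removal (a sketch, not
    # a requirement of the API):
    #
    #     def trash(self, ref, ignore_errors=True):
    #         self._move_to_trash_in_registry(ref)
    #         ...  # mark the artifact itself for later deletion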

    def _post_process_get(self, inMemoryDataset: Any, readStorageClass: StorageClass,
                          assemblerParams: Optional[Mapping[str, Any]] = None,
                          isComponent: bool = False) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.
        """
        # Process any left over parameters
        if assemblerParams:
            inMemoryDataset = readStorageClass.assembler().handleParameters(inMemoryDataset, assemblerParams)

        # Validate the returned data type matches the expected data type
        pytype = readStorageClass.pytype

        allowedTypes = []
        if pytype:
            allowedTypes.append(pytype)

        # Special case components to allow them to be None
        if isComponent:
            allowedTypes.append(type(None))

        if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
            raise TypeError("Got Python type {} from datastore but expected {}".format(type(inMemoryDataset),
                                                                                       pytype))

        return inMemoryDataset
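
    # Typical use from a concrete subclass's get(), after its read machinery
    # has produced a raw Python object (``raw``, ``params`` and ``isComponent``
    # are illustrative names):
    #
    #     return self._post_process_get(raw, ref.datasetType.storageClass,
    #                                   assemblerParams=params,
    #                                   isComponent=isComponent)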

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError("Inconsistency between supplied object ({}) "
                            "and storage class type ({})".format(type(inMemoryDataset),
                                                                 storageClass.pytype))

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
                                               " configuration.")

        return
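
    # A concrete put() would normally call this first so that mistyped or
    # unsupported datasets are rejected before any artifact is written
    # (a sketch of the expected call):
    #
    #     self._validate_put_parameters(inMemoryDataset, ref)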

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and can not be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)
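
    # Example of copying a single dataset between two datastore instances
    # (a sketch; ``sourceDatastore``, ``targetDatastore`` and ``ref`` are
    # illustrative names, and both datastores must be able to resolve ``ref``):
    #
    #     targetDatastore.transfer(sourceDatastore, ref)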