Coverage for python/lsst/daf/butler/datastores/genericDatastore.py : 85%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Generic datastore code useful for most datastores."""

__all__ = ("GenericBaseDatastore",)

import logging
from abc import abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Iterable,
    List,
    Mapping,
    Optional,
    Sequence,
    Tuple,
)

from lsst.daf.butler import Datastore, DatasetTypeNotSupportedError
from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge

if TYPE_CHECKING:
    from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo

log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

    @abstractmethod
    def addStoredItemInfo(self, refs: Iterable[DatasetRef],
                          infos: Iterable[Any]) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
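
        Examples
        --------
        A sketch of the expected call from a concrete subclass, pairing
        each ref with the metadata recorded while writing its artifact
        (``ref`` and ``itemInfo`` are hypothetical names)::

            self.addStoredItemInfo([ref], [itemInfo])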
76 """
77 raise NotImplementedError()
79 @abstractmethod
80 def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
81 """Retrieve information associated with files stored in this
82 `Datastore` associated with this dataset ref.
84 Parameters
85 ----------
86 ref : `DatasetRef`
87 The dataset that is to be queried.
89 Returns
90 -------
91 items : `list` [`StoredDatastoreItemInfo`]
92 Stored information about the files and associated formatters
93 associated with this dataset. Only one file will be returned
94 if the dataset has not been disassembled. Can return an empty
95 list if no matching datasets can be found.
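
        Examples
        --------
        A sketch of how a concrete subclass might inspect the stored
        records (``datastore`` and ``ref`` are hypothetical names from
        an earlier ``put``)::

            for info in datastore.getStoredItemsInfo(ref):
                log.debug("Stored item info: %s", info)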
96 """
97 raise NotImplementedError()
99 @abstractmethod
100 def removeStoredItemInfo(self, ref: DatasetRef) -> None:
101 """Remove information about the file associated with this dataset.
103 Parameters
104 ----------
105 ref : `DatasetRef`
106 The dataset that has been removed.
107 """
108 raise NotImplementedError()
110 def _register_datasets(self, refsAndInfos: Iterable[Tuple[DatasetRef, StoredDatastoreItemInfo]]) -> None:
111 """Update registry to indicate that one or more datasets have been
112 stored.
114 Parameters
115 ----------
116 refsAndInfos : sequence `tuple` [`DatasetRef`,
117 `StoredDatastoreItemInfo`]
118 Datasets to register and the internal datastore metadata associated
119 with them.
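
        Examples
        --------
        A sketch of the expected call shape from within a subclass,
        after the artifact for ``ref`` has been written and described by
        ``itemInfo`` (hypothetical names)::

            self._register_datasets([(ref, itemInfo)])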
120 """
121 expandedRefs: List[DatasetRef] = []
122 expandedItemInfos = []
124 for ref, itemInfo in refsAndInfos:
125 expandedRefs.append(ref)
126 expandedItemInfos.append(itemInfo)
128 # Dataset location only cares about registry ID so if we have
129 # disassembled in datastore we have to deduplicate. Since they
130 # will have different datasetTypes we can't use a set
131 registryRefs = {r.id: r for r in expandedRefs}
132 self.bridge.insert(registryRefs.values())
133 self.addStoredItemInfo(expandedRefs, expandedItemInfos)

    def _move_to_trash_in_registry(self, ref: DatasetRef) -> None:
        """Tell registry that this dataset and associated components
        are to be trashed.

        Parameters
        ----------
        ref : `DatasetRef`
            Dataset to mark for removal from registry.

        Notes
        -----
        The dataset is not removed from the internal stored item info
        table.
        """
        # Note that a ref can point to component dataset refs that
        # have been deleted already from registry but are still in
        # the python object. moveToTrash will deal with that.
        self.bridge.moveToTrash([ref])

    def _post_process_get(self, inMemoryDataset: Any, readStorageClass: StorageClass,
                          assemblerParams: Optional[Mapping[str, Any]] = None,
                          isComponent: bool = False) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset, possibly modified by the assembler, with the
            expected Python type.
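
        Examples
        --------
        A sketch of a typical call from a subclass ``get``
        implementation, just after the artifact has been read
        (``dataset``, ``storageClass``, and ``params`` are hypothetical
        names)::

            dataset = self._post_process_get(dataset, storageClass,
                                             assemblerParams=params,
                                             isComponent=False)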
171 """
172 # Process any left over parameters
173 if assemblerParams:
174 inMemoryDataset = readStorageClass.assembler().handleParameters(inMemoryDataset, assemblerParams)
176 # Validate the returned data type matches the expected data type
177 pytype = readStorageClass.pytype
179 allowedTypes = []
180 if pytype: 180 ↛ 184line 180 didn't jump to line 184, because the condition on line 180 was never false
181 allowedTypes.append(pytype)
183 # Special case components to allow them to be None
184 if isComponent:
185 allowedTypes.append(type(None))
187 if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)): 187 ↛ 188line 187 didn't jump to line 188, because the condition on line 187 was never true
188 raise TypeError("Got Python type {} from datastore but expected {}".format(type(inMemoryDataset),
189 pytype))
191 return inMemoryDataset

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
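
        Examples
        --------
        A sketch of use at the top of a subclass ``put``
        implementation; both failure modes raise rather than return a
        boolean::

            def put(self, inMemoryDataset, ref):
                self._validate_put_parameters(inMemoryDataset, ref)
                ...  # write the artifact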
202 """
203 storageClass = ref.datasetType.storageClass
205 # Sanity check
206 if not isinstance(inMemoryDataset, storageClass.pytype): 206 ↛ 207line 206 didn't jump to line 207, because the condition on line 206 was never true
207 raise TypeError("Inconsistency between supplied object ({}) "
208 "and storage class type ({})".format(type(inMemoryDataset),
209 storageClass.pytype))
211 # Confirm that we can accept this dataset
212 if not self.constraints.isAcceptable(ref):
213 # Raise rather than use boolean return value.
214 raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
215 " configuration.")
217 return

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately
        calling `emptyTrash()`. This call is meant to be immediate so
        errors encountered during removal are not ignored.
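
        Examples
        --------
        A sketch of immediate removal in a test, assuming ``datastore``
        currently holds ``ref`` (hypothetical names)::

            datastore.remove(ref)  # the artifact is deleted immediately
            assert not datastore.exists(ref)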
244 """
245 self.trash(ref, ignore_errors=False)
246 self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore` and store the
        result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
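
        Examples
        --------
        A sketch of copying a dataset between two configured datastores
        (``sourceDatastore`` and ``destinationDatastore`` are
        hypothetical names)::

            destinationDatastore.transfer(sourceDatastore, ref)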
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)