# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Generic datastore code useful for most datastores."""

__all__ = ("GenericBaseDatastore", )

import logging
from abc import abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Iterable,
    List,
    Mapping,
    Optional,
    Sequence,
    Tuple,
)

from lsst.daf.butler import Datastore, DatasetTypeNotSupportedError
from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge

if TYPE_CHECKING:
    from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo

log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

    @abstractmethod
    def addStoredItemInfo(self, refs: Iterable[DatasetRef],
                          infos: Iterable[Any]) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information associated with the files stored in this
        `Datastore` for the given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and associated formatters
            for this dataset. Only one file will be returned if the
            dataset has not been disassembled. Can return an empty list
            if no matching datasets can be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()
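
    # A minimal sketch of how a subclass could satisfy the three abstract
    # record-keeping hooks above using a plain in-memory dict (hypothetical
    # `_records` attribute, for illustration only; concrete datastores
    # usually persist these records in registry-managed tables):
    #
    #     def addStoredItemInfo(self, refs, infos):
    #         for ref, info in zip(refs, infos):
    #             self._records.setdefault(ref.id, []).append(info)
    #
    #     def getStoredItemsInfo(self, ref):
    #         return self._records.get(ref.id, [])
    #
    #     def removeStoredItemInfo(self, ref):
    #         self._records.pop(ref.id, None)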

    def _register_datasets(self, refsAndInfos: Iterable[Tuple[DatasetRef, StoredDatastoreItemInfo]]) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : iterable of `tuple` [`DatasetRef`,
                       `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        """
        expandedRefs: List[DatasetRef] = []
        expandedItemInfos = []

        for ref, itemInfo in refsAndInfos:
            expandedRefs.append(ref)
            expandedItemInfos.append(itemInfo)

        # Dataset location only cares about the registry ID, so if the
        # datastore has disassembled a composite we have to deduplicate.
        # Since the component refs will have different dataset types we
        # can not use a set.
        registryRefs = {r.id: r for r in expandedRefs}
        self.bridge.insert(registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos)
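
    # Example of the deduplication above: a composite disassembled into two
    # components produces two refs that share a single registry ID, so a
    # call such as (hypothetical ref and info names, for illustration only)
    #
    #     self._register_datasets([(componentRef1, info1),
    #                              (componentRef2, info2)])
    #
    # records both item infos via `addStoredItemInfo` but inserts only one
    # entry through the bridge.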

    def _post_process_get(self, inMemoryDataset: Any, readStorageClass: StorageClass,
                          assemblerParams: Optional[Mapping[str, Any]] = None,
                          isComponent: bool = False) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the Python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset, possibly modified by the assembler parameters.
        """
        # Process any leftover parameters
        if assemblerParams:
            inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)

        # Validate the returned data type matches the expected data type
        pytype = readStorageClass.pytype

        allowedTypes = []
        if pytype:
            allowedTypes.append(pytype)

        # Special case components to allow them to be None
        if isComponent:
            allowedTypes.append(type(None))

        if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
            raise TypeError("Got Python type {} from datastore but expected {}".format(type(inMemoryDataset),
                                                                                       pytype))

        return inMemoryDataset
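
    # Usage sketch for the helper above (hypothetical parameter values; the
    # accepted assembler parameter names come from the storage class
    # definition, not from this module):
    #
    #     comp = self._post_process_get(comp, readStorageClass,
    #                                   assemblerParams=params,
    #                                   isComponent=True)
    #
    # With `isComponent=True` a `None` result is accepted; otherwise the
    # object must be an instance of `readStorageClass.pytype` or a
    # `TypeError` is raised.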

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError("Inconsistency between supplied object ({}) "
                            "and storage class type ({})".format(type(inMemoryDataset),
                                                                 storageClass.pytype))

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
                                               " configuration.")

        return
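
    # A subclass `put` implementation would typically call this validator
    # before doing any work (a sketch under assumed names, not the actual
    # implementation of any concrete datastore):
    #
    #     def put(self, inMemoryDataset, ref):
    #         self._validate_put_parameters(inMemoryDataset, ref)
    #         itemInfo = self._write_artifact(inMemoryDataset, ref)  # hypothetical helper
    #         self._register_datasets([(ref, itemInfo)])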

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input datastore.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)
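
    # Usage sketch for `transfer` (hypothetical datastore instances):
    #
    #     localStore.transfer(remoteStore, ref)
    #
    # The dataset round-trips through memory via `get` and `put`, so the
    # receiving datastore's constraints still apply and can reject the
    # dataset with `DatasetTypeNotSupportedError`.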