# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Generic datastore code useful for most datastores."""

__all__ = ("GenericBaseDatastore", )

import logging
from abc import abstractmethod
from typing import (
    TYPE_CHECKING,
    Any,
    Iterable,
    List,
    Mapping,
    Optional,
    Sequence,
    Tuple,
)

from lsst.daf.butler import Datastore, DatasetTypeNotSupportedError
from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge

if TYPE_CHECKING:
    from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo

log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
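
    Notes
    -----
    A rough sketch of the storage-information hooks a concrete subclass
    provides (``_bridge`` and ``_records`` are illustrative attribute
    names, not part of this API; a real subclass must also implement the
    remaining abstract `Datastore` methods such as ``get`` and ``put``)::

        class SketchDatastore(GenericBaseDatastore):
            @property
            def bridge(self) -> DatastoreRegistryBridge:
                return self._bridge

            def addStoredItemInfo(self, refs, infos):
                for ref, info in zip(refs, infos):
                    self._records.setdefault(ref.id, []).append(info)

            def getStoredItemsInfo(self, ref):
                return self._records.get(ref.id, [])

            def removeStoredItemInfo(self, ref):
                del self._records[ref.id]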
54 """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

    @abstractmethod
    def addStoredItemInfo(self, refs: Iterable[DatasetRef],
                          infos: Iterable[Any]) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information about files stored in this `Datastore`
        that are associated with this dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and their associated
            formatters for this dataset. Only one file will be returned
            if the dataset has not been disassembled. Can return an empty
            list if no matching datasets can be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

    def _register_datasets(self, refsAndInfos: Iterable[Tuple[DatasetRef, StoredDatastoreItemInfo]]) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : sequence of `tuple` [`DatasetRef`,
                       `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        """
        expandedRefs: List[DatasetRef] = []
        expandedItemInfos = []

        for ref, itemInfo in refsAndInfos:
            expandedRefs.append(ref)
            expandedItemInfos.append(itemInfo)

        # Dataset location only cares about the registry ID, so if the
        # dataset has been disassembled in the datastore we have to
        # deduplicate. The component refs have different dataset types,
        # so a set cannot do this; deduplicate on the dataset ID instead.
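        # For example, a composite disassembled into two component refs
        # (say ``exp.image`` and ``exp.mask``) shares a single dataset ID,
        # so the dict below collapses them to one registry entry.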
        registryRefs = {r.id: r for r in expandedRefs}
        self.bridge.insert(registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos)

    def _post_process_get(self, inMemoryDataset: Any, readStorageClass: StorageClass,
                          assemblerParams: Optional[Mapping[str, Any]] = None,
                          isComponent: bool = False) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset, updated by the assembler parameters and coerced
            to the expected Python type if necessary.
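
        Examples
        --------
        A hedged sketch of how a subclass ``get`` implementation might
        invoke this after reading the raw object (``raw`` and ``params``
        are illustrative local names, not part of this API)::

            inMemoryDataset = self._post_process_get(raw, readStorageClass,
                                                     assemblerParams=params,
                                                     isComponent=False)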
153 """
154 # Process any left over parameters
155 if assemblerParams:
156 inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)
158 # Validate the returned data type matches the expected data type
159 pytype = readStorageClass.pytype
161 allowedTypes = []
162 if pytype: 162 ↛ 166line 162 didn't jump to line 166, because the condition on line 162 was never false
163 allowedTypes.append(pytype)
165 # Special case components to allow them to be None
166 if isComponent:
167 allowedTypes.append(type(None))
169 if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)): 169 ↛ 170line 169 didn't jump to line 170, because the condition on line 169 was never true
170 inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)
172 return inMemoryDataset

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
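
        Examples
        --------
        A sketch of how a subclass ``put`` might call this before writing
        anything, so rejected datasets never reach storage (the body shown
        is illustrative, not part of this API)::

            def put(self, inMemoryDataset, ref):
                self._validate_put_parameters(inMemoryDataset, ref)
                # ... write the artifact, then self._register_datasets(...)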
183 """
184 storageClass = ref.datasetType.storageClass
186 # Sanity check
187 if not isinstance(inMemoryDataset, storageClass.pytype): 187 ↛ 188line 187 didn't jump to line 188, because the condition on line 187 was never true
188 raise TypeError("Inconsistency between supplied object ({}) "
189 "and storage class type ({})".format(type(inMemoryDataset),
190 storageClass.pytype))
192 # Confirm that we can accept this dataset
193 if not self.constraints.isAcceptable(ref):
194 # Raise rather than use boolean return value.
195 raise DatasetTypeNotSupportedError(f"Dataset {ref} has been rejected by this datastore via"
196 " configuration.")
198 return

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
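
        Examples
        --------
        Copying one dataset between two existing datastores (``src`` and
        ``dst`` are placeholder names for `Datastore` instances)::

            dst.transfer(src, ref)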
240 """
241 assert inputDatastore is not self # unless we want it for renames?
242 inMemoryDataset = inputDatastore.get(ref)
243 return self.put(inMemoryDataset, ref)