Coverage for python/lsst/daf/butler/datastores/genericDatastore.py: 96%
54 statements
coverage.py v7.2.5, created at 2023-05-03 09:13 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

"""Generic datastore code useful for most datastores."""

__all__ = ("GenericBaseDatastore",)

import logging
from abc import abstractmethod
from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, Optional, Sequence, Tuple

from lsst.daf.butler import DatasetTypeNotSupportedError, Datastore
from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge

if TYPE_CHECKING:
    from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo

log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
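
    Examples
    --------
    A schematic sketch of a possible subclass, showing only the hooks
    declared here; the remaining abstract methods inherited from
    `Datastore` are omitted, so this is illustrative rather than
    runnable::

        class MyDatastore(GenericBaseDatastore):
            @property
            def bridge(self) -> DatastoreRegistryBridge:
                # ``self._bridge`` is a hypothetical attribute set up
                # by the subclass constructor.
                return self._bridge

            def addStoredItemInfo(self, refs, infos):
                ...

            def getStoredItemsInfo(self, ref):
                ...

            def removeStoredItemInfo(self, ref):
                ...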
45 """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

    @abstractmethod
    def addStoredItemInfo(self, refs: Iterable[DatasetRef], infos: Iterable[Any]) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information associated with the files stored in this
        `Datastore` for the given dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and formatters associated
            with this dataset. Only one file will be returned if the
            dataset has not been disassembled. Can return an empty list
            if no matching datasets can be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

    def _register_datasets(self, refsAndInfos: Iterable[Tuple[DatasetRef, StoredDatastoreItemInfo]]) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : sequence of `tuple` [`DatasetRef`, `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
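
        Examples
        --------
        A minimal sketch, assuming ``self`` is a concrete subclass instance
        and ``ref``/``info`` are an existing `DatasetRef` and its
        datastore-internal `StoredDatastoreItemInfo` (both hypothetical
        here)::

            self._register_datasets([(ref, info)])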
110 """
        expandedRefs: List[DatasetRef] = []
        expandedItemInfos = []

        for ref, itemInfo in refsAndInfos:
            expandedRefs.append(ref)
            expandedItemInfos.append(itemInfo)

        # Dataset location only cares about the registry ID, so if the
        # dataset has been disassembled in the datastore we have to
        # deduplicate. Since the component refs will have different
        # dataset types we can't use a set.
        registryRefs = {r.id: r for r in expandedRefs}
        self.bridge.insert(registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos)

    def _post_process_get(
        self,
        inMemoryDataset: Any,
        readStorageClass: StorageClass,
        assemblerParams: Optional[Mapping[str, Any]] = None,
        isComponent: bool = False,
    ) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset, possibly modified by the assembler parameters and
            coerced to the expected Python type.
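
        Examples
        --------
        A minimal sketch of a typical call, assuming ``dataset`` was just
        read back from storage and ``params`` holds any user-supplied read
        parameters the formatter could not handle (names hypothetical)::

            dataset = self._post_process_get(
                dataset, ref.datasetType.storageClass, assemblerParams=params
            )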
147 """
        # Process any left over parameters
        if assemblerParams:
            inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)

        # Validate the returned data type matches the expected data type
        pytype = readStorageClass.pytype

        allowedTypes = []
        if pytype:  # coverage: partial branch; this condition was never false in tests
            allowedTypes.append(pytype)

        # Special case components to allow them to be None
        if isComponent:
            allowedTypes.append(type(None))

        if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
            inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)

        return inMemoryDataset

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Raised if the type of the supplied object does not match the
            Python type of the dataset's storage class.
        DatasetTypeNotSupportedError
            Raised if the dataset is rejected by this datastore's
            configured constraints.
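
        Examples
        --------
        A hypothetical failure case, assuming ``ref`` expects a storage
        class whose Python type is not `list`::

            self._validate_put_parameters([1, 2, 3], ref)  # raises TypeError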
177 """
        storageClass = ref.datasetType.storageClass

        # Sanity check
        if not isinstance(inMemoryDataset, storageClass.pytype):  # coverage: branch never taken in tests
            raise TypeError(
                f"Inconsistency between supplied object ({type(inMemoryDataset)}) "
                f"and storage class type ({storageClass.pytype})"
            )

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via configuration."
            )

        return

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
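
        Examples
        --------
        Remove a single dataset, assuming ``datastore`` is a concrete
        datastore instance and ``ref`` refers to a stored dataset::

            datastore.remove(ref)

        Per the implementation below, this is equivalent to::

            datastore.trash(ref, ignore_errors=False)
            datastore.emptyTrash(ignore_errors=False)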
221 """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
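
        Examples
        --------
        Copy a dataset between two datastores, assuming ``remoteStore``
        and ``localStore`` are configured datastores and ``ref`` exists
        in the input datastore (names hypothetical)::

            localStore.transfer(remoteStore, ref)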
236 """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)