Coverage for python/lsst/daf/butler/datastores/genericDatastore.py: 35%
55 statements
coverage.py v7.2.7, created at 2023-06-28 10:10 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations
24"""Generic datastore code useful for most datastores."""
26__all__ = ("GenericBaseDatastore",)
28import logging
29from abc import abstractmethod
30from collections.abc import Iterable, Mapping, Sequence
31from typing import TYPE_CHECKING, Any
33from lsst.daf.butler import DatasetTypeNotSupportedError, Datastore
34from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge
36if TYPE_CHECKING:
37 from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo
39log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()
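
    # A minimal sketch (not the project's actual implementation) of how a
    # concrete subclass might satisfy this property, assuming it stashed a
    # ``DatastoreRegistryBridge`` instance as ``self._bridge`` during
    # construction:
    #
    #     @property
    #     def bridge(self) -> DatastoreRegistryBridge:
    #         return self._bridge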

    @abstractmethod
    def addStoredItemInfo(self, refs: Iterable[DatasetRef], infos: Iterable[Any]) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information associated with the files stored in this
        `Datastore` for the given dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and formatters associated
            with this dataset. Only one file will be returned if the
            dataset has not been disassembled. Can return an empty list
            if no matching datasets can be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

    def _register_datasets(self, refsAndInfos: Iterable[tuple[DatasetRef, StoredDatastoreItemInfo]]) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : iterable of `tuple` [`DatasetRef`, `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        """
        expandedRefs: list[DatasetRef] = []
        expandedItemInfos = []

        for ref, itemInfo in refsAndInfos:
            expandedRefs.append(ref)
            expandedItemInfos.append(itemInfo)

        # Dataset location only cares about the registry ID, so if the
        # dataset has been disassembled in the datastore we have to
        # deduplicate. Since the components will have different dataset
        # types we can't use a set.
        registryRefs = {r.id: r for r in expandedRefs}
        self.bridge.insert(registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos)
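
    # A minimal sketch of the intended call pattern from a subclass ``put``
    # implementation, assuming hypothetical ``ref`` and ``storedInfo`` objects
    # describing the artifact that was just written:
    #
    #     self._register_datasets([(ref, storedInfo)])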

    def _post_process_get(
        self,
        inMemoryDataset: Any,
        readStorageClass: StorageClass,
        assemblerParams: Mapping[str, Any] | None = None,
        isComponent: bool = False,
    ) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset, possibly modified by the assembler parameters and
            coerced to the expected Python type.
        """
        # Process any left over parameters
        if assemblerParams:
            inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)

        # Validate the returned data type matches the expected data type
        pytype = readStorageClass.pytype

        allowedTypes = []
        if pytype:
            allowedTypes.append(pytype)

        # Special case components to allow them to be None
        if isComponent:
            allowedTypes.append(type(None))

        if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
            inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)

        return inMemoryDataset
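
    # A minimal sketch of how a subclass ``get`` implementation might use
    # this helper, assuming a hypothetical ``raw`` object read from storage
    # and ``params`` extracted from the caller's request:
    #
    #     data = self._post_process_get(
    #         raw, ref.datasetType.storageClass, assemblerParams=params, isComponent=False
    #     )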

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError(
                f"Inconsistency between supplied object ({type(inMemoryDataset)}) "
                f"and storage class type ({storageClass.pytype})"
            )

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via configuration."
            )

        return
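
    # A minimal sketch of the expected behaviour, assuming a hypothetical
    # ``ref`` whose storage class expects a ``dict``:
    #
    #     self._validate_put_parameters({"a": 1}, ref)   # passes silently
    #     self._validate_put_parameters([1, 2, 3], ref)  # raises TypeError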

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)
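
    # A minimal sketch of the equivalent two-step pattern described in the
    # Notes above, for callers that prefer to tolerate removal errors rather
    # than have them raised (assuming a hypothetical ``ref``):
    #
    #     datastore.trash(ref, ignore_errors=True)
    #     datastore.emptyTrash(ignore_errors=True)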

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)
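

# A minimal end-to-end usage sketch, assuming two concrete datastore
# instances and a ``ref`` already stored in ``source`` (all names are
# illustrative only):
#
#     target.transfer(source, ref)
#     assert target.exists(ref)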