# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
22"""Generic datastore code useful for most datastores."""
24from __future__ import annotations
26__all__ = ("GenericBaseDatastore",)
28import logging
29from abc import abstractmethod
30from collections.abc import Iterable, Mapping, Sequence
31from typing import TYPE_CHECKING, Any
33from lsst.daf.butler import DatasetTypeNotSupportedError, Datastore
34from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge
36from ..registry.interfaces import DatabaseInsertMode
38if TYPE_CHECKING:
39 from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo
41log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

    @abstractmethod
    def addStoredItemInfo(
        self,
        refs: Iterable[DatasetRef],
        infos: Iterable[Any],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        insert_mode : `~lsst.daf.butler.registry.interfaces.DatabaseInsertMode`
            Mode to use to insert the new records into the table. The
            options are ``INSERT`` (error if pre-existing), ``REPLACE``
            (replace content with new values), and ``ENSURE`` (skip if the row
            already exists).
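
        Examples
        --------
        A minimal sketch of how a concrete subclass might be called;
        ``datastore``, ``refs``, and ``infos`` are illustrative stand-ins
        rather than names defined by this module::

            datastore.addStoredItemInfo(
                refs, infos, insert_mode=DatabaseInsertMode.REPLACE
            )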
79 """
80 raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information associated with the files stored in this
        `Datastore` for the given dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and associated formatters
            for this dataset. Only one file will be returned if the
            dataset has not been disassembled. Can return an empty list
            if no matching datasets can be found.
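
        Examples
        --------
        A minimal sketch of one way a caller might inspect the result;
        ``datastore`` is an illustrative concrete subclass instance::

            infos = datastore.getStoredItemsInfo(ref)
            if not infos:
                log.warning("Dataset %s is not known to this datastore", ref)
            elif len(infos) > 1:
                log.debug("Dataset %s was disassembled into %d files", ref, len(infos))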
99 """
100 raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

    def _register_datasets(
        self,
        refsAndInfos: Iterable[tuple[DatasetRef, StoredDatastoreItemInfo]],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : sequence of `tuple` [`DatasetRef`, `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        insert_mode : `~lsst.daf.butler.registry.interfaces.DatabaseInsertMode`, optional
            Whether the new records should be brand new (``INSERT``, the
            default), allowed to exist already (``ENSURE``), or replace any
            existing records (``REPLACE``).
131 """
132 expandedRefs: list[DatasetRef] = []
133 expandedItemInfos = []
135 for ref, itemInfo in refsAndInfos:
136 expandedRefs.append(ref)
137 expandedItemInfos.append(itemInfo)
139 # Dataset location only cares about registry ID so if we have
140 # disassembled in datastore we have to deduplicate. Since they
141 # will have different datasetTypes we can't use a set
142 registryRefs = {r.id: r for r in expandedRefs}
143 if insert_mode == DatabaseInsertMode.INSERT:
144 self.bridge.insert(registryRefs.values())
145 else:
146 # There are only two columns and all that matters is the
147 # dataset ID.
148 self.bridge.ensure(registryRefs.values())
149 self.addStoredItemInfo(expandedRefs, expandedItemInfos, insert_mode=insert_mode)

    def _post_process_get(
        self,
        inMemoryDataset: Any,
        readStorageClass: StorageClass,
        assemblerParams: Mapping[str, Any] | None = None,
        isComponent: bool = False,
    ) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset after parameter handling and type coercion have
            been applied.
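
        Examples
        --------
        A minimal sketch of how a subclass ``get`` implementation might
        finish up after deserializing an artifact; ``data`` and the
        ``"bbox"`` parameter are illustrative only::

            data = self._post_process_get(
                data,
                ref.datasetType.storageClass,
                assemblerParams={"bbox": bbox},
                isComponent=False,
            )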
173 """
174 # Process any left over parameters
175 if assemblerParams:
176 inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)
178 # Validate the returned data type matches the expected data type
179 pytype = readStorageClass.pytype
181 allowedTypes = []
182 if pytype:
183 allowedTypes.append(pytype)
185 # Special case components to allow them to be None
186 if isComponent:
187 allowedTypes.append(type(None))
189 if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
190 inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)
192 return inMemoryDataset

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.

        Raises
        ------
        TypeError
            Raised if the supplied object does not match the python type
            expected by the dataset type's storage class.
        DatasetTypeNotSupportedError
            Raised if this datastore's configured constraints reject the
            dataset.
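
        Examples
        --------
        A subclass ``put`` implementation would typically call this
        before writing anything, for example::

            self._validate_put_parameters(inMemoryDataset, ref)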
203 """
204 storageClass = ref.datasetType.storageClass
206 # Sanity check
207 if not isinstance(inMemoryDataset, storageClass.pytype):
208 raise TypeError(
209 f"Inconsistency between supplied object ({type(inMemoryDataset)}) "
210 f"and storage class type ({storageClass.pytype})"
211 )
213 # Confirm that we can accept this dataset
214 if not self.constraints.isAcceptable(ref):
215 # Raise rather than use boolean return value.
216 raise DatasetTypeNotSupportedError(
217 f"Dataset {ref} has been rejected by this datastore via configuration."
218 )
220 return

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Raised when an attempt is made to remove a dataset that does
            not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`, and store the
        result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
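
        Examples
        --------
        A minimal sketch; ``source`` and ``target`` are illustrative
        datastore instances rather than names defined by this module::

            target.transfer(source, ref)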
262 """
263 assert inputDatastore is not self # unless we want it for renames?
264 inMemoryDataset = inputDatastore.get(ref)
265 return self.put(inMemoryDataset, ref)