Coverage for python/lsst/daf/butler/datastores/genericDatastore.py: 43%
58 statements
coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

28"""Generic datastore code useful for most datastores."""
30from __future__ import annotations
32__all__ = ("GenericBaseDatastore",)
34import logging
35from abc import abstractmethod
36from collections.abc import Iterable, Mapping, Sequence
37from typing import TYPE_CHECKING, Any
39from lsst.daf.butler import DatasetTypeNotSupportedError, Datastore
40from lsst.daf.butler.registry.interfaces import DatastoreRegistryBridge
42from ..registry.interfaces import DatabaseInsertMode
44if TYPE_CHECKING:
45 from lsst.daf.butler import DatasetRef, StorageClass, StoredDatastoreItemInfo
47log = logging.getLogger(__name__)
class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

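    # Concrete datastores typically create this bridge once at construction
    # time and return the cached instance. A minimal sketch, assuming the
    # subclass registered itself with the bridge manager supplied to its
    # constructor (``self._bridge`` is a hypothetical attribute name):
    #
    #     @property
    #     def bridge(self) -> DatastoreRegistryBridge:
    #         # e.g. set in __init__ via bridgeManager.register(self.name)
    #         return self._bridge
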
    @abstractmethod
    def addStoredItemInfo(
        self,
        refs: Iterable[DatasetRef],
        infos: Iterable[Any],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : sequence of `DatasetRef`
            The datasets that have been stored.
        infos : sequence of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        insert_mode : `~lsst.daf.butler.registry.interfaces.DatabaseInsertMode`
            Mode to use to insert the new records into the table. The
            options are ``INSERT`` (error if pre-existing), ``REPLACE``
            (replace content with new values), and ``ENSURE`` (skip if the row
            already exists).
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information about files stored in this `Datastore`
        that are associated with this dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and associated formatters
            for this dataset. Only one file will be returned
            if the dataset has not been disassembled. Can return an empty
            list if no matching datasets can be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

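    # A minimal sketch of how a subclass might back the three abstract
    # record-keeping methods above with an in-memory dict keyed by dataset
    # ID (``_records`` is a hypothetical attribute; the other `Datastore`
    # abstract methods are omitted):
    #
    #     def addStoredItemInfo(self, refs, infos, insert_mode=DatabaseInsertMode.INSERT):
    #         for ref, info in zip(refs, infos):
    #             if ref.id in self._records:
    #                 if insert_mode is DatabaseInsertMode.INSERT:
    #                     raise RuntimeError(f"Record for {ref} already exists")
    #                 if insert_mode is DatabaseInsertMode.ENSURE:
    #                     continue
    #             self._records[ref.id] = info  # INSERT or REPLACE
    #
    #     def getStoredItemsInfo(self, ref):
    #         return [self._records[ref.id]] if ref.id in self._records else []
    #
    #     def removeStoredItemInfo(self, ref):
    #         self._records.pop(ref.id, None)
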
    def _register_datasets(
        self,
        refsAndInfos: Iterable[tuple[DatasetRef, StoredDatastoreItemInfo]],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : sequence of `tuple` [`DatasetRef`,
                       `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        insert_mode : `DatabaseInsertMode`, optional
            Indicate whether the new records should be new ("insert",
            default), allowed to exist already ("ensure"), or replaced
            if already present ("replace").
        """
        expandedRefs: list[DatasetRef] = []
        expandedItemInfos = []

        for ref, itemInfo in refsAndInfos:
            expandedRefs.append(ref)
            expandedItemInfos.append(itemInfo)

        # Dataset location only cares about the registry ID, so if we have
        # disassembled in the datastore we have to deduplicate. Since the
        # components will have different datasetTypes we can't use a set.
        registryRefs = {r.id: r for r in expandedRefs}
        if insert_mode == DatabaseInsertMode.INSERT:
            self.bridge.insert(registryRefs.values())
        else:
            # There are only two columns and all that matters is the
            # dataset ID.
            self.bridge.ensure(registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos, insert_mode=insert_mode)

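    # Why deduplicate by ID: a composite disassembled by the datastore
    # yields one (ref, info) pair per component, all sharing the parent
    # dataset ID, so the bridge must see that ID only once even though
    # every component's info is recorded. A sketch with hypothetical names:
    #
    #     pairs = [(ref, image_info), (ref, mask_info)]  # same ref.id
    #     self._register_datasets(pairs, insert_mode=DatabaseInsertMode.ENSURE)
    #     # bridge.ensure() receives one ref; addStoredItemInfo() both infos.
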
    def _post_process_get(
        self,
        inMemoryDataset: Any,
        readStorageClass: StorageClass,
        assemblerParams: Mapping[str, Any] | None = None,
        isComponent: bool = False,
    ) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset, possibly modified by the assembler and coerced
            to the expected Python type.
        """
        # Process any left-over parameters.
        if assemblerParams:
            inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)

        # Validate the returned data type matches the expected data type.
        pytype = readStorageClass.pytype

        allowedTypes = []
        if pytype:
            allowedTypes.append(pytype)

        # Special case components to allow them to be None.
        if isComponent:
            allowedTypes.append(type(None))

        if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
            inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)

        return inMemoryDataset

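    # A sketch of the type handling above (``raw`` is hypothetical): a
    # component read may legitimately produce `None`, while a type mismatch
    # triggers storage-class coercion.
    #
    #     checked = self._post_process_get(raw, readStorageClass, isComponent=True)
    #     # raw is None          -> returned as-is (None allowed for components)
    #     # raw has wrong pytype -> readStorageClass.coerce_type(raw) is attempted
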
    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check.
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError(
                f"Inconsistency between supplied object ({type(inMemoryDataset)}) "
                f"and storage class type ({storageClass.pytype})"
            )

        # Confirm that we can accept this dataset.
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via configuration."
            )

        return

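    # The two failure modes, sketched with hypothetical names:
    #
    #     self._validate_put_parameters(42, image_ref)
    #     # -> TypeError if the ref's storage class pytype is not int
    #
    #     self._validate_put_parameters(image, rejected_ref)
    #     # -> DatasetTypeNotSupportedError if the configured constraints
    #     #    reject rejected_ref
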
    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and can not be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

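    # For example, in a datastore API test (names hypothetical):
    #
    #     datastore.remove(ref)   # artifact deleted immediately
    #     datastore.exists(ref)   # -> False
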
    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)

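# A usage sketch for ``transfer`` (both datastores and the ref are
# hypothetical): copy a dataset between datastores via an in-memory
# round trip.
#
#     destination.transfer(source, ref)
#     # equivalent to:
#     #     inMemoryDataset = source.get(ref)
#     #     destination.put(inMemoryDataset, ref)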