# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Generic datastore code useful for most datastores."""
30from __future__ import annotations
32__all__ = ("GenericBaseDatastore",)
34import logging
35from abc import abstractmethod
36from collections.abc import Iterable, Mapping, Sequence
37from typing import TYPE_CHECKING, Any
39from .._exceptions import DatasetTypeNotSupportedError
40from ..registry.interfaces import DatabaseInsertMode, DatastoreRegistryBridge
41from ._datastore import Datastore
43if TYPE_CHECKING:
44 from .._dataset_ref import DatasetRef
45 from .._storage_class import StorageClass
46 from .stored_file_info import StoredDatastoreItemInfo
48log = logging.getLogger(__name__)


class GenericBaseDatastore(Datastore):
    """Methods useful for most implementations of a `Datastore`.

    Should always be sub-classed since key abstract methods are missing.
    """

    @property
    @abstractmethod
    def bridge(self) -> DatastoreRegistryBridge:
        """Object that manages the interface between this `Datastore` and the
        `Registry` (`DatastoreRegistryBridge`).
        """
        raise NotImplementedError()

    @abstractmethod
    def addStoredItemInfo(
        self,
        refs: Iterable[DatasetRef],
        infos: Iterable[Any],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Record internal storage information associated with one or more
        datasets.

        Parameters
        ----------
        refs : iterable of `DatasetRef`
            The datasets that have been stored.
        infos : iterable of `StoredDatastoreItemInfo`
            Metadata associated with the stored datasets.
        insert_mode : `~lsst.daf.butler.registry.interfaces.DatabaseInsertMode`
            Mode to use to insert the new records into the table. The
            options are ``INSERT`` (error if pre-existing), ``REPLACE``
            (replace content with new values), and ``ENSURE`` (skip if the row
            already exists).
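
        Examples
        --------
        A minimal illustrative sketch, not taken from a concrete
        implementation; ``datastore``, ``ref``, and ``info`` are assumed to
        exist already (a concrete subclass instance, a `DatasetRef`, and a
        `StoredDatastoreItemInfo`):

        >>> datastore.addStoredItemInfo(
        ...     [ref], [info], insert_mode=DatabaseInsertMode.REPLACE
        ... )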
        """
        raise NotImplementedError()

    @abstractmethod
    def getStoredItemsInfo(self, ref: DatasetRef) -> Sequence[Any]:
        """Retrieve information associated with files stored in this
        `Datastore` for the given dataset ref.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that is to be queried.

        Returns
        -------
        items : `list` [`StoredDatastoreItemInfo`]
            Stored information about the files and associated formatters
            for this dataset. Only one file will be returned
            if the dataset has not been disassembled. Can return an empty
            list if no matching datasets can be found.
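
        Examples
        --------
        Illustrative only; ``datastore`` and ``ref`` are assumed to exist:

        >>> for info in datastore.getStoredItemsInfo(ref):
        ...     print(info)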
        """
        raise NotImplementedError()

    @abstractmethod
    def removeStoredItemInfo(self, ref: DatasetRef) -> None:
        """Remove information about the file associated with this dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            The dataset that has been removed.
        """
        raise NotImplementedError()

    def _register_datasets(
        self,
        refsAndInfos: Iterable[tuple[DatasetRef, StoredDatastoreItemInfo]],
        insert_mode: DatabaseInsertMode = DatabaseInsertMode.INSERT,
    ) -> None:
        """Update registry to indicate that one or more datasets have been
        stored.

        Parameters
        ----------
        refsAndInfos : iterable of `tuple` [`DatasetRef`,
            `StoredDatastoreItemInfo`]
            Datasets to register and the internal datastore metadata
            associated with them.
        insert_mode : `~lsst.daf.butler.registry.interfaces.DatabaseInsertMode`, optional
            Indicate whether the new records should be new (``INSERT``, the
            default), allowed to exist already (``ENSURE``), or replaced if
            already present (``REPLACE``).
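
        Examples
        --------
        A minimal sketch; ``datastore``, ``ref``, and ``info`` are assumed
        to exist already (a concrete subclass instance, a `DatasetRef`, and
        its `StoredDatastoreItemInfo`):

        >>> datastore._register_datasets(
        ...     [(ref, info)], insert_mode=DatabaseInsertMode.ENSURE
        ... )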
        """
        expandedRefs: list[DatasetRef] = []
        expandedItemInfos = []

        for ref, itemInfo in refsAndInfos:
            expandedRefs.append(ref)
            expandedItemInfos.append(itemInfo)

        # Dataset location only cares about the registry ID, so if the
        # datastore has disassembled a composite we have to deduplicate.
        # Since the component refs have different dataset types, a set of
        # `DatasetRef` would not collapse them; key a dict on the ID instead.
        registryRefs = {r.id: r for r in expandedRefs}
        if insert_mode == DatabaseInsertMode.INSERT:
            self.bridge.insert(registryRefs.values())
        else:
            # There are only two columns and all that matters is the
            # dataset ID.
            self.bridge.ensure(registryRefs.values())
        self.addStoredItemInfo(expandedRefs, expandedItemInfos, insert_mode=insert_mode)

    def _post_process_get(
        self,
        inMemoryDataset: Any,
        readStorageClass: StorageClass,
        assemblerParams: Mapping[str, Any] | None = None,
        isComponent: bool = False,
    ) -> Any:
        """Given the Python object read from the datastore, manipulate
        it based on the supplied parameters and ensure the Python
        type is correct.

        Parameters
        ----------
        inMemoryDataset : `object`
            Dataset to check.
        readStorageClass : `StorageClass`
            The `StorageClass` used to obtain the assembler and to
            check the python type.
        assemblerParams : `dict`, optional
            Parameters to pass to the assembler. Can be `None`.
        isComponent : `bool`, optional
            If this is a component, allow the inMemoryDataset to be `None`.

        Returns
        -------
        inMemoryDataset : `object`
            The dataset, updated to reflect any remaining parameters and
            coerced to the expected Python type if necessary.
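
        Examples
        --------
        Illustrative only; ``datastore``, ``data``, ``storageClass``, and
        the ``rows`` assembler parameter are all hypothetical:

        >>> subset = datastore._post_process_get(
        ...     data, storageClass, assemblerParams={"rows": slice(0, 10)}
        ... )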
        """
        # Process any leftover parameters
        if assemblerParams:
            inMemoryDataset = readStorageClass.delegate().handleParameters(inMemoryDataset, assemblerParams)

        # Validate the returned data type matches the expected data type
        pytype = readStorageClass.pytype

        allowedTypes = []
        if pytype:
            allowedTypes.append(pytype)

        # Special case components to allow them to be None
        if isComponent:
            allowedTypes.append(type(None))

        if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
            inMemoryDataset = readStorageClass.coerce_type(inMemoryDataset)

        return inMemoryDataset

    def _validate_put_parameters(self, inMemoryDataset: Any, ref: DatasetRef) -> None:
        """Validate the supplied arguments for put.

        Parameters
        ----------
        inMemoryDataset : `object`
            The dataset to store.
        ref : `DatasetRef`
            Reference to the associated Dataset.
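
        Examples
        --------
        Illustrative only; an object whose type does not match the storage
        class raises `TypeError` (``datastore``, ``wrong_typed_object``, and
        ``ref`` are assumed to exist):

        >>> try:
        ...     datastore._validate_put_parameters(wrong_typed_object, ref)
        ... except TypeError as err:
        ...     print(f"rejected: {err}")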
        """
        storageClass = ref.datasetType.storageClass

        # Sanity check
        if not isinstance(inMemoryDataset, storageClass.pytype):
            raise TypeError(
                f"Inconsistency between supplied object ({type(inMemoryDataset)}) "
                f"and storage class type ({storageClass.pytype})"
            )

        # Confirm that we can accept this dataset
        if not self.constraints.isAcceptable(ref):
            # Raise rather than use boolean return value.
            raise DatasetTypeNotSupportedError(
                f"Dataset {ref} has been rejected by this datastore via configuration."
            )

    def remove(self, ref: DatasetRef) -> None:
        """Indicate to the Datastore that a dataset can be removed.

        .. warning::

            This method deletes the artifact associated with this
            dataset and cannot be reversed.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to the required Dataset.

        Raises
        ------
        FileNotFoundError
            Attempt to remove a dataset that does not exist.

        Notes
        -----
        This method is used for immediate removal of a dataset and is
        generally reserved for internal testing of datastore APIs.
        It is implemented by calling `trash()` and then immediately calling
        `emptyTrash()`. This call is meant to be immediate so errors
        encountered during removal are not ignored.
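
        Examples
        --------
        Illustrative only; ``datastore`` and ``ref`` are assumed to exist:

        >>> datastore.remove(ref)  # trash(ref) followed by emptyTrash()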
        """
        self.trash(ref, ignore_errors=False)
        self.emptyTrash(ignore_errors=False)

    def transfer(self, inputDatastore: Datastore, ref: DatasetRef) -> None:
        """Retrieve a dataset from an input `Datastore`,
        and store the result in this `Datastore`.

        Parameters
        ----------
        inputDatastore : `Datastore`
            The external `Datastore` from which to retrieve the Dataset.
        ref : `DatasetRef`
            Reference to the required dataset in the input data store.
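
        Examples
        --------
        Illustrative only; ``src`` and ``dest`` are assumed to be concrete
        datastore instances, with ``ref`` already stored in ``src``:

        >>> dest.transfer(src, ref)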
        """
        assert inputDatastore is not self  # unless we want it for renames?
        inMemoryDataset = inputDatastore.get(ref)
        return self.put(inMemoryDataset, ref)