Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 68%
79 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-16 02:09 -0700
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-16 02:09 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")
25from abc import ABC, abstractmethod
26from typing import TYPE_CHECKING, Any, ContextManager, Iterable, Optional, Set, Tuple, Type, Union
28from lsst.utils.classes import immutable
30from ...core import DatasetId, DatasetRef
31from ._versioning import VersionedExtension
33if TYPE_CHECKING: 33 ↛ 34line 33 didn't jump to line 34, because the condition on line 33 was never true
34 from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo
35 from ._database import Database, StaticTablesContext
36 from ._datasets import DatasetRecordStorageManager
37 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager
@immutable
class FakeDatasetRef:
    """Minimal stand-in for a `DatasetRef` that carries only a dataset ID.

    Intended for internal butler use in situations where registry can not be
    used to build a full `DatasetRef` from the ID, for example during dataset
    deletion when nothing but the ID is available.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __hash__(self) -> int:
        # Hash on the ID only, matching the equality definition below.
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        try:
            same = self.id == other.id
        except AttributeError:
            # No usable ``id`` attribute: let Python try the reflected
            # comparison instead of deciding here.
            return NotImplemented
        return same

    @property
    def datasetType(self) -> DatasetType:
        # Present only so attribute access fails with an explicit message.
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

    def getCheckedId(self) -> DatasetId:
        """Return ``self.id`` without performing any check.

        This trivial method exists for compatibility with `DatasetRef`, for
        which checking is actually done.

        Returns
        -------
        id : `DatasetId`
            ``self.id``.
        """
        return self.id
# Union alias used in annotations throughout this module so that APIs can
# accept either a full `DatasetRef` or the ID-only `FakeDatasetRef`.
DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""
class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: Optional[OpaqueTableStorage] = None,
        record_class: Optional[Type[StoredDatastoreItemInfo]] = None,
        record_column: Optional[str] = None,
    ) -> ContextManager[
        Tuple[Iterable[Tuple[DatasetIdRef, Optional[StoredDatastoreItemInfo]]], Optional[Set[str]]]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo` \
                or `None`)
            The IDs of datasets that can be safely removed from this datastore
            and the corresponding information from the records table.
            Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which should
            not be deleted. If `None`, the caller should check themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                trashed_refs, to_keep = trashed
                for ref, info in trashed_refs:
                    # Remove artifacts associated with ref,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator.  If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a records table is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """
class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instance if they wish to store internal
      records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """

    def __init__(
        self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse, datasetIdColumnType: type
    ):
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: Type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        # NOTE(review): "collections" reads like text copied from another
        # manager interface; presumably this refers to whatever this manager
        # registers (datastores/bridges) - confirm and reword.
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default in this signature), return a bridge object
            that is backed by storage that will not last past the end of the
            current process.  This should be used whenever the same is true
            of the dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        # NOTE(review): the docstring used to say "`False` is default", which
        # contradicted the ``ephemeral=True`` default above.  The text now
        # follows the signature; confirm against concrete implementations
        # which default is actually intended.
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """

    datasetIdColumnType: type
    """Type for dataset ID column (`type`).
    """