Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 85%
68 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-25 15:14 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")
25from abc import ABC, abstractmethod
26from collections.abc import Iterable
27from contextlib import AbstractContextManager
28from typing import TYPE_CHECKING, Any
30from lsst.utils.classes import immutable
32from ...core import DatasetId, DatasetRef
33from ._versioning import VersionedExtension, VersionTuple
35if TYPE_CHECKING:
36 from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo
37 from ...core.datastore import DatastoreTransaction
38 from ._database import Database, StaticTablesContext
39 from ._datasets import DatasetRecordStorageManager
40 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager
@immutable
class FakeDatasetRef:
    """Minimal stand-in for a `DatasetRef` that carries only a dataset ID.

    Intended for internal butler use in situations where the registry can
    not be used to build a full `DatasetRef` from the ID. Dataset deletion,
    where solely the ID is available, is one such situation.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    @property
    def datasetType(self) -> DatasetType:
        # This object only knows the dataset ID, so the lookup always fails.
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

    def __hash__(self) -> int:
        # Hash on the ID alone, consistent with ``__eq__``.
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # Anything exposing an ``id`` attribute is comparable; for anything
        # else defer to the other operand by returning `NotImplemented`.
        try:
            own_id, other_id = self.id, other.id
            return own_id == other_id
        except AttributeError:
            return NotImplemented

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __str__(self) -> str:
        return f"dataset_id={self.id}"
DatasetIdRef = DatasetRef | FakeDatasetRef
"""Type-annotation alias accepting either a `DatasetRef` or a
`FakeDatasetRef`.
"""
class DatastoreRegistryBridge(ABC):
    """Abstract interface through which a `Datastore` talks to a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def ensure(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets, skipping any
        ref that is already registered.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information directly, bypassing the
        trash that normally holds it while external deletes are pending.

        Use this only to implement `Datastore.forget`, or in cases where
        deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        transaction : `DatastoreTransaction` or `None`
            Transaction object. Can be `None` in some bridges or if no
            rollback is required.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `~collections.abc.Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: OpaqueTableStorage | None = None,
        record_class: type[StoredDatastoreItemInfo] | None = None,
        record_column: str | None = None,
    ) -> AbstractContextManager[
        tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash associated
        with this datastore, and then remove them if the context exits
        without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`)
            The IDs of datasets that can be safely removed from this
            datastore and the corresponding information from the records
            table (the annotation allows the information to be `None`).
            Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which
            should not be deleted. If `None`, the caller should check
            themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                matches, to_keep = trashed
                for ref, info in matches:
                    # Remove artifacts associated with id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator. If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a table record is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()
class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.
    registry_schema_version : `VersionTuple` or `None`, optional
        Schema version of this extension as defined in registry. Passed
        through to the `VersionedExtension` constructor.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instance if they wish to store internal
      records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """

    def __init__(
        self,
        *,
        opaque: OpaqueTableStorageManager,
        universe: DimensionUniverse,
        datasetIdColumnType: type,
        registry_schema_version: VersionTuple | None = None,
    ):
        super().__init__(registry_schema_version=registry_schema_version)
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        # NOTE(review): "collections" above is kept from the original text;
        # presumably datastore registrations are what is meant here - confirm.
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default in this abstract signature), return a
            bridge object that is backed by storage that will not last past
            the end of the current process. This should be used whenever the
            same is true of the dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        # NOTE(review): this docstring previously stated "`False` is default",
        # contradicting the signature above. Confirm which default concrete
        # implementations use and align the signature if `False` was intended.
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `~collections.abc.Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """

    datasetIdColumnType: type
    """Type for dataset ID column (`type`).
    """