Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 78%
66 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-09 02:11 -0700
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-09 02:11 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")
25from abc import ABC, abstractmethod
26from typing import TYPE_CHECKING, Any, ContextManager, Iterable, Optional, Set, Tuple, Type, Union
28from lsst.utils.classes import immutable
30from ...core import DatasetId, DatasetRef
31from ._versioning import VersionedExtension, VersionTuple
33if TYPE_CHECKING:
34 from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo
35 from ...core.datastore import DatastoreTransaction
36 from ._database import Database, StaticTablesContext
37 from ._datasets import DatasetRecordStorageManager
38 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager
@immutable
class FakeDatasetRef:
    """Minimal stand-in for a `DatasetRef` that carries only a dataset ID.

    Intended for internal butler use in situations where the registry can
    not be asked to build a complete `DatasetRef` from the ID, for example
    while deleting a dataset for which nothing but the ID is available.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    def __hash__(self) -> int:
        # Hash on the ID alone so that hashing agrees with ``__eq__``.
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # Compare by ID against anything exposing an ``id`` attribute; for
        # objects without one, let Python try the reflected comparison.
        try:
            same = self.id == other.id
        except AttributeError:
            return NotImplemented
        return same

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    @property
    def datasetType(self) -> DatasetType:
        """Always raise `AttributeError`; no dataset type is available."""
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")

    def getCheckedId(self) -> DatasetId:
        """Return ``self.id`` without performing any check.

        Provided so that this class offers the same method as `DatasetRef`,
        where the ID is actually checked.

        Returns
        -------
        id : `DatasetId`
            ``self.id``.
        """
        return self.id
# `FakeDatasetRef` provides an ``id`` attribute and a ``getCheckedId`` method
# for compatibility with `DatasetRef`, so interfaces that only need the
# dataset ID can accept either kind of reference through this alias.
DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""
class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: Optional[DatastoreTransaction]) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.
        transaction : `DatastoreTransaction` or `None`
            Transaction object. Can be `None` in some bridges or if no
            rollback is required.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: Optional[OpaqueTableStorage] = None,
        record_class: Optional[Type[StoredDatastoreItemInfo]] = None,
        record_column: Optional[str] = None,
    ) -> ContextManager[
        Tuple[Iterable[Tuple[DatasetIdRef, Optional[StoredDatastoreItemInfo]]], Optional[Set[str]]]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash associated
        with this datastore, and then remove them if the context exits
        without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo` or \
                `None`)
            The IDs of datasets that can be safely removed from this
            datastore and the corresponding information from the records
            table. Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which
            should not be deleted. If `None`, the caller should check
            themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                matches, to_keep = trashed
                for ref, info in matches:
                    # Remove artifacts associated with ref,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The iterable of matches yielded by the context manager may be a
        single-pass iterator. If multiple passes are required, it should be
        converted to a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a records table is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """
class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.
    registry_schema_version : `VersionTuple` or `None`, optional
        Schema version of this extension as defined in registry.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """

    def __init__(
        self,
        *,
        opaque: OpaqueTableStorageManager,
        universe: DimensionUniverse,
        datasetIdColumnType: type,
        registry_schema_version: VersionTuple | None = None,
    ):
        super().__init__(registry_schema_version=registry_schema_version)
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: Type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        # NOTE(review): "collections" reads like wording carried over from a
        # different manager interface - confirm what state is refreshed here.
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is backed by
            storage that will not last past the end of the current process.
            This should be used whenever the same is true of the dataset's
            artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """

    datasetIdColumnType: type
    """Type for dataset ID column (`type`).
    """