Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 85%
68 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-12 09:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")
31from abc import ABC, abstractmethod
32from collections.abc import Iterable
33from contextlib import AbstractContextManager
34from typing import TYPE_CHECKING, Any
36from lsst.utils.classes import immutable
38from ..._dataset_ref import DatasetId, DatasetRef
39from ._versioning import VersionedExtension, VersionTuple
41if TYPE_CHECKING:
42 from ..._dataset_type import DatasetType
43 from ...datastore import DatastoreTransaction
44 from ...datastore.stored_file_info import StoredDatastoreItemInfo
45 from ...dimensions import DimensionUniverse
46 from ._database import Database, StaticTablesContext
47 from ._datasets import DatasetRecordStorageManager
48 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager
@immutable
class FakeDatasetRef:
    """A minimal stand-in for `DatasetRef` used internally by butler when
    only the dataset ID is available.

    Should only be used when registry can not be used to create a full
    `DatasetRef` from the ID.  A particular use case is during dataset
    deletion when solely the ID is available.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        # Compare by ID against anything exposing an ``id`` attribute;
        # defer to the other operand (NotImplemented) otherwise.
        try:
            result = self.id == other.id
        except AttributeError:
            return NotImplemented
        return result

    def __hash__(self) -> int:
        return hash(self.id)

    @property
    def datasetType(self) -> DatasetType:
        # A fake ref carries no dataset type by construction.
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")
DatasetIdRef = DatasetRef | FakeDatasetRef
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""
class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def ensure(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets, skipping if
        the ref is already registered.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        transaction : `DatastoreTransaction` or `None`
            Transaction object. Can be `None` in some bridges or if no rollback
            is required.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `~collections.abc.Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(
        self,
        records_table: OpaqueTableStorage | None = None,
        record_class: type[StoredDatastoreItemInfo] | None = None,
        record_column: str | None = None,
    ) -> AbstractContextManager[
        tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`)
            The IDs of datasets that can be safely removed from this datastore
            and the corresponding information from the records table.
            Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which should
            not be deleted. If `None`, the caller should check themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                iter, to_keep = trashed
                for ref, info in iter:
                    # Remove artifacts associated with id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator. If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a table record is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """
class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instance if they wish to store internal
      records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """

    def __init__(
        self,
        *,
        opaque: OpaqueTableStorageManager,
        universe: DimensionUniverse,
        datasetIdColumnType: type,
        registry_schema_version: VersionTuple | None = None,
    ):
        super().__init__(registry_schema_version=registry_schema_version)
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is
            backed by storage that will not last past the end of the current
            process. This should be used whenever the same is true of the
            dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `~collections.abc.Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """