Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 85%
65 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-21 09:55 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")
25from abc import ABC, abstractmethod
26from collections.abc import Iterable
27from typing import TYPE_CHECKING, Any, ContextManager
29from lsst.utils.classes import immutable
31from ...core import DatasetId, DatasetRef
32from ._versioning import VersionedExtension, VersionTuple
34if TYPE_CHECKING:
35 from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo
36 from ...core.datastore import DatastoreTransaction
37 from ._database import Database, StaticTablesContext
38 from ._datasets import DatasetRecordStorageManager
39 from ._opaque import OpaqueTableStorage, OpaqueTableStorageManager
@immutable
class FakeDatasetRef:
    """Stand-in for a `DatasetRef` that carries nothing but a dataset ID.

    Meant for internal butler use in situations where the registry can not
    be asked to build a complete `DatasetRef` from the ID. Dataset deletion,
    where solely the ID is at hand, is one such situation.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    def __eq__(self, other: Any) -> bool:
        # Both attribute lookups and the comparison itself stay inside the
        # ``try`` so that an `AttributeError` from any of them defers to the
        # other operand instead of propagating.
        try:
            same = self.id == other.id
        except AttributeError:
            return NotImplemented
        return same

    def __hash__(self) -> int:
        # Hash on the ID only, matching the equality definition above.
        return hash(self.id)

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    @property
    def datasetType(self) -> DatasetType:
        """Always raise; no dataset type is available on a fake reference.

        Raises
        ------
        AttributeError
            Raised unconditionally on access.
        """
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")
# Runtime union of the two reference classes; the bridge interfaces in this
# module use it as the annotation for arguments that accept either kind.
DatasetIdRef = DatasetRef | FakeDatasetRef
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""
class DatastoreRegistryBridge(ABC):
    """Interface through which a `Datastore` communicates with a `Registry`.

    This is an abstract base class: every method other than the constructor
    is abstract and must be supplied by a concrete bridge.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Drop dataset location information, bypassing the trash.

        No attempt is made to park the records in the trash while external
        deletes are pending. Use this only to implement `Datastore.forget`,
        or in cases where deleting the actual datastore artifacts cannot
        fail.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        """
        raise NotImplementedError

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef], transaction: DatastoreTransaction | None) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.
        transaction : `DatastoreTransaction` or `None`
            Transaction object. Can be `None` in some bridges or if no
            rollback is required.
        """
        raise NotImplementedError

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `~collections.abc.Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.
        """
        raise NotImplementedError

    @abstractmethod
    def emptyTrash(
        self,
        records_table: OpaqueTableStorage | None = None,
        record_class: type[StoredDatastoreItemInfo] | None = None,
        record_column: str | None = None,
    ) -> ContextManager[
        tuple[Iterable[tuple[DatasetIdRef, StoredDatastoreItemInfo | None]], set[str] | None]
    ]:
        """Retrieve all the dataset ref IDs that are in the trash associated
        with this datastore, and then remove them if the context exits
        without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo`)
            The IDs of datasets that can be safely removed from this
            datastore and the corresponding information from the records
            table. Can be empty. The return annotation also allows the
            record information of a pair to be `None`.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which
            should not be deleted. If `None`, the caller should check
            themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                matches, to_keep = trashed
                for ref, info in matches:
                    # Remove artifacts associated with id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator. If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a records table is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError
class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.
    registry_schema_version : `VersionTuple` or `None`, optional
        Schema version of this extension as defined in registry; forwarded
        unchanged to the base-class constructor.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instance if they wish to store internal
      records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.

    """

    def __init__(
        self,
        *,
        opaque: OpaqueTableStorageManager,
        universe: DimensionUniverse,
        datasetIdColumnType: type,
        registry_schema_version: VersionTuple | None = None,
    ):
        super().__init__(registry_schema_version=registry_schema_version)
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        opaque: OpaqueTableStorageManager,
        datasets: type[DatasetRecordStorageManager],
        universe: DimensionUniverse,
        registry_schema_version: VersionTuple | None = None,
    ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    # NOTE(review): the summary below talks about "collections", which looks
    # copied from a collection manager; confirm whether "datastores" (or
    # bridge state in general) is what is meant for this manager.
    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default in this signature), return a bridge object
            that is backed by storage that will not last past the end of the
            current process. This should be used whenever the same is true of
            the dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetIdRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetIdRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `~collections.abc.Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """

    datasetIdColumnType: type
    """Type for dataset ID column (`type`).
    """