Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py: 67%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")
25from abc import ABC, abstractmethod
26from typing import (
27 Any,
28 ContextManager,
29 Iterable,
30 Optional,
31 Set,
32 Tuple,
33 Type,
34 TYPE_CHECKING,
35 Union,
36)
38from lsst.utils.classes import immutable
39from ...core import DatasetId, DatasetRef
40from ._versioning import VersionedExtension
42if TYPE_CHECKING: 42 ↛ 43line 42 didn't jump to line 43, because the condition on line 42 was never true
43 from ...core import DatasetType, DimensionUniverse, StoredDatastoreItemInfo
44 from ._database import Database, StaticTablesContext
45 from ._datasets import DatasetRecordStorageManager
46 from ._opaque import OpaqueTableStorageManager, OpaqueTableStorage
@immutable
class FakeDatasetRef:
    """Stand-in for a `DatasetRef` that carries nothing but a dataset ID.

    Intended for internal butler use in situations where the registry
    cannot be used to build a complete `DatasetRef` from the ID.  Dataset
    deletion, where solely the ID is available, is one such situation.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    __slots__ = ("id",)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def __init__(self, id: DatasetId):
        self.id = id

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        try:
            same = self.id == other.id
        except AttributeError:
            # No ``id`` attribute to compare against: defer to Python's
            # fallback comparison machinery instead of answering `False`.
            return NotImplemented
        return same

    def __hash__(self) -> int:
        # Hash on the ID alone so hashing agrees with ``__eq__``.
        return hash(self.id)

    def getCheckedId(self) -> DatasetId:
        """Return ``self.id``.

        Provided only so this class offers the same method as `DatasetRef`,
        where the call actually performs a check; here it is trivial.

        Returns
        -------
        id : `DatasetId`
            ``self.id``.
        """
        return self.id

    @property
    def datasetType(self) -> DatasetType:
        """Always raises `AttributeError`; no dataset type is available."""
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")
DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""Type-annotation alias accepting either a `DatasetRef` or a
`FakeDatasetRef`.
"""
class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(self, records_table: Optional[OpaqueTableStorage] = None,
                   record_class: Optional[Type[StoredDatastoreItemInfo]] = None,
                   record_column: Optional[str] = None,
                   ) -> ContextManager[Tuple[Iterable[Tuple[DatasetIdRef,
                                                            Optional[StoredDatastoreItemInfo]]],
                                             Optional[Set[str]]]]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Parameters
        ----------
        records_table : `OpaqueTableStorage`, optional
            Table of records to query with the trash records.
        record_class : `type` of `StoredDatastoreItemInfo`, optional
            Class to use when reading records from ``records_table``.
        record_column : `str`, optional
            Name of the column in ``records_table`` that refers to the
            artifact.

        Yields
        ------
        matches : iterable of (`DatasetIdRef`, `StoredDatastoreItemInfo` \
                or `None`)
            The IDs of datasets that can be safely removed from this
            datastore and the corresponding information from the records
            table.  Can be empty.
        artifacts_to_keep : `set` of `str`, optional
            Any external artifacts that are known to the table but which
            should not be deleted. If `None`, the caller should check
            themselves.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                trashed_refs, to_keep = trashed
                for ref, info in trashed_refs:
                    # Remove artifacts associated with ref,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator.  If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.

        If a table record is provided the trashed records will be deleted
        when the context manager completes.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """
class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.
    """

    def __init__(self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse,
                 datasetIdColumnType: type):
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        # NOTE(review): "collections" above may be a copy-paste from another
        # manager interface; this manager registers datastores (see
        # `register`).  Confirm the intended wording with the implementers.
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is backed by
            storage that will not last past the end of the current process.
            This should be used whenever the same is true of the dataset's
            artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """

    datasetIdColumnType: type
    """Type for dataset ID column (`type`).
    """