Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py : 65%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")
25from abc import ABC, abstractmethod
26from typing import (
27 Any,
28 ContextManager,
29 Iterable,
30 Type,
31 TYPE_CHECKING,
32 Union,
33)
35from ...core.utils import immutable
36from ...core import DatasetId, DatasetRef
37from ._versioning import VersionedExtension
39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true
40 from ...core import DatasetType, DimensionUniverse
41 from ._database import Database, StaticTablesContext
42 from ._datasets import DatasetRecordStorageManager
43 from ._opaque import OpaqueTableStorageManager
@immutable
class FakeDatasetRef:
    """A stand-in for `DatasetRef` that carries nothing but a dataset ID.

    Intended for internal butler use in situations where the registry can
    not be used to build a full `DatasetRef` from the ID.  A particular use
    case is dataset deletion, when solely the ID is available.

    Parameters
    ----------
    id : `DatasetId`
        The dataset ID.
    """

    # Only the ID is ever stored on an instance.
    __slots__ = ("id",)

    def __init__(self, id: DatasetId):
        self.id = id

    def __str__(self) -> str:
        return f"dataset_id={self.id}"

    def __repr__(self) -> str:
        return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        # Equality is decided purely by ID, against any object that exposes
        # an ``id`` attribute.  Objects without one are not comparable, so
        # hand the decision back to Python via NotImplemented.
        try:
            return self.id == other.id
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Hash on the same value that __eq__ compares.
        return hash(self.id)

    id: DatasetId
    """Unique identifier for this dataset.
    """

    def getCheckedId(self) -> DatasetId:
        """Return ``self.id``.

        This trivial method exists for compatibility with `DatasetRef`, for
        which checking is actually done.

        Returns
        -------
        id : `DatasetId`
            ``self.id``.
        """
        return self.id

    @property
    def datasetType(self) -> DatasetType:
        """Not available on a `FakeDatasetRef`; access always fails.

        Raises
        ------
        AttributeError
            Raised on every access.
        """
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")
DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""A type-annotation alias that matches both `DatasetRef` and
`FakeDatasetRef`.
"""
class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
    """

    def __init__(self, datastoreName: str):
        self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def forget(self, refs: Iterable[DatasetIdRef]) -> None:
        """Remove dataset location information without any attempt to put it
        in the trash while waiting for external deletes.

        This should be used only to implement `Datastore.forget`, or in cases
        where deleting the actual datastore artifacts cannot fail.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(self) -> ContextManager[Iterable[DatasetIdRef]]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Returns
        -------
        ids : `ContextManager` [ `Iterable` [ `DatasetIdRef` ] ]
            Context manager yielding the IDs of datasets that can be safely
            removed from this datastore.  The iterable can be empty.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                for ref in trashed:
                    # Remove artifacts associated with ref.id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator.  If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error for those to be safe.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """
class DatastoreRegistryBridgeManager(VersionedExtension):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.
    datasetIdColumnType : `type`
        Type for dataset ID column.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following:

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instance if they wish to store internal
      records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.
    """

    def __init__(self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse,
                 datasetIdColumnType: type):
        self.opaque = opaque
        self.universe = universe
        self.datasetIdColumnType = datasetIdColumnType

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        # NOTE(review): "collections" reads like wording carried over from
        # another manager interface; this class registers datastores via
        # `register` - confirm the intended term.
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (the default), return a bridge object that is
            backed by storage that will not last past the end of the current
            process.  This should be used whenever the same is true of the
            dataset's artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.  Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        raise NotImplementedError()

    opaque: OpaqueTableStorageManager
    """Registry manager object for opaque (to Registry) tables, provided
    to allow Datastores to store their internal information inside the
    Registry database.
    """

    universe: DimensionUniverse
    """All dimensions known to the `Registry`.
    """

    datasetIdColumnType: type
    """Type for dataset ID column (`type`).
    """