Coverage for python/lsst/daf/butler/registry/interfaces/_bridge.py : 54%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("DatastoreRegistryBridgeManager", "DatastoreRegistryBridge", "FakeDatasetRef", "DatasetIdRef")

from abc import ABC, abstractmethod
from typing import (
    Any,
    ContextManager,
    Dict,
    Iterable,
    Iterator,
    Type,
    TYPE_CHECKING,
    Union,
)

from ...core.utils import immutable
from ...core import DatasetRef

if TYPE_CHECKING:
    from ...core import DatasetType, DimensionUniverse
    from ._database import Database, StaticTablesContext
    from ._datasets import DatasetRecordStorageManager
    from ._opaque import OpaqueTableStorageManager


@immutable
class FakeDatasetRef:
    """A fake `DatasetRef` that can be used internally by butler where
    only the dataset ID is available.

    Should only be used when the registry cannot be used to create a full
    `DatasetRef` from the ID. A particular use case is during dataset
    deletion when only the ID is available.

    Parameters
    ----------
    id : `int`
        The dataset ID.
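
    Examples
    --------
    A minimal sketch of typical use (the values are purely illustrative,
    not drawn from a real registry)::

        ref = FakeDatasetRef(42)
        print(ref)            # prints "dataset_id=42"
        # Equality and hashing are based solely on the ID, so two fakes
        # built from the same ID compare equal.
        assert ref == FakeDatasetRef(42)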
60 """
61 __slots__ = ("id",)
63 def __new__(cls, id: int) -> FakeDatasetRef:
64 self = super().__new__(cls)
65 self.id = id
66 return self
68 def __str__(self) -> str:
69 return f"dataset_id={self.id}"
71 def __repr__(self) -> str:
72 return f"FakeDatasetRef({self.id})"

    def __eq__(self, other: Any) -> bool:
        try:
            return self.id == other.id
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        return hash(self.id)

    id: int
    """Unique integer that identifies this dataset.
    """

    @property
    def components(self) -> Dict[str, FakeDatasetRef]:
        return {}

    def allRefs(self, parents: bool = True) -> Iterator[FakeDatasetRef]:
        """Return all the nested component `FakeDatasetRef` instances and
        optionally the parent.

        Parameters
        ----------
        parents : `bool`, optional
            If `True` (default) include the given dataset in the output
            iterable. If `False`, include only its components. Since a
            `FakeDatasetRef` never has components, setting this to `False`
            yields no results.

        Yields
        ------
        ref : `FakeDatasetRef`
            Since there are never components, this yields either the dataset
            itself or nothing (depending on the value of ``parents``).
        """
        # No components, so yield only the parent, and only if requested.
        if parents:
            yield self

    @staticmethod
    def flatten(refs: Iterable[FakeDatasetRef], *,
                parents: bool = True) -> Iterator[FakeDatasetRef]:
        """Recursively transform an iterable over `FakeDatasetRef` to include
        nested component `FakeDatasetRef` instances.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` [ `FakeDatasetRef` ]
            Input iterable to process.
        parents : `bool`, optional
            If `True` (default) include the given datasets in the output
            iterable. If `False`, include only their components, which
            for `FakeDatasetRef` means no results.

        Yields
        ------
        ref : `FakeDatasetRef`
            Either one of the given `FakeDatasetRef` instances (only if
            ``parents`` is `True`) or one of its (recursive) children.

        Notes
        -----
        If ``parents`` is `True`, components are guaranteed to be yielded
        before their parents.
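
        Examples
        --------
        A brief, illustrative sketch (the list contents are made up)::

            refs = [FakeDatasetRef(1), FakeDatasetRef(2)]
            flat = list(FakeDatasetRef.flatten(refs))
            # With parents=True (the default) each input ref is yielded
            # as-is, so flat == refs.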
137 """
138 for ref in refs:
139 for subref in ref.allRefs(parents):
140 yield subref

    def getCheckedId(self) -> int:
        """Return ``self.id``.

        This trivial method exists for compatibility with `DatasetRef`, for
        which checking is actually done.

        Returns
        -------
        id : `int`
            ``self.id``.
        """
        return self.id

    @property
    def datasetType(self) -> DatasetType:
        raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType")


DatasetIdRef = Union[DatasetRef, FakeDatasetRef]
"""A type-annotation alias that matches both `DatasetRef` and `FakeDatasetRef`.
"""


class DatastoreRegistryBridge(ABC):
    """An abstract base class that defines the interface that a `Datastore`
    uses to communicate with a `Registry`.

    Parameters
    ----------
    datastoreName : `str`
        Name of the `Datastore` as it should appear in `Registry` tables
        referencing it.
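
    Examples
    --------
    A concrete bridge is normally obtained from
    `DatastoreRegistryBridgeManager.register`. The hypothetical sketch below
    shows how a datastore implementation might use one (``bridge`` and
    ``refs`` are illustrative names, and error handling is omitted)::

        # Record that this datastore now holds the given datasets.
        bridge.insert(refs)
        # Later, ask which of those refs the registry still lists here.
        present = list(bridge.check(refs))
        # Schedule removal by moving the location records to the trash.
        bridge.moveToTrash(present)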
174 """
175 def __init__(self, datastoreName: str):
176 self.datastoreName = datastoreName

    @abstractmethod
    def insert(self, refs: Iterable[DatasetIdRef]) -> None:
        """Record that a datastore holds the given datasets.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def moveToTrash(self, refs: Iterable[DatasetIdRef]) -> None:
        """Move dataset location information to trash.

        Parameters
        ----------
        refs : `Iterable` of `DatasetIdRef`
            References to the datasets.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def check(self, refs: Iterable[DatasetIdRef]) -> Iterable[DatasetIdRef]:
        """Check which refs are listed for this datastore.

        Parameters
        ----------
        refs : `~collections.abc.Iterable` of `DatasetIdRef`
            References to the datasets.

        Returns
        -------
        present : `Iterable` [ `DatasetIdRef` ]
            Datasets from ``refs`` that are recorded as being in this
            datastore.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        """
        raise NotImplementedError()

    @abstractmethod
    def emptyTrash(self) -> ContextManager[Iterable[DatasetIdRef]]:
        """Retrieve all the dataset ref IDs that are in the trash
        associated with this datastore, and then remove them if the context
        exits without an exception being raised.

        Returns
        -------
        ids : `set` of `DatasetIdRef`
            The IDs of datasets that can be safely removed from this
            datastore. Can be empty.

        Examples
        --------
        Typical usage by a Datastore is something like::

            with self.bridge.emptyTrash() as trashed:
                for ref in trashed:
                    # Remove artifacts associated with ref.id,
                    # raise an exception if something goes wrong.

        Notes
        -----
        The object yielded by the context manager may be a single-pass
        iterator. If multiple passes are required, it should be converted to
        a `list` or other container.

        Datastores should never raise (except perhaps in testing) when an
        artifact cannot be removed only because it is already gone - this
        condition is an unavoidable outcome of concurrent delete operations,
        and must not be considered an error if those operations are to be
        safe.
        """
        raise NotImplementedError()

    datastoreName: str
    """The name of the `Datastore` as it should appear in `Registry` tables
    (`str`).
    """


class DatastoreRegistryBridgeManager(ABC):
    """An abstract base class that defines the interface between `Registry`
    and `Datastore` when a new `Datastore` is constructed.

    Parameters
    ----------
    opaque : `OpaqueTableStorageManager`
        Manager object for opaque table storage in the `Registry`.
    universe : `DimensionUniverse`
        All dimensions known to the `Registry`.

    Notes
    -----
    Datastores are passed an instance of `DatastoreRegistryBridgeManager` at
    construction, and should use it to obtain and keep any of the following
    (see the sketch under Examples below):

    - a `DatastoreRegistryBridge` instance to record in the `Registry` what is
      present in the datastore (needed by all datastores that are not just
      forwarders);

    - one or more `OpaqueTableStorage` instances if they wish to store
      internal records in the `Registry` database;

    - the `DimensionUniverse`, if they need it to (e.g.) construct or validate
      filename templates.
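
    Examples
    --------
    A hypothetical datastore constructor might use the manager roughly like
    this (the class and attribute names are illustrative only)::

        class SketchDatastore:
            def __init__(self, name, bridgeManager):
                # Keep the dimension universe, e.g. for validating templates.
                self.universe = bridgeManager.universe
                # Obtain a bridge used to record what this datastore holds.
                self.bridge = bridgeManager.register(name)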
297 """
298 def __init__(self, *, opaque: OpaqueTableStorageManager, universe: DimensionUniverse):
299 self.opaque = opaque
300 self.universe = universe

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   opaque: OpaqueTableStorageManager,
                   datasets: Type[DatasetRecordStorageManager],
                   universe: DimensionUniverse,
                   ) -> DatastoreRegistryBridgeManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        opaque : `OpaqueTableStorageManager`
            Registry manager object for opaque (to Registry) tables, provided
            to allow Datastores to store their internal information inside the
            Registry database.
        datasets : subclass of `DatasetRecordStorageManager`
            Concrete class that will be used to manage the core dataset tables
            in this registry; should be used only to create foreign keys to
            those tables.
        universe : `DimensionUniverse`
            All dimensions known to the registry.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            An instance of a concrete `DatastoreRegistryBridgeManager`
            subclass.
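
        Examples
        --------
        A rough sketch of what a concrete subclass might do (table
        declarations are elided; the structure shown is an assumption, not
        a prescription)::

            @classmethod
            def initialize(cls, db, context, *, opaque, datasets, universe):
                # A real implementation would declare its static tables
                # through ``context`` here before building the manager.
                return cls(opaque=opaque, universe=universe)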
335 """
336 raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        datastores that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, *, ephemeral: bool = True) -> DatastoreRegistryBridge:
        """Register a new `Datastore` associated with this `Registry`.

        This method should be called by all `Datastore` classes aside from
        those that only forward storage to other datastores.

        Parameters
        ----------
        name : `str`
            Name of the datastore, as it should appear in `Registry` tables.
        ephemeral : `bool`, optional
            If `True` (default), return a bridge object that is backed by
            storage that will not last past the end of the current process.
            This should be used whenever the same is true of the dataset's
            artifacts.

        Returns
        -------
        bridge : `DatastoreRegistryBridge`
            Object that provides the interface this `Datastore` should use to
            communicate with the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset. Empty if the
            dataset does not exist anywhere.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
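
        Examples
        --------
        An illustrative sketch (``manager`` and ``ref`` are placeholder
        names; ``ref`` must be a resolved `DatasetRef` with a non-`None`
        ID)::

            names = list(manager.findDatastores(ref))
            if not names:
                # The dataset is not known to any datastore.
                ...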
391 """
392 raise NotImplementedError()
394 opaque: OpaqueTableStorageManager
395 """Registry manager object for opaque (to Registry) tables, provided
396 to allow Datastores to store their internal information inside the
397 Registry database.
398 """
400 universe: DimensionUniverse
401 """All dimensions known to the `Registry`.
402 """