Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py : 60%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

# Public names exported by this module.
__all__ = [
    "ChainedCollectionRecord",
    "CollectionManager",
    "CollectionRecord",
    "MissingCollectionError",
    "RunRecord",
]

from abc import ABC, abstractmethod
from typing import (
    Any,
    Iterator,
    Optional,
    TYPE_CHECKING,
)

import astropy.time

from ...core import ddl, Timespan
from ..wildcards import CollectionSearch
from .._collectionType import CollectionType

# These names are only needed for type annotations in this module, so they are
# imported for static type checkers only.
if TYPE_CHECKING:
    from ._database import Database, StaticTablesContext
class MissingCollectionError(Exception):
    """Exception raised when an operation attempts to use a collection that
    does not exist.
    """
class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """
    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal sanity check on the value passed by registry code.
        assert isinstance(self.type, CollectionType)

    def __repr__(self) -> str:
        # Use the runtime class name so subclasses are reported correctly.
        return (f"{type(self).__name__}(key={self.key!r}, name={self.name!r}, "
                f"type={self.type!r})")

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """
class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.
    """

    # NOTE(review): `abstractmethod` only blocks instantiation for classes
    # whose metaclass is `abc.ABCMeta`.  Nothing visible here sets that
    # metaclass, so these markers are presumably documentation-only -- confirm
    # before relying on them being enforced.

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan[astropy.time.Time]] = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[astropy.time.Time]:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    """

    def __init__(self, key: Any, name: str):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `refresh` or `update` fills it in.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk the CHAINED records yielded for the proposed children (with
        # chains flattened); finding this record among them means the new
        # definition would make the chain contain itself.
        for record in children.iter(manager, flattenChains=True, includeChains=True,
                                    collectionType=CollectionType.CHAINED):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Invoke the subclass hook first so the in-memory children are only
        # replaced if that call did not raise.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The search path of child collections loaded from the database.
        """
        raise NotImplementedError()
class CollectionManager(ABC):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the referenced run row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If ``record.type is
            CollectionType.CHAINED``, this will be a `ChainedCollectionRecord`
            instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If ``record.type is
            CollectionType.CHAINED``, this will be a `ChainedCollectionRecord`
            instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()