Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 85%
95 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-01 11:20 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-01 11:20 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl
# Names exported by this module: the three record classes and the manager
# interface defined below.
__all__ = [
    "ChainedCollectionRecord",
    "CollectionManager",
    "CollectionRecord",
    "RunRecord",
]
38from abc import abstractmethod
39from collections.abc import Iterable, Set
40from typing import TYPE_CHECKING, Any, Generic, Self, TypeVar
42from ..._timespan import Timespan
43from .._collection_type import CollectionType
44from ..wildcards import CollectionWildcard
45from ._versioning import VersionedExtension, VersionTuple
47if TYPE_CHECKING:
48 from .._caching_context import CachingContext
49 from ._database import Database, StaticTablesContext
# Type variable for the collection key type; it parametrizes the record
# classes and `CollectionManager` below so that a manager and the records it
# hands out agree on the key type.
_Key = TypeVar("_Key")
class CollectionRecord(Generic[_Key]):
    """Internal `Registry` representation of a single collection.

    Code that faces users should refer to collections by their `str` names
    rather than by instances of this class.

    Parameters
    ----------
    key : _Key
        Unique collection ID; may be identical to ``name`` when the name
        itself is used for identification. Typically an integer or a string,
        but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.

    Notes
    -----
    Equality compares ``name``, ``type`` and ``key``, and the hash is derived
    from ``name``, so the three attributes set by this base class must be
    treated as immutable by users and by derived classes. Subclasses may add
    mutable attributes provided those take no part in any subclass definition
    of equality.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: _Key
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: _Key, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        assert isinstance(self.type, CollectionType)

    def __eq__(self, other: Any) -> bool:
        try:
            matches = self.name == other.name and self.type == other.type and self.key == other.key
        except AttributeError:
            # ``other`` lacks one of the compared attributes; defer to
            # Python's fallback comparison machinery.
            return NotImplemented
        return matches

    def __hash__(self) -> int:
        # Only the name feeds the hash; records that compare equal always
        # share a name, so this stays consistent with ``__eq__``.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        # The string form is just the user-facing collection name.
        return self.name
class RunRecord(CollectionRecord[_Key]):
    """A `CollectionRecord` for ``RUN`` collections that additionally carries
    execution information (host and time span).

    Parameters
    ----------
    key : `object`
        Unique collection key.
    name : `str`
        Name of the collection.
    host : `str`, optional
        Name of the host or system on which this run was produced.
    timespan : `Timespan`, optional
        Begin and end timestamps for the period over which the run was
        produced. When omitted, a `Timespan` with both bounds set to `None`
        is used.
    """

    host: str | None
    """Name of the host or system on which this run was produced (`str` or
    `None`).
    """

    timespan: Timespan
    """Begin and end timestamps for the period over which the run was produced.
    `None`/``NULL`` values are interpreted as infinite bounds.
    """

    def __init__(
        self,
        key: _Key,
        name: str,
        *,
        host: str | None = None,
        timespan: Timespan | None = None,
    ):
        # The collection type is fixed for this subclass.
        super().__init__(key=key, name=name, type=CollectionType.RUN)
        self.host = host
        # Substitute a timespan with no bounds when the caller gave none.
        self.timespan = Timespan(begin=None, end=None) if timespan is None else timespan

    def __repr__(self) -> str:
        return f"RunRecord(key={self.key!r}, name={self.name!r})"
class ChainedCollectionRecord(CollectionRecord[_Key]):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key : _Key
        Unique collection key.
    name : `str`
        Name of the collection.
    children : `~collections.abc.Iterable` [ `str` ]
        Ordered sequence of names of child collections. It is copied into a
        `tuple`, so any iterable (including a one-shot iterator) is accepted.
    """

    children: tuple[str, ...]
    """The ordered search path of child collections that define this chain
    (`tuple` [ `str` ]).
    """

    def __init__(
        self,
        # Annotated with the class's own type parameter (as in `RunRecord`)
        # so that the key type is tracked through the generic record type
        # instead of being erased to `Any`.
        key: _Key,
        name: str,
        *,
        children: Iterable[str],
    ):
        # The collection type is fixed for this subclass.
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Snapshot the children so later changes to the caller's iterable do
        # not affect this record.
        self.children = tuple(children)

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"
class CollectionManager(Generic[_Key], VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Parameters
    ----------
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of registry schema.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs is strictly for internal
    (to `Registry`) use.

    Every method other than ``__init__`` is abstract; the bodies here only
    raise `NotImplementedError`.
    """

    def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None:
        super().__init__(registry_schema_version=registry_schema_version)

    @abstractmethod
    def clone(self, db: Database, caching_context: CachingContext) -> Self:
        """Make an independent copy of this manager instance bound to a new
        `Database` instance.

        Parameters
        ----------
        db : `Database`
            New `Database` object to use when instantiating the manager.
        caching_context : `CachingContext`
            New `CachingContext` object to use when instantiating the manager.

        Returns
        -------
        instance : `CollectionManager`
            New manager instance with the same configuration as this instance,
            but bound to a new Database object.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        caching_context: CachingContext,
        registry_schema_version: VersionTuple | None = None,
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        caching_context : `CachingContext`
            Object controlling caching of information returned by managers.
        registry_schema_version : `VersionTuple` or `None`, optional
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: str | None = None
    ) -> tuple[CollectionRecord[_Key], bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord[_Key]:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord[_Key]:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key : `typing.Any`
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        wildcard: CollectionWildcard,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord[_Key]]:
        """Return the collection records that match a wildcard.

        Parameters
        ----------
        wildcard : `CollectionWildcard`
            Names and/or patterns for collections.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only return collections of these types.  The default
            is the value of ``CollectionType.all()``.
        done : `set` [ `str` ], optional
            A `set` of collection names that will not be returned (presumably
            because they have already been returned in some higher-level logic)
            that will also be updated with the names of the collections
            returned.
        flatten_chains : `bool`, optional
            If `True` (default) recursively include the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: _Key) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: _Key, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def getParentChains(self, key: _Key) -> set[str]:
        """Find all CHAINED collection names that directly contain the given
        collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.

        Returns
        -------
        names : `set` [`str`]
            Parent collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def update_chain(
        self, record: ChainedCollectionRecord[_Key], children: Iterable[str], flatten: bool = False
    ) -> ChainedCollectionRecord[_Key]:
        """Update chained collection composition.

        Parameters
        ----------
        record : `ChainedCollectionRecord`
            Chained collection record.
        children : `~collections.abc.Iterable` [`str`]
            Ordered names of children collections.
        flatten : `bool`, optional
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Returns
        -------
        record : `ChainedCollectionRecord`
            Record for the chained collection after the update.  Whether this
            is ``record`` itself or a new instance is not specified by this
            interface, so callers should use the returned object.
        """
        raise NotImplementedError()