Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 86%
104 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-19 03:44 -0700
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-19 03:44 -0700
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl
31__all__ = [
32 "ChainedCollectionRecord",
33 "CollectionManager",
34 "CollectionRecord",
35 "RunRecord",
36]
38from abc import abstractmethod
39from collections.abc import Iterable, Set
40from typing import TYPE_CHECKING, Any, Generic, Self, TypeVar
42import sqlalchemy
44from ..._timespan import Timespan
45from .._collection_type import CollectionType
46from ..wildcards import CollectionWildcard
47from ._versioning import VersionedExtension, VersionTuple
49if TYPE_CHECKING:
50 from .._caching_context import CachingContext
51 from ._database import Database, StaticTablesContext
54_Key = TypeVar("_Key")
class CollectionRecord(Generic[_Key]):
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key : _Key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.

    Notes
    -----
    The `name`, `key`, and `type` attributes set by the base class should be
    considered immutable by all users and derived classes (as these are used
    in the definition of equality and this is a hashable type).  Other
    attributes defined by subclasses may be mutable, as long as they do not
    participate in some subclass equality definition.
    """

    def __init__(self, key: _Key, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Sanity check only: this statement is skipped when Python runs with
        # ``-O``, so it must not be relied on for validation.
        assert isinstance(self.type, CollectionType)

    name: str
    """Name of the collection (`str`).
    """

    key: _Key
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __eq__(self, other: Any) -> bool:
        """Compare by ``name``, ``type`` and ``key``.

        Returns `NotImplemented` when ``other`` lacks any of those
        attributes, so that Python can try the reflected comparison.
        """
        try:
            return self.name == other.name and self.type == other.type and self.key == other.key
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        """Hash on ``name`` only.

        This is consistent with `__eq__`: records that compare equal always
        have equal names and therefore equal hashes.
        """
        return hash(self.name)

    def __repr__(self) -> str:
        """Return a debugging representation showing key, name and type."""
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        """Return the collection name."""
        return self.name
class RunRecord(CollectionRecord[_Key]):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.

    Parameters
    ----------
    key : `object`
        Unique collection key.
    name : `str`
        Name of the collection.
    host : `str`, optional
        Name of the host or system on which this run was produced.
    timespan : `Timespan`, optional
        Begin and end timestamps for the period over which the run was
        produced.  If `None` (default), a `Timespan` with both bounds set to
        `None` is used.
    """

    host: str | None
    """Name of the host or system on which this run was produced (`str` or
    `None`).
    """

    timespan: Timespan
    """Begin and end timestamps for the period over which the run was produced.
    `None`/``NULL`` values are interpreted as infinite bounds.
    """

    def __init__(
        self,
        key: _Key,
        name: str,
        *,
        host: str | None = None,
        timespan: Timespan | None = None,
    ):
        # The collection type is fixed for this subclass.
        super().__init__(key=key, name=name, type=CollectionType.RUN)
        self.host = host
        if timespan is None:
            # Always store a Timespan instance so ``self.timespan`` is never
            # `None`.
            timespan = Timespan(begin=None, end=None)
        self.timespan = timespan

    def __repr__(self) -> str:
        """Return a debugging representation showing key and name."""
        return f"RunRecord(key={self.key!r}, name={self.name!r})"
class ChainedCollectionRecord(CollectionRecord[_Key]):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key : `object`
        Unique collection key.
    name : `str`
        Name of the collection.
    children : `~collections.abc.Iterable` [ `str` ]
        Ordered sequence of names of child collections.
    """

    children: tuple[str, ...]
    """The ordered search path of child collections that define this chain
    (`tuple` [ `str` ]).
    """

    def __init__(
        self,
        key: _Key,
        name: str,
        *,
        children: Iterable[str],
    ):
        # The collection type is fixed for this subclass.
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Store an immutable snapshot so that later changes to the caller's
        # iterable do not alter this record.
        self.children = tuple(children)

    def __repr__(self) -> str:
        """Return a debugging representation showing key, name and children."""
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"
class CollectionManager(Generic[_Key], VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Parameters
    ----------
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of registry schema.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs is strictly for internal
    (to `Registry`) use.
    """

    def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None:
        super().__init__(registry_schema_version=registry_schema_version)

    @abstractmethod
    def clone(self, db: Database, caching_context: CachingContext) -> Self:
        """Make an independent copy of this manager instance bound to a new
        `Database` instance.

        Parameters
        ----------
        db : `Database`
            New `Database` object to use when instantiating the manager.
        caching_context : `CachingContext`
            New `CachingContext` object to use when instantiating the manager.

        Returns
        -------
        instance : `CollectionManager`
            New manager instance with the same configuration as this instance,
            but bound to a new Database object.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        caching_context: CachingContext,
        registry_schema_version: VersionTuple | None = None,
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        caching_context : `CachingContext`
            Object controlling caching of information returned by managers.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: str | None = None
    ) -> tuple[CollectionRecord[_Key], bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord[_Key]:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord[_Key]:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key : `typing.Any`
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        wildcard: CollectionWildcard,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord[_Key]]:
        """Return the collection records that match a wildcard.

        Parameters
        ----------
        wildcard : `CollectionWildcard`
            Names and/or patterns for collections.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only return collections of these types.
        done : `set` [ `str` ], optional
            A `set` of collection names that will not be returned (presumably
            because they have already been returned in some higher-level logic)
            that will also be updated with the names of the collections
            returned.
        flatten_chains : `bool`, optional
            If `True` (default) recursively return the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: _Key) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: _Key, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def getParentChains(self, key: _Key) -> set[str]:
        """Find all CHAINED collection names that directly contain the given
        collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.

        Returns
        -------
        names : `set` [`str`]
            Parent collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def update_chain(
        self,
        parent_collection_name: str,
        child_collection_names: list[str],
        allow_use_in_caching_context: bool = False,
    ) -> None:
        """Replace all of the children in a chained collection with a new list.

        Parameters
        ----------
        parent_collection_name : `str`
            The name of a CHAINED collection to be modified.
        child_collection_names : `list` [ `str` ]
            List of child collection names to be assigned to the parent.
        allow_use_in_caching_context : `bool`, optional
            If `True`, skip a check that would otherwise disallow this function
            from being called inside an active caching context.
            (Only exists for legacy use, will eventually be removed).

        Raises
        ------
        MissingCollectionError
            If any of the specified collections do not exist.
        CollectionTypeError
            If the parent collection is not a CHAINED collection.
        CollectionCycleError
            If this operation would create a collection cycle.

        Notes
        -----
        If this function is called within a call to ``Butler.transaction``, it
        will hold a lock that prevents other processes from modifying the
        parent collection until the end of the transaction.  Keep these
        transactions short.
        """
        raise NotImplementedError()

    @abstractmethod
    def prepend_collection_chain(
        self, parent_collection_name: str, child_collection_names: list[str]
    ) -> None:
        """Add children to the beginning of a CHAINED collection.

        If any of the children already existed in the chain, they will be moved
        to the new position at the beginning of the chain.

        Parameters
        ----------
        parent_collection_name : `str`
            The name of a CHAINED collection to which we will add new children.
        child_collection_names : `list` [ `str` ]
            List of child collection names to be added to the parent.

        Raises
        ------
        MissingCollectionError
            If any of the specified collections do not exist.
        CollectionTypeError
            If the parent collection is not a CHAINED collection.
        CollectionCycleError
            If this operation would create a collection cycle.

        Notes
        -----
        If this function is called within a call to ``Butler.transaction``, it
        will hold a lock that prevents other processes from modifying the
        parent collection until the end of the transaction.  Keep these
        transactions short.
        """
        raise NotImplementedError()

    @abstractmethod
    def extend_collection_chain(self, parent_collection_name: str, child_collection_names: list[str]) -> None:
        """Add children to the end of a CHAINED collection.

        If any of the children already existed in the chain, they will be moved
        to the new position at the end of the chain.

        Parameters
        ----------
        parent_collection_name : `str`
            The name of a CHAINED collection to which we will add new children.
        child_collection_names : `list` [ `str` ]
            List of child collection names to be added to the parent.

        Raises
        ------
        MissingCollectionError
            If any of the specified collections do not exist.
        CollectionTypeError
            If the parent collection is not a CHAINED collection.
        CollectionCycleError
            If this operation would create a collection cycle.

        Notes
        -----
        If this function is called within a call to ``Butler.transaction``, it
        will hold a lock that prevents other processes from modifying the
        parent collection until the end of the transaction.  Keep these
        transactions short.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove_from_collection_chain(
        self, parent_collection_name: str, child_collection_names: list[str]
    ) -> None:
        """Remove children from a CHAINED collection.

        Parameters
        ----------
        parent_collection_name : `str`
            The name of a CHAINED collection from which we will remove
            children.
        child_collection_names : `list` [ `str` ]
            List of child collection names to be removed from the parent.

        Raises
        ------
        MissingCollectionError
            If any of the specified collections do not exist.
        CollectionTypeError
            If the parent collection is not a CHAINED collection.

        Notes
        -----
        If this function is called within a call to ``Butler.transaction``, it
        will hold a lock that prevents other processes from modifying the
        parent collection until the end of the transaction.  Keep these
        transactions short.
        """
        raise NotImplementedError()

    def lookup_name_sql(
        self, sql_key: sqlalchemy.ColumnElement[_Key], sql_from_clause: sqlalchemy.FromClause
    ) -> tuple[sqlalchemy.ColumnElement[str], sqlalchemy.FromClause]:
        """Return a SQLAlchemy column and FROM clause that enable a query
        to look up a collection name from the key.

        Parameters
        ----------
        sql_key : `sqlalchemy.ColumnElement`
            SQL column expression that evaluates to the collection key.
        sql_from_clause : `sqlalchemy.FromClause`
            SQL FROM clause from which ``sql_key`` was obtained.

        Returns
        -------
        sql_name : `sqlalchemy.ColumnElement` [ `str` ]
            SQL column expression that evaluates to the collection name.
        sql_from_clause : `sqlalchemy.FromClause`
            SQL FROM clause that includes the given ``sql_from_clause`` and
            any table needed to provide ``sql_name``.

        Raises
        ------
        NotImplementedError
            Always raised by this base class implementation, which is not
            marked abstract.
        """
        raise NotImplementedError()

    def _block_for_concurrency_test(self) -> None:
        """No-op normally.  Provide a place for unit tests to hook in and
        verify locking behavior.
        """