Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 72%
116 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-02 08:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
# Public names exported by this module.
__all__ = [
    "ChainedCollectionRecord",
    "CollectionManager",
    "CollectionRecord",
    "RunRecord",
]
36from abc import abstractmethod
37from collections import defaultdict
38from collections.abc import Iterator, Set
39from typing import TYPE_CHECKING, Any
41from ...core import DimensionUniverse, Timespan, ddl
42from .._collectionType import CollectionType
43from ..wildcards import CollectionWildcard
44from ._versioning import VersionedExtension, VersionTuple
46if TYPE_CHECKING:
47 from ._database import Database, StaticTablesContext
48 from ._dimensions import DimensionRecordStorageManager
class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        another database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.

    Notes
    -----
    The `name`, `key`, and `type` attributes set by the base class should be
    considered immutable by all users and derived classes (as these are used
    in the definition of equality and this is a hashable type). Other
    attributes defined by subclasses may be mutable, as long as they do not
    participate in some subclass equality definition.
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal sanity check on the collection type (note that `assert`
        # statements are skipped when Python runs with ``-O``).
        assert isinstance(self.type, CollectionType)

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __eq__(self, other: Any) -> bool:
        # Equality is duck-typed: any object exposing ``name``, ``type`` and
        # ``key`` attributes can compare equal. Objects missing one of those
        # attributes defer to the other operand via `NotImplemented`.
        try:
            return self.name == other.name and self.type == other.type and self.key == other.key
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Only the name is hashed; records that compare equal always share a
        # name, so this stays consistent with `__eq__`.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        return self.name
class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.
    """

    @abstractmethod
    def update(self, host: str | None = None, timespan: Timespan | None = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced. `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> str | None:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`). `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        # The collection type is omitted from the representation of a run.
        return f"RunRecord(key={self.key!r}, name={self.name!r})"
class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        another database-specific type.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted as part of the constructor signature; not used by this base
        class implementation.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain starts out empty; children are filled in later by
        # `update` or `refresh`.
        self._children: tuple[str, ...] = ()

    @property
    def children(self) -> tuple[str, ...]:
        """The ordered search path of child collections that define this chain
        (`tuple` [ `str` ]).
        """
        return self._children

    def update(self, manager: CollectionManager, children: tuple[str, ...], flatten: bool) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children. It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `tuple` [ `str` ]
            A collection search path that should be resolved to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        children_as_wildcard = CollectionWildcard.from_names(children)
        # Cycle check: walk every CHAINED collection reachable from the new
        # children (always flattening, regardless of ``flatten``) and make
        # sure this chain is not among them.
        for record in manager.resolve_wildcard(
            children_as_wildcard,
            flatten_chains=True,
            include_chains=True,
            collection_types={CollectionType.CHAINED},
        ):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            children = tuple(
                record.name for record in manager.resolve_wildcard(children_as_wildcard, flatten_chains=True)
            )
        # Delegate to derived classes to do the database updates.
        self._update(manager, children)
        # Update the reverse mapping (from child to parents) in the manager,
        # by removing the old relationships and adding back in the new ones.
        for old_child in self._children:
            manager._parents_by_child[manager.find(old_child).key].discard(self.key)
        for new_child in children:
            manager._parents_by_child[manager.find(new_child).key].add(self.key)
        # Actually set this instance's sequence of children.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it. `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        # Clear out the old reverse mapping (from child to parents).
        for child in self._children:
            manager._parents_by_child[manager.find(child).key].discard(self.key)
        self._children = self._load(manager)
        # Update the reverse mapping (from child to parents) in the manager.
        for child in self._children:
            manager._parents_by_child[manager.find(child).key].add(self.key)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: tuple[str, ...]) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given. It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `tuple` [ `str` ]
            A collection search path that should be resolved to set the child
            collections of this chain. Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> tuple[str, ...]:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them. It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `tuple` [ `str` ]
            The ordered sequence of collection names that defines the chained
            collection. Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"
class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Parameters
    ----------
    registry_schema_version : `VersionTuple` or `None`, optional
        Schema version of this extension as defined in registry; forwarded
        to the `VersionedExtension` constructor.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta). Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None:
        super().__init__(registry_schema_version=registry_schema_version)
        # Reverse mapping from a child collection's key to the keys of the
        # CHAINED collections that directly contain it. Maintained by
        # `ChainedCollectionRecord.update` and
        # `ChainedCollectionRecord.refresh`.
        self._parents_by_child: defaultdict[Any, set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        dimensions: DimensionRecordStorageManager,
        registry_schema_version: VersionTuple | None = None,
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table. Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted. `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted. `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: str | None = None
    ) -> tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type are present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection. Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance. If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance. If ``record.type is CollectionType.CHAINED``,
            this will be a `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance. If ``record.type is CollectionType.CHAINED``,
            this will be a `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        wildcard: CollectionWildcard,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard.

        Parameters
        ----------
        wildcard : `CollectionWildcard`
            Names and/or patterns for collections.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only return collections of these types.
        done : `set` [ `str` ], optional
            A `set` of collection names that will not be returned (presumably
            because they have already been returned in some higher-level logic)
            that will also be updated with the names of the collections
            returned.
        flatten_chains : `bool`, optional
            If `True` (default) recursively return the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        record : `ChainedCollectionRecord`
            Record of a chain that has the given collection as a direct
            child.
        """
        # Use ``get`` rather than indexing: indexing a `defaultdict` would
        # insert an empty set for every key that is merely queried.
        for parent_key in self._parents_by_child.get(key, ()):
            result = self[parent_key]
            assert isinstance(result, ChainedCollectionRecord)
            yield result