Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 51%
136 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-07 02:05 -0800
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-07 02:05 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "RunRecord",
28]
30from abc import abstractmethod
31from collections import defaultdict
32from collections.abc import Iterator, Set
33from typing import TYPE_CHECKING, Any
35from ...core import DimensionUniverse, Timespan, ddl
36from .._collectionType import CollectionType
37from ..wildcards import CollectionWildcard
38from ._versioning import VersionedExtension
40if TYPE_CHECKING: 40 ↛ 41line 40 didn't jump to line 41, because the condition on line 40 was never true
41 from ._database import Database, StaticTablesContext
42 from ._dimensions import DimensionRecordStorageManager
class CollectionRecord:
    """Internal `Registry` representation of a single collection.

    Code outside of `Registry` should refer to collections by plain `str`
    name rather than by instances of this class.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may simply be ``name`` when
        the name itself is used for identification; usually it is an integer
        or a string, but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value giving the kind of collection.

    Notes
    -----
    Equality compares ``name``, ``type`` and ``key`` and the hash is derived
    from ``name``, so these three attributes must be treated as immutable by
    users and by subclasses.  Subclasses are free to add mutable attributes
    provided those do not take part in any subclass definition of equality.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        assert isinstance(self.type, CollectionType)

    def __eq__(self, other: Any) -> bool:
        # Anything lacking one of the compared attributes is not comparable
        # to a record; defer to the other operand in that case.
        try:
            same = self.name == other.name and self.type == other.type and self.key == other.key
        except AttributeError:
            return NotImplemented
        return same

    def __hash__(self) -> int:
        # Only the name is hashed; equal records always share a name.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        return self.name
class RunRecord(CollectionRecord):
    """A `CollectionRecord` specialization for runs.

    On top of the base record it declares accessors for execution
    information (host and timespan) and a hook for updating that
    information; all of them must be provided by subclasses.
    """

    @abstractmethod
    def update(self, host: str | None = None, timespan: Timespan | None = None) -> None:
        """Write new execution information for this run to the database.

        Arguments that are left out are stored as ``NULL``; they are not
        skipped.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system that produced this run.  Its detailed
            form is left to higher-level convention; `Registry` treats it as
            an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps of the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> str | None:
        """Name of the host or system that produced this run (`str` or
        `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps of the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"RunRecord(key={self.key!r}, name={self.name!r})"
class ChainedCollectionRecord(CollectionRecord):
    """A `CollectionRecord` specialization for ``CHAINED`` collections, which
    additionally tracks the ordered child collections of the chain.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may simply be ``name`` when
        the name itself is used for identification; usually it is an integer
        or a string, but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted as part of the constructor signature; this base class does
        not use it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain has no children until `update` or `refresh` is called.
        self._children: tuple[str, ...] = ()

    @property
    def children(self) -> tuple[str, ...]:
        """The ordered search path of child collections that define this chain
        (`tuple` [ `str` ]).
        """
        return self._children

    def update(self, manager: CollectionManager, children: tuple[str, ...], flatten: bool) -> None:
        """Replace the child collections searched by this chain.

        This is the entry point external code should use to set children.
        The database work is delegated to `_update`, which is the method
        subclasses should override.

        Parameters
        ----------
        manager : `CollectionManager`
            Manager of this record and of every record that may appear among
            its children.
        children : `tuple` [ `str` ]
            Collection search path to resolve into the new child collections
            of this chain.
        flatten : `bool`
            If `True`, first recursively expand any nested
            `~CollectionType.CHAINED` collections found in ``children``.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        wildcard = CollectionWildcard.from_names(children)
        # Gather every CHAINED collection reachable through the requested
        # children; finding this record among them means the chain would
        # contain itself.
        reachable_chains = manager.resolve_wildcard(
            wildcard,
            flatten_chains=True,
            include_chains=True,
            collection_types={CollectionType.CHAINED},
        )
        if any(candidate == self for candidate in reachable_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            children = tuple(
                member.name for member in manager.resolve_wildcard(wildcard, flatten_chains=True)
            )
        # Let the subclass make the database changes before any in-memory
        # state is touched.
        self._update(manager, children)
        # Keep the manager's child -> parents mapping in step: drop the links
        # for the previous children, then add links for the new ones.
        for previous_name in self._children:
            previous_key = manager.find(previous_name).key
            manager._parents_by_child[previous_key].discard(self.key)
        for current_name in children:
            current_key = manager.find(current_name).key
            manager._parents_by_child[current_key].add(self.key)
        # Finally record the new sequence of children on this instance.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Reload the children of this chain from the database.

        The given manager is used to look up the records of the children.
        Having this as a separate step lets a manager learn about every
        collection that may appear in a chain before any chain asks it for
        those records; `ChainedCollectionRecord` subclasses may rely on this
        method being called some time after their own ``__init__`` in order
        to finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            Manager of this record and of every record that may appear among
            its children.
        """
        # Remove this chain from the parent sets of its current children.
        for previous_name in self._children:
            previous_key = manager.find(previous_name).key
            manager._parents_by_child[previous_key].discard(self.key)
        self._children = self._load(manager)
        # Register this chain as a parent of each freshly loaded child.
        for current_name in self._children:
            current_key = manager.find(current_name).key
            manager._parents_by_child[current_key].add(self.key)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: tuple[str, ...]) -> None:
        """Protected implementation hook for `update`.

        Subclasses implement this to make the database reflect the given
        children.  Only `update` should ever call it; all external code
        should go through `update` instead.

        Parameters
        ----------
        manager : `CollectionManager`
            Manager of this record and of every record that may appear among
            its children.
        children : `tuple` [ `str` ]
            Collection search path to resolve into the new child collections
            of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> tuple[str, ...]:
        """Protected implementation hook for `refresh`.

        Subclasses implement this to fetch the child collections of the chain
        from the database and return them.  Only `refresh` should ever call
        it; all external code should go through `refresh` instead.

        Parameters
        ----------
        manager : `CollectionManager`
            Manager of this record and of every record that may appear among
            its children.

        Returns
        -------
        children : `tuple` [ `str` ]
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"
class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    def __init__(self) -> None:
        # Reverse mapping from the key of a child collection to the keys of
        # the CHAINED collections that directly contain it.  It is maintained
        # by `ChainedCollectionRecord.update` and
        # `ChainedCollectionRecord.refresh`.
        self._parents_by_child: defaultdict[Any, set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: str | None = None
    ) -> tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type are present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        wildcard: CollectionWildcard,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard.

        Parameters
        ----------
        wildcard : `CollectionWildcard`
            Names and/or patterns for collections.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only return collections of these types.
        done : `set` [ `str` ], optional
            A `set` of collection names that will not be returned (presumably
            because they have already been returned in some higher-level logic)
            that will also be updated with the names of the collections
            returned.
        flatten_chains : `bool`, optional
            If `True` (default) recursively return the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        parent : `ChainedCollectionRecord`
            Record of a chain that lists the given collection as a direct
            child.  Nothing is yielded for a key with no recorded parents.
        """
        # Use ``get`` instead of indexing: indexing a `defaultdict` inserts an
        # empty set for every key that is merely queried, so a read-only
        # lookup would keep growing the mapping.  The keys are copied into a
        # tuple because this is a generator and the underlying set is edited
        # by `ChainedCollectionRecord.update`/`refresh`; iterating the live
        # set would raise `RuntimeError` if a chain changed while a caller
        # was still consuming the results.
        parent_keys = tuple(self._parents_by_child.get(key, ()))
        for parent_key in parent_keys:
            result = self[parent_key]
            # Only CHAINED collections are ever registered as parents; the
            # assertion also narrows the type for static checkers.
            assert isinstance(result, ChainedCollectionRecord)
            yield result