Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 57%
116 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-18 09:13 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-18 09:13 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "RunRecord",
28]
30from abc import abstractmethod
31from collections import defaultdict
32from collections.abc import Iterator, Set
33from typing import TYPE_CHECKING, Any
35from ...core import DimensionUniverse, Timespan, ddl
36from .._collectionType import CollectionType
37from ..wildcards import CollectionWildcard
38from ._versioning import VersionedExtension, VersionTuple
40if TYPE_CHECKING:
41 from ._database import Database, StaticTablesContext
42 from ._dimensions import DimensionRecordStorageManager
class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.

    Notes
    -----
    The `name`, `key`, and `type` attributes set by the base class should be
    considered immutable by all users and derived classes (as these are used
    in the definition of equality and this is a hashable type). Other
    attributes defined by subclasses may be mutable, as long as they do not
    participate in some subclass equality definition.
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        assert isinstance(self.type, CollectionType)

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __eq__(self, other: Any) -> bool:
        """Compare by ``name``, ``type`` and ``key``.

        Objects that lack any of those attributes are not comparable;
        `NotImplemented` is returned so Python can try the reflected
        comparison instead of raising.
        """
        try:
            return self.name == other.name and self.type == other.type and self.key == other.key
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Hashing on the name alone is consistent with ``__eq__``: records
        # that compare equal always share the same name.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        # The string form is just the collection name.
        return self.name
class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.
    """

    @abstractmethod
    def update(self, host: str | None = None, timespan: Timespan | None = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> str | None:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        # Only the base-class identifiers are shown; ``host`` and
        # ``timespan`` are implemented by subclasses.
        return f"RunRecord(key={self.key!r}, name={self.name!r})"
class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted as part of the constructor signature; this base class does
        not use or store it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Starts empty; populated by `update` or `refresh`.
        self._children: tuple[str, ...] = ()

    @property
    def children(self) -> tuple[str, ...]:
        """The ordered search path of child collections that define this chain
        (`tuple` [ `str` ]).
        """
        return self._children

    def update(self, manager: CollectionManager, children: tuple[str, ...], flatten: bool) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `tuple` [ `str` ]
            A collection search path that should be resolved to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Normalize once so that what is handed to `_update` and stored on
        # the instance is always a real tuple, as `children` promises.
        children = tuple(children)
        children_as_wildcard = CollectionWildcard.from_names(children)
        # Cycle check: look at every CHAINED collection reachable from the
        # requested children; finding this record among them means the chain
        # would contain itself.
        for record in manager.resolve_wildcard(
            children_as_wildcard,
            flatten_chains=True,
            include_chains=True,
            collection_types={CollectionType.CHAINED},
        ):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            children = tuple(
                record.name for record in manager.resolve_wildcard(children_as_wildcard, flatten_chains=True)
            )
        # Delegate to derived classes to do the database updates.
        self._update(manager, children)
        # Update the reverse mapping (from child to parents) in the manager,
        # by removing the old relationships and adding back in the new ones.
        for old_child in self._children:
            manager._parents_by_child[manager.find(old_child).key].discard(self.key)
        for new_child in children:
            manager._parents_by_child[manager.find(new_child).key].add(self.key)
        # Actually set this instance's sequence of children.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        # Clear out the old reverse mapping (from child to parents).
        for child in self._children:
            manager._parents_by_child[manager.find(child).key].discard(self.key)
        self._children = self._load(manager)
        # Update the reverse mapping (from child to parents) in the manager.
        for child in self._children:
            manager._parents_by_child[manager.find(child).key].add(self.key)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: tuple[str, ...]) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `tuple` [ `str` ]
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> tuple[str, ...]:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `tuple` [ `str` ]
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"
class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None:
        super().__init__(registry_schema_version=registry_schema_version)
        # Reverse mapping from a child collection's key to the keys of the
        # CHAINED collections that directly contain it.  It is maintained by
        # `ChainedCollectionRecord.update` and `ChainedCollectionRecord.refresh`.
        self._parents_by_child: defaultdict[Any, set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        dimensions: DimensionRecordStorageManager,
        registry_schema_version: VersionTuple | None = None,
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: str | None = None
    ) -> tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type are present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If ``record.type is
            CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If ``record.type is
            CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        wildcard: CollectionWildcard,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard.

        Parameters
        ----------
        wildcard : `CollectionWildcard`
            Names and/or patterns for collections.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only return collections of these types.
        done : `set` [ `str` ], optional
            A `set` of collection names that will not be returned (presumably
            because they have already been returned in some higher-level logic)
            that will also be updated with the names of the collections
            returned.
        flatten_chains : `bool`, optional
            If `True` (default) recursively include the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        parent : `ChainedCollectionRecord`
            Record of a CHAINED collection that has the given collection as
            a direct child.  Nothing is yielded when no parent is recorded
            for ``key``.
        """
        # Use ``get`` instead of indexing: ``_parents_by_child`` is a
        # `defaultdict`, so indexing an unknown key would insert an empty set
        # as a side effect of what should be a read-only query.
        for parent_key in self._parents_by_child.get(key, ()):
            result = self[parent_key]
            assert isinstance(result, ChainedCollectionRecord)
            yield result