Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 53%
134 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-22 02:05 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-22 02:05 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "RunRecord",
28]
30from abc import abstractmethod
31from collections import defaultdict
32from typing import TYPE_CHECKING, Any, DefaultDict, Iterator, Optional, Set, Tuple
34from ...core import DimensionUniverse, Timespan, ddl
35from .._collectionType import CollectionType
36from ..wildcards import CollectionSearch
37from ._versioning import VersionedExtension
39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true
40 from ._database import Database, StaticTablesContext
41 from ._dimensions import DimensionRecordStorageManager
class CollectionRecord:
    """Lightweight description of one collection for internal `Registry` APIs.

    Code outside the registry internals should identify collections by their
    `str` names instead of passing instances of this class around.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may equal ``name`` when the
        name itself is used for identification; it is usually an integer or
        a string, but any database-specific type is permitted.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value giving the kind of collection.

    Notes
    -----
    ``name``, ``key`` and ``type`` together define equality, and instances
    are hashable (by ``name``), so all users and subclasses must treat these
    three attributes as immutable.  Subclasses may add mutable attributes of
    their own, provided those attributes take no part in any subclass
    definition of equality.
    """

    name: str
    """The collection's name (`str`).
    """

    key: Any
    """Primary/foreign key value that identifies this collection.
    """

    type: CollectionType
    """Kind of collection, as an enumeration value (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        # Internal sanity check on the stored collection type.
        assert isinstance(self.type, CollectionType)

    def __eq__(self, other: Any) -> bool:
        # Duck-typed comparison: an object that lacks one of the compared
        # attributes is handed back to Python via NotImplemented.  The
        # name -> type -> key order (with short-circuiting) is deliberate.
        try:
            same_name = self.name == other.name
            return same_name and self.type == other.type and self.key == other.key
        except AttributeError:
            return NotImplemented

    def __hash__(self) -> int:
        # Only the name is hashed; equal records always share a name.
        return hash(self.name)

    def __repr__(self) -> str:
        fields = f"key={self.key!r}, name={self.name!r}, type={self.type!r}"
        return f"CollectionRecord({fields})"

    def __str__(self) -> str:
        return self.name
class RunRecord(CollectionRecord):
    """`CollectionRecord` subclass that carries execution information and
    declares the interface used to update it.
    """

    @abstractmethod
    def update(self, host: Optional[str] = None, timespan: Optional[Timespan] = None) -> None:
        """Replace the execution information stored in the database for
        this run.

        Any value that is not provided is written to the database as
        ``NULL``; it is not left unchanged.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Its detailed form is left to higher-level convention; from the
            `Registry` perspective it is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        fields = f"key={self.key!r}, name={self.name!r}"
        return f"RunRecord({fields})"
class ChainedCollectionRecord(CollectionRecord):
    """`CollectionRecord` subclass that additionally holds the list of child
    collections of a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may equal ``name`` when the
        name itself is used for identification; it is usually an integer or
        a string, but any database-specific type is permitted.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted by the constructor; not used by this base-class
        implementation.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain starts out with an empty search path.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """Ordered search path of the child collections that make up this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Redefine this chain so that it searches the given children.

        All external code should set children through this method.  The
        database work is delegated to `_update`, which is the hook that
        subclasses override.

        Parameters
        ----------
        manager : `CollectionManager`
            Object that manages this record and every record that may
            appear as one of its children.
        children : `CollectionSearch`
            Collection search path to resolve into the new child
            collections of this chain.
        flatten : `bool`
            If `True`, first recursively flatten any nested
            `~CollectionType.CHAINED` collections found in ``children``.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from the new children; if
        # this record shows up among them the new definition is cyclic.
        nested_chains = children.iter(
            manager, flattenChains=True, includeChains=True, collectionTypes={CollectionType.CHAINED}
        )
        if any(candidate == self for candidate in nested_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            flat_names = tuple(member.name for member in children.iter(manager, flattenChains=True))
            children = CollectionSearch.fromExpression(flat_names)
        # Subclasses perform the actual database update.
        self._update(manager, children)
        # Keep the manager's child -> parents mapping in step: drop the links
        # from the previous children, then add links for the new ones.
        for previous in self._children:
            manager._parents_by_child[manager.find(previous).key].discard(self.key)
        for current in children:
            manager._parents_by_child[manager.find(current).key].add(self.key)
        # Only now replace this instance's own sequence of children.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Reload the children from the database, resolving collection
        primary key values into records through the given manager.

        The method exists so that every collection that may appear in a
        chain is known to the manager before any chain tries to retrieve
        those records from it.  `ChainedCollectionRecord` subclasses can
        rely on it being called some time after their own ``__init__`` in
        order to finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            Object that manages this record and every record that may
            appear as one of its children.
        """
        # Remove the stale child -> parents links for the current children.
        for stale in self._children:
            manager._parents_by_child[manager.find(stale).key].discard(self.key)
        self._children = self._load(manager)
        # Register this chain as a parent of each freshly loaded child.
        for loaded in self._children:
            manager._parents_by_child[manager.find(loaded).key].add(self.key)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        Subclasses implement this to make the database reflect the given
        children.  Nothing other than `update` should ever call it; external
        code must go through `update`.

        Parameters
        ----------
        manager : `CollectionManager`
            Object that manages this record and every record that may
            appear as one of its children.
        children : `CollectionSearch`
            Collection search path to resolve into the child collections of
            this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses implement this to fetch the chain's child collections
        from the database and return them.  Nothing other than `refresh`
        should ever call it; external code must go through `refresh`.

        Parameters
        ----------
        manager : `CollectionManager`
            Object that manages this record and every record that may
            appear as one of its children.

        Returns
        -------
        children : `CollectionSearch`
            Ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        fields = f"key={self.key!r}, name={self.name!r}, children={self.children!r}"
        return f"ChainedCollectionRecord({fields})"
class CollectionManager(VersionedExtension):
    """Interface for managing the collections (runs included) of a
    `Registry`.

    Notes
    -----
    Every layer of a multi-layer `Registry` keeps its own record for each
    collection for which it holds datasets (or quanta).  Because different
    layers may assign different IDs to the same collection, IDs obtained
    through the `CollectionManager` APIs are strictly for use internal to
    `Registry`.
    """

    def __init__(self) -> None:
        # Reverse lookup from a child collection key to the keys of the
        # CHAINED collections that list it as a child.
        self._parents_by_child: DefaultDict[Any, Set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Build an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare any tables that should always be present in a
            layer implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification of the table that should reference the collection
            table.  Modified in place.
        prefix : `str`, optional
            Name to use as the prefix of the new field; the full name may
            carry a suffix (and is reported in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            Either "CASCADE" or "SET NULL", stating what should happen to
            the referencing row when the collection row is deleted.  `None`
            means the deletion should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is the default), add a field that can be
            joined to the collection primary key but do not add a foreign
            key constraint.
        **kwargs
            Extra keyword arguments forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification of the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification of the table that should reference the run table.
            Modified in place.
        prefix : `str`, optional
            Name to use as the prefix of the new field; the full name may
            carry a suffix (and is reported in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            Either "CASCADE" or "SET NULL", stating what should happen to
            the referencing row when the collection row is deleted.  `None`
            means the deletion should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is the default), add a field that can be
            joined to the run primary key but do not add a foreign key
            constraint.
        **kwargs
            Extra keyword arguments forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification of the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            Name to use as the prefix of the new field; the full name may
            carry a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            Name to use as the prefix of the new field; the full name may
            carry a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Make every other operation on this manager aware of collections
        that other clients may have registered since it was initialized or
        last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: Optional[str] = None
    ) -> Tuple[CollectionRecord, bool]:
        """Ensure that a collection with the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be
            a `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe:
        nothing should happen if the types are consistent, and integrity
        errors caused by inconsistent types should occur before any
        database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If the collection is referenced by foreign keys in tables managed
        by other objects, the ON DELETE clauses of those tables will be
        invoked.  That will frequently delete many dependent rows
        automatically (via "CASCADE"), but it may also cause this operation
        to fail (with rollback) unless dependent rows that do not have an
        ON DELETE clause are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name.  If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        record : `ChainedCollectionRecord`
            Record of a chain whose key is registered as a parent of
            ``key`` in this manager's child-to-parents mapping.
        """
        parent_keys = self._parents_by_child[key]
        for chain_key in parent_keys:
            chain = self[chain_key]
            # Only chains are ever recorded as parents; this also narrows
            # the type for static checkers.
            assert isinstance(chain, ChainedCollectionRecord)
            yield chain