Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 53%
135 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-27 08:58 +0000
« prev ^ index » next coverage.py v6.4.4, created at 2022-09-27 08:58 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "RunRecord",
28]
30from abc import abstractmethod
31from collections import defaultdict
32from collections.abc import Iterator
33from typing import TYPE_CHECKING, Any
35from ...core import DimensionUniverse, Timespan, ddl
36from .._collectionType import CollectionType
37from ..wildcards import CollectionSearch
38from ._versioning import VersionedExtension
40if TYPE_CHECKING: 40 ↛ 41line 40 didn't jump to line 41, because the condition on line 40 was never true
41 from ._database import Database, StaticTablesContext
42 from ._dimensions import DimensionRecordStorageManager
class CollectionRecord:
    """Plain record describing a single collection for internal `Registry` use.

    Code outside of `Registry` internals is expected to identify collections
    by their `str` names only and should not handle this type.

    Parameters
    ----------
    key
        Unique collection ID.  May be identical to ``name`` when the name is
        itself used for identification.  Typically an integer or a string,
        but any database-specific type is permitted.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.

    Notes
    -----
    Instances are hashable (on ``name``) and compare equal when ``name``,
    ``type`` and ``key`` all match, so those three attributes must be treated
    as immutable by users and by derived classes.  Subclasses may add mutable
    attributes provided those do not take part in any equality definition.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal sanity check only; callers are trusted to pass the enum.
        assert isinstance(self.type, CollectionType)

    def __eq__(self, other: Any) -> bool:
        # Duck-typed comparison: any object exposing name/type/key can be
        # compared; anything else defers to the other operand.
        try:
            matches = self.name == other.name and self.type == other.type and self.key == other.key
        except AttributeError:
            return NotImplemented
        else:
            return matches

    def __hash__(self) -> int:
        # Only the name participates in the hash; equal records always share
        # a name, so this is consistent with ``__eq__``.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        return self.name
class RunRecord(CollectionRecord):
    """`CollectionRecord` specialization for runs.

    Adds read access to execution information (host and timespan) and a
    method for updating that information.
    """

    @abstractmethod
    def update(self, host: str | None = None, timespan: Timespan | None = None) -> None:
        """Write new execution information for this run to the database.

        Arguments that are not provided are set to ``NULL`` in the database;
        they are not left unchanged.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            The detailed form is left to higher-level convention; to
            `Registry` this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> str | None:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced.  `None`/``NULL`` values are interpreted as infinite
        bounds.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"RunRecord(key={self.key!r}, name={self.name!r})"
class ChainedCollectionRecord(CollectionRecord):
    """`CollectionRecord` specialization for ``CHAINED`` collections, which
    carries the ordered list of child collections.

    Parameters
    ----------
    key
        Unique collection ID.  May be identical to ``name`` when the name is
        itself used for identification.  Typically an integer or a string,
        but any database-specific type is permitted.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Part of the constructor signature; this base-class constructor does
        not use it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start with an empty chain; `refresh` or `update` fills it in.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Redefine this chain to search the given child collections.

        All external code should set children through this method.  The
        database work is delegated to `_update`, which is the hook that
        subclasses override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from the new children; if
        # this record is among them the new definition would be cyclic.
        nested_chains = children.iter(
            manager, flattenChains=True, includeChains=True, collectionTypes={CollectionType.CHAINED}
        )
        if any(nested == self for nested in nested_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            flat_names = tuple(member.name for member in children.iter(manager, flattenChains=True))
            children = CollectionSearch.fromExpression(flat_names)
        # Subclass hook performs the actual database update.
        self._update(manager, children)
        # Keep the manager's child -> parents mapping in sync: drop the
        # entries for the previous children, then register the new ones.
        for previous in self._children:
            manager._parents_by_child[manager.find(previous).key].discard(self.key)
        for current in children:
            manager._parents_by_child[manager.find(current).key].add(self.key)
        # Finally record the new sequence of children on this instance.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        # Remove this chain from the child -> parents mapping of its current
        # children before they are replaced.
        for previous in self._children:
            manager._parents_by_child[manager.find(previous).key].discard(self.key)
        self._children = self._load(manager)
        # Register this chain as a parent of each freshly loaded child.
        for current in self._children:
            manager._parents_by_child[manager.find(current).key].add(self.key)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        Subclasses implement this to make the database reflect the given
        children.  Only `update` should call it; all external code should
        go through `update`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses implement this to read the chain's child collections from
        the database and return them.  Only `refresh` should call it; all
        external code should go through `refresh`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"
class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    def __init__(self) -> None:
        # Reverse mapping from a child collection key to the keys of the
        # CHAINED collections that directly contain it.  Maintained by
        # `ChainedCollectionRecord.update` and `ChainedCollectionRecord.refresh`.
        self._parents_by_child: defaultdict[Any, set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: str | None = None
    ) -> tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If ``record.type is
            CollectionType.CHAINED``, this will be a `ChainedCollectionRecord`
            instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If ``record.type is
            CollectionType.CHAINED``, this will be a `ChainedCollectionRecord`
            instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        record : `ChainedCollectionRecord`
            Record for a CHAINED collection that has the given collection as
            a direct child.
        """
        # Use ``get`` rather than indexing: indexing a defaultdict would
        # insert an empty set for every key queried, so a read-only lookup
        # would silently grow the mapping.
        for parent_key in self._parents_by_child.get(key, ()):
            result = self[parent_key]
            assert isinstance(result, ChainedCollectionRecord)
            yield result