Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 54%
119 statements
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-26 02:22 -0700
« prev ^ index » next coverage.py v6.4.4, created at 2022-08-26 02:22 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "RunRecord",
28]
30from abc import abstractmethod
31from collections import defaultdict
32from typing import TYPE_CHECKING, Any, DefaultDict, Iterator, Optional, Set, Tuple
34from ...core import DimensionUniverse, Timespan, ddl
35from .._collectionType import CollectionType
36from ..wildcards import CollectionSearch
37from ._versioning import VersionedExtension
39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true
40 from ._database import Database, StaticTablesContext
41 from ._dimensions import DimensionRecordStorageManager
class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal sanity check on the value just stored; ``assert``
        # statements are skipped entirely when Python runs with ``-O``.
        assert isinstance(self.type, CollectionType)

    def __repr__(self) -> str:
        """Return a debugging representation showing key, name and type.

        The concrete class name is used so that subclasses are reported
        under their own name.
        """
        return f"{self.__class__.__name__}(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """
class RunRecord(CollectionRecord):
    """Collection record extended with execution information.

    In addition to what `CollectionRecord` holds, this interface exposes the
    host and timespan associated with a run, plus a method for changing them.
    All three members are declared abstract here and raise
    `NotImplementedError` if called on this class directly.
    """

    @abstractmethod
    def update(self, host: Optional[str] = None, timespan: Optional[Timespan] = None) -> None:
        """Write new execution information to this run's database record.

        Any value that is not provided will be set to ``NULL`` in the
        database; omitted values are *not* left untouched.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            The detailed form is set by higher-level convention; from the
            `Registry` perspective this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced. `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """Collection record for a ``CHAINED`` collection, which additionally
    carries the ordered list of its child collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted as part of the constructor signature; this base-class
        constructor does not use it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new record starts with an empty chain; `update` or `refresh`
        # replaces it later.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Replace the child collections searched by this chain.

        External code should always set children through this method. The
        database write itself is delegated to `_update`, which is the hook
        subclasses are expected to override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from the proposed children;
        # finding this record among them means the chain would contain itself.
        chained_descendants = children.iter(
            manager, flattenChains=True, includeChains=True, collectionTypes={CollectionType.CHAINED}
        )
        if any(descendant == self for descendant in chained_descendants):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            flat_names = tuple(member.name for member in children.iter(manager, flattenChains=True))
            children = CollectionSearch.fromExpression(flat_names)
        # Subclass hook performs the database updates.
        self._update(manager, children)
        # Keep the manager's child -> parents mapping in sync: drop this
        # chain from each previous child, then register it with each new one.
        for previous_name in self._children:
            previous_key = manager.find(previous_name).key
            manager._parents_by_child[previous_key].discard(self.key)
        for current_name in children:
            current_key = manager.find(current_name).key
            manager._parents_by_child[current_key].add(self.key)
        # Finally record the new sequence of children on this instance.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Reload this chain's children from the database.

        The given manager is used to resolve collection primary key values
        into records. This method exists to ensure that all collections that
        may appear in a chain are known to the manager before any particular
        chain tries to retrieve their records from it.
        `ChainedCollectionRecord` subclasses can rely on it being called
        sometime after their own ``__init__`` to finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        # Remove this chain from the child -> parents mapping of every child
        # it currently has.
        for previous_name in self._children:
            previous_key = manager.find(previous_name).key
            manager._parents_by_child[previous_key].discard(self.key)
        self._children = self._load(manager)
        # Register this chain as a parent of each freshly loaded child.
        for current_name in self._children:
            current_key = manager.find(current_name).key
            manager._parents_by_child[current_key].add(self.key)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        Subclasses implement this to make the database reflect the given
        children. Nothing other than `update` should call it; all external
        code should go through `update`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain. Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses implement this to fetch the chain's child collections from
        the database and return them. Nothing other than `refresh` should
        call it; all external code should go through `refresh`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the chained
            collection. Guaranteed not to contain cycles.
        """
        raise NotImplementedError()
class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    def __init__(self) -> None:
        # Reverse mapping from a child collection's key to the keys of the
        # CHAINED collections that directly contain it.  Maintained by
        # `ChainedCollectionRecord.update` and
        # `ChainedCollectionRecord.refresh`.
        self._parents_by_child: DefaultDict[Any, Set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: Optional[str] = None
    ) -> Tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        record : `ChainedCollectionRecord`
            Record for a chain that has the given collection as a direct
            child.  Nothing is yielded if no parents are recorded for ``key``.
        """
        # Use ``get`` rather than indexing: indexing a `defaultdict` inserts
        # an empty set for every key that has no parents, so a pure lookup
        # would otherwise grow the mapping.  Iterate over a snapshot so that
        # changes to the mapping made while the caller is consuming this
        # generator cannot invalidate the iteration.
        for parent_key in tuple(self._parents_by_child.get(key, ())):
            result = self[parent_key]
            assert isinstance(result, ChainedCollectionRecord)
            yield result