Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 57%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "RunRecord",
28]
30from abc import abstractmethod
31from collections import defaultdict
32from typing import TYPE_CHECKING, Any, DefaultDict, Iterator, Optional, Set, Tuple
34from ...core import DimensionUniverse, Timespan, ddl
35from .._collectionType import CollectionType
36from ..wildcards import CollectionSearch
37from ._versioning import VersionedExtension
39if TYPE_CHECKING: 39 ↛ 40line 39 didn't jump to line 40, because the condition on line 39 was never true
40 from ._database import Database, StaticTablesContext
41 from ._dimensions import DimensionRecordStorageManager
class CollectionRecord:
    """Lightweight internal description of a single collection.

    Instances are meant for internal `Registry` APIs only; user-facing code
    should always identify collections by their `str` name.

    Parameters
    ----------
    key
        Unique collection ID.  May be identical to ``name`` when the name
        itself is used for identification.  Usually an integer or string, but
        any database-specific type is allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        # Internal sanity check only: callers inside `Registry` are expected
        # to always pass a genuine enumeration member.
        assert isinstance(self.type, CollectionType)
class RunRecord(CollectionRecord):
    """A `CollectionRecord` specialization for ``RUN`` collections.

    In addition to the basic collection attributes it exposes execution
    information (host and timespan) and a method for updating it.
    """

    @abstractmethod
    def update(self, host: Optional[str] = None, timespan: Optional[Timespan] = None) -> None:
        """Write new execution information for this run to the database.

        Values that are not provided will be set to ``NULL`` in the database;
        they are not ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.  The
            detailed form is set by higher-level convention; from the
            `Registry` perspective this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Dimension universe passed by the caller.  It is accepted but not used
        by this base-class constructor.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain starts out empty; `update` or `refresh` fills it in.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.

        Notes
        -----
        Besides delegating the database write to `_update`, this method keeps
        the manager's child-to-parents mapping in sync with the new
        definition.
        """
        # Walk every CHAINED collection reachable from the proposed children;
        # finding this record among them means the new definition is cyclic.
        for record in children.iter(
            manager, flattenChains=True, includeChains=True, collectionTypes={CollectionType.CHAINED}
        ):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            children = CollectionSearch.fromExpression(
                tuple(record.name for record in children.iter(manager, flattenChains=True))
            )
        # Delegate to derived classes to do the database updates.
        self._update(manager, children)
        # Update the reverse mapping (from child to parents) in the manager,
        # by removing the old relationships and adding back in the new ones.
        for old_child in self._children:
            manager._parents_by_child[manager.find(old_child).key].discard(self.key)
        for new_child in children:
            manager._parents_by_child[manager.find(new_child).key].add(self.key)
        # Actually set this instance's sequence of children.
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Notes
        -----
        The manager's child-to-parents mapping is brought in line with the
        freshly loaded children, so that `CollectionManager.getParentChains`
        also reports chains whose definition was only ever read from the
        database and never passed through `update`.
        """
        self._children = self._load(manager)
        # Drop every link that names this chain as a parent.  The mapping is
        # scanned instead of resolving the previous children through the
        # manager, so children that no longer exist cannot make this fail.
        for parents in manager._parents_by_child.values():
            parents.discard(self.key)
        # Register this chain as a parent of each child it now contains.
        for child in self._children:
            manager._parents_by_child[manager.find(child).key].add(self.key)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()
class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    def __init__(self) -> None:
        # Reverse lookup from a child collection's key to the keys of the
        # CHAINED collections that directly contain it.  Maintained by
        # `ChainedCollectionRecord`; read by `getParentChains`.
        self._parents_by_child: DefaultDict[Any, Set[Any]] = defaultdict(set)

    @classmethod
    @abstractmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: Optional[str] = None
    ) -> Tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    def getParentChains(self, key: Any) -> Iterator[ChainedCollectionRecord]:
        """Find all CHAINED collections that directly contain the given
        collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Yields
        ------
        record : `ChainedCollectionRecord`
            Record of a chain that has the given collection as a direct
            child, in no particular order.
        """
        # Use ``get`` instead of indexing: indexing the `defaultdict` would
        # insert an empty set for every key that is merely looked up.  The
        # tuple is a snapshot, so a chain may be redefined while the caller is
        # still consuming this iterator.
        for parent_key in tuple(self._parents_by_child.get(key, ())):
            result = self[parent_key]
            assert isinstance(result, ChainedCollectionRecord)
            yield result