Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 63%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "RunRecord",
28]
30from abc import abstractmethod
31from typing import TYPE_CHECKING, Any, Iterator, Optional, Tuple
33from ...core import DimensionUniverse, Timespan, ddl
34from .._collectionType import CollectionType
35from ..wildcards import CollectionSearch
36from ._versioning import VersionedExtension
38if TYPE_CHECKING: 38 ↛ 39line 38 didn't jump to line 39, because the condition on line 38 was never true
39 from ._database import Database, StaticTablesContext
40 from ._dimensions import DimensionRecordStorageManager
class CollectionRecord:
    """Lightweight record describing one collection inside `Registry`
    internals.

    Code outside the registry is expected to refer to collections by their
    `str` name only.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may simply be ``name`` when
        names are used for identification; usually an integer or a string,
        though any other database-specific type is allowed.
    name : `str`
        Collection name.
    type : `CollectionType`
        Enumeration value giving the kind of collection.
    """

    name: str
    """Collection name (`str`).
    """

    key: Any
    """Value used as the primary/foreign key for this collection.
    """

    type: CollectionType
    """Kind of collection, as an enumeration value (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        # Internal sanity check only: the annotation already states the
        # contract, this catches callers that pass something else.
        assert isinstance(self.type, CollectionType)
class RunRecord(CollectionRecord):
    """`CollectionRecord` specialization that carries execution information
    for a run and declares the interface used to update it.
    """

    # NOTE(review): the `CollectionRecord` base visible in this module is a
    # plain class, so ``@abstractmethod`` below is not enforced at
    # instantiation time unless a subclass opts into `abc.ABCMeta`; the
    # ``NotImplementedError`` bodies are what a caller would actually hit.

    @abstractmethod
    def update(self, host: Optional[str] = None, timespan: Optional[Timespan] = None) -> None:
        """Write new execution information for this run to the database.

        Arguments that are left out are stored as ``NULL``; they are not
        treated as "keep the current value".

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.  Its
            detailed form is left to higher-level convention; `Registry`
            treats it as an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the ordered list of child
    collections making up a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted as part of the constructor signature; this base class does
        not use it.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A freshly constructed chain has no children; `refresh` or `update`
        # fills them in later.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Redefine this chain to search the given child collections.

        This is the method all external code should use to set children.  It
        validates the request and then delegates the database write to
        `_update`, which is the hook subclasses override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from the requested children
        # (nested chains included); finding this record among them means the
        # new definition would make the chain contain itself.
        reachable_chains = children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionTypes={CollectionType.CHAINED},
        )
        if any(candidate == self for candidate in reachable_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            # Replace the requested search path by the names obtained from a
            # fully flattened iteration over it.
            flat_names = tuple(member.name for member in children.iter(manager, flattenChains=True))
            children = CollectionSearch.fromExpression(flat_names)
        # Persist first; the in-memory state is only updated if that succeeds.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()
class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs is strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(
        cls, db: Database, context: StaticTablesContext, *, dimensions: DimensionRecordStorageManager
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self, name: str, type: CollectionType, doc: Optional[str] = None
    ) -> Tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If ``record.type is CollectionType.CHAINED``,
            this will be a `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If ``record.type is CollectionType.CHAINED``,
            this will be a `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        doc : `str` or `None`
            Documentation string for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Documentation string for the collection with the given key.
        """
        raise NotImplementedError()