Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py : 60%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "RunRecord",
28]
30from abc import abstractmethod
31from typing import (
32 Any,
33 Iterator,
34 Optional,
35 Tuple,
36 TYPE_CHECKING,
37)
39from ...core import ddl, DimensionUniverse, Timespan
40from ..wildcards import CollectionSearch
41from .._collectionType import CollectionType
42from ._versioning import VersionedExtension
44if TYPE_CHECKING: 44 ↛ 45line 44 didn't jump to line 45, because the condition on line 44 was never true
45 from ._database import Database, StaticTablesContext
46 from ._dimensions import DimensionRecordStorageManager
class CollectionRecord:
    """Lightweight record describing one collection inside internal
    `Registry` APIs.

    Code facing users should keep identifying collections by plain `str`
    names rather than by instances of this class.

    Parameters
    ----------
    key
        Unique ID of the collection.  It may be identical to ``name`` when
        the name itself is used for identification.  Typically an integer
        or a string, but other database-specific types are allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """Primary/foreign key value identifying this collection.
    """

    type: CollectionType
    """Enumeration value giving the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.name = name
        self.key = key
        self.type = type
        # Internal sanity check only: ``assert`` statements are skipped when
        # Python runs with ``-O``, so this is not input validation.
        assert isinstance(self.type, CollectionType)
class RunRecord(CollectionRecord):
    """A `CollectionRecord` extended with execution information and the
    means to update that information.
    """

    # NOTE(review): `CollectionRecord` as declared in this module has no ABC
    # metaclass, so ``@abstractmethod`` is presumably not enforced when a
    # subclass is instantiated; the ``NotImplementedError`` raised by each
    # stub is the effective guard.  Confirm before relying on either.

    @abstractmethod
    def update(
        self,
        host: Optional[str] = None,
        timespan: Optional[Timespan] = None,
    ) -> None:
        """Write new execution information to the database record of this
        run.

        Any value that is not provided is set to ``NULL`` in the database;
        it is not skipped.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Its detailed form is left to higher-level convention; as far as
            `Registry` is concerned the value is entirely opaque.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """A `CollectionRecord` for a ``CHAINED`` collection, which additionally
    carries the list of child collections.

    Parameters
    ----------
    key
        Unique ID of the collection.  It may be identical to ``name`` when
        the name itself is used for identification.  Typically an integer
        or a string, but other database-specific types are allowed.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted by the constructor but not used by this base class.
    """

    # NOTE(review): `CollectionRecord` as declared in this module has no ABC
    # metaclass, so ``@abstractmethod`` below is presumably not enforced at
    # instantiation; the ``NotImplementedError`` stubs are the effective
    # guard.  Confirm before relying on either.

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain starts with an empty search path; `refresh` or
        # `update` replaces it later.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections defining this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Redefine this chain so that it searches the given child
        collections.

        All external code should set children through this method.  The
        database work is delegated to `_update`, which is the method
        subclasses are expected to override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # The cycle check runs whether or not flattening was requested: if
        # this record shows up among the chained collections reached from
        # ``children``, the new definition would refer back to itself.
        nested_chains = children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionTypes={CollectionType.CHAINED},
        )
        if any(chain == self for chain in nested_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            flat_names = tuple(member.name for member in children.iter(manager, flattenChains=True))
            children = CollectionSearch.fromExpression(flat_names)
        # Run the subclass hook first, so the in-memory children are left
        # untouched if it raises.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, resolving collection primary
        key values into records through the given manager.

        The method exists so that every collection that may appear in a
        chain is known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord`
        subclasses can rely on it being called sometime after their own
        ``__init__`` to finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        loaded = self._load(manager)
        self._children = loaded

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook called by `update`.

        Subclasses implement this to make the database reflect the given
        children.  Nothing other than `update` should call it; external
        code should always go through `update`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.  Guaranteed not to contain
            cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook called by `refresh`.

        Subclasses implement this to retrieve the chain's child collections
        from the database and return them.  Nothing other than `refresh`
        should call it; external code should always go through `refresh`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the
            chained collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()
class CollectionManager(VersionedExtension):
    """Interface for managing the collections (runs included) of a
    `Registry`.

    Notes
    -----
    Every layer of a multi-layer `Registry` keeps its own record for each
    collection for which it has datasets (or quanta).  Because different
    layers may use different IDs for the same collection, IDs obtained
    through the `CollectionManager` APIs are strictly for use internal to
    `Registry`.
    """

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        dimensions: DimensionRecordStorageManager,
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare any tables that should always be present in a
            layer implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the
            collection table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name
            may have a suffix (and is given in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the collection row is deleted.
            `None` indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name
            may have a suffix (and is given in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the referenced row is deleted.
            `None` indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the run primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name
            may have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name
            may have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Make all other operations on this manager aware of any
        collections that other clients may have registered since it was
        initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self,
        name: str,
        type: CollectionType,
        doc: Optional[str] = None,
    ) -> Tuple[CollectionRecord, bool]:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be
            a `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe:
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed
        by other objects, the ON DELETE clauses of those tables will be
        invoked.  That will frequently delete many dependent rows
        automatically (via "CASCADE"), but it may also cause this operation
        to fail (with rollback) unless dependent rows that do not have an
        ON DELETE clause are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name.  If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()