Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py : 60%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "RunRecord",
28]
30from abc import abstractmethod
31from typing import (
32 Any,
33 Iterator,
34 Optional,
35 TYPE_CHECKING,
36)
38from ...core import ddl, DimensionUniverse, Timespan
39from ..wildcards import CollectionSearch
40from .._collectionType import CollectionType
41from ._versioning import VersionedExtension
43if TYPE_CHECKING: 43 ↛ 44line 43 didn't jump to line 44, because the condition on line 43 was never true
44 from ._database import Database, StaticTablesContext
45 from ._dimensions import DimensionRecordStorageManager
class CollectionRecord:
    """Plain record describing a single collection inside `Registry`
    internals.

    Code that is not part of the `Registry` implementation should refer to
    collections by their `str` names only.

    Parameters
    ----------
    key
        Unique collection ID.  May be identical to ``name`` when names are
        used for identification.  Typically an integer or a string, but any
        database-specific type is permitted.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        # Internal sanity check only: callers are expected to pass a real
        # enumeration member rather than, e.g., its name or integer value.
        assert isinstance(self.type, CollectionType)
class RunRecord(CollectionRecord):
    """A `CollectionRecord` that additionally carries execution information
    and declares the interface used to modify that information.
    """

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError

    @abstractmethod
    def update(self, host: Optional[str] = None, timespan: Optional[Timespan] = None) -> None:
        """Write new execution information to this run's database record.

        A value that is not provided is set to ``NULL`` in the database; it
        is not left unchanged.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Its detailed form is to be set by higher-level convention; from
            the `Registry` perspective this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError
class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification. Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted as part of the constructor signature; this base-class
        constructor does not use it.

    Notes
    -----
    A newly constructed record has an empty chain; the children are filled in
    by a later call to `refresh` or `update`.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `refresh`/`update` replace it.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).

        This property is read-only; use `update` to redefine the chain.
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch, flatten: bool) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.
        flatten : `bool`
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.
            If `False`, ``children`` is stored as given.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from ``children``
        # (recursing into nested chains); encountering this record means the
        # new definition would make the chain contain itself.
        for record in children.iter(manager, flattenChains=True, includeChains=True,
                                    collectionTypes={CollectionType.CHAINED}):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        if flatten:
            children = CollectionSearch(
                tuple(record.name for record in children.iter(manager, flattenChains=True))
            )
        # Persist first, then update the in-memory state, so a failure in
        # `_update` leaves ``self._children`` untouched.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()
class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   dimensions: DimensionRecordStorageManager) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None,
                                constraint: bool = True,
                                **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the run row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType, doc: Optional[str] = None) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()