Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py : 59%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "RunRecord",
28]
30from abc import abstractmethod
31from typing import (
32 Any,
33 Iterator,
34 Optional,
35 TYPE_CHECKING,
36)
38from ...core import ddl, DimensionUniverse, Timespan
39from ..wildcards import CollectionSearch
40from .._collectionType import CollectionType
41from ._versioning import VersionedExtension
43if TYPE_CHECKING: 43 ↛ 44line 43 didn't jump to line 44, because the condition on line 43 was never true
44 from ._database import Database, StaticTablesContext
45 from ._dimensions import DimensionRecordStorageManager
class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal sanity check on the collection type; note that ``assert``
        # statements are skipped when Python runs with ``-O``.
        assert isinstance(self.type, CollectionType)

    def __repr__(self) -> str:
        """Return a debugging representation showing key, name and type.

        The runtime class name is used so that subclasses are reported under
        their own name rather than as ``CollectionRecord``.
        """
        return (f"{type(self).__name__}(key={self.key!r}, "
                f"name={self.name!r}, type={self.type!r})")

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """
class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.

    The methods and properties declared here are hooks that concrete
    subclasses are expected to override; the versions in this class only
    raise `NotImplementedError`.
    """

    # NOTE(review): `CollectionRecord` in this file does not declare an ABC
    # metaclass, so `@abstractmethod` here looks advisory only (instantiation
    # would not be blocked) -- confirm whether that is intended.

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan] = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """A `CollectionRecord` for a ``CHAINED`` collection, carrying the
    ordered list of child collections that make up the chain.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    universe : `DimensionUniverse`
        Accepted as part of the constructor signature; not used by this
        base-class constructor.
    """

    def __init__(self, key: Any, name: str, universe: DimensionUniverse):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain starts with an empty search path; `refresh` or `update`
        # replaces it later.
        empty_search = CollectionSearch.fromExpression([])
        self._children = empty_search

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        External code should set children through this method.  It checks for
        cycles and then delegates the persistence work to `_update`, which is
        the hook subclasses override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from the proposed children
        # (chains flattened, chains themselves included); finding this record
        # among them means the new definition would be cyclic.
        reachable_chains = children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionTypes={CollectionType.CHAINED},
        )
        if any(record == self for record in reachable_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Persist first; the in-memory state is only replaced if `_update`
        # returns without raising.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        loaded = self._load(manager)
        self._children = loaded

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The ordered sequence of collection names that defines the chained
            collection.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()
class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext, *,
                   dimensions: DimensionRecordStorageManager) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None,
                                constraint: bool = True,
                                **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType, doc: Optional[str] = None) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the collection
            already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: Any) -> Optional[str]:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: Any, doc: Optional[str]) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str`, optional
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()