Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py : 60%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
# Public names of this module, i.e. what ``from ... import *`` exposes.
__all__ = [
    "ChainedCollectionRecord",
    "CollectionManager",
    "CollectionRecord",
    "MissingCollectionError",
    "RunRecord",
]
31from abc import abstractmethod
32from typing import (
33 Any,
34 Iterator,
35 Optional,
36 TYPE_CHECKING,
37)
39import astropy.time
41from ...core import ddl, Timespan
42from ..wildcards import CollectionSearch
43from .._collectionType import CollectionType
44from ._versioning import VersionedExtension
46if TYPE_CHECKING: 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true
47 from ._database import Database, StaticTablesContext
class MissingCollectionError(Exception):
    """Error signalling that an operation tried to use a collection that
    does not exist.
    """
class CollectionRecord:
    """Lightweight record describing a single collection inside the internal
    `Registry` APIs.

    Code facing the user is expected to identify collections by plain `str`
    names rather than by instances of this class.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may be identical to
        ``name`` when the name itself is used for identification.  Usually
        an integer or a string, but other database-specific types are
        allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing what kind of collection this is.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.name = name
        self.key = key
        self.type = type
        # Debug-time sanity check on the stored value (asserts are skipped
        # when Python runs with ``-O``).
        assert isinstance(self.type, CollectionType)
class RunRecord(CollectionRecord):
    """`CollectionRecord` specialization that carries execution information
    and declares the interface used to update it.
    """

    # NOTE(review): `CollectionRecord` as defined in this module declares no
    # ABC metaclass, so `abstractmethod` below documents intent but does not
    # by itself block instantiation -- confirm that this is intended.

    @abstractmethod
    def update(
        self,
        host: Optional[str] = None,
        timespan: Optional[Timespan[astropy.time.Time]] = None,
    ) -> None:
        """Write new execution information to this run's database record.

        Arguments that are not provided are set to ``NULL`` in the database;
        they are not ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Its detailed form is left to higher-level convention; from the
            `Registry` perspective it is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def timespan(self) -> Timespan[astropy.time.Time]:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError
class ChainedCollectionRecord(CollectionRecord):
    """`CollectionRecord` specialization for ``CHAINED`` collections, which
    additionally holds the list of child collections.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may be identical to
        ``name`` when the name itself is used for identification.  Usually
        an integer or a string, but other database-specific types are
        allowed.
    name : `str`
        Name of the collection.
    """

    def __init__(self, key: Any, name: str):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start out with an empty search path; `update` or `refresh`
        # replaces it later.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """Ordered search path of the child collections that make up this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain so that it searches the given children.

        All external code should set children through this method.  The
        actual persistence is delegated to `_update`, which is the hook
        subclasses are expected to override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear as one of its children.
        children : `CollectionSearch`
            Collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Ask the search for the chained records reachable from the proposed
        # children (nested chains flattened, chains themselves included);
        # meeting ``self`` among them means the definition is circular.
        reachableChains = children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionType=CollectionType.CHAINED,
        )
        if any(candidate == self for candidate in reachableChains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Let the subclass hook run first, then update the in-memory state.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Reload the children from the database, using the given manager
        to resolve collection primary key values into records.

        The purpose of this method is to make sure that every collection
        that may appear in a chain is already known to the manager before
        any particular chain tries to look up its records.  Subclasses of
        `ChainedCollectionRecord` may rely on it being called at some point
        after their own ``__init__`` in order to finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear as one of its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook behind `update`.

        Subclasses implement this method to make the database reflect the
        given children.  Nothing other than `update` should call it;
        external code should always go through `update`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear as one of its children.
        children : `CollectionSearch`
            Collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook behind `refresh`.

        Subclasses implement this method to fetch the chain's child
        collections from the database and return them.  Nothing other than
        `refresh` should call it; external code should always go through
        `refresh`.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear as one of its children.
        """
        raise NotImplementedError
class CollectionManager(VersionedExtension):
    """Interface for the object that manages the collections (runs
    included) of a `Registry`.

    Notes
    -----
    Every layer of a multi-layer `Registry` keeps its own record for each
    collection for which it has datasets (or quanta).  Because different
    layers may use different IDs for the same collection, IDs obtained
    through the `CollectionManager` APIs are strictly for use internal to
    `Registry`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare any tables that should always be present in a
            layer implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the
            collection table.  Will be modified in place.
        prefix : `str`, optional
            Name to use as the prefix of the new field; the full name may
            carry a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the collection row is deleted.
            `None` indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        constraint: bool = True,
        **kwargs: Any
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run
            table.  Will be modified in place.
        prefix : `str`, optional
            Name to use as the prefix of the new field; the full name may
            carry a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the collection row is deleted.
            `None` indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the run primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            Name to use as the prefix of the new field; the full name may
            carry a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            Name to use as the prefix of the new field; the full name may
            carry a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError

    @abstractmethod
    def refresh(self) -> None:
        """Make all other operations on this manager aware of any
        collections that other clients may have registered since it was
        initialized or last refreshed.
        """
        raise NotImplementedError

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be
            a `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe;
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed
        by other objects, the ON DELETE clauses of those tables will be
        invoked.  That will frequently delete many dependent rows
        automatically (via "CASCADE"), but it may also cause this operation
        to fail (with rollback) unless dependent rows that do not have an
        ON DELETE clause are removed first.
        """
        raise NotImplementedError

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name.  If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError