Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py : 60%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "MissingCollectionError",
28 "RunRecord",
29]
31from abc import abstractmethod
32from typing import (
33 Any,
34 Iterator,
35 Optional,
36 TYPE_CHECKING,
37)
39import astropy.time
41from ...core import ddl, Timespan
42from ..wildcards import CollectionSearch
43from .._collectionType import CollectionType
44from ._versioning import VersionedExtension
46if TYPE_CHECKING: 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true
47 from ._database import Database, StaticTablesContext
class MissingCollectionError(Exception):
    """Exception raised when an operation attempts to use a collection that
    does not exist.
    """
class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal sanity check only: it is skipped when Python runs with
        # ``-O``, so it must not be relied upon for input validation.
        assert isinstance(self.type, CollectionType), \
            f"type must be a CollectionType instance, got {self.type!r}"

    def __repr__(self) -> str:
        # Use the runtime class name so that subclasses are reported
        # correctly without each having to override this method.
        return (f"{self.__class__.__name__}(key={self.key!r}, "
                f"name={self.name!r}, type={self.type!r})")

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """
class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.
    """

    # NOTE: `CollectionRecord` is a plain class, so unless a subclass opts
    # into `abc.ABCMeta` the ``@abstractmethod`` markers below are
    # declarative only; the ``NotImplementedError`` bodies are what actually
    # signal a missing override at call time.

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan[astropy.time.Time]] = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[astropy.time.Time]:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    """

    def __init__(self, key: Any, name: str):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; the real children are filled in
        # later by `refresh` (from the database) or `update` (from a caller).
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from the proposed children
        # (chains are both yielded and flattened); finding this record among
        # them means the new definition would make the chain contain itself.
        nested_chains = children.iter(manager, flattenChains=True, includeChains=True,
                                      collectionType=CollectionType.CHAINED)
        if any(record == self for record in nested_chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Persist first, and only update the in-memory state afterwards, so
        # that an exception from `_update` leaves `children` unchanged.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The search path of child collections read from the database.
        """
        raise NotImplementedError()
class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()