Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py : 60%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "MissingCollectionError",
28 "RunRecord",
29]
31from abc import ABC, abstractmethod
32from typing import (
33 Any,
34 Iterator,
35 Optional,
36 TYPE_CHECKING,
37)
39import astropy.time
41from ...core import ddl, Timespan
42from ..wildcards import CollectionSearch
43from .._collectionType import CollectionType
45if TYPE_CHECKING: 45 ↛ 46line 45 didn't jump to line 46, because the condition on line 45 was never true
46 from .database import Database, StaticTablesContext
class MissingCollectionError(Exception):
    """Error signalling that an operation referred to a collection that
    does not exist.
    """
class CollectionRecord:
    """Lightweight struct describing a collection inside internal `Registry`
    APIs.

    Code facing the user should refer to collections by `str` name only.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may be identical to ``name``
        when the name itself is used for identification.  Typically an
        integer or a string, but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing what kind of collection this is.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        # Internal sanity check: the collection type must be a real
        # enumeration member, not (for example) its name or integer value.
        assert isinstance(self.type, CollectionType)
class RunRecord(CollectionRecord):
    """A `CollectionRecord` specialization that carries execution information
    for a run, together with an interface for modifying it.
    """

    # NOTE(review): `CollectionRecord` does not derive from `abc.ABC`, so the
    # `abstractmethod` markers below document intent but do not by themselves
    # prevent instantiation -- confirm whether that is deliberate.

    @abstractmethod
    def update(self, host: Optional[str] = None, timespan: Optional[Timespan[astropy.time.Time]] = None):
        """Write new execution information to this run's database record.

        Any value that is not provided will be set to ``NULL`` in the
        database; it is not ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            The detailed form is set by higher-level convention; as far as
            `Registry` is concerned the value is entirely opaque.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Name of the host or system on which this run was produced
        (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[astropy.time.Time]:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """A `CollectionRecord` specialization for ``CHAINED`` collections, which
    additionally holds the list of child collections.

    Parameters
    ----------
    key
        Unique identifier of the collection.  It may be identical to ``name``
        when the name itself is used for identification.  Typically an
        integer or a string, but any database-specific type is allowed.
    name : `str`
        Name of the collection.
    """

    # NOTE(review): `CollectionRecord` does not derive from `abc.ABC`, so the
    # `abstractmethod` markers on `_update` and `_load` document intent but
    # do not by themselves prevent instantiation -- confirm this is intended.

    def __init__(self, key: Any, name: str):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # A new chain starts with an empty search path; `refresh` or `update`
        # fills it in later.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this
        chain (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch):
        """Redefine this chain so that it searches the given children.

        All external code should set children through this method.  The
        database work is delegated to `_update`, which is the hook that
        subclasses override.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every chained collection reachable from the proposed children;
        # finding this record among them means the chain would contain itself.
        nestedChains = children.iter(
            manager,
            flattenChains=True,
            includeChains=True,
            collectionType=CollectionType.CHAINED,
        )
        if any(record == self for record in nestedChains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Let the subclass persist the change before the in-memory state is
        # replaced.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager):
        """Load this chain's children from the database, resolving collection
        primary key values into records via the given manager.

        The purpose of this method is to guarantee that every collection
        that may appear in a chain is known to the manager before any
        particular chain tries to retrieve their records from it.
        `ChainedCollectionRecord` subclasses may rely on it being called
        sometime after their own ``__init__`` to finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch):
        """Protected implementation hook for `update`.

        Subclasses implement this method to make the database reflect the
        given children.  It should never be called by anything other than
        `update`, which is what all external code should use.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the
            child collections of this chain.  Guaranteed not to contain
            cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        Subclasses implement this method to retrieve the chain's child
        collections from the database and return them.  It should never be
        called by anything other than `refresh`, which is what all external
        code should use.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and every record
            instance that may appear among its children.

        Returns
        -------
        children : `CollectionSearch`
            The search path of child collections loaded for this chain.
        """
        raise NotImplementedError()
class CollectionManager(ABC):
    """Abstract interface for managing the collections (runs included) of a
    `Registry`.

    Notes
    -----
    Every layer of a multi-layer `Registry` keeps its own record for each
    collection for which it has datasets (or quanta).  The same collection
    may have different IDs in different layers, so IDs obtained through the
    `CollectionManager` APIs are strictly for use internal to `Registry`.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare any tables that should always be present in a
            layer implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: Optional[str] = None,
        **kwds: Any
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the
            collection table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name
            may have a suffix (and is given in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the collection row is deleted.
            `None` indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: Optional[str] = None,
        **kwds: Any
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name
            may have a suffix (and is given in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the collection row is deleted.
            `None` indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when it is called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name
            may have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        it is called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name
            may have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self):
        """Make all other operations on this manager aware of any
        collections that other clients may have registered since it was
        initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection with the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be
            a `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe;
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str):
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed
        by other objects, the ON DELETE clauses of those tables will be
        invoked.  That will frequently delete many dependent rows
        automatically (via "CASCADE"), but it may also cause this operation
        to fail (with rollback) unless dependent rows that do not have an
        ON DELETE clause are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name.  If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()