Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py : 60%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "MissingCollectionError",
28 "RunRecord",
29]
31from abc import abstractmethod
32from typing import (
33 Any,
34 Iterator,
35 Optional,
36 TYPE_CHECKING,
37)
39from ...core import ddl, Timespan
40from ..wildcards import CollectionSearch
41from .._collectionType import CollectionType
42from ._versioning import VersionedExtension
44if TYPE_CHECKING: 44 ↛ 45line 44 didn't jump to line 45, because the condition on line 44 was never true
45 from ._database import Database, StaticTablesContext
class MissingCollectionError(Exception):
    """Exception raised when an operation attempts to use a collection that
    does not exist.
    """
class CollectionRecord:
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    def __init__(self, key: Any, name: str, type: CollectionType):
        self.key = key
        self.name = name
        self.type = type
        # Internal sanity check rather than user-input validation: it is
        # stripped when Python runs with -O, so callers must not rely on it.
        assert isinstance(self.type, CollectionType)

    def __repr__(self) -> str:
        # Use the runtime class name so subclasses are reported correctly.
        return (f"{self.__class__.__name__}(key={self.key!r}, "
                f"name={self.name!r}, type={self.type!r})")

    name: str
    """Name of the collection (`str`).
    """

    key: Any
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """
class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.

    Notes
    -----
    `CollectionRecord` is declared in this module without an `abc.ABCMeta`
    metaclass, so the `abc.abstractmethod` markers below are not enforced
    at instantiation time by this class alone; the `NotImplementedError`
    raised by each default body is what flags a missing override.
    """

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan] = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).

        `None`/``NULL`` values are interpreted as infinite bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key
        Unique collection ID, can be the same as ``name`` if ``name`` is used
        for identification.  Usually this is an integer or string, but can be
        other database-specific type.
    name : `str`
        Name of the collection.
    """

    def __init__(self, key: Any, name: str):
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `refresh` or `update` replaces it.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Walk every CHAINED collection reachable from the proposed children;
        # finding this record among them means the new definition is cyclic.
        chains = children.iter(manager, flattenChains=True, includeChains=True,
                               collectionTypes={CollectionType.CHAINED})
        if any(record == self for record in chains):
            raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Let the subclass hook run first; the in-memory children are only
        # replaced once `_update` has returned without raising.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this records instance and all records
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The search path of child collections; `refresh` stores it as the
            new value of `children`.
        """
        raise NotImplementedError()
class CollectionManager(VersionedExtension):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None,
                                constraint: bool = True,
                                **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None,
                         constraint: bool = True,
                         **kwargs: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the run row is deleted.  `None`
            indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined to
            the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()