Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py : 60%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "MissingCollectionError",
28 "RunRecord",
29]
31from abc import ABC, abstractmethod
32from typing import (
33 Any,
34 Iterator,
35 Optional,
36 TYPE_CHECKING,
37)
39import astropy.time
41from ...core import ddl, Timespan
42from ..wildcards import CollectionSearch
43from .._collectionType import CollectionType
45if TYPE_CHECKING: 45 ↛ 46line 45 didn't jump to line 46, because the condition on line 45 was never true
46 from .database import Database, StaticTablesContext
class MissingCollectionError(Exception):
    """Exception raised when an operation attempts to use a collection that
    does not exist.
    """
class CollectionRecord(ABC):
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    def __init__(self, name: str, type: CollectionType) -> None:
        self.name = name
        self.type = type
        # Internal sanity check rather than user-input validation: it is kept
        # as an ``assert`` so the exception type seen by existing callers does
        # not change, but now says what was actually passed.
        assert isinstance(self.type, CollectionType), \
            f"Expected a CollectionType instance, got {self.type!r}."

    @property
    @abstractmethod
    def key(self) -> Any:
        """The primary/foreign key value for this collection.
        """
        raise NotImplementedError()

    name: str
    """Name of the collection (`str`).
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """
class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.
    """

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan[astropy.time.Time]] = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[astropy.time.Time]:
        """Begin and end timestamps for the period over which the run was
        produced (`Timespan`).  `None`/``NULL`` values are interpreted as
        infinite bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    """

    def __init__(self, name: str) -> None:
        super().__init__(name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `refresh` or `update` fills it in.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).

        This property is read-only; use `update` to redefine the chain.
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Reject the new definition if this record is found among the records
        # yielded for the proposed children (the search is asked to flatten
        # and include chains, restricted to CHAINED collections).
        for record in children.iter(manager, flattenChains=True, includeChains=True,
                                    collectionType=CollectionType.CHAINED):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Let the subclass hook run first, so the in-memory state is only
        # replaced if `_update` did not raise.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The search path of child collections; `refresh` stores it as the
            new value of `children`.
        """
        raise NotImplementedError()
class CollectionManager(ABC):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the referenced run row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the removed
        collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed by
        other objects, the ON DELETE clauses of those tables will be invoked.
        That will frequently delete many dependent rows automatically (via
        "CASCADE"), but it may also cause this operation to fail (with
        rollback) unless dependent rows that do not have an ON DELETE clause
        are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()