Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py : 60%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "MissingCollectionError",
28 "RunRecord",
29]
31from abc import ABC, abstractmethod
32from datetime import datetime
33from typing import (
34 Any,
35 Iterator,
36 Optional,
37 TYPE_CHECKING,
38)
40from ...core import ddl, Timespan
41from ..wildcards import CollectionSearch
42from .._collectionType import CollectionType
44if TYPE_CHECKING: 44 ↛ 45line 44 didn't jump to line 45, because the condition on line 44 was never true
45 from .database import Database, StaticTablesContext
class MissingCollectionError(Exception):
    """Exception raised when an operation attempts to use a collection that
    does not exist.
    """
class CollectionRecord(ABC):
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """

    def __init__(self, name: str, type: CollectionType):
        self.name = name
        self.type = type
        # Internal consistency check only: ``assert`` statements are removed
        # when Python runs with ``-O``, so this is not input validation.
        assert isinstance(self.type, CollectionType)

    @property
    @abstractmethod
    def key(self) -> Any:
        """The primary/foreign key value for this collection.
        """
        raise NotImplementedError()

    name: str
    """Name of the collection (`str`).
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """
class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.
    """

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan[Optional[datetime]]] = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[Optional[datetime]]:
        """Begin and end timestamps for the period over which the run was
        produced.  `None`/``NULL`` values are interpreted as infinite
        bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    """

    def __init__(self, name: str):
        super().__init__(name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `refresh` or `update` fills it in.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Look for this record among the CHAINED records that the proposed
        # children resolve to.
        # NOTE(review): presumably ``flattenChains``/``includeChains`` make
        # this walk nested chains recursively -- confirm against
        # `CollectionSearch.iter`.
        for record in children.iter(manager, flattenChains=True, includeChains=True,
                                    collectionType=CollectionType.CHAINED):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Only replace the in-memory children once the subclass hook has
        # returned without raising.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The search path of child collections loaded from the database.
        """
        raise NotImplementedError()
class CollectionManager(ABC):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the referenced run row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type are present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()