Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py : 61%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = [
24 "ChainedCollectionRecord",
25 "CollectionManager",
26 "CollectionRecord",
27 "MissingCollectionError",
28 "RunRecord",
29]
31from abc import ABC, abstractmethod
32from datetime import datetime
33from typing import (
34 Any,
35 Iterator,
36 Optional,
37 TYPE_CHECKING,
38)
40from ...core import ddl, Timespan
41from ..wildcards import CollectionSearch
42from .._collectionType import CollectionType
44if TYPE_CHECKING: 44 ↛ 45line 44 didn't jump to line 45, because the condition on line 44 was never true
45 from .database import Database, StaticTablesContext
class MissingCollectionError(Exception):
    """Exception raised when an operation attempts to use a collection that
    does not exist.
    """
class CollectionRecord(ABC):
    """A struct used to represent a collection in internal `Registry` APIs.

    User-facing code should always just use a `str` to represent collections.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.
    """
    # NOTE: the ``type`` parameter/attribute shadows the builtin of the same
    # name; it is part of the public interface and therefore kept as-is.
    def __init__(self, name: str, type: CollectionType):
        self.name = name
        self.type = type

    @property
    @abstractmethod
    def key(self) -> Any:
        """The primary/foreign key value for this collection.

        Concrete subclasses must override this property; the base
        implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    name: str
    """Name of the collection (`str`).
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """
class RunRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds execution information and
    an interface for updating it.
    """

    @abstractmethod
    def update(self, host: Optional[str] = None,
               timespan: Optional[Timespan[Optional[datetime]]] = None) -> None:
        """Update the database record for this run with new execution
        information.

        Values not provided will be set to ``NULL`` in the database, not
        ignored.

        Parameters
        ----------
        host : `str`, optional
            Name of the host or system on which this run was produced.
            Detailed form to be set by higher-level convention; from the
            `Registry` perspective, this is an entirely opaque value.
        timespan : `Timespan`, optional
            Begin and end timestamps for the period over which the run was
            produced.  `None`/``NULL`` values are interpreted as infinite
            bounds.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def host(self) -> Optional[str]:
        """Return the name of the host or system on which this run was
        produced (`str` or `None`).
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def timespan(self) -> Timespan[Optional[datetime]]:
        """Begin and end timestamps for the period over which the run was
        produced.  `None`/``NULL`` values are interpreted as infinite
        bounds.
        """
        raise NotImplementedError()
class ChainedCollectionRecord(CollectionRecord):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    name : `str`
        Name of the collection.
    """

    def __init__(self, name: str):
        super().__init__(name=name, type=CollectionType.CHAINED)
        # Start with an empty search path; `refresh` or `update` fills it in.
        self._children = CollectionSearch.fromExpression([])

    @property
    def children(self) -> CollectionSearch:
        """The ordered search path of child collections that define this chain
        (`CollectionSearch`).
        """
        return self._children

    def update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Redefine this chain to search the given child collections.

        This method should be used by all external code to set children.  It
        delegates to `_update`, which is what should be overridden by
        subclasses.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.

        Raises
        ------
        ValueError
            Raised when the child collections contain a cycle.
        """
        # Inspect the ``CHAINED`` records produced by resolving the proposed
        # children (nested chains are flattened and included); if this record
        # is among them the new definition would be circular.
        for record in children.iter(manager, flattenChains=True, includeChains=True,
                                    collectionType=CollectionType.CHAINED):
            if record == self:
                raise ValueError(f"Cycle in collection chaining when defining '{self.name}'.")
        # Run the subclass hook before touching in-memory state, so that an
        # exception raised by `_update` leaves `children` unchanged.
        self._update(manager, children)
        self._children = children

    def refresh(self, manager: CollectionManager) -> None:
        """Load children from the database, using the given manager to resolve
        collection primary key values into records.

        This method exists to ensure that all collections that may appear in a
        chain are known to the manager before any particular chain tries to
        retrieve their records from it.  `ChainedCollectionRecord` subclasses
        can rely on it being called sometime after their own ``__init__`` to
        finish construction.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        """
        self._children = self._load(manager)

    @abstractmethod
    def _update(self, manager: CollectionManager, children: CollectionSearch) -> None:
        """Protected implementation hook for `update`.

        This method should be implemented by subclasses to update the database
        to reflect the children given.  It should never be called by anything
        other than `update`, which should be used by all external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.
        children : `CollectionSearch`
            A collection search path that should be resolved to set the child
            collections of this chain.  Guaranteed not to contain cycles.
        """
        raise NotImplementedError()

    @abstractmethod
    def _load(self, manager: CollectionManager) -> CollectionSearch:
        """Protected implementation hook for `refresh`.

        This method should be implemented by subclasses to retrieve the chain's
        child collections from the database and return them.  It should never
        be called by anything other than `refresh`, which should be used by all
        external code.

        Parameters
        ----------
        manager : `CollectionManager`
            The object that manages this record instance and all record
            instances that may appear as its children.

        Returns
        -------
        children : `CollectionSearch`
            The search path of child collections; `refresh` stores it as the
            new value of `children`.
        """
        raise NotImplementedError()
class CollectionManager(ABC):
    """An interface for managing the collections (including runs) in a
    `Registry`.

    Notes
    -----
    Each layer in a multi-layer `Registry` has its own record for any
    collection for which it has datasets (or quanta).  Different layers may
    use different IDs for the same collection, so any usage of the IDs
    obtained through the `CollectionManager` APIs are strictly for internal
    (to `Registry`) use.
    """

    @classmethod
    @abstractmethod
    def initialize(cls, db: Database, context: StaticTablesContext) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`; used
            to declare any tables that should always be present in a layer
            implemented with this manager.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "collection",
                                onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the collection
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the collection
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the collection row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(cls, tableSpec: ddl.TableSpec, *, prefix: str = "run",
                         onDelete: Optional[str] = None, **kwds: Any) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) referencing the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run table.
            Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name may
            have a suffix (and is given in the returned `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen to
            the referencing row if the referenced run row is deleted.  `None`
            indicates that this should be an integrity error.
        **kwds
            Additional keyword arguments are forwarded to the `ddl.FieldSpec`
            constructor (only the ``name`` and ``dtype`` arguments are
            otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field added by `addCollectionForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field added by `addRunForeignKey`
        if called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name may
            have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Ensure all other operations on this manager are aware of any
        collections that may have been registered by other clients since it
        was initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(self, name: str, type: CollectionType) -> CollectionRecord:
        """Ensure that a collection of the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a `Database.transaction`
            context.
        DatabaseConflictError
            Raised if a collection with this name but a different type already
            exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe; nothing
        should happen if the types are consistent, and integrity errors due to
        inconsistent types should happen before any database changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If ``record.type is
            CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and name.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If ``record.type is
            CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since the
        last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __iter__(self) -> Iterator[CollectionRecord]:
        """Iterate over all collections.

        Yields
        ------
        record : `CollectionRecord`
            The record for a managed collection.
        """
        raise NotImplementedError()