Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 84%
93 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 10:44 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-16 10:44 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl
31__all__ = [
32 "ChainedCollectionRecord",
33 "CollectionManager",
34 "CollectionRecord",
35 "RunRecord",
36]
38from abc import abstractmethod
39from collections.abc import Iterable, Set
40from typing import TYPE_CHECKING, Any, Generic, TypeVar
42from ..._timespan import Timespan
43from .._collection_type import CollectionType
44from ..wildcards import CollectionWildcard
45from ._versioning import VersionedExtension, VersionTuple
47if TYPE_CHECKING:
48 from .._caching_context import CachingContext
49 from ._database import Database, StaticTablesContext
50 from ._dimensions import DimensionRecordStorageManager
# Type variable for the database-specific key (primary/foreign key value)
# that identifies a collection; it parameterizes the generic record and
# manager classes defined in this module.
_Key = TypeVar("_Key")
class CollectionRecord(Generic[_Key]):
    """Lightweight record describing one collection in internal `Registry`
    interfaces.

    Code facing the user is expected to identify collections by a plain
    `str` name instead of using this type.

    Parameters
    ----------
    key : _Key
        Unique identifier of the collection. It may be identical to ``name``
        when the name itself is used for identification. Typically an
        integer or a string, but any database-specific type is possible.
    name : `str`
        Collection name.
    type : `CollectionType`
        Enumeration value giving the kind of collection.

    Notes
    -----
    Equality compares ``name``, ``type`` and ``key`` while the hash is
    computed from ``name`` only, so all three attributes must be treated as
    immutable by users and by derived classes. Subclasses are free to add
    mutable attributes provided those never take part in a subclass
    definition of equality.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: _Key
    """The primary/foreign key value for this collection.
    """

    type: CollectionType
    """Enumeration value describing the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: _Key, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        # Sanity check that callers passed a real enumeration member.
        assert isinstance(self.type, CollectionType)

    def __eq__(self, other: Any) -> bool:
        # Objects lacking any of the compared attributes are left for the
        # other operand (or the interpreter default) to handle.
        try:
            matches = self.name == other.name and self.type == other.type and self.key == other.key
        except AttributeError:
            return NotImplemented
        return matches

    def __hash__(self) -> int:
        # Only the name is hashed; equal records always share a name.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        return self.name
class RunRecord(CollectionRecord[_Key]):
    """`CollectionRecord` specialization for ``RUN`` collections that also
    carries execution information (host and timespan).

    Parameters
    ----------
    key : `object`
        Unique collection key.
    name : `str`
        Name of the collection.
    host : `str`, optional
        Name of the host or system on which this run was produced.
    timespan : `Timespan`, optional
        Begin and end timestamps for the period over which the run was
        produced. When omitted, a `Timespan` with both bounds set to `None`
        is stored.
    """

    host: str | None
    """Name of the host or system on which this run was produced (`str` or
    `None`).
    """

    timespan: Timespan
    """Begin and end timestamps for the period over which the run was produced.
    `None`/``NULL`` values are interpreted as infinite bounds.
    """

    def __init__(
        self,
        key: _Key,
        name: str,
        *,
        host: str | None = None,
        timespan: Timespan | None = None,
    ):
        # The collection type is fixed for this subclass.
        super().__init__(key=key, name=name, type=CollectionType.RUN)
        self.host = host
        # Substitute a timespan with `None` bounds when none is supplied.
        self.timespan = Timespan(begin=None, end=None) if timespan is None else timespan

    def __repr__(self) -> str:
        return f"RunRecord(key={self.key!r}, name={self.name!r})"
class ChainedCollectionRecord(CollectionRecord[_Key]):
    """A subclass of `CollectionRecord` that adds the list of child collections
    in a ``CHAINED`` collection.

    Parameters
    ----------
    key : _Key
        Unique collection key.
    name : `str`
        Name of the collection.
    children : `~collections.abc.Iterable` [ `str` ]
        Ordered sequence of names of child collections. The iterable is
        consumed once and stored as a `tuple`.
    """

    children: tuple[str, ...]
    """The ordered search path of child collections that define this chain
    (`tuple` [ `str` ]).
    """

    def __init__(
        self,
        key: _Key,
        name: str,
        *,
        children: Iterable[str],
    ):
        # ``key`` is annotated with the class type variable (as in the base
        # class and in `RunRecord`) so the generic parameter is usable by
        # type checkers; this has no runtime effect.
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Materialize the children so the stored search path has a fixed
        # order and cannot be exhausted like a one-shot iterator.
        self.children = tuple(children)

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"
class CollectionManager(Generic[_Key], VersionedExtension):
    """Abstract interface to the collections (runs included) of a
    `Registry`.

    Parameters
    ----------
    registry_schema_version : `VersionTuple` or `None`, optional
        Version of registry schema.

    Notes
    -----
    In a multi-layer `Registry` every layer keeps its own record for each
    collection for which it holds datasets (or quanta), and the IDs used for
    one collection may differ between layers. IDs obtained through the
    `CollectionManager` APIs are therefore strictly for use internal to
    `Registry`.
    """

    def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None:
        super().__init__(registry_schema_version=registry_schema_version)

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        dimensions: DimensionRecordStorageManager,
        caching_context: CachingContext,
        registry_schema_version: VersionTuple | None = None,
    ) -> CollectionManager:
        """Build a manager instance.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`. It
            is used to declare the tables that must always exist in a layer
            implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.
        caching_context : `CachingContext`
            Object controlling caching of information returned by managers.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a field, and by default a foreign key constraint, that
        references the collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification of the table that should reference the collection
            table. Modified in place.
        prefix : `str`, optional
            Prefix for the name of the new field. The full name may carry a
            suffix and is available from the returned `ddl.FieldSpec`.
        onDelete : `str`, optional
            Either "CASCADE" or "SET NULL", selecting what happens to the
            referencing row when the collection row is deleted. `None`
            means the deletion should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a field, and by default a foreign key constraint, that
        references the run table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification of the table that should reference the run table.
            Modified in place.
        prefix : `str`, optional
            Prefix for the name of the new field. The full name may carry a
            suffix and is available from the returned `ddl.FieldSpec`.
        onDelete : `str`, optional
            Either "CASCADE" or "SET NULL", selecting what happens to the
            referencing row when the collection row is deleted. `None`
            means the deletion should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the run primary key, but do not add a foreign key constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when it is called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            Prefix for the name of the field; the full name may have a
            suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when it
        is called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            Prefix for the name of the field; the full name may have a
            suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Make every other operation on this manager aware of collections
        that other clients may have registered since the manager was
        initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self,
        name: str,
        type: CollectionType,
        doc: str | None = None,
    ) -> tuple[CollectionRecord[_Key], bool]:
        """Ensure that a collection with the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection. Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance. If ``type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Registering the same collection concurrently should be safe: when
        the types agree nothing should happen, and integrity errors caused
        by inconsistent types should be raised before any database changes
        are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed
        by other objects, the ON DELETE clauses of those tables will be
        invoked. That will frequently delete many dependent rows
        automatically (via "CASCADE"), but it may also cause this operation
        to fail (with rollback) unless dependent rows that do not have an
        ON DELETE clause are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord[_Key]:
        """Look up the collection record for a collection name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance. If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord[_Key]:
        """Look up the collection record for a primary/foreign key value.

        Parameters
        ----------
        key : `typing.Any`
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name. If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance. If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        wildcard: CollectionWildcard,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord[_Key]]:
        """Return the collection records that match a wildcard.

        Parameters
        ----------
        wildcard : `CollectionWildcard`
            Names and/or patterns for collections.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only return collections of these types.
        done : `set` [ `str` ], optional
            A `set` of collection names that will not be returned
            (presumably because they have already been returned in some
            higher-level logic) that will also be updated with the names of
            the collections returned.
        flatten_chains : `bool`, optional
            If `True` (default) recursively include the child collections
            of `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flatten_chains``: either return records for CHAINED
            collections or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: _Key) -> str | None:
        """Fetch the documentation string of a collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: _Key, doc: str | None) -> None:
        """Store the documentation string of a collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def getParentChains(self, key: _Key) -> set[str]:
        """Find the names of all CHAINED collections that directly contain
        the given collection.

        Parameters
        ----------
        key : _Key
            Internal primary key value for the collection.

        Returns
        -------
        names : `set` [`str`]
            Parent collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def update_chain(
        self,
        record: ChainedCollectionRecord[_Key],
        children: Iterable[str],
        flatten: bool = False,
    ) -> ChainedCollectionRecord[_Key]:
        """Update chained collection composition.

        Parameters
        ----------
        record : `ChainedCollectionRecord`
            Chained collection record.
        children : `~collections.abc.Iterable` [`str`]
            Ordered names of children collections.
        flatten : `bool`, optional
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Returns
        -------
        record : `ChainedCollectionRecord`
            Chained collection record (presumably describing the updated
            composition; the exact contract is left to implementations).
        """
        raise NotImplementedError()