Coverage for python/lsst/daf/butler/registry/interfaces/_collections.py: 84%
93 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-06 10:53 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-06 10:53 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl
31__all__ = [
32 "ChainedCollectionRecord",
33 "CollectionManager",
34 "CollectionRecord",
35 "RunRecord",
36]
38from abc import abstractmethod
39from collections.abc import Iterable, Set
40from typing import TYPE_CHECKING, Any, Generic, TypeVar
42from ..._timespan import Timespan
43from .._collection_type import CollectionType
44from ..wildcards import CollectionWildcard
45from ._versioning import VersionedExtension, VersionTuple
47if TYPE_CHECKING:
48 from .._caching_context import CachingContext
49 from ._database import Database, StaticTablesContext
50 from ._dimensions import DimensionRecordStorageManager
53_Key = TypeVar("_Key")
class CollectionRecord(Generic[_Key]):
    """Lightweight description of a collection for internal `Registry` code.

    Code outside of the registry internals should identify collections by
    their `str` name instead of using instances of this class.

    Parameters
    ----------
    key
        Unique collection ID.  It may be identical to ``name`` when the name
        itself is used for identification.  Typically an integer or a
        string, but other database-specific types are possible.
    name : `str`
        Name of the collection.
    type : `CollectionType`
        Enumeration value describing the type of the collection.

    Notes
    -----
    Equality compares `name`, `type`, and `key`, and hashing is based on
    `name`, so the attributes set by this base class must be treated as
    immutable by all users and derived classes.  Subclasses may define
    additional mutable attributes, provided those attributes do not take
    part in any subclass definition of equality.
    """

    name: str
    """Name of the collection (`str`).
    """

    key: _Key
    """Primary/foreign key value that identifies this collection.
    """

    type: CollectionType
    """Enumeration value giving the type of the collection
    (`CollectionType`).
    """

    def __init__(self, key: _Key, name: str, type: CollectionType):
        self.key, self.name, self.type = key, name, type
        assert isinstance(self.type, CollectionType)

    def __eq__(self, other: Any) -> bool:
        # Objects lacking any of the compared attributes are not comparable
        # with a record; defer to the other operand in that case.
        try:
            same = self.name == other.name and self.type == other.type and self.key == other.key
        except AttributeError:
            return NotImplemented
        return same

    def __hash__(self) -> int:
        # Only the name is hashed; it is one of the fields used by __eq__.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"CollectionRecord(key={self.key!r}, name={self.name!r}, type={self.type!r})"

    def __str__(self) -> str:
        return self.name
class RunRecord(CollectionRecord[_Key]):
    """A `CollectionRecord` for ``RUN`` collections that additionally holds
    execution information.

    Parameters
    ----------
    key : `object`
        Unique collection key.
    name : `str`
        Name of the collection.
    host : `str`, optional
        Name of the host or system on which this run was produced.
    timespan : `Timespan`, optional
        Begin and end timestamps for the period over which the run was
        produced.  If not provided, a `Timespan` with both bounds set to
        `None` is stored.
    """

    host: str | None
    """Name of the host or system on which this run was produced (`str` or
    `None`).
    """

    timespan: Timespan
    """Begin and end timestamps for the period over which the run was
    produced.  `None`/``NULL`` values are interpreted as infinite bounds.
    """

    def __init__(
        self,
        key: _Key,
        name: str,
        *,
        host: str | None = None,
        timespan: Timespan | None = None,
    ):
        # The collection type is fixed for this subclass.
        super().__init__(key=key, name=name, type=CollectionType.RUN)
        self.host = host
        # A missing timespan is stored as one with no begin and no end.
        self.timespan = Timespan(begin=None, end=None) if timespan is None else timespan

    def __repr__(self) -> str:
        return f"RunRecord(key={self.key!r}, name={self.name!r})"
class ChainedCollectionRecord(CollectionRecord[_Key]):
    """A subclass of `CollectionRecord` that adds the list of child
    collections in a ``CHAINED`` collection.

    Parameters
    ----------
    key : `object`
        Unique collection key.
    name : `str`
        Name of the collection.
    children : `~collections.abc.Iterable` [ `str` ]
        Ordered names of child collections.  The iterable is consumed once
        and stored as a `tuple`.
    """

    children: tuple[str, ...]
    """The ordered search path of child collections that define this chain
    (`tuple` [ `str` ]).
    """

    def __init__(
        self,
        # Annotated with the class type variable (rather than `Any`) so the
        # key type agrees with `CollectionRecord.__init__` and `RunRecord`.
        key: _Key,
        name: str,
        *,
        children: Iterable[str],
    ):
        # The collection type is fixed for this subclass.
        super().__init__(key=key, name=name, type=CollectionType.CHAINED)
        # Materialize so that one-shot iterables are preserved and the
        # stored search path cannot be mutated in place.
        self.children = tuple(children)

    def __repr__(self) -> str:
        return f"ChainedCollectionRecord(key={self.key!r}, name={self.name!r}, children={self.children!r})"
class CollectionManager(Generic[_Key], VersionedExtension):
    """Abstract interface for managing the collections (including runs) of
    a `Registry`.

    Notes
    -----
    Every layer of a multi-layer `Registry` keeps its own record for each
    collection for which it has datasets (or quanta).  Because different
    layers may assign different IDs to the same collection, IDs obtained
    through `CollectionManager` APIs are strictly for use internal to
    `Registry`.
    """

    def __init__(self, *, registry_schema_version: VersionTuple | None = None) -> None:
        super().__init__(registry_schema_version=registry_schema_version)

    @classmethod
    @abstractmethod
    def initialize(
        cls,
        db: Database,
        context: StaticTablesContext,
        *,
        dimensions: DimensionRecordStorageManager,
        caching_context: CachingContext,
        registry_schema_version: VersionTuple | None = None,
    ) -> CollectionManager:
        """Construct an instance of the manager.

        Parameters
        ----------
        db : `Database`
            Interface to the underlying database engine and namespace.
        context : `StaticTablesContext`
            Context object obtained from `Database.declareStaticTables`;
            used to declare any tables that should always be present in a
            layer implemented with this manager.
        dimensions : `DimensionRecordStorageManager`
            Manager object for the dimensions in this `Registry`.
        caching_context : `CachingContext`
            Object controlling caching of information returned by managers.
        registry_schema_version : `VersionTuple` or `None`
            Schema version of this extension as defined in registry.

        Returns
        -------
        manager : `CollectionManager`
            An instance of a concrete `CollectionManager` subclass.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addCollectionForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "collection",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the
        collection table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the
            collection table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name
            may have a suffix (and is given in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the collection row is deleted.
            `None` indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the collection primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def addRunForeignKey(
        cls,
        tableSpec: ddl.TableSpec,
        *,
        prefix: str = "run",
        onDelete: str | None = None,
        constraint: bool = True,
        **kwargs: Any,
    ) -> ddl.FieldSpec:
        """Add a foreign key (field and constraint) that references the run
        table.

        Parameters
        ----------
        tableSpec : `ddl.TableSpec`
            Specification for the table that should reference the run
            table.  Will be modified in place.
        prefix : `str`, optional
            A name to use for the prefix of the new field; the full name
            may have a suffix (and is given in the returned
            `ddl.FieldSpec`).
        onDelete : `str`, optional
            One of "CASCADE" or "SET NULL", indicating what should happen
            to the referencing row if the collection row is deleted.
            `None` indicates that this should be an integrity error.
        constraint : `bool`, optional
            If `False` (`True` is default), add a field that can be joined
            to the run primary key, but do not add a foreign key
            constraint.
        **kwargs
            Additional keyword arguments are forwarded to the
            `ddl.FieldSpec` constructor (only the ``name`` and ``dtype``
            arguments are otherwise provided).

        Returns
        -------
        fieldSpec : `ddl.FieldSpec`
            Specification for the field being added.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getCollectionForeignKeyName(cls, prefix: str = "collection") -> str:
        """Return the name of the field that `addCollectionForeignKey` adds
        when called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name
            may have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def getRunForeignKeyName(cls, prefix: str = "run") -> str:
        """Return the name of the field that `addRunForeignKey` adds when
        called with the same prefix.

        Parameters
        ----------
        prefix : `str`
            A name to use for the prefix of the new field; the full name
            may have a suffix.

        Returns
        -------
        name : `str`
            The field name.
        """
        raise NotImplementedError()

    @abstractmethod
    def refresh(self) -> None:
        """Make all other operations on this manager aware of any
        collections that other clients may have registered since it was
        initialized or last refreshed.
        """
        raise NotImplementedError()

    @abstractmethod
    def register(
        self,
        name: str,
        type: CollectionType,
        doc: str | None = None,
    ) -> tuple[CollectionRecord[_Key], bool]:
        """Ensure that a collection with the given name and type is present
        in the layer this manager is associated with.

        Parameters
        ----------
        name : `str`
            Name of the collection.
        type : `CollectionType`
            Enumeration value indicating the type of collection.
        doc : `str`, optional
            Documentation string for the collection.  Ignored if the
            collection already exists.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``type is CollectionType.RUN``, this will be a `RunRecord`
            instance.  If ``type is CollectionType.CHAINED``, this will be
            a `ChainedCollectionRecord` instance.
        registered : `bool`
            `True` if the collection was registered, `False` if it already
            existed.

        Raises
        ------
        TransactionInterruption
            Raised if this operation is invoked within a
            `Database.transaction` context.
        DatabaseConflictError
            Raised if a collection with this name but a different type
            already exists.

        Notes
        -----
        Concurrent registrations of the same collection should be safe;
        nothing should happen if the types are consistent, and integrity
        errors due to inconsistent types should happen before any database
        changes are made.
        """
        raise NotImplementedError()

    @abstractmethod
    def remove(self, name: str) -> None:
        """Completely remove a collection.

        Any existing `CollectionRecord` objects that correspond to the
        removed collection are considered invalidated.

        Parameters
        ----------
        name : `str`
            Name of the collection to remove.

        Notes
        -----
        If this collection is referenced by foreign keys in tables managed
        by other objects, the ON DELETE clauses of those tables will be
        invoked.  That will frequently delete many dependent rows
        automatically (via "CASCADE"), but it may also cause this operation
        to fail (with rollback) unless dependent rows that do not have an
        ON DELETE clause are removed first.
        """
        raise NotImplementedError()

    @abstractmethod
    def find(self, name: str) -> CollectionRecord[_Key]:
        """Return the collection record associated with the given name.

        Parameters
        ----------
        name : `str`
            Name of the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and ID.
            If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if the given collection does not exist.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def __getitem__(self, key: Any) -> CollectionRecord[_Key]:
        """Return the collection record associated with the given
        primary/foreign key value.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        record : `CollectionRecord`
            Object representing the collection, including its type and
            name.  If ``record.type is CollectionType.RUN``, this will be a
            `RunRecord` instance.  If
            ``record.type is CollectionType.CHAINED``, this will be a
            `ChainedCollectionRecord` instance.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with this key exists.

        Notes
        -----
        Collections registered by another client of the same layer since
        the last call to `initialize` or `refresh` may not be found.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_wildcard(
        self,
        wildcard: CollectionWildcard,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord[_Key]]:
        """Return the collection records that match a wildcard.

        Parameters
        ----------
        wildcard : `CollectionWildcard`
            Names and/or patterns for collections.
        collection_types : `collections.abc.Set` [ `CollectionType` ], \
                optional
            If provided, only return collections of these types.
        done : `set` [ `str` ], optional
            A `set` of collection names that will not be returned
            (presumably because they have already been returned in some
            higher-level logic) that will also be updated with the names of
            the collections returned.
        flatten_chains : `bool`, optional
            If `True` (default) recursively include the child collections
            of `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flatten_chains``: either return records for CHAINED
            collections or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def getDocumentation(self, key: _Key) -> str | None:
        """Retrieve the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        docs : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def setDocumentation(self, key: _Key, doc: str | None) -> None:
        """Set the documentation string for a collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.
        doc : `str` or `None`
            Docstring for the collection with the given key.
        """
        raise NotImplementedError()

    @abstractmethod
    def getParentChains(self, key: _Key) -> set[str]:
        """Find the names of all CHAINED collections that directly contain
        the given collection.

        Parameters
        ----------
        key
            Internal primary key value for the collection.

        Returns
        -------
        names : `set` [`str`]
            Parent collection names.
        """
        raise NotImplementedError()

    @abstractmethod
    def update_chain(
        self,
        record: ChainedCollectionRecord[_Key],
        children: Iterable[str],
        flatten: bool = False,
    ) -> ChainedCollectionRecord[_Key]:
        """Update chained collection composition.

        Parameters
        ----------
        record : `ChainedCollectionRecord`
            Chained collection record.
        children : `~collections.abc.Iterable` [`str`]
            Ordered names of children collections.
        flatten : `bool`, optional
            If `True`, recursively flatten out any nested
            `~CollectionType.CHAINED` collections in ``children`` first.

        Returns
        -------
        record : `ChainedCollectionRecord`
            Chained collection record produced by the update.  Presumably
            this describes the chain with its new children; confirm against
            the concrete implementations.
        """
        raise NotImplementedError()