Coverage for python/lsst/daf/butler/registry/sql_registry.py: 17%
585 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-01 11:20 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-01 11:20 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30from .. import ddl
32__all__ = ("SqlRegistry",)
34import contextlib
35import logging
36import warnings
37from collections.abc import Iterable, Iterator, Mapping, Sequence
38from typing import TYPE_CHECKING, Any, Literal, cast
40import sqlalchemy
41from lsst.daf.relation import LeafRelation, Relation
42from lsst.resources import ResourcePathExpression
43from lsst.utils.introspection import find_outside_stacklevel
44from lsst.utils.iteration import ensure_iterable
46from .._column_tags import DatasetColumnTag
47from .._config import Config
48from .._dataset_association import DatasetAssociation
49from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
50from .._dataset_type import DatasetType
51from .._named import NamedKeyMapping, NameLookupMapping
52from .._storage_class import StorageClassFactory
53from .._timespan import Timespan
54from ..dimensions import (
55 DataCoordinate,
56 DataId,
57 Dimension,
58 DimensionConfig,
59 DimensionElement,
60 DimensionGraph,
61 DimensionGroup,
62 DimensionRecord,
63 DimensionUniverse,
64)
65from ..dimensions.record_cache import DimensionRecordCache
66from ..progress import Progress
67from ..registry import (
68 ArgumentError,
69 CollectionExpressionError,
70 CollectionSummary,
71 CollectionType,
72 CollectionTypeError,
73 ConflictingDefinitionError,
74 DataIdValueError,
75 DatasetTypeError,
76 DimensionNameError,
77 InconsistentDataIdError,
78 MissingDatasetTypeError,
79 NoDefaultCollectionError,
80 OrphanedRecordError,
81 RegistryConfig,
82 RegistryConsistencyError,
83 RegistryDefaults,
84 queries,
85)
86from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError, RunRecord
87from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
88from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
89from ..utils import _DefaultMarker, _Marker, transactional
91if TYPE_CHECKING:
92 from .._butler_config import ButlerConfig
93 from ..datastore._datastore import DatastoreOpaqueTable
94 from ..datastore.stored_file_info import StoredDatastoreItemInfo
95 from ..registry._registry import CollectionArgType
96 from ..registry.interfaces import (
97 CollectionRecord,
98 Database,
99 DatastoreRegistryBridgeManager,
100 ObsCoreTableManager,
101 )
104_LOG = logging.getLogger(__name__)
107class SqlRegistry:
108 """Butler Registry implementation that uses SQL database as backend.
110 Parameters
111 ----------
112 database : `Database`
113 Database instance to store Registry.
114 defaults : `RegistryDefaults`
115 Default collection search path and/or output `~CollectionType.RUN`
116 collection.
117 managers : `RegistryManagerInstances`
118 All the managers required for this registry.
119 """
121 defaultConfigFile: str | None = None
122 """Path to configuration defaults. Accessed within the ``configs`` resource
123 or relative to a search path. Can be None if no defaults specified.
124 """
126 @classmethod
127 def forceRegistryConfig(
128 cls, config: ButlerConfig | RegistryConfig | Config | str | None
129 ) -> RegistryConfig:
130 """Force the supplied config to a `RegistryConfig`.
132 Parameters
133 ----------
134 config : `RegistryConfig`, `Config` or `str` or `None`
135 Registry configuration, if missing then default configuration will
136 be loaded from registry.yaml.
138 Returns
139 -------
140 registry_config : `RegistryConfig`
141 A registry config.
142 """
143 if not isinstance(config, RegistryConfig):
144 if isinstance(config, str | Config) or config is None:
145 config = RegistryConfig(config)
146 else:
147 raise ValueError(f"Incompatible Registry configuration: {config}")
148 return config
150 @classmethod
151 def createFromConfig(
152 cls,
153 config: RegistryConfig | str | None = None,
154 dimensionConfig: DimensionConfig | str | None = None,
155 butlerRoot: ResourcePathExpression | None = None,
156 ) -> SqlRegistry:
157 """Create registry database and return `SqlRegistry` instance.
159 This method initializes database contents, database must be empty
160 prior to calling this method.
162 Parameters
163 ----------
164 config : `RegistryConfig` or `str`, optional
165 Registry configuration, if missing then default configuration will
166 be loaded from registry.yaml.
167 dimensionConfig : `DimensionConfig` or `str`, optional
168 Dimensions configuration, if missing then default configuration
169 will be loaded from dimensions.yaml.
170 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
171 Path to the repository root this `SqlRegistry` will manage.
173 Returns
174 -------
175 registry : `SqlRegistry`
176 A new `SqlRegistry` instance.
177 """
178 config = cls.forceRegistryConfig(config)
179 config.replaceRoot(butlerRoot)
181 if isinstance(dimensionConfig, str):
182 dimensionConfig = DimensionConfig(dimensionConfig)
183 elif dimensionConfig is None:
184 dimensionConfig = DimensionConfig()
185 elif not isinstance(dimensionConfig, DimensionConfig):
186 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
188 DatabaseClass = config.getDatabaseClass()
189 database = DatabaseClass.fromUri(
190 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace")
191 )
192 managerTypes = RegistryManagerTypes.fromConfig(config)
193 managers = managerTypes.makeRepo(database, dimensionConfig)
194 return cls(database, RegistryDefaults(), managers)
196 @classmethod
197 def fromConfig(
198 cls,
199 config: ButlerConfig | RegistryConfig | Config | str,
200 butlerRoot: ResourcePathExpression | None = None,
201 writeable: bool = True,
202 defaults: RegistryDefaults | None = None,
203 ) -> SqlRegistry:
204 """Create `Registry` subclass instance from `config`.
206 Registry database must be initialized prior to calling this method.
208 Parameters
209 ----------
210 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
211 Registry configuration.
212 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
213 Path to the repository root this `Registry` will manage.
214 writeable : `bool`, optional
215 If `True` (default) create a read-write connection to the database.
216 defaults : `RegistryDefaults`, optional
217 Default collection search path and/or output `~CollectionType.RUN`
218 collection.
220 Returns
221 -------
222 registry : `SqlRegistry`
223 A new `SqlRegistry` subclass instance.
224 """
225 config = cls.forceRegistryConfig(config)
226 config.replaceRoot(butlerRoot)
227 DatabaseClass = config.getDatabaseClass()
228 database = DatabaseClass.fromUri(
229 config.connectionString,
230 origin=config.get("origin", 0),
231 namespace=config.get("namespace"),
232 writeable=writeable,
233 )
234 managerTypes = RegistryManagerTypes.fromConfig(config)
235 with database.session():
236 managers = managerTypes.loadRepo(database)
237 if defaults is None:
238 defaults = RegistryDefaults()
239 return cls(database, defaults, managers)
241 def __init__(
242 self,
243 database: Database,
244 defaults: RegistryDefaults,
245 managers: RegistryManagerInstances,
246 ):
247 self._db = database
248 self._managers = managers
249 self.storageClasses = StorageClassFactory()
250 # This is public to SqlRegistry's internal-to-daf_butler callers, but
251 # it is intentionally not part of RegistryShim.
252 self.dimension_record_cache = DimensionRecordCache(
253 self._managers.dimensions.universe,
254 fetch=self._managers.dimensions.fetch_cache_dict,
255 )
256 # Intentionally invoke property setter to initialize defaults. This
257 # can only be done after most of the rest of Registry has already been
258 # initialized, and must be done before the property getter is used.
259 self.defaults = defaults
260 # TODO: This is currently initialized by `make_datastore_tables`,
261 # eventually we'll need to do it during construction.
262 # The mapping is indexed by the opaque table name.
263 self._datastore_record_classes: Mapping[str, type[StoredDatastoreItemInfo]] = {}
265 def __str__(self) -> str:
266 return str(self._db)
268 def __repr__(self) -> str:
269 return f"SqlRegistry({self._db!r}, {self.dimensions!r})"
271 def isWriteable(self) -> bool:
272 """Return `True` if this registry allows write operations, and `False`
273 otherwise.
274 """
275 return self._db.isWriteable()
277 def copy(self, defaults: RegistryDefaults | None = None) -> SqlRegistry:
278 """Create a new `SqlRegistry` backed by the same data repository
279 as this one and sharing a database connection pool with it, but with
280 independent defaults and database sessions.
282 Parameters
283 ----------
284 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
285 Default collections and data ID values for the new registry. If
286 not provided, ``self.defaults`` will be used (but future changes
287 to either registry's defaults will not affect the other).
289 Returns
290 -------
291 copy : `SqlRegistry`
292 A new `SqlRegistry` instance with its own defaults.
293 """
294 if defaults is None:
295 # No need to copy, because `RegistryDefaults` is immutable; we
296 # effectively copy on write.
297 defaults = self.defaults
298 db = self._db.clone()
299 result = SqlRegistry(db, defaults, self._managers.clone(db))
300 result._datastore_record_classes = dict(self._datastore_record_classes)
301 result.dimension_record_cache.load_from(self.dimension_record_cache)
302 return result
304 @property
305 def dimensions(self) -> DimensionUniverse:
306 """Definitions of all dimensions recognized by this `Registry`
307 (`DimensionUniverse`).
308 """
309 return self._managers.dimensions.universe
311 @property
312 def defaults(self) -> RegistryDefaults:
313 """Default collection search path and/or output `~CollectionType.RUN`
314 collection (`~lsst.daf.butler.registry.RegistryDefaults`).
316 This is an immutable struct whose components may not be set
317 individually, but the entire struct can be set by assigning to this
318 property.
319 """
320 return self._defaults
322 @defaults.setter
323 def defaults(self, value: RegistryDefaults) -> None:
324 if value.run is not None:
325 self.registerRun(value.run)
326 value.finish(self)
327 self._defaults = value
329 def refresh(self) -> None:
330 """Refresh all in-memory state by querying the database.
332 This may be necessary to enable querying for entities added by other
333 registry instances after this one was constructed.
334 """
335 self.dimension_record_cache.reset()
336 with self._db.transaction():
337 self._managers.refresh()
339 def caching_context(self) -> contextlib.AbstractContextManager[None]:
340 """Return context manager that enables caching.
342 Returns
343 -------
344 manager
345 A context manager that enables client-side caching. Entering
346 the context returns `None`.
347 """
348 return self._managers.caching_context_manager()
350 @contextlib.contextmanager
351 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
352 """Return a context manager that represents a transaction.
354 Parameters
355 ----------
356 savepoint : `bool`
357 Whether to issue a SAVEPOINT in the database.
359 Yields
360 ------
361 `None`
362 """
363 with self._db.transaction(savepoint=savepoint):
364 yield
366 def resetConnectionPool(self) -> None:
367 """Reset SQLAlchemy connection pool for `SqlRegistry` database.
369 This operation is useful when using registry with fork-based
370 multiprocessing. To use registry across fork boundary one has to make
371 sure that there are no currently active connections (no session or
372 transaction is in progress) and connection pool is reset using this
373 method. This method should be called by the child process immediately
374 after the fork.
375 """
376 self._db._engine.dispose()
378 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
379 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
380 other data repository client.
382 Opaque table records can be added via `insertOpaqueData`, retrieved via
383 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
385 Parameters
386 ----------
387 tableName : `str`
388 Logical name of the opaque table. This may differ from the
389 actual name used in the database by a prefix and/or suffix.
390 spec : `ddl.TableSpec`
391 Specification for the table to be added.
392 """
393 self._managers.opaque.register(tableName, spec)
395 @transactional
396 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
397 """Insert records into an opaque table.
399 Parameters
400 ----------
401 tableName : `str`
402 Logical name of the opaque table. Must match the name used in a
403 previous call to `registerOpaqueTable`.
404 *data
405 Each additional positional argument is a dictionary that represents
406 a single row to be added.
407 """
408 self._managers.opaque[tableName].insert(*data)
410 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
411 """Retrieve records from an opaque table.
413 Parameters
414 ----------
415 tableName : `str`
416 Logical name of the opaque table. Must match the name used in a
417 previous call to `registerOpaqueTable`.
418 **where
419 Additional keyword arguments are interpreted as equality
420 constraints that restrict the returned rows (combined with AND);
421 keyword arguments are column names and values are the values they
422 must have.
424 Yields
425 ------
426 row : `dict`
427 A dictionary representing a single result row.
428 """
429 yield from self._managers.opaque[tableName].fetch(**where)
431 @transactional
432 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
433 """Remove records from an opaque table.
435 Parameters
436 ----------
437 tableName : `str`
438 Logical name of the opaque table. Must match the name used in a
439 previous call to `registerOpaqueTable`.
440 **where
441 Additional keyword arguments are interpreted as equality
442 constraints that restrict the deleted rows (combined with AND);
443 keyword arguments are column names and values are the values they
444 must have.
445 """
446 self._managers.opaque[tableName].delete(where.keys(), where)
448 def registerCollection(
449 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
450 ) -> bool:
451 """Add a new collection if one with the given name does not exist.
453 Parameters
454 ----------
455 name : `str`
456 The name of the collection to create.
457 type : `CollectionType`
458 Enum value indicating the type of collection to create.
459 doc : `str`, optional
460 Documentation string for the collection.
462 Returns
463 -------
464 registered : `bool`
465 Boolean indicating whether the collection was already registered
466 or was created by this call.
468 Notes
469 -----
470 This method cannot be called within transactions, as it needs to be
471 able to perform its own transaction to be concurrent.
472 """
473 _, registered = self._managers.collections.register(name, type, doc=doc)
474 return registered
476 def getCollectionType(self, name: str) -> CollectionType:
477 """Return an enumeration value indicating the type of the given
478 collection.
480 Parameters
481 ----------
482 name : `str`
483 The name of the collection.
485 Returns
486 -------
487 type : `CollectionType`
488 Enum value indicating the type of this collection.
490 Raises
491 ------
492 lsst.daf.butler.registry.MissingCollectionError
493 Raised if no collection with the given name exists.
494 """
495 return self._managers.collections.find(name).type
497 def _get_collection_record(self, name: str) -> CollectionRecord:
498 """Return the record for this collection.
500 Parameters
501 ----------
502 name : `str`
503 Name of the collection for which the record is to be retrieved.
505 Returns
506 -------
507 record : `CollectionRecord`
508 The record for this collection.
509 """
510 return self._managers.collections.find(name)
512 def registerRun(self, name: str, doc: str | None = None) -> bool:
513 """Add a new run if one with the given name does not exist.
515 Parameters
516 ----------
517 name : `str`
518 The name of the run to create.
519 doc : `str`, optional
520 Documentation string for the collection.
522 Returns
523 -------
524 registered : `bool`
525 Boolean indicating whether a new run was registered. `False`
526 if it already existed.
528 Notes
529 -----
530 This method cannot be called within transactions, as it needs to be
531 able to perform its own transaction to be concurrent.
532 """
533 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
534 return registered
536 @transactional
537 def removeCollection(self, name: str) -> None:
538 """Remove the given collection from the registry.
540 Parameters
541 ----------
542 name : `str`
543 The name of the collection to remove.
545 Raises
546 ------
547 lsst.daf.butler.registry.MissingCollectionError
548 Raised if no collection with the given name exists.
549 sqlalchemy.exc.IntegrityError
550 Raised if the database rows associated with the collection are
551 still referenced by some other table, such as a dataset in a
552 datastore (for `~CollectionType.RUN` collections only) or a
553 `~CollectionType.CHAINED` collection of which this collection is
554 a child.
556 Notes
557 -----
558 If this is a `~CollectionType.RUN` collection, all datasets and quanta
559 in it will removed from the `Registry` database. This requires that
560 those datasets be removed (or at least trashed) from any datastores
561 that hold them first.
563 A collection may not be deleted as long as it is referenced by a
564 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
565 be deleted or redefined first.
566 """
567 self._managers.collections.remove(name)
569 def getCollectionChain(self, parent: str) -> tuple[str, ...]:
570 """Return the child collections in a `~CollectionType.CHAINED`
571 collection.
573 Parameters
574 ----------
575 parent : `str`
576 Name of the chained collection. Must have already been added via
577 a call to `Registry.registerCollection`.
579 Returns
580 -------
581 children : `~collections.abc.Sequence` [ `str` ]
582 An ordered sequence of collection names that are searched when the
583 given chained collection is searched.
585 Raises
586 ------
587 lsst.daf.butler.registry.MissingCollectionError
588 Raised if ``parent`` does not exist in the `Registry`.
589 lsst.daf.butler.registry.CollectionTypeError
590 Raised if ``parent`` does not correspond to a
591 `~CollectionType.CHAINED` collection.
592 """
593 record = self._managers.collections.find(parent)
594 if record.type is not CollectionType.CHAINED:
595 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
596 assert isinstance(record, ChainedCollectionRecord)
597 return record.children
599 @transactional
600 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
601 """Define or redefine a `~CollectionType.CHAINED` collection.
603 Parameters
604 ----------
605 parent : `str`
606 Name of the chained collection. Must have already been added via
607 a call to `Registry.registerCollection`.
608 children : collection expression
609 An expression defining an ordered search of child collections,
610 generally an iterable of `str`; see
611 :ref:`daf_butler_collection_expressions` for more information.
612 flatten : `bool`, optional
613 If `True` (`False` is default), recursively flatten out any nested
614 `~CollectionType.CHAINED` collections in ``children`` first.
616 Raises
617 ------
618 lsst.daf.butler.registry.MissingCollectionError
619 Raised when any of the given collections do not exist in the
620 `Registry`.
621 lsst.daf.butler.registry.CollectionTypeError
622 Raised if ``parent`` does not correspond to a
623 `~CollectionType.CHAINED` collection.
624 ValueError
625 Raised if the given collections contains a cycle.
626 """
627 record = self._managers.collections.find(parent)
628 if record.type is not CollectionType.CHAINED:
629 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
630 assert isinstance(record, ChainedCollectionRecord)
631 children = CollectionWildcard.from_expression(children).require_ordered()
632 if children != record.children or flatten:
633 self._managers.collections.update_chain(record, children, flatten=flatten)
635 def getCollectionParentChains(self, collection: str) -> set[str]:
636 """Return the CHAINED collections that directly contain the given one.
638 Parameters
639 ----------
640 collection : `str`
641 Name of the collection.
643 Returns
644 -------
645 chains : `set` of `str`
646 Set of `~CollectionType.CHAINED` collection names.
647 """
648 return self._managers.collections.getParentChains(self._managers.collections.find(collection).key)
650 def getCollectionDocumentation(self, collection: str) -> str | None:
651 """Retrieve the documentation string for a collection.
653 Parameters
654 ----------
655 collection : `str`
656 Name of the collection.
658 Returns
659 -------
660 docs : `str` or `None`
661 Docstring for the collection with the given name.
662 """
663 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)
665 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
666 """Set the documentation string for a collection.
668 Parameters
669 ----------
670 collection : `str`
671 Name of the collection.
672 doc : `str` or `None`
673 Docstring for the collection with the given name; will replace any
674 existing docstring. Passing `None` will remove any existing
675 docstring.
676 """
677 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)
679 def getCollectionSummary(self, collection: str) -> CollectionSummary:
680 """Return a summary for the given collection.
682 Parameters
683 ----------
684 collection : `str`
685 Name of the collection for which a summary is to be retrieved.
687 Returns
688 -------
689 summary : `~lsst.daf.butler.registry.CollectionSummary`
690 Summary of the dataset types and governor dimension values in
691 this collection.
692 """
693 record = self._managers.collections.find(collection)
694 return self._managers.datasets.getCollectionSummary(record)
696 def registerDatasetType(self, datasetType: DatasetType) -> bool:
697 """Add a new `DatasetType` to the Registry.
699 It is not an error to register the same `DatasetType` twice.
701 Parameters
702 ----------
703 datasetType : `DatasetType`
704 The `DatasetType` to be added.
706 Returns
707 -------
708 inserted : `bool`
709 `True` if ``datasetType`` was inserted, `False` if an identical
710 existing `DatasetType` was found. Note that in either case the
711 DatasetType is guaranteed to be defined in the Registry
712 consistently with the given definition.
714 Raises
715 ------
716 ValueError
717 Raised if the dimensions or storage class are invalid.
718 lsst.daf.butler.registry.ConflictingDefinitionError
719 Raised if this `DatasetType` is already registered with a different
720 definition.
722 Notes
723 -----
724 This method cannot be called within transactions, as it needs to be
725 able to perform its own transaction to be concurrent.
726 """
727 return self._managers.datasets.register(datasetType)
729 def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
730 """Remove the named `DatasetType` from the registry.
732 .. warning::
734 Registry implementations can cache the dataset type definitions.
735 This means that deleting the dataset type definition may result in
736 unexpected behavior from other butler processes that are active
737 that have not seen the deletion.
739 Parameters
740 ----------
741 name : `str` or `tuple` [`str`]
742 Name of the type to be removed or tuple containing a list of type
743 names to be removed. Wildcards are allowed.
745 Raises
746 ------
747 lsst.daf.butler.registry.OrphanedRecordError
748 Raised if an attempt is made to remove the dataset type definition
749 when there are already datasets associated with it.
751 Notes
752 -----
753 If the dataset type is not registered the method will return without
754 action.
755 """
756 for datasetTypeExpression in ensure_iterable(name):
757 # Catch any warnings from the caller specifying a component
758 # dataset type. This will result in an error later but the
759 # warning could be confusing when the caller is not querying
760 # anything.
761 with warnings.catch_warnings():
762 warnings.simplefilter("ignore", category=FutureWarning)
763 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
764 if not datasetTypes:
765 _LOG.info("Dataset type %r not defined", datasetTypeExpression)
766 else:
767 for datasetType in datasetTypes:
768 self._managers.datasets.remove(datasetType.name)
769 _LOG.info("Removed dataset type %r", datasetType.name)
771 def getDatasetType(self, name: str) -> DatasetType:
772 """Get the `DatasetType`.
774 Parameters
775 ----------
776 name : `str`
777 Name of the type.
779 Returns
780 -------
781 type : `DatasetType`
782 The `DatasetType` associated with the given name.
784 Raises
785 ------
786 lsst.daf.butler.registry.MissingDatasetTypeError
787 Raised if the requested dataset type has not been registered.
789 Notes
790 -----
791 This method handles component dataset types automatically, though most
792 other registry operations do not.
793 """
794 parent_name, component = DatasetType.splitDatasetTypeName(name)
795 storage = self._managers.datasets[parent_name]
796 if component is None:
797 return storage.datasetType
798 else:
799 return storage.datasetType.makeComponentDatasetType(component)
801 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
802 """Test whether the given dataset ID generation mode is supported by
803 `insertDatasets`.
805 Parameters
806 ----------
807 mode : `DatasetIdGenEnum`
808 Enum value for the mode to test.
810 Returns
811 -------
812 supported : `bool`
813 Whether the given mode is supported.
814 """
815 return self._managers.datasets.supportsIdGenerationMode(mode)
817 def findDataset(
818 self,
819 datasetType: DatasetType | str,
820 dataId: DataId | None = None,
821 *,
822 collections: CollectionArgType | None = None,
823 timespan: Timespan | None = None,
824 datastore_records: bool = False,
825 **kwargs: Any,
826 ) -> DatasetRef | None:
827 """Find a dataset given its `DatasetType` and data ID.
829 This can be used to obtain a `DatasetRef` that permits the dataset to
830 be read from a `Datastore`. If the dataset is a component and can not
831 be found using the provided dataset type, a dataset ref for the parent
832 will be returned instead but with the correct dataset type.
834 Parameters
835 ----------
836 datasetType : `DatasetType` or `str`
837 A `DatasetType` or the name of one. If this is a `DatasetType`
838 instance, its storage class will be respected and propagated to
839 the output, even if it differs from the dataset type definition
840 in the registry, as long as the storage classes are convertible.
841 dataId : `dict` or `DataCoordinate`, optional
842 A `dict`-like object containing the `Dimension` links that identify
843 the dataset within a collection.
844 collections : collection expression, optional
845 An expression that fully or partially identifies the collections to
846 search for the dataset; see
847 :ref:`daf_butler_collection_expressions` for more information.
848 Defaults to ``self.defaults.collections``.
849 timespan : `Timespan`, optional
850 A timespan that the validity range of the dataset must overlap.
851 If not provided, any `~CollectionType.CALIBRATION` collections
852 matched by the ``collections`` argument will not be searched.
853 datastore_records : `bool`, optional
854 Whether to attach datastore records to the `DatasetRef`.
855 **kwargs
856 Additional keyword arguments passed to
857 `DataCoordinate.standardize` to convert ``dataId`` to a true
858 `DataCoordinate` or augment an existing one.
860 Returns
861 -------
862 ref : `DatasetRef`
863 A reference to the dataset, or `None` if no matching Dataset
864 was found.
866 Raises
867 ------
868 lsst.daf.butler.registry.NoDefaultCollectionError
869 Raised if ``collections`` is `None` and
870 ``self.defaults.collections`` is `None`.
871 LookupError
872 Raised if one or more data ID keys are missing.
873 lsst.daf.butler.registry.MissingDatasetTypeError
874 Raised if the dataset type does not exist.
875 lsst.daf.butler.registry.MissingCollectionError
876 Raised if any of ``collections`` does not exist in the registry.
878 Notes
879 -----
880 This method simply returns `None` and does not raise an exception even
881 when the set of collections searched is intrinsically incompatible with
882 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
883 only `~CollectionType.CALIBRATION` collections are being searched.
884 This may make it harder to debug some lookup failures, but the behavior
885 is intentional; we consider it more important that failed searches are
886 reported consistently, regardless of the reason, and that adding
887 additional collections that do not contain a match to the search path
888 never changes the behavior.
890 This method handles component dataset types automatically, though most
891 other registry operations do not.
892 """
893 if collections is None:
894 if not self.defaults.collections:
895 raise NoDefaultCollectionError(
896 "No collections provided to findDataset, and no defaults from registry construction."
897 )
898 collections = self.defaults.collections
899 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache)
900 with backend.caching_context():
901 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
902 if collection_wildcard.empty():
903 return None
904 matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
905 resolved_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType)
906 dataId = DataCoordinate.standardize(
907 dataId,
908 dimensions=resolved_dataset_type.dimensions,
909 universe=self.dimensions,
910 defaults=self.defaults.dataId,
911 **kwargs,
912 )
913 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors}
914 (filtered_collections,) = backend.filter_dataset_collections(
915 [resolved_dataset_type],
916 matched_collections,
917 governor_constraints=governor_constraints,
918 ).values()
919 if not filtered_collections:
920 return None
921 if timespan is None:
922 filtered_collections = [
923 collection_record
924 for collection_record in filtered_collections
925 if collection_record.type is not CollectionType.CALIBRATION
926 ]
927 if filtered_collections:
928 requested_columns = {"dataset_id", "run", "collection"}
929 with backend.context() as context:
930 predicate = context.make_data_coordinate_predicate(
931 dataId.subset(resolved_dataset_type.dimensions), full=False
932 )
933 if timespan is not None:
934 requested_columns.add("timespan")
935 predicate = predicate.logical_and(
936 context.make_timespan_overlap_predicate(
937 DatasetColumnTag(resolved_dataset_type.name, "timespan"), timespan
938 )
939 )
940 relation = backend.make_dataset_query_relation(
941 resolved_dataset_type, filtered_collections, requested_columns, context
942 ).with_rows_satisfying(predicate)
943 rows = list(context.fetch_iterable(relation))
944 else:
945 rows = []
946 if not rows:
947 return None
948 elif len(rows) == 1:
949 best_row = rows[0]
950 else:
951 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
952 collection_tag = DatasetColumnTag(resolved_dataset_type.name, "collection")
953 row_iter = iter(rows)
954 best_row = next(row_iter)
955 best_rank = rank_by_collection_key[best_row[collection_tag]]
956 have_tie = False
957 for row in row_iter:
958 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
959 best_row = row
960 best_rank = rank
961 have_tie = False
962 elif rank == best_rank:
963 have_tie = True
964 assert timespan is not None, "Rank ties should be impossible given DB constraints."
965 if have_tie:
966 raise LookupError(
967 f"Ambiguous calibration lookup for {resolved_dataset_type.name} in collections "
968 f"{collection_wildcard.strings} with timespan {timespan}."
969 )
970 reader = queries.DatasetRefReader(
971 resolved_dataset_type,
972 translate_collection=lambda k: self._managers.collections[k].name,
973 )
974 ref = reader.read(best_row, data_id=dataId)
975 if datastore_records:
976 ref = self.get_datastore_records(ref)
978 return ref
980 @transactional
981 def insertDatasets(
982 self,
983 datasetType: DatasetType | str,
984 dataIds: Iterable[DataId],
985 run: str | None = None,
986 expand: bool = True,
987 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
988 ) -> list[DatasetRef]:
989 """Insert one or more datasets into the `Registry`.
991 This always adds new datasets; to associate existing datasets with
992 a new collection, use ``associate``.
994 Parameters
995 ----------
996 datasetType : `DatasetType` or `str`
997 A `DatasetType` or the name of one.
998 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
999 Dimension-based identifiers for the new datasets.
1000 run : `str`, optional
1001 The name of the run that produced the datasets. Defaults to
1002 ``self.defaults.run``.
1003 expand : `bool`, optional
1004 If `True` (default), expand data IDs as they are inserted. This is
1005 necessary in general to allow datastore to generate file templates,
1006 but it may be disabled if the caller can guarantee this is
1007 unnecessary.
1008 idGenerationMode : `DatasetIdGenEnum`, optional
1009 Specifies option for generating dataset IDs. By default unique IDs
1010 are generated for each inserted dataset.
1012 Returns
1013 -------
1014 refs : `list` of `DatasetRef`
1015 Resolved `DatasetRef` instances for all given data IDs (in the same
1016 order).
1018 Raises
1019 ------
1020 lsst.daf.butler.registry.DatasetTypeError
1021 Raised if ``datasetType`` is not known to registry.
1022 lsst.daf.butler.registry.CollectionTypeError
1023 Raised if ``run`` collection type is not `~CollectionType.RUN`.
1024 lsst.daf.butler.registry.NoDefaultCollectionError
1025 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
1026 lsst.daf.butler.registry.ConflictingDefinitionError
1027 If a dataset with the same dataset type and data ID as one of those
1028 given already exists in ``run``.
1029 lsst.daf.butler.registry.MissingCollectionError
1030 Raised if ``run`` does not exist in the registry.
1031 """
1032 if isinstance(datasetType, DatasetType):
1033 storage = self._managers.datasets.find(datasetType.name)
1034 if storage is None:
1035 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
1036 else:
1037 storage = self._managers.datasets.find(datasetType)
1038 if storage is None:
1039 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
1040 if run is None:
1041 if self.defaults.run is None:
1042 raise NoDefaultCollectionError(
1043 "No run provided to insertDatasets, and no default from registry construction."
1044 )
1045 run = self.defaults.run
1046 runRecord = self._managers.collections.find(run)
1047 if runRecord.type is not CollectionType.RUN:
1048 raise CollectionTypeError(
1049 f"Given collection is of type {runRecord.type.name}; RUN collection required."
1050 )
1051 assert isinstance(runRecord, RunRecord)
1052 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
1053 if expand:
1054 expandedDataIds = [
1055 self.expandDataId(dataId, dimensions=storage.datasetType.dimensions)
1056 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
1057 ]
1058 else:
1059 expandedDataIds = [
1060 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
1061 ]
1062 try:
1063 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
1064 if self._managers.obscore:
1065 context = queries.SqlQueryContext(self._db, self._managers.column_types)
1066 self._managers.obscore.add_datasets(refs, context)
1067 except sqlalchemy.exc.IntegrityError as err:
1068 raise ConflictingDefinitionError(
1069 "A database constraint failure was triggered by inserting "
1070 f"one or more datasets of type {storage.datasetType} into "
1071 f"collection '{run}'. "
1072 "This probably means a dataset with the same data ID "
1073 "and dataset type already exists, but it may also mean a "
1074 "dimension row is missing."
1075 ) from err
1076 return refs
1078 @transactional
1079 def _importDatasets(
1080 self,
1081 datasets: Iterable[DatasetRef],
1082 expand: bool = True,
1083 ) -> list[DatasetRef]:
1084 """Import one or more datasets into the `Registry`.
1086 Difference from `insertDatasets` method is that this method accepts
1087 `DatasetRef` instances which should already be resolved and have a
1088 dataset ID. If registry supports globally-unique dataset IDs (e.g.
1089 `uuid.UUID`) then datasets which already exist in the registry will be
1090 ignored if imported again.
1092 Parameters
1093 ----------
1094 datasets : `~collections.abc.Iterable` of `DatasetRef`
1095 Datasets to be inserted. All `DatasetRef` instances must have
1096 identical ``datasetType`` and ``run`` attributes. ``run``
1097 attribute can be `None` and defaults to ``self.defaults.run``.
1098 Datasets can specify ``id`` attribute which will be used for
1099 inserted datasets. All dataset IDs must have the same type
1100 (`int` or `uuid.UUID`), if type of dataset IDs does not match
1101 configured backend then IDs will be ignored and new IDs will be
1102 generated by backend.
1103 expand : `bool`, optional
1104 If `True` (default), expand data IDs as they are inserted. This is
1105 necessary in general, but it may be disabled if the caller can
1106 guarantee this is unnecessary.
1108 Returns
1109 -------
1110 refs : `list` of `DatasetRef`
1111 Resolved `DatasetRef` instances for all given data IDs (in the same
1112 order). If any of ``datasets`` has an ID which already exists in
1113 the database then it will not be inserted or updated, but a
1114 resolved `DatasetRef` will be returned for it in any case.
1116 Raises
1117 ------
1118 lsst.daf.butler.registry.NoDefaultCollectionError
1119 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
1120 lsst.daf.butler.registry.DatasetTypeError
1121 Raised if datasets correspond to more than one dataset type or
1122 dataset type is not known to registry.
1123 lsst.daf.butler.registry.ConflictingDefinitionError
1124 If a dataset with the same dataset type and data ID as one of those
1125 given already exists in ``run``.
1126 lsst.daf.butler.registry.MissingCollectionError
1127 Raised if ``run`` does not exist in the registry.
1129 Notes
1130 -----
1131 This method is considered package-private and internal to Butler
1132 implementation. Clients outside daf_butler package should not use this
1133 method.
1134 """
1135 datasets = list(datasets)
1136 if not datasets:
1137 # nothing to do
1138 return []
1140 # find dataset type
1141 datasetTypes = {dataset.datasetType for dataset in datasets}
1142 if len(datasetTypes) != 1:
1143 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
1144 datasetType = datasetTypes.pop()
1146 # get storage handler for this dataset type
1147 storage = self._managers.datasets.find(datasetType.name)
1148 if storage is None:
1149 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
1151 # find run name
1152 runs = {dataset.run for dataset in datasets}
1153 if len(runs) != 1:
1154 raise ValueError(f"Multiple run names in input datasets: {runs}")
1155 run = runs.pop()
1157 runRecord = self._managers.collections.find(run)
1158 if runRecord.type is not CollectionType.RUN:
1159 raise CollectionTypeError(
1160 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
1161 " RUN collection required."
1162 )
1163 assert isinstance(runRecord, RunRecord)
1165 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
1166 if expand:
1167 expandedDatasets = [
1168 dataset.expanded(self.expandDataId(dataset.dataId, dimensions=storage.datasetType.dimensions))
1169 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
1170 ]
1171 else:
1172 expandedDatasets = [
1173 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
1174 for dataset in datasets
1175 ]
1177 try:
1178 refs = list(storage.import_(runRecord, expandedDatasets))
1179 if self._managers.obscore:
1180 context = queries.SqlQueryContext(self._db, self._managers.column_types)
1181 self._managers.obscore.add_datasets(refs, context)
1182 except sqlalchemy.exc.IntegrityError as err:
1183 raise ConflictingDefinitionError(
1184 "A database constraint failure was triggered by inserting "
1185 f"one or more datasets of type {storage.datasetType} into "
1186 f"collection '{run}'. "
1187 "This probably means a dataset with the same data ID "
1188 "and dataset type already exists, but it may also mean a "
1189 "dimension row is missing."
1190 ) from err
1191 # Check that imported dataset IDs match the input
1192 for imported_ref, input_ref in zip(refs, datasets, strict=True):
1193 if imported_ref.id != input_ref.id:
1194 raise RegistryConsistencyError(
1195 "Imported dataset ID differs from input dataset ID, "
1196 f"input ref: {input_ref}, imported ref: {imported_ref}"
1197 )
1198 return refs
1200 def getDataset(self, id: DatasetId) -> DatasetRef | None:
1201 """Retrieve a Dataset entry.
1203 Parameters
1204 ----------
1205 id : `DatasetId`
1206 The unique identifier for the dataset.
1208 Returns
1209 -------
1210 ref : `DatasetRef` or `None`
1211 A ref to the Dataset, or `None` if no matching Dataset
1212 was found.
1213 """
1214 return self._managers.datasets.getDatasetRef(id)
1216 @transactional
1217 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
1218 """Remove datasets from the Registry.
1220 The datasets will be removed unconditionally from all collections, and
1221 any `Quantum` that consumed this dataset will instead be marked with
1222 having a NULL input. `Datastore` records will *not* be deleted; the
1223 caller is responsible for ensuring that the dataset has already been
1224 removed from all Datastores.
1226 Parameters
1227 ----------
1228 refs : `~collections.abc.Iterable` [`DatasetRef`]
1229 References to the datasets to be removed. Must include a valid
1230 ``id`` attribute, and should be considered invalidated upon return.
1232 Raises
1233 ------
1234 lsst.daf.butler.AmbiguousDatasetError
1235 Raised if any ``ref.id`` is `None`.
1236 lsst.daf.butler.registry.OrphanedRecordError
1237 Raised if any dataset is still present in any `Datastore`.
1238 """
1239 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
1240 for datasetType, refsForType in progress.iter_item_chunks(
1241 DatasetRef.iter_by_type(refs), desc="Removing datasets by type"
1242 ):
1243 storage = self._managers.datasets[datasetType.name]
1244 try:
1245 storage.delete(refsForType)
1246 except sqlalchemy.exc.IntegrityError as err:
1247 raise OrphanedRecordError(
1248 "One or more datasets is still present in one or more Datastores."
1249 ) from err
1251 @transactional
1252 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
1253 """Add existing datasets to a `~CollectionType.TAGGED` collection.
1255 If a DatasetRef with the same exact ID is already in a collection
1256 nothing is changed. If a `DatasetRef` with the same `DatasetType` and
1257 data ID but with different ID exists in the collection,
1258 `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised.
1260 Parameters
1261 ----------
1262 collection : `str`
1263 Indicates the collection the datasets should be associated with.
1264 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
1265 An iterable of resolved `DatasetRef` instances that already exist
1266 in this `Registry`.
1268 Raises
1269 ------
1270 lsst.daf.butler.registry.ConflictingDefinitionError
1271 If a Dataset with the given `DatasetRef` already exists in the
1272 given collection.
1273 lsst.daf.butler.registry.MissingCollectionError
1274 Raised if ``collection`` does not exist in the registry.
1275 lsst.daf.butler.registry.CollectionTypeError
1276 Raise adding new datasets to the given ``collection`` is not
1277 allowed.
1278 """
1279 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
1280 collectionRecord = self._managers.collections.find(collection)
1281 if collectionRecord.type is not CollectionType.TAGGED:
1282 raise CollectionTypeError(
1283 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
1284 )
1285 for datasetType, refsForType in progress.iter_item_chunks(
1286 DatasetRef.iter_by_type(refs), desc="Associating datasets by type"
1287 ):
1288 storage = self._managers.datasets[datasetType.name]
1289 try:
1290 storage.associate(collectionRecord, refsForType)
1291 if self._managers.obscore:
1292 # If a TAGGED collection is being monitored by ObsCore
1293 # manager then we may need to save the dataset.
1294 context = queries.SqlQueryContext(self._db, self._managers.column_types)
1295 self._managers.obscore.associate(refsForType, collectionRecord, context)
1296 except sqlalchemy.exc.IntegrityError as err:
1297 raise ConflictingDefinitionError(
1298 f"Constraint violation while associating dataset of type {datasetType.name} with "
1299 f"collection {collection}. This probably means that one or more datasets with the same "
1300 "dataset type and data ID already exist in the collection, but it may also indicate "
1301 "that the datasets do not exist."
1302 ) from err
1304 @transactional
1305 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
1306 """Remove existing datasets from a `~CollectionType.TAGGED` collection.
1308 ``collection`` and ``ref`` combinations that are not currently
1309 associated are silently ignored.
1311 Parameters
1312 ----------
1313 collection : `str`
1314 The collection the datasets should no longer be associated with.
1315 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
1316 An iterable of resolved `DatasetRef` instances that already exist
1317 in this `Registry`.
1319 Raises
1320 ------
1321 lsst.daf.butler.AmbiguousDatasetError
1322 Raised if any of the given dataset references is unresolved.
1323 lsst.daf.butler.registry.MissingCollectionError
1324 Raised if ``collection`` does not exist in the registry.
1325 lsst.daf.butler.registry.CollectionTypeError
1326 Raise adding new datasets to the given ``collection`` is not
1327 allowed.
1328 """
1329 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
1330 collectionRecord = self._managers.collections.find(collection)
1331 if collectionRecord.type is not CollectionType.TAGGED:
1332 raise CollectionTypeError(
1333 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
1334 )
1335 for datasetType, refsForType in progress.iter_item_chunks(
1336 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type"
1337 ):
1338 storage = self._managers.datasets[datasetType.name]
1339 storage.disassociate(collectionRecord, refsForType)
1340 if self._managers.obscore:
1341 self._managers.obscore.disassociate(refsForType, collectionRecord)
1343 @transactional
1344 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
1345 """Associate one or more datasets with a calibration collection and a
1346 validity range within it.
1348 Parameters
1349 ----------
1350 collection : `str`
1351 The name of an already-registered `~CollectionType.CALIBRATION`
1352 collection.
1353 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
1354 Datasets to be associated.
1355 timespan : `Timespan`
1356 The validity range for these datasets within the collection.
1358 Raises
1359 ------
1360 lsst.daf.butler.AmbiguousDatasetError
1361 Raised if any of the given `DatasetRef` instances is unresolved.
1362 lsst.daf.butler.registry.ConflictingDefinitionError
1363 Raised if the collection already contains a different dataset with
1364 the same `DatasetType` and data ID and an overlapping validity
1365 range.
1366 lsst.daf.butler.registry.CollectionTypeError
1367 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
1368 collection or if one or more datasets are of a dataset type for
1369 which `DatasetType.isCalibration` returns `False`.
1370 """
1371 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
1372 collectionRecord = self._managers.collections.find(collection)
1373 for datasetType, refsForType in progress.iter_item_chunks(
1374 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type"
1375 ):
1376 storage = self._managers.datasets[datasetType.name]
1377 storage.certify(
1378 collectionRecord,
1379 refsForType,
1380 timespan,
1381 context=queries.SqlQueryContext(self._db, self._managers.column_types),
1382 )
1384 @transactional
1385 def decertify(
1386 self,
1387 collection: str,
1388 datasetType: str | DatasetType,
1389 timespan: Timespan,
1390 *,
1391 dataIds: Iterable[DataId] | None = None,
1392 ) -> None:
1393 """Remove or adjust datasets to clear a validity range within a
1394 calibration collection.
1396 Parameters
1397 ----------
1398 collection : `str`
1399 The name of an already-registered `~CollectionType.CALIBRATION`
1400 collection.
1401 datasetType : `str` or `DatasetType`
1402 Name or `DatasetType` instance for the datasets to be decertified.
1403 timespan : `Timespan`, optional
1404 The validity range to remove datasets from within the collection.
1405 Datasets that overlap this range but are not contained by it will
1406 have their validity ranges adjusted to not overlap it, which may
1407 split a single dataset validity range into two.
1408 dataIds : iterable [`dict` or `DataCoordinate`], optional
1409 Data IDs that should be decertified within the given validity range
1410 If `None`, all data IDs for ``self.datasetType`` will be
1411 decertified.
1413 Raises
1414 ------
1415 lsst.daf.butler.registry.CollectionTypeError
1416 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
1417 collection or if ``datasetType.isCalibration() is False``.
1418 """
1419 collectionRecord = self._managers.collections.find(collection)
1420 if isinstance(datasetType, str):
1421 storage = self._managers.datasets[datasetType]
1422 else:
1423 storage = self._managers.datasets[datasetType.name]
1424 standardizedDataIds = None
1425 if dataIds is not None:
1426 standardizedDataIds = [
1427 DataCoordinate.standardize(d, dimensions=storage.datasetType.dimensions) for d in dataIds
1428 ]
1429 storage.decertify(
1430 collectionRecord,
1431 timespan,
1432 dataIds=standardizedDataIds,
1433 context=queries.SqlQueryContext(self._db, self._managers.column_types),
1434 )
1436 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
1437 """Return an object that allows a new `Datastore` instance to
1438 communicate with this `Registry`.
1440 Returns
1441 -------
1442 manager : `~.interfaces.DatastoreRegistryBridgeManager`
1443 Object that mediates communication between this `Registry` and its
1444 associated datastores.
1445 """
1446 return self._managers.datastores
1448 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
1449 """Retrieve datastore locations for a given dataset.
1451 Parameters
1452 ----------
1453 ref : `DatasetRef`
1454 A reference to the dataset for which to retrieve storage
1455 information.
1457 Returns
1458 -------
1459 datastores : `~collections.abc.Iterable` [ `str` ]
1460 All the matching datastores holding this dataset.
1462 Raises
1463 ------
1464 lsst.daf.butler.AmbiguousDatasetError
1465 Raised if ``ref.id`` is `None`.
1466 """
1467 return self._managers.datastores.findDatastores(ref)
1469 def expandDataId(
1470 self,
1471 dataId: DataId | None = None,
1472 *,
1473 dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None,
1474 graph: DimensionGraph | None = None,
1475 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
1476 withDefaults: bool = True,
1477 **kwargs: Any,
1478 ) -> DataCoordinate:
1479 """Expand a dimension-based data ID to include additional information.
1481 Parameters
1482 ----------
1483 dataId : `DataCoordinate` or `dict`, optional
1484 Data ID to be expanded; augmented and overridden by ``kwargs``.
1485 dimensions : `~collections.abc.Iterable` [ `str` ], \
1486 `DimensionGroup`, or `DimensionGraph`, optional
1487 The dimensions to be identified by the new `DataCoordinate`.
1488 If not provided, will be inferred from the keys of ``mapping`` and
1489 ``**kwargs``, and ``universe`` must be provided unless ``mapping``
1490 is already a `DataCoordinate`.
1491 graph : `DimensionGraph`, optional
1492 Like ``dimensions``, but as a ``DimensionGraph`` instance. Ignored
1493 if ``dimensions`` is provided. Deprecated and will be removed
1494 after v27.
1495 records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \
1496 optional
1497 Dimension record data to use before querying the database for that
1498 data, keyed by element name.
1499 withDefaults : `bool`, optional
1500 Utilize ``self.defaults.dataId`` to fill in missing governor
1501 dimension key-value pairs. Defaults to `True` (i.e. defaults are
1502 used).
1503 **kwargs
1504 Additional keywords are treated like additional key-value pairs for
1505 ``dataId``, extending and overriding.
1507 Returns
1508 -------
1509 expanded : `DataCoordinate`
1510 A data ID that includes full metadata for all of the dimensions it
1511 identifies, i.e. guarantees that ``expanded.hasRecords()`` and
1512 ``expanded.hasFull()`` both return `True`.
1514 Raises
1515 ------
1516 lsst.daf.butler.registry.DataIdError
1517 Raised when ``dataId`` or keyword arguments specify unknown
1518 dimensions or values, or when a resulting data ID contains
1519 contradictory key-value pairs, according to dimension
1520 relationships.
1522 Notes
1523 -----
1524 This method cannot be relied upon to reject invalid data ID values
1525 for dimensions that do actually not have any record columns. For
1526 efficiency reasons the records for these dimensions (which have only
1527 dimension key values that are given by the caller) may be constructed
1528 directly rather than obtained from the registry database.
1529 """
1530 if not withDefaults:
1531 defaults = None
1532 else:
1533 defaults = self.defaults.dataId
1534 try:
1535 standardized = DataCoordinate.standardize(
1536 dataId,
1537 graph=graph,
1538 dimensions=dimensions,
1539 universe=self.dimensions,
1540 defaults=defaults,
1541 **kwargs,
1542 )
1543 except KeyError as exc:
1544 # This means either kwargs have some odd name or required
1545 # dimension is missing.
1546 raise DimensionNameError(str(exc)) from exc
1547 if standardized.hasRecords():
1548 return standardized
1549 if records is None:
1550 records = {}
1551 elif isinstance(records, NamedKeyMapping):
1552 records = records.byName()
1553 else:
1554 records = dict(records)
1555 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
1556 for element_name in dataId.dimensions.elements:
1557 records[element_name] = dataId.records[element_name]
1558 keys = dict(standardized.mapping)
1559 for element_name in standardized.dimensions.lookup_order:
1560 element = self.dimensions[element_name]
1561 record = records.get(element_name, ...) # Use ... to mean not found; None might mean NULL
1562 if record is ...:
1563 if element_name in self.dimensions.dimensions.names and keys.get(element_name) is None:
1564 if element_name in standardized.dimensions.required:
1565 raise DimensionNameError(
1566 f"No value or null value for required dimension {element_name}."
1567 )
1568 keys[element_name] = None
1569 record = None
1570 else:
1571 record = self._managers.dimensions.fetch_one(
1572 element_name,
1573 DataCoordinate.standardize(keys, dimensions=element.minimal_group),
1574 self.dimension_record_cache,
1575 )
1576 records[element_name] = record
1577 if record is not None:
1578 for d in element.implied:
1579 value = getattr(record, d.name)
1580 if keys.setdefault(d.name, value) != value:
1581 raise InconsistentDataIdError(
1582 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
1583 f"but {element_name} implies {d.name}={value!r}."
1584 )
1585 else:
1586 if element_name in standardized.dimensions.required:
1587 raise DataIdValueError(
1588 f"Could not fetch record for required dimension {element.name} via keys {keys}."
1589 )
1590 if element.defines_relationships:
1591 raise InconsistentDataIdError(
1592 f"Could not fetch record for element {element_name} via keys {keys}, ",
1593 "but it is marked as defining relationships; this means one or more dimensions are "
1594 "have inconsistent values.",
1595 )
1596 for d in element.implied:
1597 keys.setdefault(d.name, None)
1598 records.setdefault(d.name, None)
1599 return DataCoordinate.standardize(keys, dimensions=standardized.dimensions).expanded(records=records)
1601 def insertDimensionData(
1602 self,
1603 element: DimensionElement | str,
1604 *data: Mapping[str, Any] | DimensionRecord,
1605 conform: bool = True,
1606 replace: bool = False,
1607 skip_existing: bool = False,
1608 ) -> None:
1609 """Insert one or more dimension records into the database.
1611 Parameters
1612 ----------
1613 element : `DimensionElement` or `str`
1614 The `DimensionElement` or name thereof that identifies the table
1615 records will be inserted into.
1616 *data : `dict` or `DimensionRecord`
1617 One or more records to insert.
1618 conform : `bool`, optional
1619 If `False` (`True` is default) perform no checking or conversions,
1620 and assume that ``element`` is a `DimensionElement` instance and
1621 ``data`` is a one or more `DimensionRecord` instances of the
1622 appropriate subclass.
1623 replace : `bool`, optional
1624 If `True` (`False` is default), replace existing records in the
1625 database if there is a conflict.
1626 skip_existing : `bool`, optional
1627 If `True` (`False` is default), skip insertion if a record with
1628 the same primary key values already exists. Unlike
1629 `syncDimensionData`, this will not detect when the given record
1630 differs from what is in the database, and should not be used when
1631 this is a concern.
1632 """
1633 if isinstance(element, str):
1634 element = self.dimensions[element]
1635 if conform:
1636 records = [
1637 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
1638 ]
1639 else:
1640 # Ignore typing since caller said to trust them with conform=False.
1641 records = data # type: ignore
1642 if element.name in self.dimension_record_cache:
1643 self.dimension_record_cache.reset()
1644 self._managers.dimensions.insert(
1645 element,
1646 *records,
1647 replace=replace,
1648 skip_existing=skip_existing,
1649 )
1651 def syncDimensionData(
1652 self,
1653 element: DimensionElement | str,
1654 row: Mapping[str, Any] | DimensionRecord,
1655 conform: bool = True,
1656 update: bool = False,
1657 ) -> bool | dict[str, Any]:
1658 """Synchronize the given dimension record with the database, inserting
1659 if it does not already exist and comparing values if it does.
1661 Parameters
1662 ----------
1663 element : `DimensionElement` or `str`
1664 The `DimensionElement` or name thereof that identifies the table
1665 records will be inserted into.
1666 row : `dict` or `DimensionRecord`
1667 The record to insert.
1668 conform : `bool`, optional
1669 If `False` (`True` is default) perform no checking or conversions,
1670 and assume that ``element`` is a `DimensionElement` instance and
1671 ``data`` is a `DimensionRecord` instances of the appropriate
1672 subclass.
1673 update : `bool`, optional
1674 If `True` (`False` is default), update the existing record in the
1675 database if there is a conflict.
1677 Returns
1678 -------
1679 inserted_or_updated : `bool` or `dict`
1680 `True` if a new row was inserted, `False` if no changes were
1681 needed, or a `dict` mapping updated column names to their old
1682 values if an update was performed (only possible if
1683 ``update=True``).
1685 Raises
1686 ------
1687 lsst.daf.butler.registry.ConflictingDefinitionError
1688 Raised if the record exists in the database (according to primary
1689 key lookup) but is inconsistent with the given one.
1690 """
1691 if conform:
1692 if isinstance(element, str):
1693 element = self.dimensions[element]
1694 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
1695 else:
1696 # Ignore typing since caller said to trust them with conform=False.
1697 record = row # type: ignore
1698 if record.definition.name in self.dimension_record_cache:
1699 self.dimension_record_cache.reset()
1700 return self._managers.dimensions.sync(record, update=update)
1702 def queryDatasetTypes(
1703 self,
1704 expression: Any = ...,
1705 *,
1706 components: bool | _Marker = _DefaultMarker,
1707 missing: list[str] | None = None,
1708 ) -> Iterable[DatasetType]:
1709 """Iterate over the dataset types whose names match an expression.
1711 Parameters
1712 ----------
1713 expression : dataset type expression, optional
1714 An expression that fully or partially identifies the dataset types
1715 to return, such as a `str`, `re.Pattern`, or iterable thereof.
1716 ``...`` can be used to return all dataset types, and is the
1717 default. See :ref:`daf_butler_dataset_type_expressions` for more
1718 information.
1719 components : `bool`, optional
1720 Must be `False`. Provided only for backwards compatibility. After
1721 v27 this argument will be removed entirely.
1722 missing : `list` of `str`, optional
1723 String dataset type names that were explicitly given (i.e. not
1724 regular expression patterns) but not found will be appended to this
1725 list, if it is provided.
1727 Returns
1728 -------
1729 dataset_types : `~collections.abc.Iterable` [ `DatasetType`]
1730 An `~collections.abc.Iterable` of `DatasetType` instances whose
1731 names match ``expression``.
1733 Raises
1734 ------
1735 lsst.daf.butler.registry.DatasetTypeExpressionError
1736 Raised when ``expression`` is invalid.
1737 """
1738 if components is not _DefaultMarker:
1739 if components is not False:
1740 raise DatasetTypeError(
1741 "Dataset component queries are no longer supported by Registry. Use "
1742 "DatasetType methods to obtain components from parent dataset types instead."
1743 )
1744 else:
1745 warnings.warn(
1746 "The components parameter is ignored. It will be removed after v27.",
1747 category=FutureWarning,
1748 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
1749 )
1750 wildcard = DatasetTypeWildcard.from_expression(expression)
1751 return self._managers.datasets.resolve_wildcard(wildcard, missing=missing)
1753 def queryCollections(
1754 self,
1755 expression: Any = ...,
1756 datasetType: DatasetType | None = None,
1757 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
1758 flattenChains: bool = False,
1759 includeChains: bool | None = None,
1760 ) -> Sequence[str]:
1761 """Iterate over the collections whose names match an expression.
1763 Parameters
1764 ----------
1765 expression : collection expression, optional
1766 An expression that identifies the collections to return, such as
1767 a `str` (for full matches or partial matches via globs),
1768 `re.Pattern` (for partial matches), or iterable thereof. ``...``
1769 can be used to return all collections, and is the default.
1770 See :ref:`daf_butler_collection_expressions` for more information.
1771 datasetType : `DatasetType`, optional
1772 If provided, only yield collections that may contain datasets of
1773 this type. This is a conservative approximation in general; it may
1774 yield collections that do not have any such datasets.
1775 collectionTypes : `~collections.abc.Set` [`CollectionType`] or \
1776 `CollectionType`, optional
1777 If provided, only yield collections of these types.
1778 flattenChains : `bool`, optional
1779 If `True` (`False` is default), recursively yield the child
1780 collections of matching `~CollectionType.CHAINED` collections.
1781 includeChains : `bool`, optional
1782 If `True`, yield records for matching `~CollectionType.CHAINED`
1783 collections. Default is the opposite of ``flattenChains``: include
1784 either CHAINED collections or their children, but not both.
1786 Returns
1787 -------
1788 collections : `~collections.abc.Sequence` [ `str` ]
1789 The names of collections that match ``expression``.
1791 Raises
1792 ------
1793 lsst.daf.butler.registry.CollectionExpressionError
1794 Raised when ``expression`` is invalid.
1796 Notes
1797 -----
1798 The order in which collections are returned is unspecified, except that
1799 the children of a `~CollectionType.CHAINED` collection are guaranteed
1800 to be in the order in which they are searched. When multiple parent
1801 `~CollectionType.CHAINED` collections match the same criteria, the
1802 order in which the two lists appear is unspecified, and the lists of
1803 children may be incomplete if a child has multiple parents.
1804 """
1805 # Right now the datasetTypes argument is completely ignored, but that
1806 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
1807 # ticket will take care of that.
1808 try:
1809 wildcard = CollectionWildcard.from_expression(expression)
1810 except TypeError as exc:
1811 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
1812 collectionTypes = ensure_iterable(collectionTypes)
1813 return [
1814 record.name
1815 for record in self._managers.collections.resolve_wildcard(
1816 wildcard,
1817 collection_types=frozenset(collectionTypes),
1818 flatten_chains=flattenChains,
1819 include_chains=includeChains,
1820 )
1821 ]
1823 def _makeQueryBuilder(
1824 self,
1825 summary: queries.QuerySummary,
1826 doomed_by: Iterable[str] = (),
1827 ) -> queries.QueryBuilder:
1828 """Return a `QueryBuilder` instance capable of constructing and
1829 managing more complex queries than those obtainable via `Registry`
1830 interfaces.
1832 This is an advanced interface; downstream code should prefer
1833 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
1834 are sufficient.
1836 Parameters
1837 ----------
1838 summary : `queries.QuerySummary`
1839 Object describing and categorizing the full set of dimensions that
1840 will be included in the query.
1841 doomed_by : `~collections.abc.Iterable` of `str`, optional
1842 A list of diagnostic messages that indicate why the query is going
1843 to yield no results and should not even be executed. If an empty
1844 container (default) the query will be executed unless other code
1845 determines that it is doomed.
1847 Returns
1848 -------
1849 builder : `queries.QueryBuilder`
1850 Object that can be used to construct and perform advanced queries.
1851 """
1852 doomed_by = list(doomed_by)
1853 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache)
1854 context = backend.context()
1855 relation: Relation | None = None
1856 if doomed_by:
1857 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
1858 return queries.QueryBuilder(
1859 summary,
1860 backend=backend,
1861 context=context,
1862 relation=relation,
1863 )
1865 def _standardize_query_data_id_args(
1866 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
1867 ) -> DataCoordinate:
1868 """Preprocess the data ID arguments passed to query* methods.
1870 Parameters
1871 ----------
1872 data_id : `DataId` or `None`
1873 Data ID that constrains the query results.
1874 doomed_by : `list` [ `str` ]
1875 List to append messages indicating why the query is doomed to
1876 yield no results.
1877 **kwargs
1878 Additional data ID key-value pairs, extending and overriding
1879 ``data_id``.
1881 Returns
1882 -------
1883 data_id : `DataCoordinate`
1884 Standardized data ID. Will be fully expanded unless expansion
1885 fails, in which case a message will be appended to ``doomed_by``
1886 on return.
1887 """
1888 try:
1889 return self.expandDataId(data_id, **kwargs)
1890 except DataIdValueError as err:
1891 doomed_by.append(str(err))
1892 return DataCoordinate.standardize(
1893 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
1894 )
1896 def _standardize_query_dataset_args(
1897 self,
1898 datasets: Any,
1899 collections: CollectionArgType | None,
1900 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
1901 *,
1902 doomed_by: list[str],
1903 ) -> tuple[list[DatasetType], CollectionWildcard | None]:
1904 """Preprocess dataset arguments passed to query* methods.
1906 Parameters
1907 ----------
1908 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
1909 Expression identifying dataset types. See `queryDatasetTypes` for
1910 details.
1911 collections : `str`, `re.Pattern`, or iterable of these
1912 Expression identifying collections to be searched. See
1913 `queryCollections` for details.
1914 mode : `str`, optional
1915 The way in which datasets are being used in this query; one of:
1917 - "find_first": this is a query for the first dataset in an
1918 ordered list of collections. Prohibits collection wildcards,
1919 but permits dataset type wildcards.
1921 - "find_all": this is a query for all datasets in all matched
1922 collections. Permits collection and dataset type wildcards.
1924 - "constrain": this is a query for something other than datasets,
1925 with results constrained by dataset existence. Permits
1926 collection wildcards and prohibits ``...`` as a dataset type
1927 wildcard.
1928 doomed_by : `list` [ `str` ]
1929 List to append messages indicating why the query is doomed to
1930 yield no results.
1932 Returns
1933 -------
1934 dataset_types : `list` [ `DatasetType` ]
1935 List of matched dataset types.
1936 collections : `CollectionWildcard`
1937 Processed collection expression.
1938 """
1939 dataset_types: list[DatasetType] = []
1940 collection_wildcard: CollectionWildcard | None = None
1941 if datasets is not None:
1942 if collections is None:
1943 if not self.defaults.collections:
1944 raise NoDefaultCollectionError("No collections, and no registry default collections.")
1945 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections)
1946 else:
1947 collection_wildcard = CollectionWildcard.from_expression(collections)
1948 if mode == "find_first" and collection_wildcard.patterns:
1949 raise TypeError(
1950 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context."
1951 )
1952 missing: list[str] = []
1953 dataset_types = self._managers.datasets.resolve_wildcard(
1954 datasets, missing=missing, explicit_only=(mode == "constrain")
1955 )
1956 if missing and mode == "constrain":
1957 raise MissingDatasetTypeError(
1958 f"Dataset type(s) {missing} are not registered.",
1959 )
1960 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
1961 elif collections:
1962 # I think this check should actually be `collections is not None`,
1963 # but it looks like some CLI scripts use empty tuple as default.
1964 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
1965 return dataset_types, collection_wildcard
1967 def queryDatasets(
1968 self,
1969 datasetType: Any,
1970 *,
1971 collections: CollectionArgType | None = None,
1972 dimensions: Iterable[Dimension | str] | None = None,
1973 dataId: DataId | None = None,
1974 where: str = "",
1975 findFirst: bool = False,
1976 components: bool | _Marker = _DefaultMarker,
1977 bind: Mapping[str, Any] | None = None,
1978 check: bool = True,
1979 **kwargs: Any,
1980 ) -> queries.DatasetQueryResults:
1981 """Query for and iterate over dataset references matching user-provided
1982 criteria.
1984 Parameters
1985 ----------
1986 datasetType : dataset type expression
1987 An expression that fully or partially identifies the dataset types
1988 to be queried. Allowed types include `DatasetType`, `str`,
1989 `re.Pattern`, and iterables thereof. The special value ``...`` can
1990 be used to query all dataset types. See
1991 :ref:`daf_butler_dataset_type_expressions` for more information.
1992 collections : collection expression, optional
1993 An expression that identifies the collections to search, such as a
1994 `str` (for full matches or partial matches via globs), `re.Pattern`
1995 (for partial matches), or iterable thereof. ``...`` can be used to
1996 search all collections (actually just all `~CollectionType.RUN`
1997 collections, because this will still find all datasets).
1998 If not provided, ``self.default.collections`` is used. See
1999 :ref:`daf_butler_collection_expressions` for more information.
2000 dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
2001 Dimensions to include in the query (in addition to those used
2002 to identify the queried dataset type(s)), either to constrain
2003 the resulting datasets to those for which a matching dimension
2004 exists, or to relate the dataset type's dimensions to dimensions
2005 referenced by the ``dataId`` or ``where`` arguments.
2006 dataId : `dict` or `DataCoordinate`, optional
2007 A data ID whose key-value pairs are used as equality constraints
2008 in the query.
2009 where : `str`, optional
2010 A string expression similar to a SQL WHERE clause. May involve
2011 any column of a dimension table or (as a shortcut for the primary
2012 key column of a dimension table) dimension name. See
2013 :ref:`daf_butler_dimension_expressions` for more information.
2014 findFirst : `bool`, optional
2015 If `True` (`False` is default), for each result data ID, only
2016 yield one `DatasetRef` of each `DatasetType`, from the first
2017 collection in which a dataset of that dataset type appears
2018 (according to the order of ``collections`` passed in). If `True`,
2019 ``collections`` must not contain regular expressions and may not
2020 be ``...``.
2021 components : `bool`, optional
2022 Must be `False`. Provided only for backwards compatibility. After
2023 v27 this argument will be removed entirely.
2024 bind : `~collections.abc.Mapping`, optional
2025 Mapping containing literal values that should be injected into the
2026 ``where`` expression, keyed by the identifiers they replace.
2027 Values of collection type can be expanded in some cases; see
2028 :ref:`daf_butler_dimension_expressions_identifiers` for more
2029 information.
2030 check : `bool`, optional
2031 If `True` (default) check the query for consistency before
2032 executing it. This may reject some valid queries that resemble
2033 common mistakes (e.g. queries for visits without specifying an
2034 instrument).
2035 **kwargs
2036 Additional keyword arguments are forwarded to
2037 `DataCoordinate.standardize` when processing the ``dataId``
2038 argument (and may be used to provide a constraining data ID even
2039 when the ``dataId`` argument is `None`).
2041 Returns
2042 -------
2043 refs : `.queries.DatasetQueryResults`
2044 Dataset references matching the given query criteria. Nested data
2045 IDs are guaranteed to include values for all implied dimensions
2046 (i.e. `DataCoordinate.hasFull` will return `True`), but will not
2047 include dimension records (`DataCoordinate.hasRecords` will be
2048 `False`) unless `~.queries.DatasetQueryResults.expanded` is
2049 called on the result object (which returns a new one).
2051 Raises
2052 ------
2053 lsst.daf.butler.registry.DatasetTypeExpressionError
2054 Raised when ``datasetType`` expression is invalid.
2055 TypeError
2056 Raised when the arguments are incompatible, such as when a
2057 collection wildcard is passed when ``findFirst`` is `True`, or
2058 when ``collections`` is `None` and ``self.defaults.collections`` is
2059 also `None`.
2060 lsst.daf.butler.registry.DataIdError
2061 Raised when ``dataId`` or keyword arguments specify unknown
2062 dimensions or values, or when they contain inconsistent values.
2063 lsst.daf.butler.registry.UserExpressionError
2064 Raised when ``where`` expression is invalid.
2066 Notes
2067 -----
2068 When multiple dataset types are queried in a single call, the
2069 results of this operation are equivalent to querying for each dataset
2070 type separately in turn, and no information about the relationships
2071 between datasets of different types is included. In contexts where
2072 that kind of information is important, the recommended pattern is to
2073 use `queryDataIds` to first obtain data IDs (possibly with the
2074 desired dataset types and collections passed as constraints to the
2075 query), and then use multiple (generally much simpler) calls to
2076 `queryDatasets` with the returned data IDs passed as constraints.
2077 """
2078 if components is not _DefaultMarker:
2079 if components is not False:
2080 raise DatasetTypeError(
2081 "Dataset component queries are no longer supported by Registry. Use "
2082 "DatasetType methods to obtain components from parent dataset types instead."
2083 )
2084 else:
2085 warnings.warn(
2086 "The components parameter is ignored. It will be removed after v27.",
2087 category=FutureWarning,
2088 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
2089 )
2090 doomed_by: list[str] = []
2091 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
2092 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args(
2093 datasetType,
2094 collections,
2095 mode="find_first" if findFirst else "find_all",
2096 doomed_by=doomed_by,
2097 )
2098 if collection_wildcard is not None and collection_wildcard.empty():
2099 doomed_by.append("No datasets can be found because collection list is empty.")
2100 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
2101 parent_results: list[queries.ParentDatasetQueryResults] = []
2102 for resolved_dataset_type in resolved_dataset_types:
2103 # The full set of dimensions in the query is the combination of
2104 # those needed for the DatasetType and those explicitly requested,
2105 # if any.
2106 dimension_names = set(resolved_dataset_type.dimensions.names)
2107 if dimensions is not None:
2108 dimension_names.update(self.dimensions.conform(dimensions).names)
2109 # Construct the summary structure needed to construct a
2110 # QueryBuilder.
2111 summary = queries.QuerySummary(
2112 requested=self.dimensions.conform(dimension_names),
2113 column_types=self._managers.column_types,
2114 data_id=data_id,
2115 expression=where,
2116 bind=bind,
2117 defaults=self.defaults.dataId,
2118 check=check,
2119 datasets=[resolved_dataset_type],
2120 )
2121 builder = self._makeQueryBuilder(summary)
2122 # Add the dataset subquery to the query, telling the QueryBuilder
2123 # to include the rank of the selected collection in the results
2124 # only if we need to findFirst. Note that if any of the
2125 # collections are actually wildcard expressions, and
2126 # findFirst=True, this will raise TypeError for us.
2127 builder.joinDataset(
2128 resolved_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst
2129 )
2130 query = builder.finish()
2131 parent_results.append(
2132 queries.ParentDatasetQueryResults(query, resolved_dataset_type, components=[None])
2133 )
2134 if not parent_results:
2135 doomed_by.extend(
2136 f"No registered dataset type matching {t!r} found, so no matching datasets can "
2137 "exist in any collection."
2138 for t in ensure_iterable(datasetType)
2139 )
2140 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
2141 elif len(parent_results) == 1:
2142 return parent_results[0]
2143 else:
2144 return queries.ChainedDatasetQueryResults(parent_results)
2146 def queryDataIds(
2147 self,
2148 # TODO: Drop Dimension support on DM-41326.
2149 dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str,
2150 *,
2151 dataId: DataId | None = None,
2152 datasets: Any = None,
2153 collections: CollectionArgType | None = None,
2154 where: str = "",
2155 components: bool | _Marker = _DefaultMarker,
2156 bind: Mapping[str, Any] | None = None,
2157 check: bool = True,
2158 **kwargs: Any,
2159 ) -> queries.DataCoordinateQueryResults:
2160 """Query for data IDs matching user-provided criteria.
2162 Parameters
2163 ----------
2164 dimensions : `DimensionGroup`, `Dimension`, or `str`, or \
2165 `~collections.abc.Iterable` [ `Dimension` or `str` ]
2166 The dimensions of the data IDs to yield, as either `Dimension`
2167 instances or `str`. Will be automatically expanded to a complete
2168 `DimensionGroup`. Support for `Dimension` instances is deprecated
2169 and will not be supported after v27.
2170 dataId : `dict` or `DataCoordinate`, optional
2171 A data ID whose key-value pairs are used as equality constraints
2172 in the query.
2173 datasets : dataset type expression, optional
2174 An expression that fully or partially identifies dataset types
2175 that should constrain the yielded data IDs. For example, including
2176 "raw" here would constrain the yielded ``instrument``,
2177 ``exposure``, ``detector``, and ``physical_filter`` values to only
2178 those for which at least one "raw" dataset exists in
2179 ``collections``. Allowed types include `DatasetType`, `str`,
2180 and iterables thereof. Regular expression objects (i.e.
2181 `re.Pattern`) are deprecated and will be removed after the v26
2182 release. See :ref:`daf_butler_dataset_type_expressions` for more
2183 information.
2184 collections : collection expression, optional
2185 An expression that identifies the collections to search for
2186 datasets, such as a `str` (for full matches or partial matches
2187 via globs), `re.Pattern` (for partial matches), or iterable
2188 thereof. ``...`` can be used to search all collections (actually
2189 just all `~CollectionType.RUN` collections, because this will
2190 still find all datasets). If not provided,
2191 ``self.default.collections`` is used. Ignored unless ``datasets``
2192 is also passed. See :ref:`daf_butler_collection_expressions` for
2193 more information.
2194 where : `str`, optional
2195 A string expression similar to a SQL WHERE clause. May involve
2196 any column of a dimension table or (as a shortcut for the primary
2197 key column of a dimension table) dimension name. See
2198 :ref:`daf_butler_dimension_expressions` for more information.
2199 components : `bool`, optional
2200 Must be `False`. Provided only for backwards compatibility. After
2201 v27 this argument will be removed entirely.
2202 bind : `~collections.abc.Mapping`, optional
2203 Mapping containing literal values that should be injected into the
2204 ``where`` expression, keyed by the identifiers they replace.
2205 Values of collection type can be expanded in some cases; see
2206 :ref:`daf_butler_dimension_expressions_identifiers` for more
2207 information.
2208 check : `bool`, optional
2209 If `True` (default) check the query for consistency before
2210 executing it. This may reject some valid queries that resemble
2211 common mistakes (e.g. queries for visits without specifying an
2212 instrument).
2213 **kwargs
2214 Additional keyword arguments are forwarded to
2215 `DataCoordinate.standardize` when processing the ``dataId``
2216 argument (and may be used to provide a constraining data ID even
2217 when the ``dataId`` argument is `None`).
2219 Returns
2220 -------
2221 dataIds : `.queries.DataCoordinateQueryResults`
2222 Data IDs matching the given query parameters. These are guaranteed
2223 to identify all dimensions (`DataCoordinate.hasFull` returns
2224 `True`), but will not contain `DimensionRecord` objects
2225 (`DataCoordinate.hasRecords` returns `False`). Call
2226 `~.queries.DataCoordinateQueryResults.expanded` on the
2227 returned object to fetch those (and consider using
2228 `~.queries.DataCoordinateQueryResults.materialize` on the
2229 returned object first if the expected number of rows is very
2230 large). See documentation for those methods for additional
2231 information.
2233 Raises
2234 ------
2235 lsst.daf.butler.registry.NoDefaultCollectionError
2236 Raised if ``collections`` is `None` and
2237 ``self.defaults.collections`` is `None`.
2238 lsst.daf.butler.registry.CollectionExpressionError
2239 Raised when ``collections`` expression is invalid.
2240 lsst.daf.butler.registry.DataIdError
2241 Raised when ``dataId`` or keyword arguments specify unknown
2242 dimensions or values, or when they contain inconsistent values.
2243 lsst.daf.butler.registry.DatasetTypeExpressionError
2244 Raised when ``datasetType`` expression is invalid.
2245 lsst.daf.butler.registry.UserExpressionError
2246 Raised when ``where`` expression is invalid.
2247 """
2248 if components is not _DefaultMarker:
2249 if components is not False:
2250 raise DatasetTypeError(
2251 "Dataset component queries are no longer supported by Registry. Use "
2252 "DatasetType methods to obtain components from parent dataset types instead."
2253 )
2254 else:
2255 warnings.warn(
2256 "The components parameter is ignored. It will be removed after v27.",
2257 category=FutureWarning,
2258 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
2259 )
2260 requested_dimensions = self.dimensions.conform(dimensions)
2261 doomed_by: list[str] = []
2262 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
2263 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args(
2264 datasets, collections, doomed_by=doomed_by
2265 )
2266 if collection_wildcard is not None and collection_wildcard.empty():
2267 doomed_by.append("No data coordinates can be found because collection list is empty.")
2268 summary = queries.QuerySummary(
2269 requested=requested_dimensions,
2270 column_types=self._managers.column_types,
2271 data_id=data_id,
2272 expression=where,
2273 bind=bind,
2274 defaults=self.defaults.dataId,
2275 check=check,
2276 datasets=resolved_dataset_types,
2277 )
2278 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
2279 for datasetType in resolved_dataset_types:
2280 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
2281 query = builder.finish()
2283 return queries.DataCoordinateQueryResults(query)
2285 def queryDimensionRecords(
2286 self,
2287 element: DimensionElement | str,
2288 *,
2289 dataId: DataId | None = None,
2290 datasets: Any = None,
2291 collections: CollectionArgType | None = None,
2292 where: str = "",
2293 components: bool | _Marker = _DefaultMarker,
2294 bind: Mapping[str, Any] | None = None,
2295 check: bool = True,
2296 **kwargs: Any,
2297 ) -> queries.DimensionRecordQueryResults:
2298 """Query for dimension information matching user-provided criteria.
2300 Parameters
2301 ----------
2302 element : `DimensionElement` or `str`
2303 The dimension element to obtain records for.
2304 dataId : `dict` or `DataCoordinate`, optional
2305 A data ID whose key-value pairs are used as equality constraints
2306 in the query.
2307 datasets : dataset type expression, optional
2308 An expression that fully or partially identifies dataset types
2309 that should constrain the yielded records. See `queryDataIds` and
2310 :ref:`daf_butler_dataset_type_expressions` for more information.
2311 collections : collection expression, optional
2312 An expression that identifies the collections to search for
2313 datasets, such as a `str` (for full matches or partial matches
2314 via globs), `re.Pattern` (for partial matches), or iterable
2315 thereof. ``...`` can be used to search all collections (actually
2316 just all `~CollectionType.RUN` collections, because this will
2317 still find all datasets). If not provided,
2318 ``self.default.collections`` is used. Ignored unless ``datasets``
2319 is also passed. See :ref:`daf_butler_collection_expressions` for
2320 more information.
2321 where : `str`, optional
2322 A string expression similar to a SQL WHERE clause. See
2323 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
2324 information.
2325 components : `bool`, optional
2326 Whether to apply dataset expressions to components as well.
2327 See `queryDataIds` for more information.
2329 Must be `False`. Provided only for backwards compatibility. After
2330 v27 this argument will be removed entirely.
2331 bind : `~collections.abc.Mapping`, optional
2332 Mapping containing literal values that should be injected into the
2333 ``where`` expression, keyed by the identifiers they replace.
2334 Values of collection type can be expanded in some cases; see
2335 :ref:`daf_butler_dimension_expressions_identifiers` for more
2336 information.
2337 check : `bool`, optional
2338 If `True` (default) check the query for consistency before
2339 executing it. This may reject some valid queries that resemble
2340 common mistakes (e.g. queries for visits without specifying an
2341 instrument).
2342 **kwargs
2343 Additional keyword arguments are forwarded to
2344 `DataCoordinate.standardize` when processing the ``dataId``
2345 argument (and may be used to provide a constraining data ID even
2346 when the ``dataId`` argument is `None`).
2348 Returns
2349 -------
2350 dataIds : `.queries.DimensionRecordQueryResults`
2351 Data IDs matching the given query parameters.
2353 Raises
2354 ------
2355 lsst.daf.butler.registry.NoDefaultCollectionError
2356 Raised if ``collections`` is `None` and
2357 ``self.defaults.collections`` is `None`.
2358 lsst.daf.butler.registry.CollectionExpressionError
2359 Raised when ``collections`` expression is invalid.
2360 lsst.daf.butler.registry.DataIdError
2361 Raised when ``dataId`` or keyword arguments specify unknown
2362 dimensions or values, or when they contain inconsistent values.
2363 lsst.daf.butler.registry.DatasetTypeExpressionError
2364 Raised when ``datasetType`` expression is invalid.
2365 lsst.daf.butler.registry.UserExpressionError
2366 Raised when ``where`` expression is invalid.
2367 """
2368 if components is not _DefaultMarker:
2369 if components is not False:
2370 raise DatasetTypeError(
2371 "Dataset component queries are no longer supported by Registry. Use "
2372 "DatasetType methods to obtain components from parent dataset types instead."
2373 )
2374 else:
2375 warnings.warn(
2376 "The components parameter is ignored. It will be removed after v27.",
2377 category=FutureWarning,
2378 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
2379 )
2380 if not isinstance(element, DimensionElement):
2381 try:
2382 element = self.dimensions[element]
2383 except KeyError as e:
2384 raise DimensionNameError(
2385 f"No such dimension '{element}', available dimensions: " + str(self.dimensions.elements)
2386 ) from e
2387 doomed_by: list[str] = []
2388 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
2389 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args(
2390 datasets, collections, doomed_by=doomed_by
2391 )
2392 if collection_wildcard is not None and collection_wildcard.empty():
2393 doomed_by.append("No dimension records can be found because collection list is empty.")
2394 summary = queries.QuerySummary(
2395 requested=element.minimal_group,
2396 column_types=self._managers.column_types,
2397 data_id=data_id,
2398 expression=where,
2399 bind=bind,
2400 defaults=self.defaults.dataId,
2401 check=check,
2402 datasets=resolved_dataset_types,
2403 )
2404 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
2405 for datasetType in resolved_dataset_types:
2406 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
2407 query = builder.finish().with_record_columns(element.name)
2408 return queries.DatabaseDimensionRecordQueryResults(query, element)
2410 def queryDatasetAssociations(
2411 self,
2412 datasetType: str | DatasetType,
2413 collections: CollectionArgType | None = ...,
2414 *,
2415 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
2416 flattenChains: bool = False,
2417 ) -> Iterator[DatasetAssociation]:
2418 """Iterate over dataset-collection combinations where the dataset is in
2419 the collection.
2421 This method is a temporary placeholder for better support for
2422 association results in `queryDatasets`. It will probably be
2423 removed in the future, and should be avoided in production code
2424 whenever possible.
2426 Parameters
2427 ----------
2428 datasetType : `DatasetType` or `str`
2429 A dataset type object or the name of one.
2430 collections : collection expression, optional
2431 An expression that identifies the collections to search for
2432 datasets, such as a `str` (for full matches or partial matches
2433 via globs), `re.Pattern` (for partial matches), or iterable
2434 thereof. ``...`` can be used to search all collections (actually
2435 just all `~CollectionType.RUN` collections, because this will still
2436 find all datasets). If not provided, ``self.default.collections``
2437 is used. See :ref:`daf_butler_collection_expressions` for more
2438 information.
2439 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional
2440 If provided, only yield associations from collections of these
2441 types.
2442 flattenChains : `bool`, optional
2443 If `True`, search in the children of `~CollectionType.CHAINED`
2444 collections. If `False`, ``CHAINED`` collections are ignored.
2446 Yields
2447 ------
2448 association : `.DatasetAssociation`
2449 Object representing the relationship between a single dataset and
2450 a single collection.
2452 Raises
2453 ------
2454 lsst.daf.butler.registry.NoDefaultCollectionError
2455 Raised if ``collections`` is `None` and
2456 ``self.defaults.collections`` is `None`.
2457 lsst.daf.butler.registry.CollectionExpressionError
2458 Raised when ``collections`` expression is invalid.
2459 """
2460 if collections is None:
2461 if not self.defaults.collections:
2462 raise NoDefaultCollectionError(
2463 "No collections provided to queryDatasetAssociations, "
2464 "and no defaults from registry construction."
2465 )
2466 collections = self.defaults.collections
2467 collection_wildcard = CollectionWildcard.from_expression(collections)
2468 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache)
2469 parent_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType)
2470 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
2471 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
2472 for parent_collection_record in backend.resolve_collection_wildcard(
2473 collection_wildcard,
2474 collection_types=frozenset(collectionTypes),
2475 flatten_chains=flattenChains,
2476 ):
2477 # Resolve this possibly-chained collection into a list of
2478 # non-CHAINED collections that actually hold datasets of this
2479 # type.
2480 candidate_collection_records = backend.resolve_dataset_collections(
2481 parent_dataset_type,
2482 CollectionWildcard.from_names([parent_collection_record.name]),
2483 allow_calibration_collections=True,
2484 governor_constraints={},
2485 )
2486 if not candidate_collection_records:
2487 continue
2488 with backend.context() as context:
2489 relation = backend.make_dataset_query_relation(
2490 parent_dataset_type,
2491 candidate_collection_records,
2492 columns={"dataset_id", "run", "timespan", "collection"},
2493 context=context,
2494 )
2495 reader = queries.DatasetRefReader(
2496 parent_dataset_type,
2497 translate_collection=lambda k: self._managers.collections[k].name,
2498 full=False,
2499 )
2500 for row in context.fetch_iterable(relation):
2501 ref = reader.read(row)
2502 collection_record = self._managers.collections[row[collection_tag]]
2503 if collection_record.type is CollectionType.CALIBRATION:
2504 timespan = row[timespan_tag]
2505 else:
2506 # For backwards compatibility and (possibly?) user
2507 # convenience we continue to define the timespan of a
2508 # DatasetAssociation row for a non-CALIBRATION
2509 # collection to be None rather than a fully unbounded
2510 # timespan.
2511 timespan = None
2512 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
2514 def get_datastore_records(self, ref: DatasetRef) -> DatasetRef:
2515 """Retrieve datastore records for given ref.
2517 Parameters
2518 ----------
2519 ref : `DatasetRef`
2520 Dataset reference for which to retrieve its corresponding datastore
2521 records.
2523 Returns
2524 -------
2525 updated_ref : `DatasetRef`
2526 Dataset reference with filled datastore records.
2528 Notes
2529 -----
2530 If this method is called with the dataset ref that is not known to the
2531 registry then the reference with an empty set of records is returned.
2532 """
2533 datastore_records: dict[str, list[StoredDatastoreItemInfo]] = {}
2534 for opaque, record_class in self._datastore_record_classes.items():
2535 records = self.fetchOpaqueData(opaque, dataset_id=ref.id)
2536 datastore_records[opaque] = [record_class.from_record(record) for record in records]
2537 return ref.replace(datastore_records=datastore_records)
2539 def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None:
2540 """Store datastore records for given refs.
2542 Parameters
2543 ----------
2544 refs : `~collections.abc.Mapping` [`str`, `DatasetRef`]
2545 Mapping of a datastore name to dataset reference stored in that
2546 datastore, reference must include datastore records.
2547 """
2548 for datastore_name, ref in refs.items():
2549 # Store ref IDs in the bridge table.
2550 bridge = self._managers.datastores.register(datastore_name)
2551 bridge.insert([ref])
2553 # store records in opaque tables
2554 assert ref._datastore_records is not None, "Dataset ref must have datastore records"
2555 for table_name, records in ref._datastore_records.items():
2556 opaque_table = self._managers.opaque.get(table_name)
2557 assert opaque_table is not None, f"Unexpected opaque table name {table_name}"
2558 opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records))
2560 def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> None:
2561 """Create opaque tables used by datastores.
2563 Parameters
2564 ----------
2565 tables : `~collections.abc.Mapping`
2566 Maps opaque table name to its definition.
2568 Notes
2569 -----
2570 This method should disappear in the future when opaque table
2571 definitions will be provided during `Registry` construction.
2572 """
2573 datastore_record_classes = {}
2574 for table_name, table_def in tables.items():
2575 datastore_record_classes[table_name] = table_def.record_class
2576 try:
2577 self._managers.opaque.register(table_name, table_def.table_spec)
2578 except ReadOnlyDatabaseError:
2579 # If the database is read only and we just tried and failed to
2580 # create a table, it means someone is trying to create a
2581 # read-only butler client for an empty repo. That should be
2582 # okay, as long as they then try to get any datasets before
2583 # some other client creates the table. Chances are they're
2584 # just validating configuration.
2585 pass
2586 self._datastore_record_classes = datastore_record_classes
2588 def preload_cache(self) -> None:
2589 """Immediately load caches that are used for common operations."""
2590 self.dimension_record_cache.preload_cache()
2592 @property
2593 def obsCoreTableManager(self) -> ObsCoreTableManager | None:
2594 """The ObsCore manager instance for this registry
2595 (`~.interfaces.ObsCoreTableManager`
2596 or `None`).
2598 ObsCore manager may not be implemented for all registry backend, or
2599 may not be enabled for many repositories.
2600 """
2601 return self._managers.obscore
2603 storageClasses: StorageClassFactory
2604 """All storage classes known to the registry (`StorageClassFactory`).
2605 """
2607 _defaults: RegistryDefaults
2608 """Default collections used for registry queries (`RegistryDefaults`)."""