Coverage for python/lsst/daf/butler/registry/sql_registry.py: 18%
583 statements
coverage.py v7.4.4, created at 2024-04-05 02:53 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30from .. import ddl
32__all__ = ("SqlRegistry",)
34import contextlib
35import logging
36import warnings
37from collections.abc import Iterable, Iterator, Mapping, Sequence
38from typing import TYPE_CHECKING, Any, Literal, cast
40import sqlalchemy
41from lsst.daf.relation import LeafRelation, Relation
42from lsst.resources import ResourcePathExpression
43from lsst.utils.introspection import find_outside_stacklevel
44from lsst.utils.iteration import ensure_iterable
46from .._column_tags import DatasetColumnTag
47from .._config import Config
48from .._dataset_association import DatasetAssociation
49from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
50from .._dataset_type import DatasetType
51from .._exceptions import CalibrationLookupError, DimensionNameError
52from .._named import NamedKeyMapping, NameLookupMapping
53from .._storage_class import StorageClassFactory
54from .._timespan import Timespan
55from ..dimensions import (
56 DataCoordinate,
57 DataId,
58 Dimension,
59 DimensionConfig,
60 DimensionElement,
61 DimensionGraph,
62 DimensionGroup,
63 DimensionRecord,
64 DimensionUniverse,
65)
66from ..dimensions.record_cache import DimensionRecordCache
67from ..progress import Progress
68from ..registry import (
69 ArgumentError,
70 CollectionExpressionError,
71 CollectionSummary,
72 CollectionType,
73 CollectionTypeError,
74 ConflictingDefinitionError,
75 DataIdValueError,
76 DatasetTypeError,
77 InconsistentDataIdError,
78 MissingDatasetTypeError,
79 NoDefaultCollectionError,
80 OrphanedRecordError,
81 RegistryConfig,
82 RegistryConsistencyError,
83 RegistryDefaults,
84 queries,
85)
86from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError, RunRecord
87from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
88from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
89from ..utils import _DefaultMarker, _Marker, transactional
91if TYPE_CHECKING:
92 from .._butler_config import ButlerConfig
93 from ..datastore._datastore import DatastoreOpaqueTable
94 from ..datastore.stored_file_info import StoredDatastoreItemInfo
95 from ..registry._registry import CollectionArgType
96 from ..registry.interfaces import (
97 CollectionRecord,
98 Database,
99 DatastoreRegistryBridgeManager,
100 ObsCoreTableManager,
101 )
104_LOG = logging.getLogger(__name__)
107class SqlRegistry:
108 """Butler Registry implementation that uses SQL database as backend.
110 Parameters
111 ----------
112 database : `Database`
113 Database instance to store Registry.
114 defaults : `RegistryDefaults`
115 Default collection search path and/or output `~CollectionType.RUN`
116 collection.
117 managers : `RegistryManagerInstances`
118 All the managers required for this registry.
119 """
121 defaultConfigFile: str | None = None
122 """Path to configuration defaults. Accessed within the ``configs`` resource
123 or relative to a search path. Can be None if no defaults specified.
124 """
126 @classmethod
127 def forceRegistryConfig(
128 cls, config: ButlerConfig | RegistryConfig | Config | str | None
129 ) -> RegistryConfig:
130 """Force the supplied config to a `RegistryConfig`.
132 Parameters
133 ----------
134 config : `RegistryConfig`, `Config` or `str` or `None`
135 Registry configuration; if missing, the default configuration will
136 be loaded from registry.yaml.
138 Returns
139 -------
140 registry_config : `RegistryConfig`
141 A registry config.
142 """
143 if not isinstance(config, RegistryConfig):
144 if isinstance(config, str | Config) or config is None:
145 config = RegistryConfig(config)
146 else:
147 raise ValueError(f"Incompatible Registry configuration: {config}")
148 return config
150 @classmethod
151 def createFromConfig(
152 cls,
153 config: RegistryConfig | str | None = None,
154 dimensionConfig: DimensionConfig | str | None = None,
155 butlerRoot: ResourcePathExpression | None = None,
156 ) -> SqlRegistry:
157 """Create registry database and return `SqlRegistry` instance.
159 This method initializes database contents; the database must be empty
160 prior to calling this method.
162 Parameters
163 ----------
164 config : `RegistryConfig` or `str`, optional
165 Registry configuration; if missing, the default configuration will
166 be loaded from registry.yaml.
167 dimensionConfig : `DimensionConfig` or `str`, optional
168 Dimensions configuration; if missing, the default configuration
169 will be loaded from dimensions.yaml.
170 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
171 Path to the repository root this `SqlRegistry` will manage.
173 Returns
174 -------
175 registry : `SqlRegistry`
176 A new `SqlRegistry` instance.
177 """
178 config = cls.forceRegistryConfig(config)
179 config.replaceRoot(butlerRoot)
181 if isinstance(dimensionConfig, str):
182 dimensionConfig = DimensionConfig(dimensionConfig)
183 elif dimensionConfig is None:
184 dimensionConfig = DimensionConfig()
185 elif not isinstance(dimensionConfig, DimensionConfig):
186 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
188 DatabaseClass = config.getDatabaseClass()
189 database = DatabaseClass.fromUri(
190 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace")
191 )
192 managerTypes = RegistryManagerTypes.fromConfig(config)
193 managers = managerTypes.makeRepo(database, dimensionConfig)
194 return cls(database, RegistryDefaults(), managers)
196 @classmethod
197 def fromConfig(
198 cls,
199 config: ButlerConfig | RegistryConfig | Config | str,
200 butlerRoot: ResourcePathExpression | None = None,
201 writeable: bool = True,
202 defaults: RegistryDefaults | None = None,
203 ) -> SqlRegistry:
204 """Create `Registry` subclass instance from `config`.
206 Registry database must be initialized prior to calling this method.
208 Parameters
209 ----------
210 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
211 Registry configuration.
212 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
213 Path to the repository root this `Registry` will manage.
214 writeable : `bool`, optional
215 If `True` (default) create a read-write connection to the database.
216 defaults : `RegistryDefaults`, optional
217 Default collection search path and/or output `~CollectionType.RUN`
218 collection.
220 Returns
221 -------
222 registry : `SqlRegistry`
223 A new `SqlRegistry` subclass instance.
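Examples
--------
A minimal, illustrative sketch (not part of the original source); the
configuration path is hypothetical and the repository must already be
initialized:

>>> from lsst.daf.butler.registry.sql_registry import SqlRegistry
>>> registry = SqlRegistry.fromConfig("/repo/butler.yaml", writeable=False)
>>> registry.isWriteable()
False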
224 """
225 config = cls.forceRegistryConfig(config)
226 config.replaceRoot(butlerRoot)
227 DatabaseClass = config.getDatabaseClass()
228 database = DatabaseClass.fromUri(
229 config.connectionString,
230 origin=config.get("origin", 0),
231 namespace=config.get("namespace"),
232 writeable=writeable,
233 )
234 managerTypes = RegistryManagerTypes.fromConfig(config)
235 with database.session():
236 managers = managerTypes.loadRepo(database)
237 if defaults is None:
238 defaults = RegistryDefaults()
239 return cls(database, defaults, managers)
241 def __init__(
242 self,
243 database: Database,
244 defaults: RegistryDefaults,
245 managers: RegistryManagerInstances,
246 ):
247 self._db = database
248 self._managers = managers
249 self.storageClasses = StorageClassFactory()
250 # This is public to SqlRegistry's internal-to-daf_butler callers, but
251 # it is intentionally not part of RegistryShim.
252 self.dimension_record_cache = DimensionRecordCache(
253 self._managers.dimensions.universe,
254 fetch=self._managers.dimensions.fetch_cache_dict,
255 )
256 # Intentionally invoke property setter to initialize defaults. This
257 # can only be done after most of the rest of Registry has already been
258 # initialized, and must be done before the property getter is used.
259 self.defaults = defaults
260 # TODO: This is currently initialized by `make_datastore_tables`,
261 # eventually we'll need to do it during construction.
262 # The mapping is indexed by the opaque table name.
263 self._datastore_record_classes: Mapping[str, type[StoredDatastoreItemInfo]] = {}
265 def __str__(self) -> str:
266 return str(self._db)
268 def __repr__(self) -> str:
269 return f"SqlRegistry({self._db!r}, {self.dimensions!r})"
271 def isWriteable(self) -> bool:
272 """Return `True` if this registry allows write operations, and `False`
273 otherwise.
274 """
275 return self._db.isWriteable()
277 def copy(self, defaults: RegistryDefaults | None = None) -> SqlRegistry:
278 """Create a new `SqlRegistry` backed by the same data repository
279 as this one and sharing a database connection pool with it, but with
280 independent defaults and database sessions.
282 Parameters
283 ----------
284 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
285 Default collections and data ID values for the new registry. If
286 not provided, ``self.defaults`` will be used (but future changes
287 to either registry's defaults will not affect the other).
289 Returns
290 -------
291 copy : `SqlRegistry`
292 A new `SqlRegistry` instance with its own defaults.
293 """
294 if defaults is None:
295 # No need to copy, because `RegistryDefaults` is immutable; we
296 # effectively copy on write.
297 defaults = self.defaults
298 db = self._db.clone()
299 result = SqlRegistry(db, defaults, self._managers.clone(db))
300 result._datastore_record_classes = dict(self._datastore_record_classes)
301 result.dimension_record_cache.load_from(self.dimension_record_cache)
302 return result
304 @property
305 def dimensions(self) -> DimensionUniverse:
306 """Definitions of all dimensions recognized by this `Registry`
307 (`DimensionUniverse`).
308 """
309 return self._managers.dimensions.universe
311 @property
312 def defaults(self) -> RegistryDefaults:
313 """Default collection search path and/or output `~CollectionType.RUN`
314 collection (`~lsst.daf.butler.registry.RegistryDefaults`).
316 This is an immutable struct whose components may not be set
317 individually, but the entire struct can be set by assigning to this
318 property.
319 """
320 return self._defaults
322 @defaults.setter
323 def defaults(self, value: RegistryDefaults) -> None:
324 if value.run is not None:
325 self.registerRun(value.run)
326 value.finish(self)
327 self._defaults = value
329 def refresh(self) -> None:
330 """Refresh all in-memory state by querying the database.
332 This may be necessary to enable querying for entities added by other
333 registry instances after this one was constructed.
334 """
335 self.dimension_record_cache.reset()
336 with self._db.transaction():
337 self._managers.refresh()
339 def caching_context(self) -> contextlib.AbstractContextManager[None]:
340 """Return context manager that enables caching.
342 Returns
343 -------
344 manager
345 A context manager that enables client-side caching. Entering
346 the context returns `None`.
347 """
348 return self._managers.caching_context_manager()
350 @contextlib.contextmanager
351 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
352 """Return a context manager that represents a transaction.
354 Parameters
355 ----------
356 savepoint : `bool`
357 Whether to issue a SAVEPOINT in the database.
359 Yields
360 ------
361 `None`
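Examples
--------
A minimal, illustrative sketch (not part of the original source); it
assumes ``registry`` is an existing writeable `SqlRegistry` and that the
opaque table and its columns were registered earlier:

>>> with registry.transaction(savepoint=True):
...     registry.insertOpaqueData("my_table", {"key": 1, "value": "a"})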
362 """
363 with self._db.transaction(savepoint=savepoint):
364 yield
366 def resetConnectionPool(self) -> None:
367 """Reset SQLAlchemy connection pool for `SqlRegistry` database.
369 This operation is useful when using the registry with fork-based
370 multiprocessing. To use the registry across a fork boundary one has to
371 make sure that there are no currently active connections (no session or
372 transaction is in progress) and that the connection pool is reset using
373 this method. This method should be called by the child process
374 immediately after the fork.
375 """
376 self._db._engine.dispose()
378 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
379 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
380 other data repository client.
382 Opaque table records can be added via `insertOpaqueData`, retrieved via
383 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
385 Parameters
386 ----------
387 tableName : `str`
388 Logical name of the opaque table. This may differ from the
389 actual name used in the database by a prefix and/or suffix.
390 spec : `ddl.TableSpec`
391 Specification for the table to be added.
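Examples
--------
A minimal, illustrative sketch (not part of the original source); the
table name and column layout are hypothetical, the exact `ddl.FieldSpec`
arguments are assumptions, and ``registry`` is assumed to be an existing
writeable `SqlRegistry`:

>>> import sqlalchemy
>>> from lsst.daf.butler import ddl
>>> spec = ddl.TableSpec(
...     fields=[
...         ddl.FieldSpec("dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
...         ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
...     ]
... )
>>> registry.registerOpaqueTable("my_datastore_records", spec)
>>> registry.insertOpaqueData("my_datastore_records", {"dataset_id": 1, "path": "a/b.fits"})
>>> rows = list(registry.fetchOpaqueData("my_datastore_records", dataset_id=1))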
392 """
393 self._managers.opaque.register(tableName, spec)
395 @transactional
396 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
397 """Insert records into an opaque table.
399 Parameters
400 ----------
401 tableName : `str`
402 Logical name of the opaque table. Must match the name used in a
403 previous call to `registerOpaqueTable`.
404 *data
405 Each additional positional argument is a dictionary that represents
406 a single row to be added.
407 """
408 self._managers.opaque[tableName].insert(*data)
410 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
411 """Retrieve records from an opaque table.
413 Parameters
414 ----------
415 tableName : `str`
416 Logical name of the opaque table. Must match the name used in a
417 previous call to `registerOpaqueTable`.
418 **where
419 Additional keyword arguments are interpreted as equality
420 constraints that restrict the returned rows (combined with AND);
421 keyword arguments are column names and values are the values they
422 must have.
424 Yields
425 ------
426 row : `dict`
427 A dictionary representing a single result row.
428 """
429 yield from self._managers.opaque[tableName].fetch(**where)
431 @transactional
432 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
433 """Remove records from an opaque table.
435 Parameters
436 ----------
437 tableName : `str`
438 Logical name of the opaque table. Must match the name used in a
439 previous call to `registerOpaqueTable`.
440 **where
441 Additional keyword arguments are interpreted as equality
442 constraints that restrict the deleted rows (combined with AND);
443 keyword arguments are column names and values are the values they
444 must have.
445 """
446 self._managers.opaque[tableName].delete(where.keys(), where)
448 def registerCollection(
449 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
450 ) -> bool:
451 """Add a new collection if one with the given name does not exist.
453 Parameters
454 ----------
455 name : `str`
456 The name of the collection to create.
457 type : `CollectionType`
458 Enum value indicating the type of collection to create.
459 doc : `str`, optional
460 Documentation string for the collection.
462 Returns
463 -------
464 registered : `bool`
465 `True` if the collection was created by this call, `False` if it
466 already existed.
468 Notes
469 -----
470 This method cannot be called within transactions, as it needs to be
471 able to perform its own transaction to be concurrent.
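Examples
--------
A minimal, illustrative sketch (not part of the original source); the
collection name is hypothetical and ``registry`` is assumed to be an
existing writeable `SqlRegistry`:

>>> from lsst.daf.butler import CollectionType
>>> registry.registerCollection("u/someone/tagged", CollectionType.TAGGED)
True
>>> registry.registerCollection("u/someone/tagged", CollectionType.TAGGED)
False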
472 """
473 _, registered = self._managers.collections.register(name, type, doc=doc)
474 return registered
476 def getCollectionType(self, name: str) -> CollectionType:
477 """Return an enumeration value indicating the type of the given
478 collection.
480 Parameters
481 ----------
482 name : `str`
483 The name of the collection.
485 Returns
486 -------
487 type : `CollectionType`
488 Enum value indicating the type of this collection.
490 Raises
491 ------
492 lsst.daf.butler.registry.MissingCollectionError
493 Raised if no collection with the given name exists.
494 """
495 return self._managers.collections.find(name).type
497 def get_collection_record(self, name: str) -> CollectionRecord:
498 """Return the record for this collection.
500 Parameters
501 ----------
502 name : `str`
503 Name of the collection for which the record is to be retrieved.
505 Returns
506 -------
507 record : `CollectionRecord`
508 The record for this collection.
509 """
510 return self._managers.collections.find(name)
512 def registerRun(self, name: str, doc: str | None = None) -> bool:
513 """Add a new run if one with the given name does not exist.
515 Parameters
516 ----------
517 name : `str`
518 The name of the run to create.
519 doc : `str`, optional
520 Documentation string for the collection.
522 Returns
523 -------
524 registered : `bool`
525 Boolean indicating whether a new run was registered. `False`
526 if it already existed.
528 Notes
529 -----
530 This method cannot be called within transactions, as it needs to be
531 able to perform its own transaction to be concurrent.
532 """
533 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
534 return registered
536 @transactional
537 def removeCollection(self, name: str) -> None:
538 """Remove the given collection from the registry.
540 Parameters
541 ----------
542 name : `str`
543 The name of the collection to remove.
545 Raises
546 ------
547 lsst.daf.butler.registry.MissingCollectionError
548 Raised if no collection with the given name exists.
549 sqlalchemy.exc.IntegrityError
550 Raised if the database rows associated with the collection are
551 still referenced by some other table, such as a dataset in a
552 datastore (for `~CollectionType.RUN` collections only) or a
553 `~CollectionType.CHAINED` collection of which this collection is
554 a child.
556 Notes
557 -----
558 If this is a `~CollectionType.RUN` collection, all datasets and quanta
559 in it will be removed from the `Registry` database. This requires that
560 those datasets be removed (or at least trashed) from any datastores
561 that hold them first.
563 A collection may not be deleted as long as it is referenced by a
564 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
565 be deleted or redefined first.
566 """
567 self._managers.collections.remove(name)
569 def getCollectionChain(self, parent: str) -> tuple[str, ...]:
570 """Return the child collections in a `~CollectionType.CHAINED`
571 collection.
573 Parameters
574 ----------
575 parent : `str`
576 Name of the chained collection. Must have already been added via
577 a call to `Registry.registerCollection`.
579 Returns
580 -------
581 children : `~collections.abc.Sequence` [ `str` ]
582 An ordered sequence of collection names that are searched when the
583 given chained collection is searched.
585 Raises
586 ------
587 lsst.daf.butler.registry.MissingCollectionError
588 Raised if ``parent`` does not exist in the `Registry`.
589 lsst.daf.butler.registry.CollectionTypeError
590 Raised if ``parent`` does not correspond to a
591 `~CollectionType.CHAINED` collection.
592 """
593 record = self._managers.collections.find(parent)
594 if record.type is not CollectionType.CHAINED:
595 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
596 assert isinstance(record, ChainedCollectionRecord)
597 return record.children
599 @transactional
600 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
601 """Define or redefine a `~CollectionType.CHAINED` collection.
603 Parameters
604 ----------
605 parent : `str`
606 Name of the chained collection. Must have already been added via
607 a call to `Registry.registerCollection`.
608 children : collection expression
609 An expression defining an ordered search of child collections,
610 generally an iterable of `str`; see
611 :ref:`daf_butler_collection_expressions` for more information.
612 flatten : `bool`, optional
613 If `True` (`False` is default), recursively flatten out any nested
614 `~CollectionType.CHAINED` collections in ``children`` first.
616 Raises
617 ------
618 lsst.daf.butler.registry.MissingCollectionError
619 Raised when any of the given collections do not exist in the
620 `Registry`.
621 lsst.daf.butler.registry.CollectionTypeError
622 Raised if ``parent`` does not correspond to a
623 `~CollectionType.CHAINED` collection.
624 CollectionCycleError
625 Raised if the given collections contain a cycle.
627 Notes
628 -----
629 If this function is called within a call to ``Butler.transaction``, it
630 will hold a lock that prevents other processes from modifying the
631 parent collection until the end of the transaction. Keep these
632 transactions short.
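Examples
--------
A minimal, illustrative sketch (not part of the original source); the
collection names are hypothetical and the child collections must already
exist:

>>> from lsst.daf.butler import CollectionType
>>> _ = registry.registerCollection("u/someone/chain", CollectionType.CHAINED)
>>> registry.setCollectionChain("u/someone/chain", ["u/someone/run1", "u/someone/run2"])
>>> registry.getCollectionChain("u/someone/chain")
('u/someone/run1', 'u/someone/run2')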
633 """
634 record = self._managers.collections.find(parent)
635 if record.type is not CollectionType.CHAINED:
636 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
637 assert isinstance(record, ChainedCollectionRecord)
638 children = CollectionWildcard.from_expression(children).require_ordered()
639 if children != record.children or flatten:
640 self._managers.collections.update_chain(record, children, flatten=flatten)
642 def getCollectionParentChains(self, collection: str) -> set[str]:
643 """Return the CHAINED collections that directly contain the given one.
645 Parameters
646 ----------
647 collection : `str`
648 Name of the collection.
650 Returns
651 -------
652 chains : `set` of `str`
653 Set of `~CollectionType.CHAINED` collection names.
654 """
655 return self._managers.collections.getParentChains(self._managers.collections.find(collection).key)
657 def getCollectionDocumentation(self, collection: str) -> str | None:
658 """Retrieve the documentation string for a collection.
660 Parameters
661 ----------
662 collection : `str`
663 Name of the collection.
665 Returns
666 -------
667 docs : `str` or `None`
668 Docstring for the collection with the given name.
669 """
670 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)
672 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
673 """Set the documentation string for a collection.
675 Parameters
676 ----------
677 collection : `str`
678 Name of the collection.
679 doc : `str` or `None`
680 Docstring for the collection with the given name; will replace any
681 existing docstring. Passing `None` will remove any existing
682 docstring.
683 """
684 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)
686 def getCollectionSummary(self, collection: str) -> CollectionSummary:
687 """Return a summary for the given collection.
689 Parameters
690 ----------
691 collection : `str`
692 Name of the collection for which a summary is to be retrieved.
694 Returns
695 -------
696 summary : `~lsst.daf.butler.registry.CollectionSummary`
697 Summary of the dataset types and governor dimension values in
698 this collection.
699 """
700 record = self._managers.collections.find(collection)
701 return self._managers.datasets.getCollectionSummary(record)
703 def registerDatasetType(self, datasetType: DatasetType) -> bool:
704 """Add a new `DatasetType` to the Registry.
706 It is not an error to register the same `DatasetType` twice.
708 Parameters
709 ----------
710 datasetType : `DatasetType`
711 The `DatasetType` to be added.
713 Returns
714 -------
715 inserted : `bool`
716 `True` if ``datasetType`` was inserted, `False` if an identical
717 existing `DatasetType` was found. Note that in either case the
718 DatasetType is guaranteed to be defined in the Registry
719 consistently with the given definition.
721 Raises
722 ------
723 ValueError
724 Raised if the dimensions or storage class are invalid.
725 lsst.daf.butler.registry.ConflictingDefinitionError
726 Raised if this `DatasetType` is already registered with a different
727 definition.
729 Notes
730 -----
731 This method cannot be called within transactions, as it needs to be
732 able to perform its own transaction to be concurrent.
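Examples
--------
A minimal, illustrative sketch (not part of the original source); the
dataset type name, dimensions, and storage class are hypothetical, and
the exact `DatasetType` constructor arguments are assumptions:

>>> from lsst.daf.butler import DatasetType
>>> datasetType = DatasetType(
...     "calexp",
...     dimensions=("instrument", "visit", "detector"),
...     storageClass="ExposureF",
...     universe=registry.dimensions,
... )
>>> registry.registerDatasetType(datasetType)
True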
733 """
734 return self._managers.datasets.register(datasetType)
736 def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
737 """Remove the named `DatasetType` from the registry.
739 .. warning::
741 Registry implementations can cache the dataset type definitions.
742 This means that deleting the dataset type definition may result in
743 unexpected behavior from other active butler processes that have not
744 seen the deletion.
746 Parameters
747 ----------
748 name : `str` or `tuple` [`str`]
749 Name of the type to be removed or tuple containing a list of type
750 names to be removed. Wildcards are allowed.
752 Raises
753 ------
754 lsst.daf.butler.registry.OrphanedRecordError
755 Raised if an attempt is made to remove the dataset type definition
756 when there are already datasets associated with it.
758 Notes
759 -----
760 If the dataset type is not registered the method will return without
761 action.
762 """
763 for datasetTypeExpression in ensure_iterable(name):
764 # Catch any warnings from the caller specifying a component
765 # dataset type. This will result in an error later but the
766 # warning could be confusing when the caller is not querying
767 # anything.
768 with warnings.catch_warnings():
769 warnings.simplefilter("ignore", category=FutureWarning)
770 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
771 if not datasetTypes:
772 _LOG.info("Dataset type %r not defined", datasetTypeExpression)
773 else:
774 for datasetType in datasetTypes:
775 self._managers.datasets.remove(datasetType.name)
776 _LOG.info("Removed dataset type %r", datasetType.name)
778 def getDatasetType(self, name: str) -> DatasetType:
779 """Get the `DatasetType`.
781 Parameters
782 ----------
783 name : `str`
784 Name of the type.
786 Returns
787 -------
788 type : `DatasetType`
789 The `DatasetType` associated with the given name.
791 Raises
792 ------
793 lsst.daf.butler.registry.MissingDatasetTypeError
794 Raised if the requested dataset type has not been registered.
796 Notes
797 -----
798 This method handles component dataset types automatically, though most
799 other registry operations do not.
800 """
801 parent_name, component = DatasetType.splitDatasetTypeName(name)
802 storage = self._managers.datasets[parent_name]
803 if component is None:
804 return storage.datasetType
805 else:
806 return storage.datasetType.makeComponentDatasetType(component)
808 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
809 """Test whether the given dataset ID generation mode is supported by
810 `insertDatasets`.
812 Parameters
813 ----------
814 mode : `DatasetIdGenEnum`
815 Enum value for the mode to test.
817 Returns
818 -------
819 supported : `bool`
820 Whether the given mode is supported.
821 """
822 return self._managers.datasets.supportsIdGenerationMode(mode)
824 def findDataset(
825 self,
826 datasetType: DatasetType | str,
827 dataId: DataId | None = None,
828 *,
829 collections: CollectionArgType | None = None,
830 timespan: Timespan | None = None,
831 datastore_records: bool = False,
832 **kwargs: Any,
833 ) -> DatasetRef | None:
834 """Find a dataset given its `DatasetType` and data ID.
836 This can be used to obtain a `DatasetRef` that permits the dataset to
837 be read from a `Datastore`. If the dataset is a component and can not
838 be found using the provided dataset type, a dataset ref for the parent
839 will be returned instead but with the correct dataset type.
841 Parameters
842 ----------
843 datasetType : `DatasetType` or `str`
844 A `DatasetType` or the name of one. If this is a `DatasetType`
845 instance, its storage class will be respected and propagated to
846 the output, even if it differs from the dataset type definition
847 in the registry, as long as the storage classes are convertible.
848 dataId : `dict` or `DataCoordinate`, optional
849 A `dict`-like object containing the `Dimension` links that identify
850 the dataset within a collection.
851 collections : collection expression, optional
852 An expression that fully or partially identifies the collections to
853 search for the dataset; see
854 :ref:`daf_butler_collection_expressions` for more information.
855 Defaults to ``self.defaults.collections``.
856 timespan : `Timespan`, optional
857 A timespan that the validity range of the dataset must overlap.
858 If not provided, any `~CollectionType.CALIBRATION` collections
859 matched by the ``collections`` argument will not be searched.
860 datastore_records : `bool`, optional
861 Whether to attach datastore records to the `DatasetRef`.
862 **kwargs
863 Additional keyword arguments passed to
864 `DataCoordinate.standardize` to convert ``dataId`` to a true
865 `DataCoordinate` or augment an existing one.
867 Returns
868 -------
869 ref : `DatasetRef`
870 A reference to the dataset, or `None` if no matching Dataset
871 was found.
873 Raises
874 ------
875 lsst.daf.butler.registry.NoDefaultCollectionError
876 Raised if ``collections`` is `None` and
877 ``self.defaults.collections`` is `None`.
878 LookupError
879 Raised if one or more data ID keys are missing.
880 lsst.daf.butler.registry.MissingDatasetTypeError
881 Raised if the dataset type does not exist.
882 lsst.daf.butler.registry.MissingCollectionError
883 Raised if any of ``collections`` does not exist in the registry.
885 Notes
886 -----
887 This method simply returns `None` and does not raise an exception even
888 when the set of collections searched is intrinsically incompatible with
889 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
890 only `~CollectionType.CALIBRATION` collections are being searched.
891 This may make it harder to debug some lookup failures, but the behavior
892 is intentional; we consider it more important that failed searches are
893 reported consistently, regardless of the reason, and that adding
894 additional collections that do not contain a match to the search path
895 never changes the behavior.
897 This method handles component dataset types automatically, though most
898 other registry operations do not.
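Examples
--------
A minimal, illustrative sketch (not part of the original source); the
dataset type name, collection, and data ID values are hypothetical:

>>> ref = registry.findDataset(
...     "calexp",
...     instrument="HypoCam",
...     visit=42,
...     detector=7,
...     collections=["u/someone/run1"],
... )
>>> ref is None or ref.datasetType.name == "calexp"
True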
899 """
900 if collections is None:
901 if not self.defaults.collections:
902 raise NoDefaultCollectionError(
903 "No collections provided to findDataset, and no defaults from registry construction."
904 )
905 collections = self.defaults.collections
906 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache)
907 with backend.caching_context():
908 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
909 if collection_wildcard.empty():
910 return None
911 matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
912 resolved_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType)
913 dataId = DataCoordinate.standardize(
914 dataId,
915 dimensions=resolved_dataset_type.dimensions,
916 universe=self.dimensions,
917 defaults=self.defaults.dataId,
918 **kwargs,
919 )
920 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors}
921 (filtered_collections,) = backend.filter_dataset_collections(
922 [resolved_dataset_type],
923 matched_collections,
924 governor_constraints=governor_constraints,
925 ).values()
926 if not filtered_collections:
927 return None
928 if timespan is None:
929 filtered_collections = [
930 collection_record
931 for collection_record in filtered_collections
932 if collection_record.type is not CollectionType.CALIBRATION
933 ]
934 if filtered_collections:
935 requested_columns = {"dataset_id", "run", "collection"}
936 with backend.context() as context:
937 predicate = context.make_data_coordinate_predicate(
938 dataId.subset(resolved_dataset_type.dimensions), full=False
939 )
940 if timespan is not None:
941 requested_columns.add("timespan")
942 predicate = predicate.logical_and(
943 context.make_timespan_overlap_predicate(
944 DatasetColumnTag(resolved_dataset_type.name, "timespan"), timespan
945 )
946 )
947 relation = backend.make_dataset_query_relation(
948 resolved_dataset_type, filtered_collections, requested_columns, context
949 ).with_rows_satisfying(predicate)
950 rows = list(context.fetch_iterable(relation))
951 else:
952 rows = []
953 if not rows:
954 return None
955 elif len(rows) == 1:
956 best_row = rows[0]
957 else:
958 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
959 collection_tag = DatasetColumnTag(resolved_dataset_type.name, "collection")
960 row_iter = iter(rows)
961 best_row = next(row_iter)
962 best_rank = rank_by_collection_key[best_row[collection_tag]]
963 have_tie = False
964 for row in row_iter:
965 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
966 best_row = row
967 best_rank = rank
968 have_tie = False
969 elif rank == best_rank:
970 have_tie = True
971 assert timespan is not None, "Rank ties should be impossible given DB constraints."
972 if have_tie:
973 raise CalibrationLookupError(
974 f"Ambiguous calibration lookup for {resolved_dataset_type.name} in collections "
975 f"{collection_wildcard.strings} with timespan {timespan}."
976 )
977 reader = queries.DatasetRefReader(
978 resolved_dataset_type,
979 translate_collection=lambda k: self._managers.collections[k].name,
980 )
981 ref = reader.read(best_row, data_id=dataId)
982 if datastore_records:
983 ref = self.get_datastore_records(ref)
985 return ref
987 @transactional
988 def insertDatasets(
989 self,
990 datasetType: DatasetType | str,
991 dataIds: Iterable[DataId],
992 run: str | None = None,
993 expand: bool = True,
994 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
995 ) -> list[DatasetRef]:
996 """Insert one or more datasets into the `Registry`.
998 This always adds new datasets; to associate existing datasets with
999 a new collection, use ``associate``.
1001 Parameters
1002 ----------
1003 datasetType : `DatasetType` or `str`
1004 A `DatasetType` or the name of one.
1005 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
1006 Dimension-based identifiers for the new datasets.
1007 run : `str`, optional
1008 The name of the run that produced the datasets. Defaults to
1009 ``self.defaults.run``.
1010 expand : `bool`, optional
1011 If `True` (default), expand data IDs as they are inserted. This is
1012 necessary in general to allow datastore to generate file templates,
1013 but it may be disabled if the caller can guarantee this is
1014 unnecessary.
1015 idGenerationMode : `DatasetIdGenEnum`, optional
1016 Specifies option for generating dataset IDs. By default unique IDs
1017 are generated for each inserted dataset.
1019 Returns
1020 -------
1021 refs : `list` of `DatasetRef`
1022 Resolved `DatasetRef` instances for all given data IDs (in the same
1023 order).
1025 Raises
1026 ------
1027 lsst.daf.butler.registry.DatasetTypeError
1028 Raised if ``datasetType`` is not known to registry.
1029 lsst.daf.butler.registry.CollectionTypeError
1030 Raised if ``run`` collection type is not `~CollectionType.RUN`.
1031 lsst.daf.butler.registry.NoDefaultCollectionError
1032 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
1033 lsst.daf.butler.registry.ConflictingDefinitionError
1034 If a dataset with the same dataset type and data ID as one of those
1035 given already exists in ``run``.
1036 lsst.daf.butler.registry.MissingCollectionError
1037 Raised if ``run`` does not exist in the registry.
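Examples
--------
A minimal, illustrative sketch (not part of the original source); the
run, dataset type, and data ID values are hypothetical and the dataset
type must already be registered:

>>> _ = registry.registerRun("u/someone/run1")
>>> refs = registry.insertDatasets(
...     "calexp",
...     dataIds=[{"instrument": "HypoCam", "visit": 42, "detector": 7}],
...     run="u/someone/run1",
... )
>>> len(refs)
1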
1038 """
1039 if isinstance(datasetType, DatasetType):
1040 storage = self._managers.datasets.find(datasetType.name)
1041 if storage is None:
1042 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
1043 else:
1044 storage = self._managers.datasets.find(datasetType)
1045 if storage is None:
1046 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
1047 if run is None:
1048 if self.defaults.run is None:
1049 raise NoDefaultCollectionError(
1050 "No run provided to insertDatasets, and no default from registry construction."
1051 )
1052 run = self.defaults.run
1053 runRecord = self._managers.collections.find(run)
1054 if runRecord.type is not CollectionType.RUN:
1055 raise CollectionTypeError(
1056 f"Given collection is of type {runRecord.type.name}; RUN collection required."
1057 )
1058 assert isinstance(runRecord, RunRecord)
1059 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
1060 if expand:
1061 expandedDataIds = [
1062 self.expandDataId(dataId, dimensions=storage.datasetType.dimensions)
1063 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
1064 ]
1065 else:
1066 expandedDataIds = [
1067 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
1068 ]
1069 try:
1070 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
1071 if self._managers.obscore:
1072 context = queries.SqlQueryContext(self._db, self._managers.column_types)
1073 self._managers.obscore.add_datasets(refs, context)
1074 except sqlalchemy.exc.IntegrityError as err:
1075 raise ConflictingDefinitionError(
1076 "A database constraint failure was triggered by inserting "
1077 f"one or more datasets of type {storage.datasetType} into "
1078 f"collection '{run}'. "
1079 "This probably means a dataset with the same data ID "
1080 "and dataset type already exists, but it may also mean a "
1081 "dimension row is missing."
1082 ) from err
1083 return refs
1085 @transactional
1086 def _importDatasets(
1087 self,
1088 datasets: Iterable[DatasetRef],
1089 expand: bool = True,
1090 ) -> list[DatasetRef]:
1091 """Import one or more datasets into the `Registry`.
1093 The difference from the `insertDatasets` method is that this method accepts
1094 `DatasetRef` instances which should already be resolved and have a
1095 dataset ID. If registry supports globally-unique dataset IDs (e.g.
1096 `uuid.UUID`) then datasets which already exist in the registry will be
1097 ignored if imported again.
1099 Parameters
1100 ----------
1101 datasets : `~collections.abc.Iterable` of `DatasetRef`
1102 Datasets to be inserted. All `DatasetRef` instances must have
1103 identical ``datasetType`` and ``run`` attributes. ``run``
1104 attribute can be `None` and defaults to ``self.defaults.run``.
1105 Datasets can specify ``id`` attribute which will be used for
1106 inserted datasets. All dataset IDs must have the same type
1107 (`int` or `uuid.UUID`); if the type of the dataset IDs does not match
1108 the configured backend then the IDs will be ignored and new IDs will
1109 be generated by the backend.
1110 expand : `bool`, optional
1111 If `True` (default), expand data IDs as they are inserted. This is
1112 necessary in general, but it may be disabled if the caller can
1113 guarantee this is unnecessary.
1115 Returns
1116 -------
1117 refs : `list` of `DatasetRef`
1118 Resolved `DatasetRef` instances for all given data IDs (in the same
1119 order). If any of ``datasets`` has an ID which already exists in
1120 the database then it will not be inserted or updated, but a
1121 resolved `DatasetRef` will be returned for it in any case.
1123 Raises
1124 ------
1125 lsst.daf.butler.registry.NoDefaultCollectionError
1126 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
1127 lsst.daf.butler.registry.DatasetTypeError
1128 Raised if datasets correspond to more than one dataset type or
1129 dataset type is not known to registry.
1130 lsst.daf.butler.registry.ConflictingDefinitionError
1131 If a dataset with the same dataset type and data ID as one of those
1132 given already exists in ``run``.
1133 lsst.daf.butler.registry.MissingCollectionError
1134 Raised if ``run`` does not exist in the registry.
1136 Notes
1137 -----
1138 This method is considered package-private and internal to Butler
1139 implementation. Clients outside daf_butler package should not use this
1140 method.
1141 """
1142 datasets = list(datasets)
1143 if not datasets:
1144 # nothing to do
1145 return []
1147 # find dataset type
1148 datasetTypes = {dataset.datasetType for dataset in datasets}
1149 if len(datasetTypes) != 1:
1150 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
1151 datasetType = datasetTypes.pop()
1153 # get storage handler for this dataset type
1154 storage = self._managers.datasets.find(datasetType.name)
1155 if storage is None:
1156 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
1158 # find run name
1159 runs = {dataset.run for dataset in datasets}
1160 if len(runs) != 1:
1161 raise ValueError(f"Multiple run names in input datasets: {runs}")
1162 run = runs.pop()
1164 runRecord = self._managers.collections.find(run)
1165 if runRecord.type is not CollectionType.RUN:
1166 raise CollectionTypeError(
1167 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
1168 " RUN collection required."
1169 )
1170 assert isinstance(runRecord, RunRecord)
1172 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
1173 if expand:
1174 expandedDatasets = [
1175 dataset.expanded(self.expandDataId(dataset.dataId, dimensions=storage.datasetType.dimensions))
1176 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
1177 ]
1178 else:
1179 expandedDatasets = [
1180 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
1181 for dataset in datasets
1182 ]
1184 try:
1185 refs = list(storage.import_(runRecord, expandedDatasets))
1186 if self._managers.obscore:
1187 context = queries.SqlQueryContext(self._db, self._managers.column_types)
1188 self._managers.obscore.add_datasets(refs, context)
1189 except sqlalchemy.exc.IntegrityError as err:
1190 raise ConflictingDefinitionError(
1191 "A database constraint failure was triggered by inserting "
1192 f"one or more datasets of type {storage.datasetType} into "
1193 f"collection '{run}'. "
1194 "This probably means a dataset with the same data ID "
1195 "and dataset type already exists, but it may also mean a "
1196 "dimension row is missing."
1197 ) from err
1198 # Check that imported dataset IDs match the input
1199 for imported_ref, input_ref in zip(refs, datasets, strict=True):
1200 if imported_ref.id != input_ref.id:
1201 raise RegistryConsistencyError(
1202 "Imported dataset ID differs from input dataset ID, "
1203 f"input ref: {input_ref}, imported ref: {imported_ref}"
1204 )
1205 return refs
1207 def getDataset(self, id: DatasetId) -> DatasetRef | None:
1208 """Retrieve a Dataset entry.
1210 Parameters
1211 ----------
1212 id : `DatasetId`
1213 The unique identifier for the dataset.
1215 Returns
1216 -------
1217 ref : `DatasetRef` or `None`
1218 A ref to the Dataset, or `None` if no matching Dataset
1219 was found.
1220 """
1221 return self._managers.datasets.getDatasetRef(id)
1223 @transactional
1224 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
1225 """Remove datasets from the Registry.
1227 The datasets will be removed unconditionally from all collections, and
1228 any `Quantum` that consumed this dataset will instead be marked with
1229 having a NULL input. `Datastore` records will *not* be deleted; the
1230 caller is responsible for ensuring that the dataset has already been
1231 removed from all Datastores.
1233 Parameters
1234 ----------
1235 refs : `~collections.abc.Iterable` [`DatasetRef`]
1236 References to the datasets to be removed. Must include a valid
1237 ``id`` attribute, and should be considered invalidated upon return.
1239 Raises
1240 ------
1241 lsst.daf.butler.AmbiguousDatasetError
1242 Raised if any ``ref.id`` is `None`.
1243 lsst.daf.butler.registry.OrphanedRecordError
1244 Raised if any dataset is still present in any `Datastore`.
1245 """
1246 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
1247 for datasetType, refsForType in progress.iter_item_chunks(
1248 DatasetRef.iter_by_type(refs), desc="Removing datasets by type"
1249 ):
1250 storage = self._managers.datasets[datasetType.name]
1251 try:
1252 storage.delete(refsForType)
1253 except sqlalchemy.exc.IntegrityError as err:
1254 raise OrphanedRecordError(
1255 "One or more datasets is still present in one or more Datastores."
1256 ) from err
1258 @transactional
1259 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
1260 """Add existing datasets to a `~CollectionType.TAGGED` collection.
1262 If a DatasetRef with the same exact ID is already in a collection
1263 nothing is changed. If a `DatasetRef` with the same `DatasetType` and
1264 data ID but with a different ID exists in the collection,
1265 `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised.
1267 Parameters
1268 ----------
1269 collection : `str`
1270 Indicates the collection the datasets should be associated with.
1271 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
1272 An iterable of resolved `DatasetRef` instances that already exist
1273 in this `Registry`.
1275 Raises
1276 ------
1277 lsst.daf.butler.registry.ConflictingDefinitionError
1278 If a Dataset with the given `DatasetRef` already exists in the
1279 given collection.
1280 lsst.daf.butler.registry.MissingCollectionError
1281 Raised if ``collection`` does not exist in the registry.
1282 lsst.daf.butler.registry.CollectionTypeError
1283 Raised if adding new datasets to the given ``collection`` is not
1284 allowed.
1285 """
1286 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
1287 collectionRecord = self._managers.collections.find(collection)
1288 if collectionRecord.type is not CollectionType.TAGGED:
1289 raise CollectionTypeError(
1290 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
1291 )
1292 for datasetType, refsForType in progress.iter_item_chunks(
1293 DatasetRef.iter_by_type(refs), desc="Associating datasets by type"
1294 ):
1295 storage = self._managers.datasets[datasetType.name]
1296 try:
1297 storage.associate(collectionRecord, refsForType)
1298 if self._managers.obscore:
1299 # If a TAGGED collection is being monitored by ObsCore
1300 # manager then we may need to save the dataset.
1301 context = queries.SqlQueryContext(self._db, self._managers.column_types)
1302 self._managers.obscore.associate(refsForType, collectionRecord, context)
1303 except sqlalchemy.exc.IntegrityError as err:
1304 raise ConflictingDefinitionError(
1305 f"Constraint violation while associating dataset of type {datasetType.name} with "
1306 f"collection {collection}. This probably means that one or more datasets with the same "
1307 "dataset type and data ID already exist in the collection, but it may also indicate "
1308 "that the datasets do not exist."
1309 ) from err
1311 @transactional
1312 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
1313 """Remove existing datasets from a `~CollectionType.TAGGED` collection.
1315 ``collection`` and ``ref`` combinations that are not currently
1316 associated are silently ignored.
1318 Parameters
1319 ----------
1320 collection : `str`
1321 The collection the datasets should no longer be associated with.
1322 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
1323 An iterable of resolved `DatasetRef` instances that already exist
1324 in this `Registry`.
1326 Raises
1327 ------
1328 lsst.daf.butler.AmbiguousDatasetError
1329 Raised if any of the given dataset references is unresolved.
1330 lsst.daf.butler.registry.MissingCollectionError
1331 Raised if ``collection`` does not exist in the registry.
1332 lsst.daf.butler.registry.CollectionTypeError
1333 Raised if removing datasets from the given ``collection`` is not
1334 allowed.
1335 """
1336 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
1337 collectionRecord = self._managers.collections.find(collection)
1338 if collectionRecord.type is not CollectionType.TAGGED:
1339 raise CollectionTypeError(
1340 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
1341 )
1342 for datasetType, refsForType in progress.iter_item_chunks(
1343 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type"
1344 ):
1345 storage = self._managers.datasets[datasetType.name]
1346 storage.disassociate(collectionRecord, refsForType)
1347 if self._managers.obscore:
1348 self._managers.obscore.disassociate(refsForType, collectionRecord)
1350 @transactional
1351 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
1352 """Associate one or more datasets with a calibration collection and a
1353 validity range within it.
1355 Parameters
1356 ----------
1357 collection : `str`
1358 The name of an already-registered `~CollectionType.CALIBRATION`
1359 collection.
1360 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
1361 Datasets to be associated.
1362 timespan : `Timespan`
1363 The validity range for these datasets within the collection.
1365 Raises
1366 ------
1367 lsst.daf.butler.AmbiguousDatasetError
1368 Raised if any of the given `DatasetRef` instances is unresolved.
1369 lsst.daf.butler.registry.ConflictingDefinitionError
1370 Raised if the collection already contains a different dataset with
1371 the same `DatasetType` and data ID and an overlapping validity
1372 range.
1373 lsst.daf.butler.registry.CollectionTypeError
1374 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
1375 collection or if one or more datasets are of a dataset type for
1376 which `DatasetType.isCalibration` returns `False`.
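Examples
--------
A minimal, illustrative sketch (not part of the original source); the
calibration collection, the previously resolved ``bias_ref``, and the
timespan endpoints are hypothetical:

>>> import astropy.time
>>> from lsst.daf.butler import Timespan
>>> begin = astropy.time.Time("2024-01-01", scale="tai")
>>> end = astropy.time.Time("2024-06-01", scale="tai")
>>> registry.certify("HypoCam/calib", [bias_ref], Timespan(begin=begin, end=end))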
1377 """
1378 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
1379 collectionRecord = self._managers.collections.find(collection)
1380 for datasetType, refsForType in progress.iter_item_chunks(
1381 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type"
1382 ):
1383 storage = self._managers.datasets[datasetType.name]
1384 storage.certify(
1385 collectionRecord,
1386 refsForType,
1387 timespan,
1388 context=queries.SqlQueryContext(self._db, self._managers.column_types),
1389 )
1391 @transactional
1392 def decertify(
1393 self,
1394 collection: str,
1395 datasetType: str | DatasetType,
1396 timespan: Timespan,
1397 *,
1398 dataIds: Iterable[DataId] | None = None,
1399 ) -> None:
1400 """Remove or adjust datasets to clear a validity range within a
1401 calibration collection.
1403 Parameters
1404 ----------
1405 collection : `str`
1406 The name of an already-registered `~CollectionType.CALIBRATION`
1407 collection.
1408 datasetType : `str` or `DatasetType`
1409 Name or `DatasetType` instance for the datasets to be decertified.
1410 timespan : `Timespan`
1411 The validity range to remove datasets from within the collection.
1412 Datasets that overlap this range but are not contained by it will
1413 have their validity ranges adjusted to not overlap it, which may
1414 split a single dataset validity range into two.
1415 dataIds : iterable [`dict` or `DataCoordinate`], optional
1416 Data IDs that should be decertified within the given validity range.
1417 If `None`, all data IDs for ``datasetType`` will be
1418 decertified.
1420 Raises
1421 ------
1422 lsst.daf.butler.registry.CollectionTypeError
1423 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
1424 collection or if ``datasetType.isCalibration() is False``.
1425 """
1426 collectionRecord = self._managers.collections.find(collection)
1427 if isinstance(datasetType, str):
1428 storage = self._managers.datasets[datasetType]
1429 else:
1430 storage = self._managers.datasets[datasetType.name]
1431 standardizedDataIds = None
1432 if dataIds is not None:
1433 standardizedDataIds = [
1434 DataCoordinate.standardize(d, dimensions=storage.datasetType.dimensions) for d in dataIds
1435 ]
1436 storage.decertify(
1437 collectionRecord,
1438 timespan,
1439 dataIds=standardizedDataIds,
1440 context=queries.SqlQueryContext(self._db, self._managers.column_types),
1441 )
1443 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
1444 """Return an object that allows a new `Datastore` instance to
1445 communicate with this `Registry`.
1447 Returns
1448 -------
1449 manager : `~.interfaces.DatastoreRegistryBridgeManager`
1450 Object that mediates communication between this `Registry` and its
1451 associated datastores.
1452 """
1453 return self._managers.datastores
1455 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
1456 """Retrieve datastore locations for a given dataset.
1458 Parameters
1459 ----------
1460 ref : `DatasetRef`
1461 A reference to the dataset for which to retrieve storage
1462 information.
1464 Returns
1465 -------
1466 datastores : `~collections.abc.Iterable` [ `str` ]
1467 All the matching datastores holding this dataset.
1469 Raises
1470 ------
1471 lsst.daf.butler.AmbiguousDatasetError
1472 Raised if ``ref.id`` is `None`.
1473 """
1474 return self._managers.datastores.findDatastores(ref)
1476 def expandDataId(
1477 self,
1478 dataId: DataId | None = None,
1479 *,
1480 dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None,
1481 graph: DimensionGraph | None = None,
1482 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
1483 withDefaults: bool = True,
1484 **kwargs: Any,
1485 ) -> DataCoordinate:
1486 """Expand a dimension-based data ID to include additional information.
1488 Parameters
1489 ----------
1490 dataId : `DataCoordinate` or `dict`, optional
1491 Data ID to be expanded; augmented and overridden by ``kwargs``.
1492 dimensions : `~collections.abc.Iterable` [ `str` ], \
1493 `DimensionGroup`, or `DimensionGraph`, optional
1494 The dimensions to be identified by the new `DataCoordinate`.
1495 If not provided, the dimensions will be inferred from the keys of
1496 ``dataId`` and ``**kwargs``.
1498 graph : `DimensionGraph`, optional
1499 Like ``dimensions``, but as a ``DimensionGraph`` instance. Ignored
1500 if ``dimensions`` is provided. Deprecated and will be removed
1501 after v27.
1502 records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \
1503 optional
1504 Dimension record data to use before querying the database for that
1505 data, keyed by element name.
1506 withDefaults : `bool`, optional
1507 Utilize ``self.defaults.dataId`` to fill in missing governor
1508 dimension key-value pairs. Defaults to `True` (i.e. defaults are
1509 used).
1510 **kwargs
1511 Additional keywords are treated like additional key-value pairs for
1512 ``dataId``, extending and overriding.
1514 Returns
1515 -------
1516 expanded : `DataCoordinate`
1517 A data ID that includes full metadata for all of the dimensions it
1518 identifies, i.e. guarantees that ``expanded.hasRecords()`` and
1519 ``expanded.hasFull()`` both return `True`.
1521 Raises
1522 ------
1523 lsst.daf.butler.registry.DataIdError
1524 Raised when ``dataId`` or keyword arguments specify unknown
1525 dimensions or values, or when a resulting data ID contains
1526 contradictory key-value pairs, according to dimension
1527 relationships.
1529 Notes
1530 -----
1531 This method cannot be relied upon to reject invalid data ID values
1532 for dimensions that do not actually have any record columns. For
1533 efficiency reasons the records for these dimensions (which have only
1534 dimension key values that are given by the caller) may be constructed
1535 directly rather than obtained from the registry database.
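Examples
--------
A minimal, illustrative sketch (not part of the original source); the
instrument, exposure, and detector values are hypothetical:

>>> data_id = registry.expandDataId(instrument="HypoCam", exposure=1234, detector=7)
>>> data_id.hasRecords() and data_id.hasFull()
True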
1536 """
1537 if not withDefaults:
1538 defaults = None
1539 else:
1540 defaults = self.defaults.dataId
1541 standardized = DataCoordinate.standardize(
1542 dataId,
1543 graph=graph,
1544 dimensions=dimensions,
1545 universe=self.dimensions,
1546 defaults=defaults,
1547 **kwargs,
1548 )
1549 if standardized.hasRecords():
1550 return standardized
1551 if records is None:
1552 records = {}
1553 elif isinstance(records, NamedKeyMapping):
1554 records = records.byName()
1555 else:
1556 records = dict(records)
1557 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
1558 for element_name in dataId.dimensions.elements:
1559 records[element_name] = dataId.records[element_name]
1560 keys = dict(standardized.mapping)
1561 for element_name in standardized.dimensions.lookup_order:
1562 element = self.dimensions[element_name]
1563 record = records.get(element_name, ...) # Use ... to mean not found; None might mean NULL
1564 if record is ...:
1565 if element_name in self.dimensions.dimensions.names and keys.get(element_name) is None:
1566 if element_name in standardized.dimensions.required:
1567 raise DimensionNameError(
1568 f"No value or null value for required dimension {element_name}."
1569 )
1570 keys[element_name] = None
1571 record = None
1572 else:
1573 record = self._managers.dimensions.fetch_one(
1574 element_name,
1575 DataCoordinate.standardize(keys, dimensions=element.minimal_group),
1576 self.dimension_record_cache,
1577 )
1578 records[element_name] = record
1579 if record is not None:
1580 for d in element.implied:
1581 value = getattr(record, d.name)
1582 if keys.setdefault(d.name, value) != value:
1583 raise InconsistentDataIdError(
1584 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
1585 f"but {element_name} implies {d.name}={value!r}."
1586 )
1587 else:
1588 if element_name in standardized.dimensions.required:
1589 raise DataIdValueError(
1590 f"Could not fetch record for required dimension {element.name} via keys {keys}."
1591 )
1592 if element.defines_relationships:
1593 raise InconsistentDataIdError(
1594 f"Could not fetch record for element {element_name} via keys {keys}, ",
1595 "but it is marked as defining relationships; this means one or more dimensions are "
1596 "have inconsistent values.",
1597 )
1598 for d in element.implied:
1599 keys.setdefault(d.name, None)
1600 records.setdefault(d.name, None)
1601 return DataCoordinate.standardize(keys, dimensions=standardized.dimensions).expanded(records=records)
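# Usage sketch: expanding a minimal data ID so that implied dimension values
# and dimension records are filled in.  The instrument name and exposure ID
# below are hypothetical placeholders, and the helper is illustrative only.
def _example_expand_data_id(registry: SqlRegistry) -> None:
    data_id = registry.expandDataId({"instrument": "MyCam", "exposure": 1234})
    # The expanded coordinate identifies all implied dimensions and carries
    # their records.
    assert data_id.hasFull() and data_id.hasRecords()
    print(data_id.records["exposure"])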
1603 def insertDimensionData(
1604 self,
1605 element: DimensionElement | str,
1606 *data: Mapping[str, Any] | DimensionRecord,
1607 conform: bool = True,
1608 replace: bool = False,
1609 skip_existing: bool = False,
1610 ) -> None:
1611 """Insert one or more dimension records into the database.
1613 Parameters
1614 ----------
1615 element : `DimensionElement` or `str`
1616 The `DimensionElement` or name thereof that identifies the table
1617 records will be inserted into.
1618 *data : `dict` or `DimensionRecord`
1619 One or more records to insert.
1620 conform : `bool`, optional
1621 If `False` (`True` is default) perform no checking or conversions,
1622 and assume that ``element`` is a `DimensionElement` instance and
1623 ``data`` is one or more `DimensionRecord` instances of the
1624 appropriate subclass.
1625 replace : `bool`, optional
1626 If `True` (`False` is default), replace existing records in the
1627 database if there is a conflict.
1628 skip_existing : `bool`, optional
1629 If `True` (`False` is default), skip insertion if a record with
1630 the same primary key values already exists. Unlike
1631 `syncDimensionData`, this will not detect when the given record
1632 differs from what is in the database, and should not be used when
1633 this is a concern.
1634 """
1635 if isinstance(element, str):
1636 element = self.dimensions[element]
1637 if conform:
1638 records = [
1639 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
1640 ]
1641 else:
1642 # Ignore typing since caller said to trust them with conform=False.
1643 records = data # type: ignore
1644 if element.name in self.dimension_record_cache:
1645 self.dimension_record_cache.reset()
1646 self._managers.dimensions.insert(
1647 element,
1648 *records,
1649 replace=replace,
1650 skip_existing=skip_existing,
1651 )
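# Usage sketch: inserting dimension records from plain mappings; with the
# default ``conform=True`` each mapping is converted to the appropriate
# `DimensionRecord` subclass.  The element and field values here are
# hypothetical and depend on the dimension universe in use.
def _example_insert_dimension_data(registry: SqlRegistry) -> None:
    registry.insertDimensionData(
        "instrument",
        {"name": "MyCam", "detector_max": 4, "visit_max": 999999},
        skip_existing=True,  # tolerate a pre-existing row with the same key
    )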
1653 def syncDimensionData(
1654 self,
1655 element: DimensionElement | str,
1656 row: Mapping[str, Any] | DimensionRecord,
1657 conform: bool = True,
1658 update: bool = False,
1659 ) -> bool | dict[str, Any]:
1660 """Synchronize the given dimension record with the database, inserting
1661 if it does not already exist and comparing values if it does.
1663 Parameters
1664 ----------
1665 element : `DimensionElement` or `str`
1666 The `DimensionElement` or name thereof that identifies the table
1667 records will be inserted into.
1668 row : `dict` or `DimensionRecord`
1669 The record to insert.
1670 conform : `bool`, optional
1671 If `False` (`True` is default) perform no checking or conversions,
1672 and assume that ``element`` is a `DimensionElement` instance and
1673 ``row`` is a `DimensionRecord` instance of the appropriate
1674 subclass.
1675 update : `bool`, optional
1676 If `True` (`False` is default), update the existing record in the
1677 database if there is a conflict.
1679 Returns
1680 -------
1681 inserted_or_updated : `bool` or `dict`
1682 `True` if a new row was inserted, `False` if no changes were
1683 needed, or a `dict` mapping updated column names to their old
1684 values if an update was performed (only possible if
1685 ``update=True``).
1687 Raises
1688 ------
1689 lsst.daf.butler.registry.ConflictingDefinitionError
1690 Raised if the record exists in the database (according to primary
1691 key lookup) but is inconsistent with the given one.
1692 """
1693 if conform:
1694 if isinstance(element, str):
1695 element = self.dimensions[element]
1696 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
1697 else:
1698 # Ignore typing since caller said to trust them with conform=False.
1699 record = row # type: ignore
1700 if record.definition.name in self.dimension_record_cache:
1701 self.dimension_record_cache.reset()
1702 return self._managers.dimensions.sync(record, update=update)
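# Usage sketch: synchronizing a single record.  The return value
# distinguishes an insert (`True`), a no-op (`False`), and, with
# ``update=True``, a dict of columns that were changed.  Field values are
# hypothetical and the helper is illustrative only.
def _example_sync_dimension_data(registry: SqlRegistry) -> None:
    result = registry.syncDimensionData(
        "instrument",
        {"name": "MyCam", "detector_max": 4, "visit_max": 999999},
        update=True,
    )
    if isinstance(result, dict):
        print("updated columns (old values):", result)
    elif result:
        print("inserted new record")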
1704 def queryDatasetTypes(
1705 self,
1706 expression: Any = ...,
1707 *,
1708 components: bool | _Marker = _DefaultMarker,
1709 missing: list[str] | None = None,
1710 ) -> Iterable[DatasetType]:
1711 """Iterate over the dataset types whose names match an expression.
1713 Parameters
1714 ----------
1715 expression : dataset type expression, optional
1716 An expression that fully or partially identifies the dataset types
1717 to return, such as a `str`, `re.Pattern`, or iterable thereof.
1718 ``...`` can be used to return all dataset types, and is the
1719 default. See :ref:`daf_butler_dataset_type_expressions` for more
1720 information.
1721 components : `bool`, optional
1722 Must be `False`. Provided only for backwards compatibility. After
1723 v27 this argument will be removed entirely.
1724 missing : `list` of `str`, optional
1725 String dataset type names that were explicitly given (i.e. not
1726 regular expression patterns) but not found will be appended to this
1727 list, if it is provided.
1729 Returns
1730 -------
1731 dataset_types : `~collections.abc.Iterable` [ `DatasetType`]
1732 An `~collections.abc.Iterable` of `DatasetType` instances whose
1733 names match ``expression``.
1735 Raises
1736 ------
1737 lsst.daf.butler.registry.DatasetTypeExpressionError
1738 Raised when ``expression`` is invalid.
1739 """
1740 if components is not _DefaultMarker:
1741 if components is not False:
1742 raise DatasetTypeError(
1743 "Dataset component queries are no longer supported by Registry. Use "
1744 "DatasetType methods to obtain components from parent dataset types instead."
1745 )
1746 else:
1747 warnings.warn(
1748 "The components parameter is ignored. It will be removed after v27.",
1749 category=FutureWarning,
1750 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
1751 )
1752 wildcard = DatasetTypeWildcard.from_expression(expression)
1753 return self._managers.datasets.resolve_wildcard(wildcard, missing=missing)
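# Usage sketch: resolving dataset types by explicit name and by pattern, and
# collecting explicitly-named types that are not registered.  The names and
# pattern are hypothetical.
import re

def _example_query_dataset_types(registry: SqlRegistry) -> None:
    missing: list[str] = []
    for dataset_type in registry.queryDatasetTypes(
        ["raw", re.compile(r"^calexp")], missing=missing
    ):
        print(dataset_type.name, dataset_type.dimensions)
    # Explicitly named (non-pattern) entries that were not found end up here.
    print("not registered:", missing)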
1755 def queryCollections(
1756 self,
1757 expression: Any = ...,
1758 datasetType: DatasetType | None = None,
1759 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
1760 flattenChains: bool = False,
1761 includeChains: bool | None = None,
1762 ) -> Sequence[str]:
1763 """Iterate over the collections whose names match an expression.
1765 Parameters
1766 ----------
1767 expression : collection expression, optional
1768 An expression that identifies the collections to return, such as
1769 a `str` (for full matches or partial matches via globs),
1770 `re.Pattern` (for partial matches), or iterable thereof. ``...``
1771 can be used to return all collections, and is the default.
1772 See :ref:`daf_butler_collection_expressions` for more information.
1773 datasetType : `DatasetType`, optional
1774 If provided, only yield collections that may contain datasets of
1775 this type. This is a conservative approximation in general; it may
1776 yield collections that do not have any such datasets.
1777 collectionTypes : `~collections.abc.Set` [`CollectionType`] or \
1778 `CollectionType`, optional
1779 If provided, only yield collections of these types.
1780 flattenChains : `bool`, optional
1781 If `True` (`False` is default), recursively yield the child
1782 collections of matching `~CollectionType.CHAINED` collections.
1783 includeChains : `bool`, optional
1784 If `True`, yield records for matching `~CollectionType.CHAINED`
1785 collections. Default is the opposite of ``flattenChains``: include
1786 either CHAINED collections or their children, but not both.
1788 Returns
1789 -------
1790 collections : `~collections.abc.Sequence` [ `str` ]
1791 The names of collections that match ``expression``.
1793 Raises
1794 ------
1795 lsst.daf.butler.registry.CollectionExpressionError
1796 Raised when ``expression`` is invalid.
1798 Notes
1799 -----
1800 The order in which collections are returned is unspecified, except that
1801 the children of a `~CollectionType.CHAINED` collection are guaranteed
1802 to be in the order in which they are searched. When multiple parent
1803 `~CollectionType.CHAINED` collections match the same criteria, the
1804 order in which their child lists appear is unspecified, and the lists of
1805 children may be incomplete if a child has multiple parents.
1806 """
1807 # Right now the datasetType argument is completely ignored, but that
1808 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
1809 # ticket will take care of that.
1810 try:
1811 wildcard = CollectionWildcard.from_expression(expression)
1812 except TypeError as exc:
1813 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
1814 collectionTypes = ensure_iterable(collectionTypes)
1815 return [
1816 record.name
1817 for record in self._managers.collections.resolve_wildcard(
1818 wildcard,
1819 collection_types=frozenset(collectionTypes),
1820 flatten_chains=flattenChains,
1821 include_chains=includeChains,
1822 )
1823 ]
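# Usage sketch: listing RUN collections that match a glob, flattening any
# matching CHAINED collections into their children.  The glob pattern is
# hypothetical.
def _example_query_collections(registry: SqlRegistry) -> None:
    for name in registry.queryCollections(
        "MyCam/runs/*",
        collectionTypes=CollectionType.RUN,
        flattenChains=True,
    ):
        print(name)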
1825 def _makeQueryBuilder(
1826 self,
1827 summary: queries.QuerySummary,
1828 doomed_by: Iterable[str] = (),
1829 ) -> queries.QueryBuilder:
1830 """Return a `QueryBuilder` instance capable of constructing and
1831 managing more complex queries than those obtainable via `Registry`
1832 interfaces.
1834 This is an advanced interface; downstream code should prefer
1835 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
1836 are sufficient.
1838 Parameters
1839 ----------
1840 summary : `queries.QuerySummary`
1841 Object describing and categorizing the full set of dimensions that
1842 will be included in the query.
1843 doomed_by : `~collections.abc.Iterable` of `str`, optional
1844 A list of diagnostic messages that indicate why the query is going
1845 to yield no results and should not even be executed. If an empty
1846 container (default) the query will be executed unless other code
1847 determines that it is doomed.
1849 Returns
1850 -------
1851 builder : `queries.QueryBuilder`
1852 Object that can be used to construct and perform advanced queries.
1853 """
1854 doomed_by = list(doomed_by)
1855 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache)
1856 context = backend.context()
1857 relation: Relation | None = None
1858 if doomed_by:
1859 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
1860 return queries.QueryBuilder(
1861 summary,
1862 backend=backend,
1863 context=context,
1864 relation=relation,
1865 )
1867 def _standardize_query_data_id_args(
1868 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
1869 ) -> DataCoordinate:
1870 """Preprocess the data ID arguments passed to query* methods.
1872 Parameters
1873 ----------
1874 data_id : `DataId` or `None`
1875 Data ID that constrains the query results.
1876 doomed_by : `list` [ `str` ]
1877 List to append messages indicating why the query is doomed to
1878 yield no results.
1879 **kwargs
1880 Additional data ID key-value pairs, extending and overriding
1881 ``data_id``.
1883 Returns
1884 -------
1885 data_id : `DataCoordinate`
1886 Standardized data ID. Will be fully expanded unless expansion
1887 fails, in which case a message will be appended to ``doomed_by``
1888 on return.
1889 """
1890 try:
1891 return self.expandDataId(data_id, **kwargs)
1892 except DataIdValueError as err:
1893 doomed_by.append(str(err))
1894 return DataCoordinate.standardize(
1895 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
1896 )
1898 def _standardize_query_dataset_args(
1899 self,
1900 datasets: Any,
1901 collections: CollectionArgType | None,
1902 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
1903 *,
1904 doomed_by: list[str],
1905 ) -> tuple[list[DatasetType], CollectionWildcard | None]:
1906 """Preprocess dataset arguments passed to query* methods.
1908 Parameters
1909 ----------
1910 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
1911 Expression identifying dataset types. See `queryDatasetTypes` for
1912 details.
1913 collections : `str`, `re.Pattern`, or iterable of these
1914 Expression identifying collections to be searched. See
1915 `queryCollections` for details.
1916 mode : `str`, optional
1917 The way in which datasets are being used in this query; one of:
1919 - "find_first": this is a query for the first dataset in an
1920 ordered list of collections. Prohibits collection wildcards,
1921 but permits dataset type wildcards.
1923 - "find_all": this is a query for all datasets in all matched
1924 collections. Permits collection and dataset type wildcards.
1926 - "constrain": this is a query for something other than datasets,
1927 with results constrained by dataset existence. Permits
1928 collection wildcards and prohibits ``...`` as a dataset type
1929 wildcard.
1930 doomed_by : `list` [ `str` ]
1931 List to append messages indicating why the query is doomed to
1932 yield no results.
1934 Returns
1935 -------
1936 dataset_types : `list` [ `DatasetType` ]
1937 List of matched dataset types.
1938 collections : `CollectionWildcard`
1939 Processed collection expression.
1940 """
1941 dataset_types: list[DatasetType] = []
1942 collection_wildcard: CollectionWildcard | None = None
1943 if datasets is not None:
1944 if collections is None:
1945 if not self.defaults.collections:
1946 raise NoDefaultCollectionError("No collections, and no registry default collections.")
1947 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections)
1948 else:
1949 collection_wildcard = CollectionWildcard.from_expression(collections)
1950 if mode == "find_first" and collection_wildcard.patterns:
1951 raise TypeError(
1952 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context."
1953 )
1954 missing: list[str] = []
1955 dataset_types = self._managers.datasets.resolve_wildcard(
1956 datasets, missing=missing, explicit_only=(mode == "constrain")
1957 )
1958 if missing and mode == "constrain":
1959 raise MissingDatasetTypeError(
1960 f"Dataset type(s) {missing} are not registered.",
1961 )
1962 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
1963 elif collections:
1964 # I think this check should actually be `collections is not None`,
1965 # but it looks like some CLI scripts use empty tuple as default.
1966 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
1967 return dataset_types, collection_wildcard
1969 def queryDatasets(
1970 self,
1971 datasetType: Any,
1972 *,
1973 collections: CollectionArgType | None = None,
1974 dimensions: Iterable[Dimension | str] | None = None,
1975 dataId: DataId | None = None,
1976 where: str = "",
1977 findFirst: bool = False,
1978 components: bool | _Marker = _DefaultMarker,
1979 bind: Mapping[str, Any] | None = None,
1980 check: bool = True,
1981 **kwargs: Any,
1982 ) -> queries.DatasetQueryResults:
1983 """Query for and iterate over dataset references matching user-provided
1984 criteria.
1986 Parameters
1987 ----------
1988 datasetType : dataset type expression
1989 An expression that fully or partially identifies the dataset types
1990 to be queried. Allowed types include `DatasetType`, `str`,
1991 `re.Pattern`, and iterables thereof. The special value ``...`` can
1992 be used to query all dataset types. See
1993 :ref:`daf_butler_dataset_type_expressions` for more information.
1994 collections : collection expression, optional
1995 An expression that identifies the collections to search, such as a
1996 `str` (for full matches or partial matches via globs), `re.Pattern`
1997 (for partial matches), or iterable thereof. ``...`` can be used to
1998 search all collections (actually just all `~CollectionType.RUN`
1999 collections, because this will still find all datasets).
2000 If not provided, ``self.defaults.collections`` is used. See
2001 :ref:`daf_butler_collection_expressions` for more information.
2002 dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
2003 Dimensions to include in the query (in addition to those used
2004 to identify the queried dataset type(s)), either to constrain
2005 the resulting datasets to those for which a matching dimension
2006 exists, or to relate the dataset type's dimensions to dimensions
2007 referenced by the ``dataId`` or ``where`` arguments.
2008 dataId : `dict` or `DataCoordinate`, optional
2009 A data ID whose key-value pairs are used as equality constraints
2010 in the query.
2011 where : `str`, optional
2012 A string expression similar to a SQL WHERE clause. May involve
2013 any column of a dimension table or (as a shortcut for the primary
2014 key column of a dimension table) dimension name. See
2015 :ref:`daf_butler_dimension_expressions` for more information.
2016 findFirst : `bool`, optional
2017 If `True` (`False` is default), for each result data ID, only
2018 yield one `DatasetRef` of each `DatasetType`, from the first
2019 collection in which a dataset of that dataset type appears
2020 (according to the order of ``collections`` passed in). If `True`,
2021 ``collections`` must not contain regular expressions and may not
2022 be ``...``.
2023 components : `bool`, optional
2024 Must be `False`. Provided only for backwards compatibility. After
2025 v27 this argument will be removed entirely.
2026 bind : `~collections.abc.Mapping`, optional
2027 Mapping containing literal values that should be injected into the
2028 ``where`` expression, keyed by the identifiers they replace.
2029 Values of collection type can be expanded in some cases; see
2030 :ref:`daf_butler_dimension_expressions_identifiers` for more
2031 information.
2032 check : `bool`, optional
2033 If `True` (default) check the query for consistency before
2034 executing it. This may reject some valid queries that resemble
2035 common mistakes (e.g. queries for visits without specifying an
2036 instrument).
2037 **kwargs
2038 Additional keyword arguments are forwarded to
2039 `DataCoordinate.standardize` when processing the ``dataId``
2040 argument (and may be used to provide a constraining data ID even
2041 when the ``dataId`` argument is `None`).
2043 Returns
2044 -------
2045 refs : `.queries.DatasetQueryResults`
2046 Dataset references matching the given query criteria. Nested data
2047 IDs are guaranteed to include values for all implied dimensions
2048 (i.e. `DataCoordinate.hasFull` will return `True`), but will not
2049 include dimension records (`DataCoordinate.hasRecords` will be
2050 `False`) unless `~.queries.DatasetQueryResults.expanded` is
2051 called on the result object (which returns a new one).
2053 Raises
2054 ------
2055 lsst.daf.butler.registry.DatasetTypeExpressionError
2056 Raised when ``datasetType`` expression is invalid.
2057 TypeError
2058 Raised when the arguments are incompatible, such as when a
2059 collection wildcard is passed when ``findFirst`` is `True`, or
2060 when ``collections`` is `None` and ``self.defaults.collections`` is
2061 also `None`.
2062 lsst.daf.butler.registry.DataIdError
2063 Raised when ``dataId`` or keyword arguments specify unknown
2064 dimensions or values, or when they contain inconsistent values.
2065 lsst.daf.butler.registry.UserExpressionError
2066 Raised when ``where`` expression is invalid.
2068 Notes
2069 -----
2070 When multiple dataset types are queried in a single call, the
2071 results of this operation are equivalent to querying for each dataset
2072 type separately in turn, and no information about the relationships
2073 between datasets of different types is included. In contexts where
2074 that kind of information is important, the recommended pattern is to
2075 use `queryDataIds` to first obtain data IDs (possibly with the
2076 desired dataset types and collections passed as constraints to the
2077 query), and then use multiple (generally much simpler) calls to
2078 `queryDatasets` with the returned data IDs passed as constraints.
2079 """
2080 if components is not _DefaultMarker:
2081 if components is not False:
2082 raise DatasetTypeError(
2083 "Dataset component queries are no longer supported by Registry. Use "
2084 "DatasetType methods to obtain components from parent dataset types instead."
2085 )
2086 else:
2087 warnings.warn(
2088 "The components parameter is ignored. It will be removed after v27.",
2089 category=FutureWarning,
2090 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
2091 )
2092 doomed_by: list[str] = []
2093 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
2094 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args(
2095 datasetType,
2096 collections,
2097 mode="find_first" if findFirst else "find_all",
2098 doomed_by=doomed_by,
2099 )
2100 if collection_wildcard is not None and collection_wildcard.empty():
2101 doomed_by.append("No datasets can be found because collection list is empty.")
2102 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
2103 parent_results: list[queries.ParentDatasetQueryResults] = []
2104 for resolved_dataset_type in resolved_dataset_types:
2105 # The full set of dimensions in the query is the combination of
2106 # those needed for the DatasetType and those explicitly requested,
2107 # if any.
2108 dimension_names = set(resolved_dataset_type.dimensions.names)
2109 if dimensions is not None:
2110 dimension_names.update(self.dimensions.conform(dimensions).names)
2111 # Construct the summary structure needed to construct a
2112 # QueryBuilder.
2113 summary = queries.QuerySummary(
2114 requested=self.dimensions.conform(dimension_names),
2115 column_types=self._managers.column_types,
2116 data_id=data_id,
2117 expression=where,
2118 bind=bind,
2119 defaults=self.defaults.dataId,
2120 check=check,
2121 datasets=[resolved_dataset_type],
2122 )
2123 builder = self._makeQueryBuilder(summary)
2124 # Add the dataset subquery to the query, telling the QueryBuilder
2125 # to include the rank of the selected collection in the results
2126 # only if we need to findFirst. Note that if any of the
2127 # collections are actually wildcard expressions, and
2128 # findFirst=True, this will raise TypeError for us.
2129 builder.joinDataset(
2130 resolved_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst
2131 )
2132 query = builder.finish()
2133 parent_results.append(
2134 queries.ParentDatasetQueryResults(query, resolved_dataset_type, components=[None])
2135 )
2136 if not parent_results:
2137 doomed_by.extend(
2138 f"No registered dataset type matching {t!r} found, so no matching datasets can "
2139 "exist in any collection."
2140 for t in ensure_iterable(datasetType)
2141 )
2142 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
2143 elif len(parent_results) == 1:
2144 return parent_results[0]
2145 else:
2146 return queries.ChainedDatasetQueryResults(parent_results)
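# Usage sketch: a find-first dataset query constrained by a ``where``
# expression with a bound value.  The dataset type name, collection name, and
# bind value are hypothetical placeholders.
def _example_query_datasets(registry: SqlRegistry) -> None:
    refs = registry.queryDatasets(
        "calexp",
        collections=["MyCam/runs/nightly"],
        where="instrument = 'MyCam' AND visit = my_visit",
        bind={"my_visit": 1234},
        findFirst=True,
    )
    for ref in refs:
        print(ref.dataId, ref.run)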
2148 def queryDataIds(
2149 self,
2150 # TODO: Drop Dimension support on DM-41326.
2151 dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str,
2152 *,
2153 dataId: DataId | None = None,
2154 datasets: Any = None,
2155 collections: CollectionArgType | None = None,
2156 where: str = "",
2157 components: bool | _Marker = _DefaultMarker,
2158 bind: Mapping[str, Any] | None = None,
2159 check: bool = True,
2160 **kwargs: Any,
2161 ) -> queries.DataCoordinateQueryResults:
2162 """Query for data IDs matching user-provided criteria.
2164 Parameters
2165 ----------
2166 dimensions : `DimensionGroup`, `Dimension`, or `str`, or \
2167 `~collections.abc.Iterable` [ `Dimension` or `str` ]
2168 The dimensions of the data IDs to yield, as either `Dimension`
2169 instances or `str`. Will be automatically expanded to a complete
2170 `DimensionGroup`. Support for `Dimension` instances is deprecated
2171 and will not be supported after v27.
2172 dataId : `dict` or `DataCoordinate`, optional
2173 A data ID whose key-value pairs are used as equality constraints
2174 in the query.
2175 datasets : dataset type expression, optional
2176 An expression that fully or partially identifies dataset types
2177 that should constrain the yielded data IDs. For example, including
2178 "raw" here would constrain the yielded ``instrument``,
2179 ``exposure``, ``detector``, and ``physical_filter`` values to only
2180 those for which at least one "raw" dataset exists in
2181 ``collections``. Allowed types include `DatasetType`, `str`,
2182 and iterables thereof. Regular expression objects (i.e.
2183 `re.Pattern`) are deprecated and will be removed after the v26
2184 release. See :ref:`daf_butler_dataset_type_expressions` for more
2185 information.
2186 collections : collection expression, optional
2187 An expression that identifies the collections to search for
2188 datasets, such as a `str` (for full matches or partial matches
2189 via globs), `re.Pattern` (for partial matches), or iterable
2190 thereof. ``...`` can be used to search all collections (actually
2191 just all `~CollectionType.RUN` collections, because this will
2192 still find all datasets). If not provided,
2193 ``self.defaults.collections`` is used. Ignored unless ``datasets``
2194 is also passed. See :ref:`daf_butler_collection_expressions` for
2195 more information.
2196 where : `str`, optional
2197 A string expression similar to a SQL WHERE clause. May involve
2198 any column of a dimension table or (as a shortcut for the primary
2199 key column of a dimension table) dimension name. See
2200 :ref:`daf_butler_dimension_expressions` for more information.
2201 components : `bool`, optional
2202 Must be `False`. Provided only for backwards compatibility. After
2203 v27 this argument will be removed entirely.
2204 bind : `~collections.abc.Mapping`, optional
2205 Mapping containing literal values that should be injected into the
2206 ``where`` expression, keyed by the identifiers they replace.
2207 Values of collection type can be expanded in some cases; see
2208 :ref:`daf_butler_dimension_expressions_identifiers` for more
2209 information.
2210 check : `bool`, optional
2211 If `True` (default) check the query for consistency before
2212 executing it. This may reject some valid queries that resemble
2213 common mistakes (e.g. queries for visits without specifying an
2214 instrument).
2215 **kwargs
2216 Additional keyword arguments are forwarded to
2217 `DataCoordinate.standardize` when processing the ``dataId``
2218 argument (and may be used to provide a constraining data ID even
2219 when the ``dataId`` argument is `None`).
2221 Returns
2222 -------
2223 dataIds : `.queries.DataCoordinateQueryResults`
2224 Data IDs matching the given query parameters. These are guaranteed
2225 to identify all dimensions (`DataCoordinate.hasFull` returns
2226 `True`), but will not contain `DimensionRecord` objects
2227 (`DataCoordinate.hasRecords` returns `False`). Call
2228 `~.queries.DataCoordinateQueryResults.expanded` on the
2229 returned object to fetch those (and consider using
2230 `~.queries.DataCoordinateQueryResults.materialize` on the
2231 returned object first if the expected number of rows is very
2232 large). See documentation for those methods for additional
2233 information.
2235 Raises
2236 ------
2237 lsst.daf.butler.registry.NoDefaultCollectionError
2238 Raised if ``collections`` is `None` and
2239 ``self.defaults.collections`` is `None`.
2240 lsst.daf.butler.registry.CollectionExpressionError
2241 Raised when ``collections`` expression is invalid.
2242 lsst.daf.butler.registry.DataIdError
2243 Raised when ``dataId`` or keyword arguments specify unknown
2244 dimensions or values, or when they contain inconsistent values.
2245 lsst.daf.butler.registry.DatasetTypeExpressionError
2246 Raised when ``datasetType`` expression is invalid.
2247 lsst.daf.butler.registry.UserExpressionError
2248 Raised when ``where`` expression is invalid.
2249 """
2250 if components is not _DefaultMarker:
2251 if components is not False:
2252 raise DatasetTypeError(
2253 "Dataset component queries are no longer supported by Registry. Use "
2254 "DatasetType methods to obtain components from parent dataset types instead."
2255 )
2256 else:
2257 warnings.warn(
2258 "The components parameter is ignored. It will be removed after v27.",
2259 category=FutureWarning,
2260 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
2261 )
2262 requested_dimensions = self.dimensions.conform(dimensions)
2263 doomed_by: list[str] = []
2264 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
2265 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args(
2266 datasets, collections, doomed_by=doomed_by
2267 )
2268 if collection_wildcard is not None and collection_wildcard.empty():
2269 doomed_by.append("No data coordinates can be found because collection list is empty.")
2270 summary = queries.QuerySummary(
2271 requested=requested_dimensions,
2272 column_types=self._managers.column_types,
2273 data_id=data_id,
2274 expression=where,
2275 bind=bind,
2276 defaults=self.defaults.dataId,
2277 check=check,
2278 datasets=resolved_dataset_types,
2279 )
2280 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
2281 for datasetType in resolved_dataset_types:
2282 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
2283 query = builder.finish()
2285 return queries.DataCoordinateQueryResults(query)
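# Usage sketch: querying data IDs constrained by dataset existence and then
# expanding them to attach dimension records.  The dimension names, dataset
# type, and collection used here are hypothetical.
def _example_query_data_ids(registry: SqlRegistry) -> None:
    data_ids = registry.queryDataIds(
        ["exposure", "detector"],
        datasets="raw",
        collections="MyCam/raw/all",
        where="instrument = 'MyCam'",
    )
    for data_id in data_ids.expanded():
        print(data_id["exposure"], data_id["detector"])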
2287 def queryDimensionRecords(
2288 self,
2289 element: DimensionElement | str,
2290 *,
2291 dataId: DataId | None = None,
2292 datasets: Any = None,
2293 collections: CollectionArgType | None = None,
2294 where: str = "",
2295 components: bool | _Marker = _DefaultMarker,
2296 bind: Mapping[str, Any] | None = None,
2297 check: bool = True,
2298 **kwargs: Any,
2299 ) -> queries.DimensionRecordQueryResults:
2300 """Query for dimension information matching user-provided criteria.
2302 Parameters
2303 ----------
2304 element : `DimensionElement` or `str`
2305 The dimension element to obtain records for.
2306 dataId : `dict` or `DataCoordinate`, optional
2307 A data ID whose key-value pairs are used as equality constraints
2308 in the query.
2309 datasets : dataset type expression, optional
2310 An expression that fully or partially identifies dataset types
2311 that should constrain the yielded records. See `queryDataIds` and
2312 :ref:`daf_butler_dataset_type_expressions` for more information.
2313 collections : collection expression, optional
2314 An expression that identifies the collections to search for
2315 datasets, such as a `str` (for full matches or partial matches
2316 via globs), `re.Pattern` (for partial matches), or iterable
2317 thereof. ``...`` can be used to search all collections (actually
2318 just all `~CollectionType.RUN` collections, because this will
2319 still find all datasets). If not provided,
2320 ``self.defaults.collections`` is used. Ignored unless ``datasets``
2321 is also passed. See :ref:`daf_butler_collection_expressions` for
2322 more information.
2323 where : `str`, optional
2324 A string expression similar to a SQL WHERE clause. See
2325 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
2326 information.
2327 components : `bool`, optional
2331 Must be `False`. Provided only for backwards compatibility. After
2332 v27 this argument will be removed entirely.
2333 bind : `~collections.abc.Mapping`, optional
2334 Mapping containing literal values that should be injected into the
2335 ``where`` expression, keyed by the identifiers they replace.
2336 Values of collection type can be expanded in some cases; see
2337 :ref:`daf_butler_dimension_expressions_identifiers` for more
2338 information.
2339 check : `bool`, optional
2340 If `True` (default) check the query for consistency before
2341 executing it. This may reject some valid queries that resemble
2342 common mistakes (e.g. queries for visits without specifying an
2343 instrument).
2344 **kwargs
2345 Additional keyword arguments are forwarded to
2346 `DataCoordinate.standardize` when processing the ``dataId``
2347 argument (and may be used to provide a constraining data ID even
2348 when the ``dataId`` argument is `None`).
2350 Returns
2351 -------
2352 records : `.queries.DimensionRecordQueryResults`
2353 Dimension records matching the given query parameters.
2355 Raises
2356 ------
2357 lsst.daf.butler.registry.NoDefaultCollectionError
2358 Raised if ``collections`` is `None` and
2359 ``self.defaults.collections`` is `None`.
2360 lsst.daf.butler.registry.CollectionExpressionError
2361 Raised when ``collections`` expression is invalid.
2362 lsst.daf.butler.registry.DataIdError
2363 Raised when ``dataId`` or keyword arguments specify unknown
2364 dimensions or values, or when they contain inconsistent values.
2365 lsst.daf.butler.registry.DatasetTypeExpressionError
2366 Raised when ``datasetType`` expression is invalid.
2367 lsst.daf.butler.registry.UserExpressionError
2368 Raised when ``where`` expression is invalid.
2369 """
2370 if components is not _DefaultMarker:
2371 if components is not False:
2372 raise DatasetTypeError(
2373 "Dataset component queries are no longer supported by Registry. Use "
2374 "DatasetType methods to obtain components from parent dataset types instead."
2375 )
2376 else:
2377 warnings.warn(
2378 "The components parameter is ignored. It will be removed after v27.",
2379 category=FutureWarning,
2380 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
2381 )
2382 if not isinstance(element, DimensionElement):
2383 try:
2384 element = self.dimensions[element]
2385 except KeyError as e:
2386 raise DimensionNameError(
2387 f"No such dimension '{element}', available dimensions: " + str(self.dimensions.elements)
2388 ) from e
2389 doomed_by: list[str] = []
2390 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
2391 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args(
2392 datasets, collections, doomed_by=doomed_by
2393 )
2394 if collection_wildcard is not None and collection_wildcard.empty():
2395 doomed_by.append("No dimension records can be found because collection list is empty.")
2396 summary = queries.QuerySummary(
2397 requested=element.minimal_group,
2398 column_types=self._managers.column_types,
2399 data_id=data_id,
2400 expression=where,
2401 bind=bind,
2402 defaults=self.defaults.dataId,
2403 check=check,
2404 datasets=resolved_dataset_types,
2405 )
2406 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
2407 for datasetType in resolved_dataset_types:
2408 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
2409 query = builder.finish().with_record_columns(element.name)
2410 return queries.DatabaseDimensionRecordQueryResults(query, element)
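# Usage sketch: fetching full dimension records for one element, constrained
# by a data ID.  The instrument name is a hypothetical placeholder.
def _example_query_dimension_records(registry: SqlRegistry) -> None:
    for record in registry.queryDimensionRecords(
        "detector", dataId={"instrument": "MyCam"}
    ):
        print(record.toDict())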
2412 def queryDatasetAssociations(
2413 self,
2414 datasetType: str | DatasetType,
2415 collections: CollectionArgType | None = ...,
2416 *,
2417 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
2418 flattenChains: bool = False,
2419 ) -> Iterator[DatasetAssociation]:
2420 """Iterate over dataset-collection combinations where the dataset is in
2421 the collection.
2423 This method is a temporary placeholder for better support for
2424 association results in `queryDatasets`. It will probably be
2425 removed in the future, and should be avoided in production code
2426 whenever possible.
2428 Parameters
2429 ----------
2430 datasetType : `DatasetType` or `str`
2431 A dataset type object or the name of one.
2432 collections : collection expression, optional
2433 An expression that identifies the collections to search for
2434 datasets, such as a `str` (for full matches or partial matches
2435 via globs), `re.Pattern` (for partial matches), or iterable
2436 thereof. ``...`` can be used to search all collections (actually
2437 just all `~CollectionType.RUN` collections, because this will still
2438 find all datasets). If not provided, ``self.defaults.collections``
2439 is used. See :ref:`daf_butler_collection_expressions` for more
2440 information.
2441 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional
2442 If provided, only yield associations from collections of these
2443 types.
2444 flattenChains : `bool`, optional
2445 If `True`, search in the children of `~CollectionType.CHAINED`
2446 collections. If `False`, ``CHAINED`` collections are ignored.
2448 Yields
2449 ------
2450 association : `.DatasetAssociation`
2451 Object representing the relationship between a single dataset and
2452 a single collection.
2454 Raises
2455 ------
2456 lsst.daf.butler.registry.NoDefaultCollectionError
2457 Raised if ``collections`` is `None` and
2458 ``self.defaults.collections`` is `None`.
2459 lsst.daf.butler.registry.CollectionExpressionError
2460 Raised when ``collections`` expression is invalid.
2461 """
2462 if collections is None:
2463 if not self.defaults.collections:
2464 raise NoDefaultCollectionError(
2465 "No collections provided to queryDatasetAssociations, "
2466 "and no defaults from registry construction."
2467 )
2468 collections = self.defaults.collections
2469 collection_wildcard = CollectionWildcard.from_expression(collections)
2470 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache)
2471 parent_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType)
2472 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
2473 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
2474 for parent_collection_record in backend.resolve_collection_wildcard(
2475 collection_wildcard,
2476 collection_types=frozenset(collectionTypes),
2477 flatten_chains=flattenChains,
2478 ):
2479 # Resolve this possibly-chained collection into a list of
2480 # non-CHAINED collections that actually hold datasets of this
2481 # type.
2482 candidate_collection_records = backend.resolve_dataset_collections(
2483 parent_dataset_type,
2484 CollectionWildcard.from_names([parent_collection_record.name]),
2485 allow_calibration_collections=True,
2486 governor_constraints={},
2487 )
2488 if not candidate_collection_records:
2489 continue
2490 with backend.context() as context:
2491 relation = backend.make_dataset_query_relation(
2492 parent_dataset_type,
2493 candidate_collection_records,
2494 columns={"dataset_id", "run", "timespan", "collection"},
2495 context=context,
2496 )
2497 reader = queries.DatasetRefReader(
2498 parent_dataset_type,
2499 translate_collection=lambda k: self._managers.collections[k].name,
2500 full=False,
2501 )
2502 for row in context.fetch_iterable(relation):
2503 ref = reader.read(row)
2504 collection_record = self._managers.collections[row[collection_tag]]
2505 if collection_record.type is CollectionType.CALIBRATION:
2506 timespan = row[timespan_tag]
2507 else:
2508 # For backwards compatibility and (possibly?) user
2509 # convenience we continue to define the timespan of a
2510 # DatasetAssociation row for a non-CALIBRATION
2511 # collection to be None rather than a fully unbounded
2512 # timespan.
2513 timespan = None
2514 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
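# Usage sketch: listing the TAGGED and CALIBRATION collections that contain a
# dataset type; only CALIBRATION associations carry a validity timespan.  The
# dataset type name is hypothetical.
def _example_query_dataset_associations(registry: SqlRegistry) -> None:
    for assoc in registry.queryDatasetAssociations(
        "bias",
        collections=...,
        collectionTypes={CollectionType.TAGGED, CollectionType.CALIBRATION},
    ):
        print(assoc.collection, assoc.ref.id, assoc.timespan)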
2516 def get_datastore_records(self, ref: DatasetRef) -> DatasetRef:
2517 """Retrieve datastore records for given ref.
2519 Parameters
2520 ----------
2521 ref : `DatasetRef`
2522 Dataset reference for which to retrieve its corresponding datastore
2523 records.
2525 Returns
2526 -------
2527 updated_ref : `DatasetRef`
2528 Dataset reference with filled datastore records.
2530 Notes
2531 -----
2532 If this method is called with a dataset ref that is not known to the
2533 registry, a reference with an empty set of records is returned.
2534 """
2535 datastore_records: dict[str, list[StoredDatastoreItemInfo]] = {}
2536 for opaque, record_class in self._datastore_record_classes.items():
2537 records = self.fetchOpaqueData(opaque, dataset_id=ref.id)
2538 datastore_records[opaque] = [record_class.from_record(record) for record in records]
2539 return ref.replace(datastore_records=datastore_records)
2541 def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None:
2542 """Store datastore records for given refs.
2544 Parameters
2545 ----------
2546 refs : `~collections.abc.Mapping` [`str`, `DatasetRef`]
2547 Mapping from datastore name to a dataset reference stored in that
2548 datastore; the reference must include datastore records.
2549 """
2550 for datastore_name, ref in refs.items():
2551 # Store ref IDs in the bridge table.
2552 bridge = self._managers.datastores.register(datastore_name)
2553 bridge.insert([ref])
2555 # store records in opaque tables
2556 assert ref._datastore_records is not None, "Dataset ref must have datastore records"
2557 for table_name, records in ref._datastore_records.items():
2558 opaque_table = self._managers.opaque.get(table_name)
2559 assert opaque_table is not None, f"Unexpected opaque table name {table_name}"
2560 opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records))
2562 def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> None:
2563 """Create opaque tables used by datastores.
2565 Parameters
2566 ----------
2567 tables : `~collections.abc.Mapping`
2568 Maps opaque table name to its definition.
2570 Notes
2571 -----
2572 This method should disappear in the future when opaque table
2573 definitions are provided during `Registry` construction.
2574 """
2575 datastore_record_classes = {}
2576 for table_name, table_def in tables.items():
2577 datastore_record_classes[table_name] = table_def.record_class
2578 try:
2579 self._managers.opaque.register(table_name, table_def.table_spec)
2580 except ReadOnlyDatabaseError:
2581 # If the database is read only and we just tried and failed to
2582 # create a table, it means someone is trying to create a
2583 # read-only butler client for an empty repo. That should be
2584 # okay, as long as they then try to get any datasets before
2585 # some other client creates the table. Chances are they're
2586 # just validating configuration.
2587 pass
2588 self._datastore_record_classes = datastore_record_classes
2590 def preload_cache(self) -> None:
2591 """Immediately load caches that are used for common operations."""
2592 self.dimension_record_cache.preload_cache()
2594 @property
2595 def obsCoreTableManager(self) -> ObsCoreTableManager | None:
2596 """The ObsCore manager instance for this registry
2597 (`~.interfaces.ObsCoreTableManager`
2598 or `None`).
2600 ObsCore manager may not be implemented for all registry backend, or
2601 may not be enabled for many repositories.
2602 """
2603 return self._managers.obscore
2605 storageClasses: StorageClassFactory
2606 """All storage classes known to the registry (`StorageClassFactory`).
2607 """
2609 _defaults: RegistryDefaults
2610 """Default collections used for registry queries (`RegistryDefaults`)."""