Coverage for python/lsst/daf/butler/registry/sql_registry.py: 18%
580 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-03 02:48 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-03 02:48 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30from .. import ddl
32__all__ = ("SqlRegistry",)
34import contextlib
35import logging
36import warnings
37from collections.abc import Iterable, Iterator, Mapping, Sequence
38from typing import TYPE_CHECKING, Any, Literal, cast
40import sqlalchemy
41from lsst.daf.relation import LeafRelation, Relation
42from lsst.resources import ResourcePathExpression
43from lsst.utils.introspection import find_outside_stacklevel
44from lsst.utils.iteration import ensure_iterable
46from .._column_tags import DatasetColumnTag
47from .._config import Config
48from .._dataset_association import DatasetAssociation
49from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
50from .._dataset_type import DatasetType
51from .._exceptions import (
52 CalibrationLookupError,
53 DataIdValueError,
54 DimensionNameError,
55 InconsistentDataIdError,
56)
57from .._named import NamedKeyMapping, NameLookupMapping
58from .._storage_class import StorageClassFactory
59from .._timespan import Timespan
60from ..dimensions import (
61 DataCoordinate,
62 DataId,
63 Dimension,
64 DimensionConfig,
65 DimensionElement,
66 DimensionGraph,
67 DimensionGroup,
68 DimensionRecord,
69 DimensionUniverse,
70)
71from ..dimensions.record_cache import DimensionRecordCache
72from ..progress import Progress
73from ..registry import (
74 ArgumentError,
75 CollectionExpressionError,
76 CollectionSummary,
77 CollectionType,
78 CollectionTypeError,
79 ConflictingDefinitionError,
80 DatasetTypeError,
81 MissingDatasetTypeError,
82 NoDefaultCollectionError,
83 OrphanedRecordError,
84 RegistryConfig,
85 RegistryConsistencyError,
86 RegistryDefaults,
87 queries,
88)
89from ..registry.interfaces import ChainedCollectionRecord, ReadOnlyDatabaseError, RunRecord
90from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
91from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
92from ..utils import _DefaultMarker, _Marker, transactional
94if TYPE_CHECKING:
95 from .._butler_config import ButlerConfig
96 from ..datastore._datastore import DatastoreOpaqueTable
97 from ..datastore.stored_file_info import StoredDatastoreItemInfo
98 from ..registry._registry import CollectionArgType
99 from ..registry.interfaces import (
100 CollectionRecord,
101 Database,
102 DatastoreRegistryBridgeManager,
103 ObsCoreTableManager,
104 )
107_LOG = logging.getLogger(__name__)
110class SqlRegistry:
111 """Butler Registry implementation that uses SQL database as backend.
113 Parameters
114 ----------
115 database : `Database`
116 Database instance to store Registry.
117 defaults : `RegistryDefaults`
118 Default collection search path and/or output `~CollectionType.RUN`
119 collection.
120 managers : `RegistryManagerInstances`
121 All the managers required for this registry.
122 """
124 defaultConfigFile: str | None = None
125 """Path to configuration defaults. Accessed within the ``configs`` resource
126 or relative to a search path. Can be None if no defaults specified.
127 """
129 @classmethod
130 def forceRegistryConfig(
131 cls, config: ButlerConfig | RegistryConfig | Config | str | None
132 ) -> RegistryConfig:
133 """Force the supplied config to a `RegistryConfig`.
135 Parameters
136 ----------
137 config : `RegistryConfig`, `Config` or `str` or `None`
138 Registry configuration, if missing then default configuration will
139 be loaded from registry.yaml.
141 Returns
142 -------
143 registry_config : `RegistryConfig`
144 A registry config.
145 """
146 if not isinstance(config, RegistryConfig):
147 if isinstance(config, str | Config) or config is None:
148 config = RegistryConfig(config)
149 else:
150 raise ValueError(f"Incompatible Registry configuration: {config}")
151 return config
153 @classmethod
154 def createFromConfig(
155 cls,
156 config: RegistryConfig | str | None = None,
157 dimensionConfig: DimensionConfig | str | None = None,
158 butlerRoot: ResourcePathExpression | None = None,
159 ) -> SqlRegistry:
160 """Create registry database and return `SqlRegistry` instance.
162 This method initializes database contents, database must be empty
163 prior to calling this method.
165 Parameters
166 ----------
167 config : `RegistryConfig` or `str`, optional
168 Registry configuration, if missing then default configuration will
169 be loaded from registry.yaml.
170 dimensionConfig : `DimensionConfig` or `str`, optional
171 Dimensions configuration, if missing then default configuration
172 will be loaded from dimensions.yaml.
173 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
174 Path to the repository root this `SqlRegistry` will manage.
176 Returns
177 -------
178 registry : `SqlRegistry`
179 A new `SqlRegistry` instance.
180 """
181 config = cls.forceRegistryConfig(config)
182 config.replaceRoot(butlerRoot)
184 if isinstance(dimensionConfig, str):
185 dimensionConfig = DimensionConfig(dimensionConfig)
186 elif dimensionConfig is None:
187 dimensionConfig = DimensionConfig()
188 elif not isinstance(dimensionConfig, DimensionConfig):
189 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
191 DatabaseClass = config.getDatabaseClass()
192 database = DatabaseClass.fromUri(
193 config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace")
194 )
195 managerTypes = RegistryManagerTypes.fromConfig(config)
196 managers = managerTypes.makeRepo(database, dimensionConfig)
197 return cls(database, RegistryDefaults(), managers)
199 @classmethod
200 def fromConfig(
201 cls,
202 config: ButlerConfig | RegistryConfig | Config | str,
203 butlerRoot: ResourcePathExpression | None = None,
204 writeable: bool = True,
205 defaults: RegistryDefaults | None = None,
206 ) -> SqlRegistry:
207 """Create `Registry` subclass instance from `config`.
209 Registry database must be initialized prior to calling this method.
211 Parameters
212 ----------
213 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
214 Registry configuration.
215 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
216 Path to the repository root this `Registry` will manage.
217 writeable : `bool`, optional
218 If `True` (default) create a read-write connection to the database.
219 defaults : `RegistryDefaults`, optional
220 Default collection search path and/or output `~CollectionType.RUN`
221 collection.
223 Returns
224 -------
225 registry : `SqlRegistry`
226 A new `SqlRegistry` subclass instance.
227 """
228 config = cls.forceRegistryConfig(config)
229 config.replaceRoot(butlerRoot)
230 DatabaseClass = config.getDatabaseClass()
231 database = DatabaseClass.fromUri(
232 config.connectionString,
233 origin=config.get("origin", 0),
234 namespace=config.get("namespace"),
235 writeable=writeable,
236 )
237 managerTypes = RegistryManagerTypes.fromConfig(config)
238 with database.session():
239 managers = managerTypes.loadRepo(database)
240 if defaults is None:
241 defaults = RegistryDefaults()
242 return cls(database, defaults, managers)
244 def __init__(
245 self,
246 database: Database,
247 defaults: RegistryDefaults,
248 managers: RegistryManagerInstances,
249 ):
250 self._db = database
251 self._managers = managers
252 self.storageClasses = StorageClassFactory()
253 # This is public to SqlRegistry's internal-to-daf_butler callers, but
254 # it is intentionally not part of RegistryShim.
255 self.dimension_record_cache = DimensionRecordCache(
256 self._managers.dimensions.universe,
257 fetch=self._managers.dimensions.fetch_cache_dict,
258 )
259 # Intentionally invoke property setter to initialize defaults. This
260 # can only be done after most of the rest of Registry has already been
261 # initialized, and must be done before the property getter is used.
262 self.defaults = defaults
263 # TODO: This is currently initialized by `make_datastore_tables`,
264 # eventually we'll need to do it during construction.
265 # The mapping is indexed by the opaque table name.
266 self._datastore_record_classes: Mapping[str, type[StoredDatastoreItemInfo]] = {}
268 def __str__(self) -> str:
269 return str(self._db)
271 def __repr__(self) -> str:
272 return f"SqlRegistry({self._db!r}, {self.dimensions!r})"
274 def isWriteable(self) -> bool:
275 """Return `True` if this registry allows write operations, and `False`
276 otherwise.
277 """
278 return self._db.isWriteable()
280 def copy(self, defaults: RegistryDefaults | None = None) -> SqlRegistry:
281 """Create a new `SqlRegistry` backed by the same data repository
282 as this one and sharing a database connection pool with it, but with
283 independent defaults and database sessions.
285 Parameters
286 ----------
287 defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional
288 Default collections and data ID values for the new registry. If
289 not provided, ``self.defaults`` will be used (but future changes
290 to either registry's defaults will not affect the other).
292 Returns
293 -------
294 copy : `SqlRegistry`
295 A new `SqlRegistry` instance with its own defaults.
296 """
297 if defaults is None:
298 # No need to copy, because `RegistryDefaults` is immutable; we
299 # effectively copy on write.
300 defaults = self.defaults
301 db = self._db.clone()
302 result = SqlRegistry(db, defaults, self._managers.clone(db))
303 result._datastore_record_classes = dict(self._datastore_record_classes)
304 result.dimension_record_cache.load_from(self.dimension_record_cache)
305 return result
307 @property
308 def dimensions(self) -> DimensionUniverse:
309 """Definitions of all dimensions recognized by this `Registry`
310 (`DimensionUniverse`).
311 """
312 return self._managers.dimensions.universe
314 @property
315 def defaults(self) -> RegistryDefaults:
316 """Default collection search path and/or output `~CollectionType.RUN`
317 collection (`~lsst.daf.butler.registry.RegistryDefaults`).
319 This is an immutable struct whose components may not be set
320 individually, but the entire struct can be set by assigning to this
321 property.
322 """
323 return self._defaults
325 @defaults.setter
326 def defaults(self, value: RegistryDefaults) -> None:
327 if value.run is not None:
328 self.registerRun(value.run)
329 value.finish(self)
330 self._defaults = value
332 def refresh(self) -> None:
333 """Refresh all in-memory state by querying the database.
335 This may be necessary to enable querying for entities added by other
336 registry instances after this one was constructed.
337 """
338 self.dimension_record_cache.reset()
339 with self._db.transaction():
340 self._managers.refresh()
342 def caching_context(self) -> contextlib.AbstractContextManager[None]:
343 """Return context manager that enables caching.
345 Returns
346 -------
347 manager
348 A context manager that enables client-side caching. Entering
349 the context returns `None`.
350 """
351 return self._managers.caching_context_manager()
353 @contextlib.contextmanager
354 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
355 """Return a context manager that represents a transaction.
357 Parameters
358 ----------
359 savepoint : `bool`
360 Whether to issue a SAVEPOINT in the database.
362 Yields
363 ------
364 `None`
365 """
366 with self._db.transaction(savepoint=savepoint):
367 yield
369 def resetConnectionPool(self) -> None:
370 """Reset SQLAlchemy connection pool for `SqlRegistry` database.
372 This operation is useful when using registry with fork-based
373 multiprocessing. To use registry across fork boundary one has to make
374 sure that there are no currently active connections (no session or
375 transaction is in progress) and connection pool is reset using this
376 method. This method should be called by the child process immediately
377 after the fork.
378 """
379 self._db._engine.dispose()
381 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
382 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
383 other data repository client.
385 Opaque table records can be added via `insertOpaqueData`, retrieved via
386 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
388 Parameters
389 ----------
390 tableName : `str`
391 Logical name of the opaque table. This may differ from the
392 actual name used in the database by a prefix and/or suffix.
393 spec : `ddl.TableSpec`
394 Specification for the table to be added.
395 """
396 self._managers.opaque.register(tableName, spec)
398 @transactional
399 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
400 """Insert records into an opaque table.
402 Parameters
403 ----------
404 tableName : `str`
405 Logical name of the opaque table. Must match the name used in a
406 previous call to `registerOpaqueTable`.
407 *data
408 Each additional positional argument is a dictionary that represents
409 a single row to be added.
410 """
411 self._managers.opaque[tableName].insert(*data)
413 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
414 """Retrieve records from an opaque table.
416 Parameters
417 ----------
418 tableName : `str`
419 Logical name of the opaque table. Must match the name used in a
420 previous call to `registerOpaqueTable`.
421 **where
422 Additional keyword arguments are interpreted as equality
423 constraints that restrict the returned rows (combined with AND);
424 keyword arguments are column names and values are the values they
425 must have.
427 Yields
428 ------
429 row : `dict`
430 A dictionary representing a single result row.
431 """
432 yield from self._managers.opaque[tableName].fetch(**where)
434 @transactional
435 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
436 """Remove records from an opaque table.
438 Parameters
439 ----------
440 tableName : `str`
441 Logical name of the opaque table. Must match the name used in a
442 previous call to `registerOpaqueTable`.
443 **where
444 Additional keyword arguments are interpreted as equality
445 constraints that restrict the deleted rows (combined with AND);
446 keyword arguments are column names and values are the values they
447 must have.
448 """
449 self._managers.opaque[tableName].delete(where.keys(), where)
451 def registerCollection(
452 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
453 ) -> bool:
454 """Add a new collection if one with the given name does not exist.
456 Parameters
457 ----------
458 name : `str`
459 The name of the collection to create.
460 type : `CollectionType`
461 Enum value indicating the type of collection to create.
462 doc : `str`, optional
463 Documentation string for the collection.
465 Returns
466 -------
467 registered : `bool`
468 Boolean indicating whether the collection was already registered
469 or was created by this call.
471 Notes
472 -----
473 This method cannot be called within transactions, as it needs to be
474 able to perform its own transaction to be concurrent.
475 """
476 _, registered = self._managers.collections.register(name, type, doc=doc)
477 return registered
479 def getCollectionType(self, name: str) -> CollectionType:
480 """Return an enumeration value indicating the type of the given
481 collection.
483 Parameters
484 ----------
485 name : `str`
486 The name of the collection.
488 Returns
489 -------
490 type : `CollectionType`
491 Enum value indicating the type of this collection.
493 Raises
494 ------
495 lsst.daf.butler.registry.MissingCollectionError
496 Raised if no collection with the given name exists.
497 """
498 return self._managers.collections.find(name).type
500 def get_collection_record(self, name: str) -> CollectionRecord:
501 """Return the record for this collection.
503 Parameters
504 ----------
505 name : `str`
506 Name of the collection for which the record is to be retrieved.
508 Returns
509 -------
510 record : `CollectionRecord`
511 The record for this collection.
512 """
513 return self._managers.collections.find(name)
515 def registerRun(self, name: str, doc: str | None = None) -> bool:
516 """Add a new run if one with the given name does not exist.
518 Parameters
519 ----------
520 name : `str`
521 The name of the run to create.
522 doc : `str`, optional
523 Documentation string for the collection.
525 Returns
526 -------
527 registered : `bool`
528 Boolean indicating whether a new run was registered. `False`
529 if it already existed.
531 Notes
532 -----
533 This method cannot be called within transactions, as it needs to be
534 able to perform its own transaction to be concurrent.
535 """
536 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
537 return registered
539 @transactional
540 def removeCollection(self, name: str) -> None:
541 """Remove the given collection from the registry.
543 Parameters
544 ----------
545 name : `str`
546 The name of the collection to remove.
548 Raises
549 ------
550 lsst.daf.butler.registry.MissingCollectionError
551 Raised if no collection with the given name exists.
552 sqlalchemy.exc.IntegrityError
553 Raised if the database rows associated with the collection are
554 still referenced by some other table, such as a dataset in a
555 datastore (for `~CollectionType.RUN` collections only) or a
556 `~CollectionType.CHAINED` collection of which this collection is
557 a child.
559 Notes
560 -----
561 If this is a `~CollectionType.RUN` collection, all datasets and quanta
562 in it will removed from the `Registry` database. This requires that
563 those datasets be removed (or at least trashed) from any datastores
564 that hold them first.
566 A collection may not be deleted as long as it is referenced by a
567 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
568 be deleted or redefined first.
569 """
570 self._managers.collections.remove(name)
572 def getCollectionChain(self, parent: str) -> tuple[str, ...]:
573 """Return the child collections in a `~CollectionType.CHAINED`
574 collection.
576 Parameters
577 ----------
578 parent : `str`
579 Name of the chained collection. Must have already been added via
580 a call to `Registry.registerCollection`.
582 Returns
583 -------
584 children : `~collections.abc.Sequence` [ `str` ]
585 An ordered sequence of collection names that are searched when the
586 given chained collection is searched.
588 Raises
589 ------
590 lsst.daf.butler.registry.MissingCollectionError
591 Raised if ``parent`` does not exist in the `Registry`.
592 lsst.daf.butler.registry.CollectionTypeError
593 Raised if ``parent`` does not correspond to a
594 `~CollectionType.CHAINED` collection.
595 """
596 record = self._managers.collections.find(parent)
597 if record.type is not CollectionType.CHAINED:
598 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
599 assert isinstance(record, ChainedCollectionRecord)
600 return record.children
602 @transactional
603 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
604 """Define or redefine a `~CollectionType.CHAINED` collection.
606 Parameters
607 ----------
608 parent : `str`
609 Name of the chained collection. Must have already been added via
610 a call to `Registry.registerCollection`.
611 children : collection expression
612 An expression defining an ordered search of child collections,
613 generally an iterable of `str`; see
614 :ref:`daf_butler_collection_expressions` for more information.
615 flatten : `bool`, optional
616 If `True` (`False` is default), recursively flatten out any nested
617 `~CollectionType.CHAINED` collections in ``children`` first.
619 Raises
620 ------
621 lsst.daf.butler.registry.MissingCollectionError
622 Raised when any of the given collections do not exist in the
623 `Registry`.
624 lsst.daf.butler.registry.CollectionTypeError
625 Raised if ``parent`` does not correspond to a
626 `~CollectionType.CHAINED` collection.
627 CollectionCycleError
628 Raised if the given collections contains a cycle.
630 Notes
631 -----
632 If this function is called within a call to ``Butler.transaction``, it
633 will hold a lock that prevents other processes from modifying the
634 parent collection until the end of the transaction. Keep these
635 transactions short.
636 """
637 children = CollectionWildcard.from_expression(children).require_ordered()
638 if flatten:
639 children = self.queryCollections(children, flattenChains=True)
641 self._managers.collections.update_chain(parent, list(children), allow_use_in_caching_context=True)
643 def getCollectionParentChains(self, collection: str) -> set[str]:
644 """Return the CHAINED collections that directly contain the given one.
646 Parameters
647 ----------
648 collection : `str`
649 Name of the collection.
651 Returns
652 -------
653 chains : `set` of `str`
654 Set of `~CollectionType.CHAINED` collection names.
655 """
656 return self._managers.collections.getParentChains(self._managers.collections.find(collection).key)
658 def getCollectionDocumentation(self, collection: str) -> str | None:
659 """Retrieve the documentation string for a collection.
661 Parameters
662 ----------
663 collection : `str`
664 Name of the collection.
666 Returns
667 -------
668 docs : `str` or `None`
669 Docstring for the collection with the given name.
670 """
671 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)
673 def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
674 """Set the documentation string for a collection.
676 Parameters
677 ----------
678 collection : `str`
679 Name of the collection.
680 doc : `str` or `None`
681 Docstring for the collection with the given name; will replace any
682 existing docstring. Passing `None` will remove any existing
683 docstring.
684 """
685 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)
687 def getCollectionSummary(self, collection: str) -> CollectionSummary:
688 """Return a summary for the given collection.
690 Parameters
691 ----------
692 collection : `str`
693 Name of the collection for which a summary is to be retrieved.
695 Returns
696 -------
697 summary : `~lsst.daf.butler.registry.CollectionSummary`
698 Summary of the dataset types and governor dimension values in
699 this collection.
700 """
701 record = self._managers.collections.find(collection)
702 return self._managers.datasets.getCollectionSummary(record)
704 def registerDatasetType(self, datasetType: DatasetType) -> bool:
705 """Add a new `DatasetType` to the Registry.
707 It is not an error to register the same `DatasetType` twice.
709 Parameters
710 ----------
711 datasetType : `DatasetType`
712 The `DatasetType` to be added.
714 Returns
715 -------
716 inserted : `bool`
717 `True` if ``datasetType`` was inserted, `False` if an identical
718 existing `DatasetType` was found. Note that in either case the
719 DatasetType is guaranteed to be defined in the Registry
720 consistently with the given definition.
722 Raises
723 ------
724 ValueError
725 Raised if the dimensions or storage class are invalid.
726 lsst.daf.butler.registry.ConflictingDefinitionError
727 Raised if this `DatasetType` is already registered with a different
728 definition.
730 Notes
731 -----
732 This method cannot be called within transactions, as it needs to be
733 able to perform its own transaction to be concurrent.
734 """
735 return self._managers.datasets.register(datasetType)
737 def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
738 """Remove the named `DatasetType` from the registry.
740 .. warning::
742 Registry implementations can cache the dataset type definitions.
743 This means that deleting the dataset type definition may result in
744 unexpected behavior from other butler processes that are active
745 that have not seen the deletion.
747 Parameters
748 ----------
749 name : `str` or `tuple` [`str`]
750 Name of the type to be removed or tuple containing a list of type
751 names to be removed. Wildcards are allowed.
753 Raises
754 ------
755 lsst.daf.butler.registry.OrphanedRecordError
756 Raised if an attempt is made to remove the dataset type definition
757 when there are already datasets associated with it.
759 Notes
760 -----
761 If the dataset type is not registered the method will return without
762 action.
763 """
764 for datasetTypeExpression in ensure_iterable(name):
765 # Catch any warnings from the caller specifying a component
766 # dataset type. This will result in an error later but the
767 # warning could be confusing when the caller is not querying
768 # anything.
769 with warnings.catch_warnings():
770 warnings.simplefilter("ignore", category=FutureWarning)
771 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
772 if not datasetTypes:
773 _LOG.info("Dataset type %r not defined", datasetTypeExpression)
774 else:
775 for datasetType in datasetTypes:
776 self._managers.datasets.remove(datasetType.name)
777 _LOG.info("Removed dataset type %r", datasetType.name)
779 def getDatasetType(self, name: str) -> DatasetType:
780 """Get the `DatasetType`.
782 Parameters
783 ----------
784 name : `str`
785 Name of the type.
787 Returns
788 -------
789 type : `DatasetType`
790 The `DatasetType` associated with the given name.
792 Raises
793 ------
794 lsst.daf.butler.registry.MissingDatasetTypeError
795 Raised if the requested dataset type has not been registered.
797 Notes
798 -----
799 This method handles component dataset types automatically, though most
800 other registry operations do not.
801 """
802 parent_name, component = DatasetType.splitDatasetTypeName(name)
803 storage = self._managers.datasets[parent_name]
804 if component is None:
805 return storage.datasetType
806 else:
807 return storage.datasetType.makeComponentDatasetType(component)
809 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
810 """Test whether the given dataset ID generation mode is supported by
811 `insertDatasets`.
813 Parameters
814 ----------
815 mode : `DatasetIdGenEnum`
816 Enum value for the mode to test.
818 Returns
819 -------
820 supported : `bool`
821 Whether the given mode is supported.
822 """
823 return self._managers.datasets.supportsIdGenerationMode(mode)
825 def findDataset(
826 self,
827 datasetType: DatasetType | str,
828 dataId: DataId | None = None,
829 *,
830 collections: CollectionArgType | None = None,
831 timespan: Timespan | None = None,
832 datastore_records: bool = False,
833 **kwargs: Any,
834 ) -> DatasetRef | None:
835 """Find a dataset given its `DatasetType` and data ID.
837 This can be used to obtain a `DatasetRef` that permits the dataset to
838 be read from a `Datastore`. If the dataset is a component and can not
839 be found using the provided dataset type, a dataset ref for the parent
840 will be returned instead but with the correct dataset type.
842 Parameters
843 ----------
844 datasetType : `DatasetType` or `str`
845 A `DatasetType` or the name of one. If this is a `DatasetType`
846 instance, its storage class will be respected and propagated to
847 the output, even if it differs from the dataset type definition
848 in the registry, as long as the storage classes are convertible.
849 dataId : `dict` or `DataCoordinate`, optional
850 A `dict`-like object containing the `Dimension` links that identify
851 the dataset within a collection.
852 collections : collection expression, optional
853 An expression that fully or partially identifies the collections to
854 search for the dataset; see
855 :ref:`daf_butler_collection_expressions` for more information.
856 Defaults to ``self.defaults.collections``.
857 timespan : `Timespan`, optional
858 A timespan that the validity range of the dataset must overlap.
859 If not provided, any `~CollectionType.CALIBRATION` collections
860 matched by the ``collections`` argument will not be searched.
861 datastore_records : `bool`, optional
862 Whether to attach datastore records to the `DatasetRef`.
863 **kwargs
864 Additional keyword arguments passed to
865 `DataCoordinate.standardize` to convert ``dataId`` to a true
866 `DataCoordinate` or augment an existing one.
868 Returns
869 -------
870 ref : `DatasetRef`
871 A reference to the dataset, or `None` if no matching Dataset
872 was found.
874 Raises
875 ------
876 lsst.daf.butler.registry.NoDefaultCollectionError
877 Raised if ``collections`` is `None` and
878 ``self.defaults.collections`` is `None`.
879 LookupError
880 Raised if one or more data ID keys are missing.
881 lsst.daf.butler.registry.MissingDatasetTypeError
882 Raised if the dataset type does not exist.
883 lsst.daf.butler.registry.MissingCollectionError
884 Raised if any of ``collections`` does not exist in the registry.
886 Notes
887 -----
888 This method simply returns `None` and does not raise an exception even
889 when the set of collections searched is intrinsically incompatible with
890 the dataset type, e.g. if ``datasetType.isCalibration() is False``, but
891 only `~CollectionType.CALIBRATION` collections are being searched.
892 This may make it harder to debug some lookup failures, but the behavior
893 is intentional; we consider it more important that failed searches are
894 reported consistently, regardless of the reason, and that adding
895 additional collections that do not contain a match to the search path
896 never changes the behavior.
898 This method handles component dataset types automatically, though most
899 other registry operations do not.
900 """
901 if collections is None:
902 if not self.defaults.collections:
903 raise NoDefaultCollectionError(
904 "No collections provided to findDataset, and no defaults from registry construction."
905 )
906 collections = self.defaults.collections
907 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache)
908 with backend.caching_context():
909 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
910 if collection_wildcard.empty():
911 return None
912 matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
913 resolved_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType)
914 dataId = DataCoordinate.standardize(
915 dataId,
916 dimensions=resolved_dataset_type.dimensions,
917 universe=self.dimensions,
918 defaults=self.defaults.dataId,
919 **kwargs,
920 )
921 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.dimensions.governors}
922 (filtered_collections,) = backend.filter_dataset_collections(
923 [resolved_dataset_type],
924 matched_collections,
925 governor_constraints=governor_constraints,
926 ).values()
927 if not filtered_collections:
928 return None
929 if timespan is None:
930 filtered_collections = [
931 collection_record
932 for collection_record in filtered_collections
933 if collection_record.type is not CollectionType.CALIBRATION
934 ]
935 if filtered_collections:
936 requested_columns = {"dataset_id", "run", "collection"}
937 with backend.context() as context:
938 predicate = context.make_data_coordinate_predicate(
939 dataId.subset(resolved_dataset_type.dimensions), full=False
940 )
941 if timespan is not None:
942 requested_columns.add("timespan")
943 predicate = predicate.logical_and(
944 context.make_timespan_overlap_predicate(
945 DatasetColumnTag(resolved_dataset_type.name, "timespan"), timespan
946 )
947 )
948 relation = backend.make_dataset_query_relation(
949 resolved_dataset_type, filtered_collections, requested_columns, context
950 ).with_rows_satisfying(predicate)
951 rows = list(context.fetch_iterable(relation))
952 else:
953 rows = []
954 if not rows:
955 return None
956 elif len(rows) == 1:
957 best_row = rows[0]
958 else:
959 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
960 collection_tag = DatasetColumnTag(resolved_dataset_type.name, "collection")
961 row_iter = iter(rows)
962 best_row = next(row_iter)
963 best_rank = rank_by_collection_key[best_row[collection_tag]]
964 have_tie = False
965 for row in row_iter:
966 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
967 best_row = row
968 best_rank = rank
969 have_tie = False
970 elif rank == best_rank:
971 have_tie = True
972 assert timespan is not None, "Rank ties should be impossible given DB constraints."
973 if have_tie:
974 raise CalibrationLookupError(
975 f"Ambiguous calibration lookup for {resolved_dataset_type.name} in collections "
976 f"{collection_wildcard.strings} with timespan {timespan}."
977 )
978 reader = queries.DatasetRefReader(
979 resolved_dataset_type,
980 translate_collection=lambda k: self._managers.collections[k].name,
981 )
982 ref = reader.read(best_row, data_id=dataId)
983 if datastore_records:
984 ref = self.get_datastore_records(ref)
986 return ref
988 @transactional
989 def insertDatasets(
990 self,
991 datasetType: DatasetType | str,
992 dataIds: Iterable[DataId],
993 run: str | None = None,
994 expand: bool = True,
995 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
996 ) -> list[DatasetRef]:
997 """Insert one or more datasets into the `Registry`.
999 This always adds new datasets; to associate existing datasets with
1000 a new collection, use ``associate``.
1002 Parameters
1003 ----------
1004 datasetType : `DatasetType` or `str`
1005 A `DatasetType` or the name of one.
1006 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
1007 Dimension-based identifiers for the new datasets.
1008 run : `str`, optional
1009 The name of the run that produced the datasets. Defaults to
1010 ``self.defaults.run``.
1011 expand : `bool`, optional
1012 If `True` (default), expand data IDs as they are inserted. This is
1013 necessary in general to allow datastore to generate file templates,
1014 but it may be disabled if the caller can guarantee this is
1015 unnecessary.
1016 idGenerationMode : `DatasetIdGenEnum`, optional
1017 Specifies option for generating dataset IDs. By default unique IDs
1018 are generated for each inserted dataset.
1020 Returns
1021 -------
1022 refs : `list` of `DatasetRef`
1023 Resolved `DatasetRef` instances for all given data IDs (in the same
1024 order).
1026 Raises
1027 ------
1028 lsst.daf.butler.registry.DatasetTypeError
1029 Raised if ``datasetType`` is not known to registry.
1030 lsst.daf.butler.registry.CollectionTypeError
1031 Raised if ``run`` collection type is not `~CollectionType.RUN`.
1032 lsst.daf.butler.registry.NoDefaultCollectionError
1033 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
1034 lsst.daf.butler.registry.ConflictingDefinitionError
1035 If a dataset with the same dataset type and data ID as one of those
1036 given already exists in ``run``.
1037 lsst.daf.butler.registry.MissingCollectionError
1038 Raised if ``run`` does not exist in the registry.
1039 """
1040 if isinstance(datasetType, DatasetType):
1041 storage = self._managers.datasets.find(datasetType.name)
1042 if storage is None:
1043 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
1044 else:
1045 storage = self._managers.datasets.find(datasetType)
1046 if storage is None:
1047 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
1048 if run is None:
1049 if self.defaults.run is None:
1050 raise NoDefaultCollectionError(
1051 "No run provided to insertDatasets, and no default from registry construction."
1052 )
1053 run = self.defaults.run
1054 runRecord = self._managers.collections.find(run)
1055 if runRecord.type is not CollectionType.RUN:
1056 raise CollectionTypeError(
1057 f"Given collection is of type {runRecord.type.name}; RUN collection required."
1058 )
1059 assert isinstance(runRecord, RunRecord)
1060 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
1061 if expand:
1062 expandedDataIds = [
1063 self.expandDataId(dataId, dimensions=storage.datasetType.dimensions)
1064 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
1065 ]
1066 else:
1067 expandedDataIds = [
1068 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
1069 ]
1070 try:
1071 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
1072 if self._managers.obscore:
1073 context = queries.SqlQueryContext(self._db, self._managers.column_types)
1074 self._managers.obscore.add_datasets(refs, context)
1075 except sqlalchemy.exc.IntegrityError as err:
1076 raise ConflictingDefinitionError(
1077 "A database constraint failure was triggered by inserting "
1078 f"one or more datasets of type {storage.datasetType} into "
1079 f"collection '{run}'. "
1080 "This probably means a dataset with the same data ID "
1081 "and dataset type already exists, but it may also mean a "
1082 "dimension row is missing."
1083 ) from err
1084 return refs
1086 @transactional
1087 def _importDatasets(
1088 self,
1089 datasets: Iterable[DatasetRef],
1090 expand: bool = True,
1091 ) -> list[DatasetRef]:
1092 """Import one or more datasets into the `Registry`.
1094 Difference from `insertDatasets` method is that this method accepts
1095 `DatasetRef` instances which should already be resolved and have a
1096 dataset ID. If registry supports globally-unique dataset IDs (e.g.
1097 `uuid.UUID`) then datasets which already exist in the registry will be
1098 ignored if imported again.
1100 Parameters
1101 ----------
1102 datasets : `~collections.abc.Iterable` of `DatasetRef`
1103 Datasets to be inserted. All `DatasetRef` instances must have
1104 identical ``datasetType`` and ``run`` attributes. ``run``
1105 attribute can be `None` and defaults to ``self.defaults.run``.
1106 Datasets can specify ``id`` attribute which will be used for
1107 inserted datasets. All dataset IDs must have the same type
1108 (`int` or `uuid.UUID`), if type of dataset IDs does not match
1109 configured backend then IDs will be ignored and new IDs will be
1110 generated by backend.
1111 expand : `bool`, optional
1112 If `True` (default), expand data IDs as they are inserted. This is
1113 necessary in general, but it may be disabled if the caller can
1114 guarantee this is unnecessary.
1116 Returns
1117 -------
1118 refs : `list` of `DatasetRef`
1119 Resolved `DatasetRef` instances for all given data IDs (in the same
1120 order). If any of ``datasets`` has an ID which already exists in
1121 the database then it will not be inserted or updated, but a
1122 resolved `DatasetRef` will be returned for it in any case.
1124 Raises
1125 ------
1126 lsst.daf.butler.registry.NoDefaultCollectionError
1127 Raised if ``run`` is `None` and ``self.defaults.run`` is `None`.
1128 lsst.daf.butler.registry.DatasetTypeError
1129 Raised if datasets correspond to more than one dataset type or
1130 dataset type is not known to registry.
1131 lsst.daf.butler.registry.ConflictingDefinitionError
1132 If a dataset with the same dataset type and data ID as one of those
1133 given already exists in ``run``.
1134 lsst.daf.butler.registry.MissingCollectionError
1135 Raised if ``run`` does not exist in the registry.
1137 Notes
1138 -----
1139 This method is considered package-private and internal to Butler
1140 implementation. Clients outside daf_butler package should not use this
1141 method.
1142 """
1143 datasets = list(datasets)
1144 if not datasets:
1145 # nothing to do
1146 return []
1148 # find dataset type
1149 datasetTypes = {dataset.datasetType for dataset in datasets}
1150 if len(datasetTypes) != 1:
1151 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
1152 datasetType = datasetTypes.pop()
1154 # get storage handler for this dataset type
1155 storage = self._managers.datasets.find(datasetType.name)
1156 if storage is None:
1157 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
1159 # find run name
1160 runs = {dataset.run for dataset in datasets}
1161 if len(runs) != 1:
1162 raise ValueError(f"Multiple run names in input datasets: {runs}")
1163 run = runs.pop()
1165 runRecord = self._managers.collections.find(run)
1166 if runRecord.type is not CollectionType.RUN:
1167 raise CollectionTypeError(
1168 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
1169 " RUN collection required."
1170 )
1171 assert isinstance(runRecord, RunRecord)
1173 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
1174 if expand:
1175 expandedDatasets = [
1176 dataset.expanded(self.expandDataId(dataset.dataId, dimensions=storage.datasetType.dimensions))
1177 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
1178 ]
1179 else:
1180 expandedDatasets = [
1181 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
1182 for dataset in datasets
1183 ]
1185 try:
1186 refs = list(storage.import_(runRecord, expandedDatasets))
1187 if self._managers.obscore:
1188 context = queries.SqlQueryContext(self._db, self._managers.column_types)
1189 self._managers.obscore.add_datasets(refs, context)
1190 except sqlalchemy.exc.IntegrityError as err:
1191 raise ConflictingDefinitionError(
1192 "A database constraint failure was triggered by inserting "
1193 f"one or more datasets of type {storage.datasetType} into "
1194 f"collection '{run}'. "
1195 "This probably means a dataset with the same data ID "
1196 "and dataset type already exists, but it may also mean a "
1197 "dimension row is missing."
1198 ) from err
1199 # Check that imported dataset IDs match the input
1200 for imported_ref, input_ref in zip(refs, datasets, strict=True):
1201 if imported_ref.id != input_ref.id:
1202 raise RegistryConsistencyError(
1203 "Imported dataset ID differs from input dataset ID, "
1204 f"input ref: {input_ref}, imported ref: {imported_ref}"
1205 )
1206 return refs
1208 def getDataset(self, id: DatasetId) -> DatasetRef | None:
1209 """Retrieve a Dataset entry.
1211 Parameters
1212 ----------
1213 id : `DatasetId`
1214 The unique identifier for the dataset.
1216 Returns
1217 -------
1218 ref : `DatasetRef` or `None`
1219 A ref to the Dataset, or `None` if no matching Dataset
1220 was found.
1221 """
1222 return self._managers.datasets.getDatasetRef(id)
1224 @transactional
1225 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
1226 """Remove datasets from the Registry.
1228 The datasets will be removed unconditionally from all collections, and
1229 any `Quantum` that consumed this dataset will instead be marked with
1230 having a NULL input. `Datastore` records will *not* be deleted; the
1231 caller is responsible for ensuring that the dataset has already been
1232 removed from all Datastores.
1234 Parameters
1235 ----------
1236 refs : `~collections.abc.Iterable` [`DatasetRef`]
1237 References to the datasets to be removed. Must include a valid
1238 ``id`` attribute, and should be considered invalidated upon return.
1240 Raises
1241 ------
1242 lsst.daf.butler.AmbiguousDatasetError
1243 Raised if any ``ref.id`` is `None`.
1244 lsst.daf.butler.registry.OrphanedRecordError
1245 Raised if any dataset is still present in any `Datastore`.
1246 """
1247 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
1248 for datasetType, refsForType in progress.iter_item_chunks(
1249 DatasetRef.iter_by_type(refs), desc="Removing datasets by type"
1250 ):
1251 storage = self._managers.datasets[datasetType.name]
1252 try:
1253 storage.delete(refsForType)
1254 except sqlalchemy.exc.IntegrityError as err:
1255 raise OrphanedRecordError(
1256 "One or more datasets is still present in one or more Datastores."
1257 ) from err
1259 @transactional
1260 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
1261 """Add existing datasets to a `~CollectionType.TAGGED` collection.
1263 If a DatasetRef with the same exact ID is already in a collection
1264 nothing is changed. If a `DatasetRef` with the same `DatasetType` and
1265 data ID but with different ID exists in the collection,
1266 `~lsst.daf.butler.registry.ConflictingDefinitionError` is raised.
1268 Parameters
1269 ----------
1270 collection : `str`
1271 Indicates the collection the datasets should be associated with.
1272 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
1273 An iterable of resolved `DatasetRef` instances that already exist
1274 in this `Registry`.
1276 Raises
1277 ------
1278 lsst.daf.butler.registry.ConflictingDefinitionError
1279 If a Dataset with the given `DatasetRef` already exists in the
1280 given collection.
1281 lsst.daf.butler.registry.MissingCollectionError
1282 Raised if ``collection`` does not exist in the registry.
1283 lsst.daf.butler.registry.CollectionTypeError
1284 Raise adding new datasets to the given ``collection`` is not
1285 allowed.
1286 """
1287 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
1288 collectionRecord = self._managers.collections.find(collection)
1289 if collectionRecord.type is not CollectionType.TAGGED:
1290 raise CollectionTypeError(
1291 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
1292 )
1293 for datasetType, refsForType in progress.iter_item_chunks(
1294 DatasetRef.iter_by_type(refs), desc="Associating datasets by type"
1295 ):
1296 storage = self._managers.datasets[datasetType.name]
1297 try:
1298 storage.associate(collectionRecord, refsForType)
1299 if self._managers.obscore:
1300 # If a TAGGED collection is being monitored by ObsCore
1301 # manager then we may need to save the dataset.
1302 context = queries.SqlQueryContext(self._db, self._managers.column_types)
1303 self._managers.obscore.associate(refsForType, collectionRecord, context)
1304 except sqlalchemy.exc.IntegrityError as err:
1305 raise ConflictingDefinitionError(
1306 f"Constraint violation while associating dataset of type {datasetType.name} with "
1307 f"collection {collection}. This probably means that one or more datasets with the same "
1308 "dataset type and data ID already exist in the collection, but it may also indicate "
1309 "that the datasets do not exist."
1310 ) from err
1312 @transactional
1313 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
1314 """Remove existing datasets from a `~CollectionType.TAGGED` collection.
1316 ``collection`` and ``ref`` combinations that are not currently
1317 associated are silently ignored.
1319 Parameters
1320 ----------
1321 collection : `str`
1322 The collection the datasets should no longer be associated with.
1323 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
1324 An iterable of resolved `DatasetRef` instances that already exist
1325 in this `Registry`.
1327 Raises
1328 ------
1329 lsst.daf.butler.AmbiguousDatasetError
1330 Raised if any of the given dataset references is unresolved.
1331 lsst.daf.butler.registry.MissingCollectionError
1332 Raised if ``collection`` does not exist in the registry.
1333 lsst.daf.butler.registry.CollectionTypeError
1334 Raise adding new datasets to the given ``collection`` is not
1335 allowed.
1336 """
1337 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
1338 collectionRecord = self._managers.collections.find(collection)
1339 if collectionRecord.type is not CollectionType.TAGGED:
1340 raise CollectionTypeError(
1341 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
1342 )
1343 for datasetType, refsForType in progress.iter_item_chunks(
1344 DatasetRef.iter_by_type(refs), desc="Disassociating datasets by type"
1345 ):
1346 storage = self._managers.datasets[datasetType.name]
1347 storage.disassociate(collectionRecord, refsForType)
1348 if self._managers.obscore:
1349 self._managers.obscore.disassociate(refsForType, collectionRecord)
1351 @transactional
1352 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
1353 """Associate one or more datasets with a calibration collection and a
1354 validity range within it.
1356 Parameters
1357 ----------
1358 collection : `str`
1359 The name of an already-registered `~CollectionType.CALIBRATION`
1360 collection.
1361 refs : `~collections.abc.Iterable` [ `DatasetRef` ]
1362 Datasets to be associated.
1363 timespan : `Timespan`
1364 The validity range for these datasets within the collection.
1366 Raises
1367 ------
1368 lsst.daf.butler.AmbiguousDatasetError
1369 Raised if any of the given `DatasetRef` instances is unresolved.
1370 lsst.daf.butler.registry.ConflictingDefinitionError
1371 Raised if the collection already contains a different dataset with
1372 the same `DatasetType` and data ID and an overlapping validity
1373 range.
1374 lsst.daf.butler.registry.CollectionTypeError
1375 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
1376 collection or if one or more datasets are of a dataset type for
1377 which `DatasetType.isCalibration` returns `False`.
1378 """
1379 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
1380 collectionRecord = self._managers.collections.find(collection)
1381 for datasetType, refsForType in progress.iter_item_chunks(
1382 DatasetRef.iter_by_type(refs), desc="Certifying datasets by type"
1383 ):
1384 storage = self._managers.datasets[datasetType.name]
1385 storage.certify(
1386 collectionRecord,
1387 refsForType,
1388 timespan,
1389 context=queries.SqlQueryContext(self._db, self._managers.column_types),
1390 )
1392 @transactional
1393 def decertify(
1394 self,
1395 collection: str,
1396 datasetType: str | DatasetType,
1397 timespan: Timespan,
1398 *,
1399 dataIds: Iterable[DataId] | None = None,
1400 ) -> None:
1401 """Remove or adjust datasets to clear a validity range within a
1402 calibration collection.
1404 Parameters
1405 ----------
1406 collection : `str`
1407 The name of an already-registered `~CollectionType.CALIBRATION`
1408 collection.
1409 datasetType : `str` or `DatasetType`
1410 Name or `DatasetType` instance for the datasets to be decertified.
1411 timespan : `Timespan`, optional
1412 The validity range to remove datasets from within the collection.
1413 Datasets that overlap this range but are not contained by it will
1414 have their validity ranges adjusted to not overlap it, which may
1415 split a single dataset validity range into two.
1416 dataIds : iterable [`dict` or `DataCoordinate`], optional
1417 Data IDs that should be decertified within the given validity range
1418 If `None`, all data IDs for ``self.datasetType`` will be
1419 decertified.
1421 Raises
1422 ------
1423 lsst.daf.butler.registry.CollectionTypeError
1424 Raised if ``collection`` is not a `~CollectionType.CALIBRATION`
1425 collection or if ``datasetType.isCalibration() is False``.
1426 """
1427 collectionRecord = self._managers.collections.find(collection)
1428 if isinstance(datasetType, str):
1429 storage = self._managers.datasets[datasetType]
1430 else:
1431 storage = self._managers.datasets[datasetType.name]
1432 standardizedDataIds = None
1433 if dataIds is not None:
1434 standardizedDataIds = [
1435 DataCoordinate.standardize(d, dimensions=storage.datasetType.dimensions) for d in dataIds
1436 ]
1437 storage.decertify(
1438 collectionRecord,
1439 timespan,
1440 dataIds=standardizedDataIds,
1441 context=queries.SqlQueryContext(self._db, self._managers.column_types),
1442 )
1444 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
1445 """Return an object that allows a new `Datastore` instance to
1446 communicate with this `Registry`.
1448 Returns
1449 -------
1450 manager : `~.interfaces.DatastoreRegistryBridgeManager`
1451 Object that mediates communication between this `Registry` and its
1452 associated datastores.
1453 """
1454 return self._managers.datastores
1456 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
1457 """Retrieve datastore locations for a given dataset.
1459 Parameters
1460 ----------
1461 ref : `DatasetRef`
1462 A reference to the dataset for which to retrieve storage
1463 information.
1465 Returns
1466 -------
1467 datastores : `~collections.abc.Iterable` [ `str` ]
1468 All the matching datastores holding this dataset.
1470 Raises
1471 ------
1472 lsst.daf.butler.AmbiguousDatasetError
1473 Raised if ``ref.id`` is `None`.
1474 """
1475 return self._managers.datastores.findDatastores(ref)
1477 def expandDataId(
1478 self,
1479 dataId: DataId | None = None,
1480 *,
1481 dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None,
1482 graph: DimensionGraph | None = None,
1483 records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
1484 withDefaults: bool = True,
1485 **kwargs: Any,
1486 ) -> DataCoordinate:
1487 """Expand a dimension-based data ID to include additional information.
1489 Parameters
1490 ----------
1491 dataId : `DataCoordinate` or `dict`, optional
1492 Data ID to be expanded; augmented and overridden by ``kwargs``.
1493 dimensions : `~collections.abc.Iterable` [ `str` ], \
1494 `DimensionGroup`, or `DimensionGraph`, optional
1495 The dimensions to be identified by the new `DataCoordinate`.
1496 If not provided, will be inferred from the keys of ``mapping`` and
1497 ``**kwargs``, and ``universe`` must be provided unless ``mapping``
1498 is already a `DataCoordinate`.
1499 graph : `DimensionGraph`, optional
1500 Like ``dimensions``, but as a ``DimensionGraph`` instance. Ignored
1501 if ``dimensions`` is provided. Deprecated and will be removed
1502 after v27.
1503 records : `~collections.abc.Mapping` [`str`, `DimensionRecord`], \
1504 optional
1505 Dimension record data to use before querying the database for that
1506 data, keyed by element name.
1507 withDefaults : `bool`, optional
1508 Utilize ``self.defaults.dataId`` to fill in missing governor
1509 dimension key-value pairs. Defaults to `True` (i.e. defaults are
1510 used).
1511 **kwargs
1512 Additional keywords are treated like additional key-value pairs for
1513 ``dataId``, extending and overriding.
1515 Returns
1516 -------
1517 expanded : `DataCoordinate`
1518 A data ID that includes full metadata for all of the dimensions it
1519 identifies, i.e. guarantees that ``expanded.hasRecords()`` and
1520 ``expanded.hasFull()`` both return `True`.
1522 Raises
1523 ------
1524 lsst.daf.butler.registry.DataIdError
1525 Raised when ``dataId`` or keyword arguments specify unknown
1526 dimensions or values, or when a resulting data ID contains
1527 contradictory key-value pairs, according to dimension
1528 relationships.
1530 Notes
1531 -----
1532 This method cannot be relied upon to reject invalid data ID values
1533 for dimensions that do actually not have any record columns. For
1534 efficiency reasons the records for these dimensions (which have only
1535 dimension key values that are given by the caller) may be constructed
1536 directly rather than obtained from the registry database.
1537 """
1538 if not withDefaults:
1539 defaults = None
1540 else:
1541 defaults = self.defaults.dataId
1542 standardized = DataCoordinate.standardize(
1543 dataId,
1544 graph=graph,
1545 dimensions=dimensions,
1546 universe=self.dimensions,
1547 defaults=defaults,
1548 **kwargs,
1549 )
1550 if standardized.hasRecords():
1551 return standardized
1552 if records is None:
1553 records = {}
1554 elif isinstance(records, NamedKeyMapping):
1555 records = records.byName()
1556 else:
1557 records = dict(records)
1558 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
1559 for element_name in dataId.dimensions.elements:
1560 records[element_name] = dataId.records[element_name]
1561 keys = dict(standardized.mapping)
1562 for element_name in standardized.dimensions.lookup_order:
1563 element = self.dimensions[element_name]
1564 record = records.get(element_name, ...) # Use ... to mean not found; None might mean NULL
1565 if record is ...:
1566 if element_name in self.dimensions.dimensions.names and keys.get(element_name) is None:
1567 if element_name in standardized.dimensions.required:
1568 raise DimensionNameError(
1569 f"No value or null value for required dimension {element_name}."
1570 )
1571 keys[element_name] = None
1572 record = None
1573 else:
1574 record = self._managers.dimensions.fetch_one(
1575 element_name,
1576 DataCoordinate.standardize(keys, dimensions=element.minimal_group),
1577 self.dimension_record_cache,
1578 )
1579 records[element_name] = record
1580 if record is not None:
1581 for d in element.implied:
1582 value = getattr(record, d.name)
1583 if keys.setdefault(d.name, value) != value:
1584 raise InconsistentDataIdError(
1585 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
1586 f"but {element_name} implies {d.name}={value!r}."
1587 )
1588 else:
1589 if element_name in standardized.dimensions.required:
1590 raise DataIdValueError(
1591 f"Could not fetch record for required dimension {element.name} via keys {keys}."
1592 )
1593 if element.defines_relationships:
1594 raise InconsistentDataIdError(
1595 f"Could not fetch record for element {element_name} via keys {keys}, "
1596 "but it is marked as defining relationships; this means one or more dimensions are "
1597 "have inconsistent values.",
1598 )
1599 for d in element.implied:
1600 keys.setdefault(d.name, None)
1601 records.setdefault(d.name, None)
1602 return DataCoordinate.standardize(keys, dimensions=standardized.dimensions).expanded(records=records)
1604 def insertDimensionData(
1605 self,
1606 element: DimensionElement | str,
1607 *data: Mapping[str, Any] | DimensionRecord,
1608 conform: bool = True,
1609 replace: bool = False,
1610 skip_existing: bool = False,
1611 ) -> None:
1612 """Insert one or more dimension records into the database.
1614 Parameters
1615 ----------
1616 element : `DimensionElement` or `str`
1617 The `DimensionElement` or name thereof that identifies the table
1618 records will be inserted into.
1619 *data : `dict` or `DimensionRecord`
1620 One or more records to insert.
1621 conform : `bool`, optional
1622 If `False` (`True` is default) perform no checking or conversions,
1623 and assume that ``element`` is a `DimensionElement` instance and
1624 ``data`` is a one or more `DimensionRecord` instances of the
1625 appropriate subclass.
1626 replace : `bool`, optional
1627 If `True` (`False` is default), replace existing records in the
1628 database if there is a conflict.
1629 skip_existing : `bool`, optional
1630 If `True` (`False` is default), skip insertion if a record with
1631 the same primary key values already exists. Unlike
1632 `syncDimensionData`, this will not detect when the given record
1633 differs from what is in the database, and should not be used when
1634 this is a concern.
1635 """
1636 if isinstance(element, str):
1637 element = self.dimensions[element]
1638 if conform:
1639 records = [
1640 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
1641 ]
1642 else:
1643 # Ignore typing since caller said to trust them with conform=False.
1644 records = data # type: ignore
1645 if element.name in self.dimension_record_cache:
1646 self.dimension_record_cache.reset()
1647 self._managers.dimensions.insert(
1648 element,
1649 *records,
1650 replace=replace,
1651 skip_existing=skip_existing,
1652 )
1654 def syncDimensionData(
1655 self,
1656 element: DimensionElement | str,
1657 row: Mapping[str, Any] | DimensionRecord,
1658 conform: bool = True,
1659 update: bool = False,
1660 ) -> bool | dict[str, Any]:
1661 """Synchronize the given dimension record with the database, inserting
1662 if it does not already exist and comparing values if it does.
1664 Parameters
1665 ----------
1666 element : `DimensionElement` or `str`
1667 The `DimensionElement` or name thereof that identifies the table
1668 records will be inserted into.
1669 row : `dict` or `DimensionRecord`
1670 The record to insert.
1671 conform : `bool`, optional
1672 If `False` (`True` is default) perform no checking or conversions,
1673 and assume that ``element`` is a `DimensionElement` instance and
1674 ``data`` is a `DimensionRecord` instances of the appropriate
1675 subclass.
1676 update : `bool`, optional
1677 If `True` (`False` is default), update the existing record in the
1678 database if there is a conflict.
1680 Returns
1681 -------
1682 inserted_or_updated : `bool` or `dict`
1683 `True` if a new row was inserted, `False` if no changes were
1684 needed, or a `dict` mapping updated column names to their old
1685 values if an update was performed (only possible if
1686 ``update=True``).
1688 Raises
1689 ------
1690 lsst.daf.butler.registry.ConflictingDefinitionError
1691 Raised if the record exists in the database (according to primary
1692 key lookup) but is inconsistent with the given one.
1693 """
1694 if conform:
1695 if isinstance(element, str):
1696 element = self.dimensions[element]
1697 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
1698 else:
1699 # Ignore typing since caller said to trust them with conform=False.
1700 record = row # type: ignore
1701 if record.definition.name in self.dimension_record_cache:
1702 self.dimension_record_cache.reset()
1703 return self._managers.dimensions.sync(record, update=update)
1705 def queryDatasetTypes(
1706 self,
1707 expression: Any = ...,
1708 *,
1709 components: bool | _Marker = _DefaultMarker,
1710 missing: list[str] | None = None,
1711 ) -> Iterable[DatasetType]:
1712 """Iterate over the dataset types whose names match an expression.
1714 Parameters
1715 ----------
1716 expression : dataset type expression, optional
1717 An expression that fully or partially identifies the dataset types
1718 to return, such as a `str`, `re.Pattern`, or iterable thereof.
1719 ``...`` can be used to return all dataset types, and is the
1720 default. See :ref:`daf_butler_dataset_type_expressions` for more
1721 information.
1722 components : `bool`, optional
1723 Must be `False`. Provided only for backwards compatibility. After
1724 v27 this argument will be removed entirely.
1725 missing : `list` of `str`, optional
1726 String dataset type names that were explicitly given (i.e. not
1727 regular expression patterns) but not found will be appended to this
1728 list, if it is provided.
1730 Returns
1731 -------
1732 dataset_types : `~collections.abc.Iterable` [ `DatasetType`]
1733 An `~collections.abc.Iterable` of `DatasetType` instances whose
1734 names match ``expression``.
1736 Raises
1737 ------
1738 lsst.daf.butler.registry.DatasetTypeExpressionError
1739 Raised when ``expression`` is invalid.
1740 """
1741 if components is not _DefaultMarker:
1742 if components is not False:
1743 raise DatasetTypeError(
1744 "Dataset component queries are no longer supported by Registry. Use "
1745 "DatasetType methods to obtain components from parent dataset types instead."
1746 )
1747 else:
1748 warnings.warn(
1749 "The components parameter is ignored. It will be removed after v27.",
1750 category=FutureWarning,
1751 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
1752 )
1753 wildcard = DatasetTypeWildcard.from_expression(expression)
1754 return self._managers.datasets.resolve_wildcard(wildcard, missing=missing)
1756 def queryCollections(
1757 self,
1758 expression: Any = ...,
1759 datasetType: DatasetType | None = None,
1760 collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
1761 flattenChains: bool = False,
1762 includeChains: bool | None = None,
1763 ) -> Sequence[str]:
1764 """Iterate over the collections whose names match an expression.
1766 Parameters
1767 ----------
1768 expression : collection expression, optional
1769 An expression that identifies the collections to return, such as
1770 a `str` (for full matches or partial matches via globs),
1771 `re.Pattern` (for partial matches), or iterable thereof. ``...``
1772 can be used to return all collections, and is the default.
1773 See :ref:`daf_butler_collection_expressions` for more information.
1774 datasetType : `DatasetType`, optional
1775 If provided, only yield collections that may contain datasets of
1776 this type. This is a conservative approximation in general; it may
1777 yield collections that do not have any such datasets.
1778 collectionTypes : `~collections.abc.Set` [`CollectionType`] or \
1779 `CollectionType`, optional
1780 If provided, only yield collections of these types.
1781 flattenChains : `bool`, optional
1782 If `True` (`False` is default), recursively yield the child
1783 collections of matching `~CollectionType.CHAINED` collections.
1784 includeChains : `bool`, optional
1785 If `True`, yield records for matching `~CollectionType.CHAINED`
1786 collections. Default is the opposite of ``flattenChains``: include
1787 either CHAINED collections or their children, but not both.
1789 Returns
1790 -------
1791 collections : `~collections.abc.Sequence` [ `str` ]
1792 The names of collections that match ``expression``.
1794 Raises
1795 ------
1796 lsst.daf.butler.registry.CollectionExpressionError
1797 Raised when ``expression`` is invalid.
1799 Notes
1800 -----
1801 The order in which collections are returned is unspecified, except that
1802 the children of a `~CollectionType.CHAINED` collection are guaranteed
1803 to be in the order in which they are searched. When multiple parent
1804 `~CollectionType.CHAINED` collections match the same criteria, the
1805 order in which the two lists appear is unspecified, and the lists of
1806 children may be incomplete if a child has multiple parents.
1807 """
1808 # Right now the datasetTypes argument is completely ignored, but that
1809 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
1810 # ticket will take care of that.
1811 try:
1812 wildcard = CollectionWildcard.from_expression(expression)
1813 except TypeError as exc:
1814 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
1815 collectionTypes = ensure_iterable(collectionTypes)
1816 return [
1817 record.name
1818 for record in self._managers.collections.resolve_wildcard(
1819 wildcard,
1820 collection_types=frozenset(collectionTypes),
1821 flatten_chains=flattenChains,
1822 include_chains=includeChains,
1823 )
1824 ]
1826 def _makeQueryBuilder(
1827 self,
1828 summary: queries.QuerySummary,
1829 doomed_by: Iterable[str] = (),
1830 ) -> queries.QueryBuilder:
1831 """Return a `QueryBuilder` instance capable of constructing and
1832 managing more complex queries than those obtainable via `Registry`
1833 interfaces.
1835 This is an advanced interface; downstream code should prefer
1836 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
1837 are sufficient.
1839 Parameters
1840 ----------
1841 summary : `queries.QuerySummary`
1842 Object describing and categorizing the full set of dimensions that
1843 will be included in the query.
1844 doomed_by : `~collections.abc.Iterable` of `str`, optional
1845 A list of diagnostic messages that indicate why the query is going
1846 to yield no results and should not even be executed. If an empty
1847 container (default) the query will be executed unless other code
1848 determines that it is doomed.
1850 Returns
1851 -------
1852 builder : `queries.QueryBuilder`
1853 Object that can be used to construct and perform advanced queries.
1854 """
1855 doomed_by = list(doomed_by)
1856 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache)
1857 context = backend.context()
1858 relation: Relation | None = None
1859 if doomed_by:
1860 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
1861 return queries.QueryBuilder(
1862 summary,
1863 backend=backend,
1864 context=context,
1865 relation=relation,
1866 )
1868 def _standardize_query_data_id_args(
1869 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
1870 ) -> DataCoordinate:
1871 """Preprocess the data ID arguments passed to query* methods.
1873 Parameters
1874 ----------
1875 data_id : `DataId` or `None`
1876 Data ID that constrains the query results.
1877 doomed_by : `list` [ `str` ]
1878 List to append messages indicating why the query is doomed to
1879 yield no results.
1880 **kwargs
1881 Additional data ID key-value pairs, extending and overriding
1882 ``data_id``.
1884 Returns
1885 -------
1886 data_id : `DataCoordinate`
1887 Standardized data ID. Will be fully expanded unless expansion
1888 fails, in which case a message will be appended to ``doomed_by``
1889 on return.
1890 """
1891 try:
1892 return self.expandDataId(data_id, **kwargs)
1893 except DataIdValueError as err:
1894 doomed_by.append(str(err))
1895 return DataCoordinate.standardize(
1896 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
1897 )
1899 def _standardize_query_dataset_args(
1900 self,
1901 datasets: Any,
1902 collections: CollectionArgType | None,
1903 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
1904 *,
1905 doomed_by: list[str],
1906 ) -> tuple[list[DatasetType], CollectionWildcard | None]:
1907 """Preprocess dataset arguments passed to query* methods.
1909 Parameters
1910 ----------
1911 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
1912 Expression identifying dataset types. See `queryDatasetTypes` for
1913 details.
1914 collections : `str`, `re.Pattern`, or iterable of these
1915 Expression identifying collections to be searched. See
1916 `queryCollections` for details.
1917 mode : `str`, optional
1918 The way in which datasets are being used in this query; one of:
1920 - "find_first": this is a query for the first dataset in an
1921 ordered list of collections. Prohibits collection wildcards,
1922 but permits dataset type wildcards.
1924 - "find_all": this is a query for all datasets in all matched
1925 collections. Permits collection and dataset type wildcards.
1927 - "constrain": this is a query for something other than datasets,
1928 with results constrained by dataset existence. Permits
1929 collection wildcards and prohibits ``...`` as a dataset type
1930 wildcard.
1931 doomed_by : `list` [ `str` ]
1932 List to append messages indicating why the query is doomed to
1933 yield no results.
1935 Returns
1936 -------
1937 dataset_types : `list` [ `DatasetType` ]
1938 List of matched dataset types.
1939 collections : `CollectionWildcard`
1940 Processed collection expression.
1941 """
1942 dataset_types: list[DatasetType] = []
1943 collection_wildcard: CollectionWildcard | None = None
1944 if datasets is not None:
1945 if collections is None:
1946 if not self.defaults.collections:
1947 raise NoDefaultCollectionError("No collections, and no registry default collections.")
1948 collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections)
1949 else:
1950 collection_wildcard = CollectionWildcard.from_expression(collections)
1951 if mode == "find_first" and collection_wildcard.patterns:
1952 raise TypeError(
1953 f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context."
1954 )
1955 missing: list[str] = []
1956 dataset_types = self._managers.datasets.resolve_wildcard(
1957 datasets, missing=missing, explicit_only=(mode == "constrain")
1958 )
1959 if missing and mode == "constrain":
1960 raise MissingDatasetTypeError(
1961 f"Dataset type(s) {missing} are not registered.",
1962 )
1963 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
1964 elif collections:
1965 # I think this check should actually be `collections is not None`,
1966 # but it looks like some CLI scripts use empty tuple as default.
1967 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
1968 return dataset_types, collection_wildcard
1970 def queryDatasets(
1971 self,
1972 datasetType: Any,
1973 *,
1974 collections: CollectionArgType | None = None,
1975 dimensions: Iterable[Dimension | str] | None = None,
1976 dataId: DataId | None = None,
1977 where: str = "",
1978 findFirst: bool = False,
1979 components: bool | _Marker = _DefaultMarker,
1980 bind: Mapping[str, Any] | None = None,
1981 check: bool = True,
1982 **kwargs: Any,
1983 ) -> queries.DatasetQueryResults:
1984 """Query for and iterate over dataset references matching user-provided
1985 criteria.
1987 Parameters
1988 ----------
1989 datasetType : dataset type expression
1990 An expression that fully or partially identifies the dataset types
1991 to be queried. Allowed types include `DatasetType`, `str`,
1992 `re.Pattern`, and iterables thereof. The special value ``...`` can
1993 be used to query all dataset types. See
1994 :ref:`daf_butler_dataset_type_expressions` for more information.
1995 collections : collection expression, optional
1996 An expression that identifies the collections to search, such as a
1997 `str` (for full matches or partial matches via globs), `re.Pattern`
1998 (for partial matches), or iterable thereof. ``...`` can be used to
1999 search all collections (actually just all `~CollectionType.RUN`
2000 collections, because this will still find all datasets).
2001 If not provided, ``self.default.collections`` is used. See
2002 :ref:`daf_butler_collection_expressions` for more information.
2003 dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
2004 Dimensions to include in the query (in addition to those used
2005 to identify the queried dataset type(s)), either to constrain
2006 the resulting datasets to those for which a matching dimension
2007 exists, or to relate the dataset type's dimensions to dimensions
2008 referenced by the ``dataId`` or ``where`` arguments.
2009 dataId : `dict` or `DataCoordinate`, optional
2010 A data ID whose key-value pairs are used as equality constraints
2011 in the query.
2012 where : `str`, optional
2013 A string expression similar to a SQL WHERE clause. May involve
2014 any column of a dimension table or (as a shortcut for the primary
2015 key column of a dimension table) dimension name. See
2016 :ref:`daf_butler_dimension_expressions` for more information.
2017 findFirst : `bool`, optional
2018 If `True` (`False` is default), for each result data ID, only
2019 yield one `DatasetRef` of each `DatasetType`, from the first
2020 collection in which a dataset of that dataset type appears
2021 (according to the order of ``collections`` passed in). If `True`,
2022 ``collections`` must not contain regular expressions and may not
2023 be ``...``.
2024 components : `bool`, optional
2025 Must be `False`. Provided only for backwards compatibility. After
2026 v27 this argument will be removed entirely.
2027 bind : `~collections.abc.Mapping`, optional
2028 Mapping containing literal values that should be injected into the
2029 ``where`` expression, keyed by the identifiers they replace.
2030 Values of collection type can be expanded in some cases; see
2031 :ref:`daf_butler_dimension_expressions_identifiers` for more
2032 information.
2033 check : `bool`, optional
2034 If `True` (default) check the query for consistency before
2035 executing it. This may reject some valid queries that resemble
2036 common mistakes (e.g. queries for visits without specifying an
2037 instrument).
2038 **kwargs
2039 Additional keyword arguments are forwarded to
2040 `DataCoordinate.standardize` when processing the ``dataId``
2041 argument (and may be used to provide a constraining data ID even
2042 when the ``dataId`` argument is `None`).
2044 Returns
2045 -------
2046 refs : `.queries.DatasetQueryResults`
2047 Dataset references matching the given query criteria. Nested data
2048 IDs are guaranteed to include values for all implied dimensions
2049 (i.e. `DataCoordinate.hasFull` will return `True`), but will not
2050 include dimension records (`DataCoordinate.hasRecords` will be
2051 `False`) unless `~.queries.DatasetQueryResults.expanded` is
2052 called on the result object (which returns a new one).
2054 Raises
2055 ------
2056 lsst.daf.butler.registry.DatasetTypeExpressionError
2057 Raised when ``datasetType`` expression is invalid.
2058 TypeError
2059 Raised when the arguments are incompatible, such as when a
2060 collection wildcard is passed when ``findFirst`` is `True`, or
2061 when ``collections`` is `None` and ``self.defaults.collections`` is
2062 also `None`.
2063 lsst.daf.butler.registry.DataIdError
2064 Raised when ``dataId`` or keyword arguments specify unknown
2065 dimensions or values, or when they contain inconsistent values.
2066 lsst.daf.butler.registry.UserExpressionError
2067 Raised when ``where`` expression is invalid.
2069 Notes
2070 -----
2071 When multiple dataset types are queried in a single call, the
2072 results of this operation are equivalent to querying for each dataset
2073 type separately in turn, and no information about the relationships
2074 between datasets of different types is included. In contexts where
2075 that kind of information is important, the recommended pattern is to
2076 use `queryDataIds` to first obtain data IDs (possibly with the
2077 desired dataset types and collections passed as constraints to the
2078 query), and then use multiple (generally much simpler) calls to
2079 `queryDatasets` with the returned data IDs passed as constraints.
2080 """
2081 if components is not _DefaultMarker:
2082 if components is not False:
2083 raise DatasetTypeError(
2084 "Dataset component queries are no longer supported by Registry. Use "
2085 "DatasetType methods to obtain components from parent dataset types instead."
2086 )
2087 else:
2088 warnings.warn(
2089 "The components parameter is ignored. It will be removed after v27.",
2090 category=FutureWarning,
2091 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
2092 )
2093 doomed_by: list[str] = []
2094 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
2095 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args(
2096 datasetType,
2097 collections,
2098 mode="find_first" if findFirst else "find_all",
2099 doomed_by=doomed_by,
2100 )
2101 if collection_wildcard is not None and collection_wildcard.empty():
2102 doomed_by.append("No datasets can be found because collection list is empty.")
2103 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
2104 parent_results: list[queries.ParentDatasetQueryResults] = []
2105 for resolved_dataset_type in resolved_dataset_types:
2106 # The full set of dimensions in the query is the combination of
2107 # those needed for the DatasetType and those explicitly requested,
2108 # if any.
2109 dimension_names = set(resolved_dataset_type.dimensions.names)
2110 if dimensions is not None:
2111 dimension_names.update(self.dimensions.conform(dimensions).names)
2112 # Construct the summary structure needed to construct a
2113 # QueryBuilder.
2114 summary = queries.QuerySummary(
2115 requested=self.dimensions.conform(dimension_names),
2116 column_types=self._managers.column_types,
2117 data_id=data_id,
2118 expression=where,
2119 bind=bind,
2120 defaults=self.defaults.dataId,
2121 check=check,
2122 datasets=[resolved_dataset_type],
2123 )
2124 builder = self._makeQueryBuilder(summary)
2125 # Add the dataset subquery to the query, telling the QueryBuilder
2126 # to include the rank of the selected collection in the results
2127 # only if we need to findFirst. Note that if any of the
2128 # collections are actually wildcard expressions, and
2129 # findFirst=True, this will raise TypeError for us.
2130 builder.joinDataset(
2131 resolved_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst
2132 )
2133 query = builder.finish()
2134 parent_results.append(
2135 queries.ParentDatasetQueryResults(query, resolved_dataset_type, components=[None])
2136 )
2137 if not parent_results:
2138 doomed_by.extend(
2139 f"No registered dataset type matching {t!r} found, so no matching datasets can "
2140 "exist in any collection."
2141 for t in ensure_iterable(datasetType)
2142 )
2143 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
2144 elif len(parent_results) == 1:
2145 return parent_results[0]
2146 else:
2147 return queries.ChainedDatasetQueryResults(parent_results)
2149 def queryDataIds(
2150 self,
2151 # TODO: Drop Dimension support on DM-41326.
2152 dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str,
2153 *,
2154 dataId: DataId | None = None,
2155 datasets: Any = None,
2156 collections: CollectionArgType | None = None,
2157 where: str = "",
2158 components: bool | _Marker = _DefaultMarker,
2159 bind: Mapping[str, Any] | None = None,
2160 check: bool = True,
2161 **kwargs: Any,
2162 ) -> queries.DataCoordinateQueryResults:
2163 """Query for data IDs matching user-provided criteria.
2165 Parameters
2166 ----------
2167 dimensions : `DimensionGroup`, `Dimension`, or `str`, or \
2168 `~collections.abc.Iterable` [ `Dimension` or `str` ]
2169 The dimensions of the data IDs to yield, as either `Dimension`
2170 instances or `str`. Will be automatically expanded to a complete
2171 `DimensionGroup`. Support for `Dimension` instances is deprecated
2172 and will not be supported after v27.
2173 dataId : `dict` or `DataCoordinate`, optional
2174 A data ID whose key-value pairs are used as equality constraints
2175 in the query.
2176 datasets : dataset type expression, optional
2177 An expression that fully or partially identifies dataset types
2178 that should constrain the yielded data IDs. For example, including
2179 "raw" here would constrain the yielded ``instrument``,
2180 ``exposure``, ``detector``, and ``physical_filter`` values to only
2181 those for which at least one "raw" dataset exists in
2182 ``collections``. Allowed types include `DatasetType`, `str`,
2183 and iterables thereof. Regular expression objects (i.e.
2184 `re.Pattern`) are deprecated and will be removed after the v26
2185 release. See :ref:`daf_butler_dataset_type_expressions` for more
2186 information.
2187 collections : collection expression, optional
2188 An expression that identifies the collections to search for
2189 datasets, such as a `str` (for full matches or partial matches
2190 via globs), `re.Pattern` (for partial matches), or iterable
2191 thereof. ``...`` can be used to search all collections (actually
2192 just all `~CollectionType.RUN` collections, because this will
2193 still find all datasets). If not provided,
2194 ``self.default.collections`` is used. Ignored unless ``datasets``
2195 is also passed. See :ref:`daf_butler_collection_expressions` for
2196 more information.
2197 where : `str`, optional
2198 A string expression similar to a SQL WHERE clause. May involve
2199 any column of a dimension table or (as a shortcut for the primary
2200 key column of a dimension table) dimension name. See
2201 :ref:`daf_butler_dimension_expressions` for more information.
2202 components : `bool`, optional
2203 Must be `False`. Provided only for backwards compatibility. After
2204 v27 this argument will be removed entirely.
2205 bind : `~collections.abc.Mapping`, optional
2206 Mapping containing literal values that should be injected into the
2207 ``where`` expression, keyed by the identifiers they replace.
2208 Values of collection type can be expanded in some cases; see
2209 :ref:`daf_butler_dimension_expressions_identifiers` for more
2210 information.
2211 check : `bool`, optional
2212 If `True` (default) check the query for consistency before
2213 executing it. This may reject some valid queries that resemble
2214 common mistakes (e.g. queries for visits without specifying an
2215 instrument).
2216 **kwargs
2217 Additional keyword arguments are forwarded to
2218 `DataCoordinate.standardize` when processing the ``dataId``
2219 argument (and may be used to provide a constraining data ID even
2220 when the ``dataId`` argument is `None`).
2222 Returns
2223 -------
2224 dataIds : `.queries.DataCoordinateQueryResults`
2225 Data IDs matching the given query parameters. These are guaranteed
2226 to identify all dimensions (`DataCoordinate.hasFull` returns
2227 `True`), but will not contain `DimensionRecord` objects
2228 (`DataCoordinate.hasRecords` returns `False`). Call
2229 `~.queries.DataCoordinateQueryResults.expanded` on the
2230 returned object to fetch those (and consider using
2231 `~.queries.DataCoordinateQueryResults.materialize` on the
2232 returned object first if the expected number of rows is very
2233 large). See documentation for those methods for additional
2234 information.
2236 Raises
2237 ------
2238 lsst.daf.butler.registry.NoDefaultCollectionError
2239 Raised if ``collections`` is `None` and
2240 ``self.defaults.collections`` is `None`.
2241 lsst.daf.butler.registry.CollectionExpressionError
2242 Raised when ``collections`` expression is invalid.
2243 lsst.daf.butler.registry.DataIdError
2244 Raised when ``dataId`` or keyword arguments specify unknown
2245 dimensions or values, or when they contain inconsistent values.
2246 lsst.daf.butler.registry.DatasetTypeExpressionError
2247 Raised when ``datasetType`` expression is invalid.
2248 lsst.daf.butler.registry.UserExpressionError
2249 Raised when ``where`` expression is invalid.
2250 """
2251 if components is not _DefaultMarker:
2252 if components is not False:
2253 raise DatasetTypeError(
2254 "Dataset component queries are no longer supported by Registry. Use "
2255 "DatasetType methods to obtain components from parent dataset types instead."
2256 )
2257 else:
2258 warnings.warn(
2259 "The components parameter is ignored. It will be removed after v27.",
2260 category=FutureWarning,
2261 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
2262 )
2263 requested_dimensions = self.dimensions.conform(dimensions)
2264 doomed_by: list[str] = []
2265 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
2266 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args(
2267 datasets, collections, doomed_by=doomed_by
2268 )
2269 if collection_wildcard is not None and collection_wildcard.empty():
2270 doomed_by.append("No data coordinates can be found because collection list is empty.")
2271 summary = queries.QuerySummary(
2272 requested=requested_dimensions,
2273 column_types=self._managers.column_types,
2274 data_id=data_id,
2275 expression=where,
2276 bind=bind,
2277 defaults=self.defaults.dataId,
2278 check=check,
2279 datasets=resolved_dataset_types,
2280 )
2281 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
2282 for datasetType in resolved_dataset_types:
2283 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
2284 query = builder.finish()
2286 return queries.DataCoordinateQueryResults(query)
2288 def queryDimensionRecords(
2289 self,
2290 element: DimensionElement | str,
2291 *,
2292 dataId: DataId | None = None,
2293 datasets: Any = None,
2294 collections: CollectionArgType | None = None,
2295 where: str = "",
2296 components: bool | _Marker = _DefaultMarker,
2297 bind: Mapping[str, Any] | None = None,
2298 check: bool = True,
2299 **kwargs: Any,
2300 ) -> queries.DimensionRecordQueryResults:
2301 """Query for dimension information matching user-provided criteria.
2303 Parameters
2304 ----------
2305 element : `DimensionElement` or `str`
2306 The dimension element to obtain records for.
2307 dataId : `dict` or `DataCoordinate`, optional
2308 A data ID whose key-value pairs are used as equality constraints
2309 in the query.
2310 datasets : dataset type expression, optional
2311 An expression that fully or partially identifies dataset types
2312 that should constrain the yielded records. See `queryDataIds` and
2313 :ref:`daf_butler_dataset_type_expressions` for more information.
2314 collections : collection expression, optional
2315 An expression that identifies the collections to search for
2316 datasets, such as a `str` (for full matches or partial matches
2317 via globs), `re.Pattern` (for partial matches), or iterable
2318 thereof. ``...`` can be used to search all collections (actually
2319 just all `~CollectionType.RUN` collections, because this will
2320 still find all datasets). If not provided,
2321 ``self.default.collections`` is used. Ignored unless ``datasets``
2322 is also passed. See :ref:`daf_butler_collection_expressions` for
2323 more information.
2324 where : `str`, optional
2325 A string expression similar to a SQL WHERE clause. See
2326 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
2327 information.
2328 components : `bool`, optional
2329 Whether to apply dataset expressions to components as well.
2330 See `queryDataIds` for more information.
2332 Must be `False`. Provided only for backwards compatibility. After
2333 v27 this argument will be removed entirely.
2334 bind : `~collections.abc.Mapping`, optional
2335 Mapping containing literal values that should be injected into the
2336 ``where`` expression, keyed by the identifiers they replace.
2337 Values of collection type can be expanded in some cases; see
2338 :ref:`daf_butler_dimension_expressions_identifiers` for more
2339 information.
2340 check : `bool`, optional
2341 If `True` (default) check the query for consistency before
2342 executing it. This may reject some valid queries that resemble
2343 common mistakes (e.g. queries for visits without specifying an
2344 instrument).
2345 **kwargs
2346 Additional keyword arguments are forwarded to
2347 `DataCoordinate.standardize` when processing the ``dataId``
2348 argument (and may be used to provide a constraining data ID even
2349 when the ``dataId`` argument is `None`).
2351 Returns
2352 -------
2353 dataIds : `.queries.DimensionRecordQueryResults`
2354 Data IDs matching the given query parameters.
2356 Raises
2357 ------
2358 lsst.daf.butler.registry.NoDefaultCollectionError
2359 Raised if ``collections`` is `None` and
2360 ``self.defaults.collections`` is `None`.
2361 lsst.daf.butler.registry.CollectionExpressionError
2362 Raised when ``collections`` expression is invalid.
2363 lsst.daf.butler.registry.DataIdError
2364 Raised when ``dataId`` or keyword arguments specify unknown
2365 dimensions or values, or when they contain inconsistent values.
2366 lsst.daf.butler.registry.DatasetTypeExpressionError
2367 Raised when ``datasetType`` expression is invalid.
2368 lsst.daf.butler.registry.UserExpressionError
2369 Raised when ``where`` expression is invalid.
2370 """
2371 if components is not _DefaultMarker:
2372 if components is not False:
2373 raise DatasetTypeError(
2374 "Dataset component queries are no longer supported by Registry. Use "
2375 "DatasetType methods to obtain components from parent dataset types instead."
2376 )
2377 else:
2378 warnings.warn(
2379 "The components parameter is ignored. It will be removed after v27.",
2380 category=FutureWarning,
2381 stacklevel=find_outside_stacklevel("lsst.daf.butler"),
2382 )
2383 if not isinstance(element, DimensionElement):
2384 try:
2385 element = self.dimensions[element]
2386 except KeyError as e:
2387 raise DimensionNameError(
2388 f"No such dimension '{element}', available dimensions: " + str(self.dimensions.elements)
2389 ) from e
2390 doomed_by: list[str] = []
2391 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
2392 resolved_dataset_types, collection_wildcard = self._standardize_query_dataset_args(
2393 datasets, collections, doomed_by=doomed_by
2394 )
2395 if collection_wildcard is not None and collection_wildcard.empty():
2396 doomed_by.append("No dimension records can be found because collection list is empty.")
2397 summary = queries.QuerySummary(
2398 requested=element.minimal_group,
2399 column_types=self._managers.column_types,
2400 data_id=data_id,
2401 expression=where,
2402 bind=bind,
2403 defaults=self.defaults.dataId,
2404 check=check,
2405 datasets=resolved_dataset_types,
2406 )
2407 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
2408 for datasetType in resolved_dataset_types:
2409 builder.joinDataset(datasetType, collection_wildcard, isResult=False)
2410 query = builder.finish().with_record_columns(element.name)
2411 return queries.DatabaseDimensionRecordQueryResults(query, element)
2413 def queryDatasetAssociations(
2414 self,
2415 datasetType: str | DatasetType,
2416 collections: CollectionArgType | None = ...,
2417 *,
2418 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
2419 flattenChains: bool = False,
2420 ) -> Iterator[DatasetAssociation]:
2421 """Iterate over dataset-collection combinations where the dataset is in
2422 the collection.
2424 This method is a temporary placeholder for better support for
2425 association results in `queryDatasets`. It will probably be
2426 removed in the future, and should be avoided in production code
2427 whenever possible.
2429 Parameters
2430 ----------
2431 datasetType : `DatasetType` or `str`
2432 A dataset type object or the name of one.
2433 collections : collection expression, optional
2434 An expression that identifies the collections to search for
2435 datasets, such as a `str` (for full matches or partial matches
2436 via globs), `re.Pattern` (for partial matches), or iterable
2437 thereof. ``...`` can be used to search all collections (actually
2438 just all `~CollectionType.RUN` collections, because this will still
2439 find all datasets). If not provided, ``self.default.collections``
2440 is used. See :ref:`daf_butler_collection_expressions` for more
2441 information.
2442 collectionTypes : `~collections.abc.Set` [ `CollectionType` ], optional
2443 If provided, only yield associations from collections of these
2444 types.
2445 flattenChains : `bool`, optional
2446 If `True`, search in the children of `~CollectionType.CHAINED`
2447 collections. If `False`, ``CHAINED`` collections are ignored.
2449 Yields
2450 ------
2451 association : `.DatasetAssociation`
2452 Object representing the relationship between a single dataset and
2453 a single collection.
2455 Raises
2456 ------
2457 lsst.daf.butler.registry.NoDefaultCollectionError
2458 Raised if ``collections`` is `None` and
2459 ``self.defaults.collections`` is `None`.
2460 lsst.daf.butler.registry.CollectionExpressionError
2461 Raised when ``collections`` expression is invalid.
2462 """
2463 if collections is None:
2464 if not self.defaults.collections:
2465 raise NoDefaultCollectionError(
2466 "No collections provided to queryDatasetAssociations, "
2467 "and no defaults from registry construction."
2468 )
2469 collections = self.defaults.collections
2470 collection_wildcard = CollectionWildcard.from_expression(collections)
2471 backend = queries.SqlQueryBackend(self._db, self._managers, self.dimension_record_cache)
2472 parent_dataset_type = backend.resolve_single_dataset_type_wildcard(datasetType)
2473 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
2474 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
2475 for parent_collection_record in backend.resolve_collection_wildcard(
2476 collection_wildcard,
2477 collection_types=frozenset(collectionTypes),
2478 flatten_chains=flattenChains,
2479 ):
2480 # Resolve this possibly-chained collection into a list of
2481 # non-CHAINED collections that actually hold datasets of this
2482 # type.
2483 candidate_collection_records = backend.resolve_dataset_collections(
2484 parent_dataset_type,
2485 CollectionWildcard.from_names([parent_collection_record.name]),
2486 allow_calibration_collections=True,
2487 governor_constraints={},
2488 )
2489 if not candidate_collection_records:
2490 continue
2491 with backend.context() as context:
2492 relation = backend.make_dataset_query_relation(
2493 parent_dataset_type,
2494 candidate_collection_records,
2495 columns={"dataset_id", "run", "timespan", "collection"},
2496 context=context,
2497 )
2498 reader = queries.DatasetRefReader(
2499 parent_dataset_type,
2500 translate_collection=lambda k: self._managers.collections[k].name,
2501 full=False,
2502 )
2503 for row in context.fetch_iterable(relation):
2504 ref = reader.read(row)
2505 collection_record = self._managers.collections[row[collection_tag]]
2506 if collection_record.type is CollectionType.CALIBRATION:
2507 timespan = row[timespan_tag]
2508 else:
2509 # For backwards compatibility and (possibly?) user
2510 # convenience we continue to define the timespan of a
2511 # DatasetAssociation row for a non-CALIBRATION
2512 # collection to be None rather than a fully unbounded
2513 # timespan.
2514 timespan = None
2515 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
2517 def get_datastore_records(self, ref: DatasetRef) -> DatasetRef:
2518 """Retrieve datastore records for given ref.
2520 Parameters
2521 ----------
2522 ref : `DatasetRef`
2523 Dataset reference for which to retrieve its corresponding datastore
2524 records.
2526 Returns
2527 -------
2528 updated_ref : `DatasetRef`
2529 Dataset reference with filled datastore records.
2531 Notes
2532 -----
2533 If this method is called with the dataset ref that is not known to the
2534 registry then the reference with an empty set of records is returned.
2535 """
2536 datastore_records: dict[str, list[StoredDatastoreItemInfo]] = {}
2537 for opaque, record_class in self._datastore_record_classes.items():
2538 records = self.fetchOpaqueData(opaque, dataset_id=ref.id)
2539 datastore_records[opaque] = [record_class.from_record(record) for record in records]
2540 return ref.replace(datastore_records=datastore_records)
2542 def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None:
2543 """Store datastore records for given refs.
2545 Parameters
2546 ----------
2547 refs : `~collections.abc.Mapping` [`str`, `DatasetRef`]
2548 Mapping of a datastore name to dataset reference stored in that
2549 datastore, reference must include datastore records.
2550 """
2551 for datastore_name, ref in refs.items():
2552 # Store ref IDs in the bridge table.
2553 bridge = self._managers.datastores.register(datastore_name)
2554 bridge.insert([ref])
2556 # store records in opaque tables
2557 assert ref._datastore_records is not None, "Dataset ref must have datastore records"
2558 for table_name, records in ref._datastore_records.items():
2559 opaque_table = self._managers.opaque.get(table_name)
2560 assert opaque_table is not None, f"Unexpected opaque table name {table_name}"
2561 opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records))
2563 def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> None:
2564 """Create opaque tables used by datastores.
2566 Parameters
2567 ----------
2568 tables : `~collections.abc.Mapping`
2569 Maps opaque table name to its definition.
2571 Notes
2572 -----
2573 This method should disappear in the future when opaque table
2574 definitions will be provided during `Registry` construction.
2575 """
2576 datastore_record_classes = {}
2577 for table_name, table_def in tables.items():
2578 datastore_record_classes[table_name] = table_def.record_class
2579 try:
2580 self._managers.opaque.register(table_name, table_def.table_spec)
2581 except ReadOnlyDatabaseError:
2582 # If the database is read only and we just tried and failed to
2583 # create a table, it means someone is trying to create a
2584 # read-only butler client for an empty repo. That should be
2585 # okay, as long as they then try to get any datasets before
2586 # some other client creates the table. Chances are they're
2587 # just validating configuration.
2588 pass
2589 self._datastore_record_classes = datastore_record_classes
2591 def preload_cache(self) -> None:
2592 """Immediately load caches that are used for common operations."""
2593 self.dimension_record_cache.preload_cache()
2595 @property
2596 def obsCoreTableManager(self) -> ObsCoreTableManager | None:
2597 """The ObsCore manager instance for this registry
2598 (`~.interfaces.ObsCoreTableManager`
2599 or `None`).
2601 ObsCore manager may not be implemented for all registry backend, or
2602 may not be enabled for many repositories.
2603 """
2604 return self._managers.obscore
2606 storageClasses: StorageClassFactory
2607 """All storage classes known to the registry (`StorageClassFactory`).
2608 """
2610 _defaults: RegistryDefaults
2611 """Default collections used for registry queries (`RegistryDefaults`)."""