# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("SqlRegistry",)

import contextlib
import logging
import warnings
from collections.abc import Iterable, Iterator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Literal, cast

import sqlalchemy
from lsst.daf.relation import LeafRelation, Relation
from lsst.resources import ResourcePathExpression
from lsst.utils.iteration import ensure_iterable

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetAssociation,
    DatasetColumnTag,
    DatasetId,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    Dimension,
    DimensionConfig,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    NamedKeyMapping,
    NameLookupMapping,
    Progress,
    StorageClassFactory,
    Timespan,
    ddl,
)
from ..core.utils import transactional
from ..registry import (
    ArgumentError,
    CollectionExpressionError,
    CollectionSummary,
    CollectionType,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    DimensionNameError,
    InconsistentDataIdError,
    NoDefaultCollectionError,
    OrphanedRecordError,
    Registry,
    RegistryConfig,
    RegistryConsistencyError,
    RegistryDefaults,
    queries,
)
from ..registry.interfaces import ChainedCollectionRecord, RunRecord
from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard

if TYPE_CHECKING:
    from .._butlerConfig import ButlerConfig
    from ..registry._registry import CollectionArgType
    from ..registry.interfaces import (
        CollectionRecord,
        Database,
        DatastoreRegistryBridgeManager,
        ObsCoreTableManager,
    )


_LOG = logging.getLogger(__name__)


class SqlRegistry(Registry):
    """Registry implementation based on SQLAlchemy.

    Parameters
    ----------
    database : `Database`
        Database instance to store Registry.
    defaults : `RegistryDefaults`
        Default collection search path and/or output `~CollectionType.RUN`
        collection.
    managers : `RegistryManagerInstances`
        All the managers required for this registry.
    """

    defaultConfigFile: str | None = None
    """Path to configuration defaults. Accessed within the ``configs`` resource
    or relative to a search path. Can be `None` if no defaults are specified.
    """

    @classmethod
    def createFromConfig(
        cls,
        config: RegistryConfig | str | None = None,
        dimensionConfig: DimensionConfig | str | None = None,
        butlerRoot: ResourcePathExpression | None = None,
    ) -> Registry:
        """Create registry database and return `SqlRegistry` instance.

        This method initializes the database contents; the database must be
        empty prior to calling this method.

        Parameters
        ----------
        config : `RegistryConfig` or `str`, optional
            Registry configuration; if missing, the default configuration will
            be loaded from ``registry.yaml``.
        dimensionConfig : `DimensionConfig` or `str`, optional
            Dimensions configuration; if missing, the default configuration
            will be loaded from ``dimensions.yaml``.
        butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
            Path to the repository root this `SqlRegistry` will manage.

        Returns
        -------
        registry : `SqlRegistry`
            A new `SqlRegistry` instance.
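
        Examples
        --------
        A minimal sketch; the configuration file path here is hypothetical::

            registry = SqlRegistry.createFromConfig("registry.yaml")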
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)

        if isinstance(dimensionConfig, str):
            dimensionConfig = DimensionConfig(dimensionConfig)
        elif dimensionConfig is None:
            dimensionConfig = DimensionConfig()
        elif not isinstance(dimensionConfig, DimensionConfig):
            raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")

        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            config.connectionString, origin=config.get("origin", 0), namespace=config.get("namespace")
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        managers = managerTypes.makeRepo(database, dimensionConfig)
        return cls(database, RegistryDefaults(), managers)

    @classmethod
    def fromConfig(
        cls,
        config: ButlerConfig | RegistryConfig | Config | str,
        butlerRoot: ResourcePathExpression | None = None,
        writeable: bool = True,
        defaults: RegistryDefaults | None = None,
    ) -> Registry:
        """Create `Registry` subclass instance from `config`.

        Registry database must be initialized prior to calling this method.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        butlerRoot : `lsst.resources.ResourcePathExpression`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the database.
        defaults : `RegistryDefaults`, optional
            Default collection search path and/or output `~CollectionType.RUN`
            collection.

        Returns
        -------
        registry : `SqlRegistry` (subclass)
            A new `SqlRegistry` subclass instance.
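
        Examples
        --------
        A minimal sketch; the configuration path is hypothetical and the
        repository database is assumed to already exist::

            registry = SqlRegistry.fromConfig("butler.yaml", writeable=False)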
        """
        config = cls.forceRegistryConfig(config)
        config.replaceRoot(butlerRoot)
        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(
            config.connectionString,
            origin=config.get("origin", 0),
            namespace=config.get("namespace"),
            writeable=writeable,
        )
        managerTypes = RegistryManagerTypes.fromConfig(config)
        with database.session():
            managers = managerTypes.loadRepo(database)
        if defaults is None:
            defaults = RegistryDefaults()
        return cls(database, defaults, managers)

    def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
        self._db = database
        self._managers = managers
        self.storageClasses = StorageClassFactory()
        # Intentionally invoke property setter to initialize defaults. This
        # can only be done after most of the rest of Registry has already been
        # initialized, and must be done before the property getter is used.
        self.defaults = defaults
        # In the future DatasetIdFactory may become configurable and this
        # instance will need to be shared with datasets manager.
        self.datasetIdFactory = DatasetIdFactory()

    def __str__(self) -> str:
        return str(self._db)

    def __repr__(self) -> str:
        return f"SqlRegistry({self._db!r}, {self.dimensions!r})"

    def isWriteable(self) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._db.isWriteable()

    def copy(self, defaults: RegistryDefaults | None = None) -> Registry:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if defaults is None:
            # No need to copy, because `RegistryDefaults` is immutable; we
            # effectively copy on write.
            defaults = self.defaults
        return type(self)(self._db, defaults, self._managers)

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.dimensions.universe

    def refresh(self) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        with self._db.transaction():
            self._managers.refresh()

    @contextlib.contextmanager
    def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        try:
            with self._db.transaction(savepoint=savepoint):
                yield
        except BaseException:
            # TODO: this clears the caches sometimes when we wouldn't actually
            # need to. Can we avoid that?
            self._managers.dimensions.clearCaches()
            raise

    def resetConnectionPool(self) -> None:
        """Reset SQLAlchemy connection pool for `SqlRegistry` database.

        This operation is useful when using the registry with fork-based
        multiprocessing. To use the registry across a fork boundary, make sure
        there are no currently active connections (no session or transaction
        in progress) and reset the connection pool with this method, which the
        child process should call immediately after the fork.
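
        Examples
        --------
        A sketch of fork-based use; ``registry`` is a hypothetical
        `SqlRegistry` instance and the child's real work is elided::

            import os

            pid = os.fork()
            if pid == 0:
                # Child: discard pooled connections inherited from the parent.
                registry.resetConnectionPool()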
        """
        self._db._engine.dispose()

    def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
        """Add an opaque (to the `Registry`) table for use by a `Datastore` or
        other data repository client.

        Opaque table records can be added via `insertOpaqueData`, retrieved via
        `fetchOpaqueData`, and removed via `deleteOpaqueData`.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. This may differ from the
            actual name used in the database by a prefix and/or suffix.
        spec : `ddl.TableSpec`
            Specification for the table to be added.
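
        Examples
        --------
        A sketch only; it assumes `ddl.FieldSpec` accepts ``name``, ``dtype``,
        ``length``, and ``primaryKey`` arguments, and the table and column
        names are hypothetical::

            spec = ddl.TableSpec(
                fields=[
                    ddl.FieldSpec(name="id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
                ]
            )
            registry.registerOpaqueTable("datastore_records", spec)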
        """
        self._managers.opaque.register(tableName, spec)

    @transactional
    def insertOpaqueData(self, tableName: str, *data: dict) -> None:
        """Insert records into an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        data
            Each additional positional argument is a dictionary that represents
            a single row to be added.
        """
        self._managers.opaque[tableName].insert(*data)

    def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
        """Retrieve records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the returned rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.

        Yields
        ------
        row : `dict`
            A dictionary representing a single result row.
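
        Examples
        --------
        A sketch of the opaque-table round trip; the table name and columns
        are hypothetical and assumed to have been registered via
        `registerOpaqueTable`::

            registry.insertOpaqueData("datastore_records", {"id": 1, "path": "a.fits"})
            for row in registry.fetchOpaqueData("datastore_records", id=1):
                print(row["path"])
            registry.deleteOpaqueData("datastore_records", id=1)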
        """
        yield from self._managers.opaque[tableName].fetch(**where)

    @transactional
    def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
        """Remove records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the deleted rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.
        """
        self._managers.opaque[tableName].delete(where.keys(), where)

    def registerCollection(
        self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None
    ) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, type, doc=doc)
        return registered

    def getCollectionType(self, name: str) -> CollectionType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name).type

    def _get_collection_record(self, name: str) -> CollectionRecord:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.find(name)

    def registerRun(self, name: str, doc: str | None = None) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
        return registered

    @transactional
    def removeCollection(self, name: str) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.remove(name)

    def getCollectionChain(self, parent: str) -> tuple[str, ...]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        return record.children

    @transactional
    def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        children = CollectionWildcard.from_expression(children).require_ordered()
        if children != record.children or flatten:
            record.update(self._managers.collections, children, flatten=flatten)

    def getCollectionParentChains(self, collection: str) -> set[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return {
            record.name
            for record in self._managers.collections.getParentChains(
                self._managers.collections.find(collection).key
            )
        }

    def getCollectionDocumentation(self, collection: str) -> str | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)

    def setCollectionDocumentation(self, collection: str, doc: str | None) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)

    def getCollectionSummary(self, collection: str) -> CollectionSummary:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        record = self._managers.collections.find(collection)
        return self._managers.datasets.getCollectionSummary(record)

    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        _, inserted = self._managers.datasets.register(datasetType)
        return inserted

    def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        for datasetTypeExpression in ensure_iterable(name):
            datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
            if not datasetTypes:
                _LOG.info("Dataset type %r not defined", datasetTypeExpression)
            else:
                for datasetType in datasetTypes:
                    self._managers.datasets.remove(datasetType.name)
                    _LOG.info("Removed dataset type %r", datasetType.name)

    def getDatasetType(self, name: str) -> DatasetType:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        parent_name, component = DatasetType.splitDatasetTypeName(name)
        storage = self._managers.datasets[parent_name]
        if component is None:
            return storage.datasetType
        else:
            return storage.datasetType.makeComponentDatasetType(component)

    def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.supportsIdGenerationMode(mode)

    def findDataset(
        self,
        datasetType: DatasetType | str,
        dataId: DataId | None = None,
        *,
        collections: CollectionArgType | None = None,
        timespan: Timespan | None = None,
        **kwargs: Any,
    ) -> DatasetRef | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to findDataset, and no defaults from registry construction."
                )
            collections = self.defaults.collections
        backend = queries.SqlQueryBackend(self._db, self._managers)
        collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
        if collection_wildcard.empty():
            return None
        matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
        parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
            datasetType, components_deprecated=False
        )
        if len(components) > 1:
            raise DatasetTypeError(
                f"findDataset requires exactly one dataset type; got multiple components {components} "
                f"for parent dataset type {parent_dataset_type.name}."
            )
        component = components[0]
        dataId = DataCoordinate.standardize(
            dataId,
            graph=parent_dataset_type.dimensions,
            universe=self.dimensions,
            defaults=self.defaults.dataId,
            **kwargs,
        )
        governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
        (filtered_collections,) = backend.filter_dataset_collections(
            [parent_dataset_type],
            matched_collections,
            governor_constraints=governor_constraints,
        ).values()
        if not filtered_collections:
            return None
        if timespan is None:
            filtered_collections = [
                collection_record
                for collection_record in filtered_collections
                if collection_record.type is not CollectionType.CALIBRATION
            ]
        if filtered_collections:
            requested_columns = {"dataset_id", "run", "collection"}
            with backend.context() as context:
                predicate = context.make_data_coordinate_predicate(
                    dataId.subset(parent_dataset_type.dimensions), full=False
                )
                if timespan is not None:
                    requested_columns.add("timespan")
                    predicate = predicate.logical_and(
                        context.make_timespan_overlap_predicate(
                            DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
                        )
                    )
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type, filtered_collections, requested_columns, context
                ).with_rows_satisfying(predicate)
                rows = list(context.fetch_iterable(relation))
        else:
            rows = []
        if not rows:
            return None
        elif len(rows) == 1:
            best_row = rows[0]
        else:
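            # More than one matching dataset: implement find-first semantics
            # by ranking each row's collection by its position in the ordered
            # collection list and keeping the best-ranked row.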
            rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
            collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
            row_iter = iter(rows)
            best_row = next(row_iter)
            best_rank = rank_by_collection_key[best_row[collection_tag]]
            have_tie = False
            for row in row_iter:
                if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
                    best_row = row
                    best_rank = rank
                    have_tie = False
                elif rank == best_rank:
                    have_tie = True
                    assert timespan is not None, "Rank ties should be impossible given DB constraints."
            if have_tie:
                raise LookupError(
                    f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
                    f"{collection_wildcard.strings} with timespan {timespan}."
                )
        reader = queries.DatasetRefReader(
            parent_dataset_type,
            translate_collection=lambda k: self._managers.collections[k].name,
        )
        ref = reader.read(best_row, data_id=dataId)
        if component is not None:
            ref = ref.makeComponentRef(component)
        return ref

    @transactional
    def insertDatasets(
        self,
        datasetType: DatasetType | str,
        dataIds: Iterable[DataId],
        run: str | None = None,
        expand: bool = True,
        idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
    ) -> list[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if isinstance(datasetType, DatasetType):
            storage = self._managers.datasets.find(datasetType.name)
            if storage is None:
                raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
        else:
            storage = self._managers.datasets.find(datasetType)
            if storage is None:
                raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
        if run is None:
            if self.defaults.run is None:
                raise NoDefaultCollectionError(
                    "No run provided to insertDatasets, and no default from registry construction."
                )
            run = self.defaults.run
        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection is of type {runRecord.type.name}; RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)
        progress = Progress("lsst.daf.butler.Registry.insertDatasets", level=logging.DEBUG)
        if expand:
            expandedDataIds = [
                self.expandDataId(dataId, graph=storage.datasetType.dimensions)
                for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDataIds = [
                DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
            ]
        try:
            refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        return refs

    @transactional
    def _importDatasets(
        self,
        datasets: Iterable[DatasetRef],
        expand: bool = True,
    ) -> list[DatasetRef]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        datasets = list(datasets)
        if not datasets:
            # nothing to do
            return []

        # find dataset type
        datasetTypes = {dataset.datasetType for dataset in datasets}
        if len(datasetTypes) != 1:
            raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
        datasetType = datasetTypes.pop()

        # get storage handler for this dataset type
        storage = self._managers.datasets.find(datasetType.name)
        if storage is None:
            raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")

        # find run name
        runs = {dataset.run for dataset in datasets}
        if len(runs) != 1:
            raise ValueError(f"Multiple run names in input datasets: {runs}")
        run = runs.pop()

        runRecord = self._managers.collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise CollectionTypeError(
                f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
                " RUN collection required."
            )
        assert isinstance(runRecord, RunRecord)

        progress = Progress("lsst.daf.butler.Registry._importDatasets", level=logging.DEBUG)
        if expand:
            expandedDatasets = [
                dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
                for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
            ]
        else:
            expandedDatasets = [
                DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
                for dataset in datasets
            ]

        try:
            refs = list(storage.import_(runRecord, expandedDatasets))
            if self._managers.obscore:
                context = queries.SqlQueryContext(self._db, self._managers.column_types)
                self._managers.obscore.add_datasets(refs, context)
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(
                "A database constraint failure was triggered by inserting "
                f"one or more datasets of type {storage.datasetType} into "
                f"collection '{run}'. "
                "This probably means a dataset with the same data ID "
                "and dataset type already exists, but it may also mean a "
                "dimension row is missing."
            ) from err
        # Check that imported dataset IDs match the input
        for imported_ref, input_ref in zip(refs, datasets):
            if imported_ref.id != input_ref.id:
                raise RegistryConsistencyError(
                    "Imported dataset ID differs from input dataset ID, "
                    f"input ref: {input_ref}, imported ref: {imported_ref}"
                )
        return refs

    def getDataset(self, id: DatasetId) -> DatasetRef | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datasets.getDatasetRef(id)

    @transactional
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.delete(refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise OrphanedRecordError(
                    "One or more datasets are still present in one or more Datastores."
                ) from err

    @transactional
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            try:
                storage.associate(collectionRecord, refsForType)
                if self._managers.obscore:
                    # If a TAGGED collection is being monitored by ObsCore
                    # manager then we may need to save the dataset.
                    context = queries.SqlQueryContext(self._db, self._managers.column_types)
                    self._managers.obscore.associate(refsForType, collectionRecord, context)
            except sqlalchemy.exc.IntegrityError as err:
                raise ConflictingDefinitionError(
                    f"Constraint violation while associating dataset of type {datasetType.name} with "
                    f"collection {collection}. This probably means that one or more datasets with the same "
                    "dataset type and data ID already exist in the collection, but it may also indicate "
                    "that the datasets do not exist."
                ) from err

    @transactional
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise CollectionTypeError(
                f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
            )
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.disassociate(collectionRecord, refsForType)
            if self._managers.obscore:
                self._managers.obscore.disassociate(refsForType, collectionRecord)

    @transactional
    def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
        collectionRecord = self._managers.collections.find(collection)
        for datasetType, refsForType in progress.iter_item_chunks(
            DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
        ):
            storage = self._managers.datasets[datasetType.name]
            storage.certify(
                collectionRecord,
                refsForType,
                timespan,
                context=queries.SqlQueryContext(self._db, self._managers.column_types),
            )

    @transactional
    def decertify(
        self,
        collection: str,
        datasetType: str | DatasetType,
        timespan: Timespan,
        *,
        dataIds: Iterable[DataId] | None = None,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        collectionRecord = self._managers.collections.find(collection)
        if isinstance(datasetType, str):
            storage = self._managers.datasets[datasetType]
        else:
            storage = self._managers.datasets[datasetType.name]
        standardizedDataIds = None
        if dataIds is not None:
            standardizedDataIds = [
                DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
            ]
        storage.decertify(
            collectionRecord,
            timespan,
            dataIds=standardizedDataIds,
            context=queries.SqlQueryContext(self._db, self._managers.column_types),
        )

    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        return self._managers.datastores

    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.datastores.findDatastores(ref)

    def expandDataId(
        self,
        dataId: DataId | None = None,
        *,
        graph: DimensionGraph | None = None,
        records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
        withDefaults: bool = True,
        **kwargs: Any,
    ) -> DataCoordinate:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not withDefaults:
            defaults = None
        else:
            defaults = self.defaults.dataId
        try:
            standardized = DataCoordinate.standardize(
                dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
            )
        except KeyError as exc:
            # This means either a kwarg has an odd name or a required
            # dimension is missing.
            raise DimensionNameError(str(exc)) from exc
        if standardized.hasRecords():
            return standardized
        if records is None:
            records = {}
        elif isinstance(records, NamedKeyMapping):
            records = records.byName()
        else:
            records = dict(records)
        if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
            records.update(dataId.records.byName())
        keys = standardized.byName()
        context = queries.SqlQueryContext(self._db, self._managers.column_types)
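        # Walk the dimension elements in an order that guarantees the keys
        # needed to look up each element's record are present in ``keys`` by
        # the time we reach it, filling in implied dimension values as we go.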
        for element in standardized.graph.primaryKeyTraversalOrder:
            record = records.get(element.name, ...)  # Use ... to mean not found; None might mean NULL
            if record is ...:
                if isinstance(element, Dimension) and keys.get(element.name) is None:
                    if element in standardized.graph.required:
                        raise DimensionNameError(
                            f"No value or null value for required dimension {element.name}."
                        )
                    keys[element.name] = None
                    record = None
                else:
                    storage = self._managers.dimensions[element]
                    record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
                records[element.name] = record
            if record is not None:
                for d in element.implied:
                    value = getattr(record, d.name)
                    if keys.setdefault(d.name, value) != value:
                        raise InconsistentDataIdError(
                            f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
                            f"but {element.name} implies {d.name}={value!r}."
                        )
            else:
                if element in standardized.graph.required:
                    raise DataIdValueError(
                        f"Could not fetch record for required dimension {element.name} via keys {keys}."
                    )
                if element.alwaysJoin:
                    raise InconsistentDataIdError(
                        f"Could not fetch record for element {element.name} via keys {keys}, "
                        "but it is marked alwaysJoin=True; this means one or more dimensions are not "
                        "related."
                    )
                for d in element.implied:
                    keys.setdefault(d.name, None)
                    records.setdefault(d.name, None)
        return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)

    def insertDimensionData(
        self,
        element: DimensionElement | str,
        *data: Mapping[str, Any] | DimensionRecord,
        conform: bool = True,
        replace: bool = False,
        skip_existing: bool = False,
    ) -> None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            records = [
                row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
            ]
        else:
            # Ignore typing since caller said to trust them with conform=False.
            records = data  # type: ignore
        storage = self._managers.dimensions[element]
        storage.insert(*records, replace=replace, skip_existing=skip_existing)

    def syncDimensionData(
        self,
        element: DimensionElement | str,
        row: Mapping[str, Any] | DimensionRecord,
        conform: bool = True,
        update: bool = False,
    ) -> bool | dict[str, Any]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
        else:
            # Ignore typing since caller said to trust them with conform=False.
            record = row  # type: ignore
        storage = self._managers.dimensions[element]
        return storage.sync(record, update=update)

    def queryDatasetTypes(
        self,
        expression: Any = ...,
        *,
        components: bool | None = None,
        missing: list[str] | None = None,
    ) -> Iterable[DatasetType]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        wildcard = DatasetTypeWildcard.from_expression(expression)
        composition_dict = self._managers.datasets.resolve_wildcard(
            wildcard,
            components=components,
            missing=missing,
        )
        result: list[DatasetType] = []
        for parent_dataset_type, components_for_parent in composition_dict.items():
            result.extend(
                parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
                for c in components_for_parent
            )
        return result

    def queryCollections(
        self,
        expression: Any = ...,
        datasetType: DatasetType | None = None,
        collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(),
        flattenChains: bool = False,
        includeChains: bool | None = None,
    ) -> Sequence[str]:
        # Docstring inherited from lsst.daf.butler.registry.Registry

        # Right now the datasetType argument is completely ignored, but that
        # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
        # ticket will take care of that.
        try:
            wildcard = CollectionWildcard.from_expression(expression)
        except TypeError as exc:
            raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
        collectionTypes = ensure_iterable(collectionTypes)
        return [
            record.name
            for record in self._managers.collections.resolve_wildcard(
                wildcard,
                collection_types=frozenset(collectionTypes),
                flatten_chains=flattenChains,
                include_chains=includeChains,
            )
        ]

    def _makeQueryBuilder(
        self,
        summary: queries.QuerySummary,
        doomed_by: Iterable[str] = (),
    ) -> queries.QueryBuilder:
        """Return a `QueryBuilder` instance capable of constructing and
        managing more complex queries than those obtainable via `Registry`
        interfaces.

        This is an advanced interface; downstream code should prefer
        `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
        are sufficient.

        Parameters
        ----------
        summary : `queries.QuerySummary`
            Object describing and categorizing the full set of dimensions that
            will be included in the query.
        doomed_by : `~collections.abc.Iterable` of `str`, optional
            A list of diagnostic messages that indicate why the query is going
            to yield no results and should not even be executed. If an empty
            container (default) the query will be executed unless other code
            determines that it is doomed.

        Returns
        -------
        builder : `queries.QueryBuilder`
            Object that can be used to construct and perform advanced queries.
        """
        doomed_by = list(doomed_by)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        context = backend.context()
        relation: Relation | None = None
        if doomed_by:
            relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
        return queries.QueryBuilder(
            summary,
            backend=backend,
            context=context,
            relation=relation,
        )

    def _standardize_query_data_id_args(
        self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
    ) -> DataCoordinate:
        """Preprocess the data ID arguments passed to query* methods.

        Parameters
        ----------
        data_id : `DataId` or `None`
            Data ID that constrains the query results.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.
        **kwargs
            Additional data ID key-value pairs, extending and overriding
            ``data_id``.

        Returns
        -------
        data_id : `DataCoordinate`
            Standardized data ID. Will be fully expanded unless expansion
            fails, in which case a message will be appended to ``doomed_by``
            on return.
        """
        try:
            return self.expandDataId(data_id, **kwargs)
        except DataIdValueError as err:
            doomed_by.append(str(err))
        return DataCoordinate.standardize(
            data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
        )

    def _standardize_query_dataset_args(
        self,
        datasets: Any,
        collections: CollectionArgType | None,
        components: bool | None,
        mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
        *,
        doomed_by: list[str],
    ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
        """Preprocess dataset arguments passed to query* methods.

        Parameters
        ----------
        datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
            Expression identifying dataset types. See `queryDatasetTypes` for
            details.
        collections : `str`, `re.Pattern`, or iterable of these
            Expression identifying collections to be searched. See
            `queryCollections` for details.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components.
            If `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

            Values other than `False` are deprecated, and only `False` will be
            supported after v26. After v27 this argument will be removed
            entirely.
        mode : `str`, optional
            The way in which datasets are being used in this query; one of:

            - "find_first": this is a query for the first dataset in an
              ordered list of collections. Prohibits collection wildcards,
              but permits dataset type wildcards.

            - "find_all": this is a query for all datasets in all matched
              collections. Permits collection and dataset type wildcards.

            - "constrain": this is a query for something other than datasets,
              with results constrained by dataset existence. Permits
              collection wildcards and prohibits ``...`` as a dataset type
              wildcard.
        doomed_by : `list` [ `str` ]
            List to append messages indicating why the query is doomed to
            yield no results.

        Returns
        -------
        composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ]
            Dictionary mapping parent dataset type to the `list` of components
            matched for that dataset type (with `None` standing in for the
            parent itself).
        collections : `CollectionWildcard`
            Processed collection expression.
        """
        composition: dict[DatasetType, list[str | None]] = {}
        collection_wildcard: CollectionWildcard | None = None
        if datasets is not None:
            if collections is None:
                if not self.defaults.collections:
                    raise NoDefaultCollectionError("No collections, and no registry default collections.")
                collection_wildcard = CollectionWildcard.from_expression(self.defaults.collections)
            else:
                collection_wildcard = CollectionWildcard.from_expression(collections)
            if mode == "find_first" and collection_wildcard.patterns:
                raise TypeError(
                    f"Collection pattern(s) {collection_wildcard.patterns} not allowed in this context."
                )
            missing: list[str] = []
            composition = self._managers.datasets.resolve_wildcard(
                datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
            )
            if missing and mode == "constrain":
                # After v26 this should raise MissingDatasetTypeError, to be
                # implemented on DM-36303.
                warnings.warn(
                    f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
                    FutureWarning,
                )
            doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
        elif collections:
            # I think this check should actually be `collections is not None`,
            # but it looks like some CLI scripts use empty tuple as default.
            raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
        return composition, collection_wildcard

    def queryDatasets(
        self,
        datasetType: Any,
        *,
        collections: CollectionArgType | None = None,
        dimensions: Iterable[Dimension | str] | None = None,
        dataId: DataId | None = None,
        where: str = "",
        findFirst: bool = False,
        components: bool | None = None,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DatasetQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasetType,
            collections,
            components,
            mode="find_first" if findFirst else "find_all",
            doomed_by=doomed_by,
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No datasets can be found because collection list is empty.")
            return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
        parent_results: list[queries.ParentDatasetQueryResults] = []
        for parent_dataset_type, components_for_parent in dataset_composition.items():
            # The full set of dimensions in the query is the combination of
            # those needed for the DatasetType and those explicitly requested,
            # if any.
            dimension_names = set(parent_dataset_type.dimensions.names)
            if dimensions is not None:
                dimension_names.update(self.dimensions.extract(dimensions).names)
            # Construct the summary structure needed to construct a
            # QueryBuilder.
            summary = queries.QuerySummary(
                requested=DimensionGraph(self.dimensions, names=dimension_names),
                column_types=self._managers.column_types,
                data_id=data_id,
                expression=where,
                bind=bind,
                defaults=self.defaults.dataId,
                check=check,
                datasets=[parent_dataset_type],
            )
            builder = self._makeQueryBuilder(summary)
            # Add the dataset subquery to the query, telling the QueryBuilder
            # to include the rank of the selected collection in the results
            # only if we need to findFirst. Note that if any of the
            # collections are actually wildcard expressions, and
            # findFirst=True, this will raise TypeError for us.
            builder.joinDataset(parent_dataset_type, collection_wildcard, isResult=True, findFirst=findFirst)
            query = builder.finish()
            parent_results.append(
                queries.ParentDatasetQueryResults(
                    query, parent_dataset_type, components=components_for_parent
                )
            )
        if not parent_results:
            doomed_by.extend(
                f"No registered dataset type matching {t!r} found, so no matching datasets can "
                "exist in any collection."
                for t in ensure_iterable(datasetType)
            )
            return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
        elif len(parent_results) == 1:
            return parent_results[0]
        else:
            return queries.ChainedDatasetQueryResults(parent_results)

    def queryDataIds(
        self,
        dimensions: Iterable[Dimension | str] | Dimension | str,
        *,
        dataId: DataId | None = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: bool | None = None,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DataCoordinateQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        dimensions = ensure_iterable(dimensions)
        requestedDimensions = self.dimensions.extract(dimensions)
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No data coordinates can be found because collection list is empty.")
        summary = queries.QuerySummary(
            requested=requestedDimensions,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition.keys():
            builder.joinDataset(datasetType, collection_wildcard, isResult=False)
        query = builder.finish()

        return queries.DataCoordinateQueryResults(query)

    def queryDimensionRecords(
        self,
        element: DimensionElement | str,
        *,
        dataId: DataId | None = None,
        datasets: Any = None,
        collections: CollectionArgType | None = None,
        where: str = "",
        components: bool | None = None,
        bind: Mapping[str, Any] | None = None,
        check: bool = True,
        **kwargs: Any,
    ) -> queries.DimensionRecordQueryResults:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if not isinstance(element, DimensionElement):
            try:
                element = self.dimensions[element]
            except KeyError as e:
                raise DimensionNameError(
                    f"No such dimension '{element}', available dimensions: "
                    + str(self.dimensions.getStaticElements())
                ) from e
        doomed_by: list[str] = []
        data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
        dataset_composition, collection_wildcard = self._standardize_query_dataset_args(
            datasets, collections, components, doomed_by=doomed_by
        )
        if collection_wildcard is not None and collection_wildcard.empty():
            doomed_by.append("No dimension records can be found because collection list is empty.")
        summary = queries.QuerySummary(
            requested=element.graph,
            column_types=self._managers.column_types,
            data_id=data_id,
            expression=where,
            bind=bind,
            defaults=self.defaults.dataId,
            check=check,
            datasets=dataset_composition.keys(),
        )
        builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
        for datasetType in dataset_composition.keys():
            builder.joinDataset(datasetType, collection_wildcard, isResult=False)
        query = builder.finish().with_record_columns(element)
        return queries.DatabaseDimensionRecordQueryResults(query, element)

    def queryDatasetAssociations(
        self,
        datasetType: str | DatasetType,
        collections: CollectionArgType | None = ...,
        *,
        collectionTypes: Iterable[CollectionType] = CollectionType.all(),
        flattenChains: bool = False,
    ) -> Iterator[DatasetAssociation]:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        if collections is None:
            if not self.defaults.collections:
                raise NoDefaultCollectionError(
                    "No collections provided to queryDatasetAssociations, "
                    "and no defaults from registry construction."
                )
            collections = self.defaults.collections
        collection_wildcard = CollectionWildcard.from_expression(collections)
        backend = queries.SqlQueryBackend(self._db, self._managers)
        parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
        timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
        collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
        for parent_collection_record in backend.resolve_collection_wildcard(
            collection_wildcard,
            collection_types=frozenset(collectionTypes),
            flatten_chains=flattenChains,
        ):
            # Resolve this possibly-chained collection into a list of
            # non-CHAINED collections that actually hold datasets of this
            # type.
            candidate_collection_records = backend.resolve_dataset_collections(
                parent_dataset_type,
                CollectionWildcard.from_names([parent_collection_record.name]),
                allow_calibration_collections=True,
                governor_constraints={},
            )
            if not candidate_collection_records:
                continue
            with backend.context() as context:
                relation = backend.make_dataset_query_relation(
                    parent_dataset_type,
                    candidate_collection_records,
                    columns={"dataset_id", "run", "timespan", "collection"},
                    context=context,
                )
                reader = queries.DatasetRefReader(
                    parent_dataset_type,
                    translate_collection=lambda k: self._managers.collections[k].name,
                    full=False,
                )
                for row in context.fetch_iterable(relation):
                    ref = reader.read(row)
                    collection_record = self._managers.collections[row[collection_tag]]
                    if collection_record.type is CollectionType.CALIBRATION:
                        timespan = row[timespan_tag]
                    else:
                        # For backwards compatibility and (possibly?) user
                        # convenience we continue to define the timespan of a
                        # DatasetAssociation row for a non-CALIBRATION
                        # collection to be None rather than a fully unbounded
                        # timespan.
                        timespan = None
                    yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)

    @property
    def obsCoreTableManager(self) -> ObsCoreTableManager | None:
        # Docstring inherited from lsst.daf.butler.registry.Registry
        return self._managers.obscore

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """