Coverage for python/lsst/daf/butler/registries/sql.py: 12% of 503 statements (coverage.py v6.5.0, created at 2023-03-04 02:04 -0800)
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SqlRegistry",)
26import contextlib
27import logging
28import warnings
29from typing import (
30 TYPE_CHECKING,
31 Any,
32 Dict,
33 Iterable,
34 Iterator,
35 List,
36 Literal,
37 Mapping,
38 Optional,
39 Sequence,
40 Set,
41 Union,
42 cast,
43)
45import sqlalchemy
46from lsst.daf.relation import LeafRelation, Relation
47from lsst.resources import ResourcePathExpression
48from lsst.utils.iteration import ensure_iterable
50from ..core import (
51 Config,
52 DataCoordinate,
53 DataId,
54 DatasetAssociation,
55 DatasetColumnTag,
56 DatasetId,
57 DatasetRef,
58 DatasetType,
59 Dimension,
60 DimensionConfig,
61 DimensionElement,
62 DimensionGraph,
63 DimensionRecord,
64 DimensionUniverse,
65 NamedKeyMapping,
66 NameLookupMapping,
67 Progress,
68 StorageClassFactory,
69 Timespan,
70 ddl,
71)
72from ..core.utils import transactional
73from ..registry import (
74 ArgumentError,
75 CollectionExpressionError,
76 CollectionSummary,
77 CollectionType,
78 CollectionTypeError,
79 ConflictingDefinitionError,
80 DataIdValueError,
81 DatasetTypeError,
82 DimensionNameError,
83 InconsistentDataIdError,
84 NoDefaultCollectionError,
85 OrphanedRecordError,
86 Registry,
87 RegistryConfig,
88 RegistryDefaults,
89 queries,
90)
91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord
92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
93from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
95if TYPE_CHECKING:
96 from .._butlerConfig import ButlerConfig
97 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager
100_LOG = logging.getLogger(__name__)
103class SqlRegistry(Registry):
104 """Registry implementation based on SQLAlchemy.
106 Parameters
107 ----------
108 database : `Database`
109 Database instance in which the registry data is stored.
110 defaults : `RegistryDefaults`
111 Default collection search path and/or output `~CollectionType.RUN`
112 collection.
113 managers : `RegistryManagerInstances`
114 All the managers required for this registry.
115 """
117 defaultConfigFile: Optional[str] = None
118 """Path to configuration defaults. Accessed within the ``configs`` resource
119 or relative to a search path. Can be `None` if no defaults are specified.
120 """
122 @classmethod
123 def createFromConfig(
124 cls,
125 config: Optional[Union[RegistryConfig, str]] = None,
126 dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
127 butlerRoot: Optional[ResourcePathExpression] = None,
128 ) -> Registry:
129 """Create registry database and return `SqlRegistry` instance.
131 This method initializes database contents; the database must be empty
132 prior to calling this method.
134 Parameters
135 ----------
136 config : `RegistryConfig` or `str`, optional
137 Registry configuration; if missing, the default configuration will
138 be loaded from registry.yaml.
139 dimensionConfig : `DimensionConfig` or `str`, optional
140 Dimension configuration; if missing, the default configuration
141 will be loaded from dimensions.yaml.
142 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
143 Path to the repository root this `SqlRegistry` will manage.
145 Returns
146 -------
147 registry : `SqlRegistry`
148 A new `SqlRegistry` instance.
149 """
150 config = cls.forceRegistryConfig(config)
151 config.replaceRoot(butlerRoot)
153 if isinstance(dimensionConfig, str):
154 dimensionConfig = DimensionConfig(dimensionConfig)
155 elif dimensionConfig is None:
156 dimensionConfig = DimensionConfig()
157 elif not isinstance(dimensionConfig, DimensionConfig):
158 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
160 DatabaseClass = config.getDatabaseClass()
161 database = DatabaseClass.fromUri(
162 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace")
163 )
164 managerTypes = RegistryManagerTypes.fromConfig(config)
165 managers = managerTypes.makeRepo(database, dimensionConfig)
166 return cls(database, RegistryDefaults(), managers)
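# Minimal bootstrap sketch for createFromConfig, assuming a local SQLite file is
# acceptable and the default dimensions.yaml shipped with daf_butler is used; the
# file name below is illustrative, not something defined by this module.
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import RegistryConfig

config = RegistryConfig()
config["db"] = "sqlite:///demo_registry.sqlite3"  # connection string for a new, empty database
registry = SqlRegistry.createFromConfig(config)
print(registry.dimensions)  # DimensionUniverse loaded from the default dimension configuration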
168 @classmethod
169 def fromConfig(
170 cls,
171 config: Union[ButlerConfig, RegistryConfig, Config, str],
172 butlerRoot: Optional[ResourcePathExpression] = None,
173 writeable: bool = True,
174 defaults: Optional[RegistryDefaults] = None,
175 ) -> Registry:
176 """Create `Registry` subclass instance from `config`.
178 Registry database must be initialized prior to calling this method.
180 Parameters
181 ----------
182 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
183 Registry configuration.
184 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
185 Path to the repository root this `Registry` will manage.
186 writeable : `bool`, optional
187 If `True` (default) create a read-write connection to the database.
188 defaults : `RegistryDefaults`, optional
189 Default collection search path and/or output `~CollectionType.RUN`
190 collection.
192 Returns
193 -------
194 registry : `SqlRegistry` (subclass)
195 A new `SqlRegistry` subclass instance.
196 """
197 config = cls.forceRegistryConfig(config)
198 config.replaceRoot(butlerRoot)
199 DatabaseClass = config.getDatabaseClass()
200 database = DatabaseClass.fromUri(
201 config.connectionString.render_as_string(hide_password=False),
202 origin=config.get("origin", 0),
203 namespace=config.get("namespace"),
204 writeable=writeable,
205 )
206 managerTypes = RegistryManagerTypes.fromConfig(config)
207 with database.session():
208 managers = managerTypes.loadRepo(database)
209 if defaults is None:
210 defaults = RegistryDefaults()
211 return cls(database, defaults, managers)
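# Read-only connection sketch for fromConfig, assuming the database behind this
# config was already initialized (for example by createFromConfig above); the
# default collection name is an illustrative assumption.
from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults
from lsst.daf.butler.registries.sql import SqlRegistry

config = RegistryConfig()
config["db"] = "sqlite:///demo_registry.sqlite3"
registry = SqlRegistry.fromConfig(
    config,
    writeable=False,
    defaults=RegistryDefaults(collections=["demo/defaults"]),
)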
213 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
214 self._db = database
215 self._managers = managers
216 self.storageClasses = StorageClassFactory()
217 # Intentionally invoke property setter to initialize defaults. This
218 # can only be done after most of the rest of Registry has already been
219 # initialized, and must be done before the property getter is used.
220 self.defaults = defaults
221 # In the future DatasetIdFactory may become configurable and this
222 # instance will need to be shared with datasets manager.
223 self.datasetIdFactory = DatasetIdFactory()
225 def __str__(self) -> str:
226 return str(self._db)
228 def __repr__(self) -> str:
229 return f"SqlRegistry({self._db!r}, {self.dimensions!r})"
231 def isWriteable(self) -> bool:
232 # Docstring inherited from lsst.daf.butler.registry.Registry
233 return self._db.isWriteable()
235 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
236 # Docstring inherited from lsst.daf.butler.registry.Registry
237 if defaults is None:
238 # No need to copy, because `RegistryDefaults` is immutable; we
239 # effectively copy on write.
240 defaults = self.defaults
241 return type(self)(self._db, defaults, self._managers)
243 @property
244 def dimensions(self) -> DimensionUniverse:
245 # Docstring inherited from lsst.daf.butler.registry.Registry
246 return self._managers.dimensions.universe
248 def refresh(self) -> None:
249 # Docstring inherited from lsst.daf.butler.registry.Registry
250 with self._db.transaction():
251 self._managers.refresh()
253 @contextlib.contextmanager
254 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
255 # Docstring inherited from lsst.daf.butler.registry.Registry
256 try:
257 with self._db.transaction(savepoint=savepoint):
258 yield
259 except BaseException:
260 # TODO: this clears the caches sometimes when we wouldn't actually
261 # need to. Can we avoid that?
262 self._managers.dimensions.clearCaches()
263 raise
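# Transaction sketch: group several writes so they either all commit or all roll
# back. Assumes `registry` is a writeable SqlRegistry as in the earlier sketches;
# the collection names are illustrative.
from lsst.daf.butler.registry import CollectionType

with registry.transaction():
    registry.registerRun("demo/run/1")
    registry.registerCollection("demo/tagged", CollectionType.TAGGED)
# If anything inside the block had raised, neither collection would have been created.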
265 def resetConnectionPool(self) -> None:
266 """Reset SQLAlchemy connection pool for `SqlRegistry` database.
268 This operation is useful when using the registry with fork-based
269 multiprocessing. To use the registry across a fork boundary, one has to
270 make sure that there are no currently active connections (no session or
271 transaction is in progress) and that the connection pool is reset using
272 this method. The child process should call this method immediately
273 after the fork.
274 """
275 self._db._engine.dispose()
277 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
278 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
279 other data repository client.
281 Opaque table records can be added via `insertOpaqueData`, retrieved via
282 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
284 Parameters
285 ----------
286 tableName : `str`
287 Logical name of the opaque table. This may differ from the
288 actual name used in the database by a prefix and/or suffix.
289 spec : `ddl.TableSpec`
290 Specification for the table to be added.
291 """
292 self._managers.opaque.register(tableName, spec)
294 @transactional
295 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
296 """Insert records into an opaque table.
298 Parameters
299 ----------
300 tableName : `str`
301 Logical name of the opaque table. Must match the name used in a
302 previous call to `registerOpaqueTable`.
303 data
304 Each additional positional argument is a dictionary that represents
305 a single row to be added.
306 """
307 self._managers.opaque[tableName].insert(*data)
309 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
310 """Retrieve records from an opaque table.
312 Parameters
313 ----------
314 tableName : `str`
315 Logical name of the opaque table. Must match the name used in a
316 previous call to `registerOpaqueTable`.
317 where
318 Additional keyword arguments are interpreted as equality
319 constraints that restrict the returned rows (combined with AND);
320 keyword arguments are column names and values are the values they
321 must have.
323 Yields
324 ------
325 row : `dict`
326 A dictionary representing a single result row.
327 """
328 yield from self._managers.opaque[tableName].fetch(**where)
330 @transactional
331 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
332 """Remove records from an opaque table.
334 Parameters
335 ----------
336 tableName : `str`
337 Logical name of the opaque table. Must match the name used in a
338 previous call to `registerOpaqueTable`.
339 where
340 Additional keyword arguments are interpreted as equality
341 constraints that restrict the deleted rows (combined with AND);
342 keyword arguments are column names and values are the values they
343 must have.
344 """
345 self._managers.opaque[tableName].delete(where.keys(), where)
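# Opaque-table round trip sketch; the table name and field layout are illustrative
# assumptions, not a schema used by any real Datastore. Assumes `registry` is a
# writeable SqlRegistry instance.
import sqlalchemy
from lsst.daf.butler.core import ddl

spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec(name="key", dtype=sqlalchemy.String, length=64, primaryKey=True),
        ddl.FieldSpec(name="value", dtype=sqlalchemy.String, length=256, nullable=True),
    ]
)
registry.registerOpaqueTable("demo_opaque", spec)
registry.insertOpaqueData("demo_opaque", {"key": "a", "value": "1"}, {"key": "b", "value": "2"})
rows = list(registry.fetchOpaqueData("demo_opaque", key="a"))  # keyword constraints are ANDed
registry.deleteOpaqueData("demo_opaque", key="b")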
347 def registerCollection(
348 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
349 ) -> bool:
350 # Docstring inherited from lsst.daf.butler.registry.Registry
351 _, registered = self._managers.collections.register(name, type, doc=doc)
352 return registered
354 def getCollectionType(self, name: str) -> CollectionType:
355 # Docstring inherited from lsst.daf.butler.registry.Registry
356 return self._managers.collections.find(name).type
358 def _get_collection_record(self, name: str) -> CollectionRecord:
359 # Docstring inherited from lsst.daf.butler.registry.Registry
360 return self._managers.collections.find(name)
362 def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
363 # Docstring inherited from lsst.daf.butler.registry.Registry
364 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
365 return registered
367 @transactional
368 def removeCollection(self, name: str) -> None:
369 # Docstring inherited from lsst.daf.butler.registry.Registry
370 self._managers.collections.remove(name)
372 def getCollectionChain(self, parent: str) -> tuple[str, ...]:
373 # Docstring inherited from lsst.daf.butler.registry.Registry
374 record = self._managers.collections.find(parent)
375 if record.type is not CollectionType.CHAINED:
376 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
377 assert isinstance(record, ChainedCollectionRecord)
378 return record.children
380 @transactional
381 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
382 # Docstring inherited from lsst.daf.butler.registry.Registry
383 record = self._managers.collections.find(parent)
384 if record.type is not CollectionType.CHAINED:
385 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
386 assert isinstance(record, ChainedCollectionRecord)
387 children = CollectionWildcard.from_expression(children).require_ordered()
388 if children != record.children or flatten:
389 record.update(self._managers.collections, children, flatten=flatten)
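# Chained-collection sketch: build a CHAINED collection whose search order prefers
# the newer run. Assumes `registry` is a writeable SqlRegistry; all collection
# names are illustrative.
from lsst.daf.butler.registry import CollectionType

registry.registerRun("demo/run/1")
registry.registerRun("demo/run/2")
registry.registerCollection("demo/chain", CollectionType.CHAINED)
registry.setCollectionChain("demo/chain", ["demo/run/2", "demo/run/1"])
print(registry.getCollectionChain("demo/chain"))  # ('demo/run/2', 'demo/run/1')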
391 def getCollectionParentChains(self, collection: str) -> Set[str]:
392 # Docstring inherited from lsst.daf.butler.registry.Registry
393 return {
394 record.name
395 for record in self._managers.collections.getParentChains(
396 self._managers.collections.find(collection).key
397 )
398 }
400 def getCollectionDocumentation(self, collection: str) -> Optional[str]:
401 # Docstring inherited from lsst.daf.butler.registry.Registry
402 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)
404 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
405 # Docstring inherited from lsst.daf.butler.registry.Registry
406 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)
408 def getCollectionSummary(self, collection: str) -> CollectionSummary:
409 # Docstring inherited from lsst.daf.butler.registry.Registry
410 record = self._managers.collections.find(collection)
411 return self._managers.datasets.getCollectionSummary(record)
413 def registerDatasetType(self, datasetType: DatasetType) -> bool:
414 # Docstring inherited from lsst.daf.butler.registry.Registry
415 _, inserted = self._managers.datasets.register(datasetType)
416 return inserted
418 def removeDatasetType(self, name: str | tuple[str, ...]) -> None:
419 # Docstring inherited from lsst.daf.butler.registry.Registry
421 for datasetTypeExpression in ensure_iterable(name):
422 datasetTypes = list(self.queryDatasetTypes(datasetTypeExpression))
423 if not datasetTypes:
424 _LOG.info("Dataset type %r not defined", datasetTypeExpression)
425 else:
426 for datasetType in datasetTypes:
427 self._managers.datasets.remove(datasetType.name)
428 _LOG.info("Removed dataset type %r", datasetType.name)
430 def getDatasetType(self, name: str) -> DatasetType:
431 # Docstring inherited from lsst.daf.butler.registry.Registry
432 parent_name, component = DatasetType.splitDatasetTypeName(name)
433 storage = self._managers.datasets[parent_name]
434 if component is None:
435 return storage.datasetType
436 else:
437 return storage.datasetType.makeComponentDatasetType(component)
439 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
440 # Docstring inherited from lsst.daf.butler.registry.Registry
441 return self._managers.datasets.supportsIdGenerationMode(mode)
443 def findDataset(
444 self,
445 datasetType: Union[DatasetType, str],
446 dataId: Optional[DataId] = None,
447 *,
448 collections: Any = None,
449 timespan: Optional[Timespan] = None,
450 **kwargs: Any,
451 ) -> Optional[DatasetRef]:
452 # Docstring inherited from lsst.daf.butler.registry.Registry
453 if collections is None:
454 if not self.defaults.collections:
455 raise NoDefaultCollectionError(
456 "No collections provided to findDataset, and no defaults from registry construction."
457 )
458 collections = self.defaults.collections
459 backend = queries.SqlQueryBackend(self._db, self._managers)
460 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
461 matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
462 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
463 datasetType, components_deprecated=False
464 )
465 if len(components) > 1:
466 raise DatasetTypeError(
467 f"findDataset requires exactly one dataset type; got multiple components {components} "
468 f"for parent dataset type {parent_dataset_type.name}."
469 )
470 component = components[0]
471 dataId = DataCoordinate.standardize(
472 dataId,
473 graph=parent_dataset_type.dimensions,
474 universe=self.dimensions,
475 defaults=self.defaults.dataId,
476 **kwargs,
477 )
478 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
479 (filtered_collections,) = backend.filter_dataset_collections(
480 [parent_dataset_type],
481 matched_collections,
482 governor_constraints=governor_constraints,
483 ).values()
484 if not filtered_collections:
485 return None
486 if timespan is None:
487 filtered_collections = [
488 collection_record
489 for collection_record in filtered_collections
490 if collection_record.type is not CollectionType.CALIBRATION
491 ]
492 if filtered_collections:
493 requested_columns = {"dataset_id", "run", "collection"}
494 with backend.context() as context:
495 predicate = context.make_data_coordinate_predicate(
496 dataId.subset(parent_dataset_type.dimensions), full=False
497 )
498 if timespan is not None:
499 requested_columns.add("timespan")
500 predicate = predicate.logical_and(
501 context.make_timespan_overlap_predicate(
502 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
503 )
504 )
505 relation = backend.make_dataset_query_relation(
506 parent_dataset_type, filtered_collections, requested_columns, context
507 ).with_rows_satisfying(predicate)
508 rows = list(context.fetch_iterable(relation))
509 else:
510 rows = []
511 if not rows:
512 return None
513 elif len(rows) == 1:
514 best_row = rows[0]
515 else:
516 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
517 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
518 row_iter = iter(rows)
519 best_row = next(row_iter)
520 best_rank = rank_by_collection_key[best_row[collection_tag]]
521 have_tie = False
522 for row in row_iter:
523 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
524 best_row = row
525 best_rank = rank
526 have_tie = False
527 elif rank == best_rank:
528 have_tie = True
529 assert timespan is not None, "Rank ties should be impossible given DB constraints."
530 if have_tie:
531 raise LookupError(
532 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
533 f"{collection_wildcard.strings} with timespan {timespan}."
534 )
535 reader = queries.DatasetRefReader(
536 parent_dataset_type,
537 translate_collection=lambda k: self._managers.collections[k].name,
538 )
539 ref = reader.read(best_row, data_id=dataId)
540 if component is not None:
541 ref = ref.makeComponentRef(component)
542 return ref
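# Lookup sketch for findDataset, assuming a "demo_bias" dataset type over
# (instrument, detector) has been registered and datasets exist in the named run;
# all names and values are illustrative.
ref = registry.findDataset(
    "demo_bias",
    instrument="DemoCam",
    detector=0,
    collections=["demo/run/1"],
)
if ref is not None:
    print(ref.id, ref.run)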
544 @transactional
545 def insertDatasets(
546 self,
547 datasetType: Union[DatasetType, str],
548 dataIds: Iterable[DataId],
549 run: Optional[str] = None,
550 expand: bool = True,
551 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
552 ) -> List[DatasetRef]:
553 # Docstring inherited from lsst.daf.butler.registry.Registry
554 if isinstance(datasetType, DatasetType):
555 storage = self._managers.datasets.find(datasetType.name)
556 if storage is None:
557 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
558 else:
559 storage = self._managers.datasets.find(datasetType)
560 if storage is None:
561 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
562 if run is None:
563 if self.defaults.run is None:
564 raise NoDefaultCollectionError(
565 "No run provided to insertDatasets, and no default from registry construction."
566 )
567 run = self.defaults.run
568 runRecord = self._managers.collections.find(run)
569 if runRecord.type is not CollectionType.RUN:
570 raise CollectionTypeError(
571 f"Given collection is of type {runRecord.type.name}; RUN collection required."
572 )
573 assert isinstance(runRecord, RunRecord)
574 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
575 if expand:
576 expandedDataIds = [
577 self.expandDataId(dataId, graph=storage.datasetType.dimensions)
578 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
579 ]
580 else:
581 expandedDataIds = [
582 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
583 ]
584 try:
585 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
586 if self._managers.obscore:
587 context = queries.SqlQueryContext(self._db, self._managers.column_types)
588 self._managers.obscore.add_datasets(refs, context)
589 except sqlalchemy.exc.IntegrityError as err:
590 raise ConflictingDefinitionError(
591 "A database constraint failure was triggered by inserting "
592 f"one or more datasets of type {storage.datasetType} into "
593 f"collection '{run}'. "
594 "This probably means a dataset with the same data ID "
595 "and dataset type already exists, but it may also mean a "
596 "dimension row is missing."
597 ) from err
598 return refs
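# Insertion sketch: register a dataset type and insert two datasets into a RUN
# collection. Dimension names, the storage class, and the data ID values are
# illustrative, and the corresponding instrument/detector dimension rows must
# already exist (see the insertDimensionData sketch further below).
from lsst.daf.butler import DatasetType

dataset_type = DatasetType(
    "demo_bias",
    dimensions=("instrument", "detector"),
    storageClass="ExposureF",
    universe=registry.dimensions,
)
registry.registerDatasetType(dataset_type)
registry.registerRun("demo/run/1")
refs = registry.insertDatasets(
    dataset_type,
    dataIds=[
        {"instrument": "DemoCam", "detector": 0},
        {"instrument": "DemoCam", "detector": 1},
    ],
    run="demo/run/1",
)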
600 @transactional
601 def _importDatasets(
602 self,
603 datasets: Iterable[DatasetRef],
604 expand: bool = True,
605 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
606 reuseIds: bool = False,
607 ) -> List[DatasetRef]:
608 # Docstring inherited from lsst.daf.butler.registry.Registry
609 datasets = list(datasets)
610 if not datasets:
611 # nothing to do
612 return []
614 # find dataset type
615 datasetTypes = set(dataset.datasetType for dataset in datasets)
616 if len(datasetTypes) != 1:
617 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
618 datasetType = datasetTypes.pop()
620 # get storage handler for this dataset type
621 storage = self._managers.datasets.find(datasetType.name)
622 if storage is None:
623 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
625 # find run name
626 runs = set(dataset.run for dataset in datasets)
627 if len(runs) != 1:
628 raise ValueError(f"Multiple run names in input datasets: {runs}")
629 run = runs.pop()
630 if run is None:
631 if self.defaults.run is None:
632 raise NoDefaultCollectionError(
633 "No run provided to ingestDatasets, and no default from registry construction."
634 )
635 run = self.defaults.run
637 runRecord = self._managers.collections.find(run)
638 if runRecord.type is not CollectionType.RUN:
639 raise CollectionTypeError(
640 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
641 " RUN collection required."
642 )
643 assert isinstance(runRecord, RunRecord)
645 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
646 if expand:
647 expandedDatasets = [
648 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
649 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
650 ]
651 else:
652 expandedDatasets = [
653 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
654 for dataset in datasets
655 ]
657 try:
658 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds))
659 if self._managers.obscore:
660 context = queries.SqlQueryContext(self._db, self._managers.column_types)
661 self._managers.obscore.add_datasets(refs, context)
662 except sqlalchemy.exc.IntegrityError as err:
663 raise ConflictingDefinitionError(
664 "A database constraint failure was triggered by inserting "
665 f"one or more datasets of type {storage.datasetType} into "
666 f"collection '{run}'. "
667 "This probably means a dataset with the same data ID "
668 "and dataset type already exists, but it may also mean a "
669 "dimension row is missing."
670 ) from err
671 return refs
673 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
674 # Docstring inherited from lsst.daf.butler.registry.Registry
675 return self._managers.datasets.getDatasetRef(id)
677 @transactional
678 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
679 # Docstring inherited from lsst.daf.butler.registry.Registry
680 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
681 for datasetType, refsForType in progress.iter_item_chunks(
682 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
683 ):
684 storage = self._managers.datasets[datasetType.name]
685 try:
686 storage.delete(refsForType)
687 except sqlalchemy.exc.IntegrityError as err:
688 raise OrphanedRecordError(
689 "One or more datasets is still present in one or more Datastores."
690 ) from err
692 @transactional
693 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
694 # Docstring inherited from lsst.daf.butler.registry.Registry
695 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
696 collectionRecord = self._managers.collections.find(collection)
697 if collectionRecord.type is not CollectionType.TAGGED:
698 raise CollectionTypeError(
699 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
700 )
701 for datasetType, refsForType in progress.iter_item_chunks(
702 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
703 ):
704 storage = self._managers.datasets[datasetType.name]
705 try:
706 storage.associate(collectionRecord, refsForType)
707 if self._managers.obscore:
708 # If a TAGGED collection is being monitored by ObsCore
709 # manager then we may need to save the dataset.
710 context = queries.SqlQueryContext(self._db, self._managers.column_types)
711 self._managers.obscore.associate(refsForType, collectionRecord, context)
712 except sqlalchemy.exc.IntegrityError as err:
713 raise ConflictingDefinitionError(
714 f"Constraint violation while associating dataset of type {datasetType.name} with "
715 f"collection {collection}. This probably means that one or more datasets with the same "
716 "dataset type and data ID already exist in the collection, but it may also indicate "
717 "that the datasets do not exist."
718 ) from err
720 @transactional
721 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
722 # Docstring inherited from lsst.daf.butler.registry.Registry
723 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
724 collectionRecord = self._managers.collections.find(collection)
725 if collectionRecord.type is not CollectionType.TAGGED:
726 raise CollectionTypeError(
727 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
728 )
729 for datasetType, refsForType in progress.iter_item_chunks(
730 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
731 ):
732 storage = self._managers.datasets[datasetType.name]
733 storage.disassociate(collectionRecord, refsForType)
734 if self._managers.obscore:
735 self._managers.obscore.disassociate(refsForType, collectionRecord)
737 @transactional
738 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
739 # Docstring inherited from lsst.daf.butler.registry.Registry
740 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
741 collectionRecord = self._managers.collections.find(collection)
742 for datasetType, refsForType in progress.iter_item_chunks(
743 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
744 ):
745 storage = self._managers.datasets[datasetType.name]
746 storage.certify(
747 collectionRecord,
748 refsForType,
749 timespan,
750 context=queries.SqlQueryContext(self._db, self._managers.column_types),
751 )
753 @transactional
754 def decertify(
755 self,
756 collection: str,
757 datasetType: Union[str, DatasetType],
758 timespan: Timespan,
759 *,
760 dataIds: Optional[Iterable[DataId]] = None,
761 ) -> None:
762 # Docstring inherited from lsst.daf.butler.registry.Registry
763 collectionRecord = self._managers.collections.find(collection)
764 if isinstance(datasetType, str):
765 storage = self._managers.datasets[datasetType]
766 else:
767 storage = self._managers.datasets[datasetType.name]
768 standardizedDataIds = None
769 if dataIds is not None:
770 standardizedDataIds = [
771 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
772 ]
773 storage.decertify(
774 collectionRecord,
775 timespan,
776 dataIds=standardizedDataIds,
777 context=queries.SqlQueryContext(self._db, self._managers.column_types),
778 )
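# Calibration-collection sketch: certify existing "demo_bias" refs as valid for
# January 2023, then decertify part of that range. Assumes `registry` and the
# dataset type from the earlier sketches; names and dates are illustrative.
from astropy.time import Time
from lsst.daf.butler import Timespan
from lsst.daf.butler.registry import CollectionType

registry.registerCollection("demo/calib", CollectionType.CALIBRATION)
refs = list(registry.queryDatasets("demo_bias", collections=["demo/run/1"]))
january = Timespan(Time("2023-01-01", scale="tai"), Time("2023-02-01", scale="tai"))
registry.certify("demo/calib", refs, january)
registry.decertify(
    "demo/calib",
    "demo_bias",
    Timespan(Time("2023-01-15", scale="tai"), Time("2023-02-01", scale="tai")),
)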
780 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
781 """Return an object that allows a new `Datastore` instance to
782 communicate with this `Registry`.
784 Returns
785 -------
786 manager : `DatastoreRegistryBridgeManager`
787 Object that mediates communication between this `Registry` and its
788 associated datastores.
789 """
790 return self._managers.datastores
792 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
793 # Docstring inherited from lsst.daf.butler.registry.Registry
794 return self._managers.datastores.findDatastores(ref)
796 def expandDataId(
797 self,
798 dataId: Optional[DataId] = None,
799 *,
800 graph: Optional[DimensionGraph] = None,
801 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
802 withDefaults: bool = True,
803 **kwargs: Any,
804 ) -> DataCoordinate:
805 # Docstring inherited from lsst.daf.butler.registry.Registry
806 if not withDefaults:
807 defaults = None
808 else:
809 defaults = self.defaults.dataId
810 try:
811 standardized = DataCoordinate.standardize(
812 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
813 )
814 except KeyError as exc:
815 # This means either kwargs have some odd name or required
816 # dimension is missing.
817 raise DimensionNameError(str(exc)) from exc
818 if standardized.hasRecords():
819 return standardized
820 if records is None:
821 records = {}
822 elif isinstance(records, NamedKeyMapping):
823 records = records.byName()
824 else:
825 records = dict(records)
826 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
827 records.update(dataId.records.byName())
828 keys = standardized.byName()
829 context = queries.SqlQueryContext(self._db, self._managers.column_types)
830 for element in standardized.graph.primaryKeyTraversalOrder:
831 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
832 if record is ...:
833 if isinstance(element, Dimension) and keys.get(element.name) is None:
834 if element in standardized.graph.required:
835 raise DimensionNameError(
836 f"No value or null value for required dimension {element.name}."
837 )
838 keys[element.name] = None
839 record = None
840 else:
841 storage = self._managers.dimensions[element]
842 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
843 records[element.name] = record
844 if record is not None:
845 for d in element.implied:
846 value = getattr(record, d.name)
847 if keys.setdefault(d.name, value) != value:
848 raise InconsistentDataIdError(
849 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
850 f"but {element.name} implies {d.name}={value!r}."
851 )
852 else:
853 if element in standardized.graph.required:
854 raise DataIdValueError(
855 f"Could not fetch record for required dimension {element.name} via keys {keys}."
856 )
857 if element.alwaysJoin:
858 raise InconsistentDataIdError(
859 f"Could not fetch record for element {element.name} via keys {keys}, ",
860 "but it is marked alwaysJoin=True; this means one or more dimensions are not "
861 "related.",
862 )
863 for d in element.implied:
864 keys.setdefault(d.name, None)
865 records.setdefault(d.name, None)
866 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
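# Expansion sketch for expandDataId: turn a minimal data ID into one that carries
# full dimension records. The instrument and detector values are illustrative and
# must already be present in the dimension tables; `registry` is assumed as above.
data_id = registry.expandDataId(instrument="DemoCam", detector=0)
print(data_id.hasRecords())         # True
print(data_id.records["detector"])  # the full detector DimensionRecord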
868 def insertDimensionData(
869 self,
870 element: Union[DimensionElement, str],
871 *data: Union[Mapping[str, Any], DimensionRecord],
872 conform: bool = True,
873 replace: bool = False,
874 skip_existing: bool = False,
875 ) -> None:
876 # Docstring inherited from lsst.daf.butler.registry.Registry
877 if conform:
878 if isinstance(element, str):
879 element = self.dimensions[element]
880 records = [
881 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
882 ]
883 else:
884 # Ignore typing since caller said to trust them with conform=False.
885 records = data # type: ignore
886 storage = self._managers.dimensions[element]
887 storage.insert(*records, replace=replace, skip_existing=skip_existing)
889 def syncDimensionData(
890 self,
891 element: Union[DimensionElement, str],
892 row: Union[Mapping[str, Any], DimensionRecord],
893 conform: bool = True,
894 update: bool = False,
895 ) -> Union[bool, Dict[str, Any]]:
896 # Docstring inherited from lsst.daf.butler.registry.Registry
897 if conform:
898 if isinstance(element, str):
899 element = self.dimensions[element]
900 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
901 else:
902 # Ignore typing since caller said to trust them with conform=False.
903 record = row # type: ignore
904 storage = self._managers.dimensions[element]
905 return storage.sync(record, update=update)
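# Dimension-data sketch: populate dimension tables directly. The required record
# fields depend on the repository's dimension configuration, so the columns shown
# here are illustrative; `registry` is assumed to be writeable.
registry.insertDimensionData(
    "instrument",
    {
        "name": "DemoCam",
        "class_name": "lsst.obs.demo.DemoCam",
        "detector_max": 4,
        "visit_max": 999999,
        "exposure_max": 999999,
    },
)
# syncDimensionData is idempotent: it inserts the row if needed and reports whether
# anything was inserted or changed.
inserted = registry.syncDimensionData(
    "detector", {"instrument": "DemoCam", "id": 0, "full_name": "S00"}
)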
907 def queryDatasetTypes(
908 self,
909 expression: Any = ...,
910 *,
911 components: Optional[bool] = None,
912 missing: Optional[List[str]] = None,
913 ) -> Iterable[DatasetType]:
914 # Docstring inherited from lsst.daf.butler.registry.Registry
915 wildcard = DatasetTypeWildcard.from_expression(expression)
916 composition_dict = self._managers.datasets.resolve_wildcard(
917 wildcard,
918 components=components,
919 missing=missing,
920 )
921 result: list[DatasetType] = []
922 for parent_dataset_type, components_for_parent in composition_dict.items():
923 result.extend(
924 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
925 for c in components_for_parent
926 )
927 return result
929 def queryCollections(
930 self,
931 expression: Any = ...,
932 datasetType: Optional[DatasetType] = None,
933 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
934 flattenChains: bool = False,
935 includeChains: Optional[bool] = None,
936 ) -> Sequence[str]:
937 # Docstring inherited from lsst.daf.butler.registry.Registry
939 # Right now the datasetType argument is completely ignored, but that
940 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
941 # ticket will take care of that.
942 try:
943 wildcard = CollectionWildcard.from_expression(expression)
944 except TypeError as exc:
945 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
946 collectionTypes = ensure_iterable(collectionTypes)
947 return [
948 record.name
949 for record in self._managers.collections.resolve_wildcard(
950 wildcard,
951 collection_types=frozenset(collectionTypes),
952 flatten_chains=flattenChains,
953 include_chains=includeChains,
954 )
955 ]
957 def _makeQueryBuilder(
958 self,
959 summary: queries.QuerySummary,
960 doomed_by: Iterable[str] = (),
961 ) -> queries.QueryBuilder:
962 """Return a `QueryBuilder` instance capable of constructing and
963 managing more complex queries than those obtainable via `Registry`
964 interfaces.
966 This is an advanced interface; downstream code should prefer
967 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
968 are sufficient.
970 Parameters
971 ----------
972 summary : `queries.QuerySummary`
973 Object describing and categorizing the full set of dimensions that
974 will be included in the query.
975 doomed_by : `Iterable` of `str`, optional
976 A list of diagnostic messages that indicate why the query is going
977 to yield no results and should not even be executed. If an empty
978 container (default) the query will be executed unless other code
979 determines that it is doomed.
981 Returns
982 -------
983 builder : `queries.QueryBuilder`
984 Object that can be used to construct and perform advanced queries.
985 """
986 doomed_by = list(doomed_by)
987 backend = queries.SqlQueryBackend(self._db, self._managers)
988 context = backend.context()
989 relation: Relation | None = None
990 if doomed_by:
991 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
992 return queries.QueryBuilder(
993 summary,
994 backend=backend,
995 context=context,
996 relation=relation,
997 )
999 def _standardize_query_data_id_args(
1000 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
1001 ) -> DataCoordinate:
1002 """Preprocess the data ID arguments passed to query* methods.
1004 Parameters
1005 ----------
1006 data_id : `DataId` or `None`
1007 Data ID that constrains the query results.
1008 doomed_by : `list` [ `str` ]
1009 List to append messages indicating why the query is doomed to
1010 yield no results.
1011 **kwargs
1012 Additional data ID key-value pairs, extending and overriding
1013 ``data_id``.
1015 Returns
1016 -------
1017 data_id : `DataCoordinate`
1018 Standardized data ID. Will be fully expanded unless expansion
1019 fails, in which case a message will be appended to ``doomed_by``
1020 on return.
1021 """
1022 try:
1023 return self.expandDataId(data_id, **kwargs)
1024 except DataIdValueError as err:
1025 doomed_by.append(str(err))
1026 return DataCoordinate.standardize(
1027 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
1028 )
1030 def _standardize_query_dataset_args(
1031 self,
1032 datasets: Any,
1033 collections: Any,
1034 components: bool | None,
1035 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
1036 *,
1037 doomed_by: list[str],
1038 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
1039 """Preprocess dataset arguments passed to query* methods.
1041 Parameters
1042 ----------
1043 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
1044 Expression identifying dataset types. See `queryDatasetTypes` for
1045 details.
1046 collections : `str`, `re.Pattern`, or iterable of these
1047 Expression identifying collections to be searched. See
1048 `queryCollections` for details.
1049 components : `bool`, optional
1050 If `True`, apply all expression patterns to component dataset type
1051 names as well. If `False`, never apply patterns to components.
1052 If `None` (default), apply patterns to components only if their
1053 parent datasets were not matched by the expression.
1054 Fully-specified component datasets (`str` or `DatasetType`
1055 instances) are always included.
1057 Values other than `False` are deprecated, and only `False` will be
1058 supported after v26. After v27 this argument will be removed
1059 entirely.
1060 mode : `str`, optional
1061 The way in which datasets are being used in this query; one of:
1063 - "find_first": this is a query for the first dataset in an
1064 ordered list of collections. Prohibits collection wildcards,
1065 but permits dataset type wildcards.
1067 - "find_all": this is a query for all datasets in all matched
1068 collections. Permits collection and dataset type wildcards.
1070 - "constrain": this is a query for something other than datasets,
1071 with results constrained by dataset existence. Permits
1072 collection wildcards and prohibits ``...`` as a dataset type
1073 wildcard.
1074 doomed_by : `list` [ `str` ]
1075 List to append messages indicating why the query is doomed to
1076 yield no results.
1078 Returns
1079 -------
1080 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ]
1081 Dictionary mapping parent dataset type to `list` of components
1082 matched for that dataset type (or `None` for the parent itself).
1083 collections : `CollectionWildcard`
1084 Processed collection expression.
1085 """
1086 composition: dict[DatasetType, list[str | None]] = {}
1087 if datasets is not None:
1088 if not collections:
1089 if not self.defaults.collections:
1090 raise NoDefaultCollectionError("No collections, and no registry default collections.")
1091 collections = self.defaults.collections
1092 else:
1093 collections = CollectionWildcard.from_expression(collections)
1094 if mode == "find_first" and collections.patterns:
1095 raise TypeError(
1096 f"Collection pattern(s) {collections.patterns} not allowed in this context."
1097 )
1098 missing: list[str] = []
1099 composition = self._managers.datasets.resolve_wildcard(
1100 datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
1101 )
1102 if missing and mode == "constrain":
1103 # After v26 this should raise MissingDatasetTypeError, to be
1104 # implemented on DM-36303.
1105 warnings.warn(
1106 f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
1107 FutureWarning,
1108 )
1109 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
1110 elif collections:
1111 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
1112 return composition, collections
1114 def queryDatasets(
1115 self,
1116 datasetType: Any,
1117 *,
1118 collections: Any = None,
1119 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1120 dataId: Optional[DataId] = None,
1121 where: str = "",
1122 findFirst: bool = False,
1123 components: Optional[bool] = None,
1124 bind: Optional[Mapping[str, Any]] = None,
1125 check: bool = True,
1126 **kwargs: Any,
1127 ) -> queries.DatasetQueryResults:
1128 # Docstring inherited from lsst.daf.butler.registry.Registry
1129 doomed_by: list[str] = []
1130 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1131 dataset_composition, collections = self._standardize_query_dataset_args(
1132 datasetType,
1133 collections,
1134 components,
1135 mode="find_first" if findFirst else "find_all",
1136 doomed_by=doomed_by,
1137 )
1138 parent_results: list[queries.ParentDatasetQueryResults] = []
1139 for parent_dataset_type, components_for_parent in dataset_composition.items():
1140 # The full set of dimensions in the query is the combination of
1141 # those needed for the DatasetType and those explicitly requested,
1142 # if any.
1143 dimension_names = set(parent_dataset_type.dimensions.names)
1144 if dimensions is not None:
1145 dimension_names.update(self.dimensions.extract(dimensions).names)
1146 # Construct the summary structure needed to construct a
1147 # QueryBuilder.
1148 summary = queries.QuerySummary(
1149 requested=DimensionGraph(self.dimensions, names=dimension_names),
1150 data_id=data_id,
1151 expression=where,
1152 bind=bind,
1153 defaults=self.defaults.dataId,
1154 check=check,
1155 datasets=[parent_dataset_type],
1156 )
1157 builder = self._makeQueryBuilder(summary)
1158 # Add the dataset subquery to the query, telling the QueryBuilder
1159 # to include the rank of the selected collection in the results
1160 # only if we need to findFirst. Note that if any of the
1161 # collections are actually wildcard expressions, and
1162 # findFirst=True, this will raise TypeError for us.
1163 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst)
1164 query = builder.finish()
1165 parent_results.append(
1166 queries.ParentDatasetQueryResults(
1167 query, parent_dataset_type, components=components_for_parent
1168 )
1169 )
1170 if not parent_results:
1171 doomed_by.extend(
1172 f"No registered dataset type matching {t!r} found, so no matching datasets can "
1173 "exist in any collection."
1174 for t in ensure_iterable(datasetType)
1175 )
1176 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
1177 elif len(parent_results) == 1:
1178 return parent_results[0]
1179 else:
1180 return queries.ChainedDatasetQueryResults(parent_results)
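# Dataset-query sketch: combine a user expression with bind values and a find-first
# search through a chained collection. Dataset type, collection, and dimension names
# are illustrative; `registry` is assumed as in the earlier sketches.
refs = registry.queryDatasets(
    "demo_bias",
    collections=["demo/chain"],
    where="instrument = inst AND detector IN (dets)",
    bind={"inst": "DemoCam", "dets": [0, 1]},
    findFirst=True,
)
for ref in refs:
    print(ref.dataId, ref.run)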
1182 def queryDataIds(
1183 self,
1184 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
1185 *,
1186 dataId: Optional[DataId] = None,
1187 datasets: Any = None,
1188 collections: Any = None,
1189 where: str = "",
1190 components: Optional[bool] = None,
1191 bind: Optional[Mapping[str, Any]] = None,
1192 check: bool = True,
1193 **kwargs: Any,
1194 ) -> queries.DataCoordinateQueryResults:
1195 # Docstring inherited from lsst.daf.butler.registry.Registry
1196 dimensions = ensure_iterable(dimensions)
1197 requestedDimensions = self.dimensions.extract(dimensions)
1198 doomed_by: list[str] = []
1199 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1200 dataset_composition, collections = self._standardize_query_dataset_args(
1201 datasets, collections, components, doomed_by=doomed_by
1202 )
1203 summary = queries.QuerySummary(
1204 requested=requestedDimensions,
1205 data_id=data_id,
1206 expression=where,
1207 bind=bind,
1208 defaults=self.defaults.dataId,
1209 check=check,
1210 datasets=dataset_composition.keys(),
1211 )
1212 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1213 for datasetType in dataset_composition.keys():
1214 builder.joinDataset(datasetType, collections, isResult=False)
1215 query = builder.finish()
1217 return queries.DataCoordinateQueryResults(query)
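# Data-ID query sketch: all (exposure, detector) combinations for which a "raw"
# dataset exists in the given collection. The dataset type and collection name are
# illustrative assumptions.
data_ids = registry.queryDataIds(
    ["exposure", "detector"],
    datasets="raw",
    collections=["demo/raw"],
    where="instrument = 'DemoCam'",
)
for data_id in data_ids:
    print(data_id["exposure"], data_id["detector"])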
1219 def queryDimensionRecords(
1220 self,
1221 element: Union[DimensionElement, str],
1222 *,
1223 dataId: Optional[DataId] = None,
1224 datasets: Any = None,
1225 collections: Any = None,
1226 where: str = "",
1227 components: Optional[bool] = None,
1228 bind: Optional[Mapping[str, Any]] = None,
1229 check: bool = True,
1230 **kwargs: Any,
1231 ) -> queries.DimensionRecordQueryResults:
1232 # Docstring inherited from lsst.daf.butler.registry.Registry
1233 if not isinstance(element, DimensionElement):
1234 try:
1235 element = self.dimensions[element]
1236 except KeyError as e:
1237 raise DimensionNameError(
1238 f"No such dimension '{element}', available dimensions: "
1239 + str(self.dimensions.getStaticElements())
1240 ) from e
1241 doomed_by: list[str] = []
1242 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1243 dataset_composition, collections = self._standardize_query_dataset_args(
1244 datasets, collections, components, doomed_by=doomed_by
1245 )
1246 summary = queries.QuerySummary(
1247 requested=element.graph,
1248 data_id=data_id,
1249 expression=where,
1250 bind=bind,
1251 defaults=self.defaults.dataId,
1252 check=check,
1253 datasets=dataset_composition.keys(),
1254 )
1255 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1256 for datasetType in dataset_composition.keys():
1257 builder.joinDataset(datasetType, collections, isResult=False)
1258 query = builder.finish().with_record_columns(element)
1259 return queries.DatabaseDimensionRecordQueryResults(query, element)
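# Dimension-record query sketch; the element name and constraint are illustrative,
# and `registry` is assumed as above.
for record in registry.queryDimensionRecords("detector", where="instrument = 'DemoCam'"):
    print(record.id, record.full_name)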
1261 def queryDatasetAssociations(
1262 self,
1263 datasetType: Union[str, DatasetType],
1264 collections: Any = ...,
1265 *,
1266 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1267 flattenChains: bool = False,
1268 ) -> Iterator[DatasetAssociation]:
1269 # Docstring inherited from lsst.daf.butler.registry.Registry
1270 if collections is None:
1271 if not self.defaults.collections:
1272 raise NoDefaultCollectionError(
1273 "No collections provided to queryDatasetAssociations, "
1274 "and no defaults from registry construction."
1275 )
1276 collections = self.defaults.collections
1277 collections = CollectionWildcard.from_expression(collections)
1278 backend = queries.SqlQueryBackend(self._db, self._managers)
1279 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
1280 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
1281 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
1282 for parent_collection_record in backend.resolve_collection_wildcard(
1283 collections,
1284 collection_types=frozenset(collectionTypes),
1285 flatten_chains=flattenChains,
1286 ):
1287 # Resolve this possibly-chained collection into a list of
1288 # non-CHAINED collections that actually hold datasets of this
1289 # type.
1290 candidate_collection_records = backend.resolve_dataset_collections(
1291 parent_dataset_type,
1292 CollectionWildcard.from_names([parent_collection_record.name]),
1293 allow_calibration_collections=True,
1294 governor_constraints={},
1295 )
1296 if not candidate_collection_records:
1297 continue
1298 with backend.context() as context:
1299 relation = backend.make_dataset_query_relation(
1300 parent_dataset_type,
1301 candidate_collection_records,
1302 columns={"dataset_id", "run", "timespan", "collection"},
1303 context=context,
1304 )
1305 reader = queries.DatasetRefReader(
1306 parent_dataset_type,
1307 translate_collection=lambda k: self._managers.collections[k].name,
1308 full=False,
1309 )
1310 for row in context.fetch_iterable(relation):
1311 ref = reader.read(row)
1312 collection_record = self._managers.collections[row[collection_tag]]
1313 if collection_record.type is CollectionType.CALIBRATION:
1314 timespan = row[timespan_tag]
1315 else:
1316 # For backwards compatibility and (possibly?) user
1317 # convenience we continue to define the timespan of a
1318 # DatasetAssociation row for a non-CALIBRATION
1319 # collection to be None rather than a fully unbounded
1320 # timespan.
1321 timespan = None
1322 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
1324 storageClasses: StorageClassFactory
1325 """All storage classes known to the registry (`StorageClassFactory`).
1326 """