Coverage for python/lsst/daf/butler/registries/sql.py: 12%
506 statements
coverage.py v6.5.0, created at 2023-01-07 02:05 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SqlRegistry",)
26import contextlib
27import logging
28import warnings
29from typing import (
30 TYPE_CHECKING,
31 Any,
32 Dict,
33 Iterable,
34 Iterator,
35 List,
36 Literal,
37 Mapping,
38 Optional,
39 Sequence,
40 Set,
41 Union,
42 cast,
43)
45import sqlalchemy
46from lsst.daf.relation import LeafRelation, Relation
47from lsst.resources import ResourcePathExpression
48from lsst.utils.iteration import ensure_iterable
50from ..core import (
51 Config,
52 DataCoordinate,
53 DataId,
54 DatasetAssociation,
55 DatasetColumnTag,
56 DatasetId,
57 DatasetRef,
58 DatasetType,
59 Dimension,
60 DimensionConfig,
61 DimensionElement,
62 DimensionGraph,
63 DimensionRecord,
64 DimensionUniverse,
65 NamedKeyMapping,
66 NameLookupMapping,
67 Progress,
68 StorageClassFactory,
69 Timespan,
70 ddl,
71)
72from ..core.utils import transactional
73from ..registry import (
74 ArgumentError,
75 CollectionExpressionError,
76 CollectionSummary,
77 CollectionType,
78 CollectionTypeError,
79 ConflictingDefinitionError,
80 DataIdValueError,
81 DatasetTypeError,
82 DimensionNameError,
83 InconsistentDataIdError,
84 NoDefaultCollectionError,
85 OrphanedRecordError,
86 Registry,
87 RegistryConfig,
88 RegistryDefaults,
89 queries,
90)
91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord
92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
93from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
95if TYPE_CHECKING:
96 from .._butlerConfig import ButlerConfig
97 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager
100_LOG = logging.getLogger(__name__)
103class SqlRegistry(Registry):
104 """Registry implementation based on SQLAlchemy.
106 Parameters
107 ----------
108 database : `Database`
109 Database instance to store Registry.
110 defaults : `RegistryDefaults`
111 Default collection search path and/or output `~CollectionType.RUN`
112 collection.
113 managers : `RegistryManagerInstances`
114 All the managers required for this registry.
115 """
117 defaultConfigFile: Optional[str] = None
118 """Path to configuration defaults. Accessed within the ``configs`` resource
119 or relative to a search path. Can be `None` if no defaults are specified.
120 """
122 @classmethod
123 def createFromConfig(
124 cls,
125 config: Optional[Union[RegistryConfig, str]] = None,
126 dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
127 butlerRoot: Optional[ResourcePathExpression] = None,
128 ) -> Registry:
129 """Create registry database and return `SqlRegistry` instance.
131 This method initializes the database contents; the database must be
132 empty prior to calling this method.
134 Parameters
135 ----------
136 config : `RegistryConfig` or `str`, optional
137 Registry configuration. If missing, the default configuration is
138 loaded from registry.yaml.
139 dimensionConfig : `DimensionConfig` or `str`, optional
140 Dimensions configuration. If missing, the default configuration is
141 loaded from dimensions.yaml.
142 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
143 Path to the repository root this `SqlRegistry` will manage.
145 Returns
146 -------
147 registry : `SqlRegistry`
148 A new `SqlRegistry` instance.
149 """
150 config = cls.forceRegistryConfig(config)
151 config.replaceRoot(butlerRoot)
153 if isinstance(dimensionConfig, str):
154 dimensionConfig = DimensionConfig(dimensionConfig)
155 elif dimensionConfig is None:
156 dimensionConfig = DimensionConfig()
157 elif not isinstance(dimensionConfig, DimensionConfig):
158 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
160 DatabaseClass = config.getDatabaseClass()
161 database = DatabaseClass.fromUri(
162 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace")
163 )
164 managerTypes = RegistryManagerTypes.fromConfig(config)
165 managers = managerTypes.makeRepo(database, dimensionConfig)
166 return cls(database, RegistryDefaults(), managers)
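# A minimal bootstrap sketch for the classmethod above; the SQLite URL and
# repository root are hypothetical, and the "db" key of RegistryConfig is
# assumed to hold the connection string:
config = RegistryConfig()
config["db"] = "sqlite:///example_repo/gen3.sqlite3"
registry = SqlRegistry.createFromConfig(config, butlerRoot="example_repo")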
168 @classmethod
169 def fromConfig(
170 cls,
171 config: Union[ButlerConfig, RegistryConfig, Config, str],
172 butlerRoot: Optional[ResourcePathExpression] = None,
173 writeable: bool = True,
174 defaults: Optional[RegistryDefaults] = None,
175 ) -> Registry:
176 """Create `Registry` subclass instance from `config`.
178 Registry database must be initialized prior to calling this method.
180 Parameters
181 ----------
182 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
183 Registry configuration
184 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
185 Path to the repository root this `Registry` will manage.
186 writeable : `bool`, optional
187 If `True` (default) create a read-write connection to the database.
188 defaults : `RegistryDefaults`, optional
189 Default collection search path and/or output `~CollectionType.RUN`
190 collection.
192 Returns
193 -------
194 registry : `SqlRegistry` (subclass)
195 A new `SqlRegistry` subclass instance.
196 """
197 config = cls.forceRegistryConfig(config)
198 config.replaceRoot(butlerRoot)
199 DatabaseClass = config.getDatabaseClass()
200 database = DatabaseClass.fromUri(
201 str(config.connectionString),
202 origin=config.get("origin", 0),
203 namespace=config.get("namespace"),
204 writeable=writeable,
205 )
206 managerTypes = RegistryManagerTypes.fromConfig(config)
207 with database.session():
208 managers = managerTypes.loadRepo(database)
209 if defaults is None:
210 defaults = RegistryDefaults()
211 return cls(database, defaults, managers)
213 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
214 self._db = database
215 self._managers = managers
216 self.storageClasses = StorageClassFactory()
217 # Intentionally invoke property setter to initialize defaults. This
218 # can only be done after most of the rest of Registry has already been
219 # initialized, and must be done before the property getter is used.
220 self.defaults = defaults
221 # In the future DatasetIdFactory may become configurable and this
222 # instance will need to be shared with datasets manager.
223 self.datasetIdFactory = DatasetIdFactory()
225 def __str__(self) -> str:
226 return str(self._db)
228 def __repr__(self) -> str:
229 return f"SqlRegistry({self._db!r}, {self.dimensions!r})"
231 def isWriteable(self) -> bool:
232 # Docstring inherited from lsst.daf.butler.registry.Registry
233 return self._db.isWriteable()
235 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
236 # Docstring inherited from lsst.daf.butler.registry.Registry
237 if defaults is None:
238 # No need to copy, because `RegistryDefaults` is immutable; we
239 # effectively copy on write.
240 defaults = self.defaults
241 return type(self)(self._db, defaults, self._managers)
243 @property
244 def dimensions(self) -> DimensionUniverse:
245 # Docstring inherited from lsst.daf.butler.registry.Registry
246 return self._managers.dimensions.universe
248 def refresh(self) -> None:
249 # Docstring inherited from lsst.daf.butler.registry.Registry
250 with self._db.transaction():
251 self._managers.refresh()
253 @contextlib.contextmanager
254 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
255 # Docstring inherited from lsst.daf.butler.registry.Registry
256 try:
257 with self._db.transaction(savepoint=savepoint):
258 yield
259 except BaseException:
260 # TODO: this clears the caches sometimes when we wouldn't actually
261 # need to. Can we avoid that?
262 self._managers.dimensions.clearCaches()
263 raise
265 def resetConnectionPool(self) -> None:
266 """Reset SQLAlchemy connection pool for `SqlRegistry` database.
268 This operation is useful when using the registry with fork-based
269 multiprocessing. To use the registry across a fork boundary, make
270 sure that there are no currently active connections (no session or
271 transaction is in progress) and that the connection pool is reset
272 using this method. The child process should call this method
273 immediately after the fork.
274 """
275 self._db._engine.dispose()
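# A sketch of the fork protocol described above; ``registry`` is an existing
# SqlRegistry with no open session or transaction, and ``os`` is assumed to be
# imported by the caller:
pid = os.fork()
if pid == 0:
    # Child process: drop connections inherited from the parent before
    # touching the registry again.
    registry.resetConnectionPool()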
277 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
278 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
279 other data repository client.
281 Opaque table records can be added via `insertOpaqueData`, retrieved via
282 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
284 Parameters
285 ----------
286 tableName : `str`
287 Logical name of the opaque table. This may differ from the
288 actual name used in the database by a prefix and/or suffix.
289 spec : `ddl.TableSpec`
290 Specification for the table to be added.
291 """
292 self._managers.opaque.register(tableName, spec)
294 @transactional
295 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
296 """Insert records into an opaque table.
298 Parameters
299 ----------
300 tableName : `str`
301 Logical name of the opaque table. Must match the name used in a
302 previous call to `registerOpaqueTable`.
303 data
304 Each additional positional argument is a dictionary that represents
305 a single row to be added.
306 """
307 self._managers.opaque[tableName].insert(*data)
309 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]:
310 """Retrieve records from an opaque table.
312 Parameters
313 ----------
314 tableName : `str`
315 Logical name of the opaque table. Must match the name used in a
316 previous call to `registerOpaqueTable`.
317 where
318 Additional keyword arguments are interpreted as equality
319 constraints that restrict the returned rows (combined with AND);
320 keyword arguments are column names and values are the values they
321 must have.
323 Yields
324 ------
325 row : `dict`
326 A dictionary representing a single result row.
327 """
328 yield from self._managers.opaque[tableName].fetch(**where)
330 @transactional
331 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
332 """Remove records from an opaque table.
334 Parameters
335 ----------
336 tableName : `str`
337 Logical name of the opaque table. Must match the name used in a
338 previous call to `registerOpaqueTable`.
339 where
340 Additional keyword arguments are interpreted as equality
341 constraints that restrict the deleted rows (combined with AND);
342 keyword arguments are column names and values are the values they
343 must have.
344 """
345 self._managers.opaque[tableName].delete(where.keys(), where)
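# A round-trip sketch for the three opaque-table methods above; the table name
# and the ddl.TableSpec/ddl.FieldSpec field definitions are illustrative
# assumptions, and ``registry`` is an existing SqlRegistry:
spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec(name="dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
        ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
    ]
)
registry.registerOpaqueTable("example_datastore_records", spec)
registry.insertOpaqueData("example_datastore_records", {"dataset_id": 1, "path": "a/b.fits"})
rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=1))
registry.deleteOpaqueData("example_datastore_records", dataset_id=1)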
347 def registerCollection(
348 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
349 ) -> bool:
350 # Docstring inherited from lsst.daf.butler.registry.Registry
351 _, registered = self._managers.collections.register(name, type, doc=doc)
352 return registered
354 def getCollectionType(self, name: str) -> CollectionType:
355 # Docstring inherited from lsst.daf.butler.registry.Registry
356 return self._managers.collections.find(name).type
358 def _get_collection_record(self, name: str) -> CollectionRecord:
359 # Docstring inherited from lsst.daf.butler.registry.Registry
360 return self._managers.collections.find(name)
362 def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
363 # Docstring inherited from lsst.daf.butler.registry.Registry
364 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
365 return registered
367 @transactional
368 def removeCollection(self, name: str) -> None:
369 # Docstring inherited from lsst.daf.butler.registry.Registry
370 self._managers.collections.remove(name)
372 def getCollectionChain(self, parent: str) -> tuple[str, ...]:
373 # Docstring inherited from lsst.daf.butler.registry.Registry
374 record = self._managers.collections.find(parent)
375 if record.type is not CollectionType.CHAINED:
376 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
377 assert isinstance(record, ChainedCollectionRecord)
378 return record.children
380 @transactional
381 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
382 # Docstring inherited from lsst.daf.butler.registry.Registry
383 record = self._managers.collections.find(parent)
384 if record.type is not CollectionType.CHAINED:
385 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
386 assert isinstance(record, ChainedCollectionRecord)
387 children = CollectionWildcard.from_expression(children).require_ordered()
388 if children != record.children or flatten:
389 record.update(self._managers.collections, children, flatten=flatten)
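# A sketch of assembling a CHAINED collection with the methods above; the
# collection names are hypothetical and ``registry`` is an existing SqlRegistry:
registry.registerCollection("example/defaults", CollectionType.CHAINED)
registry.registerRun("example/run1")
registry.registerRun("example/run2")
registry.setCollectionChain("example/defaults", ["example/run1", "example/run2"])
assert registry.getCollectionChain("example/defaults") == ("example/run1", "example/run2")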
391 def getCollectionParentChains(self, collection: str) -> Set[str]:
392 # Docstring inherited from lsst.daf.butler.registry.Registry
393 return {
394 record.name
395 for record in self._managers.collections.getParentChains(
396 self._managers.collections.find(collection).key
397 )
398 }
400 def getCollectionDocumentation(self, collection: str) -> Optional[str]:
401 # Docstring inherited from lsst.daf.butler.registry.Registry
402 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)
404 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
405 # Docstring inherited from lsst.daf.butler.registry.Registry
406 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)
408 def getCollectionSummary(self, collection: str) -> CollectionSummary:
409 # Docstring inherited from lsst.daf.butler.registry.Registry
410 record = self._managers.collections.find(collection)
411 return self._managers.datasets.getCollectionSummary(record)
413 def registerDatasetType(self, datasetType: DatasetType) -> bool:
414 # Docstring inherited from lsst.daf.butler.registry.Registry
415 _, inserted = self._managers.datasets.register(datasetType)
416 return inserted
418 def removeDatasetType(self, name: str) -> None:
419 # Docstring inherited from lsst.daf.butler.registry.Registry
420 self._managers.datasets.remove(name)
422 def getDatasetType(self, name: str) -> DatasetType:
423 # Docstring inherited from lsst.daf.butler.registry.Registry
424 parent_name, component = DatasetType.splitDatasetTypeName(name)
425 storage = self._managers.datasets[parent_name]
426 if component is None:
427 return storage.datasetType
428 else:
429 return storage.datasetType.makeComponentDatasetType(component)
431 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
432 # Docstring inherited from lsst.daf.butler.registry.Registry
433 return self._managers.datasets.supportsIdGenerationMode(mode)
435 def findDataset(
436 self,
437 datasetType: Union[DatasetType, str],
438 dataId: Optional[DataId] = None,
439 *,
440 collections: Any = None,
441 timespan: Optional[Timespan] = None,
442 **kwargs: Any,
443 ) -> Optional[DatasetRef]:
444 # Docstring inherited from lsst.daf.butler.registry.Registry
445 if collections is None:
446 if not self.defaults.collections:
447 raise NoDefaultCollectionError(
448 "No collections provided to findDataset, and no defaults from registry construction."
449 )
450 collections = self.defaults.collections
451 backend = queries.SqlQueryBackend(self._db, self._managers)
452 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
453 matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
454 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
455 datasetType, components_deprecated=False
456 )
457 if len(components) > 1:
458 raise DatasetTypeError(
459 f"findDataset requires exactly one dataset type; got multiple components {components} "
460 f"for parent dataset type {parent_dataset_type.name}."
461 )
462 component = components[0]
463 dataId = DataCoordinate.standardize(
464 dataId,
465 graph=parent_dataset_type.dimensions,
466 universe=self.dimensions,
467 defaults=self.defaults.dataId,
468 **kwargs,
469 )
470 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
471 (filtered_collections,) = backend.filter_dataset_collections(
472 [parent_dataset_type],
473 matched_collections,
474 governor_constraints=governor_constraints,
475 ).values()
476 if not filtered_collections:
477 return None
478 tail_collections: list[CollectionRecord] = []
479 if timespan is None:
480 for n, collection_record in enumerate(filtered_collections):
481 if collection_record.type is CollectionType.CALIBRATION:
482 tail_collections.extend(filtered_collections[n:])
483 del filtered_collections[n:]
484 break
485 requested_columns = {"dataset_id", "run", "collection"}
486 with backend.context() as context:
487 predicate = context.make_data_coordinate_predicate(
488 dataId.subset(parent_dataset_type.dimensions), full=False
489 )
490 if timespan is not None:
491 requested_columns.add("timespan")
492 predicate = predicate.logical_and(
493 context.make_timespan_overlap_predicate(
494 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
495 )
496 )
497 relation = backend.make_dataset_query_relation(
498 parent_dataset_type, filtered_collections, requested_columns, context
499 ).with_rows_satisfying(predicate)
500 rows = list(context.fetch_iterable(relation))
501 if not rows:
502 if tail_collections:
503 msg = (
504 f"Cannot search for dataset '{parent_dataset_type.name}' in CALIBRATION collection "
505 f"{tail_collections[0].name} without an input timespan."
506 )
507 if len(tail_collections) > 1:
508 remainder_names = ", ".join(c.name for c in tail_collections[1:])
509 msg += f" This also blocks searching collections [{remainder_names}] that follow it."
510 raise TypeError(msg)
511 return None
512 elif len(rows) == 1:
513 best_row = rows[0]
514 else:
515 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
516 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
517 row_iter = iter(rows)
518 best_row = next(row_iter)
519 best_rank = rank_by_collection_key[best_row[collection_tag]]
520 have_tie = False
521 for row in row_iter:
522 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
523 best_row = row
524 best_rank = rank
525 have_tie = False
526 elif rank == best_rank:
527 have_tie = True
528 assert timespan is not None, "Rank ties should be impossible given DB constraints."
529 if have_tie:
530 raise LookupError(
531 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
532 f"{collection_wildcard.strings} with timespan {timespan}."
533 )
534 reader = queries.DatasetRefReader(
535 parent_dataset_type,
536 translate_collection=lambda k: self._managers.collections[k].name,
537 )
538 ref = reader.read(best_row, data_id=dataId)
539 if component is not None:
540 ref = ref.makeComponentRef(component)
541 return ref
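# A usage sketch for findDataset; the dataset type, data ID values, and
# collection names are hypothetical. The timespan argument is only required
# when a CALIBRATION collection is part of the search path:
calib_ref = registry.findDataset(
    "bias",
    instrument="ExampleCam",
    detector=0,
    collections=["ExampleCam/calib"],
    timespan=Timespan(begin=None, end=None),
)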
543 @transactional
544 def insertDatasets(
545 self,
546 datasetType: Union[DatasetType, str],
547 dataIds: Iterable[DataId],
548 run: Optional[str] = None,
549 expand: bool = True,
550 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
551 ) -> List[DatasetRef]:
552 # Docstring inherited from lsst.daf.butler.registry.Registry
553 if isinstance(datasetType, DatasetType):
554 storage = self._managers.datasets.find(datasetType.name)
555 if storage is None:
556 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
557 else:
558 storage = self._managers.datasets.find(datasetType)
559 if storage is None:
560 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
561 if run is None:
562 if self.defaults.run is None:
563 raise NoDefaultCollectionError(
564 "No run provided to insertDatasets, and no default from registry construction."
565 )
566 run = self.defaults.run
567 runRecord = self._managers.collections.find(run)
568 if runRecord.type is not CollectionType.RUN:
569 raise CollectionTypeError(
570 f"Given collection is of type {runRecord.type.name}; RUN collection required."
571 )
572 assert isinstance(runRecord, RunRecord)
573 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
574 if expand:
575 expandedDataIds = [
576 self.expandDataId(dataId, graph=storage.datasetType.dimensions)
577 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
578 ]
579 else:
580 expandedDataIds = [
581 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
582 ]
583 try:
584 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
585 if self._managers.obscore:
586 context = queries.SqlQueryContext(self._db, self._managers.column_types)
587 self._managers.obscore.add_datasets(refs, context)
588 except sqlalchemy.exc.IntegrityError as err:
589 raise ConflictingDefinitionError(
590 f"A database constraint failure was triggered by inserting "
591 f"one or more datasets of type {storage.datasetType} into "
592 f"collection '{run}'. "
593 f"This probably means a dataset with the same data ID "
594 f"and dataset type already exists, but it may also mean a "
595 f"dimension row is missing."
596 ) from err
597 return refs
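# A usage sketch for insertDatasets; the dataset type, data ID, and RUN
# collection are hypothetical, and the dataset type and its dimension records
# are assumed to have been registered already:
registry.registerRun("ExampleCam/raw/example")
new_refs = registry.insertDatasets(
    "raw",
    dataIds=[{"instrument": "ExampleCam", "detector": 0, "exposure": 1}],
    run="ExampleCam/raw/example",
)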
599 @transactional
600 def _importDatasets(
601 self,
602 datasets: Iterable[DatasetRef],
603 expand: bool = True,
604 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
605 reuseIds: bool = False,
606 ) -> List[DatasetRef]:
607 # Docstring inherited from lsst.daf.butler.registry.Registry
608 datasets = list(datasets)
609 if not datasets:
610 # nothing to do
611 return []
613 # find dataset type
614 datasetTypes = set(dataset.datasetType for dataset in datasets)
615 if len(datasetTypes) != 1:
616 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
617 datasetType = datasetTypes.pop()
619 # get storage handler for this dataset type
620 storage = self._managers.datasets.find(datasetType.name)
621 if storage is None:
622 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
624 # find run name
625 runs = set(dataset.run for dataset in datasets)
626 if len(runs) != 1:
627 raise ValueError(f"Multiple run names in input datasets: {runs}")
628 run = runs.pop()
629 if run is None:
630 if self.defaults.run is None:
631 raise NoDefaultCollectionError(
632 "No run provided to ingestDatasets, and no default from registry construction."
633 )
634 run = self.defaults.run
636 runRecord = self._managers.collections.find(run)
637 if runRecord.type is not CollectionType.RUN:
638 raise CollectionTypeError(
639 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
640 " RUN collection required."
641 )
642 assert isinstance(runRecord, RunRecord)
644 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
645 if expand:
646 expandedDatasets = [
647 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
648 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
649 ]
650 else:
651 expandedDatasets = [
652 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
653 for dataset in datasets
654 ]
656 try:
657 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds))
658 if self._managers.obscore:
659 context = queries.SqlQueryContext(self._db, self._managers.column_types)
660 self._managers.obscore.add_datasets(refs, context)
661 except sqlalchemy.exc.IntegrityError as err:
662 raise ConflictingDefinitionError(
663 f"A database constraint failure was triggered by inserting "
664 f"one or more datasets of type {storage.datasetType} into "
665 f"collection '{run}'. "
666 f"This probably means a dataset with the same data ID "
667 f"and dataset type already exists, but it may also mean a "
668 f"dimension row is missing."
669 ) from err
670 return refs
672 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
673 # Docstring inherited from lsst.daf.butler.registry.Registry
674 return self._managers.datasets.getDatasetRef(id)
676 @transactional
677 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
678 # Docstring inherited from lsst.daf.butler.registry.Registry
679 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
680 for datasetType, refsForType in progress.iter_item_chunks(
681 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
682 ):
683 storage = self._managers.datasets[datasetType.name]
684 try:
685 storage.delete(refsForType)
686 except sqlalchemy.exc.IntegrityError as err:
687 raise OrphanedRecordError(
688 "One or more datasets is still present in one or more Datastores."
689 ) from err
691 @transactional
692 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
693 # Docstring inherited from lsst.daf.butler.registry.Registry
694 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
695 collectionRecord = self._managers.collections.find(collection)
696 if collectionRecord.type is not CollectionType.TAGGED:
697 raise CollectionTypeError(
698 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
699 )
700 for datasetType, refsForType in progress.iter_item_chunks(
701 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
702 ):
703 storage = self._managers.datasets[datasetType.name]
704 try:
705 storage.associate(collectionRecord, refsForType)
706 if self._managers.obscore:
707 # If a TAGGED collection is being monitored by ObsCore
708 # manager then we may need to save the dataset.
709 context = queries.SqlQueryContext(self._db, self._managers.column_types)
710 self._managers.obscore.associate(refsForType, collectionRecord, context)
711 except sqlalchemy.exc.IntegrityError as err:
712 raise ConflictingDefinitionError(
713 f"Constraint violation while associating dataset of type {datasetType.name} with "
714 f"collection {collection}. This probably means that one or more datasets with the same "
715 f"dataset type and data ID already exist in the collection, but it may also indicate "
716 f"that the datasets do not exist."
717 ) from err
719 @transactional
720 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
721 # Docstring inherited from lsst.daf.butler.registry.Registry
722 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
723 collectionRecord = self._managers.collections.find(collection)
724 if collectionRecord.type is not CollectionType.TAGGED:
725 raise CollectionTypeError(
726 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
727 )
728 for datasetType, refsForType in progress.iter_item_chunks(
729 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
730 ):
731 storage = self._managers.datasets[datasetType.name]
732 storage.disassociate(collectionRecord, refsForType)
733 if self._managers.obscore:
734 self._managers.obscore.disassociate(refsForType, collectionRecord)
736 @transactional
737 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
738 # Docstring inherited from lsst.daf.butler.registry.Registry
739 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
740 collectionRecord = self._managers.collections.find(collection)
741 for datasetType, refsForType in progress.iter_item_chunks(
742 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
743 ):
744 storage = self._managers.datasets[datasetType.name]
745 storage.certify(
746 collectionRecord,
747 refsForType,
748 timespan,
749 context=queries.SqlQueryContext(self._db, self._managers.column_types),
750 )
752 @transactional
753 def decertify(
754 self,
755 collection: str,
756 datasetType: Union[str, DatasetType],
757 timespan: Timespan,
758 *,
759 dataIds: Optional[Iterable[DataId]] = None,
760 ) -> None:
761 # Docstring inherited from lsst.daf.butler.registry.Registry
762 collectionRecord = self._managers.collections.find(collection)
763 if isinstance(datasetType, str):
764 storage = self._managers.datasets[datasetType]
765 else:
766 storage = self._managers.datasets[datasetType.name]
767 standardizedDataIds = None
768 if dataIds is not None:
769 standardizedDataIds = [
770 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
771 ]
772 storage.decertify(
773 collectionRecord,
774 timespan,
775 dataIds=standardizedDataIds,
776 context=queries.SqlQueryContext(self._db, self._managers.column_types),
777 )
779 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
780 """Return an object that allows a new `Datastore` instance to
781 communicate with this `Registry`.
783 Returns
784 -------
785 manager : `DatastoreRegistryBridgeManager`
786 Object that mediates communication between this `Registry` and its
787 associated datastores.
788 """
789 return self._managers.datastores
791 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
792 # Docstring inherited from lsst.daf.butler.registry.Registry
793 return self._managers.datastores.findDatastores(ref)
795 def expandDataId(
796 self,
797 dataId: Optional[DataId] = None,
798 *,
799 graph: Optional[DimensionGraph] = None,
800 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
801 withDefaults: bool = True,
802 **kwargs: Any,
803 ) -> DataCoordinate:
804 # Docstring inherited from lsst.daf.butler.registry.Registry
805 if not withDefaults:
806 defaults = None
807 else:
808 defaults = self.defaults.dataId
809 try:
810 standardized = DataCoordinate.standardize(
811 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
812 )
813 except KeyError as exc:
814 # This means either kwargs have some odd name or required
815 # dimension is missing.
816 raise DimensionNameError(str(exc)) from exc
817 if standardized.hasRecords():
818 return standardized
819 if records is None:
820 records = {}
821 elif isinstance(records, NamedKeyMapping):
822 records = records.byName()
823 else:
824 records = dict(records)
825 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
826 records.update(dataId.records.byName())
827 keys = standardized.byName()
828 context = queries.SqlQueryContext(self._db, self._managers.column_types)
829 for element in standardized.graph.primaryKeyTraversalOrder:
830 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
831 if record is ...:
832 if isinstance(element, Dimension) and keys.get(element.name) is None:
833 if element in standardized.graph.required:
834 raise DimensionNameError(
835 f"No value or null value for required dimension {element.name}."
836 )
837 keys[element.name] = None
838 record = None
839 else:
840 storage = self._managers.dimensions[element]
841 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
842 records[element.name] = record
843 if record is not None:
844 for d in element.implied:
845 value = getattr(record, d.name)
846 if keys.setdefault(d.name, value) != value:
847 raise InconsistentDataIdError(
848 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
849 f"but {element.name} implies {d.name}={value!r}."
850 )
851 else:
852 if element in standardized.graph.required:
853 raise DataIdValueError(
854 f"Could not fetch record for required dimension {element.name} via keys {keys}."
855 )
856 if element.alwaysJoin:
857 raise InconsistentDataIdError(
858 f"Could not fetch record for element {element.name} via keys {keys}, ",
859 "but it is marked alwaysJoin=True; this means one or more dimensions are not "
860 "related.",
861 )
862 for d in element.implied:
863 keys.setdefault(d.name, None)
864 records.setdefault(d.name, None)
865 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
867 def insertDimensionData(
868 self,
869 element: Union[DimensionElement, str],
870 *data: Union[Mapping[str, Any], DimensionRecord],
871 conform: bool = True,
872 replace: bool = False,
873 skip_existing: bool = False,
874 ) -> None:
875 # Docstring inherited from lsst.daf.butler.registry.Registry
876 if conform:
877 if isinstance(element, str):
878 element = self.dimensions[element]
879 records = [
880 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
881 ]
882 else:
883 # Ignore typing since caller said to trust them with conform=False.
884 records = data # type: ignore
885 storage = self._managers.dimensions[element]
886 storage.insert(*records, replace=replace, skip_existing=skip_existing)
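# A usage sketch for insertDimensionData; the record fields shown for the
# "instrument" element are assumptions that depend on the active dimension
# configuration:
registry.insertDimensionData(
    "instrument",
    {
        "name": "ExampleCam",
        "class_name": "lsst.obs.example.ExampleCam",
        "detector_max": 4,
        "visit_max": 100000,
        "exposure_max": 100000,
    },
)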
888 def syncDimensionData(
889 self,
890 element: Union[DimensionElement, str],
891 row: Union[Mapping[str, Any], DimensionRecord],
892 conform: bool = True,
893 update: bool = False,
894 ) -> Union[bool, Dict[str, Any]]:
895 # Docstring inherited from lsst.daf.butler.registry.Registry
896 if conform:
897 if isinstance(element, str):
898 element = self.dimensions[element]
899 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
900 else:
901 # Ignore typing since caller said to trust them with conform=False.
902 record = row # type: ignore
903 storage = self._managers.dimensions[element]
904 return storage.sync(record, update=update)
906 def queryDatasetTypes(
907 self,
908 expression: Any = ...,
909 *,
910 components: Optional[bool] = None,
911 missing: Optional[List[str]] = None,
912 ) -> Iterable[DatasetType]:
913 # Docstring inherited from lsst.daf.butler.registry.Registry
914 wildcard = DatasetTypeWildcard.from_expression(expression)
915 composition_dict = self._managers.datasets.resolve_wildcard(
916 wildcard,
917 components=components,
918 missing=missing,
919 )
920 result: list[DatasetType] = []
921 for parent_dataset_type, components_for_parent in composition_dict.items():
922 result.extend(
923 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
924 for c in components_for_parent
925 )
926 return result
928 def queryCollections(
929 self,
930 expression: Any = ...,
931 datasetType: Optional[DatasetType] = None,
932 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
933 flattenChains: bool = False,
934 includeChains: Optional[bool] = None,
935 ) -> Sequence[str]:
936 # Docstring inherited from lsst.daf.butler.registry.Registry
938 # Right now the datasetTypes argument is completely ignored, but that
939 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
940 # ticket will take care of that.
941 try:
942 wildcard = CollectionWildcard.from_expression(expression)
943 except TypeError as exc:
944 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
945 collectionTypes = ensure_iterable(collectionTypes)
946 return [
947 record.name
948 for record in self._managers.collections.resolve_wildcard(
949 wildcard,
950 collection_types=frozenset(collectionTypes),
951 flatten_chains=flattenChains,
952 include_chains=includeChains,
953 )
954 ]
956 def _makeQueryBuilder(
957 self,
958 summary: queries.QuerySummary,
959 doomed_by: Iterable[str] = (),
960 ) -> queries.QueryBuilder:
961 """Return a `QueryBuilder` instance capable of constructing and
962 managing more complex queries than those obtainable via `Registry`
963 interfaces.
965 This is an advanced interface; downstream code should prefer
966 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
967 are sufficient.
969 Parameters
970 ----------
971 summary : `queries.QuerySummary`
972 Object describing and categorizing the full set of dimensions that
973 will be included in the query.
974 doomed_by : `Iterable` of `str`, optional
975 A list of diagnostic messages that indicate why the query is going
976 to yield no results and should not even be executed. If an empty
977 container (default) the query will be executed unless other code
978 determines that it is doomed.
980 Returns
981 -------
982 builder : `queries.QueryBuilder`
983 Object that can be used to construct and perform advanced queries.
984 """
985 doomed_by = list(doomed_by)
986 backend = queries.SqlQueryBackend(self._db, self._managers)
987 context = backend.context()
988 relation: Relation | None = None
989 if doomed_by:
990 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
991 return queries.QueryBuilder(
992 summary,
993 backend=backend,
994 context=context,
995 relation=relation,
996 )
998 def _standardize_query_data_id_args(
999 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
1000 ) -> DataCoordinate:
1001 """Preprocess the data ID arguments passed to query* methods.
1003 Parameters
1004 ----------
1005 data_id : `DataId` or `None`
1006 Data ID that constrains the query results.
1007 doomed_by : `list` [ `str` ]
1008 List to append messages indicating why the query is doomed to
1009 yield no results.
1010 **kwargs
1011 Additional data ID key-value pairs, extending and overriding
1012 ``data_id``.
1014 Returns
1015 -------
1016 data_id : `DataCoordinate`
1017 Standardized data ID. Will be fully expanded unless expansion
1018 fails, in which case a message will be appended to ``doomed_by``
1019 on return.
1020 """
1021 try:
1022 return self.expandDataId(data_id, **kwargs)
1023 except DataIdValueError as err:
1024 doomed_by.append(str(err))
1025 return DataCoordinate.standardize(
1026 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
1027 )
1029 def _standardize_query_dataset_args(
1030 self,
1031 datasets: Any,
1032 collections: Any,
1033 components: bool | None,
1034 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
1035 *,
1036 doomed_by: list[str],
1037 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
1038 """Preprocess dataset arguments passed to query* methods.
1040 Parameters
1041 ----------
1042 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
1043 Expression identifying dataset types. See `queryDatasetTypes` for
1044 details.
1045 collections : `str`, `re.Pattern`, or iterable of these
1046 Expression identifying collections to be searched. See
1047 `queryCollections` for details.
1048 components : `bool`, optional
1049 If `True`, apply all expression patterns to component dataset type
1050 names as well. If `False`, never apply patterns to components.
1051 If `None` (default), apply patterns to components only if their
1052 parent datasets were not matched by the expression.
1053 Fully-specified component datasets (`str` or `DatasetType`
1054 instances) are always included.
1056 Values other than `False` are deprecated, and only `False` will be
1057 supported after v26. After v27 this argument will be removed
1058 entirely.
1059 mode : `str`, optional
1060 The way in which datasets are being used in this query; one of:
1062 - "find_first": this is a query for the first dataset in an
1063 ordered list of collections. Prohibits collection wildcards,
1064 but permits dataset type wildcards.
1066 - "find_all": this is a query for all datasets in all matched
1067 collections. Permits collection and dataset type wildcards.
1069 - "constrain": this is a query for something other than datasets,
1070 with results constrained by dataset existence. Permits
1071 collection wildcards and prohibits ``...`` as a dataset type
1072 wildcard.
1073 doomed_by : `list` [ `str` ]
1074 List to append messages indicating why the query is doomed to
1075 yield no results.
1077 Returns
1078 -------
1079 composition : `defaultdict` [ `DatasetType`, `list` [ `str` ] ]
1080 Dictionary mapping parent dataset type to `list` of components
1081 matched for that dataset type (or `None` for the parent itself).
1082 collections : `CollectionWildcard`
1083 Processed collection expression.
1084 """
1085 composition: dict[DatasetType, list[str | None]] = {}
1086 if datasets is not None:
1087 if not collections:
1088 if not self.defaults.collections:
1089 raise NoDefaultCollectionError("No collections, and no registry default collections.")
1090 collections = self.defaults.collections
1091 else:
1092 collections = CollectionWildcard.from_expression(collections)
1093 if mode == "find_first" and collections.patterns:
1094 raise TypeError(
1095 f"Collection pattern(s) {collections.patterns} not allowed in this context."
1096 )
1097 missing: list[str] = []
1098 composition = self._managers.datasets.resolve_wildcard(
1099 datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
1100 )
1101 if missing and mode == "constrain":
1102 # After v26 this should raise MissingDatasetTypeError, to be
1103 # implemented on DM-36303.
1104 warnings.warn(
1105 f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
1106 FutureWarning,
1107 )
1108 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
1109 elif collections:
1110 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
1111 return composition, collections
1113 def queryDatasets(
1114 self,
1115 datasetType: Any,
1116 *,
1117 collections: Any = None,
1118 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1119 dataId: Optional[DataId] = None,
1120 where: str = "",
1121 findFirst: bool = False,
1122 components: Optional[bool] = None,
1123 bind: Optional[Mapping[str, Any]] = None,
1124 check: bool = True,
1125 **kwargs: Any,
1126 ) -> queries.DatasetQueryResults:
1127 # Docstring inherited from lsst.daf.butler.registry.Registry
1128 doomed_by: list[str] = []
1129 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1130 dataset_composition, collections = self._standardize_query_dataset_args(
1131 datasetType,
1132 collections,
1133 components,
1134 mode="find_first" if findFirst else "find_all",
1135 doomed_by=doomed_by,
1136 )
1137 parent_results: list[queries.ParentDatasetQueryResults] = []
1138 for parent_dataset_type, components_for_parent in dataset_composition.items():
1139 # The full set of dimensions in the query is the combination of
1140 # those needed for the DatasetType and those explicitly requested,
1141 # if any.
1142 dimension_names = set(parent_dataset_type.dimensions.names)
1143 if dimensions is not None:
1144 dimension_names.update(self.dimensions.extract(dimensions).names)
1145 # Construct the summary structure needed to construct a
1146 # QueryBuilder.
1147 summary = queries.QuerySummary(
1148 requested=DimensionGraph(self.dimensions, names=dimension_names),
1149 data_id=data_id,
1150 expression=where,
1151 bind=bind,
1152 defaults=self.defaults.dataId,
1153 check=check,
1154 datasets=[parent_dataset_type],
1155 )
1156 builder = self._makeQueryBuilder(summary)
1157 # Add the dataset subquery to the query, telling the QueryBuilder
1158 # to include the rank of the selected collection in the results
1159 # only if we need to findFirst. Note that if any of the
1160 # collections are actually wildcard expressions, and
1161 # findFirst=True, this will raise TypeError for us.
1162 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst)
1163 query = builder.finish()
1164 parent_results.append(
1165 queries.ParentDatasetQueryResults(
1166 query, parent_dataset_type, components=components_for_parent
1167 )
1168 )
1169 if not parent_results:
1170 doomed_by.extend(
1171 f"No registered dataset type matching {t!r} found, so no matching datasets can "
1172 "exist in any collection."
1173 for t in ensure_iterable(datasetType)
1174 )
1175 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
1176 elif len(parent_results) == 1:
1177 return parent_results[0]
1178 else:
1179 return queries.ChainedDatasetQueryResults(parent_results)
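# A usage sketch for queryDatasets; the dataset type, collection, ``where``
# expression, and bind value are hypothetical:
result = registry.queryDatasets(
    "calexp",
    collections=["ExampleCam/runs/example"],
    where="instrument = 'ExampleCam' AND visit > cutoff",
    bind={"cutoff": 100},
    findFirst=True,
)
for found_ref in result:
    print(found_ref.dataId)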
1181 def queryDataIds(
1182 self,
1183 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
1184 *,
1185 dataId: Optional[DataId] = None,
1186 datasets: Any = None,
1187 collections: Any = None,
1188 where: str = "",
1189 components: Optional[bool] = None,
1190 bind: Optional[Mapping[str, Any]] = None,
1191 check: bool = True,
1192 **kwargs: Any,
1193 ) -> queries.DataCoordinateQueryResults:
1194 # Docstring inherited from lsst.daf.butler.registry.Registry
1195 dimensions = ensure_iterable(dimensions)
1196 requestedDimensions = self.dimensions.extract(dimensions)
1197 doomed_by: list[str] = []
1198 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1199 dataset_composition, collections = self._standardize_query_dataset_args(
1200 datasets, collections, components, doomed_by=doomed_by
1201 )
1202 summary = queries.QuerySummary(
1203 requested=requestedDimensions,
1204 data_id=data_id,
1205 expression=where,
1206 bind=bind,
1207 defaults=self.defaults.dataId,
1208 check=check,
1209 datasets=dataset_composition.keys(),
1210 )
1211 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1212 for datasetType in dataset_composition.keys():
1213 builder.joinDataset(datasetType, collections, isResult=False)
1214 query = builder.finish()
1216 return queries.DataCoordinateQueryResults(query)
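# A usage sketch for queryDataIds; the dimensions, dataset constraint, and
# collection are hypothetical:
data_ids = registry.queryDataIds(
    ["exposure", "detector"],
    datasets="raw",
    collections=["ExampleCam/raw/all"],
    instrument="ExampleCam",
)
for data_id in data_ids:
    print(data_id["exposure"], data_id["detector"])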
1218 def queryDimensionRecords(
1219 self,
1220 element: Union[DimensionElement, str],
1221 *,
1222 dataId: Optional[DataId] = None,
1223 datasets: Any = None,
1224 collections: Any = None,
1225 where: str = "",
1226 components: Optional[bool] = None,
1227 bind: Optional[Mapping[str, Any]] = None,
1228 check: bool = True,
1229 **kwargs: Any,
1230 ) -> queries.DimensionRecordQueryResults:
1231 # Docstring inherited from lsst.daf.butler.registry.Registry
1232 if not isinstance(element, DimensionElement):
1233 try:
1234 element = self.dimensions[element]
1235 except KeyError as e:
1236 raise DimensionNameError(
1237 f"No such dimension '{element}', available dimensions: "
1238 + str(self.dimensions.getStaticElements())
1239 ) from e
1240 doomed_by: list[str] = []
1241 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1242 dataset_composition, collections = self._standardize_query_dataset_args(
1243 datasets, collections, components, doomed_by=doomed_by
1244 )
1245 summary = queries.QuerySummary(
1246 requested=element.graph,
1247 data_id=data_id,
1248 expression=where,
1249 bind=bind,
1250 defaults=self.defaults.dataId,
1251 check=check,
1252 datasets=dataset_composition.keys(),
1253 )
1254 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1255 for datasetType in dataset_composition.keys():
1256 builder.joinDataset(datasetType, collections, isResult=False)
1257 query = builder.finish().with_record_columns(element)
1258 return queries.DatabaseDimensionRecordQueryResults(query, element)
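# A usage sketch for queryDimensionRecords; the element, ``where`` expression,
# and record attributes are assumptions about the default dimension universe:
detector_records = registry.queryDimensionRecords(
    "detector",
    where="instrument = 'ExampleCam' AND detector.purpose = 'SCIENCE'",
)
for record in detector_records:
    print(record.id, record.full_name)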
1260 def queryDatasetAssociations(
1261 self,
1262 datasetType: Union[str, DatasetType],
1263 collections: Any = ...,
1264 *,
1265 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1266 flattenChains: bool = False,
1267 ) -> Iterator[DatasetAssociation]:
1268 # Docstring inherited from lsst.daf.butler.registry.Registry
1269 if collections is None:
1270 if not self.defaults.collections:
1271 raise NoDefaultCollectionError(
1272 "No collections provided to queryDatasetAssociations, "
1273 "and no defaults from registry construction."
1274 )
1275 collections = self.defaults.collections
1276 collections = CollectionWildcard.from_expression(collections)
1277 backend = queries.SqlQueryBackend(self._db, self._managers)
1278 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
1279 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
1280 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
1281 for parent_collection_record in backend.resolve_collection_wildcard(
1282 collections,
1283 collection_types=frozenset(collectionTypes),
1284 flatten_chains=flattenChains,
1285 ):
1286 # Resolve this possibly-chained collection into a list of
1287 # non-CHAINED collections that actually hold datasets of this
1288 # type.
1289 candidate_collection_records = backend.resolve_dataset_collections(
1290 parent_dataset_type,
1291 CollectionWildcard.from_names([parent_collection_record.name]),
1292 allow_calibration_collections=True,
1293 governor_constraints={},
1294 )
1295 if not candidate_collection_records:
1296 continue
1297 with backend.context() as context:
1298 relation = backend.make_dataset_query_relation(
1299 parent_dataset_type,
1300 candidate_collection_records,
1301 columns={"dataset_id", "run", "timespan", "collection"},
1302 context=context,
1303 )
1304 reader = queries.DatasetRefReader(
1305 parent_dataset_type,
1306 translate_collection=lambda k: self._managers.collections[k].name,
1307 full=False,
1308 )
1309 for row in context.fetch_iterable(relation):
1310 ref = reader.read(row)
1311 collection_record = self._managers.collections[row[collection_tag]]
1312 if collection_record.type is CollectionType.CALIBRATION:
1313 timespan = row[timespan_tag]
1314 else:
1315 # For backwards compatibility and (possibly?) user
1316 # convenience we continue to define the timespan of a
1317 # DatasetAssociation row for a non-CALIBRATION
1318 # collection to be None rather than a fully unbounded
1319 # timespan.
1320 timespan = None
1321 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
1323 storageClasses: StorageClassFactory
1324 """All storage classes known to the registry (`StorageClassFactory`).
1325 """