Coverage for python/lsst/daf/butler/registries/sql.py: 12%
508 statements
coverage.py v6.5.0, created at 2023-02-07 10:26 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SqlRegistry",)
26import contextlib
27import logging
28import warnings
29from typing import (
30 TYPE_CHECKING,
31 Any,
32 Dict,
33 Iterable,
34 Iterator,
35 List,
36 Literal,
37 Mapping,
38 Optional,
39 Sequence,
40 Set,
41 Union,
42 cast,
43)
45import sqlalchemy
46from lsst.daf.relation import LeafRelation, Relation
47from lsst.resources import ResourcePathExpression
48from lsst.utils.iteration import ensure_iterable
50from ..core import (
51 Config,
52 DataCoordinate,
53 DataId,
54 DatasetAssociation,
55 DatasetColumnTag,
56 DatasetId,
57 DatasetRef,
58 DatasetType,
59 Dimension,
60 DimensionConfig,
61 DimensionElement,
62 DimensionGraph,
63 DimensionRecord,
64 DimensionUniverse,
65 NamedKeyMapping,
66 NameLookupMapping,
67 Progress,
68 StorageClassFactory,
69 Timespan,
70 ddl,
71)
72from ..core.utils import transactional
73from ..registry import (
74 ArgumentError,
75 CollectionExpressionError,
76 CollectionSummary,
77 CollectionType,
78 CollectionTypeError,
79 ConflictingDefinitionError,
80 DataIdValueError,
81 DatasetTypeError,
82 DimensionNameError,
83 InconsistentDataIdError,
84 NoDefaultCollectionError,
85 OrphanedRecordError,
86 Registry,
87 RegistryConfig,
88 RegistryDefaults,
89 queries,
90)
91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord
92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
93from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
95if TYPE_CHECKING:
96 from .._butlerConfig import ButlerConfig
97 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager
100_LOG = logging.getLogger(__name__)
103class SqlRegistry(Registry):
104 """Registry implementation based on SQLAlchemy.
106 Parameters
107 ----------
108 database : `Database`
109 Database instance in which the Registry data is stored.
110 defaults : `RegistryDefaults`
111 Default collection search path and/or output `~CollectionType.RUN`
112 collection.
113 managers : `RegistryManagerInstances`
114 All the managers required for this registry.
115 """
117 defaultConfigFile: Optional[str] = None
118 """Path to configuration defaults. Accessed within the ``configs`` resource
119 or relative to a search path. Can be `None` if no defaults are specified.
120 """
122 @classmethod
123 def createFromConfig(
124 cls,
125 config: Optional[Union[RegistryConfig, str]] = None,
126 dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
127 butlerRoot: Optional[ResourcePathExpression] = None,
128 ) -> Registry:
129 """Create registry database and return `SqlRegistry` instance.
131 This method initializes the database contents; the database must be
132 empty prior to calling this method.
134 Parameters
135 ----------
136 config : `RegistryConfig` or `str`, optional
137 Registry configuration. If missing, the default configuration will
138 be loaded from ``registry.yaml``.
139 dimensionConfig : `DimensionConfig` or `str`, optional
140 Dimension configuration. If missing, the default configuration
141 will be loaded from ``dimensions.yaml``.
142 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
143 Path to the repository root this `SqlRegistry` will manage.
145 Returns
146 -------
147 registry : `SqlRegistry`
148 A new `SqlRegistry` instance.
149 """
150 config = cls.forceRegistryConfig(config)
151 config.replaceRoot(butlerRoot)
153 if isinstance(dimensionConfig, str):
154 dimensionConfig = DimensionConfig(dimensionConfig)
155 elif dimensionConfig is None:
156 dimensionConfig = DimensionConfig()
157 elif not isinstance(dimensionConfig, DimensionConfig):
158 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
160 DatabaseClass = config.getDatabaseClass()
161 database = DatabaseClass.fromUri(
162 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace")
163 )
164 managerTypes = RegistryManagerTypes.fromConfig(config)
165 managers = managerTypes.makeRepo(database, dimensionConfig)
166 return cls(database, RegistryDefaults(), managers)
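# Usage sketch for createFromConfig: bootstrap a brand-new repository database.
# The SQLite URL and repository root below are hypothetical, and the default
# dimension configuration is assumed.
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import RegistryConfig

new_config = RegistryConfig()
new_config["db"] = "sqlite:////tmp/example_repo/gen3.sqlite3"  # hypothetical location
registry = SqlRegistry.createFromConfig(new_config, butlerRoot="/tmp/example_repo")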
168 @classmethod
169 def fromConfig(
170 cls,
171 config: Union[ButlerConfig, RegistryConfig, Config, str],
172 butlerRoot: Optional[ResourcePathExpression] = None,
173 writeable: bool = True,
174 defaults: Optional[RegistryDefaults] = None,
175 ) -> Registry:
176 """Create `Registry` subclass instance from `config`.
178 Registry database must be initialized prior to calling this method.
180 Parameters
181 ----------
182 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
183 Registry configuration.
184 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
185 Path to the repository root this `Registry` will manage.
186 writeable : `bool`, optional
187 If `True` (default) create a read-write connection to the database.
188 defaults : `RegistryDefaults`, optional
189 Default collection search path and/or output `~CollectionType.RUN`
190 collection.
192 Returns
193 -------
194 registry : `SqlRegistry` (subclass)
195 A new `SqlRegistry` subclass instance.
196 """
197 config = cls.forceRegistryConfig(config)
198 config.replaceRoot(butlerRoot)
199 DatabaseClass = config.getDatabaseClass()
200 database = DatabaseClass.fromUri(
201 str(config.connectionString),
202 origin=config.get("origin", 0),
203 namespace=config.get("namespace"),
204 writeable=writeable,
205 )
206 managerTypes = RegistryManagerTypes.fromConfig(config)
207 with database.session():
208 managers = managerTypes.loadRepo(database)
209 if defaults is None:
210 defaults = RegistryDefaults()
211 return cls(database, defaults, managers)
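# Usage sketch for fromConfig: connect read-only to an existing repository and
# set default collections. The config path and collection name are hypothetical.
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import RegistryDefaults

read_only_registry = SqlRegistry.fromConfig(
    "/tmp/example_repo/butler.yaml",  # hypothetical configuration file path
    writeable=False,
    defaults=RegistryDefaults(collections=["HypoCam/defaults"]),
)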
213 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
214 self._db = database
215 self._managers = managers
216 self.storageClasses = StorageClassFactory()
217 # Intentionally invoke property setter to initialize defaults. This
218 # can only be done after most of the rest of Registry has already been
219 # initialized, and must be done before the property getter is used.
220 self.defaults = defaults
221 # In the future DatasetIdFactory may become configurable and this
222 # instance will need to be shared with datasets manager.
223 self.datasetIdFactory = DatasetIdFactory()
225 def __str__(self) -> str:
226 return str(self._db)
228 def __repr__(self) -> str:
229 return f"SqlRegistry({self._db!r}, {self.dimensions!r})"
231 def isWriteable(self) -> bool:
232 # Docstring inherited from lsst.daf.butler.registry.Registry
233 return self._db.isWriteable()
235 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
236 # Docstring inherited from lsst.daf.butler.registry.Registry
237 if defaults is None:
238 # No need to copy, because `RegistryDefaults` is immutable; we
239 # effectively copy on write.
240 defaults = self.defaults
241 return type(self)(self._db, defaults, self._managers)
243 @property
244 def dimensions(self) -> DimensionUniverse:
245 # Docstring inherited from lsst.daf.butler.registry.Registry
246 return self._managers.dimensions.universe
248 def refresh(self) -> None:
249 # Docstring inherited from lsst.daf.butler.registry.Registry
250 with self._db.transaction():
251 self._managers.refresh()
253 @contextlib.contextmanager
254 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
255 # Docstring inherited from lsst.daf.butler.registry.Registry
256 try:
257 with self._db.transaction(savepoint=savepoint):
258 yield
259 except BaseException:
260 # TODO: this clears the caches sometimes when we wouldn't actually
261 # need to. Can we avoid that?
262 self._managers.dimensions.clearCaches()
263 raise
265 def resetConnectionPool(self) -> None:
266 """Reset SQLAlchemy connection pool for `SqlRegistry` database.
268 This operation is useful when using the registry with fork-based
269 multiprocessing. To use the registry across a fork boundary, ensure
270 that there are no currently active connections (no session or
271 transaction in progress) and reset the connection pool with this
272 method. It should be called by the child process immediately
273 after the fork.
274 """
275 self._db._engine.dispose()
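# Sketch of the fork-based multiprocessing pattern described in the docstring.
# Assumes `registry` is an existing SqlRegistry with no open session or
# transaction at the time of the fork.
import os

pid = os.fork()
if pid == 0:
    # Child: drop connections inherited from the parent before any DB access.
    registry.resetConnectionPool()
    # ... perform child-side registry work here ...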
277 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
278 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
279 other data repository client.
281 Opaque table records can be added via `insertOpaqueData`, retrieved via
282 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
284 Parameters
285 ----------
286 tableName : `str`
287 Logical name of the opaque table. This may differ from the
288 actual name used in the database by a prefix and/or suffix.
289 spec : `ddl.TableSpec`
290 Specification for the table to be added.
291 """
292 self._managers.opaque.register(tableName, spec)
294 @transactional
295 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
296 """Insert records into an opaque table.
298 Parameters
299 ----------
300 tableName : `str`
301 Logical name of the opaque table. Must match the name used in a
302 previous call to `registerOpaqueTable`.
303 data
304 Each additional positional argument is a dictionary that represents
305 a single row to be added.
306 """
307 self._managers.opaque[tableName].insert(*data)
309 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]:
310 """Retrieve records from an opaque table.
312 Parameters
313 ----------
314 tableName : `str`
315 Logical name of the opaque table. Must match the name used in a
316 previous call to `registerOpaqueTable`.
317 where
318 Additional keyword arguments are interpreted as equality
319 constraints that restrict the returned rows (combined with AND);
320 keyword arguments are column names and values are the values they
321 must have.
323 Yields
324 ------
325 row : `dict`
326 A dictionary representing a single result row.
327 """
328 yield from self._managers.opaque[tableName].fetch(**where)
330 @transactional
331 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
332 """Remove records from an opaque table.
334 Parameters
335 ----------
336 tableName : `str`
337 Logical name of the opaque table. Must match the name used in a
338 previous call to `registerOpaqueTable`.
339 where
340 Additional keyword arguments are interpreted as equality
341 constraints that restrict the deleted rows (combined with AND);
342 keyword arguments are column names and values are the values they
343 must have.
344 """
345 self._managers.opaque[tableName].delete(where.keys(), where)
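# Round-trip sketch for the opaque-table API above. The table name, column
# layout, and row values are hypothetical, and the ddl.TableSpec/FieldSpec
# arguments shown are assumptions about that API rather than something defined
# in this module; assumes a writeable `registry`.
example_spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec(name="dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
        ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
    ]
)
registry.registerOpaqueTable("example_datastore_records", example_spec)
registry.insertOpaqueData("example_datastore_records", {"dataset_id": 1, "path": "a/b.fits"})
rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=1))
registry.deleteOpaqueData("example_datastore_records", dataset_id=1)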
347 def registerCollection(
348 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
349 ) -> bool:
350 # Docstring inherited from lsst.daf.butler.registry.Registry
351 _, registered = self._managers.collections.register(name, type, doc=doc)
352 return registered
354 def getCollectionType(self, name: str) -> CollectionType:
355 # Docstring inherited from lsst.daf.butler.registry.Registry
356 return self._managers.collections.find(name).type
358 def _get_collection_record(self, name: str) -> CollectionRecord:
359 # Docstring inherited from lsst.daf.butler.registry.Registry
360 return self._managers.collections.find(name)
362 def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
363 # Docstring inherited from lsst.daf.butler.registry.Registry
364 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
365 return registered
367 @transactional
368 def removeCollection(self, name: str) -> None:
369 # Docstring inherited from lsst.daf.butler.registry.Registry
370 self._managers.collections.remove(name)
372 def getCollectionChain(self, parent: str) -> tuple[str, ...]:
373 # Docstring inherited from lsst.daf.butler.registry.Registry
374 record = self._managers.collections.find(parent)
375 if record.type is not CollectionType.CHAINED:
376 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
377 assert isinstance(record, ChainedCollectionRecord)
378 return record.children
380 @transactional
381 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
382 # Docstring inherited from lsst.daf.butler.registry.Registry
383 record = self._managers.collections.find(parent)
384 if record.type is not CollectionType.CHAINED:
385 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
386 assert isinstance(record, ChainedCollectionRecord)
387 children = CollectionWildcard.from_expression(children).require_ordered()
388 if children != record.children or flatten:
389 record.update(self._managers.collections, children, flatten=flatten)
391 def getCollectionParentChains(self, collection: str) -> Set[str]:
392 # Docstring inherited from lsst.daf.butler.registry.Registry
393 return {
394 record.name
395 for record in self._managers.collections.getParentChains(
396 self._managers.collections.find(collection).key
397 )
398 }
400 def getCollectionDocumentation(self, collection: str) -> Optional[str]:
401 # Docstring inherited from lsst.daf.butler.registry.Registry
402 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)
404 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
405 # Docstring inherited from lsst.daf.butler.registry.Registry
406 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)
408 def getCollectionSummary(self, collection: str) -> CollectionSummary:
409 # Docstring inherited from lsst.daf.butler.registry.Registry
410 record = self._managers.collections.find(collection)
411 return self._managers.datasets.getCollectionSummary(record)
413 def registerDatasetType(self, datasetType: DatasetType) -> bool:
414 # Docstring inherited from lsst.daf.butler.registry.Registry
415 _, inserted = self._managers.datasets.register(datasetType)
416 return inserted
418 def removeDatasetType(self, name: str) -> None:
419 # Docstring inherited from lsst.daf.butler.registry.Registry
420 self._managers.datasets.remove(name)
422 def getDatasetType(self, name: str) -> DatasetType:
423 # Docstring inherited from lsst.daf.butler.registry.Registry
424 parent_name, component = DatasetType.splitDatasetTypeName(name)
425 storage = self._managers.datasets[parent_name]
426 if component is None:
427 return storage.datasetType
428 else:
429 return storage.datasetType.makeComponentDatasetType(component)
431 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
432 # Docstring inherited from lsst.daf.butler.registry.Registry
433 return self._managers.datasets.supportsIdGenerationMode(mode)
435 def findDataset(
436 self,
437 datasetType: Union[DatasetType, str],
438 dataId: Optional[DataId] = None,
439 *,
440 collections: Any = None,
441 timespan: Optional[Timespan] = None,
442 **kwargs: Any,
443 ) -> Optional[DatasetRef]:
444 # Docstring inherited from lsst.daf.butler.registry.Registry
445 if collections is None:
446 if not self.defaults.collections:
447 raise NoDefaultCollectionError(
448 "No collections provided to findDataset, and no defaults from registry construction."
449 )
450 collections = self.defaults.collections
451 backend = queries.SqlQueryBackend(self._db, self._managers)
452 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
453 matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
454 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
455 datasetType, components_deprecated=False
456 )
457 if len(components) > 1:
458 raise DatasetTypeError(
459 f"findDataset requires exactly one dataset type; got multiple components {components} "
460 f"for parent dataset type {parent_dataset_type.name}."
461 )
462 component = components[0]
463 dataId = DataCoordinate.standardize(
464 dataId,
465 graph=parent_dataset_type.dimensions,
466 universe=self.dimensions,
467 defaults=self.defaults.dataId,
468 **kwargs,
469 )
470 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
471 (filtered_collections,) = backend.filter_dataset_collections(
472 [parent_dataset_type],
473 matched_collections,
474 governor_constraints=governor_constraints,
475 ).values()
476 if not filtered_collections:
477 return None
478 tail_collections: list[CollectionRecord] = []
479 if timespan is None:
480 for n, collection_record in enumerate(filtered_collections):
481 if collection_record.type is CollectionType.CALIBRATION:
482 tail_collections.extend(filtered_collections[n:])
483 del filtered_collections[n:]
484 break
485 if filtered_collections:
486 requested_columns = {"dataset_id", "run", "collection"}
487 with backend.context() as context:
488 predicate = context.make_data_coordinate_predicate(
489 dataId.subset(parent_dataset_type.dimensions), full=False
490 )
491 if timespan is not None:
492 requested_columns.add("timespan")
493 predicate = predicate.logical_and(
494 context.make_timespan_overlap_predicate(
495 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
496 )
497 )
498 relation = backend.make_dataset_query_relation(
499 parent_dataset_type, filtered_collections, requested_columns, context
500 ).with_rows_satisfying(predicate)
501 rows = list(context.fetch_iterable(relation))
502 else:
503 rows = []
504 if not rows:
505 if tail_collections:
506 msg = (
507 f"Cannot search for dataset '{parent_dataset_type.name}' in CALIBRATION collection "
508 f"{tail_collections[0].name} without an input timespan."
509 )
510 if len(tail_collections) > 1:
511 remainder_names = ", ".join(c.name for c in tail_collections[1:])
512 msg += f" This also blocks searching collections [{remainder_names}] that follow it."
513 raise TypeError(msg)
514 return None
515 elif len(rows) == 1:
516 best_row = rows[0]
517 else:
518 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
519 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
520 row_iter = iter(rows)
521 best_row = next(row_iter)
522 best_rank = rank_by_collection_key[best_row[collection_tag]]
523 have_tie = False
524 for row in row_iter:
525 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
526 best_row = row
527 best_rank = rank
528 have_tie = False
529 elif rank == best_rank:
530 have_tie = True
531 assert timespan is not None, "Rank ties should be impossible given DB constraints."
532 if have_tie:
533 raise LookupError(
534 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
535 f"{collection_wildcard.strings} with timespan {timespan}."
536 )
537 reader = queries.DatasetRefReader(
538 parent_dataset_type,
539 translate_collection=lambda k: self._managers.collections[k].name,
540 )
541 ref = reader.read(best_row, data_id=dataId)
542 if component is not None:
543 ref = ref.makeComponentRef(component)
544 return ref
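# Usage sketch for findDataset: looking up a dataset in a CALIBRATION
# collection requires a timespan. Assumes an existing `registry`; the
# instrument, detector, and collection names are hypothetical.
import astropy.time
from lsst.daf.butler import Timespan

validity = Timespan(
    astropy.time.Time("2023-01-01", scale="tai"),
    astropy.time.Time("2023-01-02", scale="tai"),
)
ref = registry.findDataset(
    "bias",
    instrument="HypoCam",
    detector=12,
    collections=["HypoCam/calib"],
    timespan=validity,
)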
546 @transactional
547 def insertDatasets(
548 self,
549 datasetType: Union[DatasetType, str],
550 dataIds: Iterable[DataId],
551 run: Optional[str] = None,
552 expand: bool = True,
553 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
554 ) -> List[DatasetRef]:
555 # Docstring inherited from lsst.daf.butler.registry.Registry
556 if isinstance(datasetType, DatasetType):
557 storage = self._managers.datasets.find(datasetType.name)
558 if storage is None:
559 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
560 else:
561 storage = self._managers.datasets.find(datasetType)
562 if storage is None:
563 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
564 if run is None:
565 if self.defaults.run is None:
566 raise NoDefaultCollectionError(
567 "No run provided to insertDatasets, and no default from registry construction."
568 )
569 run = self.defaults.run
570 runRecord = self._managers.collections.find(run)
571 if runRecord.type is not CollectionType.RUN:
572 raise CollectionTypeError(
573 f"Given collection is of type {runRecord.type.name}; RUN collection required."
574 )
575 assert isinstance(runRecord, RunRecord)
576 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
577 if expand:
578 expandedDataIds = [
579 self.expandDataId(dataId, graph=storage.datasetType.dimensions)
580 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
581 ]
582 else:
583 expandedDataIds = [
584 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
585 ]
586 try:
587 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
588 if self._managers.obscore:
589 context = queries.SqlQueryContext(self._db, self._managers.column_types)
590 self._managers.obscore.add_datasets(refs, context)
591 except sqlalchemy.exc.IntegrityError as err:
592 raise ConflictingDefinitionError(
593 "A database constraint failure was triggered by inserting "
594 f"one or more datasets of type {storage.datasetType} into "
595 f"collection '{run}'. "
596 "This probably means a dataset with the same data ID "
597 "and dataset type already exists, but it may also mean a "
598 "dimension row is missing."
599 ) from err
600 return refs
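# Usage sketch for insertDatasets: register a dataset type, create a RUN
# collection, and insert one dataset. Assumes an existing `registry`; all
# names and data ID values are hypothetical.
example_type = DatasetType(
    "rawSketch",
    dimensions=["instrument", "detector", "exposure"],
    storageClass="Exposure",
    universe=registry.dimensions,
)
registry.registerDatasetType(example_type)
registry.registerRun("HypoCam/raw/run1")
(new_ref,) = registry.insertDatasets(
    example_type,
    dataIds=[{"instrument": "HypoCam", "detector": 12, "exposure": 1001}],
    run="HypoCam/raw/run1",
)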
602 @transactional
603 def _importDatasets(
604 self,
605 datasets: Iterable[DatasetRef],
606 expand: bool = True,
607 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
608 reuseIds: bool = False,
609 ) -> List[DatasetRef]:
610 # Docstring inherited from lsst.daf.butler.registry.Registry
611 datasets = list(datasets)
612 if not datasets:
613 # nothing to do
614 return []
616 # find dataset type
617 datasetTypes = set(dataset.datasetType for dataset in datasets)
618 if len(datasetTypes) != 1:
619 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
620 datasetType = datasetTypes.pop()
622 # get storage handler for this dataset type
623 storage = self._managers.datasets.find(datasetType.name)
624 if storage is None:
625 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
627 # find run name
628 runs = set(dataset.run for dataset in datasets)
629 if len(runs) != 1:
630 raise ValueError(f"Multiple run names in input datasets: {runs}")
631 run = runs.pop()
632 if run is None:
633 if self.defaults.run is None:
634 raise NoDefaultCollectionError(
635 "No run provided to ingestDatasets, and no default from registry construction."
636 )
637 run = self.defaults.run
639 runRecord = self._managers.collections.find(run)
640 if runRecord.type is not CollectionType.RUN:
641 raise CollectionTypeError(
642 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
643 " RUN collection required."
644 )
645 assert isinstance(runRecord, RunRecord)
647 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
648 if expand:
649 expandedDatasets = [
650 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
651 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
652 ]
653 else:
654 expandedDatasets = [
655 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
656 for dataset in datasets
657 ]
659 try:
660 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds))
661 if self._managers.obscore:
662 context = queries.SqlQueryContext(self._db, self._managers.column_types)
663 self._managers.obscore.add_datasets(refs, context)
664 except sqlalchemy.exc.IntegrityError as err:
665 raise ConflictingDefinitionError(
666 "A database constraint failure was triggered by inserting "
667 f"one or more datasets of type {storage.datasetType} into "
668 f"collection '{run}'. "
669 "This probably means a dataset with the same data ID "
670 "and dataset type already exists, but it may also mean a "
671 "dimension row is missing."
672 ) from err
673 return refs
675 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
676 # Docstring inherited from lsst.daf.butler.registry.Registry
677 return self._managers.datasets.getDatasetRef(id)
679 @transactional
680 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
681 # Docstring inherited from lsst.daf.butler.registry.Registry
682 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
683 for datasetType, refsForType in progress.iter_item_chunks(
684 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
685 ):
686 storage = self._managers.datasets[datasetType.name]
687 try:
688 storage.delete(refsForType)
689 except sqlalchemy.exc.IntegrityError as err:
690 raise OrphanedRecordError(
691 "One or more datasets is still present in one or more Datastores."
692 ) from err
694 @transactional
695 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
696 # Docstring inherited from lsst.daf.butler.registry.Registry
697 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
698 collectionRecord = self._managers.collections.find(collection)
699 if collectionRecord.type is not CollectionType.TAGGED:
700 raise CollectionTypeError(
701 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
702 )
703 for datasetType, refsForType in progress.iter_item_chunks(
704 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
705 ):
706 storage = self._managers.datasets[datasetType.name]
707 try:
708 storage.associate(collectionRecord, refsForType)
709 if self._managers.obscore:
710 # If a TAGGED collection is being monitored by ObsCore
711 # manager then we may need to save the dataset.
712 context = queries.SqlQueryContext(self._db, self._managers.column_types)
713 self._managers.obscore.associate(refsForType, collectionRecord, context)
714 except sqlalchemy.exc.IntegrityError as err:
715 raise ConflictingDefinitionError(
716 f"Constraint violation while associating dataset of type {datasetType.name} with "
717 f"collection {collection}. This probably means that one or more datasets with the same "
718 "dataset type and data ID already exist in the collection, but it may also indicate "
719 "that the datasets do not exist."
720 ) from err
722 @transactional
723 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
724 # Docstring inherited from lsst.daf.butler.registry.Registry
725 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
726 collectionRecord = self._managers.collections.find(collection)
727 if collectionRecord.type is not CollectionType.TAGGED:
728 raise CollectionTypeError(
729 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
730 )
731 for datasetType, refsForType in progress.iter_item_chunks(
732 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
733 ):
734 storage = self._managers.datasets[datasetType.name]
735 storage.disassociate(collectionRecord, refsForType)
736 if self._managers.obscore:
737 self._managers.obscore.disassociate(refsForType, collectionRecord)
739 @transactional
740 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
741 # Docstring inherited from lsst.daf.butler.registry.Registry
742 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
743 collectionRecord = self._managers.collections.find(collection)
744 for datasetType, refsForType in progress.iter_item_chunks(
745 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
746 ):
747 storage = self._managers.datasets[datasetType.name]
748 storage.certify(
749 collectionRecord,
750 refsForType,
751 timespan,
752 context=queries.SqlQueryContext(self._db, self._managers.column_types),
753 )
755 @transactional
756 def decertify(
757 self,
758 collection: str,
759 datasetType: Union[str, DatasetType],
760 timespan: Timespan,
761 *,
762 dataIds: Optional[Iterable[DataId]] = None,
763 ) -> None:
764 # Docstring inherited from lsst.daf.butler.registry.Registry
765 collectionRecord = self._managers.collections.find(collection)
766 if isinstance(datasetType, str):
767 storage = self._managers.datasets[datasetType]
768 else:
769 storage = self._managers.datasets[datasetType.name]
770 standardizedDataIds = None
771 if dataIds is not None:
772 standardizedDataIds = [
773 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
774 ]
775 storage.decertify(
776 collectionRecord,
777 timespan,
778 dataIds=standardizedDataIds,
779 context=queries.SqlQueryContext(self._db, self._managers.column_types),
780 )
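# Usage sketch for certify/decertify: attach a validity range to existing
# datasets in a CALIBRATION collection, then retire it for one detector.
# Assumes `registry`, `refs` (resolved DatasetRefs), and `validity` (a
# Timespan) already exist; all names are hypothetical.
registry.registerCollection("HypoCam/calib", CollectionType.CALIBRATION)
registry.certify("HypoCam/calib", refs, validity)
registry.decertify(
    "HypoCam/calib",
    "bias",
    validity,
    dataIds=[{"instrument": "HypoCam", "detector": 12}],
)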
782 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
783 """Return an object that allows a new `Datastore` instance to
784 communicate with this `Registry`.
786 Returns
787 -------
788 manager : `DatastoreRegistryBridgeManager`
789 Object that mediates communication between this `Registry` and its
790 associated datastores.
791 """
792 return self._managers.datastores
794 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
795 # Docstring inherited from lsst.daf.butler.registry.Registry
796 return self._managers.datastores.findDatastores(ref)
798 def expandDataId(
799 self,
800 dataId: Optional[DataId] = None,
801 *,
802 graph: Optional[DimensionGraph] = None,
803 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
804 withDefaults: bool = True,
805 **kwargs: Any,
806 ) -> DataCoordinate:
807 # Docstring inherited from lsst.daf.butler.registry.Registry
808 if not withDefaults:
809 defaults = None
810 else:
811 defaults = self.defaults.dataId
812 try:
813 standardized = DataCoordinate.standardize(
814 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
815 )
816 except KeyError as exc:
817 # This means either kwargs have some odd name or required
818 # dimension is missing.
819 raise DimensionNameError(str(exc)) from exc
820 if standardized.hasRecords():
821 return standardized
822 if records is None:
823 records = {}
824 elif isinstance(records, NamedKeyMapping):
825 records = records.byName()
826 else:
827 records = dict(records)
828 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
829 records.update(dataId.records.byName())
830 keys = standardized.byName()
831 context = queries.SqlQueryContext(self._db, self._managers.column_types)
832 for element in standardized.graph.primaryKeyTraversalOrder:
833 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
834 if record is ...:
835 if isinstance(element, Dimension) and keys.get(element.name) is None:
836 if element in standardized.graph.required:
837 raise DimensionNameError(
838 f"No value or null value for required dimension {element.name}."
839 )
840 keys[element.name] = None
841 record = None
842 else:
843 storage = self._managers.dimensions[element]
844 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
845 records[element.name] = record
846 if record is not None:
847 for d in element.implied:
848 value = getattr(record, d.name)
849 if keys.setdefault(d.name, value) != value:
850 raise InconsistentDataIdError(
851 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
852 f"but {element.name} implies {d.name}={value!r}."
853 )
854 else:
855 if element in standardized.graph.required:
856 raise DataIdValueError(
857 f"Could not fetch record for required dimension {element.name} via keys {keys}."
858 )
859 if element.alwaysJoin:
860 raise InconsistentDataIdError(
861 f"Could not fetch record for element {element.name} via keys {keys}, ",
862 "but it is marked alwaysJoin=True; this means one or more dimensions are not "
863 "related.",
864 )
865 for d in element.implied:
866 keys.setdefault(d.name, None)
867 records.setdefault(d.name, None)
868 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
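# Usage sketch for expandDataId: a minimal data ID gains implied values and
# attached dimension records. Assumes an existing `registry`; the instrument
# and exposure values are hypothetical.
expanded = registry.expandDataId(instrument="HypoCam", exposure=1001)
assert expanded.hasRecords()
exposure_record = expanded.records["exposure"]  # full exposure record, including its timespan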
870 def insertDimensionData(
871 self,
872 element: Union[DimensionElement, str],
873 *data: Union[Mapping[str, Any], DimensionRecord],
874 conform: bool = True,
875 replace: bool = False,
876 skip_existing: bool = False,
877 ) -> None:
878 # Docstring inherited from lsst.daf.butler.registry.Registry
879 if conform:
880 if isinstance(element, str):
881 element = self.dimensions[element]
882 records = [
883 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
884 ]
885 else:
886 # Ignore typing since caller said to trust them with conform=False.
887 records = data # type: ignore
888 storage = self._managers.dimensions[element]
889 storage.insert(*records, replace=replace, skip_existing=skip_existing)
891 def syncDimensionData(
892 self,
893 element: Union[DimensionElement, str],
894 row: Union[Mapping[str, Any], DimensionRecord],
895 conform: bool = True,
896 update: bool = False,
897 ) -> Union[bool, Dict[str, Any]]:
898 # Docstring inherited from lsst.daf.butler.registry.Registry
899 if conform:
900 if isinstance(element, str):
901 element = self.dimensions[element]
902 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
903 else:
904 # Ignore typing since caller said to trust them with conform=False.
905 record = row # type: ignore
906 storage = self._managers.dimensions[element]
907 return storage.sync(record, update=update)
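# Usage sketch for insertDimensionData/syncDimensionData: insert an instrument
# row and upsert a detector row. Assumes an existing `registry`; the record
# values are hypothetical and must satisfy the dimension schema (additional
# required fields may apply).
registry.insertDimensionData("instrument", {"name": "HypoCam", "detector_max": 200})
registry.syncDimensionData(
    "detector",
    {"instrument": "HypoCam", "id": 12, "full_name": "R11_S01"},
    update=True,
)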
909 def queryDatasetTypes(
910 self,
911 expression: Any = ...,
912 *,
913 components: Optional[bool] = None,
914 missing: Optional[List[str]] = None,
915 ) -> Iterable[DatasetType]:
916 # Docstring inherited from lsst.daf.butler.registry.Registry
917 wildcard = DatasetTypeWildcard.from_expression(expression)
918 composition_dict = self._managers.datasets.resolve_wildcard(
919 wildcard,
920 components=components,
921 missing=missing,
922 )
923 result: list[DatasetType] = []
924 for parent_dataset_type, components_for_parent in composition_dict.items():
925 result.extend(
926 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
927 for c in components_for_parent
928 )
929 return result
931 def queryCollections(
932 self,
933 expression: Any = ...,
934 datasetType: Optional[DatasetType] = None,
935 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
936 flattenChains: bool = False,
937 includeChains: Optional[bool] = None,
938 ) -> Sequence[str]:
939 # Docstring inherited from lsst.daf.butler.registry.Registry
941 # Right now the datasetType argument is completely ignored, but that
942 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
943 # ticket will take care of that.
944 try:
945 wildcard = CollectionWildcard.from_expression(expression)
946 except TypeError as exc:
947 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
948 collectionTypes = ensure_iterable(collectionTypes)
949 return [
950 record.name
951 for record in self._managers.collections.resolve_wildcard(
952 wildcard,
953 collection_types=frozenset(collectionTypes),
954 flatten_chains=flattenChains,
955 include_chains=includeChains,
956 )
957 ]
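# Usage sketch for queryDatasetTypes/queryCollections: wildcard queries over
# registered dataset types and collections. Assumes an existing `registry`;
# the pattern and collection glob are hypothetical.
import re

flat_types = list(registry.queryDatasetTypes(re.compile("^flat")))
run_names = registry.queryCollections(
    "HypoCam/*",
    collectionTypes=CollectionType.RUN,
    flattenChains=True,
)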
959 def _makeQueryBuilder(
960 self,
961 summary: queries.QuerySummary,
962 doomed_by: Iterable[str] = (),
963 ) -> queries.QueryBuilder:
964 """Return a `QueryBuilder` instance capable of constructing and
965 managing more complex queries than those obtainable via `Registry`
966 interfaces.
968 This is an advanced interface; downstream code should prefer
969 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
970 are sufficient.
972 Parameters
973 ----------
974 summary : `queries.QuerySummary`
975 Object describing and categorizing the full set of dimensions that
976 will be included in the query.
977 doomed_by : `Iterable` of `str`, optional
978 A list of diagnostic messages that indicate why the query is going
979 to yield no results and should not even be executed. If an empty
980 container (default) the query will be executed unless other code
981 determines that it is doomed.
983 Returns
984 -------
985 builder : `queries.QueryBuilder`
986 Object that can be used to construct and perform advanced queries.
987 """
988 doomed_by = list(doomed_by)
989 backend = queries.SqlQueryBackend(self._db, self._managers)
990 context = backend.context()
991 relation: Relation | None = None
992 if doomed_by:
993 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
994 return queries.QueryBuilder(
995 summary,
996 backend=backend,
997 context=context,
998 relation=relation,
999 )
1001 def _standardize_query_data_id_args(
1002 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
1003 ) -> DataCoordinate:
1004 """Preprocess the data ID arguments passed to query* methods.
1006 Parameters
1007 ----------
1008 data_id : `DataId` or `None`
1009 Data ID that constrains the query results.
1010 doomed_by : `list` [ `str` ]
1011 List to append messages indicating why the query is doomed to
1012 yield no results.
1013 **kwargs
1014 Additional data ID key-value pairs, extending and overriding
1015 ``data_id``.
1017 Returns
1018 -------
1019 data_id : `DataCoordinate`
1020 Standardized data ID. Will be fully expanded unless expansion
1021 fails, in which case a message will be appended to ``doomed_by``
1022 on return.
1023 """
1024 try:
1025 return self.expandDataId(data_id, **kwargs)
1026 except DataIdValueError as err:
1027 doomed_by.append(str(err))
1028 return DataCoordinate.standardize(
1029 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
1030 )
1032 def _standardize_query_dataset_args(
1033 self,
1034 datasets: Any,
1035 collections: Any,
1036 components: bool | None,
1037 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
1038 *,
1039 doomed_by: list[str],
1040 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
1041 """Preprocess dataset arguments passed to query* methods.
1043 Parameters
1044 ----------
1045 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
1046 Expression identifying dataset types. See `queryDatasetTypes` for
1047 details.
1048 collections : `str`, `re.Pattern`, or iterable of these
1049 Expression identifying collections to be searched. See
1050 `queryCollections` for details.
1051 components : `bool`, optional
1052 If `True`, apply all expression patterns to component dataset type
1053 names as well. If `False`, never apply patterns to components.
1054 If `None` (default), apply patterns to components only if their
1055 parent datasets were not matched by the expression.
1056 Fully-specified component datasets (`str` or `DatasetType`
1057 instances) are always included.
1059 Values other than `False` are deprecated, and only `False` will be
1060 supported after v26. After v27 this argument will be removed
1061 entirely.
1062 mode : `str`, optional
1063 The way in which datasets are being used in this query; one of:
1065 - "find_first": this is a query for the first dataset in an
1066 ordered list of collections. Prohibits collection wildcards,
1067 but permits dataset type wildcards.
1069 - "find_all": this is a query for all datasets in all matched
1070 collections. Permits collection and dataset type wildcards.
1072 - "constrain": this is a query for something other than datasets,
1073 with results constrained by dataset existence. Permits
1074 collection wildcards and prohibits ``...`` as a dataset type
1075 wildcard.
1076 doomed_by : `list` [ `str` ]
1077 List to append messages indicating why the query is doomed to
1078 yield no results.
1080 Returns
1081 -------
1082 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ]
1083 Dictionary mapping parent dataset type to `list` of components
1084 matched for that dataset type (or `None` for the parent itself).
1085 collections : `CollectionWildcard` or `None`
1086 Processed collection expression.
1087 """
1088 composition: dict[DatasetType, list[str | None]] = {}
1089 if datasets is not None:
1090 if not collections:
1091 if not self.defaults.collections:
1092 raise NoDefaultCollectionError("No collections, and no registry default collections.")
1093 collections = self.defaults.collections
1094 else:
1095 collections = CollectionWildcard.from_expression(collections)
1096 if mode == "find_first" and collections.patterns:
1097 raise TypeError(
1098 f"Collection pattern(s) {collections.patterns} not allowed in this context."
1099 )
1100 missing: list[str] = []
1101 composition = self._managers.datasets.resolve_wildcard(
1102 datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
1103 )
1104 if missing and mode == "constrain":
1105 # After v26 this should raise MissingDatasetTypeError, to be
1106 # implemented on DM-36303.
1107 warnings.warn(
1108 f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
1109 FutureWarning,
1110 )
1111 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
1112 elif collections:
1113 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
1114 return composition, collections
1116 def queryDatasets(
1117 self,
1118 datasetType: Any,
1119 *,
1120 collections: Any = None,
1121 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1122 dataId: Optional[DataId] = None,
1123 where: str = "",
1124 findFirst: bool = False,
1125 components: Optional[bool] = None,
1126 bind: Optional[Mapping[str, Any]] = None,
1127 check: bool = True,
1128 **kwargs: Any,
1129 ) -> queries.DatasetQueryResults:
1130 # Docstring inherited from lsst.daf.butler.registry.Registry
1131 doomed_by: list[str] = []
1132 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1133 dataset_composition, collections = self._standardize_query_dataset_args(
1134 datasetType,
1135 collections,
1136 components,
1137 mode="find_first" if findFirst else "find_all",
1138 doomed_by=doomed_by,
1139 )
1140 parent_results: list[queries.ParentDatasetQueryResults] = []
1141 for parent_dataset_type, components_for_parent in dataset_composition.items():
1142 # The full set of dimensions in the query is the combination of
1143 # those needed for the DatasetType and those explicitly requested,
1144 # if any.
1145 dimension_names = set(parent_dataset_type.dimensions.names)
1146 if dimensions is not None:
1147 dimension_names.update(self.dimensions.extract(dimensions).names)
1148 # Construct the summary structure needed to construct a
1149 # QueryBuilder.
1150 summary = queries.QuerySummary(
1151 requested=DimensionGraph(self.dimensions, names=dimension_names),
1152 data_id=data_id,
1153 expression=where,
1154 bind=bind,
1155 defaults=self.defaults.dataId,
1156 check=check,
1157 datasets=[parent_dataset_type],
1158 )
1159 builder = self._makeQueryBuilder(summary)
1160 # Add the dataset subquery to the query, telling the QueryBuilder
1161 # to include the rank of the selected collection in the results
1162 # only if we need to findFirst. Note that if any of the
1163 # collections are actually wildcard expressions, and
1164 # findFirst=True, this will raise TypeError for us.
1165 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst)
1166 query = builder.finish()
1167 parent_results.append(
1168 queries.ParentDatasetQueryResults(
1169 query, parent_dataset_type, components=components_for_parent
1170 )
1171 )
1172 if not parent_results:
1173 doomed_by.extend(
1174 f"No registered dataset type matching {t!r} found, so no matching datasets can "
1175 "exist in any collection."
1176 for t in ensure_iterable(datasetType)
1177 )
1178 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
1179 elif len(parent_results) == 1:
1180 return parent_results[0]
1181 else:
1182 return queries.ChainedDatasetQueryResults(parent_results)
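# Usage sketch for queryDatasets: a dataset search with a string expression and
# a bound parameter. Assumes an existing `registry`; the dataset type,
# collection glob, and values are hypothetical.
refs = registry.queryDatasets(
    "calexp",
    collections=["HypoCam/runs/*"],
    where="instrument = inst AND detector IN (10..20)",
    bind={"inst": "HypoCam"},
    findFirst=False,
)
for found in set(refs):
    print(found.dataId, found.run)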
1184 def queryDataIds(
1185 self,
1186 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
1187 *,
1188 dataId: Optional[DataId] = None,
1189 datasets: Any = None,
1190 collections: Any = None,
1191 where: str = "",
1192 components: Optional[bool] = None,
1193 bind: Optional[Mapping[str, Any]] = None,
1194 check: bool = True,
1195 **kwargs: Any,
1196 ) -> queries.DataCoordinateQueryResults:
1197 # Docstring inherited from lsst.daf.butler.registry.Registry
1198 dimensions = ensure_iterable(dimensions)
1199 requestedDimensions = self.dimensions.extract(dimensions)
1200 doomed_by: list[str] = []
1201 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1202 dataset_composition, collections = self._standardize_query_dataset_args(
1203 datasets, collections, components, doomed_by=doomed_by
1204 )
1205 summary = queries.QuerySummary(
1206 requested=requestedDimensions,
1207 data_id=data_id,
1208 expression=where,
1209 bind=bind,
1210 defaults=self.defaults.dataId,
1211 check=check,
1212 datasets=dataset_composition.keys(),
1213 )
1214 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1215 for datasetType in dataset_composition.keys():
1216 builder.joinDataset(datasetType, collections, isResult=False)
1217 query = builder.finish()
1219 return queries.DataCoordinateQueryResults(query)
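# Usage sketch for queryDataIds: visit/detector data IDs constrained by the
# existence of a dataset in a collection. Assumes an existing `registry`;
# names are hypothetical.
data_ids = registry.queryDataIds(
    ["visit", "detector"],
    datasets="calexp",
    collections="HypoCam/runs/run1",
    where="band = 'r'",
)
for data_id in data_ids.expanded():
    print(data_id)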
1221 def queryDimensionRecords(
1222 self,
1223 element: Union[DimensionElement, str],
1224 *,
1225 dataId: Optional[DataId] = None,
1226 datasets: Any = None,
1227 collections: Any = None,
1228 where: str = "",
1229 components: Optional[bool] = None,
1230 bind: Optional[Mapping[str, Any]] = None,
1231 check: bool = True,
1232 **kwargs: Any,
1233 ) -> queries.DimensionRecordQueryResults:
1234 # Docstring inherited from lsst.daf.butler.registry.Registry
1235 if not isinstance(element, DimensionElement):
1236 try:
1237 element = self.dimensions[element]
1238 except KeyError as e:
1239 raise DimensionNameError(
1240 f"No such dimension '{element}', available dimensions: "
1241 + str(self.dimensions.getStaticElements())
1242 ) from e
1243 doomed_by: list[str] = []
1244 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1245 dataset_composition, collections = self._standardize_query_dataset_args(
1246 datasets, collections, components, doomed_by=doomed_by
1247 )
1248 summary = queries.QuerySummary(
1249 requested=element.graph,
1250 data_id=data_id,
1251 expression=where,
1252 bind=bind,
1253 defaults=self.defaults.dataId,
1254 check=check,
1255 datasets=dataset_composition.keys(),
1256 )
1257 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1258 for datasetType in dataset_composition.keys():
1259 builder.joinDataset(datasetType, collections, isResult=False)
1260 query = builder.finish().with_record_columns(element)
1261 return queries.DatabaseDimensionRecordQueryResults(query, element)
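# Usage sketch for queryDimensionRecords: fetch exposure records rather than
# data IDs. Assumes an existing `registry`; the instrument name and expression
# values are hypothetical.
for exp_record in registry.queryDimensionRecords(
    "exposure",
    where="exposure.observation_type = 'science'",
    instrument="HypoCam",
):
    print(exp_record.id, exp_record.timespan)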
1263 def queryDatasetAssociations(
1264 self,
1265 datasetType: Union[str, DatasetType],
1266 collections: Any = ...,
1267 *,
1268 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1269 flattenChains: bool = False,
1270 ) -> Iterator[DatasetAssociation]:
1271 # Docstring inherited from lsst.daf.butler.registry.Registry
1272 if collections is None:
1273 if not self.defaults.collections:
1274 raise NoDefaultCollectionError(
1275 "No collections provided to queryDatasetAssociations, "
1276 "and no defaults from registry construction."
1277 )
1278 collections = self.defaults.collections
1279 collections = CollectionWildcard.from_expression(collections)
1280 backend = queries.SqlQueryBackend(self._db, self._managers)
1281 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
1282 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
1283 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
1284 for parent_collection_record in backend.resolve_collection_wildcard(
1285 collections,
1286 collection_types=frozenset(collectionTypes),
1287 flatten_chains=flattenChains,
1288 ):
1289 # Resolve this possibly-chained collection into a list of
1290 # non-CHAINED collections that actually hold datasets of this
1291 # type.
1292 candidate_collection_records = backend.resolve_dataset_collections(
1293 parent_dataset_type,
1294 CollectionWildcard.from_names([parent_collection_record.name]),
1295 allow_calibration_collections=True,
1296 governor_constraints={},
1297 )
1298 if not candidate_collection_records:
1299 continue
1300 with backend.context() as context:
1301 relation = backend.make_dataset_query_relation(
1302 parent_dataset_type,
1303 candidate_collection_records,
1304 columns={"dataset_id", "run", "timespan", "collection"},
1305 context=context,
1306 )
1307 reader = queries.DatasetRefReader(
1308 parent_dataset_type,
1309 translate_collection=lambda k: self._managers.collections[k].name,
1310 full=False,
1311 )
1312 for row in context.fetch_iterable(relation):
1313 ref = reader.read(row)
1314 collection_record = self._managers.collections[row[collection_tag]]
1315 if collection_record.type is CollectionType.CALIBRATION:
1316 timespan = row[timespan_tag]
1317 else:
1318 # For backwards compatibility and (possibly?) user
1319 # convenience we continue to define the timespan of a
1320 # DatasetAssociation row for a non-CALIBRATION
1321 # collection to be None rather than a fully unbounded
1322 # timespan.
1323 timespan = None
1324 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
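# Usage sketch for queryDatasetAssociations: list every collection membership
# of a dataset type, with validity ranges for CALIBRATION collections. Assumes
# an existing `registry`; the dataset type name is hypothetical.
for assoc in registry.queryDatasetAssociations(
    "bias",
    collectionTypes={CollectionType.CALIBRATION, CollectionType.RUN},
):
    print(assoc.collection, assoc.ref.dataId, assoc.timespan)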
1326 storageClasses: StorageClassFactory
1327 """All storage classes known to the registry (`StorageClassFactory`).
1328 """