Coverage for python/lsst/daf/butler/registries/sql.py: 13%
497 statements
coverage.py v6.5.0, created at 2023-02-22 03:05 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = ("SqlRegistry",)
26import contextlib
27import logging
28import warnings
29from typing import (
30 TYPE_CHECKING,
31 Any,
32 Dict,
33 Iterable,
34 Iterator,
35 List,
36 Literal,
37 Mapping,
38 Optional,
39 Sequence,
40 Set,
41 Union,
42 cast,
43)
45import sqlalchemy
46from lsst.daf.relation import LeafRelation, Relation
47from lsst.resources import ResourcePathExpression
48from lsst.utils.iteration import ensure_iterable
50from ..core import (
51 Config,
52 DataCoordinate,
53 DataId,
54 DatasetAssociation,
55 DatasetColumnTag,
56 DatasetId,
57 DatasetRef,
58 DatasetType,
59 Dimension,
60 DimensionConfig,
61 DimensionElement,
62 DimensionGraph,
63 DimensionRecord,
64 DimensionUniverse,
65 NamedKeyMapping,
66 NameLookupMapping,
67 Progress,
68 StorageClassFactory,
69 Timespan,
70 ddl,
71)
72from ..core.utils import transactional
73from ..registry import (
74 ArgumentError,
75 CollectionExpressionError,
76 CollectionSummary,
77 CollectionType,
78 CollectionTypeError,
79 ConflictingDefinitionError,
80 DataIdValueError,
81 DatasetTypeError,
82 DimensionNameError,
83 InconsistentDataIdError,
84 NoDefaultCollectionError,
85 OrphanedRecordError,
86 Registry,
87 RegistryConfig,
88 RegistryDefaults,
89 queries,
90)
91from ..registry.interfaces import ChainedCollectionRecord, DatasetIdFactory, DatasetIdGenEnum, RunRecord
92from ..registry.managers import RegistryManagerInstances, RegistryManagerTypes
93from ..registry.wildcards import CollectionWildcard, DatasetTypeWildcard
95if TYPE_CHECKING:
96 from .._butlerConfig import ButlerConfig
97 from ..registry.interfaces import CollectionRecord, Database, DatastoreRegistryBridgeManager
100_LOG = logging.getLogger(__name__)
103class SqlRegistry(Registry):
104 """Registry implementation based on SQLAlchemy.
106 Parameters
107 ----------
108 database : `Database`
109 Database instance to store Registry.
110 defaults : `RegistryDefaults`
111 Default collection search path and/or output `~CollectionType.RUN`
112 collection.
113 managers : `RegistryManagerInstances`
114 All the managers required for this registry.
115 """
117 defaultConfigFile: Optional[str] = None
118 """Path to configuration defaults. Accessed within the ``configs`` resource
119 or relative to a search path. Can be `None` if no defaults are specified.
120 """
122 @classmethod
123 def createFromConfig(
124 cls,
125 config: Optional[Union[RegistryConfig, str]] = None,
126 dimensionConfig: Optional[Union[DimensionConfig, str]] = None,
127 butlerRoot: Optional[ResourcePathExpression] = None,
128 ) -> Registry:
129 """Create registry database and return `SqlRegistry` instance.
131 This method initializes database contents; the database must be empty
132 prior to calling this method.
134 Parameters
135 ----------
136 config : `RegistryConfig` or `str`, optional
137 Registry configuration; if missing, the default configuration will
138 be loaded from ``registry.yaml``.
139 dimensionConfig : `DimensionConfig` or `str`, optional
140 Dimension configuration; if missing, the default configuration
141 will be loaded from ``dimensions.yaml``.
142 butlerRoot : convertible to `lsst.resources.ResourcePath`, optional
143 Path to the repository root this `SqlRegistry` will manage.
145 Returns
146 -------
147 registry : `SqlRegistry`
148 A new `SqlRegistry` instance.
149 """
150 config = cls.forceRegistryConfig(config)
151 config.replaceRoot(butlerRoot)
153 if isinstance(dimensionConfig, str):
154 dimensionConfig = DimensionConfig(dimensionConfig)
155 elif dimensionConfig is None:
156 dimensionConfig = DimensionConfig()
157 elif not isinstance(dimensionConfig, DimensionConfig):
158 raise TypeError(f"Incompatible Dimension configuration type: {type(dimensionConfig)}")
160 DatabaseClass = config.getDatabaseClass()
161 database = DatabaseClass.fromUri(
162 str(config.connectionString), origin=config.get("origin", 0), namespace=config.get("namespace")
163 )
164 managerTypes = RegistryManagerTypes.fromConfig(config)
165 managers = managerTypes.makeRepo(database, dimensionConfig)
166 return cls(database, RegistryDefaults(), managers)
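# --- Usage sketch (illustrative; not part of this module) -------------------
# Creating a brand-new, empty registry database from configuration, following
# the call pattern documented above.  The configuration file names and the
# repository root are hypothetical.
from lsst.daf.butler.registries.sql import SqlRegistry

new_registry = SqlRegistry.createFromConfig(
    "registry.yaml",                     # registry configuration (hypothetical path)
    dimensionConfig="dimensions.yaml",   # dimension configuration (hypothetical path)
    butlerRoot="/repo/new",              # repository root this registry will manage
)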
168 @classmethod
169 def fromConfig(
170 cls,
171 config: Union[ButlerConfig, RegistryConfig, Config, str],
172 butlerRoot: Optional[ResourcePathExpression] = None,
173 writeable: bool = True,
174 defaults: Optional[RegistryDefaults] = None,
175 ) -> Registry:
176 """Create `Registry` subclass instance from `config`.
178 The registry database must be initialized prior to calling this method.
180 Parameters
181 ----------
182 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
183 Registry configuration.
184 butlerRoot : `lsst.resources.ResourcePathExpression`, optional
185 Path to the repository root this `Registry` will manage.
186 writeable : `bool`, optional
187 If `True` (default) create a read-write connection to the database.
188 defaults : `RegistryDefaults`, optional
189 Default collection search path and/or output `~CollectionType.RUN`
190 collection.
192 Returns
193 -------
194 registry : `SqlRegistry` (subclass)
195 A new `SqlRegistry` subclass instance.
196 """
197 config = cls.forceRegistryConfig(config)
198 config.replaceRoot(butlerRoot)
199 DatabaseClass = config.getDatabaseClass()
200 database = DatabaseClass.fromUri(
201 config.connectionString.render_as_string(hide_password=False),
202 origin=config.get("origin", 0),
203 namespace=config.get("namespace"),
204 writeable=writeable,
205 )
206 managerTypes = RegistryManagerTypes.fromConfig(config)
207 with database.session():
208 managers = managerTypes.loadRepo(database)
209 if defaults is None:
210 defaults = RegistryDefaults()
211 return cls(database, defaults, managers)
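# --- Usage sketch (illustrative; not part of this module) -------------------
# Connecting to an existing, already-initialized registry database, here with a
# read-only connection and default collections.  The configuration path and
# collection name are hypothetical.
from lsst.daf.butler.registries.sql import SqlRegistry
from lsst.daf.butler.registry import RegistryDefaults

registry = SqlRegistry.fromConfig(
    "registry.yaml",                                  # hypothetical configuration file
    writeable=False,                                  # open a read-only connection
    defaults=RegistryDefaults(collections=["HSC/defaults"]),
)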
213 def __init__(self, database: Database, defaults: RegistryDefaults, managers: RegistryManagerInstances):
214 self._db = database
215 self._managers = managers
216 self.storageClasses = StorageClassFactory()
217 # Intentionally invoke property setter to initialize defaults. This
218 # can only be done after most of the rest of Registry has already been
219 # initialized, and must be done before the property getter is used.
220 self.defaults = defaults
221 # In the future DatasetIdFactory may become configurable and this
222 # instance will need to be shared with datasets manager.
223 self.datasetIdFactory = DatasetIdFactory()
225 def __str__(self) -> str:
226 return str(self._db)
228 def __repr__(self) -> str:
229 return f"SqlRegistry({self._db!r}, {self.dimensions!r})"
231 def isWriteable(self) -> bool:
232 # Docstring inherited from lsst.daf.butler.registry.Registry
233 return self._db.isWriteable()
235 def copy(self, defaults: Optional[RegistryDefaults] = None) -> Registry:
236 # Docstring inherited from lsst.daf.butler.registry.Registry
237 if defaults is None:
238 # No need to copy, because `RegistryDefaults` is immutable; we
239 # effectively copy on write.
240 defaults = self.defaults
241 return type(self)(self._db, defaults, self._managers)
243 @property
244 def dimensions(self) -> DimensionUniverse:
245 # Docstring inherited from lsst.daf.butler.registry.Registry
246 return self._managers.dimensions.universe
248 def refresh(self) -> None:
249 # Docstring inherited from lsst.daf.butler.registry.Registry
250 with self._db.transaction():
251 self._managers.refresh()
253 @contextlib.contextmanager
254 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
255 # Docstring inherited from lsst.daf.butler.registry.Registry
256 try:
257 with self._db.transaction(savepoint=savepoint):
258 yield
259 except BaseException:
260 # TODO: this clears the caches sometimes when we wouldn't actually
261 # need to. Can we avoid that?
262 self._managers.dimensions.clearCaches()
263 raise
265 def resetConnectionPool(self) -> None:
266 """Reset SQLAlchemy connection pool for `SqlRegistry` database.
268 This operation is useful when using the registry with fork-based
269 multiprocessing. To use the registry across a fork boundary, ensure
270 that there are no currently active connections (no session or
271 transaction is in progress) and reset the connection pool with this
272 method. It should be called by the child process immediately
273 after the fork.
274 """
275 self._db._engine.dispose()
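# --- Usage sketch (illustrative; not part of this module) -------------------
# Using the registry with fork-based multiprocessing, as described above: the
# child process must reset the connection pool immediately after the fork.
# ``registry.yaml`` is a hypothetical configuration file; no session or
# transaction may be active at fork time.
import os

from lsst.daf.butler.registries.sql import SqlRegistry

registry = SqlRegistry.fromConfig("registry.yaml")
if os.fork() == 0:
    # Child process: drop connections inherited from the parent before use.
    registry.resetConnectionPool()
    print(registry.queryCollections())   # registry is now safe to use here
    os._exit(0)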
277 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
278 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
279 other data repository client.
281 Opaque table records can be added via `insertOpaqueData`, retrieved via
282 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
284 Parameters
285 ----------
286 tableName : `str`
287 Logical name of the opaque table. This may differ from the
288 actual name used in the database by a prefix and/or suffix.
289 spec : `ddl.TableSpec`
290 Specification for the table to be added.
291 """
292 self._managers.opaque.register(tableName, spec)
294 @transactional
295 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
296 """Insert records into an opaque table.
298 Parameters
299 ----------
300 tableName : `str`
301 Logical name of the opaque table. Must match the name used in a
302 previous call to `registerOpaqueTable`.
303 data
304 Each additional positional argument is a dictionary that represents
305 a single row to be added.
306 """
307 self._managers.opaque[tableName].insert(*data)
309 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[Mapping[str, Any]]:
310 """Retrieve records from an opaque table.
312 Parameters
313 ----------
314 tableName : `str`
315 Logical name of the opaque table. Must match the name used in a
316 previous call to `registerOpaqueTable`.
317 where
318 Additional keyword arguments are interpreted as equality
319 constraints that restrict the returned rows (combined with AND);
320 keyword arguments are column names and values are the values they
321 must have.
323 Yields
324 ------
325 row : `dict`
326 A dictionary representing a single result row.
327 """
328 yield from self._managers.opaque[tableName].fetch(**where)
330 @transactional
331 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
332 """Remove records from an opaque table.
334 Parameters
335 ----------
336 tableName : `str`
337 Logical name of the opaque table. Must match the name used in a
338 previous call to `registerOpaqueTable`.
339 where
340 Additional keyword arguments are interpreted as equality
341 constraints that restrict the deleted rows (combined with AND);
342 keyword arguments are column names and values are the values they
343 must have.
344 """
345 self._managers.opaque[tableName].delete(where.keys(), where)
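# --- Usage sketch (illustrative; not part of this module) -------------------
# The full life cycle of an opaque table, combining the four methods above.
# The table name, column definitions, and row values are hypothetical;
# ``registry.yaml`` is a hypothetical configuration file.
import sqlalchemy

from lsst.daf.butler.core import ddl
from lsst.daf.butler.registries.sql import SqlRegistry

registry = SqlRegistry.fromConfig("registry.yaml")
spec = ddl.TableSpec(
    fields=[
        ddl.FieldSpec(name="dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
        ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
    ]
)
registry.registerOpaqueTable("my_datastore_records", spec)
registry.insertOpaqueData("my_datastore_records", {"dataset_id": 1, "path": "a.fits"})
rows = list(registry.fetchOpaqueData("my_datastore_records", dataset_id=1))
registry.deleteOpaqueData("my_datastore_records", dataset_id=1)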
347 def registerCollection(
348 self, name: str, type: CollectionType = CollectionType.TAGGED, doc: Optional[str] = None
349 ) -> bool:
350 # Docstring inherited from lsst.daf.butler.registry.Registry
351 _, registered = self._managers.collections.register(name, type, doc=doc)
352 return registered
354 def getCollectionType(self, name: str) -> CollectionType:
355 # Docstring inherited from lsst.daf.butler.registry.Registry
356 return self._managers.collections.find(name).type
358 def _get_collection_record(self, name: str) -> CollectionRecord:
359 # Docstring inherited from lsst.daf.butler.registry.Registry
360 return self._managers.collections.find(name)
362 def registerRun(self, name: str, doc: Optional[str] = None) -> bool:
363 # Docstring inherited from lsst.daf.butler.registry.Registry
364 _, registered = self._managers.collections.register(name, CollectionType.RUN, doc=doc)
365 return registered
367 @transactional
368 def removeCollection(self, name: str) -> None:
369 # Docstring inherited from lsst.daf.butler.registry.Registry
370 self._managers.collections.remove(name)
372 def getCollectionChain(self, parent: str) -> tuple[str, ...]:
373 # Docstring inherited from lsst.daf.butler.registry.Registry
374 record = self._managers.collections.find(parent)
375 if record.type is not CollectionType.CHAINED:
376 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
377 assert isinstance(record, ChainedCollectionRecord)
378 return record.children
380 @transactional
381 def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None:
382 # Docstring inherited from lsst.daf.butler.registry.Registry
383 record = self._managers.collections.find(parent)
384 if record.type is not CollectionType.CHAINED:
385 raise CollectionTypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
386 assert isinstance(record, ChainedCollectionRecord)
387 children = CollectionWildcard.from_expression(children).require_ordered()
388 if children != record.children or flatten:
389 record.update(self._managers.collections, children, flatten=flatten)
391 def getCollectionParentChains(self, collection: str) -> Set[str]:
392 # Docstring inherited from lsst.daf.butler.registry.Registry
393 return {
394 record.name
395 for record in self._managers.collections.getParentChains(
396 self._managers.collections.find(collection).key
397 )
398 }
400 def getCollectionDocumentation(self, collection: str) -> Optional[str]:
401 # Docstring inherited from lsst.daf.butler.registry.Registry
402 return self._managers.collections.getDocumentation(self._managers.collections.find(collection).key)
404 def setCollectionDocumentation(self, collection: str, doc: Optional[str]) -> None:
405 # Docstring inherited from lsst.daf.butler.registry.Registry
406 self._managers.collections.setDocumentation(self._managers.collections.find(collection).key, doc)
408 def getCollectionSummary(self, collection: str) -> CollectionSummary:
409 # Docstring inherited from lsst.daf.butler.registry.Registry
410 record = self._managers.collections.find(collection)
411 return self._managers.datasets.getCollectionSummary(record)
413 def registerDatasetType(self, datasetType: DatasetType) -> bool:
414 # Docstring inherited from lsst.daf.butler.registry.Registry
415 _, inserted = self._managers.datasets.register(datasetType)
416 return inserted
418 def removeDatasetType(self, name: str) -> None:
419 # Docstring inherited from lsst.daf.butler.registry.Registry
420 self._managers.datasets.remove(name)
422 def getDatasetType(self, name: str) -> DatasetType:
423 # Docstring inherited from lsst.daf.butler.registry.Registry
424 parent_name, component = DatasetType.splitDatasetTypeName(name)
425 storage = self._managers.datasets[parent_name]
426 if component is None:
427 return storage.datasetType
428 else:
429 return storage.datasetType.makeComponentDatasetType(component)
431 def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
432 # Docstring inherited from lsst.daf.butler.registry.Registry
433 return self._managers.datasets.supportsIdGenerationMode(mode)
435 def findDataset(
436 self,
437 datasetType: Union[DatasetType, str],
438 dataId: Optional[DataId] = None,
439 *,
440 collections: Any = None,
441 timespan: Optional[Timespan] = None,
442 **kwargs: Any,
443 ) -> Optional[DatasetRef]:
444 # Docstring inherited from lsst.daf.butler.registry.Registry
445 if collections is None:
446 if not self.defaults.collections:
447 raise NoDefaultCollectionError(
448 "No collections provided to findDataset, and no defaults from registry construction."
449 )
450 collections = self.defaults.collections
451 backend = queries.SqlQueryBackend(self._db, self._managers)
452 collection_wildcard = CollectionWildcard.from_expression(collections, require_ordered=True)
453 matched_collections = backend.resolve_collection_wildcard(collection_wildcard)
454 parent_dataset_type, components = backend.resolve_single_dataset_type_wildcard(
455 datasetType, components_deprecated=False
456 )
457 if len(components) > 1:
458 raise DatasetTypeError(
459 f"findDataset requires exactly one dataset type; got multiple components {components} "
460 f"for parent dataset type {parent_dataset_type.name}."
461 )
462 component = components[0]
463 dataId = DataCoordinate.standardize(
464 dataId,
465 graph=parent_dataset_type.dimensions,
466 universe=self.dimensions,
467 defaults=self.defaults.dataId,
468 **kwargs,
469 )
470 governor_constraints = {name: {cast(str, dataId[name])} for name in dataId.graph.governors.names}
471 (filtered_collections,) = backend.filter_dataset_collections(
472 [parent_dataset_type],
473 matched_collections,
474 governor_constraints=governor_constraints,
475 ).values()
476 if not filtered_collections:
477 return None
478 if timespan is None:
479 filtered_collections = [
480 collection_record
481 for collection_record in filtered_collections
482 if collection_record.type is not CollectionType.CALIBRATION
483 ]
484 if filtered_collections:
485 requested_columns = {"dataset_id", "run", "collection"}
486 with backend.context() as context:
487 predicate = context.make_data_coordinate_predicate(
488 dataId.subset(parent_dataset_type.dimensions), full=False
489 )
490 if timespan is not None:
491 requested_columns.add("timespan")
492 predicate = predicate.logical_and(
493 context.make_timespan_overlap_predicate(
494 DatasetColumnTag(parent_dataset_type.name, "timespan"), timespan
495 )
496 )
497 relation = backend.make_dataset_query_relation(
498 parent_dataset_type, filtered_collections, requested_columns, context
499 ).with_rows_satisfying(predicate)
500 rows = list(context.fetch_iterable(relation))
501 else:
502 rows = []
503 if not rows:
504 return None
505 elif len(rows) == 1:
506 best_row = rows[0]
507 else:
508 rank_by_collection_key = {record.key: n for n, record in enumerate(filtered_collections)}
509 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
510 row_iter = iter(rows)
511 best_row = next(row_iter)
512 best_rank = rank_by_collection_key[best_row[collection_tag]]
513 have_tie = False
514 for row in row_iter:
515 if (rank := rank_by_collection_key[row[collection_tag]]) < best_rank:
516 best_row = row
517 best_rank = rank
518 have_tie = False
519 elif rank == best_rank:
520 have_tie = True
521 assert timespan is not None, "Rank ties should be impossible given DB constraints."
522 if have_tie:
523 raise LookupError(
524 f"Ambiguous calibration lookup for {parent_dataset_type.name} in collections "
525 f"{collection_wildcard.strings} with timespan {timespan}."
526 )
527 reader = queries.DatasetRefReader(
528 parent_dataset_type,
529 translate_collection=lambda k: self._managers.collections[k].name,
530 )
531 ref = reader.read(best_row, data_id=dataId)
532 if component is not None:
533 ref = ref.makeComponentRef(component)
534 return ref
536 @transactional
537 def insertDatasets(
538 self,
539 datasetType: Union[DatasetType, str],
540 dataIds: Iterable[DataId],
541 run: Optional[str] = None,
542 expand: bool = True,
543 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
544 ) -> List[DatasetRef]:
545 # Docstring inherited from lsst.daf.butler.registry.Registry
546 if isinstance(datasetType, DatasetType):
547 storage = self._managers.datasets.find(datasetType.name)
548 if storage is None:
549 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
550 else:
551 storage = self._managers.datasets.find(datasetType)
552 if storage is None:
553 raise DatasetTypeError(f"DatasetType with name '{datasetType}' has not been registered.")
554 if run is None:
555 if self.defaults.run is None:
556 raise NoDefaultCollectionError(
557 "No run provided to insertDatasets, and no default from registry construction."
558 )
559 run = self.defaults.run
560 runRecord = self._managers.collections.find(run)
561 if runRecord.type is not CollectionType.RUN:
562 raise CollectionTypeError(
563 f"Given collection is of type {runRecord.type.name}; RUN collection required."
564 )
565 assert isinstance(runRecord, RunRecord)
566 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
567 if expand:
568 expandedDataIds = [
569 self.expandDataId(dataId, graph=storage.datasetType.dimensions)
570 for dataId in progress.wrap(dataIds, f"Expanding {storage.datasetType.name} data IDs")
571 ]
572 else:
573 expandedDataIds = [
574 DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions) for dataId in dataIds
575 ]
576 try:
577 refs = list(storage.insert(runRecord, expandedDataIds, idGenerationMode))
578 if self._managers.obscore:
579 context = queries.SqlQueryContext(self._db, self._managers.column_types)
580 self._managers.obscore.add_datasets(refs, context)
581 except sqlalchemy.exc.IntegrityError as err:
582 raise ConflictingDefinitionError(
583 "A database constraint failure was triggered by inserting "
584 f"one or more datasets of type {storage.datasetType} into "
585 f"collection '{run}'. "
586 "This probably means a dataset with the same data ID "
587 "and dataset type already exists, but it may also mean a "
588 "dimension row is missing."
589 ) from err
590 return refs
592 @transactional
593 def _importDatasets(
594 self,
595 datasets: Iterable[DatasetRef],
596 expand: bool = True,
597 idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
598 reuseIds: bool = False,
599 ) -> List[DatasetRef]:
600 # Docstring inherited from lsst.daf.butler.registry.Registry
601 datasets = list(datasets)
602 if not datasets:
603 # nothing to do
604 return []
606 # find dataset type
607 datasetTypes = set(dataset.datasetType for dataset in datasets)
608 if len(datasetTypes) != 1:
609 raise DatasetTypeError(f"Multiple dataset types in input datasets: {datasetTypes}")
610 datasetType = datasetTypes.pop()
612 # get storage handler for this dataset type
613 storage = self._managers.datasets.find(datasetType.name)
614 if storage is None:
615 raise DatasetTypeError(f"DatasetType '{datasetType}' has not been registered.")
617 # find run name
618 runs = set(dataset.run for dataset in datasets)
619 if len(runs) != 1:
620 raise ValueError(f"Multiple run names in input datasets: {runs}")
621 run = runs.pop()
622 if run is None:
623 if self.defaults.run is None:
624 raise NoDefaultCollectionError(
625 "No run provided to ingestDatasets, and no default from registry construction."
626 )
627 run = self.defaults.run
629 runRecord = self._managers.collections.find(run)
630 if runRecord.type is not CollectionType.RUN:
631 raise CollectionTypeError(
632 f"Given collection '{runRecord.name}' is of type {runRecord.type.name};"
633 " RUN collection required."
634 )
635 assert isinstance(runRecord, RunRecord)
637 progress = Progress("daf.butler.Registry.insertDatasets", level=logging.DEBUG)
638 if expand:
639 expandedDatasets = [
640 dataset.expanded(self.expandDataId(dataset.dataId, graph=storage.datasetType.dimensions))
641 for dataset in progress.wrap(datasets, f"Expanding {storage.datasetType.name} data IDs")
642 ]
643 else:
644 expandedDatasets = [
645 DatasetRef(datasetType, dataset.dataId, id=dataset.id, run=dataset.run, conform=True)
646 for dataset in datasets
647 ]
649 try:
650 refs = list(storage.import_(runRecord, expandedDatasets, idGenerationMode, reuseIds))
651 if self._managers.obscore:
652 context = queries.SqlQueryContext(self._db, self._managers.column_types)
653 self._managers.obscore.add_datasets(refs, context)
654 except sqlalchemy.exc.IntegrityError as err:
655 raise ConflictingDefinitionError(
656 "A database constraint failure was triggered by inserting "
657 f"one or more datasets of type {storage.datasetType} into "
658 f"collection '{run}'. "
659 "This probably means a dataset with the same data ID "
660 "and dataset type already exists, but it may also mean a "
661 "dimension row is missing."
662 ) from err
663 return refs
665 def getDataset(self, id: DatasetId) -> Optional[DatasetRef]:
666 # Docstring inherited from lsst.daf.butler.registry.Registry
667 return self._managers.datasets.getDatasetRef(id)
669 @transactional
670 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
671 # Docstring inherited from lsst.daf.butler.registry.Registry
672 progress = Progress("lsst.daf.butler.Registry.removeDatasets", level=logging.DEBUG)
673 for datasetType, refsForType in progress.iter_item_chunks(
674 DatasetRef.groupByType(refs).items(), desc="Removing datasets by type"
675 ):
676 storage = self._managers.datasets[datasetType.name]
677 try:
678 storage.delete(refsForType)
679 except sqlalchemy.exc.IntegrityError as err:
680 raise OrphanedRecordError(
681 "One or more datasets is still present in one or more Datastores."
682 ) from err
684 @transactional
685 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
686 # Docstring inherited from lsst.daf.butler.registry.Registry
687 progress = Progress("lsst.daf.butler.Registry.associate", level=logging.DEBUG)
688 collectionRecord = self._managers.collections.find(collection)
689 if collectionRecord.type is not CollectionType.TAGGED:
690 raise CollectionTypeError(
691 f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED."
692 )
693 for datasetType, refsForType in progress.iter_item_chunks(
694 DatasetRef.groupByType(refs).items(), desc="Associating datasets by type"
695 ):
696 storage = self._managers.datasets[datasetType.name]
697 try:
698 storage.associate(collectionRecord, refsForType)
699 if self._managers.obscore:
700 # If a TAGGED collection is being monitored by ObsCore
701 # manager then we may need to save the dataset.
702 context = queries.SqlQueryContext(self._db, self._managers.column_types)
703 self._managers.obscore.associate(refsForType, collectionRecord, context)
704 except sqlalchemy.exc.IntegrityError as err:
705 raise ConflictingDefinitionError(
706 f"Constraint violation while associating dataset of type {datasetType.name} with "
707 f"collection {collection}. This probably means that one or more datasets with the same "
708 "dataset type and data ID already exist in the collection, but it may also indicate "
709 "that the datasets do not exist."
710 ) from err
712 @transactional
713 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
714 # Docstring inherited from lsst.daf.butler.registry.Registry
715 progress = Progress("lsst.daf.butler.Registry.disassociate", level=logging.DEBUG)
716 collectionRecord = self._managers.collections.find(collection)
717 if collectionRecord.type is not CollectionType.TAGGED:
718 raise CollectionTypeError(
719 f"Collection '{collection}' has type {collectionRecord.type.name}; expected TAGGED."
720 )
721 for datasetType, refsForType in progress.iter_item_chunks(
722 DatasetRef.groupByType(refs).items(), desc="Disassociating datasets by type"
723 ):
724 storage = self._managers.datasets[datasetType.name]
725 storage.disassociate(collectionRecord, refsForType)
726 if self._managers.obscore:
727 self._managers.obscore.disassociate(refsForType, collectionRecord)
729 @transactional
730 def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None:
731 # Docstring inherited from lsst.daf.butler.registry.Registry
732 progress = Progress("lsst.daf.butler.Registry.certify", level=logging.DEBUG)
733 collectionRecord = self._managers.collections.find(collection)
734 for datasetType, refsForType in progress.iter_item_chunks(
735 DatasetRef.groupByType(refs).items(), desc="Certifying datasets by type"
736 ):
737 storage = self._managers.datasets[datasetType.name]
738 storage.certify(
739 collectionRecord,
740 refsForType,
741 timespan,
742 context=queries.SqlQueryContext(self._db, self._managers.column_types),
743 )
745 @transactional
746 def decertify(
747 self,
748 collection: str,
749 datasetType: Union[str, DatasetType],
750 timespan: Timespan,
751 *,
752 dataIds: Optional[Iterable[DataId]] = None,
753 ) -> None:
754 # Docstring inherited from lsst.daf.butler.registry.Registry
755 collectionRecord = self._managers.collections.find(collection)
756 if isinstance(datasetType, str):
757 storage = self._managers.datasets[datasetType]
758 else:
759 storage = self._managers.datasets[datasetType.name]
760 standardizedDataIds = None
761 if dataIds is not None:
762 standardizedDataIds = [
763 DataCoordinate.standardize(d, graph=storage.datasetType.dimensions) for d in dataIds
764 ]
765 storage.decertify(
766 collectionRecord,
767 timespan,
768 dataIds=standardizedDataIds,
769 context=queries.SqlQueryContext(self._db, self._managers.column_types),
770 )
772 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
773 """Return an object that allows a new `Datastore` instance to
774 communicate with this `Registry`.
776 Returns
777 -------
778 manager : `DatastoreRegistryBridgeManager`
779 Object that mediates communication between this `Registry` and its
780 associated datastores.
781 """
782 return self._managers.datastores
784 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
785 # Docstring inherited from lsst.daf.butler.registry.Registry
786 return self._managers.datastores.findDatastores(ref)
788 def expandDataId(
789 self,
790 dataId: Optional[DataId] = None,
791 *,
792 graph: Optional[DimensionGraph] = None,
793 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
794 withDefaults: bool = True,
795 **kwargs: Any,
796 ) -> DataCoordinate:
797 # Docstring inherited from lsst.daf.butler.registry.Registry
798 if not withDefaults:
799 defaults = None
800 else:
801 defaults = self.defaults.dataId
802 try:
803 standardized = DataCoordinate.standardize(
804 dataId, graph=graph, universe=self.dimensions, defaults=defaults, **kwargs
805 )
806 except KeyError as exc:
807 # This means that either one of the kwargs has an unexpected name or a
808 # required dimension is missing.
809 raise DimensionNameError(str(exc)) from exc
810 if standardized.hasRecords():
811 return standardized
812 if records is None:
813 records = {}
814 elif isinstance(records, NamedKeyMapping):
815 records = records.byName()
816 else:
817 records = dict(records)
818 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
819 records.update(dataId.records.byName())
820 keys = standardized.byName()
821 context = queries.SqlQueryContext(self._db, self._managers.column_types)
822 for element in standardized.graph.primaryKeyTraversalOrder:
823 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
824 if record is ...:
825 if isinstance(element, Dimension) and keys.get(element.name) is None:
826 if element in standardized.graph.required:
827 raise DimensionNameError(
828 f"No value or null value for required dimension {element.name}."
829 )
830 keys[element.name] = None
831 record = None
832 else:
833 storage = self._managers.dimensions[element]
834 record = storage.fetch_one(DataCoordinate.standardize(keys, graph=element.graph), context)
835 records[element.name] = record
836 if record is not None:
837 for d in element.implied:
838 value = getattr(record, d.name)
839 if keys.setdefault(d.name, value) != value:
840 raise InconsistentDataIdError(
841 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
842 f"but {element.name} implies {d.name}={value!r}."
843 )
844 else:
845 if element in standardized.graph.required:
846 raise DataIdValueError(
847 f"Could not fetch record for required dimension {element.name} via keys {keys}."
848 )
849 if element.alwaysJoin:
850 raise InconsistentDataIdError(
851 f"Could not fetch record for element {element.name} via keys {keys}, ",
852 "but it is marked alwaysJoin=True; this means one or more dimensions are not "
853 "related.",
854 )
855 for d in element.implied:
856 keys.setdefault(d.name, None)
857 records.setdefault(d.name, None)
858 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
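# --- Usage sketch (illustrative; not part of this module) -------------------
# Expanding a minimal data ID into a fully-expanded DataCoordinate with
# dimension records attached.  The dimension names and values assume the
# default dimension configuration and are hypothetical, as is the
# ``registry.yaml`` configuration file.
from lsst.daf.butler.registries.sql import SqlRegistry

registry = SqlRegistry.fromConfig("registry.yaml")
data_id = registry.expandDataId(instrument="HSC", detector=42, visit=903334)
assert data_id.hasRecords()   # implied dimension values and records are filled in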
860 def insertDimensionData(
861 self,
862 element: Union[DimensionElement, str],
863 *data: Union[Mapping[str, Any], DimensionRecord],
864 conform: bool = True,
865 replace: bool = False,
866 skip_existing: bool = False,
867 ) -> None:
868 # Docstring inherited from lsst.daf.butler.registry.Registry
869 if conform:
870 if isinstance(element, str):
871 element = self.dimensions[element]
872 records = [
873 row if isinstance(row, DimensionRecord) else element.RecordClass(**row) for row in data
874 ]
875 else:
876 # Ignore typing since caller said to trust them with conform=False.
877 records = data # type: ignore
878 storage = self._managers.dimensions[element]
879 storage.insert(*records, replace=replace, skip_existing=skip_existing)
881 def syncDimensionData(
882 self,
883 element: Union[DimensionElement, str],
884 row: Union[Mapping[str, Any], DimensionRecord],
885 conform: bool = True,
886 update: bool = False,
887 ) -> Union[bool, Dict[str, Any]]:
888 # Docstring inherited from lsst.daf.butler.registry.Registry
889 if conform:
890 if isinstance(element, str):
891 element = self.dimensions[element]
892 record = row if isinstance(row, DimensionRecord) else element.RecordClass(**row)
893 else:
894 # Ignore typing since caller said to trust them with conform=False.
895 record = row # type: ignore
896 storage = self._managers.dimensions[element]
897 return storage.sync(record, update=update)
899 def queryDatasetTypes(
900 self,
901 expression: Any = ...,
902 *,
903 components: Optional[bool] = None,
904 missing: Optional[List[str]] = None,
905 ) -> Iterable[DatasetType]:
906 # Docstring inherited from lsst.daf.butler.registry.Registry
907 wildcard = DatasetTypeWildcard.from_expression(expression)
908 composition_dict = self._managers.datasets.resolve_wildcard(
909 wildcard,
910 components=components,
911 missing=missing,
912 )
913 result: list[DatasetType] = []
914 for parent_dataset_type, components_for_parent in composition_dict.items():
915 result.extend(
916 parent_dataset_type.makeComponentDatasetType(c) if c is not None else parent_dataset_type
917 for c in components_for_parent
918 )
919 return result
921 def queryCollections(
922 self,
923 expression: Any = ...,
924 datasetType: Optional[DatasetType] = None,
925 collectionTypes: Union[Iterable[CollectionType], CollectionType] = CollectionType.all(),
926 flattenChains: bool = False,
927 includeChains: Optional[bool] = None,
928 ) -> Sequence[str]:
929 # Docstring inherited from lsst.daf.butler.registry.Registry
931 # Right now the datasetType argument is completely ignored, but that
932 # is consistent with its [lack of] guarantees. DM-24939 or a follow-up
933 # ticket will take care of that.
934 try:
935 wildcard = CollectionWildcard.from_expression(expression)
936 except TypeError as exc:
937 raise CollectionExpressionError(f"Invalid collection expression '{expression}'") from exc
938 collectionTypes = ensure_iterable(collectionTypes)
939 return [
940 record.name
941 for record in self._managers.collections.resolve_wildcard(
942 wildcard,
943 collection_types=frozenset(collectionTypes),
944 flatten_chains=flattenChains,
945 include_chains=includeChains,
946 )
947 ]
949 def _makeQueryBuilder(
950 self,
951 summary: queries.QuerySummary,
952 doomed_by: Iterable[str] = (),
953 ) -> queries.QueryBuilder:
954 """Return a `QueryBuilder` instance capable of constructing and
955 managing more complex queries than those obtainable via `Registry`
956 interfaces.
958 This is an advanced interface; downstream code should prefer
959 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
960 are sufficient.
962 Parameters
963 ----------
964 summary : `queries.QuerySummary`
965 Object describing and categorizing the full set of dimensions that
966 will be included in the query.
967 doomed_by : `Iterable` of `str`, optional
968 A list of diagnostic messages that indicate why the query is going
969 to yield no results and should not even be executed. If an empty
970 container (the default), the query will be executed unless other code
971 determines that it is doomed.
973 Returns
974 -------
975 builder : `queries.QueryBuilder`
976 Object that can be used to construct and perform advanced queries.
977 """
978 doomed_by = list(doomed_by)
979 backend = queries.SqlQueryBackend(self._db, self._managers)
980 context = backend.context()
981 relation: Relation | None = None
982 if doomed_by:
983 relation = LeafRelation.make_doomed(context.sql_engine, set(), doomed_by)
984 return queries.QueryBuilder(
985 summary,
986 backend=backend,
987 context=context,
988 relation=relation,
989 )
991 def _standardize_query_data_id_args(
992 self, data_id: DataId | None, *, doomed_by: list[str], **kwargs: Any
993 ) -> DataCoordinate:
994 """Preprocess the data ID arguments passed to query* methods.
996 Parameters
997 ----------
998 data_id : `DataId` or `None`
999 Data ID that constrains the query results.
1000 doomed_by : `list` [ `str` ]
1001 List to append messages indicating why the query is doomed to
1002 yield no results.
1003 **kwargs
1004 Additional data ID key-value pairs, extending and overriding
1005 ``data_id``.
1007 Returns
1008 -------
1009 data_id : `DataCoordinate`
1010 Standardized data ID. Will be fully expanded unless expansion
1011 fails, in which case a message will be appended to ``doomed_by``
1012 on return.
1013 """
1014 try:
1015 return self.expandDataId(data_id, **kwargs)
1016 except DataIdValueError as err:
1017 doomed_by.append(str(err))
1018 return DataCoordinate.standardize(
1019 data_id, **kwargs, universe=self.dimensions, defaults=self.defaults.dataId
1020 )
1022 def _standardize_query_dataset_args(
1023 self,
1024 datasets: Any,
1025 collections: Any,
1026 components: bool | None,
1027 mode: Literal["find_first"] | Literal["find_all"] | Literal["constrain"] = "constrain",
1028 *,
1029 doomed_by: list[str],
1030 ) -> tuple[dict[DatasetType, list[str | None]], CollectionWildcard | None]:
1031 """Preprocess dataset arguments passed to query* methods.
1033 Parameters
1034 ----------
1035 datasets : `DatasetType`, `str`, `re.Pattern`, or iterable of these
1036 Expression identifying dataset types. See `queryDatasetTypes` for
1037 details.
1038 collections : `str`, `re.Pattern`, or iterable of these
1039 Expression identifying collections to be searched. See
1040 `queryCollections` for details.
1041 components : `bool`, optional
1042 If `True`, apply all expression patterns to component dataset type
1043 names as well. If `False`, never apply patterns to components.
1044 If `None` (default), apply patterns to components only if their
1045 parent datasets were not matched by the expression.
1046 Fully-specified component datasets (`str` or `DatasetType`
1047 instances) are always included.
1049 Values other than `False` are deprecated, and only `False` will be
1050 supported after v26. After v27 this argument will be removed
1051 entirely.
1052 mode : `str`, optional
1053 The way in which datasets are being used in this query; one of:
1055 - "find_first": this is a query for the first dataset in an
1056 ordered list of collections. Prohibits collection wildcards,
1057 but permits dataset type wildcards.
1059 - "find_all": this is a query for all datasets in all matched
1060 collections. Permits collection and dataset type wildcards.
1062 - "constrain": this is a query for something other than datasets,
1063 with results constrained by dataset existence. Permits
1064 collection wildcards and prohibits ``...`` as a dataset type
1065 wildcard.
1066 doomed_by : `list` [ `str` ]
1067 List to append messages indicating why the query is doomed to
1068 yield no results.
1070 Returns
1071 -------
1072 composition : `dict` [ `DatasetType`, `list` [ `str` or `None` ] ]
1073 Dictionary mapping parent dataset type to `list` of components
1074 matched for that dataset type (or `None` for the parent itself).
1075 collections : `CollectionWildcard`
1076 Processed collection expression.
1077 """
1078 composition: dict[DatasetType, list[str | None]] = {}
1079 if datasets is not None:
1080 if not collections:
1081 if not self.defaults.collections:
1082 raise NoDefaultCollectionError("No collections, and no registry default collections.")
1083 collections = self.defaults.collections
1084 else:
1085 collections = CollectionWildcard.from_expression(collections)
1086 if mode == "find_first" and collections.patterns:
1087 raise TypeError(
1088 f"Collection pattern(s) {collections.patterns} not allowed in this context."
1089 )
1090 missing: list[str] = []
1091 composition = self._managers.datasets.resolve_wildcard(
1092 datasets, components=components, missing=missing, explicit_only=(mode == "constrain")
1093 )
1094 if missing and mode == "constrain":
1095 # After v26 this should raise MissingDatasetTypeError, to be
1096 # implemented on DM-36303.
1097 warnings.warn(
1098 f"Dataset type(s) {missing} are not registered; this will be an error after v26.",
1099 FutureWarning,
1100 )
1101 doomed_by.extend(f"Dataset type {name} is not registered." for name in missing)
1102 elif collections:
1103 raise ArgumentError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")
1104 return composition, collections
1106 def queryDatasets(
1107 self,
1108 datasetType: Any,
1109 *,
1110 collections: Any = None,
1111 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1112 dataId: Optional[DataId] = None,
1113 where: str = "",
1114 findFirst: bool = False,
1115 components: Optional[bool] = None,
1116 bind: Optional[Mapping[str, Any]] = None,
1117 check: bool = True,
1118 **kwargs: Any,
1119 ) -> queries.DatasetQueryResults:
1120 # Docstring inherited from lsst.daf.butler.registry.Registry
1121 doomed_by: list[str] = []
1122 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1123 dataset_composition, collections = self._standardize_query_dataset_args(
1124 datasetType,
1125 collections,
1126 components,
1127 mode="find_first" if findFirst else "find_all",
1128 doomed_by=doomed_by,
1129 )
1130 parent_results: list[queries.ParentDatasetQueryResults] = []
1131 for parent_dataset_type, components_for_parent in dataset_composition.items():
1132 # The full set of dimensions in the query is the combination of
1133 # those needed for the DatasetType and those explicitly requested,
1134 # if any.
1135 dimension_names = set(parent_dataset_type.dimensions.names)
1136 if dimensions is not None:
1137 dimension_names.update(self.dimensions.extract(dimensions).names)
1138 # Construct the summary structure needed to construct a
1139 # QueryBuilder.
1140 summary = queries.QuerySummary(
1141 requested=DimensionGraph(self.dimensions, names=dimension_names),
1142 data_id=data_id,
1143 expression=where,
1144 bind=bind,
1145 defaults=self.defaults.dataId,
1146 check=check,
1147 datasets=[parent_dataset_type],
1148 )
1149 builder = self._makeQueryBuilder(summary)
1150 # Add the dataset subquery to the query, telling the QueryBuilder
1151 # to include the rank of the selected collection in the results
1152 # only if we need to findFirst. Note that if any of the
1153 # collections are actually wildcard expressions, and
1154 # findFirst=True, this will raise TypeError for us.
1155 builder.joinDataset(parent_dataset_type, collections, isResult=True, findFirst=findFirst)
1156 query = builder.finish()
1157 parent_results.append(
1158 queries.ParentDatasetQueryResults(
1159 query, parent_dataset_type, components=components_for_parent
1160 )
1161 )
1162 if not parent_results:
1163 doomed_by.extend(
1164 f"No registered dataset type matching {t!r} found, so no matching datasets can "
1165 "exist in any collection."
1166 for t in ensure_iterable(datasetType)
1167 )
1168 return queries.ChainedDatasetQueryResults([], doomed_by=doomed_by)
1169 elif len(parent_results) == 1:
1170 return parent_results[0]
1171 else:
1172 return queries.ChainedDatasetQueryResults(parent_results)
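# --- Usage sketch (illustrative; not part of this module) -------------------
# Querying datasets with a user expression and find-first semantics over an
# ordered list of collections.  The dataset type, collection names, and
# expression are hypothetical, as is the ``registry.yaml`` configuration file.
from lsst.daf.butler.registries.sql import SqlRegistry

registry = SqlRegistry.fromConfig("registry.yaml")
refs = registry.queryDatasets(
    "calexp",
    collections=["HSC/runs/RC2", "HSC/calib"],
    where="instrument = 'HSC' AND detector = 42",
    findFirst=True,
)
for ref in refs:
    print(ref.dataId, ref.run)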
1174 def queryDataIds(
1175 self,
1176 dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str],
1177 *,
1178 dataId: Optional[DataId] = None,
1179 datasets: Any = None,
1180 collections: Any = None,
1181 where: str = "",
1182 components: Optional[bool] = None,
1183 bind: Optional[Mapping[str, Any]] = None,
1184 check: bool = True,
1185 **kwargs: Any,
1186 ) -> queries.DataCoordinateQueryResults:
1187 # Docstring inherited from lsst.daf.butler.registry.Registry
1188 dimensions = ensure_iterable(dimensions)
1189 requestedDimensions = self.dimensions.extract(dimensions)
1190 doomed_by: list[str] = []
1191 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1192 dataset_composition, collections = self._standardize_query_dataset_args(
1193 datasets, collections, components, doomed_by=doomed_by
1194 )
1195 summary = queries.QuerySummary(
1196 requested=requestedDimensions,
1197 data_id=data_id,
1198 expression=where,
1199 bind=bind,
1200 defaults=self.defaults.dataId,
1201 check=check,
1202 datasets=dataset_composition.keys(),
1203 )
1204 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1205 for datasetType in dataset_composition.keys():
1206 builder.joinDataset(datasetType, collections, isResult=False)
1207 query = builder.finish()
1209 return queries.DataCoordinateQueryResults(query)
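# --- Usage sketch (illustrative; not part of this module) -------------------
# Querying data IDs for a set of dimensions, constrained by the existence of a
# dataset in a collection.  The dimension, dataset type, skymap, and collection
# names are hypothetical, as is the ``registry.yaml`` configuration file.
from lsst.daf.butler.registries.sql import SqlRegistry

registry = SqlRegistry.fromConfig("registry.yaml")
data_ids = registry.queryDataIds(
    ["tract", "patch"],
    datasets="deepCoadd",
    collections="HSC/runs/RC2",
    where="skymap = 'hsc_rings_v1'",
)
for data_id in data_ids:
    print(data_id)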
1211 def queryDimensionRecords(
1212 self,
1213 element: Union[DimensionElement, str],
1214 *,
1215 dataId: Optional[DataId] = None,
1216 datasets: Any = None,
1217 collections: Any = None,
1218 where: str = "",
1219 components: Optional[bool] = None,
1220 bind: Optional[Mapping[str, Any]] = None,
1221 check: bool = True,
1222 **kwargs: Any,
1223 ) -> queries.DimensionRecordQueryResults:
1224 # Docstring inherited from lsst.daf.butler.registry.Registry
1225 if not isinstance(element, DimensionElement):
1226 try:
1227 element = self.dimensions[element]
1228 except KeyError as e:
1229 raise DimensionNameError(
1230 f"No such dimension '{element}', available dimensions: "
1231 + str(self.dimensions.getStaticElements())
1232 ) from e
1233 doomed_by: list[str] = []
1234 data_id = self._standardize_query_data_id_args(dataId, doomed_by=doomed_by, **kwargs)
1235 dataset_composition, collections = self._standardize_query_dataset_args(
1236 datasets, collections, components, doomed_by=doomed_by
1237 )
1238 summary = queries.QuerySummary(
1239 requested=element.graph,
1240 data_id=data_id,
1241 expression=where,
1242 bind=bind,
1243 defaults=self.defaults.dataId,
1244 check=check,
1245 datasets=dataset_composition.keys(),
1246 )
1247 builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
1248 for datasetType in dataset_composition.keys():
1249 builder.joinDataset(datasetType, collections, isResult=False)
1250 query = builder.finish().with_record_columns(element)
1251 return queries.DatabaseDimensionRecordQueryResults(query, element)
1253 def queryDatasetAssociations(
1254 self,
1255 datasetType: Union[str, DatasetType],
1256 collections: Any = ...,
1257 *,
1258 collectionTypes: Iterable[CollectionType] = CollectionType.all(),
1259 flattenChains: bool = False,
1260 ) -> Iterator[DatasetAssociation]:
1261 # Docstring inherited from lsst.daf.butler.registry.Registry
1262 if collections is None:
1263 if not self.defaults.collections:
1264 raise NoDefaultCollectionError(
1265 "No collections provided to queryDatasetAssociations, "
1266 "and no defaults from registry construction."
1267 )
1268 collections = self.defaults.collections
1269 collections = CollectionWildcard.from_expression(collections)
1270 backend = queries.SqlQueryBackend(self._db, self._managers)
1271 parent_dataset_type, _ = backend.resolve_single_dataset_type_wildcard(datasetType, components=False)
1272 timespan_tag = DatasetColumnTag(parent_dataset_type.name, "timespan")
1273 collection_tag = DatasetColumnTag(parent_dataset_type.name, "collection")
1274 for parent_collection_record in backend.resolve_collection_wildcard(
1275 collections,
1276 collection_types=frozenset(collectionTypes),
1277 flatten_chains=flattenChains,
1278 ):
1279 # Resolve this possibly-chained collection into a list of
1280 # non-CHAINED collections that actually hold datasets of this
1281 # type.
1282 candidate_collection_records = backend.resolve_dataset_collections(
1283 parent_dataset_type,
1284 CollectionWildcard.from_names([parent_collection_record.name]),
1285 allow_calibration_collections=True,
1286 governor_constraints={},
1287 )
1288 if not candidate_collection_records:
1289 continue
1290 with backend.context() as context:
1291 relation = backend.make_dataset_query_relation(
1292 parent_dataset_type,
1293 candidate_collection_records,
1294 columns={"dataset_id", "run", "timespan", "collection"},
1295 context=context,
1296 )
1297 reader = queries.DatasetRefReader(
1298 parent_dataset_type,
1299 translate_collection=lambda k: self._managers.collections[k].name,
1300 full=False,
1301 )
1302 for row in context.fetch_iterable(relation):
1303 ref = reader.read(row)
1304 collection_record = self._managers.collections[row[collection_tag]]
1305 if collection_record.type is CollectionType.CALIBRATION:
1306 timespan = row[timespan_tag]
1307 else:
1308 # For backwards compatibility and (possibly?) user
1309 # convenience we continue to define the timespan of a
1310 # DatasetAssociation row for a non-CALIBRATION
1311 # collection to be None rather than a fully unbounded
1312 # timespan.
1313 timespan = None
1314 yield DatasetAssociation(ref=ref, collection=collection_record.name, timespan=timespan)
1316 storageClasses: StorageClassFactory
1317 """All storage classes known to the registry (`StorageClassFactory`).
1318 """