Coverage for python/lsst/daf/butler/registry/_registry.py : 12%

1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "Registry",
26)
28from collections import defaultdict
29import contextlib
30import logging
31from typing import (
32 Any,
33 Dict,
34 Iterable,
35 Iterator,
36 List,
37 Mapping,
38 Optional,
39 Set,
40 Type,
41 TYPE_CHECKING,
42 Union,
43)
45import sqlalchemy
47from ..core import (
48 Config,
49 DataCoordinate,
50 DataCoordinateIterable,
51 DataId,
52 DatasetRef,
53 DatasetType,
54 ddl,
55 Dimension,
56 DimensionElement,
57 DimensionGraph,
58 DimensionRecord,
59 DimensionUniverse,
60 NamedKeyMapping,
61 NameLookupMapping,
62 StorageClassFactory,
63)
64from . import queries
65from ..core.utils import doImport, iterable, transactional
66from ._config import RegistryConfig
67from ._collectionType import CollectionType
68from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError
69from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch, Ellipsis
70from .interfaces import ChainedCollectionRecord, RunRecord
71from .versions import ButlerVersionsManager, DigestMismatchError
73if TYPE_CHECKING:  # coverage: line 73 didn't jump to line 74, because the condition on line 73 was never true
74 from ..butlerConfig import ButlerConfig
75 from .interfaces import (
76 ButlerAttributeManager,
77 CollectionManager,
78 Database,
79 OpaqueTableStorageManager,
80 DimensionRecordStorageManager,
81 DatasetRecordStorageManager,
82 DatastoreRegistryBridgeManager,
83 )
86_LOG = logging.getLogger(__name__)
89class Registry:
90 """Registry interface.
92 Parameters
93 ----------
94 database : `Database`
95 Database instance to store Registry.
96 universe : `DimensionUniverse`
97 Full set of dimensions for Registry.
98 attributes : `type`
99 Manager class implementing `ButlerAttributeManager`.
100 opaque : `type`
101 Manager class implementing `OpaqueTableStorageManager`.
102 dimensions : `type`
103 Manager class implementing `DimensionRecordStorageManager`.
104 collections : `type`
105 Manager class implementing `CollectionManager`.
106 datasets : `type`
107 Manager class implementing `DatasetRecordStorageManager`.
108 datastoreBridges : `type`
109 Manager class implementing `DatastoreRegistryBridgeManager`.
110 writeable : `bool`, optional
111 If `True` then the Registry will support write operations.
112 create : `bool`, optional
113 If `True` then the database schema will be initialized; it must be
114 empty before the Registry is instantiated.
115 """
117 defaultConfigFile: Optional[str] = None
118 """Path to configuration defaults. Accessed within the ``config`` resource
119 or relative to a search path. Can be None if no defaults specified.
120 """
122 @classmethod
123 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], create: bool = False,
124 butlerRoot: Optional[str] = None, writeable: bool = True) -> Registry:
125 """Create `Registry` subclass instance from `config`.
127 Uses ``registry.cls`` from `config` to determine which subclass to
128 instantiate.
130 Parameters
131 ----------
132 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
133 Registry configuration.
134 create : `bool`, optional
135 If `True`, assume the database is empty and create a new Registry schema.
136 butlerRoot : `str`, optional
137 Path to the repository root this `Registry` will manage.
138 writeable : `bool`, optional
139 If `True` (default) create a read-write connection to the database.
141 Returns
142 -------
143 registry : `Registry` (subclass)
144 A new `Registry` subclass instance.
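
        Examples
        --------
        A minimal sketch of opening a read-only registry; the configuration
        path is a placeholder, and ``Registry`` is assumed to be re-exported
        by the enclosing package::

            from lsst.daf.butler.registry import Registry

            registry = Registry.fromConfig("/repo/butler.yaml", writeable=False)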
145 """
146 if not isinstance(config, RegistryConfig):
147 if isinstance(config, str) or isinstance(config, Config):
148 config = RegistryConfig(config)
149 else:
150 raise ValueError("Incompatible Registry configuration: {}".format(config))
151 config.replaceRoot(butlerRoot)
152 DatabaseClass = config.getDatabaseClass()
153 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0),
154 namespace=config.get("namespace"), writeable=writeable)
155 universe = DimensionUniverse(config)
156 attributes = doImport(config["managers", "attributes"])
157 opaque = doImport(config["managers", "opaque"])
158 dimensions = doImport(config["managers", "dimensions"])
159 collections = doImport(config["managers", "collections"])
160 datasets = doImport(config["managers", "datasets"])
161 datastoreBridges = doImport(config["managers", "datastores"])
163 return cls(database, universe, dimensions=dimensions, attributes=attributes, opaque=opaque,
164 collections=collections, datasets=datasets, datastoreBridges=datastoreBridges,
165 writeable=writeable, create=create)
167 def __init__(self, database: Database, universe: DimensionUniverse, *,
168 attributes: Type[ButlerAttributeManager],
169 opaque: Type[OpaqueTableStorageManager],
170 dimensions: Type[DimensionRecordStorageManager],
171 collections: Type[CollectionManager],
172 datasets: Type[DatasetRecordStorageManager],
173 datastoreBridges: Type[DatastoreRegistryBridgeManager],
174 writeable: bool = True,
175 create: bool = False):
176 self._db = database
177 self.storageClasses = StorageClassFactory()
178 with self._db.declareStaticTables(create=create) as context:
179 self._attributes = attributes.initialize(self._db, context)
180 self._dimensions = dimensions.initialize(self._db, context, universe=universe)
181 self._collections = collections.initialize(self._db, context)
182 self._datasets = datasets.initialize(self._db, context,
183 collections=self._collections,
184 universe=self.dimensions)
185 self._opaque = opaque.initialize(self._db, context)
186 self._datastoreBridges = datastoreBridges.initialize(self._db, context,
187 opaque=self._opaque,
188 datasets=datasets,
189 universe=self.dimensions)
190 versions = ButlerVersionsManager(
191 self._attributes,
192 dict(
193 attributes=self._attributes,
194 opaque=self._opaque,
195 dimensions=self._dimensions,
196 collections=self._collections,
197 datasets=self._datasets,
198 datastores=self._datastoreBridges,
199 )
200 )
201 # store managers and their versions in attributes table
202 context.addInitializer(lambda db: versions.storeManagersConfig())
203 context.addInitializer(lambda db: versions.storeManagersVersions())
205 if not create:
206 # verify that configured versions are compatible with schema
207 versions.checkManagersConfig()
208 versions.checkManagersVersions(writeable)
209 try:
210 versions.checkManagersDigests()
211 except DigestMismatchError as exc:
212 # A digest mismatch is potentially a serious error, but during
213 # development it can be benign; treat it as a warning for
214 # now.
215 _LOG.warning(f"Registry schema digest mismatch: {exc}")
217 self._collections.refresh()
218 self._datasets.refresh(universe=self._dimensions.universe)
220 def __str__(self) -> str:
221 return str(self._db)
223 def __repr__(self) -> str:
224 return f"Registry({self._db!r}, {self.dimensions!r})"
226 def isWriteable(self) -> bool:
227 """Return `True` if this registry allows write operations, and `False`
228 otherwise.
229 """
230 return self._db.isWriteable()
232 @property
233 def dimensions(self) -> DimensionUniverse:
234 """All dimensions recognized by this `Registry` (`DimensionUniverse`).
235 """
236 return self._dimensions.universe
238 @contextlib.contextmanager
239 def transaction(self) -> Iterator[None]:
240 """Return a context manager that represents a transaction.
241 """
242 # TODO make savepoint=False the default.
243 try:
244 with self._db.transaction():
245 yield
246 except BaseException:
247 # TODO: this clears the caches sometimes when we wouldn't actually
248 # need to. Can we avoid that?
249 self._dimensions.clearCaches()
250 raise
252 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
253 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
254 other data repository client.
256 Opaque table records can be added via `insertOpaqueData`, retrieved via
257 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
259 Parameters
260 ----------
261 tableName : `str`
262 Logical name of the opaque table. This may differ from the
263 actual name used in the database by a prefix and/or suffix.
264 spec : `ddl.TableSpec`
265 Specification for the table to be added.
266 """
267 self._opaque.register(tableName, spec)
269 @transactional
270 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
271 """Insert records into an opaque table.
273 Parameters
274 ----------
275 tableName : `str`
276 Logical name of the opaque table. Must match the name used in a
277 previous call to `registerOpaqueTable`.
278 data
279 Each additional positional argument is a dictionary that represents
280 a single row to be added.
281 """
282 self._opaque[tableName].insert(*data)
284 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]:
285 """Retrieve records from an opaque table.
287 Parameters
288 ----------
289 tableName : `str`
290 Logical name of the opaque table. Must match the name used in a
291 previous call to `registerOpaqueTable`.
292 where
293 Additional keyword arguments are interpreted as equality
294 constraints that restrict the returned rows (combined with AND);
295 keyword arguments are column names and values are the values they
296 must have.
298 Yields
299 ------
300 row : `dict`
301 A dictionary representing a single result row.
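
        Examples
        --------
        A sketch assuming an opaque table named ``"datastore_records"`` was
        registered earlier via `registerOpaqueTable`; the table and column
        names here are hypothetical::

            registry.insertOpaqueData("datastore_records",
                                      {"dataset_id": 42, "path": "a/b.fits"})
            for row in registry.fetchOpaqueData("datastore_records", dataset_id=42):
                print(row["path"])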
302 """
303 yield from self._opaque[tableName].fetch(**where)
305 @transactional
306 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
307 """Remove records from an opaque table.
309 Parameters
310 ----------
311 tableName : `str`
312 Logical name of the opaque table. Must match the name used in a
313 previous call to `registerOpaqueTable`.
314 where
315 Additional keyword arguments are interpreted as equality
316 constraints that restrict the deleted rows (combined with AND);
317 keyword arguments are column names and values are the values they
318 must have.
319 """
320 self._opaque[tableName].delete(**where)
322 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED) -> None:
323 """Add a new collection if one with the given name does not exist.
325 Parameters
326 ----------
327 name : `str`
328 The name of the collection to create.
329 type : `CollectionType`
330 Enum value indicating the type of collection to create.
332 Notes
333 -----
334 This method cannot be called within transactions, as it needs to be
335 able to perform its own transaction to be concurrent.
336 """
337 self._collections.register(name, type)
339 def getCollectionType(self, name: str) -> CollectionType:
340 """Return an enumeration value indicating the type of the given
341 collection.
343 Parameters
344 ----------
345 name : `str`
346 The name of the collection.
348 Returns
349 -------
350 type : `CollectionType`
351 Enum value indicating the type of this collection.
353 Raises
354 ------
355 MissingCollectionError
356 Raised if no collection with the given name exists.
357 """
358 return self._collections.find(name).type
360 def registerRun(self, name: str) -> None:
361 """Add a new run if one with the given name does not exist.
363 Parameters
364 ----------
365 name : `str`
366 The name of the run to create.
368 Notes
369 -----
370 This method cannot be called within transactions, as it needs to be
371 able to perform its own transaction to be concurrent.
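
        Examples
        --------
        A sketch registering a hypothetical run and a TAGGED collection;
        ``CollectionType`` is assumed to be imported from this package::

            registry.registerRun("u/someone/run")
            registry.registerCollection("u/someone/tagged", CollectionType.TAGGED)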
372 """
373 self._collections.register(name, CollectionType.RUN)
375 @transactional
376 def removeCollection(self, name: str) -> None:
377 """Completely remove the given collection.
379 Parameters
380 ----------
381 name : `str`
382 The name of the collection to remove.
384 Raises
385 ------
386 MissingCollectionError
387 Raised if no collection with the given name exists.
389 Notes
390 -----
391 If this is a `~CollectionType.RUN` collection, all datasets and quanta
392 in it are also fully removed. This requires that those datasets be
393 removed (or at least trashed) from any datastores that hold them first.
395 A collection may not be deleted as long as it is referenced by a
396 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
397 be deleted or redefined first.
398 """
399 self._collections.remove(name)
401 def getCollectionChain(self, parent: str) -> CollectionSearch:
402 """Return the child collections in a `~CollectionType.CHAINED`
403 collection.
405 Parameters
406 ----------
407 parent : `str`
408 Name of the chained collection. Must have already been added via
409 a call to `Registry.registerCollection`.
411 Returns
412 -------
413 children : `CollectionSearch`
414 An object that defines the search path of the collection.
415 See :ref:`daf_butler_collection_expressions` for more information.
417 Raises
418 ------
419 MissingCollectionError
420 Raised if ``parent`` does not exist in the `Registry`.
421 TypeError
422 Raised if ``parent`` does not correspond to a
423 `~CollectionType.CHAINED` collection.
424 """
425 record = self._collections.find(parent)
426 if record.type is not CollectionType.CHAINED:
427 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
428 assert isinstance(record, ChainedCollectionRecord)
429 return record.children
431 @transactional
432 def setCollectionChain(self, parent: str, children: Any) -> None:
433 """Define or redefine a `~CollectionType.CHAINED` collection.
435 Parameters
436 ----------
437 parent : `str`
438 Name of the chained collection. Must have already been added via
439 a call to `Registry.registerCollection`.
440 children : `Any`
441 An expression defining an ordered search of child collections,
442 generally an iterable of `str`. Restrictions on the dataset types
443 to be searched can also be included, by passing mapping or an
444 iterable containing tuples; see
445 :ref:`daf_butler_collection_expressions` for more information.
447 Raises
448 ------
449 MissingCollectionError
450 Raised when any of the given collections do not exist in the
451 `Registry`.
452 TypeError
453 Raised if ``parent`` does not correspond to a
454 `~CollectionType.CHAINED` collection.
455 ValueError
456 Raised if the given collections contain a cycle.
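
        Examples
        --------
        A sketch defining a chain over two hypothetical child collections::

            registry.registerCollection("defaults", CollectionType.CHAINED)
            registry.setCollectionChain("defaults", ["calib", "raw/all"])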
457 """
458 record = self._collections.find(parent)
459 if record.type is not CollectionType.CHAINED:
460 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
461 assert isinstance(record, ChainedCollectionRecord)
462 children = CollectionSearch.fromExpression(children)
463 if children != record.children:
464 record.update(self._collections, children)
466 def registerDatasetType(self, datasetType: DatasetType) -> bool:
467 """
468 Add a new `DatasetType` to the Registry.
470 It is not an error to register the same `DatasetType` twice.
472 Parameters
473 ----------
474 datasetType : `DatasetType`
475 The `DatasetType` to be added.
477 Returns
478 -------
479 inserted : `bool`
480 `True` if ``datasetType`` was inserted, `False` if an identical
481 existing `DatasetType` was found. Note that in either case the
482 DatasetType is guaranteed to be defined in the Registry
483 consistently with the given definition.
485 Raises
486 ------
487 ValueError
488 Raised if the dimensions or storage class are invalid.
489 ConflictingDefinitionError
490 Raised if this DatasetType is already registered with a different
491 definition.
493 Notes
494 -----
495 This method cannot be called within transactions, as it needs to be
496 able to perform its own transaction to be concurrent.
497 """
498 _, inserted = self._datasets.register(datasetType)
499 return inserted
501 def getDatasetType(self, name: str) -> DatasetType:
502 """Get the `DatasetType`.
504 Parameters
505 ----------
506 name : `str`
507 Name of the type.
509 Returns
510 -------
511 type : `DatasetType`
512 The `DatasetType` associated with the given name.
514 Raises
515 ------
516 KeyError
517 Raised if the named DatasetType could not be found in the registry.
518 """
519 return self._datasets[name].datasetType
521 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *,
522 collections: Any, **kwargs: Any) -> Optional[DatasetRef]:
523 """Find a dataset given its `DatasetType` and data ID.
525 This can be used to obtain a `DatasetRef` that permits the dataset to
526 be read from a `Datastore`. If the dataset is a component and cannot
527 be found using the provided dataset type, a dataset ref for the parent
528 will be returned instead but with the correct dataset type.
530 Parameters
531 ----------
532 datasetType : `DatasetType` or `str`
533 A `DatasetType` or the name of one.
534 dataId : `dict` or `DataCoordinate`, optional
535 A `dict`-like object containing the `Dimension` links that identify
536 the dataset within a collection.
537 collections
538 An expression that fully or partially identifies the collections
539 to search for the dataset, such as a `str`, `DatasetType`, or
540 iterable thereof. See :ref:`daf_butler_collection_expressions`
541 for more information.
542 **kwargs
543 Additional keyword arguments passed to
544 `DataCoordinate.standardize` to convert ``dataId`` to a true
545 `DataCoordinate` or augment an existing one.
547 Returns
548 -------
549 ref : `DatasetRef` or `None`
550 A reference to the dataset, or `None` if no matching Dataset
551 was found.
553 Raises
554 ------
555 LookupError
556 Raised if one or more data ID keys are missing.
557 KeyError
558 Raised if the dataset type does not exist.
559 MissingCollectionError
560 Raised if any of ``collections`` does not exist in the registry.
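
        Examples
        --------
        A sketch looking up a single flat-field dataset; the dataset type
        name, dimension values, and collection name are illustrative::

            ref = registry.findDataset("flat",
                                       instrument="HSC",
                                       detector=12,
                                       physical_filter="HSC-R",
                                       collections="HSC/calib")
            if ref is not None:
                print(ref.dataId)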
561 """
562 if isinstance(datasetType, DatasetType):
563 storage = self._datasets[datasetType.name]
564 else:
565 storage = self._datasets[datasetType]
566 dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions,
567 universe=self.dimensions, **kwargs)
568 collections = CollectionSearch.fromExpression(collections)
569 for collectionRecord in collections.iter(self._collections, datasetType=storage.datasetType):
570 result = storage.find(collectionRecord, dataId)
571 if result is not None:
572 return result
574 return None
576 @transactional
577 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId],
578 run: str) -> List[DatasetRef]:
579 """Insert one or more datasets into the `Registry`
581 This always adds new datasets; to associate existing datasets with
582 a new collection, use ``associate``.
584 Parameters
585 ----------
586 datasetType : `DatasetType` or `str`
587 A `DatasetType` or the name of one.
588 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
589 Dimension-based identifiers for the new datasets.
590 run : `str`
591 The name of the run that produced the datasets.
593 Returns
594 -------
595 refs : `list` of `DatasetRef`
596 Resolved `DatasetRef` instances for all given data IDs (in the same
597 order).
599 Raises
600 ------
601 ConflictingDefinitionError
602 Raised if a dataset with the same dataset type and data ID as one
603 of those given already exists in ``run``.
604 MissingCollectionError
605 Raised if ``run`` does not exist in the registry.
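
        Examples
        --------
        A sketch inserting one raw dataset into a hypothetical RUN
        collection; the dimension values are illustrative and their
        dimension records must already exist::

            registry.registerRun("HSC/raw/ingest")
            (ref,) = registry.insertDatasets(
                "raw",
                [{"instrument": "HSC", "exposure": 903342, "detector": 10}],
                run="HSC/raw/ingest",
            )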
606 """
607 if isinstance(datasetType, DatasetType):
608 storage = self._datasets.find(datasetType.name)
609 if storage is None:
610 raise LookupError(f"DatasetType '{datasetType}' has not been registered.")
611 else:
612 storage = self._datasets.find(datasetType)
613 if storage is None:
614 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.")
615 runRecord = self._collections.find(run)
616 if runRecord.type is not CollectionType.RUN:
617 raise TypeError(f"Given collection is of type {runRecord.type.name}; RUN collection required.")
618 assert isinstance(runRecord, RunRecord)
619 expandedDataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions)
620 for dataId in dataIds]
621 try:
622 refs = list(storage.insert(runRecord, expandedDataIds))
623 except sqlalchemy.exc.IntegrityError as err:
624 raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting "
625 f"one or more datasets of type {storage.datasetType} into "
626 f"collection '{run}'. "
627 f"This probably means a dataset with the same data ID "
628 f"and dataset type already exists, but it may also mean a "
629 f"dimension row is missing.") from err
630 return refs
632 def getDataset(self, id: int) -> Optional[DatasetRef]:
633 """Retrieve a Dataset entry.
635 Parameters
636 ----------
637 id : `int`
638 The unique identifier for the dataset.
640 Returns
641 -------
642 ref : `DatasetRef` or `None`
643 A ref to the Dataset, or `None` if no matching Dataset
644 was found.
645 """
646 ref = self._datasets.getDatasetRef(id, universe=self.dimensions)
647 if ref is None:
648 return None
649 return ref
651 @transactional
652 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
653 """Remove datasets from the Registry.
655 The datasets will be removed unconditionally from all collections, and
656 any `Quantum` that consumed this dataset will instead be marked as
657 having a NULL input. `Datastore` records will *not* be deleted; the
658 caller is responsible for ensuring that the dataset has already been
659 removed from all Datastores.
661 Parameters
662 ----------
663 refs : `Iterable` of `DatasetRef`
664 References to the datasets to be removed. Must include a valid
665 ``id`` attribute, and should be considered invalidated upon return.
667 Raises
668 ------
669 AmbiguousDatasetError
670 Raised if any ``ref.id`` is `None`.
671 OrphanedRecordError
672 Raised if any dataset is still present in any `Datastore`.
673 """
674 for datasetType, refsForType in DatasetRef.groupByType(refs).items():
675 storage = self._datasets.find(datasetType.name)
676 assert storage is not None
677 try:
678 storage.delete(refsForType)
679 except sqlalchemy.exc.IntegrityError as err:
680 raise OrphanedRecordError("One or more datasets is still "
681 "present in one or more Datastores.") from err
683 @transactional
684 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
685 """Add existing datasets to a `~CollectionType.TAGGED` collection.
687 If a `DatasetRef` with the exact same integer ID is already in the
688 collection, nothing is changed. If a `DatasetRef` with the same
689 `DatasetType` and data ID but a different integer ID exists in the
690 collection, `ConflictingDefinitionError` is raised.
692 Parameters
693 ----------
694 collection : `str`
695 Indicates the collection the datasets should be associated with.
696 refs : `Iterable` [ `DatasetRef` ]
697 An iterable of resolved `DatasetRef` instances that already exist
698 in this `Registry`.
700 Raises
701 ------
702 ConflictingDefinitionError
703 Raised if a dataset with the given `DatasetRef` already exists in
704 the given collection.
705 AmbiguousDatasetError
706 Raised if ``any(ref.id is None for ref in refs)``.
707 MissingCollectionError
708 Raised if ``collection`` does not exist in the registry.
709 TypeError
710 Raised if adding new datasets to the given ``collection`` is not
711 allowed.
712 """
713 collectionRecord = self._collections.find(collection)
714 if collectionRecord.type is not CollectionType.TAGGED:
715 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.")
716 for datasetType, refsForType in DatasetRef.groupByType(refs).items():
717 storage = self._datasets.find(datasetType.name)
718 assert storage is not None
719 try:
720 storage.associate(collectionRecord, refsForType)
721 except sqlalchemy.exc.IntegrityError as err:
722 raise ConflictingDefinitionError(
723 f"Constraint violation while associating dataset of type {datasetType.name} with "
724 f"collection {collection}. This probably means that one or more datasets with the same "
725 f"dataset type and data ID already exist in the collection, but it may also indicate "
726 f"that the datasets do not exist."
727 ) from err
729 @transactional
730 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
731 """Remove existing datasets from a `~CollectionType.TAGGED` collection.
733 ``collection`` and ``ref`` combinations that are not currently
734 associated are silently ignored.
736 Parameters
737 ----------
738 collection : `str`
739 The collection the datasets should no longer be associated with.
740 refs : `Iterable` [ `DatasetRef` ]
741 An iterable of resolved `DatasetRef` instances that already exist
742 in this `Registry`.
744 Raises
745 ------
746 AmbiguousDatasetError
747 Raised if any of the given dataset references is unresolved.
748 MissingCollectionError
749 Raised if ``collection`` does not exist in the registry.
750 TypeError
751 Raised if removing datasets from the given ``collection`` is not
752 allowed.
753 """
754 collectionRecord = self._collections.find(collection)
755 if collectionRecord.type is not CollectionType.TAGGED:
756 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; "
757 "expected TAGGED.")
758 for datasetType, refsForType in DatasetRef.groupByType(refs).items():
759 storage = self._datasets.find(datasetType.name)
760 assert storage is not None
761 storage.disassociate(collectionRecord, refsForType)
763 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
764 """Return an object that allows a new `Datastore` instance to
765 communicate with this `Registry`.
767 Returns
768 -------
769 manager : `DatastoreRegistryBridgeManager`
770 Object that mediates communication between this `Registry` and its
771 associated datastores.
772 """
773 return self._datastoreBridges
775 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
776 """Retrieve datastore locations for a given dataset.
778 Parameters
779 ----------
780 ref : `DatasetRef`
781 A reference to the dataset for which to retrieve storage
782 information.
784 Returns
785 -------
786 datastores : `Iterable` [ `str` ]
787 All the matching datastores holding this dataset.
789 Raises
790 ------
791 AmbiguousDatasetError
792 Raised if ``ref.id`` is `None`.
793 """
794 return self._datastoreBridges.findDatastores(ref)
796 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None,
797 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
798 **kwargs: Any) -> DataCoordinate:
799 """Expand a dimension-based data ID to include additional information.
801 Parameters
802 ----------
803 dataId : `DataCoordinate` or `dict`, optional
804 Data ID to be expanded; augmented and overridden by ``kwargs``.
805 graph : `DimensionGraph`, optional
806 Set of dimensions for the expanded ID. If `None`, the dimensions
807 will be inferred from the keys of ``dataId`` and ``kwargs``.
808 Dimensions that are in ``dataId`` or ``kwargs`` but not in ``graph``
809 are silently ignored, providing a way to extract and expand a
810 subset of a data ID.
811 records : `Mapping` [`str`, `DimensionRecord`], optional
812 Dimension record data to use before querying the database for that
813 data, keyed by element name.
814 **kwargs
815 Additional keywords are treated like additional key-value pairs for
816 ``dataId``, extending and overriding it.
818 Returns
819 -------
820 expanded : `DataCoordinate`
821 A data ID that includes full metadata for all of the dimensions it
822 identifies, i.e. guarantees that ``expanded.hasRecords()`` and
823 ``expanded.hasFull()`` both return `True`.
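
        Examples
        --------
        A sketch expanding a minimal exposure data ID; the dimension values
        are illustrative and must already have records in the registry::

            dataId = registry.expandDataId(instrument="HSC", exposure=903342)
            assert dataId.hasFull() and dataId.hasRecords()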
824 """
825 standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions, **kwargs)
826 if standardized.hasRecords():
827 return standardized
828 if records is None:
829 records = {}
830 elif isinstance(records, NamedKeyMapping):
831 records = records.byName()
832 else:
833 records = dict(records)
834 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
835 records.update(dataId.records.byName())
836 keys = standardized.byName()
837 for element in standardized.graph.primaryKeyTraversalOrder:
838 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
839 if record is ...:
840 if isinstance(element, Dimension) and keys.get(element.name) is None:
841 if element in standardized.graph.required:
842 raise LookupError(
843 f"No value or null value for required dimension {element.name}."
844 )
845 keys[element.name] = None
846 record = None
847 else:
848 storage = self._dimensions[element]
849 dataIdSet = DataCoordinateIterable.fromScalar(
850 DataCoordinate.standardize(keys, graph=element.graph)
851 )
852 fetched = tuple(storage.fetch(dataIdSet))
853 try:
854 (record,) = fetched
855 except ValueError:
856 record = None
857 records[element.name] = record
858 if record is not None:
859 for d in element.implied:
860 value = getattr(record, d.name)
861 if keys.setdefault(d.name, value) != value:
862 raise InconsistentDataIdError(
863 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
864 f"but {element.name} implies {d.name}={value!r}."
865 )
866 else:
867 if element in standardized.graph.required:
868 raise LookupError(
869 f"Could not fetch record for required dimension {element.name} via keys {keys}."
870 )
871 if element.alwaysJoin:
872 raise InconsistentDataIdError(
873 f"Could not fetch record for element {element.name} via keys {keys}, ",
874 "but it is marked alwaysJoin=True; this means one or more dimensions are not "
875 "related."
876 )
877 for d in element.implied:
878 keys.setdefault(d.name, None)
879 records.setdefault(d.name, None)
880 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
882 def insertDimensionData(self, element: Union[DimensionElement, str],
883 *data: Union[Mapping[str, Any], DimensionRecord],
884 conform: bool = True) -> None:
885 """Insert one or more dimension records into the database.
887 Parameters
888 ----------
889 element : `DimensionElement` or `str`
890 The `DimensionElement` or name thereof that identifies the table
891 records will be inserted into.
892 data : `dict` or `DimensionRecord` (variadic)
893 One or more records to insert.
894 conform : `bool`, optional
895 If `False` (`True` is default) perform no checking or conversions,
896 and assume that ``element`` is a `DimensionElement` instance and
897 ``data`` is one or more `DimensionRecord` instances of the
898 appropriate subclass.
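
        Examples
        --------
        A sketch inserting a hypothetical instrument record as a plain
        `dict`; the field names follow the default dimension universe and
        should be checked against the actual schema in use::

            registry.insertDimensionData(
                "instrument",
                {"name": "DummyCam", "visit_max": 1000, "exposure_max": 1000,
                 "detector_max": 4},
            )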
899 """
900 if conform:
901 if isinstance(element, str):
902 element = self.dimensions[element]
903 records = [row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row)
904 for row in data]
905 else:
906 # Ignore typing since caller said to trust them with conform=False.
907 records = data # type: ignore
908 storage = self._dimensions[element] # type: ignore
909 storage.insert(*records)
911 def syncDimensionData(self, element: Union[DimensionElement, str],
912 row: Union[Mapping[str, Any], DimensionRecord],
913 conform: bool = True) -> bool:
914 """Synchronize the given dimension record with the database, inserting
915 if it does not already exist and comparing values if it does.
917 Parameters
918 ----------
919 element : `DimensionElement` or `str`
920 The `DimensionElement` or name thereof that identifies the table
921 records will be inserted into.
922 row : `dict` or `DimensionRecord`
923 The record to insert.
924 conform : `bool`, optional
925 If `False` (`True` is default) perform no checking or conversions,
926 and assume that ``element`` is a `DimensionElement` instance and
927 ``row`` is a `DimensionRecord` instance of the appropriate
928 subclass.
930 Returns
931 -------
932 inserted : `bool`
933 `True` if a new row was inserted, `False` otherwise.
935 Raises
936 ------
937 ConflictingDefinitionError
938 Raised if the record exists in the database (according to primary
939 key lookup) but is inconsistent with the given one.
941 Notes
942 -----
943 This method cannot be called within transactions, as it needs to be
944 able to perform its own transaction to be concurrent.
945 """
946 if conform:
947 if isinstance(element, str):
948 element = self.dimensions[element]
949 record = row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row)
950 else:
951 # Ignore typing since caller said to trust them with conform=False.
952 record = row # type: ignore
953 storage = self._dimensions[element] # type: ignore
954 return storage.sync(record)
956 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None
957 ) -> Iterator[DatasetType]:
958 """Iterate over the dataset types whose names match an expression.
960 Parameters
961 ----------
962 expression : `Any`, optional
963 An expression that fully or partially identifies the dataset types
964 to return, such as a `str`, `re.Pattern`, or iterable thereof.
965 `...` can be used to return all dataset types, and is the default.
966 See :ref:`daf_butler_dataset_type_expressions` for more
967 information.
968 components : `bool`, optional
969 If `True`, apply all expression patterns to component dataset type
970 names as well. If `False`, never apply patterns to components.
971 If `None` (default), apply patterns to components only if their
972 parent datasets were not matched by the expression.
973 Fully-specified component datasets (`str` or `DatasetType`
974 instances) are always included.
976 Yields
977 ------
978 datasetType : `DatasetType`
979 A `DatasetType` instance whose name matches ``expression``.
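
        Examples
        --------
        A sketch matching dataset types by regular expression; the pattern
        is illustrative::

            import re

            for datasetType in registry.queryDatasetTypes(re.compile(r"^calexp")):
                print(datasetType.name)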
980 """
981 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name)
982 if wildcard is Ellipsis:
983 for datasetType in self._datasets:
984 # The dataset type can no longer be a component
985 yield datasetType
986 if components and datasetType.isComposite():
987 # Automatically create the component dataset types
988 for component in datasetType.makeAllComponentDatasetTypes():
989 yield component
990 return
991 done: Set[str] = set()
992 for name in wildcard.strings:
993 storage = self._datasets.find(name)
994 if storage is not None:
995 done.add(storage.datasetType.name)
996 yield storage.datasetType
997 if wildcard.patterns:
998 # If components (the argument) is None, we'll save component
999 # datasets that we might want to match, but only if their parents
1000 # didn't get included.
1001 componentsForLater = []
1002 for registeredDatasetType in self._datasets:
1003 # Components are not stored in registry so expand them here
1004 allDatasetTypes = [registeredDatasetType] \
1005 + registeredDatasetType.makeAllComponentDatasetTypes()
1006 for datasetType in allDatasetTypes:
1007 if datasetType.name in done:
1008 continue
1009 parentName, componentName = datasetType.nameAndComponent()
1010 if componentName is not None and not components:
1011 if components is None and parentName not in done:
1012 componentsForLater.append(datasetType)
1013 continue
1014 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
1015 done.add(datasetType.name)
1016 yield datasetType
1017 # Go back and try to match saved components.
1018 for datasetType in componentsForLater:
1019 parentName, _ = datasetType.nameAndComponent()
1020 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
1021 yield datasetType
1023 def queryCollections(self, expression: Any = ...,
1024 datasetType: Optional[DatasetType] = None,
1025 collectionType: Optional[CollectionType] = None,
1026 flattenChains: bool = False,
1027 includeChains: Optional[bool] = None) -> Iterator[str]:
1028 """Iterate over the collections whose names match an expression.
1030 Parameters
1031 ----------
1032 expression : `Any`, optional
1033 An expression that fully or partially identifies the collections
1034 to return, such as a `str`, `re.Pattern`, or iterable thereof.
1035 `...` can be used to return all collections, and is the default.
1036 See :ref:`daf_butler_collection_expressions` for more
1037 information.
1038 datasetType : `DatasetType`, optional
1039 If provided, only yield collections that should be searched for
1040 this dataset type according to ``expression``. If this is
1041 not provided, any dataset type restrictions in ``expression`` are
1042 ignored.
1043 collectionType : `CollectionType`, optional
1044 If provided, only yield collections of this type.
1045 flattenChains : `bool`, optional
1046 If `True` (`False` is default), recursively yield the child
1047 collections of matching `~CollectionType.CHAINED` collections.
1048 includeChains : `bool`, optional
1049 If `True`, yield records for matching `~CollectionType.CHAINED`
1050 collections. Default is the opposite of ``flattenChains``: include
1051 either CHAINED collections or their children, but not both.
1053 Yields
1054 ------
1055 collection : `str`
1056 The name of a collection that matches ``expression``.
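
        Examples
        --------
        A sketch listing every RUN collection in the repository::

            for name in registry.queryCollections(collectionType=CollectionType.RUN):
                print(name)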
1057 """
1058 query = CollectionQuery.fromExpression(expression)
1059 for record in query.iter(self._collections, datasetType=datasetType, collectionType=collectionType,
1060 flattenChains=flattenChains, includeChains=includeChains):
1061 yield record.name
1063 def makeQueryBuilder(self, summary: queries.QuerySummary) -> queries.QueryBuilder:
1064 """Return a `QueryBuilder` instance capable of constructing and
1065 managing more complex queries than those obtainable via `Registry`
1066 interfaces.
1068 This is an advanced interface; downstream code should prefer
1069 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
1070 are sufficient.
1072 Parameters
1073 ----------
1074 summary : `queries.QuerySummary`
1075 Object describing and categorizing the full set of dimensions that
1076 will be included in the query.
1078 Returns
1079 -------
1080 builder : `queries.QueryBuilder`
1081 Object that can be used to construct and perform advanced queries.
1082 """
1083 return queries.QueryBuilder(
1084 summary,
1085 queries.RegistryManagers(
1086 collections=self._collections,
1087 dimensions=self._dimensions,
1088 datasets=self._datasets
1089 )
1090 )
1092 def queryDatasets(self, datasetType: Any, *,
1093 collections: Any,
1094 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1095 dataId: Optional[DataId] = None,
1096 where: Optional[str] = None,
1097 deduplicate: bool = False,
1098 components: Optional[bool] = None,
1099 **kwargs: Any) -> queries.DatasetQueryResults:
1100 """Query for and iterate over dataset references matching user-provided
1101 criteria.
1103 Parameters
1104 ----------
1105 datasetType
1106 An expression that fully or partially identifies the dataset types
1107 to be queried. Allowed types include `DatasetType`, `str`,
1108 `re.Pattern`, and iterables thereof. The special value `...` can
1109 be used to query all dataset types. See
1110 :ref:`daf_butler_dataset_type_expressions` for more information.
1111 collections
1112 An expression that fully or partially identifies the collections
1113 to search for datasets, such as a `str`, `re.Pattern`, or iterable
1114 thereof. `...` can be used to return all collections. See
1115 :ref:`daf_butler_collection_expressions` for more information.
1116 dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
1117 Dimensions to include in the query (in addition to those used
1118 to identify the queried dataset type(s)), either to constrain
1119 the resulting datasets to those for which a matching dimension
1120 exists, or to relate the dataset type's dimensions to dimensions
1121 referenced by the ``dataId`` or ``where`` arguments.
1122 dataId : `dict` or `DataCoordinate`, optional
1123 A data ID whose key-value pairs are used as equality constraints
1124 in the query.
1125 where : `str`, optional
1126 A string expression similar to a SQL WHERE clause. May involve
1127 any column of a dimension table or (as a shortcut for the primary
1128 key column of a dimension table) dimension name. See
1129 :ref:`daf_butler_dimension_expressions` for more information.
1130 deduplicate : `bool`, optional
1131 If `True` (`False` is default), for each result data ID, only
1132 yield one `DatasetRef` of each `DatasetType`, from the first
1133 collection in which a dataset of that dataset type appears
1134 (according to the order of ``collections`` passed in). If `True`,
1135 ``collections`` must not contain regular expressions and may not
1136 be `...`.
1137 components : `bool`, optional
1138 If `True`, apply all dataset expression patterns to component
1139 dataset type names as well. If `False`, never apply patterns to
1140 components. If `None` (default), apply patterns to components only
1141 if their parent datasets were not matched by the expression.
1142 Fully-specified component datasets (`str` or `DatasetType`
1143 instances) are always included.
1144 **kwargs
1145 Additional keyword arguments are forwarded to
1146 `DataCoordinate.standardize` when processing the ``dataId``
1147 argument (and may be used to provide a constraining data ID even
1148 when the ``dataId`` argument is `None`).
1150 Returns
1151 -------
1152 refs : `queries.DatasetQueryResults`
1153 Dataset references matching the given query criteria.
1155 Raises
1156 ------
1157 TypeError
1158 Raised when the arguments are incompatible, such as when a
1159 collection wildcard is passed when ``deduplicate`` is `True`.
1161 Notes
1162 -----
1163 When multiple dataset types are queried in a single call, the
1164 results of this operation are equivalent to querying for each dataset
1165 type separately in turn, and no information about the relationships
1166 between datasets of different types is included. In contexts where
1167 that kind of information is important, the recommended pattern is to
1168 use `queryDataIds` to first obtain data IDs (possibly with the
1169 desired dataset types and collections passed as constraints to the
1170 query), and then use multiple (generally much simpler) calls to
1171 `queryDatasets` with the returned data IDs passed as constraints.
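
        Examples
        --------
        A sketch querying ``calexp`` datasets from a hypothetical
        collection, constrained by a ``where`` expression; all names and
        values are illustrative::

            refs = registry.queryDatasets(
                "calexp",
                collections="HSC/runs/RC2",
                where="detector = 10 AND visit > 903000",
                instrument="HSC",
            )
            for ref in refs:
                print(ref.dataId)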
1172 """
1173 # Standardize the collections expression.
1174 if deduplicate:
1175 collections = CollectionSearch.fromExpression(collections)
1176 else:
1177 collections = CollectionQuery.fromExpression(collections)
1178 # Standardize and expand the data ID provided as a constraint.
1179 standardizedDataId = self.expandDataId(dataId, **kwargs)
1181 # We can only query directly if given a non-component DatasetType
1182 # instance. If we were given an expression or str or a component
1183 # DatasetType instance, we'll populate this dict, recurse, and return.
1184 # If we already have a non-component DatasetType, it will remain None
1185 # and we'll run the query directly.
1186 composition: Optional[
1187 Dict[
1188 DatasetType, # parent dataset type
1189 List[Optional[str]] # component name, or None for parent
1190 ]
1191 ] = None
1192 if not isinstance(datasetType, DatasetType):
1193 # We were given a dataset type expression (which may be as simple
1194 # as a str). Loop over all matching datasets, delegating handling
1195 # of the `components` argument to queryDatasetTypes, as we populate
1196 # the composition dict.
1197 composition = defaultdict(list)
1198 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components):
1199 parentName, componentName = trueDatasetType.nameAndComponent()
1200 if componentName is not None:
1201 parentDatasetType = self.getDatasetType(parentName)
1202 composition.setdefault(parentDatasetType, []).append(componentName)
1203 else:
1204 composition.setdefault(trueDatasetType, []).append(None)
1205 elif datasetType.isComponent():
1206 # We were given a true DatasetType instance, but it's a component.
1207 # The composition dict will have exactly one item.
1208 parentName, componentName = datasetType.nameAndComponent()
1209 parentDatasetType = self.getDatasetType(parentName)
1210 composition = {parentDatasetType: [componentName]}
1211 if composition is not None:
1212 # We need to recurse. Do that once for each parent dataset type.
1213 chain = []
1214 for parentDatasetType, componentNames in composition.items():
1215 parentResults = self.queryDatasets(
1216 parentDatasetType,
1217 collections=collections,
1218 dimensions=dimensions,
1219 dataId=standardizedDataId,
1220 where=where,
1221 deduplicate=deduplicate
1222 )
1223 if isinstance(parentResults, queries.ParentDatasetQueryResults):
1224 chain.append(
1225 parentResults.withComponents(componentNames)
1226 )
1227 else:
1228 # Should only happen if we know there would be no results.
1229 assert isinstance(parentResults, queries.ChainedDatasetQueryResults) \
1230 and not parentResults._chain
1231 return queries.ChainedDatasetQueryResults(chain)
1232 # If we get here, there's no need to recurse (or we are already
1233 # recursing; there can only ever be one level of recursion).
1235 # The full set of dimensions in the query is the combination of those
1236 # needed for the DatasetType and those explicitly requested, if any.
1237 requestedDimensionNames = set(datasetType.dimensions.names)
1238 if dimensions is not None:
1239 requestedDimensionNames.update(self.dimensions.extract(dimensions).names)
1240 # Construct the summary structure needed to construct a QueryBuilder.
1241 summary = queries.QuerySummary(
1242 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames),
1243 dataId=standardizedDataId,
1244 expression=where,
1245 )
1246 builder = self.makeQueryBuilder(summary)
1247 # Add the dataset subquery to the query, telling the QueryBuilder to
1248 # include the rank of the selected collection in the results only if we
1249 # need to deduplicate. Note that if any of the collections are
1250 # actually wildcard expressions, and we've asked for deduplication,
1251 # this will raise TypeError for us.
1252 if not builder.joinDataset(datasetType, collections, isResult=True, deduplicate=deduplicate):
1253 return queries.ChainedDatasetQueryResults(())
1254 query = builder.finish()
1255 return queries.ParentDatasetQueryResults(self._db, query, components=[None])
1257 def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *,
1258 dataId: Optional[DataId] = None,
1259 datasets: Any = None,
1260 collections: Any = None,
1261 where: Optional[str] = None,
1262 components: Optional[bool] = None,
1263 **kwargs: Any) -> queries.DataCoordinateQueryResults:
1264 """Query for data IDs matching user-provided criteria.
1266 Parameters
1267 ----------
1268 dimensions : `Dimension` or `str`, or iterable thereof
1269 The dimensions of the data IDs to yield, as either `Dimension`
1270 instances or `str`. Will be automatically expanded to a complete
1271 `DimensionGraph`.
1272 dataId : `dict` or `DataCoordinate`, optional
1273 A data ID whose key-value pairs are used as equality constraints
1274 in the query.
1275 datasets : `Any`, optional
1276 An expression that fully or partially identifies dataset types
1277 that should constrain the yielded data IDs. For example, including
1278 "raw" here would constrain the yielded ``instrument``,
1279 ``exposure``, ``detector``, and ``physical_filter`` values to only
1280 those for which at least one "raw" dataset exists in
1281 ``collections``. Allowed types include `DatasetType`, `str`,
1282 `re.Pattern`, and iterables thereof. Unlike other dataset type
1283 expressions, ``...`` is not permitted - it doesn't make sense to
1284 constrain data IDs on the existence of *all* datasets.
1285 See :ref:`daf_butler_dataset_type_expressions` for more
1286 information.
1287 collections : `Any`, optional
1288 An expression that fully or partially identifies the collections
1289 to search for datasets, such as a `str`, `re.Pattern`, or iterable
1290 thereof. `...` can be used to return all collections. Must be
1291 provided if ``datasets`` is, and is ignored if it is not. See
1292 :ref:`daf_butler_collection_expressions` for more information.
1293 where : `str`, optional
1294 A string expression similar to a SQL WHERE clause. May involve
1295 any column of a dimension table or (as a shortcut for the primary
1296 key column of a dimension table) dimension name. See
1297 :ref:`daf_butler_dimension_expressions` for more information.
1298 components : `bool`, optional
1299 If `True`, apply all dataset expression patterns to component
1300 dataset type names as well. If `False`, never apply patterns to
1301 components. If `None` (default), apply patterns to components only
1302 if their parent datasets were not matched by the expression.
1303 Fully-specified component datasets (`str` or `DatasetType`
1304 instances) are always included.
1305 **kwargs
1306 Additional keyword arguments are forwarded to
1307 `DataCoordinate.standardize` when processing the ``dataId``
1308 argument (and may be used to provide a constraining data ID even
1309 when the ``dataId`` argument is `None`).
1311 Returns
1312 -------
1313 dataIds : `DataCoordinateQueryResults`
1314 Data IDs matching the given query parameters. These are guaranteed
1315 to identify all dimensions (`DataCoordinate.hasFull` returns
1316 `True`), but will not contain `DimensionRecord` objects
1317 (`DataCoordinate.hasRecords` returns `False`). Call
1318 `DataCoordinateQueryResults.expanded` on the returned object to
1319 fetch those (and consider using
1320 `DataCoordinateQueryResults.materialize` on the returned object
1321 first if the expected number of rows is very large). See
1322 documentation for those methods for additional information.
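
        Examples
        --------
        A sketch querying visit/detector data IDs constrained by the
        existence of ``raw`` datasets; the collection name and dimension
        values are illustrative::

            dataIds = registry.queryDataIds(
                ["visit", "detector"],
                datasets="raw",
                collections="HSC/raw/all",
                instrument="HSC",
            )
            for dataId in dataIds:
                print(dataId["visit"], dataId["detector"])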
1323 """
1324 dimensions = iterable(dimensions)
1325 standardizedDataId = self.expandDataId(dataId, **kwargs)
1326 standardizedDatasetTypes = set()
1327 requestedDimensions = self.dimensions.extract(dimensions)
1328 queryDimensionNames = set(requestedDimensions.names)
1329 if datasets is not None:
1330 if collections is None:
1331 raise TypeError("Cannot pass 'datasets' without 'collections'.")
1332 for datasetType in self.queryDatasetTypes(datasets, components=components):
1333 queryDimensionNames.update(datasetType.dimensions.names)
1334 # If any matched dataset type is a component, just operate on
1335 # its parent instead, because Registry doesn't know anything
1336 # about what components exist, and here (unlike queryDatasets)
1337 # we don't care about returning them.
1338 parentDatasetTypeName, componentName = datasetType.nameAndComponent()
1339 if componentName is not None:
1340 datasetType = self.getDatasetType(parentDatasetTypeName)
1341 standardizedDatasetTypes.add(datasetType)
1342 # Preprocess collections expression in case the original included
1343 # single-pass iterators (we'll want to use it multiple times
1344 # below).
1345 collections = CollectionQuery.fromExpression(collections)
1347 summary = queries.QuerySummary(
1348 requested=DimensionGraph(self.dimensions, names=queryDimensionNames),
1349 dataId=standardizedDataId,
1350 expression=where,
1351 )
1352 builder = self.makeQueryBuilder(summary)
1353 for datasetType in standardizedDatasetTypes:
1354 builder.joinDataset(datasetType, collections, isResult=False)
1355 query = builder.finish()
1356 return queries.DataCoordinateQueryResults(self._db, query)
1358 def queryDimensionRecords(self, element: Union[DimensionElement, str], *,
1359 dataId: Optional[DataId] = None,
1360 datasets: Any = None,
1361 collections: Any = None,
1362 where: Optional[str] = None,
1363 components: Optional[bool] = None,
1364 **kwargs: Any) -> Iterator[DimensionRecord]:
1365 """Query for dimension information matching user-provided criteria.
1367 Parameters
1368 ----------
1369 element : `DimensionElement` or `str`
1370 The dimension element to obtain records for.
1371 dataId : `dict` or `DataCoordinate`, optional
1372 A data ID whose key-value pairs are used as equality constraints
1373 in the query.
1374 datasets : `Any`, optional
1375 An expression that fully or partially identifies dataset types
1376 that should constrain the yielded records. See `queryDataIds` and
1377 :ref:`daf_butler_dataset_type_expressions` for more information.
1378 collections : `Any`, optional
1379 An expression that fully or partially identifies the collections
1380 to search for datasets. See `queryDataIds` and
1381 :ref:`daf_butler_collection_expressions` for more information.
1382 where : `str`, optional
1383 A string expression similar to a SQL WHERE clause. See
1384 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
1385 information.
1386 components : `bool`, optional
1387 Whether to apply dataset expressions to components as well.
1388 See `queryDataIds` for more information.
1389 **kwargs
1390 Additional keyword arguments are forwarded to
1391 `DataCoordinate.standardize` when processing the ``dataId``
1392 argument (and may be used to provide a constraining data ID even
1393 when the ``dataId`` argument is `None`).
1395 Returns
1396 -------
1397 records : `Iterator` [ `DimensionRecord` ]
1398 Dimension records matching the given query parameters.
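
        Examples
        --------
        A sketch fetching detector records for a hypothetical instrument;
        the printed fields are illustrative::

            for record in registry.queryDimensionRecords("detector", instrument="HSC"):
                print(record.id, record.full_name)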
1399 """
1400 if not isinstance(element, DimensionElement):
1401 element = self.dimensions[element]
1402 dataIds = self.queryDataIds(element.graph, dataId=dataId, datasets=datasets, collections=collections,
1403 where=where, components=components, **kwargs)
1404 return iter(self._dimensions[element].fetch(dataIds))
1406 storageClasses: StorageClassFactory
1407 """All storage classes known to the registry (`StorageClassFactory`).
1408 """