Coverage for python/lsst/daf/butler/registry/_registry.py : 12%

1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "Registry",
26)
28from collections import defaultdict
29import contextlib
30import logging
31from typing import (
32 Any,
33 Dict,
34 Iterable,
35 Iterator,
36 List,
37 Mapping,
38 Optional,
39 Set,
40 Type,
41 TYPE_CHECKING,
42 Union,
43)
45import sqlalchemy
47from ..core import (
48 Config,
49 DataCoordinate,
50 DataCoordinateIterable,
51 DataId,
52 DatasetRef,
53 DatasetType,
54 ddl,
55 Dimension,
56 DimensionElement,
57 DimensionGraph,
58 DimensionRecord,
59 DimensionUniverse,
60 NamedKeyMapping,
61 NameLookupMapping,
62 StorageClassFactory,
63)
64from . import queries
65from ..core.utils import doImport, iterable, transactional
66from ._config import RegistryConfig
67from ._collectionType import CollectionType
68from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError
69from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch, Ellipsis
70from .interfaces import ChainedCollectionRecord, RunRecord
71from .versions import ButlerVersionsManager, DigestMismatchError
73if TYPE_CHECKING:
74 from ..butlerConfig import ButlerConfig
75 from .interfaces import (
76 ButlerAttributeManager,
77 CollectionManager,
78 Database,
79 OpaqueTableStorageManager,
80 DimensionRecordStorageManager,
81 DatasetRecordStorageManager,
82 DatastoreRegistryBridgeManager,
83 )
86_LOG = logging.getLogger(__name__)
89class Registry:
90 """Registry interface.
92 Parameters
93 ----------
94 database : `Database`
95 Database instance in which to store Registry data.
96 universe : `DimensionUniverse`
97 Full set of dimensions for Registry.
98 attributes : `type`
99 Manager class implementing `ButlerAttributeManager`.
100 opaque : `type`
101 Manager class implementing `OpaqueTableStorageManager`.
102 dimensions : `type`
103 Manager class implementing `DimensionRecordStorageManager`.
104 collections : `type`
105 Manager class implementing `CollectionManager`.
106 datasets : `type`
107 Manager class implementing `DatasetRecordStorageManager`.
108 datastoreBridges : `type`
109 Manager class implementing `DatastoreRegistryBridgeManager`.
110 writeable : `bool`, optional
111 If `True` then the Registry will support write operations.
112 create : `bool`, optional
113 If `True` then the database schema will be initialized; it must be
114 empty before the Registry is instantiated.
115 """
117 defaultConfigFile: Optional[str] = None
118 """Path to configuration defaults. Accessed within the ``config`` resource
119 or relative to a search path. Can be `None` if no defaults are specified.
120 """
122 @classmethod
123 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], create: bool = False,
124 butlerRoot: Optional[str] = None, writeable: bool = True) -> Registry:
125 """Create `Registry` subclass instance from `config`.
127 Uses ``registry.cls`` from `config` to determine which subclass to
128 instantiate.
130 Parameters
131 ----------
132 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
133 Registry configuration
134 create : `bool`, optional
135 Assume empty Registry and create a new one.
136 butlerRoot : `str`, optional
137 Path to the repository root this `Registry` will manage.
138 writeable : `bool`, optional
139 If `True` (default) create a read-write connection to the database.
141 Returns
142 -------
143 registry : `Registry` (subclass)
144 A new `Registry` subclass instance.
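145 Examples
146 --------
147 A minimal sketch; the configuration path is hypothetical and is assumed
148 to point at a valid registry configuration::

    registry = Registry.fromConfig("/repo/butler.yaml", writeable=False)
    print(registry.dimensions)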
145 """
146 if not isinstance(config, RegistryConfig):
147 if isinstance(config, (str, Config)):
148 config = RegistryConfig(config)
149 else:
150 raise ValueError("Incompatible Registry configuration: {}".format(config))
151 config.replaceRoot(butlerRoot)
152 DatabaseClass = config.getDatabaseClass()
153 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0),
154 namespace=config.get("namespace"), writeable=writeable)
155 universe = DimensionUniverse(config)
156 attributes = doImport(config["managers", "attributes"])
157 opaque = doImport(config["managers", "opaque"])
158 dimensions = doImport(config["managers", "dimensions"])
159 collections = doImport(config["managers", "collections"])
160 datasets = doImport(config["managers", "datasets"])
161 datastoreBridges = doImport(config["managers", "datastores"])
163 return cls(database, universe, dimensions=dimensions, attributes=attributes, opaque=opaque,
164 collections=collections, datasets=datasets, datastoreBridges=datastoreBridges,
165 writeable=writeable, create=create)
167 def __init__(self, database: Database, universe: DimensionUniverse, *,
168 attributes: Type[ButlerAttributeManager],
169 opaque: Type[OpaqueTableStorageManager],
170 dimensions: Type[DimensionRecordStorageManager],
171 collections: Type[CollectionManager],
172 datasets: Type[DatasetRecordStorageManager],
173 datastoreBridges: Type[DatastoreRegistryBridgeManager],
174 writeable: bool = True,
175 create: bool = False):
176 self._db = database
177 self.storageClasses = StorageClassFactory()
178 with self._db.declareStaticTables(create=create) as context:
179 self._attributes = attributes.initialize(self._db, context)
180 self._dimensions = dimensions.initialize(self._db, context, universe=universe)
181 self._collections = collections.initialize(self._db, context)
182 self._datasets = datasets.initialize(self._db, context,
183 collections=self._collections,
184 universe=self.dimensions)
185 self._opaque = opaque.initialize(self._db, context)
186 self._datastoreBridges = datastoreBridges.initialize(self._db, context,
187 opaque=self._opaque,
188 datasets=datasets,
189 universe=self.dimensions)
190 versions = ButlerVersionsManager(
191 self._attributes,
192 dict(
193 attributes=self._attributes,
194 opaque=self._opaque,
195 dimensions=self._dimensions,
196 collections=self._collections,
197 datasets=self._datasets,
198 datastores=self._datastoreBridges,
199 )
200 )
201 # store managers and their versions in attributes table
202 context.addInitializer(lambda db: versions.storeManagersConfig())
203 context.addInitializer(lambda db: versions.storeManagersVersions())
205 if not create:
206 # verify that configured versions are compatible with schema
207 versions.checkManagersConfig()
208 versions.checkManagersVersions(writeable)
209 try:
210 versions.checkManagersDigests()
211 except DigestMismatchError as exc:
212 # A digest mismatch is potentially a serious error, but during
213 # development it could be benign; treat it as a warning for
214 # now.
215 _LOG.warning(f"Registry schema digest mismatch: {exc}")
217 self._collections.refresh()
218 self._datasets.refresh(universe=self._dimensions.universe)
220 def __str__(self) -> str:
221 return str(self._db)
223 def __repr__(self) -> str:
224 return f"Registry({self._db!r}, {self.dimensions!r})"
226 def isWriteable(self) -> bool:
227 """Return `True` if this registry allows write operations, and `False`
228 otherwise.
229 """
230 return self._db.isWriteable()
232 @property
233 def dimensions(self) -> DimensionUniverse:
234 """All dimensions recognized by this `Registry` (`DimensionUniverse`).
235 """
236 return self._dimensions.universe
238 @contextlib.contextmanager
239 def transaction(self, *, savepoint: bool = False) -> Iterator[None]:
240 """Return a context manager that represents a transaction.
241 """
242 try:
243 with self._db.transaction(savepoint=savepoint):
244 yield
245 except BaseException:
246 # TODO: this clears the caches sometimes when we wouldn't actually
247 # need to. Can we avoid that?
248 self._dimensions.clearCaches()
249 raise
251 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
252 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
253 other data repository client.
255 Opaque table records can be added via `insertOpaqueData`, retrieved via
256 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
258 Parameters
259 ----------
260 tableName : `str`
261 Logical name of the opaque table. This may differ from the
262 actual name used in the database by a prefix and/or suffix.
263 spec : `ddl.TableSpec`
264 Specification for the table to be added.
265 """
266 self._opaque.register(tableName, spec)
268 @transactional
269 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
270 """Insert records into an opaque table.
272 Parameters
273 ----------
274 tableName : `str`
275 Logical name of the opaque table. Must match the name used in a
276 previous call to `registerOpaqueTable`.
277 data
278 Each additional positional argument is a dictionary that represents
279 a single row to be added.
280 """
281 self._opaque[tableName].insert(*data)
283 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]:
284 """Retrieve records from an opaque table.
286 Parameters
287 ----------
288 tableName : `str`
289 Logical name of the opaque table. Must match the name used in a
290 previous call to `registerOpaqueTable`.
291 where
292 Additional keyword arguments are interpreted as equality
293 constraints that restrict the returned rows (combined with AND);
294 keyword arguments are column names and values are the values they
295 must have.
297 Yields
298 ------
299 row : `dict`
300 A dictionary representing a single result row.
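Examples
--------
A minimal sketch; the table name and row contents are hypothetical, and
the table is assumed to have been registered earlier (typically by a
`Datastore`) via `registerOpaqueTable`::

    registry.insertOpaqueData("datastore_records",
                              {"dataset_id": 42, "path": "a/b.fits"})
    for row in registry.fetchOpaqueData("datastore_records", dataset_id=42):
        print(row["path"])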
301 """
302 yield from self._opaque[tableName].fetch(**where)
304 @transactional
305 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
306 """Remove records from an opaque table.
308 Parameters
309 ----------
310 tableName : `str`
311 Logical name of the opaque table. Must match the name used in a
312 previous call to `registerOpaqueTable`.
313 where
314 Additional keyword arguments are interpreted as equality
315 constraints that restrict the deleted rows (combined with AND);
316 keyword arguments are column names and values are the values they
317 must have.
318 """
319 self._opaque[tableName].delete(**where)
321 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED) -> None:
322 """Add a new collection if one with the given name does not exist.
324 Parameters
325 ----------
326 name : `str`
327 The name of the collection to create.
328 type : `CollectionType`
329 Enum value indicating the type of collection to create.
331 Notes
332 -----
333 This method cannot be called within transactions, as it needs to be
334 able to perform its own transaction to be concurrent.
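Examples
--------
A minimal sketch with an invented collection name::

    registry.registerCollection("u/someone/tagged", CollectionType.TAGGED)
    assert registry.getCollectionType("u/someone/tagged") is CollectionType.TAGGED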
335 """
336 self._collections.register(name, type)
338 def getCollectionType(self, name: str) -> CollectionType:
339 """Return an enumeration value indicating the type of the given
340 collection.
342 Parameters
343 ----------
344 name : `str`
345 The name of the collection.
347 Returns
348 -------
349 type : `CollectionType`
350 Enum value indicating the type of this collection.
352 Raises
353 ------
354 MissingCollectionError
355 Raised if no collection with the given name exists.
356 """
357 return self._collections.find(name).type
359 def registerRun(self, name: str) -> None:
360 """Add a new run if one with the given name does not exist.
362 Parameters
363 ----------
364 name : `str`
365 The name of the run to create.
367 Notes
368 -----
369 This method cannot be called within transactions, as it needs to be
370 able to perform its own transaction to be concurrent.
371 """
372 self._collections.register(name, CollectionType.RUN)
374 @transactional
375 def removeCollection(self, name: str) -> None:
376 """Completely remove the given collection.
378 Parameters
379 ----------
380 name : `str`
381 The name of the collection to remove.
383 Raises
384 ------
385 MissingCollectionError
386 Raised if no collection with the given name exists.
388 Notes
389 -----
390 If this is a `~CollectionType.RUN` collection, all datasets and quanta
391 in it are also fully removed. This requires that those datasets be
392 removed (or at least trashed) from any datastores that hold them first.
394 A collection may not be deleted as long as it is referenced by a
395 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
396 be deleted or redefined first.
397 """
398 self._collections.remove(name)
400 def getCollectionChain(self, parent: str) -> CollectionSearch:
401 """Return the child collections in a `~CollectionType.CHAINED`
402 collection.
404 Parameters
405 ----------
406 parent : `str`
407 Name of the chained collection. Must have already been added via
408 a call to `Registry.registerCollection`.
410 Returns
411 -------
412 children : `CollectionSearch`
413 An object that defines the search path of the collection.
414 See :ref:`daf_butler_collection_expressions` for more information.
416 Raises
417 ------
418 MissingCollectionError
419 Raised if ``parent`` does not exist in the `Registry`.
420 TypeError
421 Raised if ``parent`` does not correspond to a
422 `~CollectionType.CHAINED` collection.
423 """
424 record = self._collections.find(parent)
425 if record.type is not CollectionType.CHAINED:
426 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
427 assert isinstance(record, ChainedCollectionRecord)
428 return record.children
430 @transactional
431 def setCollectionChain(self, parent: str, children: Any) -> None:
432 """Define or redefine a `~CollectionType.CHAINED` collection.
434 Parameters
435 ----------
436 parent : `str`
437 Name of the chained collection. Must have already been added via
438 a call to `Registry.registerCollection`.
439 children : `Any`
440 An expression defining an ordered search of child collections,
441 generally an iterable of `str`. Restrictions on the dataset types
442 to be searched can also be included, by passing a mapping or an
443 iterable containing tuples; see
444 :ref:`daf_butler_collection_expressions` for more information.
446 Raises
447 ------
448 MissingCollectionError
449 Raised when any of the given collections do not exist in the
450 `Registry`.
451 TypeError
452 Raised if ``parent`` does not correspond to a
453 `~CollectionType.CHAINED` collection.
454 ValueError
455 Raised if the given collections contain a cycle.
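Examples
--------
A minimal sketch; the chain name is invented and the child collections
are assumed to exist already::

    registry.registerCollection("u/someone/chain", CollectionType.CHAINED)
    registry.setCollectionChain("u/someone/chain", ["run/a", "run/b"])
    children = registry.getCollectionChain("u/someone/chain")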
456 """
457 record = self._collections.find(parent)
458 if record.type is not CollectionType.CHAINED:
459 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
460 assert isinstance(record, ChainedCollectionRecord)
461 children = CollectionSearch.fromExpression(children)
462 if children != record.children:
463 record.update(self._collections, children)
465 def registerDatasetType(self, datasetType: DatasetType) -> bool:
466 """
467 Add a new `DatasetType` to the Registry.
469 It is not an error to register the same `DatasetType` twice.
471 Parameters
472 ----------
473 datasetType : `DatasetType`
474 The `DatasetType` to be added.
476 Returns
477 -------
478 inserted : `bool`
479 `True` if ``datasetType`` was inserted, `False` if an identical
480 existing `DatasetType` was found. Note that in either case the
481 DatasetType is guaranteed to be defined in the Registry
482 consistently with the given definition.
484 Raises
485 ------
486 ValueError
487 Raised if the dimensions or storage class are invalid.
488 ConflictingDefinitionError
489 Raised if this DatasetType is already registered with a different
490 definition.
492 Notes
493 -----
494 This method cannot be called within transactions, as it needs to be
495 able to perform its own transaction to be concurrent.
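Examples
--------
A minimal sketch; the dataset type name, dimensions, and storage class
are illustrative, and the `DatasetType` constructor is assumed to accept
dimension names together with ``universe``::

    datasetType = DatasetType("calexp",
                              dimensions=("instrument", "visit", "detector"),
                              storageClass="ExposureF",
                              universe=registry.dimensions)
    registry.registerDatasetType(datasetType)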
496 """
497 _, inserted = self._datasets.register(datasetType)
498 return inserted
500 def getDatasetType(self, name: str) -> DatasetType:
501 """Get the `DatasetType`.
503 Parameters
504 ----------
505 name : `str`
506 Name of the type.
508 Returns
509 -------
510 type : `DatasetType`
511 The `DatasetType` associated with the given name.
513 Raises
514 ------
515 KeyError
516 Raised if the requested DatasetType could not be found in the registry.
517 """
518 return self._datasets[name].datasetType
520 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *,
521 collections: Any, **kwargs: Any) -> Optional[DatasetRef]:
522 """Find a dataset given its `DatasetType` and data ID.
524 This can be used to obtain a `DatasetRef` that permits the dataset to
525 be read from a `Datastore`. If the dataset is a component and cannot
526 be found using the provided dataset type, a dataset ref for the parent
527 will be returned instead but with the correct dataset type.
529 Parameters
530 ----------
531 datasetType : `DatasetType` or `str`
532 A `DatasetType` or the name of one.
533 dataId : `dict` or `DataCoordinate`, optional
534 A `dict`-like object containing the `Dimension` links that identify
535 the dataset within a collection.
536 collections
537 An expression that fully or partially identifies the collections
538 to search for the dataset, such as a `str`, `DatasetType`, or
539 iterable thereof. See :ref:`daf_butler_collection_expressions`
540 for more information.
541 **kwargs
542 Additional keyword arguments passed to
543 `DataCoordinate.standardize` to convert ``dataId`` to a true
544 `DataCoordinate` or augment an existing one.
546 Returns
547 -------
548 ref : `DatasetRef`
549 A reference to the dataset, or `None` if no matching Dataset
550 was found.
552 Raises
553 ------
554 LookupError
555 Raised if one or more data ID keys are missing.
556 KeyError
557 Raised if the dataset type does not exist.
558 MissingCollectionError
559 Raised if any of ``collections`` does not exist in the registry.
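Examples
--------
A minimal sketch; the dataset type, data ID values, and collection name
are invented::

    ref = registry.findDataset("raw", instrument="DummyCam", exposure=42,
                               detector=1, collections="DummyCam/raw/all")
    if ref is not None:
        print(ref.dataId)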
560 """
561 if isinstance(datasetType, DatasetType):
562 storage = self._datasets[datasetType.name]
563 else:
564 storage = self._datasets[datasetType]
565 dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions,
566 universe=self.dimensions, **kwargs)
567 collections = CollectionSearch.fromExpression(collections)
568 for collectionRecord in collections.iter(self._collections, datasetType=storage.datasetType):
569 result = storage.find(collectionRecord, dataId)
570 if result is not None:
571 return result
573 return None
575 @transactional
576 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId],
577 run: str) -> List[DatasetRef]:
578 """Insert one or more datasets into the `Registry`
580 This always adds new datasets; to associate existing datasets with
581 a new collection, use ``associate``.
583 Parameters
584 ----------
585 datasetType : `DatasetType` or `str`
586 A `DatasetType` or the name of one.
587 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
588 Dimension-based identifiers for the new datasets.
589 run : `str`
590 The name of the run that produced the datasets.
592 Returns
593 -------
594 refs : `list` of `DatasetRef`
595 Resolved `DatasetRef` instances for all given data IDs (in the same
596 order).
598 Raises
599 ------
600 ConflictingDefinitionError
601 Raised if a dataset with the same dataset type and data ID as one of those
602 given already exists in ``run``.
603 MissingCollectionError
604 Raised if ``run`` does not exist in the registry.
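Examples
--------
A minimal sketch; the run name, dataset type, and data ID are invented
and assumed to be registered already::

    registry.registerRun("DummyCam/raw/all")
    refs = registry.insertDatasets(
        "raw",
        dataIds=[{"instrument": "DummyCam", "exposure": 42, "detector": 1}],
        run="DummyCam/raw/all",
    )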
605 """
606 if isinstance(datasetType, DatasetType):
607 storage = self._datasets.find(datasetType.name)
608 if storage is None:
609 raise LookupError(f"DatasetType '{datasetType}' has not been registered.")
610 else:
611 storage = self._datasets.find(datasetType)
612 if storage is None:
613 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.")
614 runRecord = self._collections.find(run)
615 if runRecord.type is not CollectionType.RUN:
616 raise TypeError(f"Given collection is of type {runRecord.type.name}; RUN collection required.")
617 assert isinstance(runRecord, RunRecord)
618 expandedDataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions)
619 for dataId in dataIds]
620 try:
621 refs = list(storage.insert(runRecord, expandedDataIds))
622 except sqlalchemy.exc.IntegrityError as err:
623 raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting "
624 f"one or more datasets of type {storage.datasetType} into "
625 f"collection '{run}'. "
626 f"This probably means a dataset with the same data ID "
627 f"and dataset type already exists, but it may also mean a "
628 f"dimension row is missing.") from err
629 return refs
631 def getDataset(self, id: int) -> Optional[DatasetRef]:
632 """Retrieve a Dataset entry.
634 Parameters
635 ----------
636 id : `int`
637 The unique identifier for the dataset.
639 Returns
640 -------
641 ref : `DatasetRef` or `None`
642 A ref to the Dataset, or `None` if no matching Dataset
643 was found.
644 """
645 ref = self._datasets.getDatasetRef(id, universe=self.dimensions)
646 if ref is None:
647 return None
648 return ref
650 @transactional
651 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
652 """Remove datasets from the Registry.
654 The datasets will be removed unconditionally from all collections, and
655 any `Quantum` that consumed this dataset will instead be marked with
656 having a NULL input. `Datastore` records will *not* be deleted; the
657 caller is responsible for ensuring that the dataset has already been
658 removed from all Datastores.
660 Parameters
661 ----------
662 refs : `Iterable` of `DatasetRef`
663 References to the datasets to be removed. Must include a valid
664 ``id`` attribute, and should be considered invalidated upon return.
666 Raises
667 ------
668 AmbiguousDatasetError
669 Raised if any ``ref.id`` is `None`.
670 OrphanedRecordError
671 Raised if any dataset is still present in any `Datastore`.
672 """
673 for datasetType, refsForType in DatasetRef.groupByType(refs).items():
674 storage = self._datasets.find(datasetType.name)
675 assert storage is not None
676 try:
677 storage.delete(refsForType)
678 except sqlalchemy.exc.IntegrityError as err:
679 raise OrphanedRecordError("One or more datasets is still "
680 "present in one or more Datastores.") from err
682 @transactional
683 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
684 """Add existing datasets to a `~CollectionType.TAGGED` collection.
686 If a `DatasetRef` with exactly the same integer ID is already in the
687 collection, nothing is changed. If a `DatasetRef` with the same
688 `DatasetType` and data ID but with different integer ID
689 exists in the collection, `ConflictingDefinitionError` is raised.
691 Parameters
692 ----------
693 collection : `str`
694 Indicates the collection the datasets should be associated with.
695 refs : `Iterable` [ `DatasetRef` ]
696 An iterable of resolved `DatasetRef` instances that already exist
697 in this `Registry`.
699 Raises
700 ------
701 ConflictingDefinitionError
702 Raised if a dataset with the same dataset type and data ID but a
703 different integer ID already exists in the given collection.
704 AmbiguousDatasetError
705 Raised if ``any(ref.id is None for ref in refs)``.
706 MissingCollectionError
707 Raised if ``collection`` does not exist in the registry.
708 TypeError
709 Raised if adding new datasets to the given ``collection`` is not
710 allowed.
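Examples
--------
A minimal sketch; the tagged collection and the query used to obtain
resolved references are hypothetical::

    registry.registerCollection("u/someone/tagged", CollectionType.TAGGED)
    refs = list(registry.queryDatasets("raw", collections="DummyCam/raw/all"))
    registry.associate("u/someone/tagged", refs)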
711 """
712 collectionRecord = self._collections.find(collection)
713 if collectionRecord.type is not CollectionType.TAGGED:
714 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.")
715 for datasetType, refsForType in DatasetRef.groupByType(refs).items():
716 storage = self._datasets.find(datasetType.name)
717 assert storage is not None
718 try:
719 storage.associate(collectionRecord, refsForType)
720 except sqlalchemy.exc.IntegrityError as err:
721 raise ConflictingDefinitionError(
722 f"Constraint violation while associating dataset of type {datasetType.name} with "
723 f"collection {collection}. This probably means that one or more datasets with the same "
724 f"dataset type and data ID already exist in the collection, but it may also indicate "
725 f"that the datasets do not exist."
726 ) from err
728 @transactional
729 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
730 """Remove existing datasets from a `~CollectionType.TAGGED` collection.
732 ``collection`` and ``ref`` combinations that are not currently
733 associated are silently ignored.
735 Parameters
736 ----------
737 collection : `str`
738 The collection the datasets should no longer be associated with.
739 refs : `Iterable` [ `DatasetRef` ]
740 An iterable of resolved `DatasetRef` instances that already exist
741 in this `Registry`.
743 Raises
744 ------
745 AmbiguousDatasetError
746 Raised if any of the given dataset references is unresolved.
747 MissingCollectionError
748 Raised if ``collection`` does not exist in the registry.
749 TypeError
750 Raised if removing datasets from the given ``collection`` is not
751 allowed.
752 """
753 collectionRecord = self._collections.find(collection)
754 if collectionRecord.type is not CollectionType.TAGGED:
755 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; "
756 "expected TAGGED.")
757 for datasetType, refsForType in DatasetRef.groupByType(refs).items():
758 storage = self._datasets.find(datasetType.name)
759 assert storage is not None
760 storage.disassociate(collectionRecord, refsForType)
762 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
763 """Return an object that allows a new `Datastore` instance to
764 communicate with this `Registry`.
766 Returns
767 -------
768 manager : `DatastoreRegistryBridgeManager`
769 Object that mediates communication between this `Registry` and its
770 associated datastores.
771 """
772 return self._datastoreBridges
774 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
775 """Retrieve datastore locations for a given dataset.
777 Parameters
778 ----------
779 ref : `DatasetRef`
780 A reference to the dataset for which to retrieve storage
781 information.
783 Returns
784 -------
785 datastores : `Iterable` [ `str` ]
786 All the matching datastores holding this dataset.
788 Raises
789 ------
790 AmbiguousDatasetError
791 Raised if ``ref.id`` is `None`.
792 """
793 return self._datastoreBridges.findDatastores(ref)
795 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None,
796 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
797 **kwargs: Any) -> DataCoordinate:
798 """Expand a dimension-based data ID to include additional information.
800 Parameters
801 ----------
802 dataId : `DataCoordinate` or `dict`, optional
803 Data ID to be expanded; augmented and overridden by ``kwargs``.
804 graph : `DimensionGraph`, optional
805 Set of dimensions for the expanded ID. If `None`, the dimensions
806 will be inferred from the keys of ``dataId`` and ``kwargs``.
807 Dimensions that are in ``dataId`` or ``kwargs`` but not in ``graph``
808 are silently ignored, providing a way to extract and expand a
809 subset of a data ID.
810 records : `Mapping` [`str`, `DimensionRecord`], optional
811 Dimension record data to use before querying the database for that
812 data, keyed by element name.
813 **kwargs
814 Additional keywords are treated like additional key-value pairs for
815 ``dataId``, extending and overriding it.
817 Returns
818 -------
819 expanded : `DataCoordinate`
820 A data ID that includes full metadata for all of the dimensions it
821 identifies, i.e. guarantees that ``expanded.hasRecords()`` and
822 ``expanded.hasFull()`` both return `True`.
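Examples
--------
A minimal sketch; the instrument and detector values are invented and
assumed to exist as dimension records::

    dataId = registry.expandDataId(instrument="DummyCam", detector=1)
    assert dataId.hasFull() and dataId.hasRecords()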
823 """
824 standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions, **kwargs)
825 if standardized.hasRecords():
826 return standardized
827 if records is None:
828 records = {}
829 elif isinstance(records, NamedKeyMapping):
830 records = records.byName()
831 else:
832 records = dict(records)
833 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
834 records.update(dataId.records.byName())
835 keys = standardized.byName()
836 for element in standardized.graph.primaryKeyTraversalOrder:
837 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
838 if record is ...:
839 if isinstance(element, Dimension) and keys.get(element.name) is None:
840 if element in standardized.graph.required:
841 raise LookupError(
842 f"No value or null value for required dimension {element.name}."
843 )
844 keys[element.name] = None
845 record = None
846 else:
847 storage = self._dimensions[element]
848 dataIdSet = DataCoordinateIterable.fromScalar(
849 DataCoordinate.standardize(keys, graph=element.graph)
850 )
851 fetched = tuple(storage.fetch(dataIdSet))
852 try:
853 (record,) = fetched
854 except ValueError:
855 record = None
856 records[element.name] = record
857 if record is not None:
858 for d in element.implied:
859 value = getattr(record, d.name)
860 if keys.setdefault(d.name, value) != value:
861 raise InconsistentDataIdError(
862 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
863 f"but {element.name} implies {d.name}={value!r}."
864 )
865 else:
866 if element in standardized.graph.required:
867 raise LookupError(
868 f"Could not fetch record for required dimension {element.name} via keys {keys}."
869 )
870 if element.alwaysJoin:
871 raise InconsistentDataIdError(
872 f"Could not fetch record for element {element.name} via keys {keys}, ",
873 "but it is marked alwaysJoin=True; this means one or more dimensions are not "
874 "related."
875 )
876 for d in element.implied:
877 keys.setdefault(d.name, None)
878 records.setdefault(d.name, None)
879 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
881 def insertDimensionData(self, element: Union[DimensionElement, str],
882 *data: Union[Mapping[str, Any], DimensionRecord],
883 conform: bool = True) -> None:
884 """Insert one or more dimension records into the database.
886 Parameters
887 ----------
888 element : `DimensionElement` or `str`
889 The `DimensionElement` or name thereof that identifies the table
890 records will be inserted into.
891 data : `dict` or `DimensionRecord` (variadic)
892 One or more records to insert.
893 conform : `bool`, optional
894 If `False` (`True` is the default) perform no checking or conversions,
895 and assume that ``element`` is a `DimensionElement` instance and
896 ``data`` is one or more `DimensionRecord` instances of the
897 appropriate subclass.
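Examples
--------
A minimal sketch; the record contents are illustrative only, and the
fields actually required are defined by the dimension configuration::

    registry.insertDimensionData("instrument", {"name": "DummyCam"})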
898 """
899 if conform:
900 if isinstance(element, str):
901 element = self.dimensions[element]
902 records = [row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row)
903 for row in data]
904 else:
905 # Ignore typing since caller said to trust them with conform=False.
906 records = data # type: ignore
907 storage = self._dimensions[element] # type: ignore
908 storage.insert(*records)
910 def syncDimensionData(self, element: Union[DimensionElement, str],
911 row: Union[Mapping[str, Any], DimensionRecord],
912 conform: bool = True) -> bool:
913 """Synchronize the given dimension record with the database, inserting
914 if it does not already exist and comparing values if it does.
916 Parameters
917 ----------
918 element : `DimensionElement` or `str`
919 The `DimensionElement` or name thereof that identifies the table
920 records will be inserted into.
921 row : `dict` or `DimensionRecord`
922 The record to insert.
923 conform : `bool`, optional
924 If `False` (`True` is the default) perform no checking or conversions,
925 and assume that ``element`` is a `DimensionElement` instance and
926 ``row`` is a `DimensionRecord` instance of the appropriate
927 subclass.
929 Returns
930 -------
931 inserted : `bool`
932 `True` if a new row was inserted, `False` otherwise.
934 Raises
935 ------
936 ConflictingDefinitionError
937 Raised if the record exists in the database (according to primary
938 key lookup) but is inconsistent with the given one.
940 Notes
941 -----
942 This method cannot be called within transactions, as it needs to be
943 able to perform its own transaction to be concurrent.
944 """
945 if conform:
946 if isinstance(element, str):
947 element = self.dimensions[element]
948 record = row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row)
949 else:
950 # Ignore typing since caller said to trust them with conform=False.
951 record = row # type: ignore
952 storage = self._dimensions[element] # type: ignore
953 return storage.sync(record)
955 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None
956 ) -> Iterator[DatasetType]:
957 """Iterate over the dataset types whose names match an expression.
959 Parameters
960 ----------
961 expression : `Any`, optional
962 An expression that fully or partially identifies the dataset types
963 to return, such as a `str`, `re.Pattern`, or iterable thereof.
964 `...` can be used to return all dataset types, and is the default.
965 See :ref:`daf_butler_dataset_type_expressions` for more
966 information.
967 components : `bool`, optional
968 If `True`, apply all expression patterns to component dataset type
969 names as well. If `False`, never apply patterns to components.
970 If `None` (default), apply patterns to components only if their
971 parent datasets were not matched by the expression.
972 Fully-specified component datasets (`str` or `DatasetType`
973 instances) are always included.
975 Yields
976 ------
977 datasetType : `DatasetType`
978 A `DatasetType` instance whose name matches ``expression``.
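Examples
--------
A minimal sketch; the pattern is illustrative::

    import re

    for datasetType in registry.queryDatasetTypes(re.compile(r"calexp.*")):
        print(datasetType.name)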
979 """
980 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name)
981 if wildcard is Ellipsis:
982 for datasetType in self._datasets:
983 # The dataset type can no longer be a component
984 yield datasetType
985 if components and datasetType.isComposite():
986 # Automatically create the component dataset types
987 for component in datasetType.makeAllComponentDatasetTypes():
988 yield component
989 return
990 done: Set[str] = set()
991 for name in wildcard.strings:
992 storage = self._datasets.find(name)
993 if storage is not None:
994 done.add(storage.datasetType.name)
995 yield storage.datasetType
996 if wildcard.patterns:
997 # If components (the argument) is None, we'll save component
998 # datasets that we might want to match, but only if their parents
999 # didn't get included.
1000 componentsForLater = []
1001 for registeredDatasetType in self._datasets:
1002 # Components are not stored in registry so expand them here
1003 allDatasetTypes = [registeredDatasetType] \
1004 + registeredDatasetType.makeAllComponentDatasetTypes()
1005 for datasetType in allDatasetTypes:
1006 if datasetType.name in done:
1007 continue
1008 parentName, componentName = datasetType.nameAndComponent()
1009 if componentName is not None and not components:
1010 if components is None and parentName not in done:
1011 componentsForLater.append(datasetType)
1012 continue
1013 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
1014 done.add(datasetType.name)
1015 yield datasetType
1016 # Go back and try to match saved components.
1017 for datasetType in componentsForLater:
1018 parentName, _ = datasetType.nameAndComponent()
1019 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
1020 yield datasetType
1022 def queryCollections(self, expression: Any = ...,
1023 datasetType: Optional[DatasetType] = None,
1024 collectionType: Optional[CollectionType] = None,
1025 flattenChains: bool = False,
1026 includeChains: Optional[bool] = None) -> Iterator[str]:
1027 """Iterate over the collections whose names match an expression.
1029 Parameters
1030 ----------
1031 expression : `Any`, optional
1032 An expression that fully or partially identifies the collections
1033 to return, such as a `str`, `re.Pattern`, or iterable thereof.
1034 `...` can be used to return all collections, and is the default.
1035 See :ref:`daf_butler_collection_expressions` for more
1036 information.
1037 datasetType : `DatasetType`, optional
1038 If provided, only yield collections that should be searched for
1039 this dataset type according to ``expression``. If this is
1040 not provided, any dataset type restrictions in ``expression`` are
1041 ignored.
1042 collectionType : `CollectionType`, optional
1043 If provided, only yield collections of this type.
1044 flattenChains : `bool`, optional
1045 If `True` (`False` is default), recursively yield the child
1046 collections of matching `~CollectionType.CHAINED` collections.
1047 includeChains : `bool`, optional
1048 If `True`, yield records for matching `~CollectionType.CHAINED`
1049 collections. Default is the opposite of ``flattenChains``: include
1050 either CHAINED collections or their children, but not both.
1052 Yields
1053 ------
1054 collection : `str`
1055 The name of a collection that matches ``expression``.
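Examples
--------
A minimal sketch; the pattern is illustrative::

    import re

    runs = list(registry.queryCollections(re.compile(r"DummyCam/.*"),
                                          collectionType=CollectionType.RUN))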
1056 """
1057 query = CollectionQuery.fromExpression(expression)
1058 for record in query.iter(self._collections, datasetType=datasetType, collectionType=collectionType,
1059 flattenChains=flattenChains, includeChains=includeChains):
1060 yield record.name
1062 def makeQueryBuilder(self, summary: queries.QuerySummary) -> queries.QueryBuilder:
1063 """Return a `QueryBuilder` instance capable of constructing and
1064 managing more complex queries than those obtainable via `Registry`
1065 interfaces.
1067 This is an advanced interface; downstream code should prefer
1068 `Registry.queryDataIds` and `Registry.queryDatasets` whenever those
1069 are sufficient.
1071 Parameters
1072 ----------
1073 summary : `queries.QuerySummary`
1074 Object describing and categorizing the full set of dimensions that
1075 will be included in the query.
1077 Returns
1078 -------
1079 builder : `queries.QueryBuilder`
1080 Object that can be used to construct and perform advanced queries.
1081 """
1082 return queries.QueryBuilder(
1083 summary,
1084 queries.RegistryManagers(
1085 collections=self._collections,
1086 dimensions=self._dimensions,
1087 datasets=self._datasets
1088 )
1089 )
1091 def queryDatasets(self, datasetType: Any, *,
1092 collections: Any,
1093 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1094 dataId: Optional[DataId] = None,
1095 where: Optional[str] = None,
1096 deduplicate: bool = False,
1097 components: Optional[bool] = None,
1098 **kwargs: Any) -> queries.DatasetQueryResults:
1099 """Query for and iterate over dataset references matching user-provided
1100 criteria.
1102 Parameters
1103 ----------
1104 datasetType
1105 An expression that fully or partially identifies the dataset types
1106 to be queried. Allowed types include `DatasetType`, `str`,
1107 `re.Pattern`, and iterables thereof. The special value `...` can
1108 be used to query all dataset types. See
1109 :ref:`daf_butler_dataset_type_expressions` for more information.
1110 collections
1111 An expression that fully or partially identifies the collections
1112 to search for datasets, such as a `str`, `re.Pattern`, or iterable
1113 thereof. `...` can be used to return all collections. See
1114 :ref:`daf_butler_collection_expressions` for more information.
1115 dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
1116 Dimensions to include in the query (in addition to those used
1117 to identify the queried dataset type(s)), either to constrain
1118 the resulting datasets to those for which a matching dimension
1119 exists, or to relate the dataset type's dimensions to dimensions
1120 referenced by the ``dataId`` or ``where`` arguments.
1121 dataId : `dict` or `DataCoordinate`, optional
1122 A data ID whose key-value pairs are used as equality constraints
1123 in the query.
1124 where : `str`, optional
1125 A string expression similar to a SQL WHERE clause. May involve
1126 any column of a dimension table or (as a shortcut for the primary
1127 key column of a dimension table) dimension name. See
1128 :ref:`daf_butler_dimension_expressions` for more information.
1129 deduplicate : `bool`, optional
1130 If `True` (`False` is default), for each result data ID, only
1131 yield one `DatasetRef` of each `DatasetType`, from the first
1132 collection in which a dataset of that dataset type appears
1133 (according to the order of ``collections`` passed in). If `True`,
1134 ``collections`` must not contain regular expressions and may not
1135 be `...`.
1136 components : `bool`, optional
1137 If `True`, apply all dataset expression patterns to component
1138 dataset type names as well. If `False`, never apply patterns to
1139 components. If `None` (default), apply patterns to components only
1140 if their parent datasets were not matched by the expression.
1141 Fully-specified component datasets (`str` or `DatasetType`
1142 instances) are always included.
1143 **kwargs
1144 Additional keyword arguments are forwarded to
1145 `DataCoordinate.standardize` when processing the ``dataId``
1146 argument (and may be used to provide a constraining data ID even
1147 when the ``dataId`` argument is `None`).
1149 Returns
1150 -------
1151 refs : `queries.DatasetQueryResults`
1152 Dataset references matching the given query criteria.
1154 Raises
1155 ------
1156 TypeError
1157 Raised when the arguments are incompatible, such as when a
1158 collection wildcard is passed when ``deduplicate`` is `True`.
1160 Notes
1161 -----
1162 When multiple dataset types are queried in a single call, the
1163 results of this operation are equivalent to querying for each dataset
1164 type separately in turn, and no information about the relationships
1165 between datasets of different types is included. In contexts where
1166 that kind of information is important, the recommended pattern is to
1167 use `queryDataIds` to first obtain data IDs (possibly with the
1168 desired dataset types and collections passed as constraints to the
1169 query), and then use multiple (generally much simpler) calls to
1170 `queryDatasets` with the returned data IDs passed as constraints.
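Examples
--------
A minimal sketch; the dataset type and ``where`` expression are
illustrative, and ``...`` is used to search all collections (only
allowed when ``deduplicate`` is `False`)::

    refs = registry.queryDatasets("calexp",
                                  collections=...,
                                  where="instrument='DummyCam' AND visit=42")
    for ref in refs:
        print(ref.dataId)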
1171 """
1172 # Standardize the collections expression.
1173 if deduplicate:
1174 collections = CollectionSearch.fromExpression(collections)
1175 else:
1176 collections = CollectionQuery.fromExpression(collections)
1177 # Standardize and expand the data ID provided as a constraint.
1178 standardizedDataId = self.expandDataId(dataId, **kwargs)
1180 # We can only query directly if given a non-component DatasetType
1181 # instance. If we were given an expression or str or a component
1182 # DatasetType instance, we'll populate this dict, recurse, and return.
1183 # If we already have a non-component DatasetType, it will remain None
1184 # and we'll run the query directly.
1185 composition: Optional[
1186 Dict[
1187 DatasetType, # parent dataset type
1188 List[Optional[str]] # component name, or None for parent
1189 ]
1190 ] = None
1191 if not isinstance(datasetType, DatasetType):
1192 # We were given a dataset type expression (which may be as simple
1193 # as a str). Loop over all matching datasets, delegating handling
1194 # of the `components` argument to queryDatasetTypes, as we populate
1195 # the composition dict.
1196 composition = defaultdict(list)
1197 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components):
1198 parentName, componentName = trueDatasetType.nameAndComponent()
1199 if componentName is not None:
1200 parentDatasetType = self.getDatasetType(parentName)
1201 composition.setdefault(parentDatasetType, []).append(componentName)
1202 else:
1203 composition.setdefault(trueDatasetType, []).append(None)
1204 elif datasetType.isComponent():
1205 # We were given a true DatasetType instance, but it's a component.
1206 # the composition dict will have exactly one item.
1207 parentName, componentName = datasetType.nameAndComponent()
1208 parentDatasetType = self.getDatasetType(parentName)
1209 composition = {parentDatasetType: [componentName]}
1210 if composition is not None:
1211 # We need to recurse. Do that once for each parent dataset type.
1212 chain = []
1213 for parentDatasetType, componentNames in composition.items():
1214 parentResults = self.queryDatasets(
1215 parentDatasetType,
1216 collections=collections,
1217 dimensions=dimensions,
1218 dataId=standardizedDataId,
1219 where=where,
1220 deduplicate=deduplicate
1221 )
1222 if isinstance(parentResults, queries.ParentDatasetQueryResults):
1223 chain.append(
1224 parentResults.withComponents(componentNames)
1225 )
1226 else:
1227 # Should only happen if we know there would be no results.
1228 assert isinstance(parentResults, queries.ChainedDatasetQueryResults) \
1229 and not parentResults._chain
1230 return queries.ChainedDatasetQueryResults(chain)
1231 # If we get here, there's no need to recurse (or we are already
1232 # recursing; there can only ever be one level of recursion).
1234 # The full set of dimensions in the query is the combination of those
1235 # needed for the DatasetType and those explicitly requested, if any.
1236 requestedDimensionNames = set(datasetType.dimensions.names)
1237 if dimensions is not None:
1238 requestedDimensionNames.update(self.dimensions.extract(dimensions).names)
1239 # Construct the summary structure needed to construct a QueryBuilder.
1240 summary = queries.QuerySummary(
1241 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames),
1242 dataId=standardizedDataId,
1243 expression=where,
1244 )
1245 builder = self.makeQueryBuilder(summary)
1246 # Add the dataset subquery to the query, telling the QueryBuilder to
1247 # include the rank of the selected collection in the results only if we
1248 # need to deduplicate. Note that if any of the collections are
1249 # actually wildcard expressions, and we've asked for deduplication,
1250 # this will raise TypeError for us.
1251 if not builder.joinDataset(datasetType, collections, isResult=True, deduplicate=deduplicate):
1252 return queries.ChainedDatasetQueryResults(())
1253 query = builder.finish()
1254 return queries.ParentDatasetQueryResults(self._db, query, components=[None])
1256 def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *,
1257 dataId: Optional[DataId] = None,
1258 datasets: Any = None,
1259 collections: Any = None,
1260 where: Optional[str] = None,
1261 components: Optional[bool] = None,
1262 **kwargs: Any) -> queries.DataCoordinateQueryResults:
1263 """Query for data IDs matching user-provided criteria.
1265 Parameters
1266 ----------
1267 dimensions : `Dimension` or `str`, or iterable thereof
1268 The dimensions of the data IDs to yield, as either `Dimension`
1269 instances or `str`. Will be automatically expanded to a complete
1270 `DimensionGraph`.
1271 dataId : `dict` or `DataCoordinate`, optional
1272 A data ID whose key-value pairs are used as equality constraints
1273 in the query.
1274 datasets : `Any`, optional
1275 An expression that fully or partially identifies dataset types
1276 that should constrain the yielded data IDs. For example, including
1277 "raw" here would constrain the yielded ``instrument``,
1278 ``exposure``, ``detector``, and ``physical_filter`` values to only
1279 those for which at least one "raw" dataset exists in
1280 ``collections``. Allowed types include `DatasetType`, `str`,
1281 `re.Pattern`, and iterables thereof. Unlike other dataset type
1282 expressions, ``...`` is not permitted - it doesn't make sense to
1283 constrain data IDs on the existence of *all* datasets.
1284 See :ref:`daf_butler_dataset_type_expressions` for more
1285 information.
1286 collections : `Any`, optional
1287 An expression that fully or partially identifies the collections
1288 to search for datasets, such as a `str`, `re.Pattern`, or iterable
1289 thereof. `...` can be used to return all collections. Must be
1290 provided if ``datasets`` is, and is ignored if it is not. See
1291 :ref:`daf_butler_collection_expressions` for more information.
1292 where : `str`, optional
1293 A string expression similar to a SQL WHERE clause. May involve
1294 any column of a dimension table or (as a shortcut for the primary
1295 key column of a dimension table) dimension name. See
1296 :ref:`daf_butler_dimension_expressions` for more information.
1297 components : `bool`, optional
1298 If `True`, apply all dataset expression patterns to component
1299 dataset type names as well. If `False`, never apply patterns to
1300 components. If `None` (default), apply patterns to components only
1301 if their parent datasets were not matched by the expression.
1302 Fully-specified component datasets (`str` or `DatasetType`
1303 instances) are always included.
1304 **kwargs
1305 Additional keyword arguments are forwarded to
1306 `DataCoordinate.standardize` when processing the ``dataId``
1307 argument (and may be used to provide a constraining data ID even
1308 when the ``dataId`` argument is `None`).
1310 Returns
1311 -------
1312 dataIds : `DataCoordinateQueryResults`
1313 Data IDs matching the given query parameters. These are guaranteed
1314 to identify all dimensions (`DataCoordinate.hasFull` returns
1315 `True`), but will not contain `DimensionRecord` objects
1316 (`DataCoordinate.hasRecords` returns `False`). Call
1317 `DataCoordinateQueryResults.expanded` on the returned object to
1318 fetch those (and consider using
1319 `DataCoordinateQueryResults.materialize` on the returned object
1320 first if the expected number of rows is very large). See
1321 documentation for those methods for additional information.
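Examples
--------
A minimal sketch; the dataset type and collection name are invented::

    dataIds = registry.queryDataIds(["exposure", "detector"],
                                    datasets="raw",
                                    collections="DummyCam/raw/all")
    for dataId in dataIds:
        print(dataId["exposure"], dataId["detector"])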
1322 """
1323 dimensions = iterable(dimensions)
1324 standardizedDataId = self.expandDataId(dataId, **kwargs)
1325 standardizedDatasetTypes = set()
1326 requestedDimensions = self.dimensions.extract(dimensions)
1327 queryDimensionNames = set(requestedDimensions.names)
1328 if datasets is not None:
1329 if collections is None:
1330 raise TypeError("Cannot pass 'datasets' without 'collections'.")
1331 for datasetType in self.queryDatasetTypes(datasets, components=components):
1332 queryDimensionNames.update(datasetType.dimensions.names)
1333 # If any matched dataset type is a component, just operate on
1334 # its parent instead, because Registry doesn't know anything
1335 # about what components exist, and here (unlike queryDatasets)
1336 # we don't care about returning them.
1337 parentDatasetTypeName, componentName = datasetType.nameAndComponent()
1338 if componentName is not None:
1339 datasetType = self.getDatasetType(parentDatasetTypeName)
1340 standardizedDatasetTypes.add(datasetType)
1341 # Preprocess collections expression in case the original included
1342 # single-pass iterators (we'll want to use it multiple times
1343 # below).
1344 collections = CollectionQuery.fromExpression(collections)
1346 summary = queries.QuerySummary(
1347 requested=DimensionGraph(self.dimensions, names=queryDimensionNames),
1348 dataId=standardizedDataId,
1349 expression=where,
1350 )
1351 builder = self.makeQueryBuilder(summary)
1352 for datasetType in standardizedDatasetTypes:
1353 builder.joinDataset(datasetType, collections, isResult=False)
1354 query = builder.finish()
1355 return queries.DataCoordinateQueryResults(self._db, query)
1357 def queryDimensionRecords(self, element: Union[DimensionElement, str], *,
1358 dataId: Optional[DataId] = None,
1359 datasets: Any = None,
1360 collections: Any = None,
1361 where: Optional[str] = None,
1362 components: Optional[bool] = None,
1363 **kwargs: Any) -> Iterator[DimensionRecord]:
1364 """Query for dimension information matching user-provided criteria.
1366 Parameters
1367 ----------
1368 element : `DimensionElement` or `str`
1369 The dimension element to obtain records for.
1370 dataId : `dict` or `DataCoordinate`, optional
1371 A data ID whose key-value pairs are used as equality constraints
1372 in the query.
1373 datasets : `Any`, optional
1374 An expression that fully or partially identifies dataset types
1375 that should constrain the yielded records. See `queryDataIds` and
1376 :ref:`daf_butler_dataset_type_expressions` for more information.
1377 collections : `Any`, optional
1378 An expression that fully or partially identifies the collections
1379 to search for datasets. See `queryDataIds` and
1380 :ref:`daf_butler_collection_expressions` for more information.
1381 where : `str`, optional
1382 A string expression similar to a SQL WHERE clause. See
1383 `queryDataIds` and :ref:`daf_butler_dimension_expressions` for more
1384 information.
1385 components : `bool`, optional
1386 Whether to apply dataset expressions to components as well.
1387 See `queryDataIds` for more information.
1388 **kwargs
1389 Additional keyword arguments are forwarded to
1390 `DataCoordinate.standardize` when processing the ``dataId``
1391 argument (and may be used to provide a constraining data ID even
1392 when the ``dataId`` argument is `None`).
1394 Returns
1395 -------
1396 dimensionRecords : `Iterator` [ `DimensionRecord` ]
1397 Dimension records matching the given query parameters.
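Examples
--------
A minimal sketch; the instrument name is invented::

    for record in registry.queryDimensionRecords("detector",
                                                 dataId={"instrument": "DummyCam"}):
        print(record)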
1398 """
1399 if not isinstance(element, DimensionElement):
1400 element = self.dimensions[element]
1401 dataIds = self.queryDataIds(element.graph, dataId=dataId, datasets=datasets, collections=collections,
1402 where=where, components=components, **kwargs)
1403 return iter(self._dimensions[element].fetch(dataIds))
1405 storageClasses: StorageClassFactory
1406 """All storage classes known to the registry (`StorageClassFactory`).
1407 """