Coverage for python/lsst/daf/butler/registry/_registry.py : 11%

1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24__all__ = (
25 "Registry",
26)
28from collections import defaultdict
29import contextlib
30import sys
31from typing import (
32 Any,
33 Dict,
34 Iterable,
35 Iterator,
36 List,
37 Mapping,
38 Optional,
39 Set,
40 Type,
41 TYPE_CHECKING,
42 Union,
43)
45import sqlalchemy
47from ..core import (
48 Config,
49 DataCoordinate,
50 DataCoordinateIterable,
51 DataId,
52 DatasetRef,
53 DatasetType,
54 ddl,
55 Dimension,
56 DimensionElement,
57 DimensionGraph,
58 DimensionRecord,
59 DimensionUniverse,
60 NamedKeyMapping,
61 NameLookupMapping,
62 StorageClassFactory,
63)
64from ..core.utils import doImport, iterable, transactional
65from ._config import RegistryConfig
66from .queries import (
67 QueryBuilder,
68 QuerySummary,
69)
70from ._collectionType import CollectionType
71from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError
72from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch, Ellipsis
73from .interfaces import ChainedCollectionRecord, RunRecord
74from .versions import ButlerVersionsManager
76if TYPE_CHECKING:
77 from ..butlerConfig import ButlerConfig
78 from .interfaces import (
79 ButlerAttributeManager,
80 CollectionManager,
81 Database,
82 OpaqueTableStorageManager,
83 DimensionRecordStorageManager,
84 DatasetRecordStorageManager,
85 DatastoreRegistryBridgeManager,
86 )
89class Registry:
90 """Registry interface.
92 Parameters
93 ----------
94 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
95 Registry configuration
96 """
98 defaultConfigFile: Optional[str] = None
99 """Path to configuration defaults. Accessed within the ``config`` resource
100 or relative to a search path. Can be `None` if no defaults are specified.
101 """
103 @classmethod
104 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], create: bool = False,
105 butlerRoot: Optional[str] = None, writeable: bool = True) -> Registry:
106 """Create `Registry` subclass instance from `config`.
108 Uses ``registry.cls`` from `config` to determine which subclass to
109 instantiate.
111 Parameters
112 ----------
113 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
114 Registry configuration
115 create : `bool`, optional
116 If `True`, assume the registry is empty and create a new one.
117 butlerRoot : `str`, optional
118 Path to the repository root this `Registry` will manage.
119 writeable : `bool`, optional
120 If `True` (default) create a read-write connection to the database.
122 Returns
123 -------
124 registry : `Registry` (subclass)
125 A new `Registry` subclass instance.
126 """
127 if not isinstance(config, RegistryConfig):
128 if isinstance(config, (str, Config)):
129 config = RegistryConfig(config)
130 else:
131 raise ValueError("Incompatible Registry configuration: {}".format(config))
132 config.replaceRoot(butlerRoot)
133 DatabaseClass = config.getDatabaseClass()
134 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0),
135 namespace=config.get("namespace"), writeable=writeable)
136 universe = DimensionUniverse(config)
137 attributes = doImport(config["managers", "attributes"])
138 opaque = doImport(config["managers", "opaque"])
139 dimensions = doImport(config["managers", "dimensions"])
140 collections = doImport(config["managers", "collections"])
141 datasets = doImport(config["managers", "datasets"])
142 datastoreBridges = doImport(config["managers", "datastores"])
143 versions = ButlerVersionsManager.fromConfig(config.get("schema_versions"))
145 return cls(database, universe, dimensions=dimensions, attributes=attributes, opaque=opaque,
146 collections=collections, datasets=datasets, datastoreBridges=datastoreBridges,
147 versions=versions, writeable=writeable, create=create)
149 def __init__(self, database: Database, universe: DimensionUniverse, *,
150 attributes: Type[ButlerAttributeManager],
151 opaque: Type[OpaqueTableStorageManager],
152 dimensions: Type[DimensionRecordStorageManager],
153 collections: Type[CollectionManager],
154 datasets: Type[DatasetRecordStorageManager],
155 datastoreBridges: Type[DatastoreRegistryBridgeManager],
156 versions: ButlerVersionsManager,
157 writeable: bool = True,
158 create: bool = False):
159 self._db = database
160 self.storageClasses = StorageClassFactory()
161 with self._db.declareStaticTables(create=create) as context:
162 self._attributes = attributes.initialize(self._db, context)
163 self._dimensions = dimensions.initialize(self._db, context, universe=universe)
164 self._collections = collections.initialize(self._db, context)
165 self._datasets = datasets.initialize(self._db, context,
166 collections=self._collections,
167 universe=self.dimensions)
168 self._opaque = opaque.initialize(self._db, context)
169 self._datastoreBridges = datastoreBridges.initialize(self._db, context,
170 opaque=self._opaque,
171 datasets=datasets,
172 universe=self.dimensions)
173 context.addInitializer(lambda db: versions.storeVersions(self._attributes))
175 # This call does not do anything right now as we do not have a way to
176 # split tables between sub-schemas yet.
177 versions.checkVersionDigests()
178 if not create:
179 # verify that configured versions are compatible with schema
180 versions.checkStoredVersions(self._attributes, writeable)
182 self._collections.refresh()
183 self._datasets.refresh(universe=self._dimensions.universe)
185 def __str__(self) -> str:
186 return str(self._db)
188 def __repr__(self) -> str:
189 return f"Registry({self._db!r}, {self.dimensions!r})"
191 def isWriteable(self) -> bool:
192 """Return `True` if this registry allows write operations, and `False`
193 otherwise.
194 """
195 return self._db.isWriteable()
197 @property
198 def dimensions(self) -> DimensionUniverse:
199 """All dimensions recognized by this `Registry` (`DimensionUniverse`).
200 """
201 return self._dimensions.universe
203 @contextlib.contextmanager
204 def transaction(self) -> Iterator[None]:
205 """Return a context manager that represents a transaction.
206 """
207 # TODO make savepoint=False the default.
208 try:
209 with self._db.transaction():
210 yield
211 except BaseException:
212 # TODO: this clears the caches sometimes when we wouldn't actually
213 # need to. Can we avoid that?
214 self._dimensions.clearCaches()
215 raise
217 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
218 """Add an opaque (to the `Registry`) table for use by a `Datastore` or
219 other data repository client.
221 Opaque table records can be added via `insertOpaqueData`, retrieved via
222 `fetchOpaqueData`, and removed via `deleteOpaqueData`.
224 Parameters
225 ----------
226 tableName : `str`
227 Logical name of the opaque table. This may differ from the
228 actual name used in the database by a prefix and/or suffix.
229 spec : `ddl.TableSpec`
230 Specification for the table to be added.
231 """
232 self._opaque.register(tableName, spec)
234 @transactional
235 def insertOpaqueData(self, tableName: str, *data: dict) -> None:
236 """Insert records into an opaque table.
238 Parameters
239 ----------
240 tableName : `str`
241 Logical name of the opaque table. Must match the name used in a
242 previous call to `registerOpaqueTable`.
243 data
244 Each additional positional argument is a dictionary that represents
245 a single row to be added.
246 """
247 self._opaque[tableName].insert(*data)
249 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]:
250 """Retrieve records from an opaque table.
252 Parameters
253 ----------
254 tableName : `str`
255 Logical name of the opaque table. Must match the name used in a
256 previous call to `registerOpaqueTable`.
257 where
258 Additional keyword arguments are interpreted as equality
259 constraints that restrict the returned rows (combined with AND);
260 keyword arguments are column names and values are the values they
261 must have.
263 Yields
264 ------
265 row : `dict`
266 A dictionary representing a single result row.
267 """
268 yield from self._opaque[tableName].fetch(**where)
270 @transactional
271 def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
272 """Remove records from an opaque table.
274 Parameters
275 ----------
276 tableName : `str`
277 Logical name of the opaque table. Must match the name used in a
278 previous call to `registerOpaqueTable`.
279 where
280 Additional keyword arguments are interpreted as equality
281 constraints that restrict the deleted rows (combined with AND);
282 keyword arguments are column names and values are the values they
283 must have.
284 """
285 self._opaque[tableName].delete(**where)
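# Sketch of the opaque-table round trip described above (not part of the
# module source). The table name, field specifications, and row values are
# hypothetical; the ``ddl.TableSpec``/``ddl.FieldSpec`` usage is an
# assumption about that interface, and ``registry`` is an existing instance.
#
#     >>> import sqlalchemy
#     >>> from lsst.daf.butler import ddl
#     >>> spec = ddl.TableSpec(fields=[
#     ...     ddl.FieldSpec("dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
#     ...     ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
#     ... ])
#     >>> registry.registerOpaqueTable("datastore_records", spec)
#     >>> registry.insertOpaqueData("datastore_records", {"dataset_id": 1, "path": "a/b.fits"})
#     >>> rows = list(registry.fetchOpaqueData("datastore_records", dataset_id=1))
#     >>> registry.deleteOpaqueData("datastore_records", dataset_id=1)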
287 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED) -> None:
288 """Add a new collection if one with the given name does not exist.
290 Parameters
291 ----------
292 name : `str`
293 The name of the collection to create.
294 type : `CollectionType`
295 Enum value indicating the type of collection to create.
297 Notes
298 -----
299 This method cannot be called within transactions, as it needs to be
300 able to perform its own transaction to be concurrent.
301 """
302 self._collections.register(name, type)
304 def getCollectionType(self, name: str) -> CollectionType:
305 """Return an enumeration value indicating the type of the given
306 collection.
308 Parameters
309 ----------
310 name : `str`
311 The name of the collection.
313 Returns
314 -------
315 type : `CollectionType`
316 Enum value indicating the type of this collection.
318 Raises
319 ------
320 MissingCollectionError
321 Raised if no collection with the given name exists.
322 """
323 return self._collections.find(name).type
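# Usage sketch (not part of the module source): registering a TAGGED
# collection and checking its type. The collection name is hypothetical and
# ``CollectionType`` is assumed to be re-exported by the package.
#
#     >>> from lsst.daf.butler.registry import CollectionType
#     >>> registry.registerCollection("tagged/good-seeing", CollectionType.TAGGED)
#     >>> registry.getCollectionType("tagged/good-seeing") is CollectionType.TAGGED
#     True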
325 def registerRun(self, name: str) -> None:
326 """Add a new run if one with the given name does not exist.
328 Parameters
329 ----------
330 name : `str`
331 The name of the run to create.
333 Notes
334 -----
335 This method cannot be called within transactions, as it needs to be
336 able to perform its own transaction to be concurrent.
337 """
338 self._collections.register(name, CollectionType.RUN)
340 @transactional
341 def removeCollection(self, name: str) -> None:
342 """Completely remove the given collection.
344 Parameters
345 ----------
346 name : `str`
347 The name of the collection to remove.
349 Raises
350 ------
351 MissingCollectionError
352 Raised if no collection with the given name exists.
354 Notes
355 -----
356 If this is a `~CollectionType.RUN` collection, all datasets and quanta
357 in it are also fully removed. This requires that those datasets be
358 removed (or at least trashed) from any datastores that hold them first.
360 A collection may not be deleted as long as it is referenced by a
361 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
362 be deleted or redefined first.
363 """
364 self._collections.remove(name)
366 def getCollectionChain(self, parent: str) -> CollectionSearch:
367 """Return the child collections in a `~CollectionType.CHAINED`
368 collection.
370 Parameters
371 ----------
372 parent : `str`
373 Name of the chained collection. Must have already been added via
374 a call to `Registry.registerCollection`.
376 Returns
377 -------
378 children : `CollectionSearch`
379 An object that defines the search path of the collection.
380 See :ref:`daf_butler_collection_expressions` for more information.
382 Raises
383 ------
384 MissingCollectionError
385 Raised if ``parent`` does not exist in the `Registry`.
386 TypeError
387 Raised if ``parent`` does not correspond to a
388 `~CollectionType.CHAINED` collection.
389 """
390 record = self._collections.find(parent)
391 if record.type is not CollectionType.CHAINED:
392 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
393 assert isinstance(record, ChainedCollectionRecord)
394 return record.children
396 @transactional
397 def setCollectionChain(self, parent: str, children: Any) -> None:
398 """Define or redefine a `~CollectionType.CHAINED` collection.
400 Parameters
401 ----------
402 parent : `str`
403 Name of the chained collection. Must have already been added via
404 a call to `Registry.registerCollection`.
405 children : `Any`
406 An expression defining an ordered search of child collections,
407 generally an iterable of `str`. Restrictions on the dataset types
408 to be searched can also be included, by passing a mapping or an
409 iterable containing tuples; see
410 :ref:`daf_butler_collection_expressions` for more information.
412 Raises
413 ------
414 MissingCollectionError
415 Raised when any of the given collections do not exist in the
416 `Registry`.
417 TypeError
418 Raised if ``parent`` does not correspond to a
419 `~CollectionType.CHAINED` collection.
420 ValueError
421 Raised if the given collections contain a cycle.
422 """
423 record = self._collections.find(parent)
424 if record.type is not CollectionType.CHAINED:
425 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
426 assert isinstance(record, ChainedCollectionRecord)
427 children = CollectionSearch.fromExpression(children)
428 if children != record.children:
429 record.update(self._collections, children)
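# Sketch (not part of the module source) of defining a CHAINED collection
# with the methods above; all collection names are hypothetical.
#
#     >>> from lsst.daf.butler.registry import CollectionType
#     >>> registry.registerRun("run/2020-01")
#     >>> registry.registerRun("run/2020-02")
#     >>> registry.registerCollection("releases/v1", CollectionType.CHAINED)
#     >>> registry.setCollectionChain("releases/v1", ["run/2020-02", "run/2020-01"])
#     >>> chain = registry.getCollectionChain("releases/v1")   # ordered CollectionSearch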
431 def registerDatasetType(self, datasetType: DatasetType) -> bool:
432 """
433 Add a new `DatasetType` to the Registry.
435 It is not an error to register the same `DatasetType` twice.
437 Parameters
438 ----------
439 datasetType : `DatasetType`
440 The `DatasetType` to be added.
442 Returns
443 -------
444 inserted : `bool`
445 `True` if ``datasetType`` was inserted, `False` if an identical
446 existing `DatasetType` was found. Note that in either case the
447 DatasetType is guaranteed to be defined in the Registry
448 consistently with the given definition.
450 Raises
451 ------
452 ValueError
453 Raised if the dimensions or storage class are invalid.
454 ConflictingDefinitionError
455 Raised if this DatasetType is already registered with a different
456 definition.
458 Notes
459 -----
460 This method cannot be called within transactions, as it needs to be
461 able to perform its own transaction to be concurrent.
462 """
463 _, inserted = self._datasets.register(datasetType)
464 return inserted
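# Sketch (not part of the module source): defining and registering a dataset
# type. The name, dimensions, and "ExposureF" storage class are hypothetical
# choices, and the `DatasetType` constructor arguments are assumptions about
# that class.
#
#     >>> from lsst.daf.butler import DatasetType
#     >>> flatType = DatasetType("flat",
#     ...                        dimensions=["instrument", "detector", "physical_filter"],
#     ...                        storageClass="ExposureF",
#     ...                        universe=registry.dimensions)
#     >>> registry.registerDatasetType(flatType)
#     True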
466 def getDatasetType(self, name: str) -> DatasetType:
467 """Get the `DatasetType`.
469 Parameters
470 ----------
471 name : `str`
472 Name of the type.
474 Returns
475 -------
476 type : `DatasetType`
477 The `DatasetType` associated with the given name.
479 Raises
480 ------
481 KeyError
482 Requested named DatasetType could not be found in registry.
483 """
484 storage = self._datasets.find(name)
485 if storage is None:
486 raise KeyError(f"DatasetType '{name}' could not be found.")
487 return storage.datasetType
489 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *,
490 collections: Any, **kwargs: Any) -> Optional[DatasetRef]:
491 """Find a dataset given its `DatasetType` and data ID.
493 This can be used to obtain a `DatasetRef` that permits the dataset to
494 be read from a `Datastore`. If the dataset is a component and can not
495 be found using the provided dataset type, a dataset ref for the parent
496 will be returned instead but with the correct dataset type.
498 Parameters
499 ----------
500 datasetType : `DatasetType` or `str`
501 A `DatasetType` or the name of one.
502 dataId : `dict` or `DataCoordinate`, optional
503 A `dict`-like object containing the `Dimension` links that identify
504 the dataset within a collection.
505 collections
506 An expression that fully or partially identifies the collections
507 to search for the dataset, such as a `str`, `re.Pattern`, or
508 iterable thereof. `...` can be used to return all collections.
509 See :ref:`daf_butler_collection_expressions` for more information.
510 **kwargs
511 Additional keyword arguments passed to
512 `DataCoordinate.standardize` to convert ``dataId`` to a true
513 `DataCoordinate` or augment an existing one.
515 Returns
516 -------
517 ref : `DatasetRef`
518 A reference to the dataset, or `None` if no matching Dataset
519 was found.
521 Raises
522 ------
523 LookupError
524 Raised if one or more data ID keys are missing or the dataset type
525 does not exist.
526 MissingCollectionError
527 Raised if any of ``collections`` does not exist in the registry.
528 """
529 if isinstance(datasetType, DatasetType):
530 storage = self._datasets.find(datasetType.name)
531 if storage is None:
532 raise LookupError(f"DatasetType '{datasetType}' has not been registered.")
533 else:
534 storage = self._datasets.find(datasetType)
535 if storage is None:
536 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.")
537 dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions,
538 universe=self.dimensions, **kwargs)
539 collections = CollectionSearch.fromExpression(collections)
540 for collectionRecord in collections.iter(self._collections, datasetType=storage.datasetType):
541 result = storage.find(collectionRecord, dataId)
542 if result is not None:
543 return result
545 return None
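# Sketch (not part of the module source): looking up a single dataset by
# dataset type, data ID, and collection search path. All names and values
# are hypothetical.
#
#     >>> ref = registry.findDataset("flat",
#     ...                            instrument="HypotheticalCam",
#     ...                            detector=12,
#     ...                            physical_filter="g",
#     ...                            collections=["calib/v1"])
#     >>> ref is None or ref.id is not None   # resolved ref, or None if absent
#     True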
547 @transactional
548 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId],
549 run: str) -> List[DatasetRef]:
550 """Insert one or more datasets into the `Registry`
552 This always adds new datasets; to associate existing datasets with
553 a new collection, use ``associate``.
555 Parameters
556 ----------
557 datasetType : `DatasetType` or `str`
558 A `DatasetType` or the name of one.
559 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
560 Dimension-based identifiers for the new datasets.
561 run : `str`
562 The name of the run that produced the datasets.
564 Returns
565 -------
566 refs : `list` of `DatasetRef`
567 Resolved `DatasetRef` instances for all given data IDs (in the same
568 order).
570 Raises
571 ------
572 ConflictingDefinitionError
573 If a dataset with the same dataset type and data ID as one of those
574 given already exists in ``run``.
575 MissingCollectionError
576 Raised if ``run`` does not exist in the registry.
577 """
578 if isinstance(datasetType, DatasetType):
579 storage = self._datasets.find(datasetType.name)
580 if storage is None:
581 raise LookupError(f"DatasetType '{datasetType}' has not been registered.")
582 else:
583 storage = self._datasets.find(datasetType)
584 if storage is None:
585 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.")
586 runRecord = self._collections.find(run)
587 if runRecord.type is not CollectionType.RUN:
588 raise TypeError("Given collection is of type {runRecord.type.name}; RUN collection required.")
589 assert isinstance(runRecord, RunRecord)
590 expandedDataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions)
591 for dataId in dataIds]
592 try:
593 refs = list(storage.insert(runRecord, expandedDataIds))
594 except sqlalchemy.exc.IntegrityError as err:
595 raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting "
596 f"one or more datasets of type {storage.datasetType} into "
597 f"collection '{run}'. "
598 f"This probably means a dataset with the same data ID "
599 f"and dataset type already exists, but it may also mean a "
600 f"dimension row is missing.") from err
601 return refs
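# Sketch (not part of the module source): inserting a new dataset into a RUN
# collection. The run name, dataset type, and data ID values are
# hypothetical; the dataset type and dimension records must already exist.
#
#     >>> registry.registerRun("run/ingest")
#     >>> (ref,) = registry.insertDatasets(
#     ...     "flat",
#     ...     dataIds=[dict(instrument="HypotheticalCam", detector=12, physical_filter="g")],
#     ...     run="run/ingest",
#     ... )
#     >>> ref.run
#     'run/ingest'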
603 def getDataset(self, id: int) -> Optional[DatasetRef]:
604 """Retrieve a Dataset entry.
606 Parameters
607 ----------
608 id : `int`
609 The unique identifier for the dataset.
611 Returns
612 -------
613 ref : `DatasetRef` or `None`
614 A ref to the Dataset, or `None` if no matching Dataset
615 was found.
616 """
617 ref = self._datasets.getDatasetRef(id, universe=self.dimensions)
618 if ref is None:
619 return None
620 return ref
622 @transactional
623 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
624 """Remove datasets from the Registry.
626 The datasets will be removed unconditionally from all collections, and
627 any `Quantum` that consumed this dataset will instead be marked as
628 having a NULL input. `Datastore` records will *not* be deleted; the
629 caller is responsible for ensuring that the dataset has already been
630 removed from all Datastores.
632 Parameters
633 ----------
634 refs : `Iterable` of `DatasetRef`
635 References to the datasets to be removed. Must include a valid
636 ``id`` attribute, and should be considered invalidated upon return.
638 Raises
639 ------
640 AmbiguousDatasetError
641 Raised if any ``ref.id`` is `None`.
642 OrphanedRecordError
643 Raised if any dataset is still present in any `Datastore`.
644 """
645 for datasetType, refsForType in DatasetRef.groupByType(refs).items():
646 storage = self._datasets.find(datasetType.name)
647 assert storage is not None
648 try:
649 storage.delete(refsForType)
650 except sqlalchemy.exc.IntegrityError as err:
651 raise OrphanedRecordError("One or more datasets is still "
652 "present in one or more Datastores.") from err
654 @transactional
655 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
656 """Add existing datasets to a `~CollectionType.TAGGED` collection.
658 If a `DatasetRef` with the same integer ID is already in a
659 collection, nothing is changed. If a `DatasetRef` with the same
660 `DatasetType` and data ID but with different integer ID
661 exists in the collection, `ConflictingDefinitionError` is raised.
663 Parameters
664 ----------
665 collection : `str`
666 Indicates the collection the datasets should be associated with.
667 refs : `Iterable` [ `DatasetRef` ]
668 An iterable of resolved `DatasetRef` instances that already exist
669 in this `Registry`.
671 Raises
672 ------
673 ConflictingDefinitionError
674 If a Dataset with the given `DatasetRef` already exists in the
675 given collection.
676 AmbiguousDatasetError
677 Raised if ``any(ref.id is None for ref in refs)``.
678 MissingCollectionError
679 Raised if ``collection`` does not exist in the registry.
680 TypeError
681 Raised if adding new datasets to the given ``collection`` is not
682 allowed.
683 """
684 collectionRecord = self._collections.find(collection)
685 if collectionRecord.type is not CollectionType.TAGGED:
686 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.")
687 for datasetType, refsForType in DatasetRef.groupByType(refs).items():
688 storage = self._datasets.find(datasetType.name)
689 assert storage is not None
690 try:
691 storage.associate(collectionRecord, refsForType)
692 except sqlalchemy.exc.IntegrityError as err:
693 raise ConflictingDefinitionError(
694 f"Constraint violation while associating dataset of type {datasetType.name} with "
695 f"collection {collection}. This probably means that one or more datasets with the same "
696 f"dataset type and data ID already exist in the collection, but it may also indicate "
697 f"that the datasets do not exist."
698 ) from err
700 @transactional
701 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
702 """Remove existing datasets from a `~CollectionType.TAGGED` collection.
704 ``collection`` and ``ref`` combinations that are not currently
705 associated are silently ignored.
707 Parameters
708 ----------
709 collection : `str`
710 The collection the datasets should no longer be associated with.
711 refs : `Iterable` [ `DatasetRef` ]
712 An iterable of resolved `DatasetRef` instances that already exist
713 in this `Registry`.
715 Raises
716 ------
717 AmbiguousDatasetError
718 Raised if any of the given dataset references is unresolved.
719 MissingCollectionError
720 Raised if ``collection`` does not exist in the registry.
721 TypeError
722 Raised if removing datasets from the given ``collection`` is not
723 allowed.
724 """
725 collectionRecord = self._collections.find(collection)
726 if collectionRecord.type is not CollectionType.TAGGED:
727 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; "
728 "expected TAGGED.")
729 for datasetType, refsForType in DatasetRef.groupByType(refs).items():
730 storage = self._datasets.find(datasetType.name)
731 assert storage is not None
732 storage.disassociate(collectionRecord, refsForType)
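# Sketch (not part of the module source): tagging and untagging existing
# resolved datasets. The collection name is hypothetical and ``refs`` stands
# for an iterable of resolved `DatasetRef` instances obtained elsewhere.
#
#     >>> from lsst.daf.butler.registry import CollectionType
#     >>> registry.registerCollection("tagged/best-flats", CollectionType.TAGGED)
#     >>> registry.associate("tagged/best-flats", refs)
#     >>> registry.disassociate("tagged/best-flats", refs)   # missing pairs are ignored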
734 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
735 """Return an object that allows a new `Datastore` instance to
736 communicate with this `Registry`.
738 Returns
739 -------
740 manager : `DatastoreRegistryBridgeManager`
741 Object that mediates communication between this `Registry` and its
742 associated datastores.
743 """
744 return self._datastoreBridges
746 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
747 """Retrieve datastore locations for a given dataset.
749 Parameters
750 ----------
751 ref : `DatasetRef`
752 A reference to the dataset for which to retrieve storage
753 information.
755 Returns
756 -------
757 datastores : `Iterable` [ `str` ]
758 All the matching datastores holding this dataset.
760 Raises
761 ------
762 AmbiguousDatasetError
763 Raised if ``ref.id`` is `None`.
764 """
765 return self._datastoreBridges.findDatastores(ref)
767 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None,
768 records: Optional[NameLookupMapping[DimensionElement, Optional[DimensionRecord]]] = None,
769 **kwargs: Any) -> DataCoordinate:
770 """Expand a dimension-based data ID to include additional information.
772 Parameters
773 ----------
774 dataId : `DataCoordinate` or `dict`, optional
775 Data ID to be expanded; augmented and overridden by ``kwargs``.
776 graph : `DimensionGraph`, optional
777 Set of dimensions for the expanded ID. If `None`, the dimensions
778 will be inferred from the keys of ``dataId`` and ``kwargs``.
779 Dimensions that are in ``dataId`` or ``kwargs`` but not in ``graph``
780 are silently ignored, providing a way to extract and expand a
781 subset of a data ID.
782 records : `Mapping` [`str`, `DimensionRecord`], optional
783 Dimension record data to use before querying the database for that
784 data, keyed by element name.
785 **kwargs
786 Additional keywords are treated like additional key-value pairs for
787 ``dataId``, extending and overriding it.
789 Returns
790 -------
791 expanded : `DataCoordinate`
792 A data ID that includes full metadata for all of the dimensions it
793 identifies, i.e. guarantees that ``expanded.hasRecords()`` and
794 ``expanded.hasFull()`` both return `True`.
795 """
796 standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions, **kwargs)
797 if standardized.hasRecords():
798 return standardized
799 if records is None:
800 records = {}
801 elif isinstance(records, NamedKeyMapping):
802 records = records.byName()
803 else:
804 records = dict(records)
805 if isinstance(dataId, DataCoordinate) and dataId.hasRecords():
806 records.update(dataId.records.byName())
807 keys = standardized.byName()
808 for element in standardized.graph.primaryKeyTraversalOrder:
809 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL
810 if record is ...:
811 if isinstance(element, Dimension) and keys.get(element.name) is None:
812 if element in standardized.graph.required:
813 raise LookupError(
814 f"No value or null value for required dimension {element.name}."
815 )
816 keys[element.name] = None
817 record = None
818 else:
819 storage = self._dimensions[element]
820 dataIdSet = DataCoordinateIterable.fromScalar(
821 DataCoordinate.standardize(keys, graph=element.graph)
822 )
823 fetched = tuple(storage.fetch(dataIdSet))
824 try:
825 (record,) = fetched
826 except ValueError:
827 record = None
828 records[element.name] = record
829 if record is not None:
830 for d in element.implied:
831 value = getattr(record, d.name)
832 if keys.setdefault(d.name, value) != value:
833 raise InconsistentDataIdError(
834 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
835 f"but {element.name} implies {d.name}={value!r}."
836 )
837 else:
838 if element in standardized.graph.required:
839 raise LookupError(
840 f"Could not fetch record for required dimension {element.name} via keys {keys}."
841 )
842 if element.alwaysJoin:
843 raise InconsistentDataIdError(
844 f"Could not fetch record for element {element.name} via keys {keys}, ",
845 "but it is marked alwaysJoin=True; this means one or more dimensions are not "
846 "related."
847 )
848 for d in element.implied:
849 keys.setdefault(d.name, None)
850 records.setdefault(d.name, None)
851 return DataCoordinate.standardize(keys, graph=standardized.graph).expanded(records=records)
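# Sketch (not part of the module source): expanding a minimal data ID so that
# dimension records are attached. The key values are hypothetical and the
# corresponding dimension records are assumed to exist in the registry.
#
#     >>> dataId = registry.expandDataId(instrument="HypotheticalCam", exposure=42)
#     >>> dataId.hasFull() and dataId.hasRecords()
#     True
#     >>> record = dataId.records["exposure"]   # full DimensionRecord for the exposure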
853 def insertDimensionData(self, element: Union[DimensionElement, str],
854 *data: Union[Mapping[str, Any], DimensionRecord],
855 conform: bool = True) -> None:
856 """Insert one or more dimension records into the database.
858 Parameters
859 ----------
860 element : `DimensionElement` or `str`
861 The `DimensionElement` or name thereof that identifies the table
862 records will be inserted into.
863 data : `dict` or `DimensionRecord` (variadic)
864 One or more records to insert.
865 conform : `bool`, optional
866 If `False` (`True` is default) perform no checking or conversions,
867 and assume that ``element`` is a `DimensionElement` instance and
868 ``data`` is one or more `DimensionRecord` instances of the
869 appropriate subclass.
870 """
871 if conform:
872 if isinstance(element, str):
873 element = self.dimensions[element]
874 records = [row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row)
875 for row in data]
876 else:
877 # Ignore typing since caller said to trust them with conform=False.
878 records = data # type: ignore
879 storage = self._dimensions[element] # type: ignore
880 storage.insert(*records)
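# Sketch (not part of the module source): inserting a dimension record from a
# plain dict. The instrument values are hypothetical and the accepted keys
# depend on the configured dimension universe.
#
#     >>> registry.insertDimensionData(
#     ...     "instrument",
#     ...     {"name": "HypotheticalCam", "class_name": "hypothetical.Cam",
#     ...      "detector_max": 4, "visit_max": 100000, "exposure_max": 100000},
#     ... )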
882 def syncDimensionData(self, element: Union[DimensionElement, str],
883 row: Union[Mapping[str, Any], DimensionRecord],
884 conform: bool = True) -> bool:
885 """Synchronize the given dimension record with the database, inserting
886 if it does not already exist and comparing values if it does.
888 Parameters
889 ----------
890 element : `DimensionElement` or `str`
891 The `DimensionElement` or name thereof that identifies the table
892 records will be inserted into.
893 row : `dict` or `DimensionRecord`
894 The record to insert.
895 conform : `bool`, optional
896 If `False` (`True` is default) perform no checking or conversions,
897 and assume that ``element`` is a `DimensionElement` instance and
898 ``row`` is a `DimensionRecord` instance of the appropriate
899 subclass.
901 Returns
902 -------
903 inserted : `bool`
904 `True` if a new row was inserted, `False` otherwise.
906 Raises
907 ------
908 ConflictingDefinitionError
909 Raised if the record exists in the database (according to primary
910 key lookup) but is inconsistent with the given one.
912 Notes
913 -----
914 This method cannot be called within transactions, as it needs to be
915 able to perform its own transaction to be concurrent.
916 """
917 if conform:
918 if isinstance(element, str):
919 element = self.dimensions[element]
920 record = row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row)
921 else:
922 # Ignore typing since caller said to trust them with conform=False.
923 record = row # type: ignore
924 storage = self._dimensions[element] # type: ignore
925 return storage.sync(record)
927 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None
928 ) -> Iterator[DatasetType]:
929 """Iterate over the dataset types whose names match an expression.
931 Parameters
932 ----------
933 expression : `Any`, optional
934 An expression that fully or partially identifies the dataset types
935 to return, such as a `str`, `re.Pattern`, or iterable thereof.
936 `...` can be used to return all dataset types, and is the default.
937 See :ref:`daf_butler_dataset_type_expressions` for more
938 information.
939 components : `bool`, optional
940 If `True`, apply all expression patterns to component dataset type
941 names as well. If `False`, never apply patterns to components.
942 If `None` (default), apply patterns to components only if their
943 parent datasets were not matched by the expression.
944 Fully-specified component datasets (`str` or `DatasetType`
945 instances) are always included.
947 Yields
948 ------
949 datasetType : `DatasetType`
950 A `DatasetType` instance whose name matches ``expression``.
951 """
952 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name)
953 if wildcard is Ellipsis:
954 for datasetType in self._datasets:
955 # Dataset types stored in the registry are never components.
956 yield datasetType
957 if components and datasetType.isComposite():
958 # Automatically create the component dataset types
959 for component in datasetType.makeAllComponentDatasetTypes():
960 yield component
961 return
962 done: Set[str] = set()
963 for name in wildcard.strings:
964 storage = self._datasets.find(name)
965 if storage is not None:
966 done.add(storage.datasetType.name)
967 yield storage.datasetType
968 if wildcard.patterns:
969 # If components (the argument) is None, we'll save component
970 # dataset types that we might want to match, but only if their parents
971 # didn't get included.
972 componentsForLater = []
973 for registeredDatasetType in self._datasets:
974 # Components are not stored in registry so expand them here
975 allDatasetTypes = [registeredDatasetType] \
976 + registeredDatasetType.makeAllComponentDatasetTypes()
977 for datasetType in allDatasetTypes:
978 if datasetType.name in done:
979 continue
980 parentName, componentName = datasetType.nameAndComponent()
981 if componentName is not None and not components:
982 if components is None and parentName not in done:
983 componentsForLater.append(datasetType)
984 continue
985 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
986 done.add(datasetType.name)
987 yield datasetType
988 # Go back and try to match saved components.
989 for datasetType in componentsForLater:
990 parentName, _ = datasetType.nameAndComponent()
991 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
992 yield datasetType
994 def queryCollections(self, expression: Any = ...,
995 datasetType: Optional[DatasetType] = None,
996 collectionType: Optional[CollectionType] = None,
997 flattenChains: bool = False,
998 includeChains: Optional[bool] = None) -> Iterator[str]:
999 """Iterate over the collections whose names match an expression.
1001 Parameters
1002 ----------
1003 expression : `Any`, optional
1004 An expression that fully or partially identifies the collections
1005 to return, such as a `str`, `re.Pattern`, or iterable thereof.
1006 `...` can be used to return all collections, and is the default.
1007 See :ref:`daf_butler_collection_expressions` for more
1008 information.
1009 datasetType : `DatasetType`, optional
1010 If provided, only yield collections that should be searched for
1011 this dataset type according to ``expression``. If this is
1012 not provided, any dataset type restrictions in ``expression`` are
1013 ignored.
1014 collectionType : `CollectionType`, optional
1015 If provided, only yield collections of this type.
1016 flattenChains : `bool`, optional
1017 If `True` (`False` is default), recursively yield the child
1018 collections of matching `~CollectionType.CHAINED` collections.
1019 includeChains : `bool`, optional
1020 If `True`, yield records for matching `~CollectionType.CHAINED`
1021 collections. Default is the opposite of ``flattenChains``: include
1022 either CHAINED collections or their children, but not both.
1024 Yields
1025 ------
1026 collection : `str`
1027 The name of a collection that matches ``expression``.
1028 """
1029 query = CollectionQuery.fromExpression(expression)
1030 for record in query.iter(self._collections, datasetType=datasetType, collectionType=collectionType,
1031 flattenChains=flattenChains, includeChains=includeChains):
1032 yield record.name
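# Sketch (not part of the module source): listing RUN collections whose names
# match a hypothetical pattern, flattening any CHAINED collections.
#
#     >>> import re
#     >>> from lsst.daf.butler.registry import CollectionType
#     >>> names = list(registry.queryCollections(re.compile(r"run/.*"),
#     ...                                        collectionType=CollectionType.RUN,
#     ...                                        flattenChains=True))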
1034 def makeQueryBuilder(self, summary: QuerySummary) -> QueryBuilder:
1035 """Return a `QueryBuilder` instance capable of constructing and
1036 managing more complex queries than those obtainable via `Registry`
1037 interfaces.
1039 This is an advanced interface; downstream code should prefer
1040 `Registry.queryDimensions` and `Registry.queryDatasets` whenever those
1041 are sufficient.
1043 Parameters
1044 ----------
1045 summary : `QuerySummary`
1046 Object describing and categorizing the full set of dimensions that
1047 will be included in the query.
1049 Returns
1050 -------
1051 builder : `QueryBuilder`
1052 Object that can be used to construct and perform advanced queries.
1053 """
1054 return QueryBuilder(summary=summary,
1055 collections=self._collections,
1056 dimensions=self._dimensions,
1057 datasets=self._datasets)
1059 def queryDimensions(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *,
1060 dataId: Optional[DataId] = None,
1061 datasets: Any = None,
1062 collections: Any = None,
1063 where: Optional[str] = None,
1064 expand: bool = True,
1065 components: Optional[bool] = None,
1066 **kwargs: Any) -> Iterator[DataCoordinate]:
1067 """Query for and iterate over data IDs matching user-provided criteria.
1069 Parameters
1070 ----------
1071 dimensions : `Dimension` or `str`, or iterable thereof
1072 The dimensions of the data IDs to yield, as either `Dimension`
1073 instances or `str`. Will be automatically expanded to a complete
1074 `DimensionGraph`.
1075 dataId : `dict` or `DataCoordinate`, optional
1076 A data ID whose key-value pairs are used as equality constraints
1077 in the query.
1078 datasets : `Any`, optional
1079 An expression that fully or partially identifies dataset types
1080 that should constrain the yielded data IDs. For example, including
1081 "raw" here would constrain the yielded ``instrument``,
1082 ``exposure``, ``detector``, and ``physical_filter`` values to only
1083 those for which at least one "raw" dataset exists in
1084 ``collections``. Allowed types include `DatasetType`, `str`,
1085 `re.Pattern`, and iterables thereof. Unlike other dataset type
1086 expressions, `...` is not permitted, because it doesn't make sense to
1087 constrain data IDs on the existence of *all* datasets.
1088 See :ref:`daf_butler_dataset_type_expressions` for more
1089 information.
1090 collections : `Any`, optional
1091 An expression that fully or partially identifies the collections
1092 to search for datasets, such as a `str`, `re.Pattern`, or iterable
1093 thereof. `...` can be used to return all collections. Must be
1094 provided if ``datasets`` is, and is ignored if it is not. See
1095 :ref:`daf_butler_collection_expressions` for more information.
1096 where : `str`, optional
1097 A string expression similar to a SQL WHERE clause. May involve
1098 any column of a dimension table or (as a shortcut for the primary
1099 key column of a dimension table) dimension name. See
1100 :ref:`daf_butler_dimension_expressions` for more information.
1101 expand : `bool`, optional
1102 If `True` (default) yield `DataCoordinate` instances for which
1103 `~DataCoordinate.hasRecords` is guaranteed to return `True`,
1104 performing extra database fetches as necessary.
1105 components : `bool`, optional
1106 If `True`, apply all dataset expression patterns to component
1107 dataset type names as well. If `False`, never apply patterns to
1108 components. If `None` (default), apply patterns to components only
1109 if their parent datasets were not matched by the expression.
1110 Fully-specified component datasets (`str` or `DatasetType`
1111 instances) are always included.
1112 **kwargs
1113 Additional keyword arguments are forwarded to
1114 `DataCoordinate.standardize` when processing the ``dataId``
1115 argument (and may be used to provide a constraining data ID even
1116 when the ``dataId`` argument is `None`).
1118 Yields
1119 ------
1120 dataId : `DataCoordinate`
1121 Data IDs matching the given query parameters. Order is
1122 unspecified.
1123 """
1124 dimensions = iterable(dimensions)
1125 standardizedDataId = self.expandDataId(dataId, **kwargs)
1126 standardizedDatasetTypes = set()
1127 requestedDimensionNames = set(self.dimensions.extract(dimensions).names)
1128 if datasets is not None:
1129 if collections is None:
1130 raise TypeError("Cannot pass 'datasets' without 'collections'.")
1131 for datasetType in self.queryDatasetTypes(datasets, components=components):
1132 requestedDimensionNames.update(datasetType.dimensions.names)
1133 # If any matched dataset type is a component, just operate on
1134 # its parent instead, because Registry doesn't know anything
1135 # about what components exist, and here (unlike queryDatasets)
1136 # we don't care about returning them.
1137 parentDatasetTypeName, componentName = datasetType.nameAndComponent()
1138 if componentName is not None:
1139 datasetType = self.getDatasetType(parentDatasetTypeName)
1140 standardizedDatasetTypes.add(datasetType)
1141 # Preprocess collections expression in case the original included
1142 # single-pass iterators (we'll want to use it multiple times
1143 # below).
1144 collections = CollectionQuery.fromExpression(collections)
1146 summary = QuerySummary(
1147 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames),
1148 dataId=standardizedDataId,
1149 expression=where,
1150 )
1151 builder = self.makeQueryBuilder(summary)
1152 for datasetType in standardizedDatasetTypes:
1153 builder.joinDataset(datasetType, collections, isResult=False)
1154 query = builder.finish()
1155 predicate = query.predicate()
1156 for row in self._db.query(query.sql):
1157 if predicate(row):
1158 result = query.extractDataId(row)
1159 if expand:
1160 yield self.expandDataId(
1161 result,
1162 records=standardizedDataId.records,
1163 )
1164 else:
1165 yield result
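# Sketch (not part of the module source): querying data IDs constrained by
# dataset existence and a string expression. The dataset type, collection,
# and the ``where`` expression are hypothetical.
#
#     >>> dataIds = registry.queryDimensions(
#     ...     ["exposure", "detector"],
#     ...     datasets="raw",
#     ...     collections=["run/ingest"],
#     ...     where="instrument = 'HypotheticalCam'",
#     ... )
#     >>> for dataId in dataIds:
#     ...     print(dataId["exposure"], dataId["detector"])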
1167 def queryDatasets(self, datasetType: Any, *,
1168 collections: Any,
1169 dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
1170 dataId: Optional[DataId] = None,
1171 where: Optional[str] = None,
1172 deduplicate: bool = False,
1173 expand: bool = True,
1174 components: Optional[bool] = None,
1175 **kwargs: Any) -> Iterator[DatasetRef]:
1176 """Query for and iterate over dataset references matching user-provided
1177 criteria.
1179 Parameters
1180 ----------
1181 datasetType
1182 An expression that fully or partially identifies the dataset types
1183 to be queried. Allowed types include `DatasetType`, `str`,
1184 `re.Pattern`, and iterables thereof. The special value `...` can
1185 be used to query all dataset types. See
1186 :ref:`daf_butler_dataset_type_expressions` for more information.
1187 collections
1188 An expression that fully or partially identifies the collections
1189 to search for datasets, such as a `str`, `re.Pattern`, or iterable
1190 thereof. `...` can be used to return all collections. See
1191 :ref:`daf_butler_collection_expressions` for more information.
1192 dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
1193 Dimensions to include in the query (in addition to those used
1194 to identify the queried dataset type(s)), either to constrain
1195 the resulting datasets to those for which a matching dimension
1196 exists, or to relate the dataset type's dimensions to dimensions
1197 referenced by the ``dataId`` or ``where`` arguments.
1198 dataId : `dict` or `DataCoordinate`, optional
1199 A data ID whose key-value pairs are used as equality constraints
1200 in the query.
1201 where : `str`, optional
1202 A string expression similar to a SQL WHERE clause. May involve
1203 any column of a dimension table or (as a shortcut for the primary
1204 key column of a dimension table) dimension name. See
1205 :ref:`daf_butler_dimension_expressions` for more information.
1206 deduplicate : `bool`, optional
1207 If `True` (`False` is default), for each result data ID, only
1208 yield one `DatasetRef` of each `DatasetType`, from the first
1209 collection in which a dataset of that dataset type appears
1210 (according to the order of ``collections`` passed in). If `True`,
1211 ``collections`` must not contain regular expressions and may not
1212 be `...`.
1213 expand : `bool`, optional
1214 If `True` (default) attach `DataCoordinate` instances for which
1215 `~DataCoordinate.hasRecords` is guaranteed to return `True`,
1216 performing extra database fetches as necessary.
1217 components : `bool`, optional
1218 If `True`, apply all dataset expression patterns to component
1219 dataset type names as well. If `False`, never apply patterns to
1220 components. If `None` (default), apply patterns to components only
1221 if their parent datasets were not matched by the expression.
1222 Fully-specified component datasets (`str` or `DatasetType`
1223 instances) are always included.
1224 **kwargs
1225 Additional keyword arguments are forwarded to
1226 `DataCoordinate.standardize` when processing the ``dataId``
1227 argument (and may be used to provide a constraining data ID even
1228 when the ``dataId`` argument is `None`).
1230 Yields
1231 ------
1232 ref : `DatasetRef`
1233 Dataset references matching the given query criteria. These
1234 are grouped by `DatasetType` if the query evaluates to multiple
1235 dataset types, but order is otherwise unspecified.
1237 Raises
1238 ------
1239 TypeError
1240 Raised when the arguments are incompatible, such as when a
1241 collection wildcard is passed when ``deduplicate`` is `True`.
1243 Notes
1244 -----
1245 When multiple dataset types are queried in a single call, the
1246 results of this operation are equivalent to querying for each dataset
1247 type separately in turn, and no information about the relationships
1248 between datasets of different types is included. In contexts where
1249 that kind of information is important, the recommended pattern is to
1250 use `queryDimensions` to first obtain data IDs (possibly with the
1251 desired dataset types and collections passed as constraints to the
1252 query), and then use multiple (generally much simpler) calls to
1253 `queryDatasets` with the returned data IDs passed as constraints.
1254 """
1255 # Standardize the collections expression.
1256 if deduplicate:
1257 collections = CollectionSearch.fromExpression(collections)
1258 else:
1259 collections = CollectionQuery.fromExpression(collections)
1260 # Standardize and expand the data ID provided as a constraint.
1261 standardizedDataId = self.expandDataId(dataId, **kwargs)
1263 # We can only query directly if given a non-component DatasetType
1264 # instance. If we were given an expression or str or a component
1265 # DatasetType instance, we'll populate this dict, recurse, and return.
1266 # If we already have a non-component DatasetType, it will remain None
1267 # and we'll run the query directly.
1268 composition: Optional[
1269 Dict[
1270 DatasetType, # parent dataset type
1271 List[Optional[str]] # component name, or None for parent
1272 ]
1273 ] = None
1274 if not isinstance(datasetType, DatasetType):
1275 # We were given a dataset type expression (which may be as simple
1276 # as a str). Loop over all matching datasets, delegating handling
1277 # of the `components` argument to queryDatasetTypes, as we populate
1278 # the composition dict.
1279 composition = defaultdict(list)
1280 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components):
1281 parentName, componentName = trueDatasetType.nameAndComponent()
1282 if componentName is not None:
1283 parentDatasetType = self.getDatasetType(parentName)
1284 composition.setdefault(parentDatasetType, []).append(componentName)
1285 else:
1286 composition.setdefault(trueDatasetType, []).append(None)
1287 elif datasetType.isComponent():
1288 # We were given a true DatasetType instance, but it's a component.
1289 # the composition dict will have exactly one item.
1290 parentName, componentName = datasetType.nameAndComponent()
1291 parentDatasetType = self.getDatasetType(parentName)
1292 composition = {parentDatasetType: [componentName]}
1293 if composition is not None:
1294 # We need to recurse. Do that once for each parent dataset type.
1295 for parentDatasetType, componentNames in composition.items():
1296 for parentRef in self.queryDatasets(parentDatasetType, collections=collections,
1297 dimensions=dimensions, dataId=standardizedDataId,
1298 where=where, deduplicate=deduplicate):
1299 # Loop over components, yielding one ref for each component
1300 # requested.
1301 for componentName in componentNames:
1302 if componentName is None:
1303 yield parentRef
1304 else:
1305 yield parentRef.makeComponentRef(componentName)
1306 return
1307 # If we get here, there's no need to recurse (or we are already
1308 # recursing; there can only ever be one level of recursion).
1310 # The full set of dimensions in the query is the combination of those
1311 # needed for the DatasetType and those explicitly requested, if any.
1312 requestedDimensionNames = set(datasetType.dimensions.names)
1313 if dimensions is not None:
1314 requestedDimensionNames.update(self.dimensions.extract(dimensions).names)
1315 # Construct the summary structure needed to construct a QueryBuilder.
1316 summary = QuerySummary(
1317 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames),
1318 dataId=standardizedDataId,
1319 expression=where,
1320 )
1321 builder = self.makeQueryBuilder(summary)
1322 # Add the dataset subquery to the query, telling the QueryBuilder to
1323 # include the rank of the selected collection in the results only if we
1324 # need to deduplicate. Note that if any of the collections are
1325 # actually wildcard expressions, and we've asked for deduplication,
1326 # this will raise TypeError for us.
1327 if not builder.joinDataset(datasetType, collections, isResult=True, addRank=deduplicate):
1328 return
1329 query = builder.finish()
1330 predicate = query.predicate()
1331 if not deduplicate:
1332 # No need to de-duplicate across collections.
1333 for row in self._db.query(query.sql):
1334 if predicate(row):
1335 dataId = query.extractDataId(row, graph=datasetType.dimensions)
1336 if expand:
1337 dataId = self.expandDataId(
1338 dataId,
1339 records=standardizedDataId.records
1340 )
1341 yield query.extractDatasetRef(row, datasetType, dataId)[0]
1342 else:
1343 # For each data ID, yield only the DatasetRef with the lowest
1344 # collection rank.
1345 bestRefs = {}
1346 bestRanks: Dict[DataCoordinate, int] = {}
1347 for row in self._db.query(query.sql):
1348 if predicate(row):
1349 ref, rank = query.extractDatasetRef(row, datasetType)
1350 bestRank = bestRanks.get(ref.dataId, sys.maxsize)
1351 assert rank is not None
1352 if rank < bestRank:
1353 bestRefs[ref.dataId] = ref
1354 bestRanks[ref.dataId] = rank
1355 # If caller requested expanded data IDs, we defer that until here
1356 # so we do as little expansion as possible.
1357 if expand:
1358 for ref in bestRefs.values():
1359 dataId = self.expandDataId(
1360 ref.dataId,
1361 records=standardizedDataId.records
1362 )
1363 yield ref.expanded(dataId)
1364 else:
1365 yield from bestRefs.values()
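# Sketch (not part of the module source): taking the first match of each data
# ID across an ordered collection search (``deduplicate=True``). Names and
# the ``where`` expression are hypothetical.
#
#     >>> refs = registry.queryDatasets(
#     ...     "flat",
#     ...     collections=["calib/v2", "calib/v1"],   # ordered; no wildcards with deduplicate
#     ...     where="detector = 12",
#     ...     deduplicate=True,
#     ... )
#     >>> for ref in refs:
#     ...     print(ref.datasetType.name, ref.dataId, ref.run)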
1367 storageClasses: StorageClassFactory
1368 """All storage classes known to the registry (`StorageClassFactory`).
1369 """