# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "Registry",
)

from collections import defaultdict
import contextlib
import sys
from typing import (
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Set,
    Type,
    TYPE_CHECKING,
    Union,
)

import sqlalchemy

from ..core import (
    Config,
    DataCoordinate,
    DataId,
    DatasetRef,
    DatasetType,
    ddl,
    Dimension,
    DimensionElement,
    DimensionGraph,
    DimensionRecord,
    DimensionUniverse,
    ExpandedDataCoordinate,
    NamedKeyDict,
    StorageClassFactory,
)
from ..core.utils import doImport, iterable, transactional
from ._config import RegistryConfig
from .queries import (
    QueryBuilder,
    QuerySummary,
)
from ._collectionType import CollectionType
from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError
from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch, Ellipsis
from .interfaces import ChainedCollectionRecord, RunRecord

if TYPE_CHECKING:
    from ..butlerConfig import ButlerConfig
    from .interfaces import (
        ButlerAttributeManager,
        CollectionManager,
        Database,
        OpaqueTableStorageManager,
        DimensionRecordStorageManager,
        DatasetRecordStorageManager,
        DatastoreRegistryBridgeManager,
    )


class Registry:
    """Registry interface.

    Parameters
    ----------
    config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
        Registry configuration.
    """

    defaultConfigFile = None
    """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or
    absolute path. Can be None if no defaults specified.
    """

    @classmethod
    def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], create: bool = False,
                   butlerRoot: Optional[str] = None, writeable: bool = True) -> Registry:
        """Create `Registry` subclass instance from `config`.

        Uses ``registry.cls`` from `config` to determine which subclass to
        instantiate.

        Parameters
        ----------
        config : `ButlerConfig`, `RegistryConfig`, `Config` or `str`
            Registry configuration.
        create : `bool`, optional
            Assume an empty Registry and create a new one.
        butlerRoot : `str`, optional
            Path to the repository root this `Registry` will manage.
        writeable : `bool`, optional
            If `True` (default) create a read-write connection to the
            database.

        Returns
        -------
        registry : `Registry` (subclass)
            A new `Registry` subclass instance.
        """
        if not isinstance(config, RegistryConfig):
            if isinstance(config, str) or isinstance(config, Config):
                config = RegistryConfig(config)
            else:
                raise ValueError("Incompatible Registry configuration: {}".format(config))
        config.replaceRoot(butlerRoot)
        DatabaseClass = config.getDatabaseClass()
        database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0),
                                         namespace=config.get("namespace"), writeable=writeable)
        universe = DimensionUniverse(config)
        attributes = doImport(config["managers", "attributes"])
        opaque = doImport(config["managers", "opaque"])
        dimensions = doImport(config["managers", "dimensions"])
        collections = doImport(config["managers", "collections"])
        datasets = doImport(config["managers", "datasets"])
        datastoreBridges = doImport(config["managers", "datastores"])
        return cls(database, universe, dimensions=dimensions, attributes=attributes, opaque=opaque,
                   collections=collections, datasets=datasets, datastoreBridges=datastoreBridges,
                   create=create)
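
    # Example sketch (the repository path is hypothetical): a read-only
    # Registry can be constructed directly from a registry configuration,
    # here given as a plain path accepted by the ``config`` argument.
    #
    #     registry = Registry.fromConfig("/path/to/repo/butler.yaml",
    #                                    create=False, writeable=False)
    #     print(registry.dimensions)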

    def __init__(self, database: Database, universe: DimensionUniverse, *,
                 attributes: Type[ButlerAttributeManager],
                 opaque: Type[OpaqueTableStorageManager],
                 dimensions: Type[DimensionRecordStorageManager],
                 collections: Type[CollectionManager],
                 datasets: Type[DatasetRecordStorageManager],
                 datastoreBridges: Type[DatastoreRegistryBridgeManager],
                 create: bool = False):
        self._db = database
        self.storageClasses = StorageClassFactory()
        with self._db.declareStaticTables(create=create) as context:
            self._attributes = attributes.initialize(self._db, context)
            self._dimensions = dimensions.initialize(self._db, context, universe=universe)
            self._collections = collections.initialize(self._db, context)
            self._datasets = datasets.initialize(self._db, context,
                                                 collections=self._collections,
                                                 universe=self.dimensions)
            self._opaque = opaque.initialize(self._db, context)
            self._datastoreBridges = datastoreBridges.initialize(self._db, context,
                                                                 opaque=self._opaque,
                                                                 datasets=datasets,
                                                                 universe=self.dimensions)
        self._collections.refresh()
        self._datasets.refresh(universe=self._dimensions.universe)

    def __str__(self) -> str:
        return str(self._db)

    def __repr__(self) -> str:
        return f"Registry({self._db!r}, {self.dimensions!r})"

    def isWriteable(self) -> bool:
        """Return `True` if this registry allows write operations, and `False`
        otherwise.
        """
        return self._db.isWriteable()

    @property
    def dimensions(self) -> DimensionUniverse:
        """All dimensions recognized by this `Registry` (`DimensionUniverse`).
        """
        return self._dimensions.universe

    @contextlib.contextmanager
    def transaction(self) -> Iterator[None]:
        """Return a context manager that represents a transaction.
        """
        # TODO make savepoint=False the default.
        try:
            with self._db.transaction():
                yield
        except BaseException:
            # TODO: this clears the caches sometimes when we wouldn't actually
            # need to. Can we avoid that?
            self._dimensions.clearCaches()
            raise

    def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None:
        """Add an opaque (to the `Registry`) table for use by a `Datastore` or
        other data repository client.

        Opaque table records can be added via `insertOpaqueData`, retrieved
        via `fetchOpaqueData`, and removed via `deleteOpaqueData`.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. This may differ from the
            actual name used in the database by a prefix and/or suffix.
        spec : `ddl.TableSpec`
            Specification for the table to be added.
        """
        self._opaque.register(tableName, spec)

    @transactional
    def insertOpaqueData(self, tableName: str, *data: dict) -> None:
        """Insert records into an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        data
            Each additional positional argument is a dictionary that
            represents a single row to be added.
        """
        self._opaque[tableName].insert(*data)

    def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]:
        """Retrieve records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the returned rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.

        Yields
        ------
        row : `dict`
            A dictionary representing a single result row.
        """
        yield from self._opaque[tableName].fetch(**where)

    @transactional
    def deleteOpaqueData(self, tableName: str, **where: Any) -> None:
        """Remove records from an opaque table.

        Parameters
        ----------
        tableName : `str`
            Logical name of the opaque table. Must match the name used in a
            previous call to `registerOpaqueTable`.
        where
            Additional keyword arguments are interpreted as equality
            constraints that restrict the deleted rows (combined with AND);
            keyword arguments are column names and values are the values they
            must have.
        """
        self._opaque[tableName].delete(**where)
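
    # Example sketch of the opaque-table round trip (table name, columns, and
    # values are hypothetical; ``registry`` is a writeable Registry and the
    # ``ddl`` module imported above provides the specification classes):
    #
    #     spec = ddl.TableSpec(fields=[
    #         ddl.FieldSpec("dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
    #         ddl.FieldSpec("path", dtype=sqlalchemy.String, length=256),
    #     ])
    #     registry.registerOpaqueTable("example_datastore_records", spec)
    #     registry.insertOpaqueData("example_datastore_records",
    #                               {"dataset_id": 1, "path": "a/b/c.fits"})
    #     rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=1))
    #     registry.deleteOpaqueData("example_datastore_records", dataset_id=1)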

    def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED) -> None:
        """Add a new collection if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the collection to create.
        type : `CollectionType`
            Enum value indicating the type of collection to create.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        self._collections.register(name, type)

    def getCollectionType(self, name: str) -> CollectionType:
        """Return an enumeration value indicating the type of the given
        collection.

        Parameters
        ----------
        name : `str`
            The name of the collection.

        Returns
        -------
        type : `CollectionType`
            Enum value indicating the type of this collection.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.
        """
        return self._collections.find(name).type

    def registerRun(self, name: str) -> None:
        """Add a new run if one with the given name does not exist.

        Parameters
        ----------
        name : `str`
            The name of the run to create.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        self._collections.register(name, CollectionType.RUN)
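
    # Example sketch (collection names are hypothetical): register a RUN
    # collection for new outputs and a TAGGED collection for curated subsets.
    #
    #     registry.registerRun("u/someuser/processing-run")
    #     registry.registerCollection("best-calibs", CollectionType.TAGGED)
    #     assert registry.getCollectionType("u/someuser/processing-run") is CollectionType.RUN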

    @transactional
    def removeCollection(self, name: str) -> None:
        """Completely remove the given collection.

        Parameters
        ----------
        name : `str`
            The name of the collection to remove.

        Raises
        ------
        MissingCollectionError
            Raised if no collection with the given name exists.

        Notes
        -----
        If this is a `~CollectionType.RUN` collection, all datasets and quanta
        in it are also fully removed. This requires that those datasets be
        removed (or at least trashed) from any datastores that hold them
        first.

        A collection may not be deleted as long as it is referenced by a
        `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must
        be deleted or redefined first.
        """
        self._collections.remove(name)

    def getCollectionChain(self, parent: str) -> CollectionSearch:
        """Return the child collections in a `~CollectionType.CHAINED`
        collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.

        Returns
        -------
        children : `CollectionSearch`
            An object that defines the search path of the collection.
            See :ref:`daf_butler_collection_expressions` for more information.

        Raises
        ------
        MissingCollectionError
            Raised if ``parent`` does not exist in the `Registry`.
        TypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        """
        record = self._collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        return record.children

    @transactional
    def setCollectionChain(self, parent: str, children: Any) -> None:
        """Define or redefine a `~CollectionType.CHAINED` collection.

        Parameters
        ----------
        parent : `str`
            Name of the chained collection. Must have already been added via
            a call to `Registry.registerCollection`.
        children : `Any`
            An expression defining an ordered search of child collections,
            generally an iterable of `str`. Restrictions on the dataset types
            to be searched can also be included, by passing a mapping or an
            iterable containing tuples; see
            :ref:`daf_butler_collection_expressions` for more information.

        Raises
        ------
        MissingCollectionError
            Raised when any of the given collections do not exist in the
            `Registry`.
        TypeError
            Raised if ``parent`` does not correspond to a
            `~CollectionType.CHAINED` collection.
        ValueError
            Raised if the given collections contain a cycle.
        """
        record = self._collections.find(parent)
        if record.type is not CollectionType.CHAINED:
            raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.")
        assert isinstance(record, ChainedCollectionRecord)
        children = CollectionSearch.fromExpression(children)
        if children != record.children:
            record.update(self._collections, children)
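
    # Example sketch (collection names are hypothetical): a CHAINED collection
    # that searches a user's run first and a shared TAGGED collection second.
    #
    #     registry.registerCollection("u/someuser/defaults", CollectionType.CHAINED)
    #     registry.setCollectionChain("u/someuser/defaults",
    #                                 ["u/someuser/processing-run", "best-calibs"])
    #     print(registry.getCollectionChain("u/someuser/defaults"))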

    def registerDatasetType(self, datasetType: DatasetType) -> bool:
        """Add a new `DatasetType` to the Registry.

        It is not an error to register the same `DatasetType` twice.

        Parameters
        ----------
        datasetType : `DatasetType`
            The `DatasetType` to be added.

        Returns
        -------
        inserted : `bool`
            `True` if ``datasetType`` was inserted, `False` if an identical
            existing `DatasetType` was found. Note that in either case the
            DatasetType is guaranteed to be defined in the Registry
            consistently with the given definition.

        Raises
        ------
        ValueError
            Raised if the dimensions or storage class are invalid.
        ConflictingDefinitionError
            Raised if this DatasetType is already registered with a different
            definition.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        _, inserted = self._datasets.register(datasetType)
        return inserted
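
    # Example sketch (dataset type name, dimensions, and storage class are
    # hypothetical): define a dataset type and register it.
    #
    #     datasetType = DatasetType("calexp_example",
    #                               dimensions=("instrument", "visit", "detector"),
    #                               storageClass="ExposureF",
    #                               universe=registry.dimensions)
    #     registry.registerDatasetType(datasetType)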

    def getDatasetType(self, name: str) -> DatasetType:
        """Get the `DatasetType`.

        Parameters
        ----------
        name : `str`
            Name of the type.

        Returns
        -------
        type : `DatasetType`
            The `DatasetType` associated with the given name.

        Raises
        ------
        KeyError
            Raised if the requested DatasetType could not be found in the
            registry.
        """
        storage = self._datasets.find(name)
        if storage is None:
            raise KeyError(f"DatasetType '{name}' could not be found.")
        return storage.datasetType

    def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *,
                    collections: Any, **kwargs: Any) -> Optional[DatasetRef]:
        """Find a dataset given its `DatasetType` and data ID.

        This can be used to obtain a `DatasetRef` that permits the dataset to
        be read from a `Datastore`. If the dataset is a component and can not
        be found using the provided dataset type, a dataset ref for the parent
        will be returned instead but with the correct dataset type.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataId : `dict` or `DataCoordinate`, optional
            A `dict`-like object containing the `Dimension` links that
            identify the dataset within a collection.
        collections
            An expression that fully or partially identifies the collections
            to search for the dataset, such as a `str`, `re.Pattern`, or
            iterable thereof. `...` can be used to return all collections.
            See :ref:`daf_butler_collection_expressions` for more information.
        **kwargs
            Additional keyword arguments passed to
            `DataCoordinate.standardize` to convert ``dataId`` to a true
            `DataCoordinate` or augment an existing one.

        Returns
        -------
        ref : `DatasetRef`
            A reference to the dataset, or `None` if no matching Dataset
            was found.

        Raises
        ------
        LookupError
            Raised if one or more data ID keys are missing or the dataset type
            does not exist.
        MissingCollectionError
            Raised if any of ``collections`` does not exist in the registry.
        """
        if isinstance(datasetType, DatasetType):
            storage = self._datasets.find(datasetType.name)
            if storage is None:
                raise LookupError(f"DatasetType '{datasetType}' has not been registered.")
        else:
            storage = self._datasets.find(datasetType)
            if storage is None:
                raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.")
        dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions,
                                            universe=self.dimensions, **kwargs)
        collections = CollectionSearch.fromExpression(collections)
        for collectionRecord in collections.iter(self._collections, datasetType=storage.datasetType):
            result = storage.find(collectionRecord, dataId)
            if result is not None:
                return result

        # Fall back to the parent if we got nothing and this was a component.
        if storage.datasetType.isComponent():
            parentType, _ = storage.datasetType.nameAndComponent()
            parentRef = self.findDataset(parentType, dataId, collections=collections, **kwargs)
            if parentRef is not None:
                # Should already conform and we know no components.
                return DatasetRef(storage.datasetType, parentRef.dataId, id=parentRef.id,
                                  run=parentRef.run, conform=False, hasParentId=True)

        return None
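
    # Example sketch (dataset type, data ID values, and collection name are
    # hypothetical): look up a single dataset by data ID.
    #
    #     ref = registry.findDataset("calexp_example",
    #                                instrument="HSC", visit=12345, detector=42,
    #                                collections="u/someuser/processing-run")
    #     if ref is not None:
    #         print(ref.dataId, ref.run)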

    @transactional
    def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId],
                       run: str) -> List[DatasetRef]:
        """Insert one or more datasets into the `Registry`.

        This always adds new datasets; to associate existing datasets with
        a new collection, use ``associate``.

        Parameters
        ----------
        datasetType : `DatasetType` or `str`
            A `DatasetType` or the name of one.
        dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate`
            Dimension-based identifiers for the new datasets.
        run : `str`
            The name of the run that produced the datasets.

        Returns
        -------
        refs : `list` of `DatasetRef`
            Resolved `DatasetRef` instances for all given data IDs (in the
            same order).

        Raises
        ------
        ConflictingDefinitionError
            If a dataset with the same dataset type and data ID as one of
            those given already exists in ``run``.
        MissingCollectionError
            Raised if ``run`` does not exist in the registry.
        """
        if isinstance(datasetType, DatasetType):
            storage = self._datasets.find(datasetType.name)
            if storage is None:
                raise LookupError(f"DatasetType '{datasetType}' has not been registered.")
        else:
            storage = self._datasets.find(datasetType)
            if storage is None:
                raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.")
        runRecord = self._collections.find(run)
        if runRecord.type is not CollectionType.RUN:
            raise TypeError(f"Given collection is of type {runRecord.type.name}; RUN collection required.")
        assert isinstance(runRecord, RunRecord)
        expandedDataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions)
                           for dataId in dataIds]
        try:
            refs = list(storage.insert(runRecord, expandedDataIds))
        except sqlalchemy.exc.IntegrityError as err:
            raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting "
                                             f"one or more datasets of type {storage.datasetType} into "
                                             f"collection '{run}'. "
                                             f"This probably means a dataset with the same data ID "
                                             f"and dataset type already exists, but it may also mean a "
                                             f"dimension row is missing.") from err
        return refs
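
    # Example sketch (dataset type, data IDs, and run name are hypothetical):
    # register two new datasets in a RUN collection.
    #
    #     refs = registry.insertDatasets(
    #         "calexp_example",
    #         dataIds=[dict(instrument="HSC", visit=12345, detector=42),
    #                  dict(instrument="HSC", visit=12345, detector=43)],
    #         run="u/someuser/processing-run",
    #     )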

    def getDataset(self, id: int) -> Optional[DatasetRef]:
        """Retrieve a Dataset entry.

        Parameters
        ----------
        id : `int`
            The unique identifier for the dataset.

        Returns
        -------
        ref : `DatasetRef` or `None`
            A ref to the Dataset, or `None` if no matching Dataset
            was found.
        """
        ref = self._datasets.getDatasetRef(id, universe=self.dimensions)
        if ref is None:
            return None
        return ref

    @transactional
    def removeDatasets(self, refs: Iterable[DatasetRef]) -> None:
        """Remove datasets from the Registry.

        The datasets will be removed unconditionally from all collections, and
        any `Quantum` that consumed this dataset will instead be marked as
        having a NULL input. `Datastore` records will *not* be deleted; the
        caller is responsible for ensuring that the dataset has already been
        removed from all Datastores.

        Parameters
        ----------
        refs : `Iterable` of `DatasetRef`
            References to the datasets to be removed. Must include a valid
            ``id`` attribute, and should be considered invalidated upon
            return.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any ``ref.id`` is `None`.
        OrphanedRecordError
            Raised if any dataset is still present in any `Datastore`.
        """
        for datasetType, refsForType in DatasetRef.groupByType(refs).items():
            storage = self._datasets.find(datasetType.name)
            assert storage is not None
            try:
                storage.delete(refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise OrphanedRecordError("One or more datasets is still "
                                          "present in one or more Datastores.") from err

    @transactional
    def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Add existing datasets to a `~CollectionType.TAGGED` collection.

        If a DatasetRef with the same exact integer ID is already in a
        collection nothing is changed. If a `DatasetRef` with the same
        `DatasetType` and data ID but with different integer ID
        exists in the collection, `ConflictingDefinitionError` is raised.

        Parameters
        ----------
        collection : `str`
            Indicates the collection the datasets should be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        ConflictingDefinitionError
            If a Dataset with the given `DatasetRef` already exists in the
            given collection.
        AmbiguousDatasetError
            Raised if ``any(ref.id is None for ref in refs)``.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        TypeError
            Raised if adding new datasets to the given ``collection`` is not
            allowed.
        """
        collectionRecord = self._collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.")
        for datasetType, refsForType in DatasetRef.groupByType(refs).items():
            storage = self._datasets.find(datasetType.name)
            assert storage is not None
            try:
                storage.associate(collectionRecord, refsForType)
            except sqlalchemy.exc.IntegrityError as err:
                raise ConflictingDefinitionError(
                    f"Constraint violation while associating dataset of type {datasetType.name} with "
                    f"collection {collection}. This probably means that one or more datasets with the same "
                    f"dataset type and data ID already exist in the collection, but it may also indicate "
                    f"that the datasets do not exist."
                ) from err

    @transactional
    def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None:
        """Remove existing datasets from a `~CollectionType.TAGGED`
        collection.

        ``collection`` and ``ref`` combinations that are not currently
        associated are silently ignored.

        Parameters
        ----------
        collection : `str`
            The collection the datasets should no longer be associated with.
        refs : `Iterable` [ `DatasetRef` ]
            An iterable of resolved `DatasetRef` instances that already exist
            in this `Registry`.

        Raises
        ------
        AmbiguousDatasetError
            Raised if any of the given dataset references is unresolved.
        MissingCollectionError
            Raised if ``collection`` does not exist in the registry.
        TypeError
            Raised if removing datasets from the given ``collection`` is not
            allowed.
        """
        collectionRecord = self._collections.find(collection)
        if collectionRecord.type is not CollectionType.TAGGED:
            raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; "
                            "expected TAGGED.")
        for datasetType, refsForType in DatasetRef.groupByType(refs).items():
            storage = self._datasets.find(datasetType.name)
            assert storage is not None
            storage.disassociate(collectionRecord, refsForType)

    def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager:
        """Return an object that allows a new `Datastore` instance to
        communicate with this `Registry`.

        Returns
        -------
        manager : `DatastoreRegistryBridgeManager`
            Object that mediates communication between this `Registry` and its
            associated datastores.
        """
        return self._datastoreBridges

    def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]:
        """Retrieve datastore locations for a given dataset.

        Parameters
        ----------
        ref : `DatasetRef`
            A reference to the dataset for which to retrieve storage
            information.

        Returns
        -------
        datastores : `Iterable` [ `str` ]
            All the matching datastores holding this dataset.

        Raises
        ------
        AmbiguousDatasetError
            Raised if ``ref.id`` is `None`.
        """
        return self._datastoreBridges.findDatastores(ref)

    def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None,
                     records: Optional[Mapping[DimensionElement, Optional[DimensionRecord]]] = None,
                     **kwargs: Any) -> ExpandedDataCoordinate:
        """Expand a dimension-based data ID to include additional information.

        Parameters
        ----------
        dataId : `DataCoordinate` or `dict`, optional
            Data ID to be expanded; augmented and overridden by ``kwargs``.
        graph : `DimensionGraph`, optional
            Set of dimensions for the expanded ID. If `None`, the dimensions
            will be inferred from the keys of ``dataId`` and ``kwargs``.
            Dimensions that are in ``dataId`` or ``kwargs`` but not in
            ``graph`` are silently ignored, providing a way to extract and
            expand a subset of a data ID.
        records : `Mapping` [`DimensionElement`, `DimensionRecord`], optional
            Dimension record data to use before querying the database for that
            data.
        **kwargs
            Additional keywords are treated like additional key-value pairs
            for ``dataId``, extending and overriding it.

        Returns
        -------
        expanded : `ExpandedDataCoordinate`
            A data ID that includes full metadata for all of the dimensions it
            identifies.
        """
        standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions, **kwargs)
        if isinstance(standardized, ExpandedDataCoordinate):
            return standardized
        elif isinstance(dataId, ExpandedDataCoordinate):
            records = NamedKeyDict(records) if records is not None else NamedKeyDict()
            records.update(dataId.records)
        else:
            records = NamedKeyDict(records) if records is not None else NamedKeyDict()
        keys = dict(standardized.byName())
        for element in standardized.graph.primaryKeyTraversalOrder:
            record = records.get(element.name, ...)  # Use ... to mean not found; None might mean NULL
            if record is ...:
                storage = self._dimensions[element]
                record = storage.fetch(keys)
                records[element] = record
            if record is not None:
                for d in element.implied:
                    value = getattr(record, d.name)
                    if keys.setdefault(d.name, value) != value:
                        raise InconsistentDataIdError(
                            f"Data ID {standardized} has {d.name}={keys[d.name]!r}, "
                            f"but {element.name} implies {d.name}={value!r}."
                        )
            else:
                if element in standardized.graph.required:
                    raise LookupError(
                        f"Could not fetch record for required dimension {element.name} via keys {keys}."
                    )
                if element.alwaysJoin:
                    raise InconsistentDataIdError(
                        f"Could not fetch record for element {element.name} via keys {keys}, "
                        "but it is marked alwaysJoin=True; this means one or more dimensions are not "
                        "related."
                    )
                records.update((d, None) for d in element.implied)
        return ExpandedDataCoordinate(standardized.graph, standardized.values(), records=records)
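
    # Example sketch (instrument and data ID values are hypothetical): expand
    # a minimal data ID so its dimension records are available in memory.
    #
    #     expanded = registry.expandDataId(instrument="HSC", visit=12345, detector=42)
    #     visitRecord = expanded.records["visit"]  # full dimension record, if defined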

    def insertDimensionData(self, element: Union[DimensionElement, str],
                            *data: Union[Mapping[str, Any], DimensionRecord],
                            conform: bool = True) -> None:
        """Insert one or more dimension records into the database.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            records will be inserted into.
        data : `dict` or `DimensionRecord` (variadic)
            One or more records to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or conversions,
            and assume that ``element`` is a `DimensionElement` instance and
            ``data`` is one or more `DimensionRecord` instances of the
            appropriate subclass.
        """
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            records = [row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row)
                       for row in data]
        else:
            # Ignore typing since caller said to trust them with conform=False.
            records = data  # type: ignore
        storage = self._dimensions[element]  # type: ignore
        storage.insert(*records)

    def syncDimensionData(self, element: Union[DimensionElement, str],
                          row: Union[Mapping[str, Any], DimensionRecord],
                          conform: bool = True) -> bool:
        """Synchronize the given dimension record with the database, inserting
        it if it does not already exist and comparing values if it does.

        Parameters
        ----------
        element : `DimensionElement` or `str`
            The `DimensionElement` or name thereof that identifies the table
            the record will be inserted into.
        row : `dict` or `DimensionRecord`
            The record to insert.
        conform : `bool`, optional
            If `False` (`True` is default) perform no checking or conversions,
            and assume that ``element`` is a `DimensionElement` instance and
            ``row`` is a `DimensionRecord` instance of the appropriate
            subclass.

        Returns
        -------
        inserted : `bool`
            `True` if a new row was inserted, `False` otherwise.

        Raises
        ------
        ConflictingDefinitionError
            Raised if the record exists in the database (according to primary
            key lookup) but is inconsistent with the given one.

        Notes
        -----
        This method cannot be called within transactions, as it needs to be
        able to perform its own transaction to be concurrent.
        """
        if conform:
            if isinstance(element, str):
                element = self.dimensions[element]
            record = row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row)
        else:
            # Ignore typing since caller said to trust them with conform=False.
            record = row  # type: ignore
        storage = self._dimensions[element]  # type: ignore
        return storage.sync(record)
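
    # Example sketch (all record values are hypothetical): insert an
    # instrument record and synchronize a physical_filter record against it.
    #
    #     registry.insertDimensionData("instrument",
    #                                  {"name": "HSC", "detector_max": 200,
    #                                   "visit_max": 100000, "exposure_max": 100000})
    #     registry.syncDimensionData("physical_filter",
    #                                {"instrument": "HSC", "name": "HSC-R",
    #                                 "abstract_filter": "r"})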

    def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None
                          ) -> Iterator[DatasetType]:
        """Iterate over the dataset types whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the dataset types
            to return, such as a `str`, `re.Pattern`, or iterable thereof.
            `...` can be used to return all dataset types, and is the default.
            See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components.
            If `None` (default), apply patterns to components only if their
            parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.

        Yields
        ------
        datasetType : `DatasetType`
            A `DatasetType` instance whose name matches ``expression``.
        """
        wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name)
        if wildcard is Ellipsis:
            for datasetType in self._datasets:
                if components or not datasetType.isComponent():
                    yield datasetType
            return
        done: Set[str] = set()
        for name in wildcard.strings:
            storage = self._datasets.find(name)
            if storage is not None:
                done.add(storage.datasetType.name)
                yield storage.datasetType
        if wildcard.patterns:
            # If components (the argument) is None, we'll save component
            # dataset types that we might want to match, but only if their
            # parents didn't get included.
            componentsForLater = []
            for datasetType in self._datasets:
                if datasetType.name in done:
                    continue
                parentName, componentName = datasetType.nameAndComponent()
                if componentName is not None and not components:
                    if components is None and parentName not in done:
                        componentsForLater.append(datasetType)
                    continue
                if any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
                    done.add(datasetType.name)
                    yield datasetType
            # Go back and try to match saved components.
            for datasetType in componentsForLater:
                parentName, _ = datasetType.nameAndComponent()
                if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns):
                    yield datasetType

    def queryCollections(self, expression: Any = ...,
                         datasetType: Optional[DatasetType] = None,
                         collectionType: Optional[CollectionType] = None,
                         flattenChains: bool = False,
                         includeChains: Optional[bool] = None) -> Iterator[str]:
        """Iterate over the collections whose names match an expression.

        Parameters
        ----------
        expression : `Any`, optional
            An expression that fully or partially identifies the collections
            to return, such as a `str`, `re.Pattern`, or iterable thereof.
            `...` can be used to return all collections, and is the default.
            See :ref:`daf_butler_collection_expressions` for more
            information.
        datasetType : `DatasetType`, optional
            If provided, only yield collections that should be searched for
            this dataset type according to ``expression``. If this is
            not provided, any dataset type restrictions in ``expression`` are
            ignored.
        collectionType : `CollectionType`, optional
            If provided, only yield collections of this type.
        flattenChains : `bool`, optional
            If `True` (`False` is default), recursively yield the child
            collections of matching `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for matching `~CollectionType.CHAINED`
            collections. Default is the opposite of ``flattenChains``: include
            either CHAINED collections or their children, but not both.

        Yields
        ------
        collection : `str`
            The name of a collection that matches ``expression``.
        """
        query = CollectionQuery.fromExpression(expression)
        for record in query.iter(self._collections, datasetType=datasetType, collectionType=collectionType,
                                 flattenChains=flattenChains, includeChains=includeChains):
            yield record.name

    def makeQueryBuilder(self, summary: QuerySummary) -> QueryBuilder:
        """Return a `QueryBuilder` instance capable of constructing and
        managing more complex queries than those obtainable via `Registry`
        interfaces.

        This is an advanced interface; downstream code should prefer
        `Registry.queryDimensions` and `Registry.queryDatasets` whenever those
        are sufficient.

        Parameters
        ----------
        summary : `QuerySummary`
            Object describing and categorizing the full set of dimensions that
            will be included in the query.

        Returns
        -------
        builder : `QueryBuilder`
            Object that can be used to construct and perform advanced queries.
        """
        return QueryBuilder(summary=summary,
                            collections=self._collections,
                            dimensions=self._dimensions,
                            datasets=self._datasets)

    def queryDimensions(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *,
                        dataId: Optional[DataId] = None,
                        datasets: Any = None,
                        collections: Any = None,
                        where: Optional[str] = None,
                        expand: bool = True,
                        components: Optional[bool] = None,
                        **kwargs: Any) -> Iterator[DataCoordinate]:
        """Query for and iterate over data IDs matching user-provided
        criteria.

        Parameters
        ----------
        dimensions : `Dimension` or `str`, or iterable thereof
            The dimensions of the data IDs to yield, as either `Dimension`
            instances or `str`. Will be automatically expanded to a complete
            `DimensionGraph`.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        datasets : `Any`, optional
            An expression that fully or partially identifies dataset types
            that should constrain the yielded data IDs. For example, including
            "raw" here would constrain the yielded ``instrument``,
            ``exposure``, ``detector``, and ``physical_filter`` values to only
            those for which at least one "raw" dataset exists in
            ``collections``. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. Unlike other dataset type
            expressions, `...` is not permitted - it doesn't make sense to
            constrain data IDs on the existence of *all* datasets.
            See :ref:`daf_butler_dataset_type_expressions` for more
            information.
        collections : `Any`, optional
            An expression that fully or partially identifies the collections
            to search for datasets, such as a `str`, `re.Pattern`, or iterable
            thereof. `...` can be used to return all collections. Must be
            provided if ``datasets`` is, and is ignored if it is not. See
            :ref:`daf_butler_collection_expressions` for more information.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        expand : `bool`, optional
            If `True` (default) yield `ExpandedDataCoordinate` instead of
            minimal `DataCoordinate` base-class instances.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components only
            if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Yields
        ------
        dataId : `DataCoordinate`
            Data IDs matching the given query parameters. Order is
            unspecified.
        """
        dimensions = iterable(dimensions)
        standardizedDataId = self.expandDataId(dataId, **kwargs)
        standardizedDatasetTypes = set()
        requestedDimensionNames = set(self.dimensions.extract(dimensions).names)
        if datasets is not None:
            if collections is None:
                raise TypeError("Cannot pass 'datasets' without 'collections'.")
            for datasetType in self.queryDatasetTypes(datasets, components=components):
                requestedDimensionNames.update(datasetType.dimensions.names)
                # If any matched dataset type is a component, just operate on
                # its parent instead, because Registry doesn't know anything
                # about what components exist, and here (unlike queryDatasets)
                # we don't care about returning them.
                parentDatasetTypeName, componentName = datasetType.nameAndComponent()
                if componentName is not None:
                    datasetType = self.getDatasetType(parentDatasetTypeName)
                standardizedDatasetTypes.add(datasetType)
            # Preprocess collections expression in case the original included
            # single-pass iterators (we'll want to use it multiple times
            # below).
            collections = CollectionQuery.fromExpression(collections)

        summary = QuerySummary(
            requested=DimensionGraph(self.dimensions, names=requestedDimensionNames),
            dataId=standardizedDataId,
            expression=where,
        )
        builder = self.makeQueryBuilder(summary)
        for datasetType in standardizedDatasetTypes:
            builder.joinDataset(datasetType, collections, isResult=False)
        query = builder.finish()
        predicate = query.predicate()
        for row in self._db.query(query.sql):
            if predicate(row):
                result = query.extractDataId(row)
                if expand:
                    yield self.expandDataId(result, records=standardizedDataId.records)
                else:
                    yield result
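
    # Example sketch (dimensions, dataset type, collection, and expression are
    # hypothetical): iterate over visit/detector data IDs for which a "raw"
    # dataset exists in the given collection.
    #
    #     for dataId in registry.queryDimensions(["visit", "detector"],
    #                                            datasets="raw",
    #                                            collections="HSC/raw/all",
    #                                            where="instrument = 'HSC' AND visit > 10000"):
    #         print(dataId["visit"], dataId["detector"])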

    def queryDatasets(self, datasetType: Any, *,
                      collections: Any,
                      dimensions: Optional[Iterable[Union[Dimension, str]]] = None,
                      dataId: Optional[DataId] = None,
                      where: Optional[str] = None,
                      deduplicate: bool = False,
                      expand: bool = True,
                      components: Optional[bool] = None,
                      **kwargs: Any) -> Iterator[DatasetRef]:
        """Query for and iterate over dataset references matching
        user-provided criteria.

        Parameters
        ----------
        datasetType
            An expression that fully or partially identifies the dataset types
            to be queried. Allowed types include `DatasetType`, `str`,
            `re.Pattern`, and iterables thereof. The special value `...` can
            be used to query all dataset types. See
            :ref:`daf_butler_dataset_type_expressions` for more information.
        collections
            An expression that fully or partially identifies the collections
            to search for datasets, such as a `str`, `re.Pattern`, or iterable
            thereof. `...` can be used to return all collections. See
            :ref:`daf_butler_collection_expressions` for more information.
        dimensions : `~collections.abc.Iterable` of `Dimension` or `str`
            Dimensions to include in the query (in addition to those used
            to identify the queried dataset type(s)), either to constrain
            the resulting datasets to those for which a matching dimension
            exists, or to relate the dataset type's dimensions to dimensions
            referenced by the ``dataId`` or ``where`` arguments.
        dataId : `dict` or `DataCoordinate`, optional
            A data ID whose key-value pairs are used as equality constraints
            in the query.
        where : `str`, optional
            A string expression similar to a SQL WHERE clause. May involve
            any column of a dimension table or (as a shortcut for the primary
            key column of a dimension table) dimension name. See
            :ref:`daf_butler_dimension_expressions` for more information.
        deduplicate : `bool`, optional
            If `True` (`False` is default), for each result data ID, only
            yield one `DatasetRef` of each `DatasetType`, from the first
            collection in which a dataset of that dataset type appears
            (according to the order of ``collections`` passed in). If `True`,
            ``collections`` must not contain regular expressions and may not
            be `...`.
        expand : `bool`, optional
            If `True` (default) attach `ExpandedDataCoordinate` instead of
            minimal `DataCoordinate` base-class instances.
        components : `bool`, optional
            If `True`, apply all dataset expression patterns to component
            dataset type names as well. If `False`, never apply patterns to
            components. If `None` (default), apply patterns to components only
            if their parent datasets were not matched by the expression.
            Fully-specified component datasets (`str` or `DatasetType`
            instances) are always included.
        **kwargs
            Additional keyword arguments are forwarded to
            `DataCoordinate.standardize` when processing the ``dataId``
            argument (and may be used to provide a constraining data ID even
            when the ``dataId`` argument is `None`).

        Yields
        ------
        ref : `DatasetRef`
            Dataset references matching the given query criteria. These
            are grouped by `DatasetType` if the query evaluates to multiple
            dataset types, but order is otherwise unspecified.

        Raises
        ------
        TypeError
            Raised when the arguments are incompatible, such as when a
            collection wildcard is passed when ``deduplicate`` is `True`.

        Notes
        -----
        When multiple dataset types are queried in a single call, the
        results of this operation are equivalent to querying for each dataset
        type separately in turn, and no information about the relationships
        between datasets of different types is included. In contexts where
        that kind of information is important, the recommended pattern is to
        use `queryDimensions` to first obtain data IDs (possibly with the
        desired dataset types and collections passed as constraints to the
        query), and then use multiple (generally much simpler) calls to
        `queryDatasets` with the returned data IDs passed as constraints.
        """
        # Standardize the collections expression.
        if deduplicate:
            collections = CollectionSearch.fromExpression(collections)
        else:
            collections = CollectionQuery.fromExpression(collections)
        # Standardize and expand the data ID provided as a constraint.
        standardizedDataId = self.expandDataId(dataId, **kwargs)

        # We can only query directly if given a non-component DatasetType
        # instance. If we were given an expression or str or a component
        # DatasetType instance, we'll populate this dict, recurse, and return.
        # If we already have a non-component DatasetType, it will remain None
        # and we'll run the query directly.
        composition: Optional[
            Dict[
                DatasetType,  # parent dataset type
                List[Optional[str]]  # component name, or None for parent
            ]
        ] = None
        if not isinstance(datasetType, DatasetType):
            # We were given a dataset type expression (which may be as simple
            # as a str). Loop over all matching datasets, delegating handling
            # of the `components` argument to queryDatasetTypes, as we
            # populate the composition dict.
            composition = defaultdict(list)
            for trueDatasetType in self.queryDatasetTypes(datasetType, components=components):
                parentName, componentName = trueDatasetType.nameAndComponent()
                if componentName is not None:
                    parentDatasetType = self.getDatasetType(parentName)
                    composition.setdefault(parentDatasetType, []).append(componentName)
                else:
                    composition.setdefault(trueDatasetType, []).append(None)
        elif datasetType.isComponent():
            # We were given a true DatasetType instance, but it's a component.
            # The composition dict will have exactly one item.
            parentName, componentName = datasetType.nameAndComponent()
            parentDatasetType = self.getDatasetType(parentName)
            composition = {parentDatasetType: [componentName]}
        if composition is not None:
            # We need to recurse. Do that once for each parent dataset type.
            for parentDatasetType, componentNames in composition.items():
                for parentRef in self.queryDatasets(parentDatasetType, collections=collections,
                                                    dimensions=dimensions, dataId=standardizedDataId,
                                                    where=where, deduplicate=deduplicate):
                    # Loop over components, yielding one ref for each one
                    # requested.
                    for componentName in componentNames:
                        if componentName is None:
                            yield parentRef
                        else:
                            yield parentRef.makeComponentRef(componentName)
            return
        # If we get here, there's no need to recurse (or we are already
        # recursing; there can only ever be one level of recursion).

        # The full set of dimensions in the query is the combination of those
        # needed for the DatasetType and those explicitly requested, if any.
        requestedDimensionNames = set(datasetType.dimensions.names)
        if dimensions is not None:
            requestedDimensionNames.update(self.dimensions.extract(dimensions).names)
        # Construct the summary structure needed to construct a QueryBuilder.
        summary = QuerySummary(
            requested=DimensionGraph(self.dimensions, names=requestedDimensionNames),
            dataId=standardizedDataId,
            expression=where,
        )
        builder = self.makeQueryBuilder(summary)
        # Add the dataset subquery to the query, telling the QueryBuilder to
        # include the rank of the selected collection in the results only if
        # we need to deduplicate. Note that if any of the collections are
        # actually wildcard expressions, and we've asked for deduplication,
        # this will raise TypeError for us.
        if not builder.joinDataset(datasetType, collections, isResult=True, addRank=deduplicate):
            return
        query = builder.finish()
        predicate = query.predicate()
        if not deduplicate:
            # No need to de-duplicate across collections.
            for row in self._db.query(query.sql):
                if predicate(row):
                    dataId = query.extractDataId(row, graph=datasetType.dimensions)
                    if expand:
                        dataId = self.expandDataId(dataId, records=standardizedDataId.records)
                    yield query.extractDatasetRef(row, datasetType, dataId)[0]
        else:
            # For each data ID, yield only the DatasetRef with the lowest
            # collection rank.
            bestRefs = {}
            bestRanks: Dict[DataCoordinate, int] = {}
            for row in self._db.query(query.sql):
                if predicate(row):
                    ref, rank = query.extractDatasetRef(row, datasetType)
                    bestRank = bestRanks.get(ref.dataId, sys.maxsize)
                    assert rank is not None
                    if rank < bestRank:
                        bestRefs[ref.dataId] = ref
                        bestRanks[ref.dataId] = rank
            # If caller requested expanded data IDs, we defer that until here
            # so we do as little expansion as possible.
            if expand:
                for ref in bestRefs.values():
                    dataId = self.expandDataId(ref.dataId, records=standardizedDataId.records)
                    yield ref.expanded(dataId)
            else:
                yield from bestRefs.values()
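
    # Example sketch (dataset type, collections, and expression are
    # hypothetical): query resolved refs across an ordered search path,
    # keeping only the first match per data ID via ``deduplicate``.
    #
    #     refs = registry.queryDatasets("calexp_example",
    #                                   collections=["u/someuser/processing-run", "best-calibs"],
    #                                   where="instrument = 'HSC' AND detector = 42",
    #                                   deduplicate=True)
    #     for ref in refs:
    #         print(ref.datasetType.name, ref.dataId, ref.run)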

    storageClasses: StorageClassFactory
    """All storage classes known to the registry (`StorageClassFactory`).
    """