
1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "Registry", 

26) 

27 

28from collections import defaultdict 

29import contextlib 

30import sys 

31from typing import ( 

32 Any, 

33 Dict, 

34 Iterable, 

35 Iterator, 

36 List, 

37 Mapping, 

38 Optional, 

39 Set, 

40 Type, 

41 TYPE_CHECKING, 

42 Union, 

43) 

44 

45import sqlalchemy 

46 

47from ..core import ( 

48 Config, 

49 DataCoordinate, 

50 DataId, 

51 DatasetRef, 

52 DatasetType, 

53 ddl, 

54 Dimension, 

55 DimensionElement, 

56 DimensionGraph, 

57 DimensionRecord, 

58 DimensionUniverse, 

59 ExpandedDataCoordinate, 

60 NamedKeyDict, 

61 StorageClassFactory, 

62) 

63from ..core.utils import doImport, iterable, transactional 

64from ._config import RegistryConfig 

65from .queries import ( 

66 QueryBuilder, 

67 QuerySummary, 

68) 

69from ._collectionType import CollectionType 

70from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError 

71from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch, Ellipsis 

72from .interfaces import ChainedCollectionRecord, RunRecord 

73from .versions import ButlerVersionsManager 

74 

75 if TYPE_CHECKING: 

76 from ..butlerConfig import ButlerConfig 

77 from .interfaces import ( 

78 ButlerAttributeManager, 

79 CollectionManager, 

80 Database, 

81 OpaqueTableStorageManager, 

82 DimensionRecordStorageManager, 

83 DatasetRecordStorageManager, 

84 DatastoreRegistryBridgeManager, 

85 ) 

86 

87 

88class Registry: 

89 """Registry interface. 

90 

91 Parameters 

92 ---------- 

93 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

94 Registry configuration 

95 """ 

96 

97 defaultConfigFile = None 

98 """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or 

99 absolute path. Can be `None` if no defaults are specified. 

100 """ 

101 

102 @classmethod 

103 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], create: bool = False, 

104 butlerRoot: Optional[str] = None, writeable: bool = True) -> Registry: 

105 """Create `Registry` subclass instance from `config`. 

106 

107 Uses ``registry.cls`` from `config` to determine which subclass to 

108 instantiate. 

109 

110 Parameters 

111 ---------- 

112 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

113 Registry configuration 

114 create : `bool`, optional 

115 If `True`, assume an empty Registry and create a new one. 

116 butlerRoot : `str`, optional 

117 Path to the repository root this `Registry` will manage. 

118 writeable : `bool`, optional 

119 If `True` (default) create a read-write connection to the database. 

120 

121 Returns 

122 ------- 

123 registry : `Registry` (subclass) 

124 A new `Registry` subclass instance. 

125 """ 

126 if not isinstance(config, RegistryConfig): 

127 if isinstance(config, str) or isinstance(config, Config): 

128 config = RegistryConfig(config) 

129 else: 

130 raise ValueError("Incompatible Registry configuration: {}".format(config)) 

131 config.replaceRoot(butlerRoot) 

132 DatabaseClass = config.getDatabaseClass() 

133 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0), 

134 namespace=config.get("namespace"), writeable=writeable) 

135 universe = DimensionUniverse(config) 

136 attributes = doImport(config["managers", "attributes"]) 

137 opaque = doImport(config["managers", "opaque"]) 

138 dimensions = doImport(config["managers", "dimensions"]) 

139 collections = doImport(config["managers", "collections"]) 

140 datasets = doImport(config["managers", "datasets"]) 

141 datastoreBridges = doImport(config["managers", "datastores"]) 

142 versions = ButlerVersionsManager.fromConfig(config.get("schema_versions")) 

143 

144 return cls(database, universe, dimensions=dimensions, attributes=attributes, opaque=opaque, 

145 collections=collections, datasets=datasets, datastoreBridges=datastoreBridges, 

146 versions=versions, writeable=writeable, create=create) 
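
# Example (illustrative sketch; the configuration path below is hypothetical
# and is assumed to point at a valid registry configuration):
#
#     registry = Registry.fromConfig("/path/to/butler.yaml", writeable=False)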

147 

148 def __init__(self, database: Database, universe: DimensionUniverse, *, 

149 attributes: Type[ButlerAttributeManager], 

150 opaque: Type[OpaqueTableStorageManager], 

151 dimensions: Type[DimensionRecordStorageManager], 

152 collections: Type[CollectionManager], 

153 datasets: Type[DatasetRecordStorageManager], 

154 datastoreBridges: Type[DatastoreRegistryBridgeManager], 

155 versions: ButlerVersionsManager, 

156 writeable: bool = True, 

157 create: bool = False): 

158 self._db = database 

159 self.storageClasses = StorageClassFactory() 

160 with self._db.declareStaticTables(create=create) as context: 

161 self._attributes = attributes.initialize(self._db, context) 

162 self._dimensions = dimensions.initialize(self._db, context, universe=universe) 

163 self._collections = collections.initialize(self._db, context) 

164 self._datasets = datasets.initialize(self._db, context, 

165 collections=self._collections, 

166 universe=self.dimensions) 

167 self._opaque = opaque.initialize(self._db, context) 

168 self._datastoreBridges = datastoreBridges.initialize(self._db, context, 

169 opaque=self._opaque, 

170 datasets=datasets, 

171 universe=self.dimensions) 

172 context.addInitializer(lambda db: versions.storeVersions(self._attributes)) 

173 

174 # This call does not do anything right now as we do not have a way to 

175 # split tables between sub-schemas yet. 

176 versions.checkVersionDigests() 

177 if not create: 

178 # verify that configured versions are compatible with schema 

179 versions.checkStoredVersions(self._attributes, writeable) 

180 

181 self._collections.refresh() 

182 self._datasets.refresh(universe=self._dimensions.universe) 

183 

184 def __str__(self) -> str: 

185 return str(self._db) 

186 

187 def __repr__(self) -> str: 

188 return f"Registry({self._db!r}, {self.dimensions!r})" 

189 

190 def isWriteable(self) -> bool: 

191 """Return `True` if this registry allows write operations, and `False` 

192 otherwise. 

193 """ 

194 return self._db.isWriteable() 

195 

196 @property 

197 def dimensions(self) -> DimensionUniverse: 

198 """All dimensions recognized by this `Registry` (`DimensionUniverse`). 

199 """ 

200 return self._dimensions.universe 

201 

202 @contextlib.contextmanager 

203 def transaction(self) -> Iterator[None]: 

204 """Return a context manager that represents a transaction. 

205 """ 

206 # TODO make savepoint=False the default. 

207 try: 

208 with self._db.transaction(): 

209 yield 

210 except BaseException: 

211 # TODO: this clears the caches sometimes when we wouldn't actually 

212 # need to. Can we avoid that? 

213 self._dimensions.clearCaches() 

214 raise 

215 

216 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

217 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

218 other data repository client. 

219 

220 Opaque table records can be added via `insertOpaqueData`, retrieved via 

221 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

222 

223 Parameters 

224 ---------- 

225 tableName : `str` 

226 Logical name of the opaque table. This may differ from the 

227 actual name used in the database by a prefix and/or suffix. 

228 spec : `ddl.TableSpec` 

229 Specification for the table to be added. 

230 """ 

231 self._opaque.register(tableName, spec) 

232 

233 @transactional 

234 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

235 """Insert records into an opaque table. 

236 

237 Parameters 

238 ---------- 

239 tableName : `str` 

240 Logical name of the opaque table. Must match the name used in a 

241 previous call to `registerOpaqueTable`. 

242 data 

243 Each additional positional argument is a dictionary that represents 

244 a single row to be added. 

245 """ 

246 self._opaque[tableName].insert(*data) 

247 

248 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

249 """Retrieve records from an opaque table. 

250 

251 Parameters 

252 ---------- 

253 tableName : `str` 

254 Logical name of the opaque table. Must match the name used in a 

255 previous call to `registerOpaqueTable`. 

256 where 

257 Additional keyword arguments are interpreted as equality 

258 constraints that restrict the returned rows (combined with AND); 

259 keyword arguments are column names and values are the values they 

260 must have. 

261 

262 Yields 

263 ------ 

264 row : `dict` 

265 A dictionary representing a single result row. 

266 """ 

267 yield from self._opaque[tableName].fetch(**where) 

268 

269 @transactional 

270 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

271 """Remove records from an opaque table. 

272 

273 Parameters 

274 ---------- 

275 tableName : `str` 

276 Logical name of the opaque table. Must match the name used in a 

277 previous call to `registerOpaqueTable`. 

278 where 

279 Additional keyword arguments are interpreted as equality 

280 constraints that restrict the deleted rows (combined with AND); 

281 keyword arguments are column names and values are the values they 

282 must have. 

283 """ 

284 self._opaque[tableName].delete(**where) 
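
# Example (illustrative sketch of the opaque-table round trip; the table name,
# column names, and ``spec`` are hypothetical, with ``spec`` assumed to be a
# ``ddl.TableSpec`` describing the columns used below):
#
#     registry.registerOpaqueTable("my_datastore_records", spec)
#     registry.insertOpaqueData("my_datastore_records",
#                               {"dataset_id": 42, "path": "a/b/c.fits"})
#     rows = list(registry.fetchOpaqueData("my_datastore_records", dataset_id=42))
#     registry.deleteOpaqueData("my_datastore_records", dataset_id=42)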

285 

286 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED) -> None: 

287 """Add a new collection if one with the given name does not exist. 

288 

289 Parameters 

290 ---------- 

291 name : `str` 

292 The name of the collection to create. 

293 type : `CollectionType` 

294 Enum value indicating the type of collection to create. 

295 

296 Notes 

297 ----- 

298 This method cannot be called within transactions, as it needs to be 

299 able to perform its own transaction to be concurrent. 

300 """ 

301 self._collections.register(name, type) 

302 

303 def getCollectionType(self, name: str) -> CollectionType: 

304 """Return an enumeration value indicating the type of the given 

305 collection. 

306 

307 Parameters 

308 ---------- 

309 name : `str` 

310 The name of the collection. 

311 

312 Returns 

313 ------- 

314 type : `CollectionType` 

315 Enum value indicating the type of this collection. 

316 

317 Raises 

318 ------ 

319 MissingCollectionError 

320 Raised if no collection with the given name exists. 

321 """ 

322 return self._collections.find(name).type 

323 

324 def registerRun(self, name: str) -> None: 

325 """Add a new run if one with the given name does not exist. 

326 

327 Parameters 

328 ---------- 

329 name : `str` 

330 The name of the run to create. 

331 

332 Notes 

333 ----- 

334 This method cannot be called within transactions, as it needs to be 

335 able to perform its own transaction to be concurrent. 

336 """ 

337 self._collections.register(name, CollectionType.RUN) 

338 

339 @transactional 

340 def removeCollection(self, name: str) -> None: 

341 """Completely remove the given collection. 

342 

343 Parameters 

344 ---------- 

345 name : `str` 

346 The name of the collection to remove. 

347 

348 Raises 

349 ------ 

350 MissingCollectionError 

351 Raised if no collection with the given name exists. 

352 

353 Notes 

354 ----- 

355 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

356 in it are also fully removed. This requires that those datasets be 

357 removed (or at least trashed) from any datastores that hold them first. 

358 

359 A collection may not be deleted as long as it is referenced by a 

360 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

361 be deleted or redefined first. 

362 """ 

363 self._collections.remove(name) 

364 

365 def getCollectionChain(self, parent: str) -> CollectionSearch: 

366 """Return the child collections in a `~CollectionType.CHAINED` 

367 collection. 

368 

369 Parameters 

370 ---------- 

371 parent : `str` 

372 Name of the chained collection. Must have already been added via 

373 a call to `Registry.registerCollection`. 

374 

375 Returns 

376 ------- 

377 children : `CollectionSearch` 

378 An object that defines the search path of the collection. 

379 See :ref:`daf_butler_collection_expressions` for more information. 

380 

381 Raises 

382 ------ 

383 MissingCollectionError 

384 Raised if ``parent`` does not exist in the `Registry`. 

385 TypeError 

386 Raised if ``parent`` does not correspond to a 

387 `~CollectionType.CHAINED` collection. 

388 """ 

389 record = self._collections.find(parent) 

390 if record.type is not CollectionType.CHAINED: 

391 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

392 assert isinstance(record, ChainedCollectionRecord) 

393 return record.children 

394 

395 @transactional 

396 def setCollectionChain(self, parent: str, children: Any) -> None: 

397 """Define or redefine a `~CollectionType.CHAINED` collection. 

398 

399 Parameters 

400 ---------- 

401 parent : `str` 

402 Name of the chained collection. Must have already been added via 

403 a call to `Registry.registerCollection`. 

404 children : `Any` 

405 An expression defining an ordered search of child collections, 

406 generally an iterable of `str`. Restrictions on the dataset types 

407 to be searched can also be included, by passing a mapping or an 

408 iterable containing tuples; see 

409 :ref:`daf_butler_collection_expressions` for more information. 

410 

411 Raises 

412 ------ 

413 MissingCollectionError 

414 Raised when any of the given collections do not exist in the 

415 `Registry`. 

416 TypeError 

417 Raised if ``parent`` does not correspond to a 

418 `~CollectionType.CHAINED` collection. 

419 ValueError 

420 Raised if the given collections contain a cycle. 

421 """ 

422 record = self._collections.find(parent) 

423 if record.type is not CollectionType.CHAINED: 

424 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

425 assert isinstance(record, ChainedCollectionRecord) 

426 children = CollectionSearch.fromExpression(children) 

427 if children != record.children: 

428 record.update(self._collections, children) 
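
# Example (illustrative sketch of defining a chained collection; all collection
# names are hypothetical and the child collections are assumed to exist):
#
#     registry.registerCollection("calib/chain", type=CollectionType.CHAINED)
#     registry.setCollectionChain("calib/chain", ["calib/run2", "calib/run1"])
#     children = registry.getCollectionChain("calib/chain")  # CollectionSearch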

429 

430 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

431 """ 

432 Add a new `DatasetType` to the Registry. 

433 

434 It is not an error to register the same `DatasetType` twice. 

435 

436 Parameters 

437 ---------- 

438 datasetType : `DatasetType` 

439 The `DatasetType` to be added. 

440 

441 Returns 

442 ------- 

443 inserted : `bool` 

444 `True` if ``datasetType`` was inserted, `False` if an identical 

445 existing `DatasetType` was found. Note that in either case the 

446 DatasetType is guaranteed to be defined in the Registry 

447 consistently with the given definition. 

448 

449 Raises 

450 ------ 

451 ValueError 

452 Raised if the dimensions or storage class are invalid. 

453 ConflictingDefinitionError 

454 Raised if this DatasetType is already registered with a different 

455 definition. 

456 

457 Notes 

458 ----- 

459 This method cannot be called within transactions, as it needs to be 

460 able to perform its own transaction to be concurrent. 

461 """ 

462 _, inserted = self._datasets.register(datasetType) 

463 return inserted 

464 

465 def getDatasetType(self, name: str) -> DatasetType: 

466 """Get the `DatasetType`. 

467 

468 Parameters 

469 ---------- 

470 name : `str` 

471 Name of the type. 

472 

473 Returns 

474 ------- 

475 type : `DatasetType` 

476 The `DatasetType` associated with the given name. 

477 

478 Raises 

479 ------ 

480 KeyError 

481 Raised if the requested dataset type could not be found in the registry. 

482 """ 

483 storage = self._datasets.find(name) 

484 if storage is None: 

485 raise KeyError(f"DatasetType '{name}' could not be found.") 

486 return storage.datasetType 
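
# Example (illustrative sketch; the dataset type name, dimensions, and storage
# class name are hypothetical, and the storage class is assumed to be known to
# this registry's StorageClassFactory):
#
#     dimensions = registry.dimensions.extract(["instrument", "detector"])
#     datasetType = DatasetType("my_calib", dimensions, "StructuredDataDict")
#     registry.registerDatasetType(datasetType)
#     assert registry.getDatasetType("my_calib") == datasetType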

487 

488 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *, 

489 collections: Any, **kwargs: Any) -> Optional[DatasetRef]: 

490 """Find a dataset given its `DatasetType` and data ID. 

491 

492 This can be used to obtain a `DatasetRef` that permits the dataset to 

493 be read from a `Datastore`. If the dataset is a component and can not 

494 be found using the provided dataset type, a dataset ref for the parent 

495 will be returned instead but with the correct dataset type. 

496 

497 Parameters 

498 ---------- 

499 datasetType : `DatasetType` or `str` 

500 A `DatasetType` or the name of one. 

501 dataId : `dict` or `DataCoordinate`, optional 

502 A `dict`-like object containing the `Dimension` links that identify 

503 the dataset within a collection. 

504 collections 

505 An expression that fully or partially identifies the collections 

506 to search for the dataset, such as a `str`, `re.Pattern`, or 

507 iterable thereof. `...` can be used to return all collections. 

508 See :ref:`daf_butler_collection_expressions` for more information. 

509 **kwargs 

510 Additional keyword arguments passed to 

511 `DataCoordinate.standardize` to convert ``dataId`` to a true 

512 `DataCoordinate` or augment an existing one. 

513 

514 Returns 

515 ------- 

516 ref : `DatasetRef` 

517 A reference to the dataset, or `None` if no matching Dataset 

518 was found. 

519 

520 Raises 

521 ------ 

522 LookupError 

523 Raised if one or more data ID keys are missing or the dataset type 

524 does not exist. 

525 MissingCollectionError 

526 Raised if any of ``collections`` does not exist in the registry. 

527 """ 

528 if isinstance(datasetType, DatasetType): 

529 storage = self._datasets.find(datasetType.name) 

530 if storage is None: 

531 raise LookupError(f"DatasetType '{datasetType}' has not been registered.") 

532 else: 

533 storage = self._datasets.find(datasetType) 

534 if storage is None: 

535 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.") 

536 dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions, 

537 universe=self.dimensions, **kwargs) 

538 collections = CollectionSearch.fromExpression(collections) 

539 for collectionRecord in collections.iter(self._collections, datasetType=storage.datasetType): 

540 result = storage.find(collectionRecord, dataId) 

541 if result is not None: 

542 return result 

543 

544 return None 
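
# Example (illustrative sketch; the dataset type, collection, and data ID
# values are hypothetical, and the dataset is assumed to have been inserted
# previously):
#
#     ref = registry.findDataset("my_calib", collections=["calib/run1"],
#                                instrument="HypoCam", detector=12)
#     if ref is not None:
#         print(ref.id, ref.dataId)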

545 

546 @transactional 

547 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId], 

548 run: str) -> List[DatasetRef]: 

549 """Insert one or more datasets into the `Registry` 

550 

551 This always adds new datasets; to associate existing datasets with 

552 a new collection, use ``associate``. 

553 

554 Parameters 

555 ---------- 

556 datasetType : `DatasetType` or `str` 

557 A `DatasetType` or the name of one. 

558 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

559 Dimension-based identifiers for the new datasets. 

560 run : `str` 

561 The name of the run that produced the datasets. 

562 

563 Returns 

564 ------- 

565 refs : `list` of `DatasetRef` 

566 Resolved `DatasetRef` instances for all given data IDs (in the same 

567 order). 

568 

569 Raises 

570 ------ 

571 ConflictingDefinitionError 

572 If a dataset with the same dataset type and data ID as one of those 

573 given already exists in ``run``. 

574 MissingCollectionError 

575 Raised if ``run`` does not exist in the registry. 

576 """ 

577 if isinstance(datasetType, DatasetType): 

578 storage = self._datasets.find(datasetType.name) 

579 if storage is None: 

580 raise LookupError(f"DatasetType '{datasetType}' has not been registered.") 

581 else: 

582 storage = self._datasets.find(datasetType) 

583 if storage is None: 

584 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.") 

585 runRecord = self._collections.find(run) 

586 if runRecord.type is not CollectionType.RUN: 

587 raise TypeError(f"Given collection is of type {runRecord.type.name}; RUN collection required.") 

588 assert isinstance(runRecord, RunRecord) 

589 expandedDataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

590 for dataId in dataIds] 

591 try: 

592 refs = list(storage.insert(runRecord, expandedDataIds)) 

593 except sqlalchemy.exc.IntegrityError as err: 

594 raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting " 

595 f"one or more datasets of type {storage.datasetType} into " 

596 f"collection '{run}'. " 

597 f"This probably means a dataset with the same data ID " 

598 f"and dataset type already exists, but it may also mean a " 

599 f"dimension row is missing.") from err 

600 return refs 
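
# Example (illustrative sketch; the run name, dataset type, and data ID are
# hypothetical, and the dimension records they reference are assumed to exist):
#
#     registry.registerRun("calib/run1")
#     refs = registry.insertDatasets(
#         "my_calib",
#         dataIds=[{"instrument": "HypoCam", "detector": 12}],
#         run="calib/run1",
#     )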

601 

602 def getDataset(self, id: int) -> Optional[DatasetRef]: 

603 """Retrieve a Dataset entry. 

604 

605 Parameters 

606 ---------- 

607 id : `int` 

608 The unique identifier for the dataset. 

609 

610 Returns 

611 ------- 

612 ref : `DatasetRef` or `None` 

613 A ref to the Dataset, or `None` if no matching Dataset 

614 was found. 

615 """ 

616 ref = self._datasets.getDatasetRef(id, universe=self.dimensions) 

617 if ref is None: 

618 return None 

619 return ref 

620 

621 @transactional 

622 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

623 """Remove datasets from the Registry. 

624 

625 The datasets will be removed unconditionally from all collections, and 

626 any `Quantum` that consumed this dataset will instead be marked with 

627 having a NULL input. `Datastore` records will *not* be deleted; the 

628 caller is responsible for ensuring that the dataset has already been 

629 removed from all Datastores. 

630 

631 Parameters 

632 ---------- 

633 refs : `Iterable` of `DatasetRef` 

634 References to the datasets to be removed. Must include a valid 

635 ``id`` attribute, and should be considered invalidated upon return. 

636 

637 Raises 

638 ------ 

639 AmbiguousDatasetError 

640 Raised if any ``ref.id`` is `None`. 

641 OrphanedRecordError 

642 Raised if any dataset is still present in any `Datastore`. 

643 """ 

644 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

645 storage = self._datasets.find(datasetType.name) 

646 assert storage is not None 

647 try: 

648 storage.delete(refsForType) 

649 except sqlalchemy.exc.IntegrityError as err: 

650 raise OrphanedRecordError("One or more datasets is still " 

651 "present in one or more Datastores.") from err 

652 

653 @transactional 

654 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

655 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

656 

657 If a DatasetRef with the same exact integer ID is already in a 

658 collection, nothing is changed. If a `DatasetRef` with the same 

659 `DatasetType` and data ID but with different integer ID 

660 exists in the collection, `ConflictingDefinitionError` is raised. 

661 

662 Parameters 

663 ---------- 

664 collection : `str` 

665 Indicates the collection the datasets should be associated with. 

666 refs : `Iterable` [ `DatasetRef` ] 

667 An iterable of resolved `DatasetRef` instances that already exist 

668 in this `Registry`. 

669 

670 Raises 

671 ------ 

672 ConflictingDefinitionError 

673 If a Dataset with the given `DatasetRef` already exists in the 

674 given collection. 

675 AmbiguousDatasetError 

676 Raised if ``any(ref.id is None for ref in refs)``. 

677 MissingCollectionError 

678 Raised if ``collection`` does not exist in the registry. 

679 TypeError 

680 Raised if adding new datasets to the given ``collection`` is not 

681 allowed. 

682 """ 

683 collectionRecord = self._collections.find(collection) 

684 if collectionRecord.type is not CollectionType.TAGGED: 

685 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.") 

686 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

687 storage = self._datasets.find(datasetType.name) 

688 assert storage is not None 

689 try: 

690 storage.associate(collectionRecord, refsForType) 

691 except sqlalchemy.exc.IntegrityError as err: 

692 raise ConflictingDefinitionError( 

693 f"Constraint violation while associating dataset of type {datasetType.name} with " 

694 f"collection {collection}. This probably means that one or more datasets with the same " 

695 f"dataset type and data ID already exist in the collection, but it may also indicate " 

696 f"that the datasets do not exist." 

697 ) from err 

698 

699 @transactional 

700 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

701 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

702 

703 ``collection`` and ``ref`` combinations that are not currently 

704 associated are silently ignored. 

705 

706 Parameters 

707 ---------- 

708 collection : `str` 

709 The collection the datasets should no longer be associated with. 

710 refs : `Iterable` [ `DatasetRef` ] 

711 An iterable of resolved `DatasetRef` instances that already exist 

712 in this `Registry`. 

713 

714 Raises 

715 ------ 

716 AmbiguousDatasetError 

717 Raised if any of the given dataset references is unresolved. 

718 MissingCollectionError 

719 Raised if ``collection`` does not exist in the registry. 

720 TypeError 

721 Raised if removing datasets from the given ``collection`` is not 

722 allowed. 

723 """ 

724 collectionRecord = self._collections.find(collection) 

725 if collectionRecord.type is not CollectionType.TAGGED: 

726 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; " 

727 "expected TAGGED.") 

728 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

729 storage = self._datasets.find(datasetType.name) 

730 assert storage is not None 

731 storage.disassociate(collectionRecord, refsForType) 
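
# Example (illustrative sketch; the TAGGED collection name is hypothetical and
# ``refs`` is assumed to hold resolved DatasetRef instances, e.g. the return
# value of insertDatasets or queryDatasets):
#
#     registry.registerCollection("my/tagged", type=CollectionType.TAGGED)
#     registry.associate("my/tagged", refs)
#     registry.disassociate("my/tagged", refs)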

732 

733 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

734 """Return an object that allows a new `Datastore` instance to 

735 communicate with this `Registry`. 

736 

737 Returns 

738 ------- 

739 manager : `DatastoreRegistryBridgeManager` 

740 Object that mediates communication between this `Registry` and its 

741 associated datastores. 

742 """ 

743 return self._datastoreBridges 

744 

745 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

746 """Retrieve datastore locations for a given dataset. 

747 

748 Parameters 

749 ---------- 

750 ref : `DatasetRef` 

751 A reference to the dataset for which to retrieve storage 

752 information. 

753 

754 Returns 

755 ------- 

756 datastores : `Iterable` [ `str` ] 

757 All the matching datastores holding this dataset. 

758 

759 Raises 

760 ------ 

761 AmbiguousDatasetError 

762 Raised if ``ref.id`` is `None`. 

763 """ 

764 return self._datastoreBridges.findDatastores(ref) 

765 

766 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None, 

767 records: Optional[Mapping[DimensionElement, Optional[DimensionRecord]]] = None, 

768 **kwargs: Any) -> ExpandedDataCoordinate: 

769 """Expand a dimension-based data ID to include additional information. 

770 

771 Parameters 

772 ---------- 

773 dataId : `DataCoordinate` or `dict`, optional 

774 Data ID to be expanded; augmented and overridden by ``kwargs``. 

775 graph : `DimensionGraph`, optional 

776 Set of dimensions for the expanded ID. If `None`, the dimensions 

777 will be inferred from the keys of ``dataId`` and ``kwargs``. 

778 Dimensions that are in ``dataId`` or ``kwargs`` but not in ``graph`` 

779 are silently ignored, providing a way to extract and expand a 

780 subset of a data ID. 

781 records : `Mapping` [`DimensionElement`, `DimensionRecord`], optional 

782 Dimension record data to use before querying the database for that 

783 data. 

784 **kwargs 

785 Additional keywords are treated like additional key-value pairs for 

786 ``dataId``, extending and overriding it. 

787 

788 Returns 

789 ------- 

790 expanded : `ExpandedDataCoordinate` 

791 A data ID that includes full metadata for all of the dimensions it 

792 identifies. 

793 """ 

794 standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions, **kwargs) 

795 if isinstance(standardized, ExpandedDataCoordinate): 

796 return standardized 

797 elif isinstance(dataId, ExpandedDataCoordinate): 

798 records = NamedKeyDict(records) if records is not None else NamedKeyDict() 

799 records.update(dataId.records) 

800 else: 

801 records = NamedKeyDict(records) if records is not None else NamedKeyDict() 

802 keys = dict(standardized.byName()) 

803 for element in standardized.graph.primaryKeyTraversalOrder: 

804 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

805 if record is ...: 

806 storage = self._dimensions[element] 

807 record = storage.fetch(keys) 

808 records[element] = record 

809 if record is not None: 

810 for d in element.implied: 

811 value = getattr(record, d.name) 

812 if keys.setdefault(d.name, value) != value: 

813 raise InconsistentDataIdError( 

814 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

815 f"but {element.name} implies {d.name}={value!r}." 

816 ) 

817 else: 

818 if element in standardized.graph.required: 

819 raise LookupError( 

820 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

821 ) 

822 if element.alwaysJoin: 

823 raise InconsistentDataIdError( 

824 f"Could not fetch record for element {element.name} via keys {keys}, ", 

825 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

826 "related." 

827 ) 

828 records.update((d, None) for d in element.implied) 

829 return ExpandedDataCoordinate(standardized.graph, standardized.values(), records=records) 
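
# Example (illustrative sketch; the instrument and detector values are
# hypothetical and their dimension records are assumed to exist):
#
#     expanded = registry.expandDataId(instrument="HypoCam", detector=12)
#     detectorRecord = expanded.records["detector"]  # full DimensionRecord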

830 

831 def insertDimensionData(self, element: Union[DimensionElement, str], 

832 *data: Union[Mapping[str, Any], DimensionRecord], 

833 conform: bool = True) -> None: 

834 """Insert one or more dimension records into the database. 

835 

836 Parameters 

837 ---------- 

838 element : `DimensionElement` or `str` 

839 The `DimensionElement` or name thereof that identifies the table 

840 records will be inserted into. 

841 data : `dict` or `DimensionRecord` (variadic) 

842 One or more records to insert. 

843 conform : `bool`, optional 

844 If `False` (`True` is default) perform no checking or conversions, 

845 and assume that ``element`` is a `DimensionElement` instance and 

846 ``data`` contains one or more `DimensionRecord` instances of the 

847 appropriate subclass. 

848 """ 

849 if conform: 

850 if isinstance(element, str): 

851 element = self.dimensions[element] 

852 records = [row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row) 

853 for row in data] 

854 else: 

855 # Ignore typing since caller said to trust them with conform=False. 

856 records = data # type: ignore 

857 storage = self._dimensions[element] # type: ignore 

858 storage.insert(*records) 

859 

860 def syncDimensionData(self, element: Union[DimensionElement, str], 

861 row: Union[Mapping[str, Any], DimensionRecord], 

862 conform: bool = True) -> bool: 

863 """Synchronize the given dimension record with the database, inserting 

864 if it does not already exist and comparing values if it does. 

865 

866 Parameters 

867 ---------- 

868 element : `DimensionElement` or `str` 

869 The `DimensionElement` or name thereof that identifies the table 

870 records will be inserted into. 

871 row : `dict` or `DimensionRecord` 

872 The record to insert. 

873 conform : `bool`, optional 

874 If `False` (`True` is default) perform no checking or conversions, 

875 and assume that ``element`` is a `DimensionElement` instance and 

876 ``row`` is a `DimensionRecord` instance of the 

877 appropriate subclass. 

878 

879 Returns 

880 ------- 

881 inserted : `bool` 

882 `True` if a new row was inserted, `False` otherwise. 

883 

884 Raises 

885 ------ 

886 ConflictingDefinitionError 

887 Raised if the record exists in the database (according to primary 

888 key lookup) but is inconsistent with the given one. 

889 

890 Notes 

891 ----- 

892 This method cannot be called within transactions, as it needs to be 

893 able to perform its own transaction to be concurrent. 

894 """ 

895 if conform: 

896 if isinstance(element, str): 

897 element = self.dimensions[element] 

898 record = row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row) 

899 else: 

900 # Ignore typing since caller said to trust them with conform=False. 

901 record = row # type: ignore 

902 storage = self._dimensions[element] # type: ignore 

903 return storage.sync(record) 
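
# Example (illustrative sketch; the record fields shown are hypothetical and
# the exact field names depend on the configured dimension universe):
#
#     registry.insertDimensionData("instrument",
#                                  {"name": "HypoCam", "detector_max": 16})
#     registry.syncDimensionData("detector",
#                                {"instrument": "HypoCam", "id": 12,
#                                 "full_name": "R12"})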

904 

905 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None 

906 ) -> Iterator[DatasetType]: 

907 """Iterate over the dataset types whose names match an expression. 

908 

909 Parameters 

910 ---------- 

911 expression : `Any`, optional 

912 An expression that fully or partially identifies the dataset types 

913 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

914 `...` can be used to return all dataset types, and is the default. 

915 See :ref:`daf_butler_dataset_type_expressions` for more 

916 information. 

917 components : `bool`, optional 

918 If `True`, apply all expression patterns to component dataset type 

919 names as well. If `False`, never apply patterns to components. 

920 If `None` (default), apply patterns to components only if their 

921 parent datasets were not matched by the expression. 

922 Fully-specified component datasets (`str` or `DatasetType` 

923 instances) are always included. 

924 

925 Yields 

926 ------ 

927 datasetType : `DatasetType` 

928 A `DatasetType` instance whose name matches ``expression``. 

929 """ 

930 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

931 if wildcard is Ellipsis: 

932 for datasetType in self._datasets: 

933 # The dataset type can no longer be a component 

934 yield datasetType 

935 if components and datasetType.isComposite(): 

936 # Automatically create the component dataset types 

937 for component in datasetType.makeAllComponentDatasetTypes(): 

938 yield component 

939 return 

940 done: Set[str] = set() 

941 for name in wildcard.strings: 

942 storage = self._datasets.find(name) 

943 if storage is not None: 

944 done.add(storage.datasetType.name) 

945 yield storage.datasetType 

946 if wildcard.patterns: 

947 # If components (the argument) is None, we'll save component 

948 # datasets that we might want to match, but only if their parents 

949 # didn't get included. 

950 componentsForLater = [] 

951 for registeredDatasetType in self._datasets: 

952 # Components are not stored in registry so expand them here 

953 allDatasetTypes = [registeredDatasetType] \ 

954 + registeredDatasetType.makeAllComponentDatasetTypes() 

955 for datasetType in allDatasetTypes: 

956 if datasetType.name in done: 

957 continue 

958 parentName, componentName = datasetType.nameAndComponent() 

959 if componentName is not None and not components: 

960 if components is None and parentName not in done: 

961 componentsForLater.append(datasetType) 

962 continue 

963 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

964 done.add(datasetType.name) 

965 yield datasetType 

966 # Go back and try to match saved components. 

967 for datasetType in componentsForLater: 

968 parentName, _ = datasetType.nameAndComponent() 

969 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

970 yield datasetType 

971 

972 def queryCollections(self, expression: Any = ..., 

973 datasetType: Optional[DatasetType] = None, 

974 collectionType: Optional[CollectionType] = None, 

975 flattenChains: bool = False, 

976 includeChains: Optional[bool] = None) -> Iterator[str]: 

977 """Iterate over the collections whose names match an expression. 

978 

979 Parameters 

980 ---------- 

981 expression : `Any`, optional 

982 An expression that fully or partially identifies the collections 

983 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

984 `...` can be used to return all collections, and is the default. 

985 See :ref:`daf_butler_collection_expressions` for more 

986 information. 

987 datasetType : `DatasetType`, optional 

988 If provided, only yield collections that should be searched for 

989 this dataset type according to ``expression``. If this is 

990 not provided, any dataset type restrictions in ``expression`` are 

991 ignored. 

992 collectionType : `CollectionType`, optional 

993 If provided, only yield collections of this type. 

994 flattenChains : `bool`, optional 

995 If `True` (`False` is default), recursively yield the child 

996 collections of matching `~CollectionType.CHAINED` collections. 

997 includeChains : `bool`, optional 

998 If `True`, yield records for matching `~CollectionType.CHAINED` 

999 collections. Default is the opposite of ``flattenChains``: include 

1000 either CHAINED collections or their children, but not both. 

1001 

1002 Yields 

1003 ------ 

1004 collection : `str` 

1005 The name of a collection that matches ``expression``. 

1006 """ 

1007 query = CollectionQuery.fromExpression(expression) 

1008 for record in query.iter(self._collections, datasetType=datasetType, collectionType=collectionType, 

1009 flattenChains=flattenChains, includeChains=includeChains): 

1010 yield record.name 
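
# Example (illustrative sketch; the pattern and the expectation that RUN
# collections exist are hypothetical):
#
#     import re
#     calibTypes = list(registry.queryDatasetTypes(re.compile(r"my_.*")))
#     runs = list(registry.queryCollections(..., collectionType=CollectionType.RUN))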

1011 

1012 def makeQueryBuilder(self, summary: QuerySummary) -> QueryBuilder: 

1013 """Return a `QueryBuilder` instance capable of constructing and 

1014 managing more complex queries than those obtainable via `Registry` 

1015 interfaces. 

1016 

1017 This is an advanced interface; downstream code should prefer 

1018 `Registry.queryDimensions` and `Registry.queryDatasets` whenever those 

1019 are sufficient. 

1020 

1021 Parameters 

1022 ---------- 

1023 summary : `QuerySummary` 

1024 Object describing and categorizing the full set of dimensions that 

1025 will be included in the query. 

1026 

1027 Returns 

1028 ------- 

1029 builder : `QueryBuilder` 

1030 Object that can be used to construct and perform advanced queries. 

1031 """ 

1032 return QueryBuilder(summary=summary, 

1033 collections=self._collections, 

1034 dimensions=self._dimensions, 

1035 datasets=self._datasets) 

1036 

1037 def queryDimensions(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *, 

1038 dataId: Optional[DataId] = None, 

1039 datasets: Any = None, 

1040 collections: Any = None, 

1041 where: Optional[str] = None, 

1042 expand: bool = True, 

1043 components: Optional[bool] = None, 

1044 **kwargs: Any) -> Iterator[DataCoordinate]: 

1045 """Query for and iterate over data IDs matching user-provided criteria. 

1046 

1047 Parameters 

1048 ---------- 

1049 dimensions : `Dimension` or `str`, or iterable thereof 

1050 The dimensions of the data IDs to yield, as either `Dimension` 

1051 instances or `str`. Will be automatically expanded to a complete 

1052 `DimensionGraph`. 

1053 dataId : `dict` or `DataCoordinate`, optional 

1054 A data ID whose key-value pairs are used as equality constraints 

1055 in the query. 

1056 datasets : `Any`, optional 

1057 An expression that fully or partially identifies dataset types 

1058 that should constrain the yielded data IDs. For example, including 

1059 "raw" here would constrain the yielded ``instrument``, 

1060 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1061 those for which at least one "raw" dataset exists in 

1062 ``collections``. Allowed types include `DatasetType`, `str`, 

1063 `re.Pattern`, and iterables thereof. Unlike other dataset type 

1064 expressions, `...` is not permitted - it doesn't make sense to 

1065 constrain data IDs on the existence of *all* datasets. 

1066 See :ref:`daf_butler_dataset_type_expressions` for more 

1067 information. 

1068 collections : `Any`, optional 

1069 An expression that fully or partially identifies the collections 

1070 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1071 thereof. `...` can be used to return all collections. Must be 

1072 provided if ``datasets`` is, and is ignored if it is not. See 

1073 :ref:`daf_butler_collection_expressions` for more information. 

1074 where : `str`, optional 

1075 A string expression similar to a SQL WHERE clause. May involve 

1076 any column of a dimension table or (as a shortcut for the primary 

1077 key column of a dimension table) dimension name. See 

1078 :ref:`daf_butler_dimension_expressions` for more information. 

1079 expand : `bool`, optional 

1080 If `True` (default) yield `ExpandedDataCoordinate` instead of 

1081 minimal `DataCoordinate` base-class instances. 

1082 components : `bool`, optional 

1083 If `True`, apply all dataset expression patterns to component 

1084 dataset type names as well. If `False`, never apply patterns to 

1085 components. If `None` (default), apply patterns to components only 

1086 if their parent datasets were not matched by the expression. 

1087 Fully-specified component datasets (`str` or `DatasetType` 

1088 instances) are always included. 

1089 **kwargs 

1090 Additional keyword arguments are forwarded to 

1091 `DataCoordinate.standardize` when processing the ``dataId`` 

1092 argument (and may be used to provide a constraining data ID even 

1093 when the ``dataId`` argument is `None`). 

1094 

1095 Yields 

1096 ------ 

1097 dataId : `DataCoordinate` 

1098 Data IDs matching the given query parameters. Order is 

1099 unspecified. 

1100 """ 

1101 dimensions = iterable(dimensions) 

1102 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1103 standardizedDatasetTypes = set() 

1104 requestedDimensionNames = set(self.dimensions.extract(dimensions).names) 

1105 if datasets is not None: 

1106 if collections is None: 

1107 raise TypeError("Cannot pass 'datasets' without 'collections'.") 

1108 for datasetType in self.queryDatasetTypes(datasets, components=components): 

1109 requestedDimensionNames.update(datasetType.dimensions.names) 

1110 # If any matched dataset type is a component, just operate on 

1111 # its parent instead, because Registry doesn't know anything 

1112 # about what components exist, and here (unlike queryDatasets) 

1113 # we don't care about returning them. 

1114 parentDatasetTypeName, componentName = datasetType.nameAndComponent() 

1115 if componentName is not None: 

1116 datasetType = self.getDatasetType(parentDatasetTypeName) 

1117 standardizedDatasetTypes.add(datasetType) 

1118 # Preprocess collections expression in case the original included 

1119 # single-pass iterators (we'll want to use it multiple times 

1120 # below). 

1121 collections = CollectionQuery.fromExpression(collections) 

1122 

1123 summary = QuerySummary( 

1124 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1125 dataId=standardizedDataId, 

1126 expression=where, 

1127 ) 

1128 builder = self.makeQueryBuilder(summary) 

1129 for datasetType in standardizedDatasetTypes: 

1130 builder.joinDataset(datasetType, collections, isResult=False) 

1131 query = builder.finish() 

1132 predicate = query.predicate() 

1133 for row in self._db.query(query.sql): 

1134 if predicate(row): 

1135 result = query.extractDataId(row) 

1136 if expand: 

1137 yield self.expandDataId(result, records=standardizedDataId.records) 

1138 else: 

1139 yield result 
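
# Example (illustrative sketch; the dataset type, collection, and where clause
# are hypothetical):
#
#     dataIds = registry.queryDimensions(
#         ["exposure", "detector"],
#         datasets="raw",
#         collections="HypoCam/raw",
#         where="detector = 12",
#     )
#     for dataId in dataIds:
#         ...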

1140 

1141 def queryDatasets(self, datasetType: Any, *, 

1142 collections: Any, 

1143 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1144 dataId: Optional[DataId] = None, 

1145 where: Optional[str] = None, 

1146 deduplicate: bool = False, 

1147 expand: bool = True, 

1148 components: Optional[bool] = None, 

1149 **kwargs: Any) -> Iterator[DatasetRef]: 

1150 """Query for and iterate over dataset references matching user-provided 

1151 criteria. 

1152 

1153 Parameters 

1154 ---------- 

1155 datasetType 

1156 An expression that fully or partially identifies the dataset types 

1157 to be queried. Allowed types include `DatasetType`, `str`, 

1158 `re.Pattern`, and iterables thereof. The special value `...` can 

1159 be used to query all dataset types. See 

1160 :ref:`daf_butler_dataset_type_expressions` for more information. 

1161 collections 

1162 An expression that fully or partially identifies the collections 

1163 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1164 thereof. `...` can be used to return all collections. See 

1165 :ref:`daf_butler_collection_expressions` for more information. 

1166 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1167 Dimensions to include in the query (in addition to those used 

1168 to identify the queried dataset type(s)), either to constrain 

1169 the resulting datasets to those for which a matching dimension 

1170 exists, or to relate the dataset type's dimensions to dimensions 

1171 referenced by the ``dataId`` or ``where`` arguments. 

1172 dataId : `dict` or `DataCoordinate`, optional 

1173 A data ID whose key-value pairs are used as equality constraints 

1174 in the query. 

1175 where : `str`, optional 

1176 A string expression similar to a SQL WHERE clause. May involve 

1177 any column of a dimension table or (as a shortcut for the primary 

1178 key column of a dimension table) dimension name. See 

1179 :ref:`daf_butler_dimension_expressions` for more information. 

1180 deduplicate : `bool`, optional 

1181 If `True` (`False` is default), for each result data ID, only 

1182 yield one `DatasetRef` of each `DatasetType`, from the first 

1183 collection in which a dataset of that dataset type appears 

1184 (according to the order of ``collections`` passed in). If `True`, 

1185 ``collections`` must not contain regular expressions and may not 

1186 be `...`. 

1187 expand : `bool`, optional 

1188 If `True` (default) attach `ExpandedDataCoordinate` instead of 

1189 minimal `DataCoordinate` base-class instances. 

1190 components : `bool`, optional 

1191 If `True`, apply all dataset expression patterns to component 

1192 dataset type names as well. If `False`, never apply patterns to 

1193 components. If `None` (default), apply patterns to components only 

1194 if their parent datasets were not matched by the expression. 

1195 Fully-specified component datasets (`str` or `DatasetType` 

1196 instances) are always included. 

1197 **kwargs 

1198 Additional keyword arguments are forwarded to 

1199 `DataCoordinate.standardize` when processing the ``dataId`` 

1200 argument (and may be used to provide a constraining data ID even 

1201 when the ``dataId`` argument is `None`). 

1202 

1203 Yields 

1204 ------ 

1205 ref : `DatasetRef` 

1206 Dataset references matching the given query criteria. These 

1207 are grouped by `DatasetType` if the query evaluates to multiple 

1208 dataset types, but order is otherwise unspecified. 

1209 

1210 Raises 

1211 ------ 

1212 TypeError 

1213 Raised when the arguments are incompatible, such as when a 

1214 collection wildcard is passed when ``deduplicate`` is `True`. 

1215 

1216 Notes 

1217 ----- 

1218 When multiple dataset types are queried in a single call, the 

1219 results of this operation are equivalent to querying for each dataset 

1220 type separately in turn, and no information about the relationships 

1221 between datasets of different types is included. In contexts where 

1222 that kind of information is important, the recommended pattern is to 

1223 use `queryDimensions` to first obtain data IDs (possibly with the 

1224 desired dataset types and collections passed as constraints to the 

1225 query), and then use multiple (generally much simpler) calls to 

1226 `queryDatasets` with the returned data IDs passed as constraints. 

1227 """ 

1228 # Standardize the collections expression. 

1229 if deduplicate: 

1230 collections = CollectionSearch.fromExpression(collections) 

1231 else: 

1232 collections = CollectionQuery.fromExpression(collections) 

1233 # Standardize and expand the data ID provided as a constraint. 

1234 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1235 

1236 # We can only query directly if given a non-component DatasetType 

1237 # instance. If we were given an expression or str or a component 

1238 # DatasetType instance, we'll populate this dict, recurse, and return. 

1239 # If we already have a non-component DatasetType, it will remain None 

1240 # and we'll run the query directly. 

1241 composition: Optional[ 

1242 Dict[ 

1243 DatasetType, # parent dataset type 

1244 List[Optional[str]] # component name, or None for parent 

1245 ] 

1246 ] = None 

1247 if not isinstance(datasetType, DatasetType): 

1248 # We were given a dataset type expression (which may be as simple 

1249 # as a str). Loop over all matching datasets, delegating handling 

1250 # of the `components` argument to queryDatasetTypes, as we populate 

1251 # the composition dict. 

1252 composition = defaultdict(list) 

1253 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components): 

1254 parentName, componentName = trueDatasetType.nameAndComponent() 

1255 if componentName is not None: 

1256 parentDatasetType = self.getDatasetType(parentName) 

1257 composition.setdefault(parentDatasetType, []).append(componentName) 

1258 else: 

1259 composition.setdefault(trueDatasetType, []).append(None) 

1260 elif datasetType.isComponent(): 

1261 # We were given a true DatasetType instance, but it's a component. 

1262 # the composition dict will have exactly one item. 

1263 parentName, componentName = datasetType.nameAndComponent() 

1264 parentDatasetType = self.getDatasetType(parentName) 

1265 composition = {parentDatasetType: [componentName]} 

1266 if composition is not None: 

1267 # We need to recurse. Do that once for each parent dataset type. 

1268 for parentDatasetType, componentNames in composition.items(): 

1269 for parentRef in self.queryDatasets(parentDatasetType, collections=collections, 

1270 dimensions=dimensions, dataId=standardizedDataId, 

1271 where=where, deduplicate=deduplicate): 

1272 # Loop over components, yielding one ref for each component 

1273 # requested. 

1274 for componentName in componentNames: 

1275 if componentName is None: 

1276 yield parentRef 

1277 else: 

1278 yield parentRef.makeComponentRef(componentName) 

1279 return 

1280 # If we get here, there's no need to recurse (or we are already 

1281 # recursing; there can only ever be one level of recursion). 

1282 

1283 # The full set of dimensions in the query is the combination of those 

1284 # needed for the DatasetType and those explicitly requested, if any. 

1285 requestedDimensionNames = set(datasetType.dimensions.names) 

1286 if dimensions is not None: 

1287 requestedDimensionNames.update(self.dimensions.extract(dimensions).names) 

1288 # Construct the summary structure needed to construct a QueryBuilder. 

1289 summary = QuerySummary( 

1290 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1291 dataId=standardizedDataId, 

1292 expression=where, 

1293 ) 

1294 builder = self.makeQueryBuilder(summary) 

1295 # Add the dataset subquery to the query, telling the QueryBuilder to 

1296 # include the rank of the selected collection in the results only if we 

1297 # need to deduplicate. Note that if any of the collections are 

1298 # actually wildcard expressions, and we've asked for deduplication, 

1299 # this will raise TypeError for us. 

1300 if not builder.joinDataset(datasetType, collections, isResult=True, addRank=deduplicate): 

1301 return 

1302 query = builder.finish() 

1303 predicate = query.predicate() 

1304 if not deduplicate: 

1305 # No need to de-duplicate across collections. 

1306 for row in self._db.query(query.sql): 

1307 if predicate(row): 

1308 dataId = query.extractDataId(row, graph=datasetType.dimensions) 

1309 if expand: 

1310 dataId = self.expandDataId(dataId, records=standardizedDataId.records) 

1311 yield query.extractDatasetRef(row, datasetType, dataId)[0] 

1312 else: 

1313 # For each data ID, yield only the DatasetRef with the lowest 

1314 # collection rank. 

1315 bestRefs = {} 

1316 bestRanks: Dict[DataCoordinate, int] = {} 

1317 for row in self._db.query(query.sql): 

1318 if predicate(row): 

1319 ref, rank = query.extractDatasetRef(row, datasetType) 

1320 bestRank = bestRanks.get(ref.dataId, sys.maxsize) 

1321 assert rank is not None 

1322 if rank < bestRank: 

1323 bestRefs[ref.dataId] = ref 

1324 bestRanks[ref.dataId] = rank 

1325 # If caller requested expanded data IDs, we defer that until here 

1326 # so we do as little expansion as possible. 

1327 if expand: 

1328 for ref in bestRefs.values(): 

1329 dataId = self.expandDataId(ref.dataId, records=standardizedDataId.records) 

1330 yield ref.expanded(dataId) 

1331 else: 

1332 yield from bestRefs.values() 
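
# Example (illustrative sketch; the dataset type name and collections are
# hypothetical; deduplicate=True yields at most one ref per data ID, taken
# from the first matching collection in the ordered search):
#
#     refs = registry.queryDatasets("my_calib",
#                                   collections=["calib/run2", "calib/run1"],
#                                   instrument="HypoCam",
#                                   deduplicate=True)
#     for ref in refs:
#         ...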

1333 

1334 storageClasses: StorageClassFactory 

1335 """All storage classes known to the registry (`StorageClassFactory`). 

1336 """