1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

24__all__ = ( 

25 "Registry", 

26) 

27 

28from collections import defaultdict 

29import contextlib 

30import sys 

31from typing import ( 

32 Any, 

33 Dict, 

34 Iterable, 

35 Iterator, 

36 List, 

37 Mapping, 

38 Optional, 

39 Set, 

40 Type, 

41 TYPE_CHECKING, 

42 Union, 

43) 

44 

45import sqlalchemy 

46 

47from ..core import ( 

48 Config, 

49 DataCoordinate, 

50 DataId, 

51 DatasetRef, 

52 DatasetType, 

53 ddl, 

54 Dimension, 

55 DimensionElement, 

56 DimensionGraph, 

57 DimensionRecord, 

58 DimensionUniverse, 

59 ExpandedDataCoordinate, 

60 NamedKeyDict, 

61 StorageClassFactory, 

62) 

63from ..core.utils import doImport, iterable, transactional 

64from ._config import RegistryConfig 

65from .queries import ( 

66 QueryBuilder, 

67 QuerySummary, 

68) 

69from ._collectionType import CollectionType 

70from ._exceptions import ConflictingDefinitionError, InconsistentDataIdError, OrphanedRecordError 

71from .wildcards import CategorizedWildcard, CollectionQuery, CollectionSearch, Ellipsis 

72from .interfaces import ChainedCollectionRecord, RunRecord 

73from .versions import ButlerVersionsManager 

74 

75if TYPE_CHECKING: 

76 from ..butlerConfig import ButlerConfig 

77 from .interfaces import ( 

78 ButlerAttributeManager, 

79 CollectionManager, 

80 Database, 

81 OpaqueTableStorageManager, 

82 DimensionRecordStorageManager, 

83 DatasetRecordStorageManager, 

84 DatastoreRegistryBridgeManager, 

85 ) 

86 

87 

88class Registry: 

89 """Registry interface. 

90 

91 Parameters 

92 ---------- 

93 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

94 Registry configuration 

95 """ 

96 

97 defaultConfigFile = None 

98 """Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or 

99 absolute path. Can be `None` if no defaults are specified. 

100 """ 

101 

102 @classmethod 

103 def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], create: bool = False, 

104 butlerRoot: Optional[str] = None, writeable: bool = True) -> Registry: 

105 """Create `Registry` subclass instance from `config`. 

106 

107 Uses ``registry.cls`` from `config` to determine which subclass to 

108 instantiate. 

109 

110 Parameters 

111 ---------- 

112 config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` 

113 Registry configuration 

114 create : `bool`, optional 

115 Assume empty Registry and create a new one. 

116 butlerRoot : `str`, optional 

117 Path to the repository root this `Registry` will manage. 

118 writeable : `bool`, optional 

119 If `True` (default) create a read-write connection to the database. 

120 

121 Returns 

122 ------- 

123 registry : `Registry` (subclass) 

124 A new `Registry` subclass instance. 

125 """ 

126 if not isinstance(config, RegistryConfig): 

127 if isinstance(config, str) or isinstance(config, Config): 

128 config = RegistryConfig(config) 

129 else: 

130 raise ValueError("Incompatible Registry configuration: {}".format(config)) 

131 config.replaceRoot(butlerRoot) 

132 DatabaseClass = config.getDatabaseClass() 

133 database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0), 

134 namespace=config.get("namespace"), writeable=writeable) 

135 universe = DimensionUniverse(config) 

136 attributes = doImport(config["managers", "attributes"]) 

137 opaque = doImport(config["managers", "opaque"]) 

138 dimensions = doImport(config["managers", "dimensions"]) 

139 collections = doImport(config["managers", "collections"]) 

140 datasets = doImport(config["managers", "datasets"]) 

141 datastoreBridges = doImport(config["managers", "datastores"]) 

142 versions = ButlerVersionsManager.fromConfig(config.get("schema_versions")) 

143 

144 return cls(database, universe, dimensions=dimensions, attributes=attributes, opaque=opaque, 

145 collections=collections, datasets=datasets, datastoreBridges=datastoreBridges, 

146 versions=versions, writeable=writeable, create=create) 
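
# Example: a minimal sketch of constructing a read-only Registry from a
# repository configuration; the path below is hypothetical.
#
#     registry = Registry.fromConfig("/path/to/repo/butler.yaml", writeable=False)
#     assert not registry.isWriteable()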

147 

148 def __init__(self, database: Database, universe: DimensionUniverse, *, 

149 attributes: Type[ButlerAttributeManager], 

150 opaque: Type[OpaqueTableStorageManager], 

151 dimensions: Type[DimensionRecordStorageManager], 

152 collections: Type[CollectionManager], 

153 datasets: Type[DatasetRecordStorageManager], 

154 datastoreBridges: Type[DatastoreRegistryBridgeManager], 

155 versions: ButlerVersionsManager, 

156 writeable: bool = True, 

157 create: bool = False): 

158 self._db = database 

159 self.storageClasses = StorageClassFactory() 

160 with self._db.declareStaticTables(create=create) as context: 

161 self._attributes = attributes.initialize(self._db, context) 

162 self._dimensions = dimensions.initialize(self._db, context, universe=universe) 

163 self._collections = collections.initialize(self._db, context) 

164 self._datasets = datasets.initialize(self._db, context, 

165 collections=self._collections, 

166 universe=self.dimensions) 

167 self._opaque = opaque.initialize(self._db, context) 

168 self._datastoreBridges = datastoreBridges.initialize(self._db, context, 

169 opaque=self._opaque, 

170 datasets=datasets, 

171 universe=self.dimensions) 

172 context.addInitializer(lambda db: versions.storeVersions(self._attributes)) 

173 

174 # This call does not do anything right now as we do not have a way to 

175 # split tables between sub-schemas yet. 

176 versions.checkVersionDigests() 

177 if not create: 

178 # verify that configured versions are compatible with schema 

179 versions.checkStoredVersions(self._attributes, writeable) 

180 

181 self._collections.refresh() 

182 self._datasets.refresh(universe=self._dimensions.universe) 

183 

184 def __str__(self) -> str: 

185 return str(self._db) 

186 

187 def __repr__(self) -> str: 

188 return f"Registry({self._db!r}, {self.dimensions!r})" 

189 

190 def isWriteable(self) -> bool: 

191 """Return `True` if this registry allows write operations, and `False` 

192 otherwise. 

193 """ 

194 return self._db.isWriteable() 

195 

196 @property 

197 def dimensions(self) -> DimensionUniverse: 

198 """All dimensions recognized by this `Registry` (`DimensionUniverse`). 

199 """ 

200 return self._dimensions.universe 

201 

202 @contextlib.contextmanager 

203 def transaction(self) -> Iterator[None]: 

204 """Return a context manager that represents a transaction. 

205 """ 

206 # TODO make savepoint=False the default. 

207 try: 

208 with self._db.transaction(): 

209 yield 

210 except BaseException: 

211 # TODO: this clears the caches sometimes when we wouldn't actually 

212 # need to. Can we avoid that? 

213 self._dimensions.clearCaches() 

214 raise 
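
# Example: a minimal sketch of grouping several write operations so they
# commit or roll back together; the collection names and refs are
# illustrative.
#
#     with registry.transaction():
#         registry.associate("tagged/good", goodRefs)
#         registry.disassociate("tagged/staging", goodRefs)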

215 

216 def registerOpaqueTable(self, tableName: str, spec: ddl.TableSpec) -> None: 

217 """Add an opaque (to the `Registry`) table for use by a `Datastore` or 

218 other data repository client. 

219 

220 Opaque table records can be added via `insertOpaqueData`, retrieved via 

221 `fetchOpaqueData`, and removed via `deleteOpaqueData`. 

222 

223 Parameters 

224 ---------- 

225 tableName : `str` 

226 Logical name of the opaque table. This may differ from the 

227 actual name used in the database by a prefix and/or suffix. 

228 spec : `ddl.TableSpec` 

229 Specification for the table to be added. 

230 """ 

231 self._opaque.register(tableName, spec) 

232 

233 @transactional 

234 def insertOpaqueData(self, tableName: str, *data: dict) -> None: 

235 """Insert records into an opaque table. 

236 

237 Parameters 

238 ---------- 

239 tableName : `str` 

240 Logical name of the opaque table. Must match the name used in a 

241 previous call to `registerOpaqueTable`. 

242 data 

243 Each additional positional argument is a dictionary that represents 

244 a single row to be added. 

245 """ 

246 self._opaque[tableName].insert(*data) 

247 

248 def fetchOpaqueData(self, tableName: str, **where: Any) -> Iterator[dict]: 

249 """Retrieve records from an opaque table. 

250 

251 Parameters 

252 ---------- 

253 tableName : `str` 

254 Logical name of the opaque table. Must match the name used in a 

255 previous call to `registerOpaqueTable`. 

256 where 

257 Additional keyword arguments are interpreted as equality 

258 constraints that restrict the returned rows (combined with AND); 

259 keyword arguments are column names and values are the values they 

260 must have. 

261 

262 Yields 

263 ------ 

264 row : `dict` 

265 A dictionary representing a single result row. 

266 """ 

267 yield from self._opaque[tableName].fetch(**where) 

268 

269 @transactional 

270 def deleteOpaqueData(self, tableName: str, **where: Any) -> None: 

271 """Remove records from an opaque table. 

272 

273 Parameters 

274 ---------- 

275 tableName : `str` 

276 Logical name of the opaque table. Must match the name used in a 

277 previous call to `registerOpaqueTable`. 

278 where 

279 Additional keyword arguments are interpreted as equality 

280 constraints that restrict the deleted rows (combined with AND); 

281 keyword arguments are column names and values are the values they 

282 must have. 

283 """ 

284 self._opaque[tableName].delete(**where) 
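
# Example: a minimal sketch of an opaque-table round trip; the table name and
# columns are illustrative, not a real Datastore schema.
#
#     spec = ddl.TableSpec(fields=[
#         ddl.FieldSpec(name="dataset_id", dtype=sqlalchemy.BigInteger, primaryKey=True),
#         ddl.FieldSpec(name="path", dtype=sqlalchemy.String, length=256),
#     ])
#     registry.registerOpaqueTable("example_datastore_records", spec)
#     registry.insertOpaqueData("example_datastore_records",
#                               {"dataset_id": 1, "path": "a/b.fits"})
#     rows = list(registry.fetchOpaqueData("example_datastore_records", dataset_id=1))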

285 

286 def registerCollection(self, name: str, type: CollectionType = CollectionType.TAGGED) -> None: 

287 """Add a new collection if one with the given name does not exist. 

288 

289 Parameters 

290 ---------- 

291 name : `str` 

292 The name of the collection to create. 

293 type : `CollectionType` 

294 Enum value indicating the type of collection to create. 

295 

296 Notes 

297 ----- 

298 This method cannot be called within transactions, as it needs to be 

299 able to perform its own transaction to be concurrent. 

300 """ 

301 self._collections.register(name, type) 

302 

303 def getCollectionType(self, name: str) -> CollectionType: 

304 """Return an enumeration value indicating the type of the given 

305 collection. 

306 

307 Parameters 

308 ---------- 

309 name : `str` 

310 The name of the collection. 

311 

312 Returns 

313 ------- 

314 type : `CollectionType` 

315 Enum value indicating the type of this collection. 

316 

317 Raises 

318 ------ 

319 MissingCollectionError 

320 Raised if no collection with the given name exists. 

321 """ 

322 return self._collections.find(name).type 

323 

324 def registerRun(self, name: str) -> None: 

325 """Add a new run if one with the given name does not exist. 

326 

327 Parameters 

328 ---------- 

329 name : `str` 

330 The name of the run to create. 

331 

332 Notes 

333 ----- 

334 This method cannot be called within transactions, as it needs to be 

335 able to perform its own transaction to be concurrent. 

336 """ 

337 self._collections.register(name, CollectionType.RUN) 

338 

339 @transactional 

340 def removeCollection(self, name: str) -> None: 

341 """Completely remove the given collection. 

342 

343 Parameters 

344 ---------- 

345 name : `str` 

346 The name of the collection to remove. 

347 

348 Raises 

349 ------ 

350 MissingCollectionError 

351 Raised if no collection with the given name exists. 

352 

353 Notes 

354 ----- 

355 If this is a `~CollectionType.RUN` collection, all datasets and quanta 

356 in it are also fully removed. This requires that those datasets be 

357 removed (or at least trashed) from any datastores that hold them first. 

358 

359 A collection may not be deleted as long as it is referenced by a 

360 `~CollectionType.CHAINED` collection; the ``CHAINED`` collection must 

361 be deleted or redefined first. 

362 """ 

363 self._collections.remove(name) 

364 

365 def getCollectionChain(self, parent: str) -> CollectionSearch: 

366 """Return the child collections in a `~CollectionType.CHAINED` 

367 collection. 

368 

369 Parameters 

370 ---------- 

371 parent : `str` 

372 Name of the chained collection. Must have already been added via 

373 a call to `Registry.registerCollection`. 

374 

375 Returns 

376 ------- 

377 children : `CollectionSearch` 

378 An object that defines the search path of the collection. 

379 See :ref:`daf_butler_collection_expressions` for more information. 

380 

381 Raises 

382 ------ 

383 MissingCollectionError 

384 Raised if ``parent`` does not exist in the `Registry`. 

385 TypeError 

386 Raised if ``parent`` does not correspond to a 

387 `~CollectionType.CHAINED` collection. 

388 """ 

389 record = self._collections.find(parent) 

390 if record.type is not CollectionType.CHAINED: 

391 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

392 assert isinstance(record, ChainedCollectionRecord) 

393 return record.children 

394 

395 @transactional 

396 def setCollectionChain(self, parent: str, children: Any) -> None: 

397 """Define or redefine a `~CollectionType.CHAINED` collection. 

398 

399 Parameters 

400 ---------- 

401 parent : `str` 

402 Name of the chained collection. Must have already been added via 

403 a call to `Registry.registerCollection`. 

404 children : `Any` 

405 An expression defining an ordered search of child collections, 

406 generally an iterable of `str`. Restrictions on the dataset types 

407 to be searched can also be included, by passing mapping or an 

408 iterable containing tuples; see 

409 :ref:`daf_butler_collection_expressions` for more information. 

410 

411 Raises 

412 ------ 

413 MissingCollectionError 

414 Raised when any of the given collections do not exist in the 

415 `Registry`. 

416 TypeError 

417 Raised if ``parent`` does not correspond to a 

418 `~CollectionType.CHAINED` collection. 

419 ValueError 

420 Raised if the given collections contains a cycle. 

421 """ 

422 record = self._collections.find(parent) 

423 if record.type is not CollectionType.CHAINED: 

424 raise TypeError(f"Collection '{parent}' has type {record.type.name}, not CHAINED.") 

425 assert isinstance(record, ChainedCollectionRecord) 

426 children = CollectionSearch.fromExpression(children) 

427 if children != record.children: 

428 record.update(self._collections, children) 
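
# Example: a minimal sketch of defining a CHAINED collection that searches two
# existing collections in order; all collection names are illustrative.
#
#     registry.registerCollection("shared/chain", CollectionType.CHAINED)
#     registry.setCollectionChain("shared/chain", ["processed/run1", "raw/all"])
#     print(registry.getCollectionChain("shared/chain"))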

429 

430 def registerDatasetType(self, datasetType: DatasetType) -> bool: 

431 """ 

432 Add a new `DatasetType` to the Registry. 

433 

434 It is not an error to register the same `DatasetType` twice. 

435 

436 Parameters 

437 ---------- 

438 datasetType : `DatasetType` 

439 The `DatasetType` to be added. 

440 

441 Returns 

442 ------- 

443 inserted : `bool` 

444 `True` if ``datasetType`` was inserted, `False` if an identical 

445 existing `DatasetType` was found. Note that in either case the 

446 DatasetType is guaranteed to be defined in the Registry 

447 consistently with the given definition. 

448 

449 Raises 

450 ------ 

451 ValueError 

452 Raised if the dimensions or storage class are invalid. 

453 ConflictingDefinitionError 

454 Raised if this DatasetType is already registered with a different 

455 definition. 

456 

457 Notes 

458 ----- 

459 This method cannot be called within transactions, as it needs to be 

460 able to perform its own transaction to be concurrent. 

461 """ 

462 _, inserted = self._datasets.register(datasetType) 

463 return inserted 
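
# Example: a minimal sketch of defining and registering a dataset type; the
# name, dimensions, and storage class are illustrative and must exist in the
# repository's dimension universe and storage-class configuration.
#
#     datasetType = DatasetType("example_catalog",
#                               dimensions=registry.dimensions.extract(["instrument", "visit"]),
#                               storageClass="SourceCatalog")
#     registry.registerDatasetType(datasetType)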

464 

465 def getDatasetType(self, name: str) -> DatasetType: 

466 """Get the `DatasetType`. 

467 

468 Parameters 

469 ---------- 

470 name : `str` 

471 Name of the type. 

472 

473 Returns 

474 ------- 

475 type : `DatasetType` 

476 The `DatasetType` associated with the given name. 

477 

478 Raises 

479 ------ 

480 KeyError 

481 Requested named DatasetType could not be found in registry. 

482 """ 

483 storage = self._datasets.find(name) 

484 if storage is None: 

485 raise KeyError(f"DatasetType '{name}' could not be found.") 

486 return storage.datasetType 

487 

488 def findDataset(self, datasetType: Union[DatasetType, str], dataId: Optional[DataId] = None, *, 

489 collections: Any, **kwargs: Any) -> Optional[DatasetRef]: 

490 """Find a dataset given its `DatasetType` and data ID. 

491 

492 This can be used to obtain a `DatasetRef` that permits the dataset to 

493 be read from a `Datastore`. If the dataset is a component and can not 

494 be found using the provided dataset type, a dataset ref for the parent 

495 will be returned instead but with the correct dataset type. 

496 

497 Parameters 

498 ---------- 

499 datasetType : `DatasetType` or `str` 

500 A `DatasetType` or the name of one. 

501 dataId : `dict` or `DataCoordinate`, optional 

502 A `dict`-like object containing the `Dimension` links that identify 

503 the dataset within a collection. 

504 collections 

505 An expression that fully or partially identifies the collections 

506 to search for the dataset, such as a `str`, `re.Pattern`, or 

507 iterable thereof. `...` can be used to return all collections. 

508 See :ref:`daf_butler_collection_expressions` for more information. 

509 **kwargs 

510 Additional keyword arguments passed to 

511 `DataCoordinate.standardize` to convert ``dataId`` to a true 

512 `DataCoordinate` or augment an existing one. 

513 

514 Returns 

515 ------- 

516 ref : `DatasetRef` or `None` 

517 A reference to the dataset, or `None` if no matching Dataset 

518 was found. 

519 

520 Raises 

521 ------ 

522 LookupError 

523 Raised if one or more data ID keys are missing or the dataset type 

524 does not exist. 

525 MissingCollectionError 

526 Raised if any of ``collections`` does not exist in the registry. 

527 """ 

528 if isinstance(datasetType, DatasetType): 

529 storage = self._datasets.find(datasetType.name) 

530 if storage is None: 

531 raise LookupError(f"DatasetType '{datasetType}' has not been registered.") 

532 else: 

533 storage = self._datasets.find(datasetType) 

534 if storage is None: 

535 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.") 

536 dataId = DataCoordinate.standardize(dataId, graph=storage.datasetType.dimensions, 

537 universe=self.dimensions, **kwargs) 

538 collections = CollectionSearch.fromExpression(collections) 

539 for collectionRecord in collections.iter(self._collections, datasetType=storage.datasetType): 

540 result = storage.find(collectionRecord, dataId) 

541 if result is not None: 

542 return result 

543 

544 # Fall back to the parent if we got nothing and this was a component. 

545 if storage.datasetType.isComponent(): 

546 parentType, _ = storage.datasetType.nameAndComponent() 

547 parentRef = self.findDataset(parentType, dataId, collections=collections, **kwargs) 

548 if parentRef is not None: 

549 # Should already conform and we know no components 

550 return DatasetRef(storage.datasetType, parentRef.dataId, id=parentRef.id, 

551 run=parentRef.run, conform=False, hasParentId=True) 

552 

553 return None 
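
# Example: a minimal sketch of looking up a single dataset; the dataset type
# name, data ID values, and collection name are illustrative.
#
#     ref = registry.findDataset("example_catalog", instrument="TestCam", visit=42,
#                                collections="processed/run1")
#     if ref is not None:
#         print(ref.id, ref.run)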

554 

555 @transactional 

556 def insertDatasets(self, datasetType: Union[DatasetType, str], dataIds: Iterable[DataId], 

557 run: str) -> List[DatasetRef]: 

558 """Insert one or more datasets into the `Registry` 

559 

560 This always adds new datasets; to associate existing datasets with 

561 a new collection, use ``associate``. 

562 

563 Parameters 

564 ---------- 

565 datasetType : `DatasetType` or `str` 

566 A `DatasetType` or the name of one. 

567 dataIds : `~collections.abc.Iterable` of `dict` or `DataCoordinate` 

568 Dimension-based identifiers for the new datasets. 

569 run : `str` 

570 The name of the run that produced the datasets. 

571 

572 Returns 

573 ------- 

574 refs : `list` of `DatasetRef` 

575 Resolved `DatasetRef` instances for all given data IDs (in the same 

576 order). 

577 

578 Raises 

579 ------ 

580 ConflictingDefinitionError 

581 If a dataset with the same dataset type and data ID as one of those 

582 given already exists in ``run``. 

583 MissingCollectionError 

584 Raised if ``run`` does not exist in the registry. 

585 """ 

586 if isinstance(datasetType, DatasetType): 

587 storage = self._datasets.find(datasetType.name) 

588 if storage is None: 

589 raise LookupError(f"DatasetType '{datasetType}' has not been registered.") 

590 else: 

591 storage = self._datasets.find(datasetType) 

592 if storage is None: 

593 raise LookupError(f"DatasetType with name '{datasetType}' has not been registered.") 

594 runRecord = self._collections.find(run) 

595 if runRecord.type is not CollectionType.RUN: 

596 raise TypeError(f"Given collection is of type {runRecord.type.name}; RUN collection required.") 

597 assert isinstance(runRecord, RunRecord) 

598 expandedDataIds = [self.expandDataId(dataId, graph=storage.datasetType.dimensions) 

599 for dataId in dataIds] 

600 try: 

601 refs = list(storage.insert(runRecord, expandedDataIds)) 

602 except sqlalchemy.exc.IntegrityError as err: 

603 raise ConflictingDefinitionError(f"A database constraint failure was triggered by inserting " 

604 f"one or more datasets of type {storage.datasetType} into " 

605 f"collection '{run}'. " 

606 f"This probably means a dataset with the same data ID " 

607 f"and dataset type already exists, but it may also mean a " 

608 f"dimension row is missing.") from err 

609 return refs 
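
# Example: a minimal sketch of registering a RUN collection and inserting two
# datasets into it; all names and data ID values are illustrative.
#
#     registry.registerRun("processed/run1")
#     refs = registry.insertDatasets("example_catalog",
#                                    dataIds=[{"instrument": "TestCam", "visit": 42},
#                                             {"instrument": "TestCam", "visit": 43}],
#                                    run="processed/run1")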

610 

611 def getDataset(self, id: int) -> Optional[DatasetRef]: 

612 """Retrieve a Dataset entry. 

613 

614 Parameters 

615 ---------- 

616 id : `int` 

617 The unique identifier for the dataset. 

618 

619 Returns 

620 ------- 

621 ref : `DatasetRef` or `None` 

622 A ref to the Dataset, or `None` if no matching Dataset 

623 was found. 

624 """ 

625 ref = self._datasets.getDatasetRef(id, universe=self.dimensions) 

626 if ref is None: 

627 return None 

628 return ref 

629 

630 @transactional 

631 def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: 

632 """Remove datasets from the Registry. 

633 

634 The datasets will be removed unconditionally from all collections, and 

635 any `Quantum` that consumed this dataset will instead be marked as 

636 having a NULL input. `Datastore` records will *not* be deleted; the 

637 caller is responsible for ensuring that the dataset has already been 

638 removed from all Datastores. 

639 

640 Parameters 

641 ---------- 

642 refs : `Iterable` of `DatasetRef` 

643 References to the datasets to be removed. Must include a valid 

644 ``id`` attribute, and should be considered invalidated upon return. 

645 

646 Raises 

647 ------ 

648 AmbiguousDatasetError 

649 Raised if any ``ref.id`` is `None`. 

650 OrphanedRecordError 

651 Raised if any dataset is still present in any `Datastore`. 

652 """ 

653 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

654 storage = self._datasets.find(datasetType.name) 

655 assert storage is not None 

656 try: 

657 storage.delete(refsForType) 

658 except sqlalchemy.exc.IntegrityError as err: 

659 raise OrphanedRecordError("One or more datasets is still " 

660 "present in one or more Datastores.") from err 

661 

662 @transactional 

663 def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

664 """Add existing datasets to a `~CollectionType.TAGGED` collection. 

665 

666 If a `DatasetRef` with the exact same integer ID is already in a 

667 collection, nothing is changed. If a `DatasetRef` with the same 

668 `DatasetType` and data ID but with different integer ID 

669 exists in the collection, `ConflictingDefinitionError` is raised. 

670 

671 Parameters 

672 ---------- 

673 collection : `str` 

674 Indicates the collection the datasets should be associated with. 

675 refs : `Iterable` [ `DatasetRef` ] 

676 An iterable of resolved `DatasetRef` instances that already exist 

677 in this `Registry`. 

678 

679 Raises 

680 ------ 

681 ConflictingDefinitionError 

682 If a Dataset with the given `DatasetRef` already exists in the 

683 given collection. 

684 AmbiguousDatasetError 

685 Raised if ``any(ref.id is None for ref in refs)``. 

686 MissingCollectionError 

687 Raised if ``collection`` does not exist in the registry. 

688 TypeError 

689 Raised if adding new datasets to the given ``collection`` is not 

690 allowed. 

691 """ 

692 collectionRecord = self._collections.find(collection) 

693 if collectionRecord.type is not CollectionType.TAGGED: 

694 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}, not TAGGED.") 

695 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

696 storage = self._datasets.find(datasetType.name) 

697 assert storage is not None 

698 try: 

699 storage.associate(collectionRecord, refsForType) 

700 except sqlalchemy.exc.IntegrityError as err: 

701 raise ConflictingDefinitionError( 

702 f"Constraint violation while associating dataset of type {datasetType.name} with " 

703 f"collection {collection}. This probably means that one or more datasets with the same " 

704 f"dataset type and data ID already exist in the collection, but it may also indicate " 

705 f"that the datasets do not exist." 

706 ) from err 

707 

708 @transactional 

709 def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: 

710 """Remove existing datasets from a `~CollectionType.TAGGED` collection. 

711 

712 ``collection`` and ``ref`` combinations that are not currently 

713 associated are silently ignored. 

714 

715 Parameters 

716 ---------- 

717 collection : `str` 

718 The collection the datasets should no longer be associated with. 

719 refs : `Iterable` [ `DatasetRef` ] 

720 An iterable of resolved `DatasetRef` instances that already exist 

721 in this `Registry`. 

722 

723 Raises 

724 ------ 

725 AmbiguousDatasetError 

726 Raised if any of the given dataset references is unresolved. 

727 MissingCollectionError 

728 Raised if ``collection`` does not exist in the registry. 

729 TypeError 

730 Raise adding new datasets to the given ``collection`` is not 

731 allowed. 

732 """ 

733 collectionRecord = self._collections.find(collection) 

734 if collectionRecord.type is not CollectionType.TAGGED: 

735 raise TypeError(f"Collection '{collection}' has type {collectionRecord.type.name}; " 

736 "expected TAGGED.") 

737 for datasetType, refsForType in DatasetRef.groupByType(refs).items(): 

738 storage = self._datasets.find(datasetType.name) 

739 assert storage is not None 

740 storage.disassociate(collectionRecord, refsForType) 
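
# Example: a minimal sketch of tagging existing, resolved datasets into a
# TAGGED collection and then untagging one of them; names are illustrative.
#
#     registry.registerCollection("tagged/best", CollectionType.TAGGED)
#     registry.associate("tagged/best", refs)
#     registry.disassociate("tagged/best", refs[:1])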

741 

742 def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: 

743 """Return an object that allows a new `Datastore` instance to 

744 communicate with this `Registry`. 

745 

746 Returns 

747 ------- 

748 manager : `DatastoreRegistryBridgeManager` 

749 Object that mediates communication between this `Registry` and its 

750 associated datastores. 

751 """ 

752 return self._datastoreBridges 

753 

754 def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: 

755 """Retrieve datastore locations for a given dataset. 

756 

757 Parameters 

758 ---------- 

759 ref : `DatasetRef` 

760 A reference to the dataset for which to retrieve storage 

761 information. 

762 

763 Returns 

764 ------- 

765 datastores : `Iterable` [ `str` ] 

766 All the matching datastores holding this dataset. 

767 

768 Raises 

769 ------ 

770 AmbiguousDatasetError 

771 Raised if ``ref.id`` is `None`. 

772 """ 

773 return self._datastoreBridges.findDatastores(ref) 

774 

775 def expandDataId(self, dataId: Optional[DataId] = None, *, graph: Optional[DimensionGraph] = None, 

776 records: Optional[Mapping[DimensionElement, Optional[DimensionRecord]]] = None, 

777 **kwargs: Any) -> ExpandedDataCoordinate: 

778 """Expand a dimension-based data ID to include additional information. 

779 

780 Parameters 

781 ---------- 

782 dataId : `DataCoordinate` or `dict`, optional 

783 Data ID to be expanded; augmented and overridden by ``kwargs``. 

784 graph : `DimensionGraph`, optional 

785 Set of dimensions for the expanded ID. If `None`, the dimensions 

786 will be inferred from the keys of ``dataId`` and ``kwargs``. 

787 Dimensions that are in ``dataId`` or ``kwargs`` but not in ``graph`` 

788 are silently ignored, providing a way to extract and expand a 

789 subset of a data ID. 

790 records : `Mapping` [`DimensionElement`, `DimensionRecord`], optional 

791 Dimension record data to use before querying the database for that 

792 data. 

793 **kwargs 

794 Additional keywords are treated like additional key-value pairs for 

795 ``dataId``, extending and overriding its values. 

796 

797 Returns 

798 ------- 

799 expanded : `ExpandedDataCoordinate` 

800 A data ID that includes full metadata for all of the dimensions it 

801 identifies. 

802 """ 

803 standardized = DataCoordinate.standardize(dataId, graph=graph, universe=self.dimensions, **kwargs) 

804 if isinstance(standardized, ExpandedDataCoordinate): 

805 return standardized 

806 elif isinstance(dataId, ExpandedDataCoordinate): 

807 records = NamedKeyDict(records) if records is not None else NamedKeyDict() 

808 records.update(dataId.records) 

809 else: 

810 records = NamedKeyDict(records) if records is not None else NamedKeyDict() 

811 keys = dict(standardized.byName()) 

812 for element in standardized.graph.primaryKeyTraversalOrder: 

813 record = records.get(element.name, ...) # Use ... to mean not found; None might mean NULL 

814 if record is ...: 

815 storage = self._dimensions[element] 

816 record = storage.fetch(keys) 

817 records[element] = record 

818 if record is not None: 

819 for d in element.implied: 

820 value = getattr(record, d.name) 

821 if keys.setdefault(d.name, value) != value: 

822 raise InconsistentDataIdError( 

823 f"Data ID {standardized} has {d.name}={keys[d.name]!r}, " 

824 f"but {element.name} implies {d.name}={value!r}." 

825 ) 

826 else: 

827 if element in standardized.graph.required: 

828 raise LookupError( 

829 f"Could not fetch record for required dimension {element.name} via keys {keys}." 

830 ) 

831 if element.alwaysJoin: 

832 raise InconsistentDataIdError( 

833 f"Could not fetch record for element {element.name} via keys {keys}, ", 

834 "but it is marked alwaysJoin=True; this means one or more dimensions are not " 

835 "related." 

836 ) 

837 records.update((d, None) for d in element.implied) 

838 return ExpandedDataCoordinate(standardized.graph, standardized.values(), records=records) 
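
# Example: a minimal sketch of expanding a minimal data ID so that the
# associated dimension records are attached; the values are illustrative.
#
#     dataId = registry.expandDataId(instrument="TestCam", exposure=100)
#     print(dataId.records["exposure"])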

839 

840 def insertDimensionData(self, element: Union[DimensionElement, str], 

841 *data: Union[Mapping[str, Any], DimensionRecord], 

842 conform: bool = True) -> None: 

843 """Insert one or more dimension records into the database. 

844 

845 Parameters 

846 ---------- 

847 element : `DimensionElement` or `str` 

848 The `DimensionElement` or name thereof that identifies the table 

849 records will be inserted into. 

850 data : `dict` or `DimensionRecord` (variadic) 

851 One or more records to insert. 

852 conform : `bool`, optional 

853 If `False` (`True` is default) perform no checking or conversions, 

854 and assume that ``element`` is a `DimensionElement` instance and 

855 ``data`` is one or more `DimensionRecord` instances of the 

856 appropriate subclass. 

857 """ 

858 if conform: 

859 if isinstance(element, str): 

860 element = self.dimensions[element] 

861 records = [row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row) 

862 for row in data] 

863 else: 

864 # Ignore typing since caller said to trust them with conform=False. 

865 records = data # type: ignore 

866 storage = self._dimensions[element] # type: ignore 

867 storage.insert(*records) 

868 

869 def syncDimensionData(self, element: Union[DimensionElement, str], 

870 row: Union[Mapping[str, Any], DimensionRecord], 

871 conform: bool = True) -> bool: 

872 """Synchronize the given dimension record with the database, inserting 

873 if it does not already exist and comparing values if it does. 

874 

875 Parameters 

876 ---------- 

877 element : `DimensionElement` or `str` 

878 The `DimensionElement` or name thereof that identifies the table 

879 records will be inserted into. 

880 row : `dict` or `DimensionRecord` 

881 The record to insert. 

882 conform : `bool`, optional 

883 If `False` (`True` is default) perform no checking or conversions, 

884 and assume that ``element`` is a `DimensionElement` instance and 

885 ``row`` is a `DimensionRecord` instance of the 

886 appropriate subclass. 

887 

888 Returns 

889 ------- 

890 inserted : `bool` 

891 `True` if a new row was inserted, `False` otherwise. 

892 

893 Raises 

894 ------ 

895 ConflictingDefinitionError 

896 Raised if the record exists in the database (according to primary 

897 key lookup) but is inconsistent with the given one. 

898 

899 Notes 

900 ----- 

901 This method cannot be called within transactions, as it needs to be 

902 able to perform its own transaction to be concurrent. 

903 """ 

904 if conform: 

905 if isinstance(element, str): 

906 element = self.dimensions[element] 

907 record = row if isinstance(row, DimensionRecord) else element.RecordClass.fromDict(row) 

908 else: 

909 # Ignore typing since caller said to trust them with conform=False. 

910 record = row # type: ignore 

911 storage = self._dimensions[element] # type: ignore 

912 return storage.sync(record) 
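
# Example: a minimal sketch of inserting a dimension record and later
# synchronizing it; the element name and record fields are illustrative and
# must match the repository's dimension universe.
#
#     registry.insertDimensionData("instrument",
#                                  {"name": "TestCam", "detector_max": 4,
#                                   "visit_max": 999999, "exposure_max": 999999})
#     inserted = registry.syncDimensionData("instrument", {"name": "TestCam", "detector_max": 4,
#                                                          "visit_max": 999999,
#                                                          "exposure_max": 999999})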

913 

914 def queryDatasetTypes(self, expression: Any = ..., *, components: Optional[bool] = None 

915 ) -> Iterator[DatasetType]: 

916 """Iterate over the dataset types whose names match an expression. 

917 

918 Parameters 

919 ---------- 

920 expression : `Any`, optional 

921 An expression that fully or partially identifies the dataset types 

922 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

923 `...` can be used to return all dataset types, and is the default. 

924 See :ref:`daf_butler_dataset_type_expressions` for more 

925 information. 

926 components : `bool`, optional 

927 If `True`, apply all expression patterns to component dataset type 

928 names as well. If `False`, never apply patterns to components. 

929 If `None` (default), apply patterns to components only if their 

930 parent datasets were not matched by the expression. 

931 Fully-specified component datasets (`str` or `DatasetType` 

932 instances) are always included. 

933 

934 Yields 

935 ------ 

936 datasetType : `DatasetType` 

937 A `DatasetType` instance whose name matches ``expression``. 

938 """ 

939 wildcard = CategorizedWildcard.fromExpression(expression, coerceUnrecognized=lambda d: d.name) 

940 if wildcard is Ellipsis: 

941 for datasetType in self._datasets: 

942 if components or not datasetType.isComponent(): 

943 yield datasetType 

944 return 

945 done: Set[str] = set() 

946 for name in wildcard.strings: 

947 storage = self._datasets.find(name) 

948 if storage is not None: 

949 done.add(storage.datasetType.name) 

950 yield storage.datasetType 

951 if wildcard.patterns: 

952 # If components (the argument) is None, we'll save component 

953 # datasets that we might want to match, but only if their parents 

954 # didn't get included. 

955 componentsForLater = [] 

956 for datasetType in self._datasets: 

957 if datasetType.name in done: 

958 continue 

959 parentName, componentName = datasetType.nameAndComponent() 

960 if componentName is not None and not components: 

961 if components is None and parentName not in done: 

962 componentsForLater.append(datasetType) 

963 continue 

964 if any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

965 done.add(datasetType.name) 

966 yield datasetType 

967 # Go back and try to match saved components. 

968 for datasetType in componentsForLater: 

969 parentName, _ = datasetType.nameAndComponent() 

970 if parentName not in done and any(p.fullmatch(datasetType.name) for p in wildcard.patterns): 

971 yield datasetType 
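
# Example: a minimal sketch of listing dataset types whose names match a
# regular expression; the pattern is illustrative.
#
#     import re
#     for datasetType in registry.queryDatasetTypes(re.compile(r"example_.+")):
#         print(datasetType.name)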

972 

973 def queryCollections(self, expression: Any = ..., 

974 datasetType: Optional[DatasetType] = None, 

975 collectionType: Optional[CollectionType] = None, 

976 flattenChains: bool = False, 

977 includeChains: Optional[bool] = None) -> Iterator[str]: 

978 """Iterate over the collections whose names match an expression. 

979 

980 Parameters 

981 ---------- 

982 expression : `Any`, optional 

983 An expression that fully or partially identifies the collections 

984 to return, such as a `str`, `re.Pattern`, or iterable thereof. 

985 `...` can be used to return all collections, and is the default. 

986 See :ref:`daf_butler_collection_expressions` for more 

987 information. 

988 datasetType : `DatasetType`, optional 

989 If provided, only yield collections that should be searched for 

990 this dataset type according to ``expression``. If this is 

991 not provided, any dataset type restrictions in ``expression`` are 

992 ignored. 

993 collectionType : `CollectionType`, optional 

994 If provided, only yield collections of this type. 

995 flattenChains : `bool`, optional 

996 If `True` (`False` is default), recursively yield the child 

997 collections of matching `~CollectionType.CHAINED` collections. 

998 includeChains : `bool`, optional 

999 If `True`, yield records for matching `~CollectionType.CHAINED` 

1000 collections. Default is the opposite of ``flattenChains``: include 

1001 either CHAINED collections or their children, but not both. 

1002 

1003 Yields 

1004 ------ 

1005 collection : `str` 

1006 The name of a collection that matches ``expression``. 

1007 """ 

1008 query = CollectionQuery.fromExpression(expression) 

1009 for record in query.iter(self._collections, datasetType=datasetType, collectionType=collectionType, 

1010 flattenChains=flattenChains, includeChains=includeChains): 

1011 yield record.name 

1012 

1013 def makeQueryBuilder(self, summary: QuerySummary) -> QueryBuilder: 

1014 """Return a `QueryBuilder` instance capable of constructing and 

1015 managing more complex queries than those obtainable via `Registry` 

1016 interfaces. 

1017 

1018 This is an advanced interface; downstream code should prefer 

1019 `Registry.queryDimensions` and `Registry.queryDatasets` whenever those 

1020 are sufficient. 

1021 

1022 Parameters 

1023 ---------- 

1024 summary : `QuerySummary` 

1025 Object describing and categorizing the full set of dimensions that 

1026 will be included in the query. 

1027 

1028 Returns 

1029 ------- 

1030 builder : `QueryBuilder` 

1031 Object that can be used to construct and perform advanced queries. 

1032 """ 

1033 return QueryBuilder(summary=summary, 

1034 collections=self._collections, 

1035 dimensions=self._dimensions, 

1036 datasets=self._datasets) 

1037 

1038 def queryDimensions(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimension, str], *, 

1039 dataId: Optional[DataId] = None, 

1040 datasets: Any = None, 

1041 collections: Any = None, 

1042 where: Optional[str] = None, 

1043 expand: bool = True, 

1044 components: Optional[bool] = None, 

1045 **kwargs: Any) -> Iterator[DataCoordinate]: 

1046 """Query for and iterate over data IDs matching user-provided criteria. 

1047 

1048 Parameters 

1049 ---------- 

1050 dimensions : `Dimension` or `str`, or iterable thereof 

1051 The dimensions of the data IDs to yield, as either `Dimension` 

1052 instances or `str`. Will be automatically expanded to a complete 

1053 `DimensionGraph`. 

1054 dataId : `dict` or `DataCoordinate`, optional 

1055 A data ID whose key-value pairs are used as equality constraints 

1056 in the query. 

1057 datasets : `Any`, optional 

1058 An expression that fully or partially identifies dataset types 

1059 that should constrain the yielded data IDs. For example, including 

1060 "raw" here would constrain the yielded ``instrument``, 

1061 ``exposure``, ``detector``, and ``physical_filter`` values to only 

1062 those for which at least one "raw" dataset exists in 

1063 ``collections``. Allowed types include `DatasetType`, `str`, 

1064 `re.Pattern`, and iterables thereof. Unlike other dataset type 

1065 expressions, `...` is not permitted - it doesn't make sense to 

1066 constrain data IDs on the existence of *all* datasets. 

1067 See :ref:`daf_butler_dataset_type_expressions` for more 

1068 information. 

1069 collections : `Any`, optional 

1070 An expression that fully or partially identifies the collections 

1071 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1072 thereof. `...` can be used to return all collections. Must be 

1073 provided if ``datasets`` is, and is ignored if it is not. See 

1074 :ref:`daf_butler_collection_expressions` for more information. 

1075 where : `str`, optional 

1076 A string expression similar to a SQL WHERE clause. May involve 

1077 any column of a dimension table or (as a shortcut for the primary 

1078 key column of a dimension table) dimension name. See 

1079 :ref:`daf_butler_dimension_expressions` for more information. 

1080 expand : `bool`, optional 

1081 If `True` (default) yield `ExpandedDataCoordinate` instead of 

1082 minimal `DataCoordinate` base-class instances. 

1083 components : `bool`, optional 

1084 If `True`, apply all dataset expression patterns to component 

1085 dataset type names as well. If `False`, never apply patterns to 

1086 components. If `None` (default), apply patterns to components only 

1087 if their parent datasets were not matched by the expression. 

1088 Fully-specified component datasets (`str` or `DatasetType` 

1089 instances) are always included. 

1090 **kwargs 

1091 Additional keyword arguments are forwarded to 

1092 `DataCoordinate.standardize` when processing the ``dataId`` 

1093 argument (and may be used to provide a constraining data ID even 

1094 when the ``dataId`` argument is `None`). 

1095 

1096 Yields 

1097 ------ 

1098 dataId : `DataCoordinate` 

1099 Data IDs matching the given query parameters. Order is 

1100 unspecified. 

1101 """ 

1102 dimensions = iterable(dimensions) 

1103 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1104 standardizedDatasetTypes = set() 

1105 requestedDimensionNames = set(self.dimensions.extract(dimensions).names) 

1106 if datasets is not None: 

1107 if collections is None: 

1108 raise TypeError("Cannot pass 'datasets' without 'collections'.") 

1109 for datasetType in self.queryDatasetTypes(datasets, components=components): 

1110 requestedDimensionNames.update(datasetType.dimensions.names) 

1111 # If any matched dataset type is a component, just operate on 

1112 # its parent instead, because Registry doesn't know anything 

1113 # about what components exist, and here (unlike queryDatasets) 

1114 # we don't care about returning them. 

1115 parentDatasetTypeName, componentName = datasetType.nameAndComponent() 

1116 if componentName is not None: 

1117 datasetType = self.getDatasetType(parentDatasetTypeName) 

1118 standardizedDatasetTypes.add(datasetType) 

1119 # Preprocess collections expression in case the original included 

1120 # single-pass iterators (we'll want to use it multiple times 

1121 # below). 

1122 collections = CollectionQuery.fromExpression(collections) 

1123 

1124 summary = QuerySummary( 

1125 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1126 dataId=standardizedDataId, 

1127 expression=where, 

1128 ) 

1129 builder = self.makeQueryBuilder(summary) 

1130 for datasetType in standardizedDatasetTypes: 

1131 builder.joinDataset(datasetType, collections, isResult=False) 

1132 query = builder.finish() 

1133 predicate = query.predicate() 

1134 for row in self._db.query(query.sql): 

1135 if predicate(row): 

1136 result = query.extractDataId(row) 

1137 if expand: 

1138 yield self.expandDataId(result, records=standardizedDataId.records) 

1139 else: 

1140 yield result 
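
# Example: a minimal sketch of querying data IDs constrained by the existence
# of a dataset; the dataset type, collection, dimension names, and ``where``
# expression are illustrative.
#
#     for dataId in registry.queryDimensions(["exposure", "detector"],
#                                            datasets="example_raw",
#                                            collections="raw/all",
#                                            where="instrument = 'TestCam'"):
#         print(dataId)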

1141 

1142 def queryDatasets(self, datasetType: Any, *, 

1143 collections: Any, 

1144 dimensions: Optional[Iterable[Union[Dimension, str]]] = None, 

1145 dataId: Optional[DataId] = None, 

1146 where: Optional[str] = None, 

1147 deduplicate: bool = False, 

1148 expand: bool = True, 

1149 components: Optional[bool] = None, 

1150 **kwargs: Any) -> Iterator[DatasetRef]: 

1151 """Query for and iterate over dataset references matching user-provided 

1152 criteria. 

1153 

1154 Parameters 

1155 ---------- 

1156 datasetType 

1157 An expression that fully or partially identifies the dataset types 

1158 to be queried. Allowed types include `DatasetType`, `str`, 

1159 `re.Pattern`, and iterables thereof. The special value `...` can 

1160 be used to query all dataset types. See 

1161 :ref:`daf_butler_dataset_type_expressions` for more information. 

1162 collections 

1163 An expression that fully or partially identifies the collections 

1164 to search for datasets, such as a `str`, `re.Pattern`, or iterable 

1165 thereof. `...` can be used to return all collections. See 

1166 :ref:`daf_butler_collection_expressions` for more information. 

1167 dimensions : `~collections.abc.Iterable` of `Dimension` or `str` 

1168 Dimensions to include in the query (in addition to those used 

1169 to identify the queried dataset type(s)), either to constrain 

1170 the resulting datasets to those for which a matching dimension 

1171 exists, or to relate the dataset type's dimensions to dimensions 

1172 referenced by the ``dataId`` or ``where`` arguments. 

1173 dataId : `dict` or `DataCoordinate`, optional 

1174 A data ID whose key-value pairs are used as equality constraints 

1175 in the query. 

1176 where : `str`, optional 

1177 A string expression similar to a SQL WHERE clause. May involve 

1178 any column of a dimension table or (as a shortcut for the primary 

1179 key column of a dimension table) dimension name. See 

1180 :ref:`daf_butler_dimension_expressions` for more information. 

1181 deduplicate : `bool`, optional 

1182 If `True` (`False` is default), for each result data ID, only 

1183 yield one `DatasetRef` of each `DatasetType`, from the first 

1184 collection in which a dataset of that dataset type appears 

1185 (according to the order of ``collections`` passed in). If `True`, 

1186 ``collections`` must not contain regular expressions and may not 

1187 be `...`. 

1188 expand : `bool`, optional 

1189 If `True` (default) attach `ExpandedDataCoordinate` instead of 

1190 minimal `DataCoordinate` base-class instances. 

1191 components : `bool`, optional 

1192 If `True`, apply all dataset expression patterns to component 

1193 dataset type names as well. If `False`, never apply patterns to 

1194 components. If `None` (default), apply patterns to components only 

1195 if their parent datasets were not matched by the expression. 

1196 Fully-specified component datasets (`str` or `DatasetType` 

1197 instances) are always included. 

1198 **kwargs 

1199 Additional keyword arguments are forwarded to 

1200 `DataCoordinate.standardize` when processing the ``dataId`` 

1201 argument (and may be used to provide a constraining data ID even 

1202 when the ``dataId`` argument is `None`). 

1203 

1204 Yields 

1205 ------ 

1206 ref : `DatasetRef` 

1207 Dataset references matching the given query criteria. These 

1208 are grouped by `DatasetType` if the query evaluates to multiple 

1209 dataset types, but order is otherwise unspecified. 

1210 

1211 Raises 

1212 ------ 

1213 TypeError 

1214 Raised when the arguments are incompatible, such as when a 

1215 collection wildcard is passed when ``deduplicate`` is `True`. 

1216 

1217 Notes 

1218 ----- 

1219 When multiple dataset types are queried in a single call, the 

1220 results of this operation are equivalent to querying for each dataset 

1221 type separately in turn, and no information about the relationships 

1222 between datasets of different types is included. In contexts where 

1223 that kind of information is important, the recommended pattern is to 

1224 use `queryDimensions` to first obtain data IDs (possibly with the 

1225 desired dataset types and collections passed as constraints to the 

1226 query), and then use multiple (generally much simpler) calls to 

1227 `queryDatasets` with the returned data IDs passed as constraints. 

1228 """ 

1229 # Standardize the collections expression. 

1230 if deduplicate: 

1231 collections = CollectionSearch.fromExpression(collections) 

1232 else: 

1233 collections = CollectionQuery.fromExpression(collections) 

1234 # Standardize and expand the data ID provided as a constraint. 

1235 standardizedDataId = self.expandDataId(dataId, **kwargs) 

1236 

1237 # We can only query directly if given a non-component DatasetType 

1238 # instance. If we were given an expression or str or a component 

1239 # DatasetType instance, we'll populate this dict, recurse, and return. 

1240 # If we already have a non-component DatasetType, it will remain None 

1241 # and we'll run the query directly. 

1242 composition: Optional[ 

1243 Dict[ 

1244 DatasetType, # parent dataset type 

1245 List[Optional[str]] # component name, or None for parent 

1246 ] 

1247 ] = None 

1248 if not isinstance(datasetType, DatasetType): 

1249 # We were given a dataset type expression (which may be as simple 

1250 # as a str). Loop over all matching datasets, delegating handling 

1251 # of the `components` argument to queryDatasetTypes, as we populate 

1252 # the composition dict. 

1253 composition = defaultdict(list) 

1254 for trueDatasetType in self.queryDatasetTypes(datasetType, components=components): 

1255 parentName, componentName = trueDatasetType.nameAndComponent() 

1256 if componentName is not None: 

1257 parentDatasetType = self.getDatasetType(parentName) 

1258 composition.setdefault(parentDatasetType, []).append(componentName) 

1259 else: 

1260 composition.setdefault(trueDatasetType, []).append(None) 

1261 elif datasetType.isComponent(): 

1262 # We were given a true DatasetType instance, but it's a component. 

1263 # the composition dict will have exactly one item. 

1264 parentName, componentName = datasetType.nameAndComponent() 

1265 parentDatasetType = self.getDatasetType(parentName) 

1266 composition = {parentDatasetType: [componentName]} 

1267 if composition is not None: 

1268 # We need to recurse. Do that once for each parent dataset type. 

1269 for parentDatasetType, componentNames in composition.items(): 

1270 for parentRef in self.queryDatasets(parentDatasetType, collections=collections, 

1271 dimensions=dimensions, dataId=standardizedDataId, 

1272 where=where, deduplicate=deduplicate): 

1273 # Loop over components, yielding one ref for each 

1274 # one requested. 

1275 for componentName in componentNames: 

1276 if componentName is None: 

1277 yield parentRef 

1278 else: 

1279 yield parentRef.makeComponentRef(componentName) 

1280 return 

1281 # If we get here, there's no need to recurse (or we are already 

1282 # recursing; there can only ever be one level of recursion). 

1283 

1284 # The full set of dimensions in the query is the combination of those 

1285 # needed for the DatasetType and those explicitly requested, if any. 

1286 requestedDimensionNames = set(datasetType.dimensions.names) 

1287 if dimensions is not None: 

1288 requestedDimensionNames.update(self.dimensions.extract(dimensions).names) 

1289 # Construct the summary structure needed to construct a QueryBuilder. 

1290 summary = QuerySummary( 

1291 requested=DimensionGraph(self.dimensions, names=requestedDimensionNames), 

1292 dataId=standardizedDataId, 

1293 expression=where, 

1294 ) 

1295 builder = self.makeQueryBuilder(summary) 

1296 # Add the dataset subquery to the query, telling the QueryBuilder to 

1297 # include the rank of the selected collection in the results only if we 

1298 # need to deduplicate. Note that if any of the collections are 

1299 # actually wildcard expressions, and we've asked for deduplication, 

1300 # this will raise TypeError for us. 

1301 if not builder.joinDataset(datasetType, collections, isResult=True, addRank=deduplicate): 

1302 return 

1303 query = builder.finish() 

1304 predicate = query.predicate() 

1305 if not deduplicate: 

1306 # No need to de-duplicate across collections. 

1307 for row in self._db.query(query.sql): 

1308 if predicate(row): 

1309 dataId = query.extractDataId(row, graph=datasetType.dimensions) 

1310 if expand: 

1311 dataId = self.expandDataId(dataId, records=standardizedDataId.records) 

1312 yield query.extractDatasetRef(row, datasetType, dataId)[0] 

1313 else: 

1314 # For each data ID, yield only the DatasetRef with the lowest 

1315 # collection rank. 

1316 bestRefs = {} 

1317 bestRanks: Dict[DataCoordinate, int] = {} 

1318 for row in self._db.query(query.sql): 

1319 if predicate(row): 

1320 ref, rank = query.extractDatasetRef(row, datasetType) 

1321 bestRank = bestRanks.get(ref.dataId, sys.maxsize) 

1322 assert rank is not None 

1323 if rank < bestRank: 

1324 bestRefs[ref.dataId] = ref 

1325 bestRanks[ref.dataId] = rank 

1326 # If caller requested expanded data IDs, we defer that until here 

1327 # so we do as little expansion as possible. 

1328 if expand: 

1329 for ref in bestRefs.values(): 

1330 dataId = self.expandDataId(ref.dataId, records=standardizedDataId.records) 

1331 yield ref.expanded(dataId) 

1332 else: 

1333 yield from bestRefs.values() 
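
# Example: a minimal sketch of querying datasets across an ordered search
# path, keeping only the first match for each data ID; names are illustrative.
#
#     refs = registry.queryDatasets("example_catalog",
#                                   collections=["processed/run2", "processed/run1"],
#                                   deduplicate=True)
#     for ref in refs:
#         print(ref.dataId, ref.run)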

1334 

1335 storageClasses: StorageClassFactory 

1336 """All storage classes known to the registry (`StorageClassFactory`). 

1337 """